File: /usr/src/linux/include/net/tcp.h

1     /*
2      * INET		An implementation of the TCP/IP protocol suite for the LINUX
3      *		operating system.  INET is implemented using the  BSD Socket
4      *		interface as the means of communication with the user level.
5      *
6      *		Definitions for the TCP module.
7      *
8      * Version:	@(#)tcp.h	1.0.5	05/23/93
9      *
10      * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
11      *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12      *
13      *		This program is free software; you can redistribute it and/or
14      *		modify it under the terms of the GNU General Public License
15      *		as published by the Free Software Foundation; either version
16      *		2 of the License, or (at your option) any later version.
17      */
18     #ifndef _TCP_H
19     #define _TCP_H
20     
21     #define TCP_DEBUG 1		/* gates the debug printk in tcp_reset_xmit_timer() */
22     #define FASTRETRANS_DEBUG 1	/* debug switch; not tested in the part of the header shown here */
23     
24     /* Cancel timers, when they are not required.
       * TCP_CLEAR_TIMERS is left undefined here, so the del_timer() branches
       * inside tcp_clear_xmit_timer() are compiled out.
       */
25     #undef TCP_CLEAR_TIMERS
26     
27     #include <linux/config.h>
28     #include <linux/tcp.h>
29     #include <linux/slab.h>
30     #include <net/checksum.h>
31     #include <net/sock.h>
32     
33     /* This is for all connections with a full identity, no wildcards.
34      * New scheme, half the table is for TIME_WAIT, the other half is
35      * for the rest.  I'll experiment with dynamic table growth later.
       *
       * One bucket of that table: a lock plus the head of a socket chain.
       * tcp_hashinfo.__tcp_ehash points at these buckets.
36      */
37     struct tcp_ehash_bucket {
38     	rwlock_t	lock;	/* presumably guards 'chain' -- confirm against the users */
39     	struct sock	*chain;	/* first socket on this bucket's chain */
40     } __attribute__((__aligned__(8)));	/* every bucket is 8-byte aligned */
41     
42     /* This is for listening sockets, thus all sockets which possess wildcards.
       * tcp_lhashfn() masks with (TCP_LHTABLE_SIZE - 1), so keep this a power of two.
       */
43     #define TCP_LHTABLE_SIZE	32	/* Yes, really, this is all you need. */
44     
45     /* There are a few simple rules, which allow for local port reuse by
46      * an application.  In essence:
47      *
48      *	1) Sockets bound to different interfaces may share a local port.
49      *	   Failing that, goto test 2.
50      *	2) If all sockets have sk->reuse set, and none of them are in
51      *	   TCP_LISTEN state, the port may be shared.
52      *	   Failing that, goto test 3.
53      *	3) If all sockets are bound to a specific sk->rcv_saddr local
54      *	   address, and none of them are the same, the port may be
55      *	   shared.
56      *	   Failing this, the port cannot be shared.
57      *
58      * The interesting point, is test #2.  This is what an FTP server does
59      * all day.  To optimize this case we use a specific flag bit defined
60      * below.  As we add sockets to a bind bucket list, we perform a
61      * check of: (newsk->reuse && (newsk->state != TCP_LISTEN))
62      * As long as all sockets added to a bind bucket pass this test,
63      * the flag bit will be set.
64      * The resulting situation is that tcp_v[46]_verify_bind() can just check
65      * for this flag bit, if it is set and the socket trying to bind has
66      * sk->reuse set, we don't even have to walk the owners list at all,
67      * we return that it is ok to bind this socket to the requested local port.
68      *
69      * Sounds like a lot of work, but it is worth it.  In a more naive
70      * implementation (ie. current FreeBSD etc.) the entire list of ports
71      * must be walked for each data port opened by an ftp server.  Needless
72      * to say, this does not scale at all.  With a couple thousand FTP
73      * users logged onto your box, isn't it nice to know that new data
74      * ports are created in O(1) time?  I thought so. ;-)	-DaveM
75      */
       /* One entry per bound local port; see the port-reuse rules above. */
76     struct tcp_bind_bucket {
77     	unsigned short		port;		/* the local port this bucket stands for */
78     	unsigned short		fastreuse;	/* presumably the fast-path reuse flag described above -- confirm */
79     	struct tcp_bind_bucket	*next;		/* next bucket on the same hash chain */
80     	struct sock		*owners;	/* sockets bound to this port (the "owners list") */
81     	struct tcp_bind_bucket	**pprev;	/* NOTE(review): looks like the back-link used for unlinking -- confirm */
82     };
83     
       /* Head of one bind-hash chain; tcp_hashinfo.__tcp_bhash points at these. */
84     struct tcp_bind_hashbucket {
85     	spinlock_t		lock;	/* presumably serialises access to 'chain' -- confirm */
86     	struct tcp_bind_bucket	*chain;	/* first tcp_bind_bucket on this chain */
87     };
88     
       /* Declares struct tcp_hashinfo and, in the same statement, the extern
        * global instance 'tcp_hashinfo' that the tcp_* accessor macros expand to.
        */
89     extern struct tcp_hashinfo {
90     	/* This is for sockets with full identity only.  Sockets here will
91     	 * always be without wildcards and will have the following invariant:
92     	 *
93     	 *          TCP_ESTABLISHED <= sk->state < TCP_CLOSE
94     	 *
95     	 * First half of the table is for sockets not in TIME_WAIT, second half
96     	 * is for TIME_WAIT sockets only.
97     	 */
98     	struct tcp_ehash_bucket *__tcp_ehash;
99     
100     	/* Ok, let's try this, I give up, we do need a local binding
101     	 * TCP hash as well as the others for fast bind/connect.
102     	 */
103     	struct tcp_bind_hashbucket *__tcp_bhash;
104     
105     	int __tcp_bhash_size;	/* tcp_bhashfn() masks ports with (this - 1) */
106     	int __tcp_ehash_size;
107     
108     	/* All sockets in TCP_LISTEN state will be in here.  This is the only
109     	 * table where wildcard'd TCP sockets can exist.  Hash function here
110     	 * is just local port number.
111     	 */
112     	struct sock *__tcp_listening_hash[TCP_LHTABLE_SIZE];
113     
114     	/* All the above members are written once at bootup and
115     	 * never written again _or_ are predominantly read-access.
116     	 *
117     	 * Now align to a new cache line as all the following members
118     	 * are often dirty.
119     	 */
120     	rwlock_t __tcp_lhash_lock
121     		__attribute__((__aligned__(SMP_CACHE_BYTES)));	/* alignment applies to this member */
122     	atomic_t __tcp_lhash_users;
123     	wait_queue_head_t __tcp_lhash_wait;
124     	spinlock_t __tcp_portalloc_lock;
125     } tcp_hashinfo;
126     
        /* Shorthand for the members of the global tcp_hashinfo instance.
         * Each macro expands to an lvalue, so it can be read and assigned.
         */
127     #define tcp_ehash	(tcp_hashinfo.__tcp_ehash)
128     #define tcp_bhash	(tcp_hashinfo.__tcp_bhash)
129     #define tcp_ehash_size	(tcp_hashinfo.__tcp_ehash_size)
130     #define tcp_bhash_size	(tcp_hashinfo.__tcp_bhash_size)
131     #define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)
132     #define tcp_lhash_lock	(tcp_hashinfo.__tcp_lhash_lock)
133     #define tcp_lhash_users	(tcp_hashinfo.__tcp_lhash_users)
134     #define tcp_lhash_wait	(tcp_hashinfo.__tcp_lhash_wait)
135     #define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)
136     
        /* Bind-bucket and listener-lookup interfaces; all defined outside this header. */
137     extern kmem_cache_t *tcp_bucket_cachep;
138     extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
139     						 unsigned short snum);
140     extern void tcp_bucket_unlock(struct sock *sk);
141     extern int tcp_port_rover;
142     extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif);
143     
144     /* These are AF independent: the bind-hash slot depends only on the local port. */
145     static __inline__ int tcp_bhashfn(__u16 lport)
146     {
147     	return (tcp_bhash_size - 1) & lport;	/* mask form -- assumes a power-of-two table size */
148     }
149     
150     /* This is a TIME_WAIT bucket.  It works around the memory consumption
151      * problems of sockets in such a state on heavily loaded servers, but
152      * without violating the protocol specification.
        *
        * Buckets are reference counted; tcp_tw_put() drops a reference and
        * frees the bucket into tcp_timewait_cachep when the count hits zero.
153      */
154     struct tcp_tw_bucket {
155     	/* These _must_ match the beginning of struct sock precisely.
156     	 * XXX Yes I know this is gross, but I'd have to edit every single
157     	 * XXX networking file if I created a "struct sock_header". -DaveM
        	 *
        	 * The TCP_IPV4_MATCH/TCP_IPV6_MATCH macros also depend on this
        	 * order: they load daddr+rcv_saddr and dport+num as single words.
158     	 */
159     	__u32			daddr;
160     	__u32			rcv_saddr;
161     	__u16			dport;
162     	unsigned short		num;
163     	int			bound_dev_if;
164     	struct sock		*next;
165     	struct sock		**pprev;
166     	struct sock		*bind_next;
167     	struct sock		**bind_pprev;
168     	unsigned char		state,
169     				substate; /* "zapped" is replaced with "substate" */
170     	__u16			sport;
171     	unsigned short		family;
172     	unsigned char		reuse,
173     				rcv_wscale; /* It is also TW bucket specific */
174     	atomic_t		refcnt;	/* decremented by tcp_tw_put() */
175     
176     	/* And these are ours. */
177     	int			hashent;
178     	int			timeout;
179     	__u32			rcv_nxt;
180     	__u32			snd_nxt;
181     	__u32			rcv_wnd;
182             __u32			ts_recent;
183             long			ts_recent_stamp;
184     	unsigned long		ttd;
185     	struct tcp_bind_bucket	*tb;
186     	struct tcp_tw_bucket	*next_death;
187     	struct tcp_tw_bucket	**pprev_death;
188     
        	/* IPv6 addresses exist only when IPv6 is built in or modular. */
189     #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
190     	struct in6_addr		v6_daddr;
191     	struct in6_addr		v6_rcv_saddr;
192     #endif
193     };
194     
195     extern kmem_cache_t *tcp_timewait_cachep;
196     
        /* Drop one reference on a TIME_WAIT bucket.  When the decrement
         * reaches zero the bucket is handed back to tcp_timewait_cachep.
         */
197     static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
198     {
199     	if (!atomic_dec_and_test(&tw->refcnt))
200     		return;		/* count did not reach zero: nothing to free */
201     #ifdef INET_REFCNT_DEBUG
202     	printk(KERN_DEBUG "tw_bucket %p released\n", tw);
203     #endif
204     	kmem_cache_free(tcp_timewait_cachep, tw);
205     }
206     
        /* Orphan/TIME_WAIT counters and the TIME_WAIT entry points;
         * all defined outside this header.
         */
207     extern atomic_t tcp_orphan_count;
208     extern int tcp_tw_count;
209     extern void tcp_time_wait(struct sock *sk, int state, int timeo);
210     extern void tcp_timewait_kill(struct tcp_tw_bucket *tw);
211     extern void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);
212     extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
213     
214     
215     /* Socket demux engine toys.
         *
         * TCP_COMBINED_PORTS packs two 16-bit port values into one __u32.
         * Which argument lands in the upper half depends on byte order, so
         * that the result can be compared with the single 32-bit load the
         * TCP_IPV4_MATCH/TCP_IPV6_MATCH macros take at &sk->dport.
         */
216     #ifdef __BIG_ENDIAN
217     #define TCP_COMBINED_PORTS(__sport, __dport) \
218     	(((__u32)(__sport)<<16) | (__u32)(__dport))
219     #else /* __LITTLE_ENDIAN */
220     #define TCP_COMBINED_PORTS(__sport, __dport) \
221     	(((__u32)(__dport)<<16) | (__u32)(__sport))
222     #endif
223     
        /* IPv4 demux match.
         *
         * 64-bit: TCP_V4_ADDR_COOKIE declares a __u64 holding both addresses
         * (note the ';' inside the macro: it is a complete declaration) and
         * TCP_IPV4_MATCH compares it with one 64-bit load taken at &sk->daddr;
         * the __saddr/__daddr arguments of the match are then unused.
         *
         * 32-bit: the cookie macro expands to nothing and the addresses are
         * compared one by one: __saddr against sk->daddr, __daddr against
         * sk->rcv_saddr.
         *
         * Both variants then compare a 32-bit load at &sk->dport with __ports
         * and accept a socket that is either not bound to a device
         * (bound_dev_if == 0) or bound to __dif.
         *
         * The wide loads rely on daddr/rcv_saddr and dport/num being adjacent,
         * as laid out at the top of struct tcp_tw_bucket.
         */
224     #if (BITS_PER_LONG == 64)
225     #ifdef __BIG_ENDIAN
226     #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
227     	__u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
228     #else /* __LITTLE_ENDIAN */
229     #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
230     	__u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
231     #endif /* __BIG_ENDIAN */
232     #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
233     	(((*((__u64 *)&((__sk)->daddr)))== (__cookie))	&&		\
234     	 ((*((__u32 *)&((__sk)->dport)))== (__ports))   &&		\
235     	 (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
236     #else /* 32-bit arch */
237     #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
238     #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
239     	(((__sk)->daddr			== (__saddr))	&&		\
240     	 ((__sk)->rcv_saddr		== (__daddr))	&&		\
241     	 ((*((__u32 *)&((__sk)->dport)))== (__ports))   &&		\
242     	 (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
243     #endif /* 64-bit arch */
244     
        /* IPv6 counterpart: combined ports first, then the family check, then
         * both addresses via ipv6_addr_cmp() (a zero result is treated as
         * "equal"), and finally the same bound-device rule as for IPv4.
         * __saddr/__daddr are pointers here; they are passed straight to
         * ipv6_addr_cmp().
         */
245     #define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif)			   \
246     	(((*((__u32 *)&((__sk)->dport)))== (__ports))   			&& \
247     	 ((__sk)->family		== AF_INET6)				&& \
248     	 !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.daddr, (__saddr))		&& \
249     	 !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.rcv_saddr, (__daddr))	&& \
250     	 (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
251     
252     /* These can have wildcards, don't try too hard: keep the low bits of the port. */
253     static __inline__ int tcp_lhashfn(unsigned short num)
254     {
255     	return (TCP_LHTABLE_SIZE - 1) & num;	/* index into a TCP_LHTABLE_SIZE-entry table */
256     }
257     
258     static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
259     {
260     	return tcp_lhashfn(sk->num);
261     }
262     
263     #define MAX_TCP_HEADER	(128 + MAX_HEADER)	/* 128 bytes on top of MAX_HEADER (defined elsewhere) */
264     
265     /* 
266      * Never offer a window over 32767 without using window scaling. Some
267      * poor stacks do signed 16bit maths! 
         * (32767 is the largest positive signed 16-bit value.)
268      */
269     #define MAX_TCP_WINDOW		32767
270     
271     /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
272     #define TCP_MIN_MSS		88
273     
274     /* Minimal RCV_MSS. */
275     #define TCP_MIN_RCVMSS		536
276     
277     /* After receiving this amount of duplicate ACKs fast retransmit starts. */
278     #define TCP_FASTRETRANS_THRESH 3
279     
280     /* Maximal reordering. */
281     #define TCP_MAX_REORDERING	127
282     
283     /* Maximal number of ACKs sent quickly to accelerate slow-start. */
284     #define TCP_MAX_QUICKACKS	16
285     
286     /* urg_data states: flag bits, each in its own bit above the low byte. */
287     #define TCP_URG_VALID	0x0100
288     #define TCP_URG_NOTYET	0x0200
289     #define TCP_URG_READ	0x0400
290     
291     #define TCP_RETR1	3	/*
292     				 * This is how many retries it does before it
293     				 * tries to figure out if the gateway is
294     				 * down. Minimal RFC value is 3; it corresponds
295     				 * to ~3sec-8min depending on RTO.
296     				 */
297     
298     #define TCP_RETR2	15	/*
299     				 * This should take at least
300     				 * 90 minutes to time out.
301     				 * RFC1122 says that the lower limit is 100 sec.
302     				 * 15 is ~13-30min depending on RTO.
        				 * NOTE(review): "at least 90 minutes" does
        				 * not agree with the ~13-30min estimate
        				 * above; one of the two looks stale.
303     				 */
304     
305     #define TCP_SYN_RETRIES	 5	/* number of times to retry active opening a
306     				 * connection: ~180sec is RFC minimum	*/
307     
308     #define TCP_SYNACK_RETRIES 5	/* number of times to retry passive opening a
309     				 * connection: ~180sec is RFC minimum	*/
310     
311     
312     #define TCP_ORPHAN_RETRIES 7	/* number of times to retry on an orphaned
313     				 * socket. 7 is ~50sec-16min.
314     				 */
315     
316     
317     #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
318     				  * state, about 60 seconds	*/
319     #define TCP_FIN_TIMEOUT	TCP_TIMEWAIT_LEN
320                                      /* BSD style FIN_WAIT2 deadlock breaker.
321     				  * It used to be 3min, new value is 60sec,
322     				  * to combine FIN-WAIT-2 timeout with
323     				  * TIME-WAIT timer.
324     				  */
325     
        /* Time values below are expressed in jiffies (HZ jiffies per second). */
326     #define TCP_DELACK_MAX	(HZ/5)	/* maximal time to delay before sending an ACK */
327     #if HZ >= 100
328     #define TCP_DELACK_MIN	(HZ/25)	/* minimal time to delay before sending an ACK */
329     #define TCP_ATO_MIN	(HZ/25)	/* value tcp_dec_quickack_mode() resets ack.ato to */
330     #else
        /* With a coarse clock HZ/25 would be too small; use 4 jiffies instead. */
331     #define TCP_DELACK_MIN	4
332     #define TCP_ATO_MIN	4
333     #endif
334     #define TCP_RTO_MAX	(120*HZ)	/* 120 s; tcp_reset_xmit_timer() clamps 'when' to this */
335     #define TCP_RTO_MIN	(HZ/5)		/* one fifth of a second */
336     #define TCP_TIMEOUT_INIT (3*HZ)	/* RFC 1122 initial RTO value	*/
337     
338     #define TCP_RESOURCE_PROBE_INTERVAL (HZ/2) /* Maximal interval between probes
339     					    * for local resources.
340     					    */
341     
342     #define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */
343     #define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes	*/
344     #define TCP_KEEPALIVE_INTVL	(75*HZ)		/* 75 seconds */
345     
        /* Upper bounds; NOTE(review): presumably for the matching socket options -- confirm. */
346     #define MAX_TCP_KEEPIDLE	32767
347     #define MAX_TCP_KEEPINTVL	32767
348     #define MAX_TCP_KEEPCNT		127
349     #define MAX_TCP_SYNCNT		127
350     
351     /* TIME_WAIT reaping mechanism. */
352     #define TCP_TWKILL_SLOTS	8	/* Please keep this a power of 2. */
353     #define TCP_TWKILL_PERIOD	(TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS)	/* one slot's share of TCP_TIMEWAIT_LEN */
354     
355     #define TCP_SYNQ_INTERVAL	(HZ/5)	/* Period of SYNACK timer */
356     #define TCP_SYNQ_HSIZE		512	/* Size of SYNACK hash table */
357     
358     #define TCP_PAWS_24DAYS	(60 * 60 * 24 * 24)	/* 24 days expressed in seconds (no HZ factor) */
359     #define TCP_PAWS_MSL	60		/* Per-host timestamps are invalidated
360     					 * after this time. It should be equal
361     					 * (or greater than) TCP_TIMEWAIT_LEN
362     					 * to provide reliability equal to one
363     					 * provided by timewait state.
364     					 */
365     #define TCP_PAWS_WINDOW	1		/* Replay window for per-host
366     					 * timestamps. It must be less than
367     					 * minimal timewait lifetime.
368     					 */
369     
370     #define TCP_TW_RECYCLE_SLOTS_LOG	5
371     #define TCP_TW_RECYCLE_SLOTS		(1<<TCP_TW_RECYCLE_SLOTS_LOG)	/* 32 slots */
372     
373     /* If time > 4sec, it is "slow" path, no recycling is required,
374        so that we select tick to get range about 4 seconds.
        
           Each branch picks k + 2 - TCP_TW_RECYCLE_SLOTS_LOG, where 2^k is HZ
           rounded up to a power of two.  Assuming the tick is used as a shift
           count (TODO confirm against the user), all slots together span
           TCP_TW_RECYCLE_SLOTS << TCP_TW_RECYCLE_TICK == 4 * 2^k jiffies.
375      */
376     
377     #if HZ <= 16 || HZ > 4096
378     # error Unsupported: HZ <= 16 or HZ > 4096
379     #elif HZ <= 32
380     # define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
381     #elif HZ <= 64
382     # define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
383     #elif HZ <= 128
384     # define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
385     #elif HZ <= 256
386     # define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
387     #elif HZ <= 512
388     # define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
389     #elif HZ <= 1024
390     # define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
391     #elif HZ <= 2048
392     # define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
393     #else
394     # define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
395     #endif
396     
397     /*
398      *	TCP option kind values
399      */
400      
401     #define TCPOPT_NOP		1	/* Padding */
402     #define TCPOPT_EOL		0	/* End of options */
403     #define TCPOPT_MSS		2	/* Segment size negotiating */
404     #define TCPOPT_WINDOW		3	/* Window scaling */
405     #define TCPOPT_SACK_PERM        4       /* SACK Permitted */
406     #define TCPOPT_SACK             5       /* SACK Block */
407     #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
408     
409     /*
410      *     TCP option lengths, in bytes
411      */
412     
413     #define TCPOLEN_MSS            4
414     #define TCPOLEN_WINDOW         3
415     #define TCPOLEN_SACK_PERM      2
416     #define TCPOLEN_TIMESTAMP      10
417     
418     /* But this is what stacks really send out.
         * Each aligned length is the raw one rounded up to a multiple of four.
         */
419     #define TCPOLEN_TSTAMP_ALIGNED		12
420     #define TCPOLEN_WSCALE_ALIGNED		4
421     #define TCPOLEN_SACKPERM_ALIGNED	4
422     #define TCPOLEN_SACK_BASE		2
423     #define TCPOLEN_SACK_BASE_ALIGNED	4
424     #define TCPOLEN_SACK_PERBLOCK		8
425     
        /* Timer ids.  tcp_clear_xmit_timer() and tcp_reset_xmit_timer() take one
         * of these as 'what'; both treat TCP_TIME_KEEPOPEN as an unknown id.
         */
426     #define TCP_TIME_RETRANS	1	/* Retransmit timer */
427     #define TCP_TIME_DACK		2	/* Delayed ack timer */
428     #define TCP_TIME_PROBE0		3	/* Zero window probe timer */
429     #define TCP_TIME_KEEPOPEN	4	/* Keepalive timer */
430     
431     /* sysctl variables for tcp.
         * Declarations only; the variables are defined outside this header.
         * Each name is declared exactly once (a second, redundant declaration
         * of sysctl_tcp_tw_recycle used to follow sysctl_tcp_rfc1337).
         */
432     extern int sysctl_max_syn_backlog;
433     extern int sysctl_tcp_timestamps;
434     extern int sysctl_tcp_window_scaling;
435     extern int sysctl_tcp_sack;
436     extern int sysctl_tcp_fin_timeout;
437     extern int sysctl_tcp_tw_recycle;
438     extern int sysctl_tcp_keepalive_time;
439     extern int sysctl_tcp_keepalive_probes;
440     extern int sysctl_tcp_keepalive_intvl;
441     extern int sysctl_tcp_syn_retries;
442     extern int sysctl_tcp_synack_retries;
443     extern int sysctl_tcp_retries1;
444     extern int sysctl_tcp_retries2;
445     extern int sysctl_tcp_orphan_retries;
446     extern int sysctl_tcp_syncookies;
447     extern int sysctl_tcp_retrans_collapse;
448     extern int sysctl_tcp_stdurg;
449     extern int sysctl_tcp_rfc1337;
451     extern int sysctl_tcp_abort_on_overflow;
452     extern int sysctl_tcp_max_orphans;
453     extern int sysctl_tcp_max_tw_buckets;
454     extern int sysctl_tcp_fack;
455     extern int sysctl_tcp_reordering;
456     extern int sysctl_tcp_ecn;
457     extern int sysctl_tcp_dsack;
458     extern int sysctl_tcp_mem[3];
459     extern int sysctl_tcp_wmem[3];
460     extern int sysctl_tcp_rmem[3];
461     extern int sysctl_tcp_app_win;
462     extern int sysctl_tcp_adv_win_scale;
463     
        /* Global TCP memory/socket accounting state; defined outside this header. */
464     extern atomic_t tcp_memory_allocated;
465     extern atomic_t tcp_sockets_allocated;
466     extern int tcp_memory_pressure;
467     
468     struct open_request;	/* forward declaration; the full definition follows further down */
469     
        /* Operations attached to an open_request through its 'class' pointer. */
470     struct or_calltable {
471     	int  family;
472     	int  (*rtx_syn_ack)	(struct sock *sk, struct open_request *req, struct dst_entry*);
473     	void (*send_ack)	(struct sk_buff *skb, struct open_request *req);
474     	void (*destructor)	(struct open_request *req);	/* run by tcp_openreq_free() before the free */
475     	void (*send_reset)	(struct sk_buff *skb);
476     };
477     
        /* IPv4 member of the open_request 'af' union. */
478     struct tcp_v4_open_req {
479     	__u32			loc_addr;
480     	__u32			rmt_addr;
481     	struct ip_options	*opt;
482     };
483     
        /* IPv6 member of the open_request 'af' union; only present when IPv6
         * is built in or modular.
         */
484     #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
485     struct tcp_v6_open_req {
486     	struct in6_addr		loc_addr;
487     	struct in6_addr		rmt_addr;
488     	struct sk_buff		*pktopts;
489     	int			iif;
490     };
491     #endif
492     
493     /* this structure is too big
         *
         * Allocated from tcp_openreq_cachep via tcp_openreq_alloc() and released
         * with tcp_openreq_free(), which first calls class->destructor.
         */
494     struct open_request {
495     	struct open_request	*dl_next; /* Must be first member! */
496     	__u32			rcv_isn;
497     	__u32			snt_isn;
498     	__u16			rmt_port;
499     	__u16			mss;
500     	__u8			retrans;
501     	__u8			index;
        	/* Bit-fields below: 4+4+1+1+1+1+1 = 13 bits declared on a __u16. */
502     	__u16	snd_wscale : 4, 
503     		rcv_wscale : 4, 
504     		tstamp_ok : 1,
505     		sack_ok : 1,
506     		wscale_ok : 1,
507     		ecn_ok : 1,
508     		acked : 1;
509     	/* The following two fields can be easily recomputed I think -AK */
510     	__u32			window_clamp;	/* window clamp at creation time */
511     	__u32			rcv_wnd;	/* rcv_wnd offered first time */
512     	__u32			ts_recent;
513     	unsigned long		expires;
514     	struct or_calltable	*class;	/* operations table; see struct or_calltable */
515     	struct sock		*sk;
        	/* Address-family specific part; the v6 member exists only with IPv6. */
516     	union {
517     		struct tcp_v4_open_req v4_req;
518     #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
519     		struct tcp_v6_open_req v6_req;
520     #endif
521     	} af;
522     };
523     
524     /* SLAB cache for open requests. */
525     extern kmem_cache_t *tcp_openreq_cachep;
526     
        /* tcp_openreq_alloc() allocates with SLAB_ATOMIC.  tcp_openreq_fastfree()
         * returns the object to the cache WITHOUT calling class->destructor;
         * use tcp_openreq_free() when the destructor must run.
         */
527     #define tcp_openreq_alloc()		kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC)
528     #define tcp_openreq_fastfree(req)	kmem_cache_free(tcp_openreq_cachep, req)
529     
530     static inline void tcp_openreq_free(struct open_request *req)
531     {
532     	req->class->destructor(req);
533     	tcp_openreq_fastfree(req);
534     }
535     
        /* True when 'fam' is AF_INET.  Without IPv6 support the macro is the
         * constant 1 and does not evaluate its argument at all.
         */
536     #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
537     #define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
538     #else
539     #define TCP_INET_FAMILY(fam) 1
540     #endif
541     
542     /*
543      *	Pointers to address related TCP functions
544      *	(i.e. things that depend on the address family)
545      *
546      * 	BUGGG_FUTURE: all the idea behind this struct is wrong.
547      *	It mixes socket frontend with transport function.
548      *	With port sharing between IPv6/v4 it gives the only advantage,
549      *	only poor IPv6 needs to permanently recheck, that it
550      *	is still IPv6 8)8) It must be cleaned up as soon as possible.
551      *						--ANK (980802)
552      */
553     
        /* Table of address-family dependent operations and sizes.
         * The tcp_v4_* prototypes declared further down in this header
         * (tcp_v4_send_check, tcp_v4_rebuild_header, tcp_v4_conn_request,
         * tcp_v4_syn_recv_sock, tcp_v4_hash_connecting, tcp_v4_remember_stamp)
         * have signatures matching the corresponding members.
         */
554     struct tcp_func {
555     	int			(*queue_xmit)		(struct sk_buff *skb);
556     
557     	void			(*send_check)		(struct sock *sk,
558     							 struct tcphdr *th,
559     							 int len,
560     							 struct sk_buff *skb);
561     
562     	int			(*rebuild_header)	(struct sock *sk);
563     
564     	int			(*conn_request)		(struct sock *sk,
565     							 struct sk_buff *skb);
566     
567     	struct sock *		(*syn_recv_sock)	(struct sock *sk,
568     							 struct sk_buff *skb,
569     							 struct open_request *req,
570     							 struct dst_entry *dst);
571     	
572     	int			(*hash_connecting)	(struct sock *sk);
573     
574     	int			(*remember_stamp)	(struct sock *sk);
575     
576     	__u16			net_header_len;	/* NOTE(review): presumably the network-layer header size in bytes -- confirm */
577     
578     	int			(*setsockopt)		(struct sock *sk, 
579     							 int level, 
580     							 int optname, 
581     							 char *optval, 
582     							 int optlen);
583     
584     	int			(*getsockopt)		(struct sock *sk, 
585     							 int level, 
586     							 int optname, 
587     							 char *optval, 
588     							 int *optlen);
589     
590     
591     	void			(*addr2sockaddr)	(struct sock *sk,
592     							 struct sockaddr *);
593     
594     	int sockaddr_len;
595     };
596     
597     /*
598      * The next routines deal with comparing 32 bit unsigned ints
599      * and worry about wraparound (automatic with unsigned arithmetic).
600      */
601     
602     extern __inline int before(__u32 seq1, __u32 seq2)
603     {
604     	return 0 > (__s32)(seq1 - seq2);	/* sign of the wrapped 32-bit difference */
605     }
606     
607     extern __inline int after(__u32 seq1, __u32 seq2)
608     {
609     	return 0 > (__s32)(seq2 - seq1);	/* same test as before(), operands swapped */
610     }
611     
612     
613     /* is seq2 <= seq1 <= seq3 ?  (both distances measured from seq2, modulo 2^32) */
614     extern __inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
615     {
616     	return (seq1 - seq2) <= (seq3 - seq2);
617     }
618     
619     
620     extern struct proto tcp_prot;
621     
        /* TCP MIB counters, two array slots per CPU.  The TCP_INC_STATS*
         * wrappers simply forward to the matching SNMP_INC_STATS* macro with
         * tcp_statistics as the table.
         */
622     extern struct tcp_mib tcp_statistics[NR_CPUS*2];
623     #define TCP_INC_STATS(field)		SNMP_INC_STATS(tcp_statistics, field)
624     #define TCP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(tcp_statistics, field)
625     #define TCP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(tcp_statistics, field)
626     
        /* Core TCP entry points (port handling, receive, send, ioctl);
         * prototypes only, the definitions live outside this header.
         */
627     extern void			tcp_put_port(struct sock *sk);
628     extern void			__tcp_put_port(struct sock *sk);
629     extern void			tcp_inherit_port(struct sock *sk, struct sock *child);
630     
631     extern void			tcp_v4_err(struct sk_buff *skb, u32);
632     
633     extern void			tcp_shutdown (struct sock *sk, int how);
634     
635     extern int			tcp_v4_rcv(struct sk_buff *skb);
636     
637     extern int			tcp_v4_remember_stamp(struct sock *sk);
638     
639     extern int		    	tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw);
640     
641     extern int			tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
642     extern ssize_t			tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
643     
644     extern int			tcp_ioctl(struct sock *sk, 
645     					  int cmd, 
646     					  unsigned long arg);
647     
648     extern int			tcp_rcv_state_process(struct sock *sk, 
649     						      struct sk_buff *skb,
650     						      struct tcphdr *th,
651     						      unsigned len);
652     
653     extern int			tcp_rcv_established(struct sock *sk, 
654     						    struct sk_buff *skb,
655     						    struct tcphdr *th, 
656     						    unsigned len);
657     
        /* Bit flags kept in tp->ack.pending: tcp_schedule_ack() sets
         * TCP_ACK_SCHED and tcp_reset_xmit_timer() sets TCP_ACK_TIMER.
         */
658     enum tcp_ack_state_t
659     {
660     	TCP_ACK_SCHED = 1,
661     	TCP_ACK_TIMER = 2,
662     	TCP_ACK_PUSHED= 4
663     };
664     
665     static inline void tcp_schedule_ack(struct tcp_opt *tp)
666     {
667     	tp->ack.pending |= TCP_ACK_SCHED;
668     }
669     
670     static inline int tcp_ack_scheduled(struct tcp_opt *tp)
671     {
672     	return tp->ack.pending&TCP_ACK_SCHED;
673     }
674     
675     static __inline__ void tcp_dec_quickack_mode(struct tcp_opt *tp)
676     {
677     	if (tp->ack.quick && --tp->ack.quick == 0) {
678     		/* Leaving quickack mode we deflate ATO. */
679     		tp->ack.ato = TCP_ATO_MIN;
680     	}
681     }
682     
683     extern void tcp_enter_quickack_mode(struct tcp_opt *tp);
684     
685     static __inline__ void tcp_delack_init(struct tcp_opt *tp)
686     {
687     	memset(&tp->ack, 0, sizeof(tp->ack));
688     }
689     
690     static inline void tcp_clear_options(struct tcp_opt *tp)
691     {
692      	tp->tstamp_ok = tp->sack_ok = tp->wscale_ok = tp->snd_wscale = 0;
693     }
694     
        /* Result codes returned by tcp_timewait_state_process(). */
695     enum tcp_tw_status
696     {
697     	TCP_TW_SUCCESS = 0,
698     	TCP_TW_RST = 1,
699     	TCP_TW_ACK = 2,
700     	TCP_TW_SYN = 3
701     };
702     
703     
        /* TIME_WAIT processing, open-request handling and the socket-level
         * operations (close/accept/poll/sockopt/recvmsg); prototypes only.
         */
704     extern enum tcp_tw_status	tcp_timewait_state_process(struct tcp_tw_bucket *tw,
705     							   struct sk_buff *skb,
706     							   struct tcphdr *th,
707     							   unsigned len);
708     
709     extern struct sock *		tcp_check_req(struct sock *sk,struct sk_buff *skb,
710     					      struct open_request *req,
711     					      struct open_request **prev);
712     extern int			tcp_child_process(struct sock *parent,
713     						  struct sock *child,
714     						  struct sk_buff *skb);
715     extern void			tcp_enter_loss(struct sock *sk, int how);
716     extern void			tcp_clear_retrans(struct tcp_opt *tp);
717     extern void			tcp_update_metrics(struct sock *sk);
718     
719     extern void			tcp_close(struct sock *sk, 
720     					  long timeout);
721     extern struct sock *		tcp_accept(struct sock *sk, int flags, int *err);
722     extern unsigned int		tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
723     extern void			tcp_write_space(struct sock *sk); 
724     
725     extern int			tcp_getsockopt(struct sock *sk, int level, 
726     					       int optname, char *optval, 
727     					       int *optlen);
728     extern int			tcp_setsockopt(struct sock *sk, int level, 
729     					       int optname, char *optval, 
730     					       int optlen);
731     extern void			tcp_set_keepalive(struct sock *sk, int val);
732     extern int			tcp_recvmsg(struct sock *sk, 
733     					    struct msghdr *msg,
734     					    int len, int nonblock, 
735     					    int flags, int *addr_len);
736     
737     extern int			tcp_listen_start(struct sock *sk);
738     
739     extern void			tcp_parse_options(struct sk_buff *skb,
740     						  struct tcp_opt *tp,
741     						  int estab);
742     
743     /*
744      *	TCP v4 functions exported for the inet6 API
         *	(prototypes only; several match the members of struct tcp_func)
745      */
746     
747     extern int		       	tcp_v4_rebuild_header(struct sock *sk);
748     
749     extern int		       	tcp_v4_build_header(struct sock *sk, 
750     						    struct sk_buff *skb);
751     
752     extern void		       	tcp_v4_send_check(struct sock *sk, 
753     						  struct tcphdr *th, int len, 
754     						  struct sk_buff *skb);
755     
756     extern int			tcp_v4_conn_request(struct sock *sk,
757     						    struct sk_buff *skb);
758     
759     extern struct sock *		tcp_create_openreq_child(struct sock *sk,
760     							 struct open_request *req,
761     							 struct sk_buff *skb);
762     
763     extern struct sock *		tcp_v4_syn_recv_sock(struct sock *sk,
764     						     struct sk_buff *skb,
765     						     struct open_request *req,
766     							struct dst_entry *dst);
767     
768     extern int			tcp_v4_do_rcv(struct sock *sk,
769     					      struct sk_buff *skb);
770     
771     extern int			tcp_v4_connect(struct sock *sk,
772     					       struct sockaddr *uaddr,
773     					       int addr_len);
774     
775     extern int			tcp_connect(struct sock *sk,
776     					    struct sk_buff *skb);
777     
778     extern struct sk_buff *		tcp_make_synack(struct sock *sk,
779     						struct dst_entry *dst,
780     						struct open_request *req);
781     
782     extern int			tcp_disconnect(struct sock *sk, int flags);
783     
784     extern void			tcp_unhash(struct sock *sk);
785     
786     extern int			tcp_v4_hash_connecting(struct sock *sk);
787     
788     
789     /* From syncookies.c (prototypes only) */
790     extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 
791     				    struct ip_options *opt);
792     extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, 
793     				     __u16 *mss);
794     
795     /* tcp_output.c (prototypes only; unnamed struct sock * parameters are the socket) */
796     
797     extern int tcp_write_xmit(struct sock *, int nonagle);
798     extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
799     extern void tcp_xmit_retransmit_queue(struct sock *);
800     extern void tcp_simple_retransmit(struct sock *);
801     
802     extern void tcp_send_probe0(struct sock *);
803     extern void tcp_send_partial(struct sock *);
804     extern int  tcp_write_wakeup(struct sock *);
805     extern void tcp_send_fin(struct sock *sk);
806     extern void tcp_send_active_reset(struct sock *sk, int priority);
807     extern int  tcp_send_synack(struct sock *);
808     extern int  tcp_transmit_skb(struct sock *, struct sk_buff *);
809     extern void tcp_send_skb(struct sock *, struct sk_buff *, int force_queue, unsigned mss_now);
810     extern void tcp_push_one(struct sock *, unsigned mss_now);
811     extern void tcp_send_ack(struct sock *sk);
812     extern void tcp_send_delayed_ack(struct sock *sk);
813     
814     /* tcp_timer.c (prototypes only) */
815     extern void tcp_init_xmit_timers(struct sock *);
816     extern void tcp_clear_xmit_timers(struct sock *);
817     
818     extern void tcp_delete_keepalive_timer (struct sock *);
819     extern void tcp_reset_keepalive_timer (struct sock *, unsigned long);
820     extern int tcp_sync_mss(struct sock *sk, u32 pmtu);
821     
822     extern const char timer_bug_msg[];	/* passed to printk() by tcp_clear_xmit_timer() for unknown timer ids */
823     
824     
825     /*
826      *	Mark timer 'what' as no longer wanted for this socket.
827      *	Unless TCP_CLEAR_TIMERS is defined, only the bookkeeping
828      *	fields are reset here and the kernel timer itself is
829      *	left untouched.
830      */
831     static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
832     {
833     	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
834     
835     	if (what == TCP_TIME_RETRANS || what == TCP_TIME_PROBE0) {
836     		tp->pending = 0;
837     #ifdef TCP_CLEAR_TIMERS
838     		/* Give back a socket reference if the timer was really cancelled. */
839     		if (timer_pending(&tp->retransmit_timer) &&
840     		    del_timer(&tp->retransmit_timer))
841     			__sock_put(sk);
842     #endif
843     	} else if (what == TCP_TIME_DACK) {
844     		tp->ack.blocked = 0;
845     		tp->ack.pending = 0;
846     #ifdef TCP_CLEAR_TIMERS
847     		if (timer_pending(&tp->delack_timer) &&
848     		    del_timer(&tp->delack_timer))
849     			__sock_put(sk);
850     #endif
851     	} else {
852     		/* Any other id (TCP_TIME_KEEPOPEN included) is only reported. */
853     		printk(timer_bug_msg);
854     	}
855     }
856     
857     /*
858      *	Reset the retransmission timer
859      */
860     static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
861     {
862     	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
863     
864     	if (when > TCP_RTO_MAX) {
865     #ifdef TCP_DEBUG
866     		printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
867     #endif
868     		when = TCP_RTO_MAX;
869     	}
870     
871     	switch (what) {
872     	case TCP_TIME_RETRANS:
873     	case TCP_TIME_PROBE0:
874     		tp->pending = what;
875     		tp->timeout = jiffies+when;
876     		if (!mod_timer(&tp->retransmit_timer, tp->timeout))
877     			sock_hold(sk);
878     		break;
879     
880     	case TCP_TIME_DACK:
881     		tp->ack.pending |= TCP_ACK_TIMER;
882     		tp->ack.timeout = jiffies+when;
883     		if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
884     			sock_hold(sk);
885     		break;
886     
887     	default:
888     		printk(KERN_DEBUG "bug: unknown timer value\n");
889     	};
890     }
891     
892     /* Compute the current effective MSS, taking SACKs and IP options,
893      * and even PMTU discovery events into account.
894      */
895     
896     static __inline__ unsigned int tcp_current_mss(struct sock *sk)
897     {
898     	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
899     	struct dst_entry *dst = __sk_dst_get(sk);
900     	int mss_now = tp->mss_cache; 
901     
902     	if (dst && dst->pmtu != tp->pmtu_cookie)
903     		mss_now = tcp_sync_mss(sk, dst->pmtu);
904     
905     	if (tp->eff_sacks)
906     		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
907     			    (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
908     	return mss_now;
909     }
910     
911     /* Initialize RCV_MSS value.
912      * RCV_MSS is an our guess about MSS used by the peer.
913      * We haven't any direct information about the MSS.
914      * It's better to underestimate the RCV_MSS rather than overestimate.
915      * Overestimations make us ACKing less frequently than needed.
916      * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
917      */
918     
919     static inline void tcp_initialize_rcv_mss(struct sock *sk)
920     {
921     	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
922     	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
923     
924     	hint = min_t(unsigned int, hint, tp->rcv_wnd/2);
925     		
926     	tp->ack.rcv_mss = max_t(unsigned int,
927     			      min_t(unsigned int,
928     				  hint, TCP_MIN_RCVMSS),
929     			      TCP_MIN_MSS);
930     }
931     
932     static __inline__ void __tcp_fast_path_on(struct tcp_opt *tp, u32 snd_wnd)
933     {
934     	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
935     			       ntohl(TCP_FLAG_ACK) |
936     			       snd_wnd);
937     }
938     
939     static __inline__ void tcp_fast_path_on(struct tcp_opt *tp)
940     {
941     	__tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale);
942     }
943     
944     static inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp)
945     {
946     	if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
947     	    tp->rcv_wnd &&
948     	    atomic_read(&sk->rmem_alloc) < sk->rcvbuf &&
949     	    !tp->urg_data)
950     		tcp_fast_path_on(tp);
951     }
952     
953     /* Compute the actual receive window we are currently advertising.
954      * Rcv_nxt can be after the window if our peer push more data
955      * than the offered window.
956      */
957     static __inline__ u32 tcp_receive_window(struct tcp_opt *tp)
958     {
959     	s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
960     
961     	if (win < 0)
962     		win = 0;
963     	return (u32) win;
964     }
965     
/* Choose a new window, without checks for shrinking, and without
 * scaling applied to the result.  The caller does these things
 * if necessary.  This is a "raw" window selection.
 * Only declared here; the definition lives outside this header.
 */
extern u32	__tcp_select_window(struct sock *sk);
971     
/* TCP timestamps are only 32 bits wide, which causes a slight
 * complication on 64-bit systems since we store a snapshot
 * of jiffies in the buffer control blocks below.  We deliberately
 * use only the low 32 bits of jiffies and hide the ugly
 * cast behind the following macro.
 */
#define tcp_time_stamp		((__u32)(jiffies))
979     
/* This is what the send packet queueing engine uses to pass
 * TCP per-packet control information to the transmission
 * code.  We also store the host-order sequence numbers in
 * here too.  This is 36 bytes on 32-bit architectures,
 * 40 bytes on 64-bit machines, if this grows please adjust
 * skbuff.h:skbuff->cb[xxx] size appropriately.
 *
 * The structure is accessed by casting the start of skb->cb[]
 * (see TCP_SKB_CB()), so it must never outgrow that array.
 */
struct tcp_skb_cb {
	union {
		struct inet_skb_parm	h4;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
		struct inet6_skb_parm	h6;
#endif
	} header;	/* For incoming frames		*/
	__u32		seq;		/* Starting sequence number	*/
	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
	__u32		when;		/* used to compute rtt's	*/
	__u8		flags;		/* TCP header flags.		*/

	/* NOTE: These must match up to the flags byte in a
	 *       real TCP header.
	 */
#define TCPCB_FLAG_FIN		0x01
#define TCPCB_FLAG_SYN		0x02
#define TCPCB_FLAG_RST		0x04
#define TCPCB_FLAG_PSH		0x08
#define TCPCB_FLAG_ACK		0x10
#define TCPCB_FLAG_URG		0x20
#define TCPCB_FLAG_ECE		0x40
#define TCPCB_FLAG_CWR		0x80

	__u8		sacked;		/* State flags for SACK/FACK.	*/
#define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
#define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
#define TCPCB_LOST		0x04	/* SKB is lost			*/
#define TCPCB_TAGBITS		0x07	/* All tag bits			*/

#define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
#define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)

#define TCPCB_URG		0x20	/* Urgent pointer advanced here	*/

	/* Currently just an alias for TCPCB_URG. */
#define TCPCB_AT_TAIL		(TCPCB_URG)

	__u16		urg_ptr;	/* Valid w/URG flags is set.	*/
	__u32		ack_seq;	/* Sequence number ACK'd	*/
};
1027     
/* View the start of an skb's cb[] scratch area as a struct tcp_skb_cb. */
#define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
1029     
/* Walk the write queue from its first skb, stopping at tp->send_head
 * or when the walk wraps around to the queue head itself.  skb is the
 * iteration variable; it is evaluated several times, so pass a plain
 * lvalue.
 */
#define for_retrans_queue(skb, sk, tp) \
		for (skb = (sk)->write_queue.next;			\
		     (skb != (tp)->send_head) &&			\
		     (skb != (struct sk_buff *)&(sk)->write_queue);	\
		     skb=skb->next)
1035     
1036     
1037     #include <net/tcp_ecn.h>
1038     
1039     
1040     /*
1041      *	Compute minimal free write space needed to queue new packets. 
1042      */
1043     static inline int tcp_min_write_space(struct sock *sk)
1044     {
1045     	return sk->wmem_queued/2;
1046     }
1047      
1048     static inline int tcp_wspace(struct sock *sk)
1049     {
1050     	return sk->sndbuf - sk->wmem_queued;
1051     }
1052     
1053     
1054     /* This determines how many packets are "in the network" to the best
1055      * of our knowledge.  In many cases it is conservative, but where
1056      * detailed information is available from the receiver (via SACK
1057      * blocks etc.) we can make more aggressive calculations.
1058      *
1059      * Use this for decisions involving congestion control, use just
1060      * tp->packets_out to determine if the send queue is empty or not.
1061      *
1062      * Read this equation as:
1063      *
1064      *	"Packets sent once on transmission queue" MINUS
1065      *	"Packets left network, but not honestly ACKed yet" PLUS
1066      *	"Packets fast retransmitted"
1067      */
1068     static __inline__ int tcp_packets_in_flight(struct tcp_opt *tp)
1069     {
1070     	return tp->packets_out - tp->left_out + tp->retrans_out;
1071     }
1072     
1073     /* Recalculate snd_ssthresh, we want to set it to:
1074      *
1075      * 	one half the current congestion window, but no
1076      *	less than two segments
1077      */
1078     static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
1079     {
1080     	return max_t(u32, tp->snd_cwnd >> 1, 2);
1081     }
1082     
1083     /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
1084      * The exception is rate halving phase, when cwnd is decreasing towards
1085      * ssthresh.
1086      */
1087     static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp)
1088     {
1089     	if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
1090     		return tp->snd_ssthresh;
1091     	else
1092     		return max_t(u32, tp->snd_ssthresh,
1093     			   ((tp->snd_cwnd >> 1) +
1094     			    (tp->snd_cwnd >> 2)));
1095     }
1096     
1097     static inline void tcp_sync_left_out(struct tcp_opt *tp)
1098     {
1099     	if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out)
1100     		tp->sacked_out = tp->packets_out - tp->lost_out;
1101     	tp->left_out = tp->sacked_out + tp->lost_out;
1102     }
1103     
1104     extern void tcp_cwnd_application_limited(struct sock *sk);
1105     
1106     /* Congestion window validation. (RFC2861) */
1107     
1108     static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp)
1109     {
1110     	if (tp->packets_out >= tp->snd_cwnd) {
1111     		/* Network is feed fully. */
1112     		tp->snd_cwnd_used = 0;
1113     		tp->snd_cwnd_stamp = tcp_time_stamp;
1114     	} else {
1115     		/* Network starves. */
1116     		if (tp->packets_out > tp->snd_cwnd_used)
1117     			tp->snd_cwnd_used = tp->packets_out;
1118     
1119     		if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
1120     			tcp_cwnd_application_limited(sk);
1121     	}
1122     }
1123     
1124     /* Set slow start threshould and cwnd not falling to slow start */
1125     static inline void __tcp_enter_cwr(struct tcp_opt *tp)
1126     {
1127     	tp->undo_marker = 0;
1128     	tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
1129     	tp->snd_cwnd = min_t(u32, tp->snd_cwnd,
1130     			   tcp_packets_in_flight(tp) + 1);
1131     	tp->snd_cwnd_cnt = 0;
1132     	tp->high_seq = tp->snd_nxt;
1133     	tp->snd_cwnd_stamp = tcp_time_stamp;
1134     	TCP_ECN_queue_cwr(tp);
1135     }
1136     
1137     static inline void tcp_enter_cwr(struct tcp_opt *tp)
1138     {
1139     	tp->prior_ssthresh = 0;
1140     	if (tp->ca_state < TCP_CA_CWR) {
1141     		__tcp_enter_cwr(tp);
1142     		tp->ca_state = TCP_CA_CWR;
1143     	}
1144     }
1145     
/* Defined outside this header.  Returns a __u32 -- presumably the
 * initial congestion window, going by the name; confirm at the definition.
 */
extern __u32 tcp_init_cwnd(struct tcp_opt *tp);
1147     
1148     /* Slow start with delack produces 3 packets of burst, so that
1149      * it is safe "de facto".
1150      */
1151     static __inline__ __u32 tcp_max_burst(struct tcp_opt *tp)
1152     {
1153     	return 3;
1154     }
1155     
1156     static __inline__ int tcp_minshall_check(struct tcp_opt *tp)
1157     {
1158     	return after(tp->snd_sml,tp->snd_una) &&
1159     		!after(tp->snd_sml, tp->snd_nxt);
1160     }
1161     
1162     static __inline__ void tcp_minshall_update(struct tcp_opt *tp, int mss, struct sk_buff *skb)
1163     {
1164     	if (skb->len < mss)
1165     		tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1166     }
1167     
1168     /* Return 0, if packet can be sent now without violation Nagle's rules:
1169        1. It is full sized.
1170        2. Or it contains FIN.
1171        3. Or TCP_NODELAY was set.
1172        4. Or TCP_CORK is not set, and all sent packets are ACKed.
1173           With Minshall's modification: all sent small packets are ACKed.
1174      */
1175     
1176     static __inline__ int
1177     tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int nonagle)
1178     {
1179     	return (skb->len < mss_now &&
1180     		!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
1181     		(nonagle == 2 ||
1182     		 (!nonagle &&
1183     		  tp->packets_out &&
1184     		  tcp_minshall_check(tp))));
1185     }
1186     
1187     /* This checks if the data bearing packet SKB (usually tp->send_head)
1188      * should be put on the wire right now.
1189      */
1190     static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
1191     				   unsigned cur_mss, int nonagle)
1192     {
1193     	/*	RFC 1122 - section 4.2.3.4
1194     	 *
1195     	 *	We must queue if
1196     	 *
1197     	 *	a) The right edge of this frame exceeds the window
1198     	 *	b) There are packets in flight and we have a small segment
1199     	 *	   [SWS avoidance and Nagle algorithm]
1200     	 *	   (part of SWS is done on packetization)
1201     	 *	   Minshall version sounds: there are no _small_
1202     	 *	   segments in flight. (tcp_nagle_check)
1203     	 *	c) We have too many packets 'in flight'
1204     	 *
1205     	 * 	Don't use the nagle rule for urgent data (or
1206     	 *	for the final FIN -DaveM).
1207     	 *
1208     	 *	Also, Nagle rule does not apply to frames, which
1209     	 *	sit in the middle of queue (they have no chances
1210     	 *	to get new data) and if room at tail of skb is
1211     	 *	not enough to save something seriously (<32 for now).
1212     	 */
1213     
1214     	/* Don't be strict about the congestion window for the
1215     	 * final FIN frame.  -DaveM
1216     	 */
1217     	return ((nonagle==1 || tp->urg_mode
1218     		 || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
1219     		((tcp_packets_in_flight(tp) < tp->snd_cwnd) ||
1220     		 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
1221     		!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
1222     }
1223     
1224     static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp)
1225     {
1226     	if (!tp->packets_out && !tp->pending)
1227     		tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
1228     }
1229     
1230     static __inline__ int tcp_skb_is_last(struct sock *sk, struct sk_buff *skb)
1231     {
1232     	return (skb->next == (struct sk_buff*)&sk->write_queue);
1233     }
1234     
1235     /* Push out any pending frames which were held back due to
1236      * TCP_CORK or attempt at coalescing tiny packets.
1237      * The socket must be locked by the caller.
1238      */
1239     static __inline__ void __tcp_push_pending_frames(struct sock *sk,
1240     						 struct tcp_opt *tp,
1241     						 unsigned cur_mss,
1242     						 int nonagle)
1243     {
1244     	struct sk_buff *skb = tp->send_head;
1245     
1246     	if (skb) {
1247     		if (!tcp_skb_is_last(sk, skb))
1248     			nonagle = 1;
1249     		if (!tcp_snd_test(tp, skb, cur_mss, nonagle) ||
1250     		    tcp_write_xmit(sk, nonagle))
1251     			tcp_check_probe_timer(sk, tp);
1252     	}
1253     	tcp_cwnd_validate(sk, tp);
1254     }
1255     
1256     static __inline__ void tcp_push_pending_frames(struct sock *sk,
1257     					       struct tcp_opt *tp)
1258     {
1259     	__tcp_push_pending_frames(sk, tp, tcp_current_mss(sk), tp->nonagle);
1260     }
1261     
1262     static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
1263     {
1264     	struct sk_buff *skb = tp->send_head;
1265     
1266     	return (skb &&
1267     		tcp_snd_test(tp, skb, tcp_current_mss(sk),
1268     			     tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle));
1269     }
1270     
1271     static __inline__ void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq)
1272     {
1273     	tp->snd_wl1 = seq;
1274     }
1275     
1276     static __inline__ void tcp_update_wl(struct tcp_opt *tp, u32 ack, u32 seq)
1277     {
1278     	tp->snd_wl1 = seq;
1279     }
1280     
/* Called by tcp_done() in this header when the socket is already dead. */
extern void			tcp_destroy_sock(struct sock *sk);
1282     
1283     
1284     /*
1285      * Calculate(/check) TCP checksum
1286      */
1287     static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len,
1288     				   unsigned long saddr, unsigned long daddr, 
1289     				   unsigned long base)
1290     {
1291     	return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
1292     }
1293     
1294     static __inline__ int __tcp_checksum_complete(struct sk_buff *skb)
1295     {
1296     	return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
1297     }
1298     
1299     static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
1300     {
1301     	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
1302     		__tcp_checksum_complete(skb);
1303     }
1304     
1305     /* Prequeue for VJ style copy to user, combined with checksumming. */
1306     
1307     static __inline__ void tcp_prequeue_init(struct tcp_opt *tp)
1308     {
1309     	tp->ucopy.task = NULL;
1310     	tp->ucopy.len = 0;
1311     	tp->ucopy.memory = 0;
1312     	skb_queue_head_init(&tp->ucopy.prequeue);
1313     }
1314     
1315     /* Packet is added to VJ-style prequeue for processing in process
1316      * context, if a reader task is waiting. Apparently, this exciting
1317      * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1318      * failed somewhere. Latency? Burstiness? Well, at least now we will
1319      * see, why it failed. 8)8)				  --ANK
1320      *
1321      * NOTE: is this not too big to inline?
1322      */
1323     static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1324     {
1325     	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1326     
1327     	if (tp->ucopy.task) {
1328     		__skb_queue_tail(&tp->ucopy.prequeue, skb);
1329     		tp->ucopy.memory += skb->truesize;
1330     		if (tp->ucopy.memory > sk->rcvbuf) {
1331     			struct sk_buff *skb1;
1332     
1333     			if (sk->lock.users) BUG();
1334     
1335     			while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1336     				sk->backlog_rcv(sk, skb1);
1337     				NET_INC_STATS_BH(TCPPrequeueDropped);
1338     			}
1339     
1340     			tp->ucopy.memory = 0;
1341     		} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1342     			wake_up_interruptible(sk->sleep);
1343     			if (!tcp_ack_scheduled(tp))
1344     				tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4);
1345     		}
1346     		return 1;
1347     	}
1348     	return 0;
1349     }
1350     
1351     
/* State tracing is compiled out: STATE_TRACE is explicitly undefined
 * here, so neither the name table below nor the SOCK_DEBUG call in
 * tcp_set_state() is built.
 */
#undef STATE_TRACE

#ifdef STATE_TRACE
/* Printable state names, indexed by state value in the trace message. */
static char *statename[]={
	"Unused","Established","Syn Sent","Syn Recv",
	"Fin Wait 1","Fin Wait 2","Time Wait", "Close",
	"Close Wait","Last ACK","Listen","Closing"
};
#endif
1361     
1362     static __inline__ void tcp_set_state(struct sock *sk, int state)
1363     {
1364     	int oldstate = sk->state;
1365     
1366     	switch (state) {
1367     	case TCP_ESTABLISHED:
1368     		if (oldstate != TCP_ESTABLISHED)
1369     			TCP_INC_STATS(TcpCurrEstab);
1370     		break;
1371     
1372     	case TCP_CLOSE:
1373     		sk->prot->unhash(sk);
1374     		if (sk->prev && !(sk->userlocks&SOCK_BINDPORT_LOCK))
1375     			tcp_put_port(sk);
1376     		/* fall through */
1377     	default:
1378     		if (oldstate==TCP_ESTABLISHED)
1379     			tcp_statistics[smp_processor_id()*2+!in_softirq()].TcpCurrEstab--;
1380     	}
1381     
1382     	/* Change state AFTER socket is unhashed to avoid closed
1383     	 * socket sitting in hash tables.
1384     	 */
1385     	sk->state = state;
1386     
1387     #ifdef STATE_TRACE
1388     	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]);
1389     #endif	
1390     }
1391     
1392     static __inline__ void tcp_done(struct sock *sk)
1393     {
1394     	tcp_set_state(sk, TCP_CLOSE);
1395     	tcp_clear_xmit_timers(sk);
1396     
1397     	sk->shutdown = SHUTDOWN_MASK;
1398     
1399     	if (!sk->dead)
1400     		sk->state_change(sk);
1401     	else
1402     		tcp_destroy_sock(sk);
1403     }
1404     
1405     static __inline__ void tcp_sack_reset(struct tcp_opt *tp)
1406     {
1407     	tp->dsack = 0;
1408     	tp->eff_sacks = 0;
1409     	tp->num_sacks = 0;
1410     }
1411     
1412     static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp, __u32 tstamp)
1413     {
1414     	if (tp->tstamp_ok) {
1415     		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
1416     					  (TCPOPT_NOP << 16) |
1417     					  (TCPOPT_TIMESTAMP << 8) |
1418     					  TCPOLEN_TIMESTAMP);
1419     		*ptr++ = htonl(tstamp);
1420     		*ptr++ = htonl(tp->ts_recent);
1421     	}
1422     	if (tp->eff_sacks) {
1423     		struct tcp_sack_block *sp = tp->dsack ? tp->duplicate_sack : tp->selective_acks;
1424     		int this_sack;
1425     
1426     		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
1427     					  (TCPOPT_NOP << 16) |
1428     					  (TCPOPT_SACK << 8) |
1429     					  (TCPOLEN_SACK_BASE +
1430     					   (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)));
1431     		for(this_sack = 0; this_sack < tp->eff_sacks; this_sack++) {
1432     			*ptr++ = htonl(sp[this_sack].start_seq);
1433     			*ptr++ = htonl(sp[this_sack].end_seq);
1434     		}
1435     		if (tp->dsack) {
1436     			tp->dsack = 0;
1437     			tp->eff_sacks--;
1438     		}
1439     	}
1440     }
1441     
1442     /* Construct a tcp options header for a SYN or SYN_ACK packet.
1443      * If this is every changed make sure to change the definition of
1444      * MAX_SYN_SIZE to match the new maximum number of options that you
1445      * can generate.
1446      */
1447     static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
1448     					     int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
1449     {
1450     	/* We always get an MSS option.
1451     	 * The option bytes which will be seen in normal data
1452     	 * packets should timestamps be used, must be in the MSS
1453     	 * advertised.  But we subtract them from tp->mss_cache so
1454     	 * that calculations in tcp_sendmsg are simpler etc.
1455     	 * So account for this fact here if necessary.  If we
1456     	 * don't do this correctly, as a receiver we won't
1457     	 * recognize data packets as being full sized when we
1458     	 * should, and thus we won't abide by the delayed ACK
1459     	 * rules correctly.
1460     	 * SACKs don't matter, we never delay an ACK when we
1461     	 * have any of those going out.
1462     	 */
1463     	*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
1464     	if (ts) {
1465     		if(sack)
1466     			*ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
1467     						  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1468     		else
1469     			*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1470     						  (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1471     		*ptr++ = htonl(tstamp);		/* TSVAL */
1472     		*ptr++ = htonl(ts_recent);	/* TSECR */
1473     	} else if(sack)
1474     		*ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1475     					  (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
1476     	if (offer_wscale)
1477     		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
1478     }
1479     
/* Determine a window scaling and initial window to offer.
 * Based on the assumption that the given amount of space
 * will be offered. The results are stored through the rcv_wnd,
 * window_clamp and rcv_wscale pointers.
 * NOTE: for smooth operation initial space offering should
 * be a multiple of mss if possible. We assume here that mss >= 1.
 * This MUST be enforced by all callers.
 *
 * space:        buffer space assumed to be on offer.
 * mss:          segment size used for quantizing (must be >= 1).
 * rcv_wnd:      out; initial window to advertise.
 * window_clamp: in/out; 0 on entry means "no clamp".
 * wscale_ok:    non-zero if window scaling may be used.
 * rcv_wscale:   out; chosen scale shift, 0..14.
 */
static inline void tcp_select_initial_window(int space, __u32 mss,
	__u32 *rcv_wnd,
	__u32 *window_clamp,
	int wscale_ok,
	__u8 *rcv_wscale)
{
	/* If no clamp set the clamp to the max possible scaled window */
	if (*window_clamp == 0)
		(*window_clamp) = (65535 << 14);
	space = min_t(u32, *window_clamp, space);

	/* Quantize space offering to a multiple of mss if possible. */
	if (space > mss)
		space = (space / mss) * mss;

	/* NOTE: offering an initial window larger than 32767
	 * will break some buggy TCP stacks. We try to be nice.
	 * If we are not window scaling, then this truncates
	 * our initial window offering to 32k. There should also
	 * be a sysctl option to stop being nice.
	 */
	(*rcv_wnd) = min_t(int, space, MAX_TCP_WINDOW);
	(*rcv_wscale) = 0;
	if (wscale_ok) {
		/* See RFC1323 for an explanation of the limit to 14 */
		while (space > 65535 && (*rcv_wscale) < 14) {
			space >>= 1;
			(*rcv_wscale)++;
		}
		/* Back the scale off by one step when, after setting aside
		 * the larger of space>>sysctl_tcp_app_win and the scaled mss,
		 * less than half of 64K would remain.
		 */
		if (*rcv_wscale && sysctl_tcp_app_win && space>=mss &&
		    space - max_t(unsigned int, (space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
			(*rcv_wscale)--;
	}

	/* Set initial window to value enough for senders,
	 * following RFC2414. Senders, not following this RFC,
	 * will be satisfied with 2.
	 */
	if (mss > (1<<*rcv_wscale)) {
		/* 4 segments normally, 3 above 1460 bytes, 2 above 3*1460. */
		int init_cwnd = 4;
		if (mss > 1460*3)
			init_cwnd = 2;
		else if (mss > 1460)
			init_cwnd = 3;
		if (*rcv_wnd > init_cwnd*mss)
			*rcv_wnd = init_cwnd*mss;
	}
	/* Set the clamp no higher than max representable value */
	(*window_clamp) = min_t(u32, 65535 << (*rcv_wscale), *window_clamp);
}
1537     
1538     static inline int tcp_win_from_space(int space)
1539     {
1540     	return sysctl_tcp_adv_win_scale<=0 ?
1541     		(space>>(-sysctl_tcp_adv_win_scale)) :
1542     		space - (space>>sysctl_tcp_adv_win_scale);
1543     }
1544     
1545     /* Note: caller must be prepared to deal with negative returns */ 
1546     static inline int tcp_space(struct sock *sk)
1547     {
1548     	return tcp_win_from_space(sk->rcvbuf - atomic_read(&sk->rmem_alloc));
1549     } 
1550     
1551     static inline int tcp_full_space( struct sock *sk)
1552     {
1553     	return tcp_win_from_space(sk->rcvbuf); 
1554     }
1555     
1556     static inline void tcp_acceptq_removed(struct sock *sk)
1557     {
1558     	sk->ack_backlog--;
1559     }
1560     
1561     static inline void tcp_acceptq_added(struct sock *sk)
1562     {
1563     	sk->ack_backlog++;
1564     }
1565     
1566     static inline int tcp_acceptq_is_full(struct sock *sk)
1567     {
1568     	return sk->ack_backlog > sk->max_ack_backlog;
1569     }
1570     
1571     static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req,
1572     					 struct sock *child)
1573     {
1574     	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1575     
1576     	req->sk = child;
1577     	tcp_acceptq_added(sk);
1578     
1579     	if (!tp->accept_queue_tail) {
1580     		tp->accept_queue = req;
1581     	} else {
1582     		tp->accept_queue_tail->dl_next = req;
1583     	}
1584     	tp->accept_queue_tail = req;
1585     	req->dl_next = NULL;
1586     }
1587     
/* Per-listener bookkeeping for the queue of pending open requests. */
struct tcp_listen_opt
{
	u8			max_qlen_log;	/* log_2 of maximal queued SYNs */
	int			qlen;		/* entries currently on the SYN queue */
	int			qlen_young;	/* entries with req->retrans == 0 */
	int			clock_hand;	/* NOTE(review): not used in this header; presumably a scan position */
	struct open_request	*syn_table[TCP_SYNQ_HSIZE];	/* table of pending requests */
};
1596     
1597     static inline void
1598     tcp_synq_removed(struct sock *sk, struct open_request *req)
1599     {
1600     	struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt;
1601     
1602     	if (--lopt->qlen == 0)
1603     		tcp_delete_keepalive_timer(sk);
1604     	if (req->retrans == 0)
1605     		lopt->qlen_young--;
1606     }
1607     
1608     static inline void tcp_synq_added(struct sock *sk)
1609     {
1610     	struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt;
1611     
1612     	if (lopt->qlen++ == 0)
1613     		tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
1614     	lopt->qlen_young++;
1615     }
1616     
1617     static inline int tcp_synq_len(struct sock *sk)
1618     {
1619     	return sk->tp_pinfo.af_tcp.listen_opt->qlen;
1620     }
1621     
1622     static inline int tcp_synq_young(struct sock *sk)
1623     {
1624     	return sk->tp_pinfo.af_tcp.listen_opt->qlen_young;
1625     }
1626     
1627     static inline int tcp_synq_is_full(struct sock *sk)
1628     {
1629     	return tcp_synq_len(sk)>>sk->tp_pinfo.af_tcp.listen_opt->max_qlen_log;
1630     }
1631     
1632     static inline void tcp_synq_unlink(struct tcp_opt *tp, struct open_request *req,
1633     				       struct open_request **prev)
1634     {
1635     	write_lock(&tp->syn_wait_lock);
1636     	*prev = req->dl_next;
1637     	write_unlock(&tp->syn_wait_lock);
1638     }
1639     
1640     static inline void tcp_synq_drop(struct sock *sk, struct open_request *req,
1641     				     struct open_request **prev)
1642     {
1643     	tcp_synq_unlink(&sk->tp_pinfo.af_tcp, req, prev);
1644     	tcp_synq_removed(sk, req);
1645     	tcp_openreq_free(req);
1646     }
1647     
1648     static __inline__ void tcp_openreq_init(struct open_request *req,
1649     					struct tcp_opt *tp,
1650     					struct sk_buff *skb)
1651     {
1652     	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
1653     	req->rcv_isn = TCP_SKB_CB(skb)->seq;
1654     	req->mss = tp->mss_clamp;
1655     	req->ts_recent = tp->saw_tstamp ? tp->rcv_tsval : 0;
1656     	req->tstamp_ok = tp->tstamp_ok;
1657     	req->sack_ok = tp->sack_ok;
1658     	req->snd_wscale = tp->snd_wscale;
1659     	req->wscale_ok = tp->wscale_ok;
1660     	req->acked = 0;
1661     	req->ecn_ok = 0;
1662     	req->rmt_port = skb->h.th->source;
1663     }
1664     
/* One page, as a signed int so it compares cleanly with
 * sk->forward_alloc (see tcp_mem_reclaim()).
 */
#define TCP_MEM_QUANTUM	((int)PAGE_SIZE)
1666     
1667     static inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb)
1668     {
1669     	sk->tp_pinfo.af_tcp.queue_shrunk = 1;
1670     	sk->wmem_queued -= skb->truesize;
1671     	sk->forward_alloc += skb->truesize;
1672     	__kfree_skb(skb);
1673     }
1674     
1675     static inline void tcp_charge_skb(struct sock *sk, struct sk_buff *skb)
1676     {
1677     	sk->wmem_queued += skb->truesize;
1678     	sk->forward_alloc -= skb->truesize;
1679     }
1680     
/* Memory accounting helpers defined outside this header.
 * __tcp_mem_reclaim() is called by tcp_mem_reclaim() once at least
 * TCP_MEM_QUANTUM is forward-allocated.  A non-zero return from
 * tcp_mem_schedule() is treated as "allocation may proceed" by the
 * allocators in this header.
 */
extern void __tcp_mem_reclaim(struct sock *sk);
extern int tcp_mem_schedule(struct sock *sk, int size, int kind);
1683     
1684     static inline void tcp_mem_reclaim(struct sock *sk)
1685     {
1686     	if (sk->forward_alloc >= TCP_MEM_QUANTUM)
1687     		__tcp_mem_reclaim(sk);
1688     }
1689     
1690     static inline void tcp_enter_memory_pressure(void)
1691     {
1692     	if (!tcp_memory_pressure) {
1693     		NET_INC_STATS(TCPMemoryPressures);
1694     		tcp_memory_pressure = 1;
1695     	}
1696     }
1697     
1698     static inline void tcp_moderate_sndbuf(struct sock *sk)
1699     {
1700     	if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) {
1701     		sk->sndbuf = min_t(int, sk->sndbuf, sk->wmem_queued/2);
1702     		sk->sndbuf = max_t(int, sk->sndbuf, SOCK_MIN_SNDBUF);
1703     	}
1704     }
1705     
1706     static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, int gfp)
1707     {
1708     	struct sk_buff *skb = alloc_skb(size+MAX_TCP_HEADER, gfp);
1709     
1710     	if (skb) {
1711     		skb->truesize += mem;
1712     		if (sk->forward_alloc >= (int)skb->truesize ||
1713     		    tcp_mem_schedule(sk, skb->truesize, 0)) {
1714     			skb_reserve(skb, MAX_TCP_HEADER);
1715     			return skb;
1716     		}
1717     		__kfree_skb(skb);
1718     	} else {
1719     		tcp_enter_memory_pressure();
1720     		tcp_moderate_sndbuf(sk);
1721     	}
1722     	return NULL;
1723     }
1724     
/*
 * Convenience wrapper around tcp_alloc_pskb() that adds nothing extra to
 * skb->truesize (mem == 0).  Returns the skb or NULL, exactly as
 * tcp_alloc_pskb() does.
 */
static inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp)
{
	struct sk_buff *skb;

	skb = tcp_alloc_pskb(sk, size, 0, gfp);
	return skb;
}
1729     
1730     static inline struct page * tcp_alloc_page(struct sock *sk)
1731     {
1732     	if (sk->forward_alloc >= (int)PAGE_SIZE ||
1733     	    tcp_mem_schedule(sk, PAGE_SIZE, 0)) {
1734     		struct page *page = alloc_pages(sk->allocation, 0);
1735     		if (page)
1736     			return page;
1737     	}
1738     	tcp_enter_memory_pressure();
1739     	tcp_moderate_sndbuf(sk);
1740     	return NULL;
1741     }
1742     
1743     static inline void tcp_writequeue_purge(struct sock *sk)
1744     {
1745     	struct sk_buff *skb;
1746     
1747     	while ((skb = __skb_dequeue(&sk->write_queue)) != NULL)
1748     		tcp_free_skb(sk, skb);
1749     	tcp_mem_reclaim(sk);
1750     }
1751     
1752     extern void tcp_rfree(struct sk_buff *skb);
1753     
1754     static inline void tcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
1755     {
1756     	skb->sk = sk;
1757     	skb->destructor = tcp_rfree;
1758     	atomic_add(skb->truesize, &sk->rmem_alloc);
1759     	sk->forward_alloc -= skb->truesize;
1760     }
1761     
/* Defined elsewhere; presumably the writer-side counterpart of
 * tcp_listen_lock() -- confirm against its definition.
 */
extern void tcp_listen_wlock(void);

/* Register the caller as a user of the listening hash.
 *
 * - We may sleep inside this lock.
 * - If sleeping is not required (or when called from BH),
 *   use plain read_(un)lock(&tcp_lhash_lock) instead.
 *
 * tcp_lhash_lock is held only while tcp_lhash_users is incremented; the
 * count is dropped again by tcp_listen_unlock().
 */

static inline void tcp_listen_lock(void)
{
	/* Taking the read lock makes would-be users synchronize with writers. */
	read_lock(&tcp_lhash_lock);
	atomic_inc(&tcp_lhash_users);
	read_unlock(&tcp_lhash_lock);
}
1776     
1777     static inline void tcp_listen_unlock(void)
1778     {
1779     	if (atomic_dec_and_test(&tcp_lhash_users))
1780     		wake_up(&tcp_lhash_wait);
1781     }
1782     
1783     static inline int keepalive_intvl_when(struct tcp_opt *tp)
1784     {
1785     	return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
1786     }
1787     
1788     static inline int keepalive_time_when(struct tcp_opt *tp)
1789     {
1790     	return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
1791     }
1792     
1793     static inline int tcp_fin_time(struct tcp_opt *tp)
1794     {
1795     	int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout;
1796     
1797     	if (fin_timeout < (tp->rto<<2) - (tp->rto>>1))
1798     		fin_timeout = (tp->rto<<2) - (tp->rto>>1);
1799     
1800     	return fin_timeout;
1801     }
1802     
1803     static inline int tcp_paws_check(struct tcp_opt *tp, int rst)
1804     {
1805     	if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0)
1806     		return 0;
1807     	if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
1808     		return 0;
1809     
1810     	/* RST segments are not recommended to carry timestamp,
1811     	   and, if they do, it is recommended to ignore PAWS because
1812     	   "their cleanup function should take precedence over timestamps."
1813     	   Certainly, it is mistake. It is necessary to understand the reasons
1814     	   of this constraint to relax it: if peer reboots, clock may go
1815     	   out-of-sync and half-open connections will not be reset.
1816     	   Actually, the problem would be not existing if all
1817     	   the implementations followed draft about maintaining clock
1818     	   via reboots. Linux-2.2 DOES NOT!
1819     
1820     	   However, we can relax time bounds for RST segments to MSL.
1821     	 */
1822     	if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL)
1823     		return 0;
1824     	return 1;
1825     }
1826     
/*
 * Expands to an empty statement; the argument is not evaluated.
 *
 * There is deliberately no trailing semicolon in the expansion: the call
 * site supplies it.  With a semicolon baked in, "TCP_CHECK_TIMER(sk);"
 * would expand to two statements and break constructs such as
 *
 *	if (cond)
 *		TCP_CHECK_TIMER(sk);
 *	else
 *		...
 */
#define TCP_CHECK_TIMER(sk) do { } while (0)
1828     
1829     #endif	/* _TCP_H */
1830