File: /usr/src/linux/include/net/tcp.h
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Definitions for the TCP module.
7 *
8 * Version: @(#)tcp.h 1.0.5 05/23/93
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version
16 * 2 of the License, or (at your option) any later version.
17 */
18 #ifndef _TCP_H
19 #define _TCP_H
20
21 #define TCP_DEBUG 1
22 #define FASTRETRANS_DEBUG 1
23
24 /* Cancel timers, when they are not required. */
25 #undef TCP_CLEAR_TIMERS
26
27 #include <linux/config.h>
28 #include <linux/tcp.h>
29 #include <linux/slab.h>
30 #include <net/checksum.h>
31 #include <net/sock.h>
32
33 /* This is for all connections with a full identity, no wildcards.
34 * New scheme, half the table is for TIME_WAIT, the other half is
35 * for the rest. I'll experiment with dynamic table growth later.
36 */
37 struct tcp_ehash_bucket {
38 rwlock_t lock;
39 struct sock *chain;
40 } __attribute__((__aligned__(8)));
41
42 /* This is for listening sockets, thus all sockets which possess wildcards. */
43 #define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */
44
45 /* There are a few simple rules, which allow for local port reuse by
46 * an application. In essence:
47 *
48 * 1) Sockets bound to different interfaces may share a local port.
49 * Failing that, goto test 2.
50 * 2) If all sockets have sk->reuse set, and none of them are in
51 * TCP_LISTEN state, the port may be shared.
52 * Failing that, goto test 3.
53 * 3) If all sockets are bound to a specific sk->rcv_saddr local
54 * address, and none of them are the same, the port may be
55 * shared.
56 * Failing this, the port cannot be shared.
57 *
58 * The interesting point, is test #2. This is what an FTP server does
59 * all day. To optimize this case we use a specific flag bit defined
60 * below. As we add sockets to a bind bucket list, we perform a
61 * check of: (newsk->reuse && (newsk->state != TCP_LISTEN))
62 * As long as all sockets added to a bind bucket pass this test,
63 * the flag bit will be set.
64 * The resulting situation is that tcp_v[46]_verify_bind() can just check
65 * for this flag bit, if it is set and the socket trying to bind has
66 * sk->reuse set, we don't even have to walk the owners list at all,
67 * we return that it is ok to bind this socket to the requested local port.
68 *
69 * Sounds like a lot of work, but it is worth it. In a more naive
70 * implementation (ie. current FreeBSD etc.) the entire list of ports
71 * must be walked for each data port opened by an ftp server. Needless
72 * to say, this does not scale at all. With a couple thousand FTP
73 * users logged onto your box, isn't it nice to know that new data
74 * ports are created in O(1) time? I thought so. ;-) -DaveM
75 */
struct tcp_bind_bucket {
	unsigned short		port;		/* Port number of this bucket. */
	unsigned short		fastreuse;	/* Presumably the fast-path reuse
						 * flag described in the comment
						 * above; confirm against users.
						 */
	struct tcp_bind_bucket	*next;		/* Forward link to another bucket. */
	struct sock		*owners;	/* The "owners list" of sockets. */
	struct tcp_bind_bucket	**pprev;	/* Back link (pointer to pointer). */
};
83
84 struct tcp_bind_hashbucket {
85 spinlock_t lock;
86 struct tcp_bind_bucket *chain;
87 };
88
89 extern struct tcp_hashinfo {
90 /* This is for sockets with full identity only. Sockets here will
91 * always be without wildcards and will have the following invariant:
92 *
93 * TCP_ESTABLISHED <= sk->state < TCP_CLOSE
94 *
95 * First half of the table is for sockets not in TIME_WAIT, second half
96 * is for TIME_WAIT sockets only.
97 */
98 struct tcp_ehash_bucket *__tcp_ehash;
99
100 /* Ok, let's try this, I give up, we do need a local binding
101 * TCP hash as well as the others for fast bind/connect.
102 */
103 struct tcp_bind_hashbucket *__tcp_bhash;
104
105 int __tcp_bhash_size;
106 int __tcp_ehash_size;
107
108 /* All sockets in TCP_LISTEN state will be in here. This is the only
109 * table where wildcard'd TCP sockets can exist. Hash function here
110 * is just local port number.
111 */
112 struct sock *__tcp_listening_hash[TCP_LHTABLE_SIZE];
113
114 /* All the above members are written once at bootup and
115 * never written again _or_ are predominantly read-access.
116 *
117 * Now align to a new cache line as all the following members
118 * are often dirty.
119 */
120 rwlock_t __tcp_lhash_lock
121 __attribute__((__aligned__(SMP_CACHE_BYTES)));
122 atomic_t __tcp_lhash_users;
123 wait_queue_head_t __tcp_lhash_wait;
124 spinlock_t __tcp_portalloc_lock;
125 } tcp_hashinfo;
126
127 #define tcp_ehash (tcp_hashinfo.__tcp_ehash)
128 #define tcp_bhash (tcp_hashinfo.__tcp_bhash)
129 #define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size)
130 #define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size)
131 #define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)
132 #define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock)
133 #define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users)
134 #define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)
135 #define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)
136
137 extern kmem_cache_t *tcp_bucket_cachep;
138 extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
139 unsigned short snum);
140 extern void tcp_bucket_unlock(struct sock *sk);
141 extern int tcp_port_rover;
142 extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif);
143
144 /* These are AF independent. */
145 static __inline__ int tcp_bhashfn(__u16 lport)
146 {
147 return (lport & (tcp_bhash_size - 1));
148 }
149
150 /* This is a TIME_WAIT bucket. It works around the memory consumption
151 * problems of sockets in such a state on heavily loaded servers, but
152 * without violating the protocol specification.
153 */
154 struct tcp_tw_bucket {
155 /* These _must_ match the beginning of struct sock precisely.
156 * XXX Yes I know this is gross, but I'd have to edit every single
157 * XXX networking file if I created a "struct sock_header". -DaveM
158 */
159 __u32 daddr;
160 __u32 rcv_saddr;
161 __u16 dport;
162 unsigned short num;
163 int bound_dev_if;
164 struct sock *next;
165 struct sock **pprev;
166 struct sock *bind_next;
167 struct sock **bind_pprev;
168 unsigned char state,
169 substate; /* "zapped" is replaced with "substate" */
170 __u16 sport;
171 unsigned short family;
172 unsigned char reuse,
173 rcv_wscale; /* It is also TW bucket specific */
174 atomic_t refcnt;
175
176 /* And these are ours. */
177 int hashent;
178 int timeout;
179 __u32 rcv_nxt;
180 __u32 snd_nxt;
181 __u32 rcv_wnd;
182 __u32 ts_recent;
183 long ts_recent_stamp;
184 unsigned long ttd;
185 struct tcp_bind_bucket *tb;
186 struct tcp_tw_bucket *next_death;
187 struct tcp_tw_bucket **pprev_death;
188
189 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
190 struct in6_addr v6_daddr;
191 struct in6_addr v6_rcv_saddr;
192 #endif
193 };
194
195 extern kmem_cache_t *tcp_timewait_cachep;
196
197 static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
198 {
199 if (atomic_dec_and_test(&tw->refcnt)) {
200 #ifdef INET_REFCNT_DEBUG
201 printk(KERN_DEBUG "tw_bucket %p released\n", tw);
202 #endif
203 kmem_cache_free(tcp_timewait_cachep, tw);
204 }
205 }
206
207 extern atomic_t tcp_orphan_count;
208 extern int tcp_tw_count;
209 extern void tcp_time_wait(struct sock *sk, int state, int timeo);
210 extern void tcp_timewait_kill(struct tcp_tw_bucket *tw);
211 extern void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);
212 extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
213
214
215 /* Socket demux engine toys. */
/* Pack a source/destination port pair into a single __u32 so that both
 * ports can be compared at once (see the 32-bit load from ->dport in
 * TCP_IPV4_MATCH).  Which port occupies the upper 16 bits depends on
 * the byte order of the machine.
 */
#ifndef __BIG_ENDIAN	/* i.e. __LITTLE_ENDIAN */
#define TCP_COMBINED_PORTS(sport, dport) \
	((__u32)(sport) | ((__u32)(dport) << 16))
#else
#define TCP_COMBINED_PORTS(sport, dport) \
	((__u32)(dport) | ((__u32)(sport) << 16))
#endif
223
/* IPv4 socket matching.
 *
 * TCP_V4_ADDR_COOKIE(name, saddr, daddr)
 *	On 64-bit machines this declares a local __u64 called 'name'
 *	holding both addresses, so that TCP_IPV4_MATCH can check them with
 *	one 64-bit compare against the word starting at sk->daddr.  On
 *	32-bit machines it expands to nothing.  The 64-bit expansion
 *	carries its own trailing semicolon, so invoke it without one.
 *
 * TCP_IPV4_MATCH(sk, cookie, saddr, daddr, ports, dif)
 *	Non-zero when the addresses match, the 32-bit word starting at
 *	sk->dport equals 'ports', and the socket is either not bound to a
 *	device (bound_dev_if == 0) or bound to 'dif'.
 */
#if (BITS_PER_LONG == 64)

#ifdef __BIG_ENDIAN
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
	__u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr));
#else /* __LITTLE_ENDIAN */
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
	__u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
#endif /* __BIG_ENDIAN */

#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
	(((*((__u64 *)&((__sk)->daddr))) == (__cookie))	&&		\
	 ((*((__u32 *)&((__sk)->dport))) == (__ports))	&&		\
	 (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))

#else /* 32-bit arch */

#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)

#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
	(((__sk)->daddr == (__saddr))			&&		\
	 ((__sk)->rcv_saddr == (__daddr))		&&		\
	 ((*((__u32 *)&((__sk)->dport))) == (__ports))	&&		\
	 (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))

#endif /* 64-bit arch */
244
/* IPv6 counterpart of TCP_IPV4_MATCH.  Non-zero when the 32-bit word
 * starting at sk->dport equals 'ports', the socket family is AF_INET6,
 * both IPv6 addresses compare equal (ipv6_addr_cmp() returning zero),
 * and the socket is either unbound or bound to 'dif'.
 */
#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif)			\
	(((*((__u32 *)&((__sk)->dport))) == (__ports))			&&	\
	 ((__sk)->family == AF_INET6)					&&	\
	 !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.daddr, (__saddr))	&&	\
	 !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.rcv_saddr, (__daddr)) &&	\
	 (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
251
252 /* These can have wildcards, don't try too hard. */
253 static __inline__ int tcp_lhashfn(unsigned short num)
254 {
255 return num & (TCP_LHTABLE_SIZE - 1);
256 }
257
258 static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
259 {
260 return tcp_lhashfn(sk->num);
261 }
262
263 #define MAX_TCP_HEADER (128 + MAX_HEADER)
264
265 /*
266 * Never offer a window over 32767 without using window scaling. Some
267 * poor stacks do signed 16bit maths!
268 */
269 #define MAX_TCP_WINDOW 32767
270
271 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
272 #define TCP_MIN_MSS 88
273
274 /* Minimal RCV_MSS. */
275 #define TCP_MIN_RCVMSS 536
276
277 /* After receiving this amount of duplicate ACKs fast retransmit starts. */
278 #define TCP_FASTRETRANS_THRESH 3
279
280 /* Maximal reordering. */
281 #define TCP_MAX_REORDERING 127
282
283 /* Maximal number of ACKs sent quickly to accelerate slow-start. */
284 #define TCP_MAX_QUICKACKS 16
285
286 /* urg_data states */
287 #define TCP_URG_VALID 0x0100
288 #define TCP_URG_NOTYET 0x0200
289 #define TCP_URG_READ 0x0400
290
291 #define TCP_RETR1 3 /*
292 * This is how many retries it does before it
293 * tries to figure out if the gateway is
294 * down. Minimal RFC value is 3; it corresponds
295 * to ~3sec-8min depending on RTO.
296 */
297
298 #define TCP_RETR2 15 /*
299 * This should take at least
300 * 90 minutes to time out.
301 * RFC1122 says that the limit is 100 sec.
302 * 15 is ~13-30min depending on RTO.
303 */
304
305 #define TCP_SYN_RETRIES 5 /* number of times to retry active opening a
306 * connection: ~180sec is RFC minimum */
307
308 #define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a
309 * connection: ~180sec is RFC minimum */
310
311
312 #define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned
313 * socket. 7 is ~50sec-16min.
314 */
315
316
317 #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
318 * state, about 60 seconds */
319 #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN
320 /* BSD style FIN_WAIT2 deadlock breaker.
321 * It used to be 3min, new value is 60sec,
322 * to combine FIN-WAIT-2 timeout with
323 * TIME-WAIT timer.
324 */
325
326 #define TCP_DELACK_MAX (HZ/5) /* maximal time to delay before sending an ACK */
327 #if HZ >= 100
328 #define TCP_DELACK_MIN (HZ/25) /* minimal time to delay before sending an ACK */
329 #define TCP_ATO_MIN (HZ/25)
330 #else
331 #define TCP_DELACK_MIN 4
332 #define TCP_ATO_MIN 4
333 #endif
334 #define TCP_RTO_MAX (120*HZ)
335 #define TCP_RTO_MIN (HZ/5)
336 #define TCP_TIMEOUT_INIT (3*HZ) /* RFC 1122 initial RTO value */
337
338 #define TCP_RESOURCE_PROBE_INTERVAL (HZ/2) /* Maximal interval between probes
339 * for local resources.
340 */
341
342 #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */
343 #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */
344 #define TCP_KEEPALIVE_INTVL (75*HZ)
345
346 #define MAX_TCP_KEEPIDLE 32767
347 #define MAX_TCP_KEEPINTVL 32767
348 #define MAX_TCP_KEEPCNT 127
349 #define MAX_TCP_SYNCNT 127
350
351 /* TIME_WAIT reaping mechanism. */
352 #define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
353 #define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS)
354
355 #define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */
356 #define TCP_SYNQ_HSIZE 512 /* Size of SYNACK hash table */
357
358 #define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
359 #define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated
360 * after this time. It should be equal
361 * (or greater than) TCP_TIMEWAIT_LEN
362 * to provide reliability equal to one
363 * provided by timewait state.
364 */
365 #define TCP_PAWS_WINDOW 1 /* Replay window for per-host
366 * timestamps. It must be less than
367 * minimal timewait lifetime.
368 */
369
370 #define TCP_TW_RECYCLE_SLOTS_LOG 5
371 #define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG)
372
373 /* If time > 4sec, it is "slow" path, no recycling is required,
374 so that we select tick to get range about 4 seconds.
375 */
376
377 #if HZ <= 16 || HZ > 4096
378 # error Unsupported: HZ <= 16 or HZ > 4096
379 #elif HZ <= 32
380 # define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
381 #elif HZ <= 64
382 # define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
383 #elif HZ <= 128
384 # define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
385 #elif HZ <= 256
386 # define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
387 #elif HZ <= 512
388 # define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
389 #elif HZ <= 1024
390 # define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
391 #elif HZ <= 2048
392 # define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
393 #else
394 # define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
395 #endif
396
397 /*
398 * TCP option
399 */
400
401 #define TCPOPT_NOP 1 /* Padding */
402 #define TCPOPT_EOL 0 /* End of options */
403 #define TCPOPT_MSS 2 /* Segment size negotiating */
404 #define TCPOPT_WINDOW 3 /* Window scaling */
405 #define TCPOPT_SACK_PERM 4 /* SACK Permitted */
406 #define TCPOPT_SACK 5 /* SACK Block */
407 #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
408
409 /*
410 * TCP option lengths
411 */
412
413 #define TCPOLEN_MSS 4
414 #define TCPOLEN_WINDOW 3
415 #define TCPOLEN_SACK_PERM 2
416 #define TCPOLEN_TIMESTAMP 10
417
418 /* But this is what stacks really send out. */
419 #define TCPOLEN_TSTAMP_ALIGNED 12
420 #define TCPOLEN_WSCALE_ALIGNED 4
421 #define TCPOLEN_SACKPERM_ALIGNED 4
422 #define TCPOLEN_SACK_BASE 2
423 #define TCPOLEN_SACK_BASE_ALIGNED 4
424 #define TCPOLEN_SACK_PERBLOCK 8
425
426 #define TCP_TIME_RETRANS 1 /* Retransmit timer */
427 #define TCP_TIME_DACK 2 /* Delayed ack timer */
428 #define TCP_TIME_PROBE0 3 /* Zero window probe timer */
429 #define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */
430
/* sysctl variables for tcp.
 *
 * Declarations only; the definitions live elsewhere.  Each variable is
 * declared exactly once (sysctl_tcp_tw_recycle used to appear twice).
 */
extern int sysctl_max_syn_backlog;
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack;
extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_tw_recycle;
extern int sysctl_tcp_keepalive_time;
extern int sysctl_tcp_keepalive_probes;
extern int sysctl_tcp_keepalive_intvl;
extern int sysctl_tcp_syn_retries;
extern int sysctl_tcp_synack_retries;
extern int sysctl_tcp_retries1;
extern int sysctl_tcp_retries2;
extern int sysctl_tcp_orphan_retries;
extern int sysctl_tcp_syncookies;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_max_tw_buckets;
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
extern int sysctl_tcp_dsack;
extern int sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
463
464 extern atomic_t tcp_memory_allocated;
465 extern atomic_t tcp_sockets_allocated;
466 extern int tcp_memory_pressure;
467
468 struct open_request;
469
/* Per-address-family operations attached to an open_request through
 * its ->class pointer.
 */
struct or_calltable {
	int	family;

	int	(*rtx_syn_ack)(struct sock *sk, struct open_request *req,
			       struct dst_entry *);
	void	(*send_ack)(struct sk_buff *skb, struct open_request *req);
	/* Called by tcp_openreq_free() before the request is freed. */
	void	(*destructor)(struct open_request *req);
	void	(*send_reset)(struct sk_buff *skb);
};
477
478 struct tcp_v4_open_req {
479 __u32 loc_addr;
480 __u32 rmt_addr;
481 struct ip_options *opt;
482 };
483
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
/* IPv6 member of the open_request 'af' union; only present when IPv6
 * is built in or built as a module.
 */
struct tcp_v6_open_req {
	struct in6_addr		loc_addr;	/* Local address. */
	struct in6_addr		rmt_addr;	/* Remote address. */
	struct sk_buff		*pktopts;
	int			iif;
};
#endif
492
493 /* this structure is too big */
494 struct open_request {
495 struct open_request *dl_next; /* Must be first member! */
496 __u32 rcv_isn;
497 __u32 snt_isn;
498 __u16 rmt_port;
499 __u16 mss;
500 __u8 retrans;
501 __u8 index;
502 __u16 snd_wscale : 4,
503 rcv_wscale : 4,
504 tstamp_ok : 1,
505 sack_ok : 1,
506 wscale_ok : 1,
507 ecn_ok : 1,
508 acked : 1;
509 /* The following two fields can be easily recomputed I think -AK */
510 __u32 window_clamp; /* window clamp at creation time */
511 __u32 rcv_wnd; /* rcv_wnd offered first time */
512 __u32 ts_recent;
513 unsigned long expires;
514 struct or_calltable *class;
515 struct sock *sk;
516 union {
517 struct tcp_v4_open_req v4_req;
518 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
519 struct tcp_v6_open_req v6_req;
520 #endif
521 } af;
522 };
523
524 /* SLAB cache for open requests. */
525 extern kmem_cache_t *tcp_openreq_cachep;
526
527 #define tcp_openreq_alloc() kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC)
528 #define tcp_openreq_fastfree(req) kmem_cache_free(tcp_openreq_cachep, req)
529
530 static inline void tcp_openreq_free(struct open_request *req)
531 {
532 req->class->destructor(req);
533 tcp_openreq_fastfree(req);
534 }
535
536 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
537 #define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
538 #else
539 #define TCP_INET_FAMILY(fam) 1
540 #endif
541
542 /*
543 * Pointers to address related TCP functions
544 * (i.e. things that depend on the address family)
545 *
546 * BUGGG_FUTURE: all the idea behind this struct is wrong.
547 * It mixes socket frontend with transport function.
548 * With port sharing between IPv6/v4 it gives the only advantage,
549 * only poor IPv6 needs to permanently recheck, that it
550 * is still IPv6 8)8) It must be cleaned up as soon as possible.
551 * --ANK (980802)
552 */
553
554 struct tcp_func {
555 int (*queue_xmit) (struct sk_buff *skb);
556
557 void (*send_check) (struct sock *sk,
558 struct tcphdr *th,
559 int len,
560 struct sk_buff *skb);
561
562 int (*rebuild_header) (struct sock *sk);
563
564 int (*conn_request) (struct sock *sk,
565 struct sk_buff *skb);
566
567 struct sock * (*syn_recv_sock) (struct sock *sk,
568 struct sk_buff *skb,
569 struct open_request *req,
570 struct dst_entry *dst);
571
572 int (*hash_connecting) (struct sock *sk);
573
574 int (*remember_stamp) (struct sock *sk);
575
576 __u16 net_header_len;
577
578 int (*setsockopt) (struct sock *sk,
579 int level,
580 int optname,
581 char *optval,
582 int optlen);
583
584 int (*getsockopt) (struct sock *sk,
585 int level,
586 int optname,
587 char *optval,
588 int *optlen);
589
590
591 void (*addr2sockaddr) (struct sock *sk,
592 struct sockaddr *);
593
594 int sockaddr_len;
595 };
596
597 /*
598 * The next routines deal with comparing 32 bit unsigned ints
599 * and worry about wraparound (automatic with unsigned arithmetic).
600 */
601
/*
 * Return non-zero when seq1 comes strictly before seq2 in 32-bit
 * sequence space.  The unsigned difference wraps modulo 2^32 and is
 * then read as signed, so the answer stays correct across wraparound.
 *
 * Declared static inline rather than extern __inline: the latter
 * depends on gnu89 inline semantics and, under C99 and later, makes
 * every file that includes this header emit an external definition.
 */
static inline int before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;
}
606
/*
 * Return non-zero when seq1 comes strictly after seq2 in 32-bit
 * sequence space.  The unsigned difference wraps modulo 2^32 and is
 * then read as signed, so the answer stays correct across wraparound.
 *
 * Declared static inline rather than extern __inline: the latter
 * depends on gnu89 inline semantics and, under C99 and later, makes
 * every file that includes this header emit an external definition.
 */
static inline int after(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq2 - seq1) < 0;
}
611
612
/*
 * Is seq2 <= seq1 <= seq3, taken modulo 2^32?
 *
 * Both distances are measured forward from seq2 using unsigned
 * arithmetic, so the window [seq2, seq3] may straddle the wraparound
 * point.
 *
 * Declared static inline rather than extern __inline: the latter
 * depends on gnu89 inline semantics and, under C99 and later, makes
 * every file that includes this header emit an external definition.
 */
static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}
618
619
620 extern struct proto tcp_prot;
621
622 extern struct tcp_mib tcp_statistics[NR_CPUS*2];
623 #define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field)
624 #define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field)
625 #define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field)
626
627 extern void tcp_put_port(struct sock *sk);
628 extern void __tcp_put_port(struct sock *sk);
629 extern void tcp_inherit_port(struct sock *sk, struct sock *child);
630
631 extern void tcp_v4_err(struct sk_buff *skb, u32);
632
633 extern void tcp_shutdown (struct sock *sk, int how);
634
635 extern int tcp_v4_rcv(struct sk_buff *skb);
636
637 extern int tcp_v4_remember_stamp(struct sock *sk);
638
639 extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw);
640
641 extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
642 extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
643
644 extern int tcp_ioctl(struct sock *sk,
645 int cmd,
646 unsigned long arg);
647
648 extern int tcp_rcv_state_process(struct sock *sk,
649 struct sk_buff *skb,
650 struct tcphdr *th,
651 unsigned len);
652
653 extern int tcp_rcv_established(struct sock *sk,
654 struct sk_buff *skb,
655 struct tcphdr *th,
656 unsigned len);
657
/* Bit flags kept in tp->ack.pending. */
enum tcp_ack_state_t {
	TCP_ACK_SCHED	= 1,	/* Set by tcp_schedule_ack(). */
	TCP_ACK_TIMER	= 2,	/* Set when the delayed-ACK timer is armed. */
	TCP_ACK_PUSHED	= 4
};
664
665 static inline void tcp_schedule_ack(struct tcp_opt *tp)
666 {
667 tp->ack.pending |= TCP_ACK_SCHED;
668 }
669
670 static inline int tcp_ack_scheduled(struct tcp_opt *tp)
671 {
672 return tp->ack.pending&TCP_ACK_SCHED;
673 }
674
675 static __inline__ void tcp_dec_quickack_mode(struct tcp_opt *tp)
676 {
677 if (tp->ack.quick && --tp->ack.quick == 0) {
678 /* Leaving quickack mode we deflate ATO. */
679 tp->ack.ato = TCP_ATO_MIN;
680 }
681 }
682
683 extern void tcp_enter_quickack_mode(struct tcp_opt *tp);
684
685 static __inline__ void tcp_delack_init(struct tcp_opt *tp)
686 {
687 memset(&tp->ack, 0, sizeof(tp->ack));
688 }
689
690 static inline void tcp_clear_options(struct tcp_opt *tp)
691 {
692 tp->tstamp_ok = tp->sack_ok = tp->wscale_ok = tp->snd_wscale = 0;
693 }
694
/* Result codes returned by tcp_timewait_state_process(). */
enum tcp_tw_status {
	TCP_TW_SUCCESS	= 0,
	TCP_TW_RST	= 1,
	TCP_TW_ACK	= 2,
	TCP_TW_SYN	= 3
};
702
703
704 extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw,
705 struct sk_buff *skb,
706 struct tcphdr *th,
707 unsigned len);
708
709 extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
710 struct open_request *req,
711 struct open_request **prev);
712 extern int tcp_child_process(struct sock *parent,
713 struct sock *child,
714 struct sk_buff *skb);
715 extern void tcp_enter_loss(struct sock *sk, int how);
716 extern void tcp_clear_retrans(struct tcp_opt *tp);
717 extern void tcp_update_metrics(struct sock *sk);
718
719 extern void tcp_close(struct sock *sk,
720 long timeout);
721 extern struct sock * tcp_accept(struct sock *sk, int flags, int *err);
722 extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
723 extern void tcp_write_space(struct sock *sk);
724
725 extern int tcp_getsockopt(struct sock *sk, int level,
726 int optname, char *optval,
727 int *optlen);
728 extern int tcp_setsockopt(struct sock *sk, int level,
729 int optname, char *optval,
730 int optlen);
731 extern void tcp_set_keepalive(struct sock *sk, int val);
732 extern int tcp_recvmsg(struct sock *sk,
733 struct msghdr *msg,
734 int len, int nonblock,
735 int flags, int *addr_len);
736
737 extern int tcp_listen_start(struct sock *sk);
738
739 extern void tcp_parse_options(struct sk_buff *skb,
740 struct tcp_opt *tp,
741 int estab);
742
743 /*
744 * TCP v4 functions exported for the inet6 API
745 */
746
747 extern int tcp_v4_rebuild_header(struct sock *sk);
748
749 extern int tcp_v4_build_header(struct sock *sk,
750 struct sk_buff *skb);
751
752 extern void tcp_v4_send_check(struct sock *sk,
753 struct tcphdr *th, int len,
754 struct sk_buff *skb);
755
756 extern int tcp_v4_conn_request(struct sock *sk,
757 struct sk_buff *skb);
758
759 extern struct sock * tcp_create_openreq_child(struct sock *sk,
760 struct open_request *req,
761 struct sk_buff *skb);
762
763 extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk,
764 struct sk_buff *skb,
765 struct open_request *req,
766 struct dst_entry *dst);
767
768 extern int tcp_v4_do_rcv(struct sock *sk,
769 struct sk_buff *skb);
770
771 extern int tcp_v4_connect(struct sock *sk,
772 struct sockaddr *uaddr,
773 int addr_len);
774
775 extern int tcp_connect(struct sock *sk,
776 struct sk_buff *skb);
777
778 extern struct sk_buff * tcp_make_synack(struct sock *sk,
779 struct dst_entry *dst,
780 struct open_request *req);
781
782 extern int tcp_disconnect(struct sock *sk, int flags);
783
784 extern void tcp_unhash(struct sock *sk);
785
786 extern int tcp_v4_hash_connecting(struct sock *sk);
787
788
789 /* From syncookies.c */
790 extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
791 struct ip_options *opt);
792 extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
793 __u16 *mss);
794
795 /* tcp_output.c */
796
797 extern int tcp_write_xmit(struct sock *, int nonagle);
798 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
799 extern void tcp_xmit_retransmit_queue(struct sock *);
800 extern void tcp_simple_retransmit(struct sock *);
801
802 extern void tcp_send_probe0(struct sock *);
803 extern void tcp_send_partial(struct sock *);
804 extern int tcp_write_wakeup(struct sock *);
805 extern void tcp_send_fin(struct sock *sk);
806 extern void tcp_send_active_reset(struct sock *sk, int priority);
807 extern int tcp_send_synack(struct sock *);
808 extern int tcp_transmit_skb(struct sock *, struct sk_buff *);
809 extern void tcp_send_skb(struct sock *, struct sk_buff *, int force_queue, unsigned mss_now);
810 extern void tcp_push_one(struct sock *, unsigned mss_now);
811 extern void tcp_send_ack(struct sock *sk);
812 extern void tcp_send_delayed_ack(struct sock *sk);
813
814 /* tcp_timer.c */
815 extern void tcp_init_xmit_timers(struct sock *);
816 extern void tcp_clear_xmit_timers(struct sock *);
817
818 extern void tcp_delete_keepalive_timer (struct sock *);
819 extern void tcp_reset_keepalive_timer (struct sock *, unsigned long);
820 extern int tcp_sync_mss(struct sock *sk, u32 pmtu);
821
822 extern const char timer_bug_msg[];
823
824
825 static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
826 {
827 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
828
829 switch (what) {
830 case TCP_TIME_RETRANS:
831 case TCP_TIME_PROBE0:
832 tp->pending = 0;
833
834 #ifdef TCP_CLEAR_TIMERS
835 if (timer_pending(&tp->retransmit_timer) &&
836 del_timer(&tp->retransmit_timer))
837 __sock_put(sk);
838 #endif
839 break;
840 case TCP_TIME_DACK:
841 tp->ack.blocked = 0;
842 tp->ack.pending = 0;
843
844 #ifdef TCP_CLEAR_TIMERS
845 if (timer_pending(&tp->delack_timer) &&
846 del_timer(&tp->delack_timer))
847 __sock_put(sk);
848 #endif
849 break;
850 default:
851 printk(timer_bug_msg);
852 return;
853 };
854
855 }
856
857 /*
858 * Reset the retransmission timer
859 */
860 static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
861 {
862 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
863
864 if (when > TCP_RTO_MAX) {
865 #ifdef TCP_DEBUG
866 printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
867 #endif
868 when = TCP_RTO_MAX;
869 }
870
871 switch (what) {
872 case TCP_TIME_RETRANS:
873 case TCP_TIME_PROBE0:
874 tp->pending = what;
875 tp->timeout = jiffies+when;
876 if (!mod_timer(&tp->retransmit_timer, tp->timeout))
877 sock_hold(sk);
878 break;
879
880 case TCP_TIME_DACK:
881 tp->ack.pending |= TCP_ACK_TIMER;
882 tp->ack.timeout = jiffies+when;
883 if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
884 sock_hold(sk);
885 break;
886
887 default:
888 printk(KERN_DEBUG "bug: unknown timer value\n");
889 };
890 }
891
892 /* Compute the current effective MSS, taking SACKs and IP options,
893 * and even PMTU discovery events into account.
894 */
895
896 static __inline__ unsigned int tcp_current_mss(struct sock *sk)
897 {
898 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
899 struct dst_entry *dst = __sk_dst_get(sk);
900 int mss_now = tp->mss_cache;
901
902 if (dst && dst->pmtu != tp->pmtu_cookie)
903 mss_now = tcp_sync_mss(sk, dst->pmtu);
904
905 if (tp->eff_sacks)
906 mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
907 (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
908 return mss_now;
909 }
910
911 /* Initialize RCV_MSS value.
912 * RCV_MSS is an our guess about MSS used by the peer.
913 * We haven't any direct information about the MSS.
914 * It's better to underestimate the RCV_MSS rather than overestimate.
915 * Overestimations make us ACKing less frequently than needed.
916 * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
917 */
918
919 static inline void tcp_initialize_rcv_mss(struct sock *sk)
920 {
921 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
922 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
923
924 hint = min_t(unsigned int, hint, tp->rcv_wnd/2);
925
926 tp->ack.rcv_mss = max_t(unsigned int,
927 min_t(unsigned int,
928 hint, TCP_MIN_RCVMSS),
929 TCP_MIN_MSS);
930 }
931
932 static __inline__ void __tcp_fast_path_on(struct tcp_opt *tp, u32 snd_wnd)
933 {
934 tp->pred_flags = htonl((tp->tcp_header_len << 26) |
935 ntohl(TCP_FLAG_ACK) |
936 snd_wnd);
937 }
938
939 static __inline__ void tcp_fast_path_on(struct tcp_opt *tp)
940 {
941 __tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale);
942 }
943
944 static inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp)
945 {
946 if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
947 tp->rcv_wnd &&
948 atomic_read(&sk->rmem_alloc) < sk->rcvbuf &&
949 !tp->urg_data)
950 tcp_fast_path_on(tp);
951 }
952
953 /* Compute the actual receive window we are currently advertising.
954 * Rcv_nxt can be after the window if our peer push more data
955 * than the offered window.
956 */
957 static __inline__ u32 tcp_receive_window(struct tcp_opt *tp)
958 {
959 s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
960
961 if (win < 0)
962 win = 0;
963 return (u32) win;
964 }
965
966 /* Choose a new window, without checks for shrinking, and without
967 * scaling applied to the result. The caller does these things
968 * if necessary. This is a "raw" window selection.
969 */
970 extern u32 __tcp_select_window(struct sock *sk);
971
972 /* TCP timestamps are only 32-bits, this causes a slight
973 * complication on 64-bit systems since we store a snapshot
974 * of jiffies in the buffer control blocks below. We decidedly
975 * use only the low 32 bits of jiffies and hide the ugly
976 * casts with the following macro.
977 */
978 #define tcp_time_stamp ((__u32)(jiffies))
979
980 /* This is what the send packet queueing engine uses to pass
981 * TCP per-packet control information to the transmission
982 * code. We also store the host-order sequence numbers in
983 * here too. This is 36 bytes on 32-bit architectures,
984 * 40 bytes on 64-bit machines, if this grows please adjust
985 * skbuff.h:skbuff->cb[xxx] size appropriately.
986 */
987 struct tcp_skb_cb {
988 union {
989 struct inet_skb_parm h4;
990 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
991 struct inet6_skb_parm h6;
992 #endif
993 } header; /* For incoming frames */
994 __u32 seq; /* Starting sequence number */
995 __u32 end_seq; /* SEQ + FIN + SYN + datalen */
996 __u32 when; /* used to compute rtt's */
997 __u8 flags; /* TCP header flags. */
998
999 /* NOTE: These must match up to the flags byte in a
1000 * real TCP header.
1001 */
1002 #define TCPCB_FLAG_FIN 0x01
1003 #define TCPCB_FLAG_SYN 0x02
1004 #define TCPCB_FLAG_RST 0x04
1005 #define TCPCB_FLAG_PSH 0x08
1006 #define TCPCB_FLAG_ACK 0x10
1007 #define TCPCB_FLAG_URG 0x20
1008 #define TCPCB_FLAG_ECE 0x40
1009 #define TCPCB_FLAG_CWR 0x80
1010
1011 __u8 sacked; /* State flags for SACK/FACK. */
1012 #define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */
1013 #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
1014 #define TCPCB_LOST 0x04 /* SKB is lost */
1015 #define TCPCB_TAGBITS 0x07 /* All tag bits */
1016
1017 #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
1018 #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
1019
1020 #define TCPCB_URG 0x20 /* Urgent pointer advenced here */
1021
1022 #define TCPCB_AT_TAIL (TCPCB_URG)
1023
1024 __u16 urg_ptr; /* Valid w/URG flags is set. */
1025 __u32 ack_seq; /* Sequence number ACK'd */
1026 };
1027
/* View the start of an skb's cb[] scratch area as a struct tcp_skb_cb. */
#define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)((__skb)->cb))
1029
/* Walk the already-sent part of sk's write queue.
 *
 * Iteration starts at the first skb on (sk)->write_queue and stops as
 * soon as the cursor reaches (tp)->send_head or wraps around to the
 * queue head itself.
 *
 * skb - lvalue used as the cursor; it is evaluated several times.
 * sk  - socket owning the write queue.
 * tp  - tcp_opt whose send_head bounds the walk.
 *
 * Every use of the cursor argument is parenthesized so that any lvalue
 * expression (e.g. "*cursor_ptr") expands correctly; previously
 * "skb->next" mis-parsed for such arguments.
 */
#define for_retrans_queue(skb, sk, tp) \
		for ((skb) = (sk)->write_queue.next;			\
		     ((skb) != (tp)->send_head) &&			\
		     ((skb) != (struct sk_buff *)&(sk)->write_queue);	\
		     (skb) = (skb)->next)
1035
1036
1037 #include <net/tcp_ecn.h>
1038
1039
1040 /*
1041 * Compute minimal free write space needed to queue new packets.
1042 */
1043 static inline int tcp_min_write_space(struct sock *sk)
1044 {
1045 return sk->wmem_queued/2;
1046 }
1047
1048 static inline int tcp_wspace(struct sock *sk)
1049 {
1050 return sk->sndbuf - sk->wmem_queued;
1051 }
1052
1053
1054 /* This determines how many packets are "in the network" to the best
1055 * of our knowledge. In many cases it is conservative, but where
1056 * detailed information is available from the receiver (via SACK
1057 * blocks etc.) we can make more aggressive calculations.
1058 *
1059 * Use this for decisions involving congestion control, use just
1060 * tp->packets_out to determine if the send queue is empty or not.
1061 *
1062 * Read this equation as:
1063 *
1064 * "Packets sent once on transmission queue" MINUS
1065 * "Packets left network, but not honestly ACKed yet" PLUS
1066 * "Packets fast retransmitted"
1067 */
1068 static __inline__ int tcp_packets_in_flight(struct tcp_opt *tp)
1069 {
1070 return tp->packets_out - tp->left_out + tp->retrans_out;
1071 }
1072
1073 /* Recalculate snd_ssthresh, we want to set it to:
1074 *
1075 * one half the current congestion window, but no
1076 * less than two segments
1077 */
1078 static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
1079 {
1080 return max_t(u32, tp->snd_cwnd >> 1, 2);
1081 }
1082
1083 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
1084 * The exception is rate halving phase, when cwnd is decreasing towards
1085 * ssthresh.
1086 */
1087 static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp)
1088 {
1089 if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
1090 return tp->snd_ssthresh;
1091 else
1092 return max_t(u32, tp->snd_ssthresh,
1093 ((tp->snd_cwnd >> 1) +
1094 (tp->snd_cwnd >> 2)));
1095 }
1096
1097 static inline void tcp_sync_left_out(struct tcp_opt *tp)
1098 {
1099 if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out)
1100 tp->sacked_out = tp->packets_out - tp->lost_out;
1101 tp->left_out = tp->sacked_out + tp->lost_out;
1102 }
1103
1104 extern void tcp_cwnd_application_limited(struct sock *sk);
1105
1106 /* Congestion window validation. (RFC2861) */
1107
1108 static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp)
1109 {
1110 if (tp->packets_out >= tp->snd_cwnd) {
1111 /* Network is feed fully. */
1112 tp->snd_cwnd_used = 0;
1113 tp->snd_cwnd_stamp = tcp_time_stamp;
1114 } else {
1115 /* Network starves. */
1116 if (tp->packets_out > tp->snd_cwnd_used)
1117 tp->snd_cwnd_used = tp->packets_out;
1118
1119 if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
1120 tcp_cwnd_application_limited(sk);
1121 }
1122 }
1123
1124 /* Set slow start threshould and cwnd not falling to slow start */
1125 static inline void __tcp_enter_cwr(struct tcp_opt *tp)
1126 {
1127 tp->undo_marker = 0;
1128 tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
1129 tp->snd_cwnd = min_t(u32, tp->snd_cwnd,
1130 tcp_packets_in_flight(tp) + 1);
1131 tp->snd_cwnd_cnt = 0;
1132 tp->high_seq = tp->snd_nxt;
1133 tp->snd_cwnd_stamp = tcp_time_stamp;
1134 TCP_ECN_queue_cwr(tp);
1135 }
1136
1137 static inline void tcp_enter_cwr(struct tcp_opt *tp)
1138 {
1139 tp->prior_ssthresh = 0;
1140 if (tp->ca_state < TCP_CA_CWR) {
1141 __tcp_enter_cwr(tp);
1142 tp->ca_state = TCP_CA_CWR;
1143 }
1144 }
1145
1146 extern __u32 tcp_init_cwnd(struct tcp_opt *tp);
1147
1148 /* Slow start with delack produces 3 packets of burst, so that
1149 * it is safe "de facto".
1150 */
1151 static __inline__ __u32 tcp_max_burst(struct tcp_opt *tp)
1152 {
1153 return 3;
1154 }
1155
1156 static __inline__ int tcp_minshall_check(struct tcp_opt *tp)
1157 {
1158 return after(tp->snd_sml,tp->snd_una) &&
1159 !after(tp->snd_sml, tp->snd_nxt);
1160 }
1161
1162 static __inline__ void tcp_minshall_update(struct tcp_opt *tp, int mss, struct sk_buff *skb)
1163 {
1164 if (skb->len < mss)
1165 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1166 }
1167
1168 /* Return 0, if packet can be sent now without violation Nagle's rules:
1169 1. It is full sized.
1170 2. Or it contains FIN.
1171 3. Or TCP_NODELAY was set.
1172 4. Or TCP_CORK is not set, and all sent packets are ACKed.
1173 With Minshall's modification: all sent small packets are ACKed.
1174 */
1175
1176 static __inline__ int
1177 tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int nonagle)
1178 {
1179 return (skb->len < mss_now &&
1180 !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
1181 (nonagle == 2 ||
1182 (!nonagle &&
1183 tp->packets_out &&
1184 tcp_minshall_check(tp))));
1185 }
1186
1187 /* This checks if the data bearing packet SKB (usually tp->send_head)
1188 * should be put on the wire right now.
1189 */
1190 static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
1191 unsigned cur_mss, int nonagle)
1192 {
1193 /* RFC 1122 - section 4.2.3.4
1194 *
1195 * We must queue if
1196 *
1197 * a) The right edge of this frame exceeds the window
1198 * b) There are packets in flight and we have a small segment
1199 * [SWS avoidance and Nagle algorithm]
1200 * (part of SWS is done on packetization)
1201 * Minshall version sounds: there are no _small_
1202 * segments in flight. (tcp_nagle_check)
1203 * c) We have too many packets 'in flight'
1204 *
1205 * Don't use the nagle rule for urgent data (or
1206 * for the final FIN -DaveM).
1207 *
1208 * Also, Nagle rule does not apply to frames, which
1209 * sit in the middle of queue (they have no chances
1210 * to get new data) and if room at tail of skb is
1211 * not enough to save something seriously (<32 for now).
1212 */
1213
1214 /* Don't be strict about the congestion window for the
1215 * final FIN frame. -DaveM
1216 */
1217 return ((nonagle==1 || tp->urg_mode
1218 || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
1219 ((tcp_packets_in_flight(tp) < tp->snd_cwnd) ||
1220 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
1221 !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
1222 }
1223
1224 static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp)
1225 {
1226 if (!tp->packets_out && !tp->pending)
1227 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
1228 }
1229
1230 static __inline__ int tcp_skb_is_last(struct sock *sk, struct sk_buff *skb)
1231 {
1232 return (skb->next == (struct sk_buff*)&sk->write_queue);
1233 }
1234
1235 /* Push out any pending frames which were held back due to
1236 * TCP_CORK or attempt at coalescing tiny packets.
1237 * The socket must be locked by the caller.
1238 */
1239 static __inline__ void __tcp_push_pending_frames(struct sock *sk,
1240 struct tcp_opt *tp,
1241 unsigned cur_mss,
1242 int nonagle)
1243 {
1244 struct sk_buff *skb = tp->send_head;
1245
1246 if (skb) {
1247 if (!tcp_skb_is_last(sk, skb))
1248 nonagle = 1;
1249 if (!tcp_snd_test(tp, skb, cur_mss, nonagle) ||
1250 tcp_write_xmit(sk, nonagle))
1251 tcp_check_probe_timer(sk, tp);
1252 }
1253 tcp_cwnd_validate(sk, tp);
1254 }
1255
1256 static __inline__ void tcp_push_pending_frames(struct sock *sk,
1257 struct tcp_opt *tp)
1258 {
1259 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk), tp->nonagle);
1260 }
1261
1262 static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
1263 {
1264 struct sk_buff *skb = tp->send_head;
1265
1266 return (skb &&
1267 tcp_snd_test(tp, skb, tcp_current_mss(sk),
1268 tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle));
1269 }
1270
1271 static __inline__ void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq)
1272 {
1273 tp->snd_wl1 = seq;
1274 }
1275
1276 static __inline__ void tcp_update_wl(struct tcp_opt *tp, u32 ack, u32 seq)
1277 {
1278 tp->snd_wl1 = seq;
1279 }
1280
1281 extern void tcp_destroy_sock(struct sock *sk);
1282
1283
1284 /*
1285 * Calculate(/check) TCP checksum
1286 */
1287 static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len,
1288 unsigned long saddr, unsigned long daddr,
1289 unsigned long base)
1290 {
1291 return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
1292 }
1293
1294 static __inline__ int __tcp_checksum_complete(struct sk_buff *skb)
1295 {
1296 return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
1297 }
1298
1299 static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
1300 {
1301 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
1302 __tcp_checksum_complete(skb);
1303 }
1304
1305 /* Prequeue for VJ style copy to user, combined with checksumming. */
1306
1307 static __inline__ void tcp_prequeue_init(struct tcp_opt *tp)
1308 {
1309 tp->ucopy.task = NULL;
1310 tp->ucopy.len = 0;
1311 tp->ucopy.memory = 0;
1312 skb_queue_head_init(&tp->ucopy.prequeue);
1313 }
1314
1315 /* Packet is added to VJ-style prequeue for processing in process
1316 * context, if a reader task is waiting. Apparently, this exciting
1317 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1318 * failed somewhere. Latency? Burstiness? Well, at least now we will
1319 * see, why it failed. 8)8) --ANK
1320 *
1321 * NOTE: is this not too big to inline?
1322 */
1323 static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1324 {
1325 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1326
1327 if (tp->ucopy.task) {
1328 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1329 tp->ucopy.memory += skb->truesize;
1330 if (tp->ucopy.memory > sk->rcvbuf) {
1331 struct sk_buff *skb1;
1332
1333 if (sk->lock.users) BUG();
1334
1335 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1336 sk->backlog_rcv(sk, skb1);
1337 NET_INC_STATS_BH(TCPPrequeueDropped);
1338 }
1339
1340 tp->ucopy.memory = 0;
1341 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1342 wake_up_interruptible(sk->sleep);
1343 if (!tcp_ack_scheduled(tp))
1344 tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4);
1345 }
1346 return 1;
1347 }
1348 return 0;
1349 }
1350
1351
/* State tracing is compiled out; remove the #undef (or define
 * STATE_TRACE) to get the state names used by tcp_set_state().
 */
#undef STATE_TRACE

#ifdef STATE_TRACE
/* Printable names, indexed by TCP state number. */
static char *statename[]={
	"Unused",
	"Established",
	"Syn Sent",
	"Syn Recv",
	"Fin Wait 1",
	"Fin Wait 2",
	"Time Wait",
	"Close",
	"Close Wait",
	"Last ACK",
	"Listen",
	"Closing"
};
#endif
1361
1362 static __inline__ void tcp_set_state(struct sock *sk, int state)
1363 {
1364 int oldstate = sk->state;
1365
1366 switch (state) {
1367 case TCP_ESTABLISHED:
1368 if (oldstate != TCP_ESTABLISHED)
1369 TCP_INC_STATS(TcpCurrEstab);
1370 break;
1371
1372 case TCP_CLOSE:
1373 sk->prot->unhash(sk);
1374 if (sk->prev && !(sk->userlocks&SOCK_BINDPORT_LOCK))
1375 tcp_put_port(sk);
1376 /* fall through */
1377 default:
1378 if (oldstate==TCP_ESTABLISHED)
1379 tcp_statistics[smp_processor_id()*2+!in_softirq()].TcpCurrEstab--;
1380 }
1381
1382 /* Change state AFTER socket is unhashed to avoid closed
1383 * socket sitting in hash tables.
1384 */
1385 sk->state = state;
1386
1387 #ifdef STATE_TRACE
1388 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]);
1389 #endif
1390 }
1391
1392 static __inline__ void tcp_done(struct sock *sk)
1393 {
1394 tcp_set_state(sk, TCP_CLOSE);
1395 tcp_clear_xmit_timers(sk);
1396
1397 sk->shutdown = SHUTDOWN_MASK;
1398
1399 if (!sk->dead)
1400 sk->state_change(sk);
1401 else
1402 tcp_destroy_sock(sk);
1403 }
1404
1405 static __inline__ void tcp_sack_reset(struct tcp_opt *tp)
1406 {
1407 tp->dsack = 0;
1408 tp->eff_sacks = 0;
1409 tp->num_sacks = 0;
1410 }
1411
1412 static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp, __u32 tstamp)
1413 {
1414 if (tp->tstamp_ok) {
1415 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
1416 (TCPOPT_NOP << 16) |
1417 (TCPOPT_TIMESTAMP << 8) |
1418 TCPOLEN_TIMESTAMP);
1419 *ptr++ = htonl(tstamp);
1420 *ptr++ = htonl(tp->ts_recent);
1421 }
1422 if (tp->eff_sacks) {
1423 struct tcp_sack_block *sp = tp->dsack ? tp->duplicate_sack : tp->selective_acks;
1424 int this_sack;
1425
1426 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
1427 (TCPOPT_NOP << 16) |
1428 (TCPOPT_SACK << 8) |
1429 (TCPOLEN_SACK_BASE +
1430 (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)));
1431 for(this_sack = 0; this_sack < tp->eff_sacks; this_sack++) {
1432 *ptr++ = htonl(sp[this_sack].start_seq);
1433 *ptr++ = htonl(sp[this_sack].end_seq);
1434 }
1435 if (tp->dsack) {
1436 tp->dsack = 0;
1437 tp->eff_sacks--;
1438 }
1439 }
1440 }
1441
1442 /* Construct a tcp options header for a SYN or SYN_ACK packet.
1443 * If this is every changed make sure to change the definition of
1444 * MAX_SYN_SIZE to match the new maximum number of options that you
1445 * can generate.
1446 */
1447 static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
1448 int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
1449 {
1450 /* We always get an MSS option.
1451 * The option bytes which will be seen in normal data
1452 * packets should timestamps be used, must be in the MSS
1453 * advertised. But we subtract them from tp->mss_cache so
1454 * that calculations in tcp_sendmsg are simpler etc.
1455 * So account for this fact here if necessary. If we
1456 * don't do this correctly, as a receiver we won't
1457 * recognize data packets as being full sized when we
1458 * should, and thus we won't abide by the delayed ACK
1459 * rules correctly.
1460 * SACKs don't matter, we never delay an ACK when we
1461 * have any of those going out.
1462 */
1463 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
1464 if (ts) {
1465 if(sack)
1466 *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
1467 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1468 else
1469 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1470 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1471 *ptr++ = htonl(tstamp); /* TSVAL */
1472 *ptr++ = htonl(ts_recent); /* TSECR */
1473 } else if(sack)
1474 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1475 (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
1476 if (offer_wscale)
1477 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
1478 }
1479
1480 /* Determine a window scaling and initial window to offer.
1481 * Based on the assumption that the given amount of space
1482 * will be offered. Store the results in the tp structure.
1483 * NOTE: for smooth operation initial space offering should
1484 * be a multiple of mss if possible. We assume here that mss >= 1.
1485 * This MUST be enforced by all callers.
1486 */
1487 static inline void tcp_select_initial_window(int space, __u32 mss,
1488 __u32 *rcv_wnd,
1489 __u32 *window_clamp,
1490 int wscale_ok,
1491 __u8 *rcv_wscale)
1492 {
1493 /* If no clamp set the clamp to the max possible scaled window */
1494 if (*window_clamp == 0)
1495 (*window_clamp) = (65535 << 14);
1496 space = min_t(u32, *window_clamp, space);
1497
1498 /* Quantize space offering to a multiple of mss if possible. */
1499 if (space > mss)
1500 space = (space / mss) * mss;
1501
1502 /* NOTE: offering an initial window larger than 32767
1503 * will break some buggy TCP stacks. We try to be nice.
1504 * If we are not window scaling, then this truncates
1505 * our initial window offering to 32k. There should also
1506 * be a sysctl option to stop being nice.
1507 */
1508 (*rcv_wnd) = min_t(int, space, MAX_TCP_WINDOW);
1509 (*rcv_wscale) = 0;
1510 if (wscale_ok) {
1511 /* See RFC1323 for an explanation of the limit to 14 */
1512 while (space > 65535 && (*rcv_wscale) < 14) {
1513 space >>= 1;
1514 (*rcv_wscale)++;
1515 }
1516 if (*rcv_wscale && sysctl_tcp_app_win && space>=mss &&
1517 space - max_t(unsigned int, (space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
1518 (*rcv_wscale)--;
1519 }
1520
1521 /* Set initial window to value enough for senders,
1522 * following RFC1414. Senders, not following this RFC,
1523 * will be satisfied with 2.
1524 */
1525 if (mss > (1<<*rcv_wscale)) {
1526 int init_cwnd = 4;
1527 if (mss > 1460*3)
1528 init_cwnd = 2;
1529 else if (mss > 1460)
1530 init_cwnd = 3;
1531 if (*rcv_wnd > init_cwnd*mss)
1532 *rcv_wnd = init_cwnd*mss;
1533 }
1534 /* Set the clamp no higher than max representable value */
1535 (*window_clamp) = min_t(u32, 65535 << (*rcv_wscale), *window_clamp);
1536 }
1537
1538 static inline int tcp_win_from_space(int space)
1539 {
1540 return sysctl_tcp_adv_win_scale<=0 ?
1541 (space>>(-sysctl_tcp_adv_win_scale)) :
1542 space - (space>>sysctl_tcp_adv_win_scale);
1543 }
1544
1545 /* Note: caller must be prepared to deal with negative returns */
1546 static inline int tcp_space(struct sock *sk)
1547 {
1548 return tcp_win_from_space(sk->rcvbuf - atomic_read(&sk->rmem_alloc));
1549 }
1550
1551 static inline int tcp_full_space( struct sock *sk)
1552 {
1553 return tcp_win_from_space(sk->rcvbuf);
1554 }
1555
1556 static inline void tcp_acceptq_removed(struct sock *sk)
1557 {
1558 sk->ack_backlog--;
1559 }
1560
1561 static inline void tcp_acceptq_added(struct sock *sk)
1562 {
1563 sk->ack_backlog++;
1564 }
1565
1566 static inline int tcp_acceptq_is_full(struct sock *sk)
1567 {
1568 return sk->ack_backlog > sk->max_ack_backlog;
1569 }
1570
1571 static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req,
1572 struct sock *child)
1573 {
1574 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1575
1576 req->sk = child;
1577 tcp_acceptq_added(sk);
1578
1579 if (!tp->accept_queue_tail) {
1580 tp->accept_queue = req;
1581 } else {
1582 tp->accept_queue_tail->dl_next = req;
1583 }
1584 tp->accept_queue_tail = req;
1585 req->dl_next = NULL;
1586 }
1587
1588 struct tcp_listen_opt
1589 {
1590 u8 max_qlen_log; /* log_2 of maximal queued SYNs */
1591 int qlen;
1592 int qlen_young;
1593 int clock_hand;
1594 struct open_request *syn_table[TCP_SYNQ_HSIZE];
1595 };
1596
1597 static inline void
1598 tcp_synq_removed(struct sock *sk, struct open_request *req)
1599 {
1600 struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt;
1601
1602 if (--lopt->qlen == 0)
1603 tcp_delete_keepalive_timer(sk);
1604 if (req->retrans == 0)
1605 lopt->qlen_young--;
1606 }
1607
1608 static inline void tcp_synq_added(struct sock *sk)
1609 {
1610 struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt;
1611
1612 if (lopt->qlen++ == 0)
1613 tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
1614 lopt->qlen_young++;
1615 }
1616
1617 static inline int tcp_synq_len(struct sock *sk)
1618 {
1619 return sk->tp_pinfo.af_tcp.listen_opt->qlen;
1620 }
1621
1622 static inline int tcp_synq_young(struct sock *sk)
1623 {
1624 return sk->tp_pinfo.af_tcp.listen_opt->qlen_young;
1625 }
1626
1627 static inline int tcp_synq_is_full(struct sock *sk)
1628 {
1629 return tcp_synq_len(sk)>>sk->tp_pinfo.af_tcp.listen_opt->max_qlen_log;
1630 }
1631
1632 static inline void tcp_synq_unlink(struct tcp_opt *tp, struct open_request *req,
1633 struct open_request **prev)
1634 {
1635 write_lock(&tp->syn_wait_lock);
1636 *prev = req->dl_next;
1637 write_unlock(&tp->syn_wait_lock);
1638 }
1639
1640 static inline void tcp_synq_drop(struct sock *sk, struct open_request *req,
1641 struct open_request **prev)
1642 {
1643 tcp_synq_unlink(&sk->tp_pinfo.af_tcp, req, prev);
1644 tcp_synq_removed(sk, req);
1645 tcp_openreq_free(req);
1646 }
1647
1648 static __inline__ void tcp_openreq_init(struct open_request *req,
1649 struct tcp_opt *tp,
1650 struct sk_buff *skb)
1651 {
1652 req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
1653 req->rcv_isn = TCP_SKB_CB(skb)->seq;
1654 req->mss = tp->mss_clamp;
1655 req->ts_recent = tp->saw_tstamp ? tp->rcv_tsval : 0;
1656 req->tstamp_ok = tp->tstamp_ok;
1657 req->sack_ok = tp->sack_ok;
1658 req->snd_wscale = tp->snd_wscale;
1659 req->wscale_ok = tp->wscale_ok;
1660 req->acked = 0;
1661 req->ecn_ok = 0;
1662 req->rmt_port = skb->h.th->source;
1663 }
1664
/* TCP memory quantum: one page, as a signed int.  tcp_mem_reclaim()
 * uses it as its threshold.
 */
#define TCP_MEM_QUANTUM		((int)PAGE_SIZE)
1666
1667 static inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb)
1668 {
1669 sk->tp_pinfo.af_tcp.queue_shrunk = 1;
1670 sk->wmem_queued -= skb->truesize;
1671 sk->forward_alloc += skb->truesize;
1672 __kfree_skb(skb);
1673 }
1674
1675 static inline void tcp_charge_skb(struct sock *sk, struct sk_buff *skb)
1676 {
1677 sk->wmem_queued += skb->truesize;
1678 sk->forward_alloc -= skb->truesize;
1679 }
1680
1681 extern void __tcp_mem_reclaim(struct sock *sk);
1682 extern int tcp_mem_schedule(struct sock *sk, int size, int kind);
1683
1684 static inline void tcp_mem_reclaim(struct sock *sk)
1685 {
1686 if (sk->forward_alloc >= TCP_MEM_QUANTUM)
1687 __tcp_mem_reclaim(sk);
1688 }
1689
1690 static inline void tcp_enter_memory_pressure(void)
1691 {
1692 if (!tcp_memory_pressure) {
1693 NET_INC_STATS(TCPMemoryPressures);
1694 tcp_memory_pressure = 1;
1695 }
1696 }
1697
1698 static inline void tcp_moderate_sndbuf(struct sock *sk)
1699 {
1700 if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) {
1701 sk->sndbuf = min_t(int, sk->sndbuf, sk->wmem_queued/2);
1702 sk->sndbuf = max_t(int, sk->sndbuf, SOCK_MIN_SNDBUF);
1703 }
1704 }
1705
1706 static inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, int gfp)
1707 {
1708 struct sk_buff *skb = alloc_skb(size+MAX_TCP_HEADER, gfp);
1709
1710 if (skb) {
1711 skb->truesize += mem;
1712 if (sk->forward_alloc >= (int)skb->truesize ||
1713 tcp_mem_schedule(sk, skb->truesize, 0)) {
1714 skb_reserve(skb, MAX_TCP_HEADER);
1715 return skb;
1716 }
1717 __kfree_skb(skb);
1718 } else {
1719 tcp_enter_memory_pressure();
1720 tcp_moderate_sndbuf(sk);
1721 }
1722 return NULL;
1723 }
1724
/* tcp_alloc_pskb() with no extra memory accounted to the skb. */
static inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp)
{
	const int extra_mem = 0;

	return tcp_alloc_pskb(sk, size, extra_mem, gfp);
}
1729
1730 static inline struct page * tcp_alloc_page(struct sock *sk)
1731 {
1732 if (sk->forward_alloc >= (int)PAGE_SIZE ||
1733 tcp_mem_schedule(sk, PAGE_SIZE, 0)) {
1734 struct page *page = alloc_pages(sk->allocation, 0);
1735 if (page)
1736 return page;
1737 }
1738 tcp_enter_memory_pressure();
1739 tcp_moderate_sndbuf(sk);
1740 return NULL;
1741 }
1742
1743 static inline void tcp_writequeue_purge(struct sock *sk)
1744 {
1745 struct sk_buff *skb;
1746
1747 while ((skb = __skb_dequeue(&sk->write_queue)) != NULL)
1748 tcp_free_skb(sk, skb);
1749 tcp_mem_reclaim(sk);
1750 }
1751
1752 extern void tcp_rfree(struct sk_buff *skb);
1753
1754 static inline void tcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
1755 {
1756 skb->sk = sk;
1757 skb->destructor = tcp_rfree;
1758 atomic_add(skb->truesize, &sk->rmem_alloc);
1759 sk->forward_alloc -= skb->truesize;
1760 }
1761
1762 extern void tcp_listen_wlock(void);
1763
1764 /* - We may sleep inside this lock.
1765 * - If sleeping is not required (or called from BH),
1766 * use plain read_(un)lock(&tcp_lhash_lock).
1767 */
1768
1769 static inline void tcp_listen_lock(void)
1770 {
1771 /* read_lock synchronizes to candidates to writers */
1772 read_lock(&tcp_lhash_lock);
1773 atomic_inc(&tcp_lhash_users);
1774 read_unlock(&tcp_lhash_lock);
1775 }
1776
1777 static inline void tcp_listen_unlock(void)
1778 {
1779 if (atomic_dec_and_test(&tcp_lhash_users))
1780 wake_up(&tcp_lhash_wait);
1781 }
1782
1783 static inline int keepalive_intvl_when(struct tcp_opt *tp)
1784 {
1785 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
1786 }
1787
1788 static inline int keepalive_time_when(struct tcp_opt *tp)
1789 {
1790 return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
1791 }
1792
1793 static inline int tcp_fin_time(struct tcp_opt *tp)
1794 {
1795 int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout;
1796
1797 if (fin_timeout < (tp->rto<<2) - (tp->rto>>1))
1798 fin_timeout = (tp->rto<<2) - (tp->rto>>1);
1799
1800 return fin_timeout;
1801 }
1802
1803 static inline int tcp_paws_check(struct tcp_opt *tp, int rst)
1804 {
1805 if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0)
1806 return 0;
1807 if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
1808 return 0;
1809
1810 /* RST segments are not recommended to carry timestamp,
1811 and, if they do, it is recommended to ignore PAWS because
1812 "their cleanup function should take precedence over timestamps."
1813 Certainly, it is mistake. It is necessary to understand the reasons
1814 of this constraint to relax it: if peer reboots, clock may go
1815 out-of-sync and half-open connections will not be reset.
1816 Actually, the problem would be not existing if all
1817 the implementations followed draft about maintaining clock
1818 via reboots. Linux-2.2 DOES NOT!
1819
1820 However, we can relax time bounds for RST segments to MSL.
1821 */
1822 if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL)
1823 return 0;
1824 return 1;
1825 }
1826
/* Timer consistency check hook; currently expands to an empty
 * statement.  The expansion deliberately carries no trailing
 * semicolon: the caller's own ';' completes the statement, which keeps
 * "if (cond) TCP_CHECK_TIMER(sk); else ..." well formed.
 */
#define TCP_CHECK_TIMER(sk) do { } while (0)
1828
1829 #endif /* _TCP_H */
1830