File: /usr/src/linux/net/core/skbuff.c
1 /*
2 * Routines having to do with the 'struct sk_buff' memory handlers.
3 *
4 * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
5 * Florian La Roche <rzsfl@rz.uni-sb.de>
6 *
7 * Version: $Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $
8 *
9 * Fixes:
10 * Alan Cox : Fixed the worst of the load balancer bugs.
11 * Dave Platt : Interrupt stacking fix.
12 * Richard Kooijman : Timestamp fixes.
13 * Alan Cox : Changed buffer format.
14 * Alan Cox : destructor hook for AF_UNIX etc.
15 * Linus Torvalds : Better skb_clone.
16 * Alan Cox : Added skb_copy.
17 * Alan Cox : Added all the changed routines Linus
18 * only put in the headers
19 * Ray VanTassle : Fixed --skb->lock in free
20 * Alan Cox : skb_copy copy arp field
21 * Andi Kleen : slabified it.
22 *
23 * NOTE:
24 * The __skb_ routines should be called with interrupts
25 * disabled, or you better be *real* sure that the operation is atomic
26 * with respect to whatever list is being frobbed (e.g. via lock_sock()
27 * or via disabling bottom half handlers, etc).
28 *
29 * This program is free software; you can redistribute it and/or
30 * modify it under the terms of the GNU General Public License
31 * as published by the Free Software Foundation; either version
32 * 2 of the License, or (at your option) any later version.
33 */
34
35 /*
36 * The functions in this file will not compile correctly with gcc 2.4.x
37 */
38
39 #include <linux/config.h>
40 #include <linux/types.h>
41 #include <linux/kernel.h>
42 #include <linux/sched.h>
43 #include <linux/mm.h>
44 #include <linux/interrupt.h>
45 #include <linux/in.h>
46 #include <linux/inet.h>
47 #include <linux/slab.h>
48 #include <linux/netdevice.h>
49 #include <linux/string.h>
50 #include <linux/skbuff.h>
51 #include <linux/cache.h>
52 #include <linux/init.h>
53 #include <linux/highmem.h>
54
55 #include <net/ip.h>
56 #include <net/protocol.h>
57 #include <net/dst.h>
58 #include <net/tcp.h>
59 #include <net/udp.h>
60 #include <net/sock.h>
61
62 #include <asm/uaccess.h>
63 #include <asm/system.h>
64
65 int sysctl_hot_list_len = 128;
66
67 static kmem_cache_t *skbuff_head_cache;
68
69 static union {
70 struct sk_buff_head list;
71 char pad[SMP_CACHE_BYTES];
72 } skb_head_pool[NR_CPUS];
73
74 /*
75 * Keep out-of-line to prevent kernel bloat.
76 * __builtin_return_address is not used because it is not always
77 * reliable.
78 */
79
80 /**
81 * skb_over_panic - private function
82 * @skb: buffer
83 * @sz: size
84 * @here: address
85 *
86 * Out of line support code for skb_put(). Not user callable.
87 */
88
89 void skb_over_panic(struct sk_buff *skb, int sz, void *here)
90 {
91 printk("skput:over: %p:%d put:%d dev:%s",
92 here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
93 BUG();
94 }
95
96 /**
97 * skb_under_panic - private function
98 * @skb: buffer
99 * @sz: size
100 * @here: address
101 *
102 * Out of line support code for skb_push(). Not user callable.
103 */
104
105
106 void skb_under_panic(struct sk_buff *skb, int sz, void *here)
107 {
108 printk("skput:under: %p:%d put:%d dev:%s",
109 here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
110 BUG();
111 }
112
113 static __inline__ struct sk_buff *skb_head_from_pool(void)
114 {
115 struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
116
117 if (skb_queue_len(list)) {
118 struct sk_buff *skb;
119 unsigned long flags;
120
121 local_irq_save(flags);
122 skb = __skb_dequeue(list);
123 local_irq_restore(flags);
124 return skb;
125 }
126 return NULL;
127 }
128
129 static __inline__ void skb_head_to_pool(struct sk_buff *skb)
130 {
131 struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
132
133 if (skb_queue_len(list) < sysctl_hot_list_len) {
134 unsigned long flags;
135
136 local_irq_save(flags);
137 __skb_queue_head(list, skb);
138 local_irq_restore(flags);
139
140 return;
141 }
142 kmem_cache_free(skbuff_head_cache, skb);
143 }
144
145
146 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
147 * 'private' fields and also do memory statistics to find all the
148 * [BEEP] leaks.
149 *
150 */
151
152 /**
153 * alloc_skb - allocate a network buffer
154 * @size: size to allocate
155 * @gfp_mask: allocation mask
156 *
157 * Allocate a new &sk_buff. The returned buffer has no headroom and a
158 * tail room of size bytes. The object has a reference count of one.
159 * The return is the buffer. On a failure the return is %NULL.
160 *
161 * Buffers may only be allocated from interrupts using a @gfp_mask of
162 * %GFP_ATOMIC.
163 */
164
165 struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
166 {
167 struct sk_buff *skb;
168 u8 *data;
169
170 if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
171 static int count = 0;
172 if (++count < 5) {
173 printk(KERN_ERR "alloc_skb called nonatomically "
174 "from interrupt %p\n", NET_CALLER(size));
175 BUG();
176 }
177 gfp_mask &= ~__GFP_WAIT;
178 }
179
180 /* Get the HEAD */
181 skb = skb_head_from_pool();
182 if (skb == NULL) {
183 skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
184 if (skb == NULL)
185 goto nohead;
186 }
187
188 /* Get the DATA. Size must match skb_add_mtu(). */
189 size = SKB_DATA_ALIGN(size);
190 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
191 if (data == NULL)
192 goto nodata;
193
194 /* XXX: does not include slab overhead */
195 skb->truesize = size + sizeof(struct sk_buff);
196
197 /* Load the data pointers. */
198 skb->head = data;
199 skb->data = data;
200 skb->tail = data;
201 skb->end = data + size;
202
203 /* Set up other state */
204 skb->len = 0;
205 skb->cloned = 0;
206 skb->data_len = 0;
207
208 atomic_set(&skb->users, 1);
209 atomic_set(&(skb_shinfo(skb)->dataref), 1);
210 skb_shinfo(skb)->nr_frags = 0;
211 skb_shinfo(skb)->frag_list = NULL;
212 return skb;
213
214 nodata:
215 skb_head_to_pool(skb);
216 nohead:
217 return NULL;
218 }
219
220
221 /*
222 * Slab constructor for a skb head.
223 */
224 static inline void skb_headerinit(void *p, kmem_cache_t *cache,
225 unsigned long flags)
226 {
227 struct sk_buff *skb = p;
228
229 skb->next = NULL;
230 skb->prev = NULL;
231 skb->list = NULL;
232 skb->sk = NULL;
233 skb->stamp.tv_sec=0; /* No idea about time */
234 skb->dev = NULL;
235 skb->dst = NULL;
236 memset(skb->cb, 0, sizeof(skb->cb));
237 skb->pkt_type = PACKET_HOST; /* Default type */
238 skb->ip_summed = 0;
239 skb->priority = 0;
240 skb->security = 0; /* By default packets are insecure */
241 skb->destructor = NULL;
242
243 #ifdef CONFIG_NETFILTER
244 skb->nfmark = skb->nfcache = 0;
245 skb->nfct = NULL;
246 #ifdef CONFIG_NETFILTER_DEBUG
247 skb->nf_debug = 0;
248 #endif
249 #endif
250 #ifdef CONFIG_NET_SCHED
251 skb->tc_index = 0;
252 #endif
253 }
254
255 static void skb_drop_fraglist(struct sk_buff *skb)
256 {
257 struct sk_buff *list = skb_shinfo(skb)->frag_list;
258
259 skb_shinfo(skb)->frag_list = NULL;
260
261 do {
262 struct sk_buff *this = list;
263 list = list->next;
264 kfree_skb(this);
265 } while (list);
266 }
267
268 static void skb_clone_fraglist(struct sk_buff *skb)
269 {
270 struct sk_buff *list;
271
272 for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
273 skb_get(list);
274 }
275
276 static void skb_release_data(struct sk_buff *skb)
277 {
278 if (!skb->cloned ||
279 atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
280 if (skb_shinfo(skb)->nr_frags) {
281 int i;
282 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
283 put_page(skb_shinfo(skb)->frags[i].page);
284 }
285
286 if (skb_shinfo(skb)->frag_list)
287 skb_drop_fraglist(skb);
288
289 kfree(skb->head);
290 }
291 }
292
/*
 *	Free an skbuff by memory without cleaning the state:
 *	release the data area first, then recycle or free the head.
 */
void kfree_skbmem(struct sk_buff *skb)
{
	skb_release_data(skb);		/* data area (may be shared) */
	skb_head_to_pool(skb);		/* head: hot list or slab */
}
301
302 /**
303 * __kfree_skb - private function
304 * @skb: buffer
305 *
306 * Free an sk_buff. Release anything attached to the buffer.
307 * Clean the state. This is an internal helper function. Users should
308 * always call kfree_skb
309 */
310
311 void __kfree_skb(struct sk_buff *skb)
312 {
313 if (skb->list) {
314 printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
315 "on a list (from %p).\n", NET_CALLER(skb));
316 BUG();
317 }
318
319 dst_release(skb->dst);
320 if(skb->destructor) {
321 if (in_irq()) {
322 printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
323 NET_CALLER(skb));
324 }
325 skb->destructor(skb);
326 }
327 #ifdef CONFIG_NETFILTER
328 nf_conntrack_put(skb->nfct);
329 #endif
330 skb_headerinit(skb, NULL, 0); /* clean state */
331 kfree_skbmem(skb);
332 }
333
334 /**
335 * skb_clone - duplicate an sk_buff
336 * @skb: buffer to clone
337 * @gfp_mask: allocation priority
338 *
339 * Duplicate an &sk_buff. The new one is not owned by a socket. Both
340 * copies share the same packet data but not structure. The new
341 * buffer has a reference count of 1. If the allocation fails the
342 * function returns %NULL otherwise the new buffer is returned.
343 *
344 * If this function is called from an interrupt gfp_mask() must be
345 * %GFP_ATOMIC.
346 */
347
348 struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
349 {
350 struct sk_buff *n;
351
352 n = skb_head_from_pool();
353 if (!n) {
354 n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
355 if (!n)
356 return NULL;
357 }
358
359 #define C(x) n->x = skb->x
360
361 n->next = n->prev = NULL;
362 n->list = NULL;
363 n->sk = NULL;
364 C(stamp);
365 C(dev);
366 C(h);
367 C(nh);
368 C(mac);
369 C(dst);
370 dst_clone(n->dst);
371 memcpy(n->cb, skb->cb, sizeof(skb->cb));
372 C(len);
373 C(data_len);
374 C(csum);
375 n->cloned = 1;
376 C(pkt_type);
377 C(ip_summed);
378 C(priority);
379 atomic_set(&n->users, 1);
380 C(protocol);
381 C(security);
382 C(truesize);
383 C(head);
384 C(data);
385 C(tail);
386 C(end);
387 n->destructor = NULL;
388 #ifdef CONFIG_NETFILTER
389 C(nfmark);
390 C(nfcache);
391 C(nfct);
392 #ifdef CONFIG_NETFILTER_DEBUG
393 C(nf_debug);
394 #endif
395 #endif /*CONFIG_NETFILTER*/
396 #if defined(CONFIG_HIPPI)
397 C(private);
398 #endif
399 #ifdef CONFIG_NET_SCHED
400 C(tc_index);
401 #endif
402
403 atomic_inc(&(skb_shinfo(skb)->dataref));
404 skb->cloned = 1;
405 #ifdef CONFIG_NETFILTER
406 nf_conntrack_get(skb->nfct);
407 #endif
408 return n;
409 }
410
411 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
412 {
413 /*
414 * Shift between the two data areas in bytes
415 */
416 unsigned long offset = new->data - old->data;
417
418 new->list=NULL;
419 new->sk=NULL;
420 new->dev=old->dev;
421 new->priority=old->priority;
422 new->protocol=old->protocol;
423 new->dst=dst_clone(old->dst);
424 new->h.raw=old->h.raw+offset;
425 new->nh.raw=old->nh.raw+offset;
426 new->mac.raw=old->mac.raw+offset;
427 memcpy(new->cb, old->cb, sizeof(old->cb));
428 atomic_set(&new->users, 1);
429 new->pkt_type=old->pkt_type;
430 new->stamp=old->stamp;
431 new->destructor = NULL;
432 new->security=old->security;
433 #ifdef CONFIG_NETFILTER
434 new->nfmark=old->nfmark;
435 new->nfcache=old->nfcache;
436 new->nfct=old->nfct;
437 nf_conntrack_get(new->nfct);
438 #ifdef CONFIG_NETFILTER_DEBUG
439 new->nf_debug=old->nf_debug;
440 #endif
441 #endif
442 #ifdef CONFIG_NET_SCHED
443 new->tc_index = old->tc_index;
444 #endif
445 }
446
447 /**
448 * skb_copy - create private copy of an sk_buff
449 * @skb: buffer to copy
450 * @gfp_mask: allocation priority
451 *
452 * Make a copy of both an &sk_buff and its data. This is used when the
453 * caller wishes to modify the data and needs a private copy of the
454 * data to alter. Returns %NULL on failure or the pointer to the buffer
455 * on success. The returned buffer has a reference count of 1.
456 *
457 * As by-product this function converts non-linear &sk_buff to linear
458 * one, so that &sk_buff becomes completely private and caller is allowed
459 * to modify all the data of returned buffer. This means that this
460 * function is not recommended for use in circumstances when only
461 * header is going to be modified. Use pskb_copy() instead.
462 */
463
464 struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
465 {
466 struct sk_buff *n;
467 int headerlen = skb->data-skb->head;
468
469 /*
470 * Allocate the copy buffer
471 */
472 n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
473 if(n==NULL)
474 return NULL;
475
476 /* Set the data pointer */
477 skb_reserve(n,headerlen);
478 /* Set the tail pointer and length */
479 skb_put(n,skb->len);
480 n->csum = skb->csum;
481 n->ip_summed = skb->ip_summed;
482
483 if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
484 BUG();
485
486 copy_skb_header(n, skb);
487
488 return n;
489 }
490
491 /* Keep head the same: replace data */
492 int skb_linearize(struct sk_buff *skb, int gfp_mask)
493 {
494 unsigned int size;
495 u8 *data;
496 long offset;
497 int headerlen = skb->data - skb->head;
498 int expand = (skb->tail+skb->data_len) - skb->end;
499
500 if (skb_shared(skb))
501 BUG();
502
503 if (expand <= 0)
504 expand = 0;
505
506 size = (skb->end - skb->head + expand);
507 size = SKB_DATA_ALIGN(size);
508 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
509 if (data == NULL)
510 return -ENOMEM;
511
512 /* Copy entire thing */
513 if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
514 BUG();
515
516 /* Offset between the two in bytes */
517 offset = data - skb->head;
518
519 /* Free old data. */
520 skb_release_data(skb);
521
522 skb->head = data;
523 skb->end = data + size;
524
525 /* Set up new pointers */
526 skb->h.raw += offset;
527 skb->nh.raw += offset;
528 skb->mac.raw += offset;
529 skb->tail += offset;
530 skb->data += offset;
531
532 /* Set up shinfo */
533 atomic_set(&(skb_shinfo(skb)->dataref), 1);
534 skb_shinfo(skb)->nr_frags = 0;
535 skb_shinfo(skb)->frag_list = NULL;
536
537 /* We are no longer a clone, even if we were. */
538 skb->cloned = 0;
539
540 skb->tail += skb->data_len;
541 skb->data_len = 0;
542 return 0;
543 }
544
545
546 /**
547 * pskb_copy - create copy of an sk_buff with private head.
548 * @skb: buffer to copy
549 * @gfp_mask: allocation priority
550 *
551 * Make a copy of both an &sk_buff and part of its data, located
552 * in header. Fragmented data remain shared. This is used when
553 * the caller wishes to modify only header of &sk_buff and needs
554 * private copy of the header to alter. Returns %NULL on failure
555 * or the pointer to the buffer on success.
556 * The returned buffer has a reference count of 1.
557 */
558
559 struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
560 {
561 struct sk_buff *n;
562
563 /*
564 * Allocate the copy buffer
565 */
566 n=alloc_skb(skb->end - skb->head, gfp_mask);
567 if(n==NULL)
568 return NULL;
569
570 /* Set the data pointer */
571 skb_reserve(n,skb->data-skb->head);
572 /* Set the tail pointer and length */
573 skb_put(n,skb_headlen(skb));
574 /* Copy the bytes */
575 memcpy(n->data, skb->data, n->len);
576 n->csum = skb->csum;
577 n->ip_summed = skb->ip_summed;
578
579 n->data_len = skb->data_len;
580 n->len = skb->len;
581
582 if (skb_shinfo(skb)->nr_frags) {
583 int i;
584
585 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
586 skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
587 get_page(skb_shinfo(n)->frags[i].page);
588 }
589 skb_shinfo(n)->nr_frags = i;
590 }
591
592 if (skb_shinfo(skb)->frag_list) {
593 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
594 skb_clone_fraglist(n);
595 }
596
597 copy_skb_header(n, skb);
598
599 return n;
600 }
601
602 /**
603 * pskb_expand_head - reallocate header of &sk_buff
604 * @skb: buffer to reallocate
605 * @nhead: room to add at head
606 * @ntail: room to add at tail
607 * @gfp_mask: allocation priority
608 *
609 * Expands (or creates identical copy, if &nhead and &ntail are zero)
610 * header of skb. &sk_buff itself is not changed. &sk_buff MUST have
611 * reference count of 1. Returns zero in the case of success or error,
612 * if expansion failed. In the last case, &sk_buff is not changed.
613 *
614 * All the pointers pointing into skb header may change and must be
615 * reloaded after call to this function.
616 */
617
618 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
619 {
620 int i;
621 u8 *data;
622 int size = nhead + (skb->end - skb->head) + ntail;
623 long off;
624
625 if (skb_shared(skb))
626 BUG();
627
628 size = SKB_DATA_ALIGN(size);
629
630 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
631 if (data == NULL)
632 goto nodata;
633
634 /* Copy only real data... and, alas, header. This should be
635 * optimized for the cases when header is void. */
636 memcpy(data+nhead, skb->head, skb->tail-skb->head);
637 memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
638
639 for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
640 get_page(skb_shinfo(skb)->frags[i].page);
641
642 if (skb_shinfo(skb)->frag_list)
643 skb_clone_fraglist(skb);
644
645 skb_release_data(skb);
646
647 off = (data+nhead) - skb->head;
648
649 skb->head = data;
650 skb->end = data+size;
651
652 skb->data += off;
653 skb->tail += off;
654 skb->mac.raw += off;
655 skb->h.raw += off;
656 skb->nh.raw += off;
657 skb->cloned = 0;
658 atomic_set(&skb_shinfo(skb)->dataref, 1);
659 return 0;
660
661 nodata:
662 return -ENOMEM;
663 }
664
665 /* Make private copy of skb with writable head and some headroom */
666
667 struct sk_buff *
668 skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
669 {
670 struct sk_buff *skb2;
671 int delta = headroom - skb_headroom(skb);
672
673 if (delta <= 0)
674 return pskb_copy(skb, GFP_ATOMIC);
675
676 skb2 = skb_clone(skb, GFP_ATOMIC);
677 if (skb2 == NULL ||
678 !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
679 return skb2;
680
681 kfree_skb(skb2);
682 return NULL;
683 }
684
685
686 /**
687 * skb_copy_expand - copy and expand sk_buff
688 * @skb: buffer to copy
689 * @newheadroom: new free bytes at head
690 * @newtailroom: new free bytes at tail
691 * @gfp_mask: allocation priority
692 *
693 * Make a copy of both an &sk_buff and its data and while doing so
694 * allocate additional space.
695 *
696 * This is used when the caller wishes to modify the data and needs a
697 * private copy of the data to alter as well as more space for new fields.
698 * Returns %NULL on failure or the pointer to the buffer
699 * on success. The returned buffer has a reference count of 1.
700 *
701 * You must pass %GFP_ATOMIC as the allocation priority if this function
702 * is called from an interrupt.
703 */
704
705
706 struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
707 int newheadroom,
708 int newtailroom,
709 int gfp_mask)
710 {
711 struct sk_buff *n;
712
713 /*
714 * Allocate the copy buffer
715 */
716
717 n=alloc_skb(newheadroom + skb->len + newtailroom,
718 gfp_mask);
719 if(n==NULL)
720 return NULL;
721
722 skb_reserve(n,newheadroom);
723
724 /* Set the tail pointer and length */
725 skb_put(n,skb->len);
726
727 /* Copy the data only. */
728 if (skb_copy_bits(skb, 0, n->data, skb->len))
729 BUG();
730
731 copy_skb_header(n, skb);
732 return n;
733 }
734
735 /* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
736 * If realloc==0 and trimming is impossible without change of data,
737 * it is BUG().
738 */
739
740 int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
741 {
742 int offset = skb_headlen(skb);
743 int nfrags = skb_shinfo(skb)->nr_frags;
744 int i;
745
746 for (i=0; i<nfrags; i++) {
747 int end = offset + skb_shinfo(skb)->frags[i].size;
748 if (end > len) {
749 if (skb_cloned(skb)) {
750 if (!realloc)
751 BUG();
752 if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
753 return -ENOMEM;
754 }
755 if (len <= offset) {
756 put_page(skb_shinfo(skb)->frags[i].page);
757 skb_shinfo(skb)->nr_frags--;
758 } else {
759 skb_shinfo(skb)->frags[i].size = len-offset;
760 }
761 }
762 offset = end;
763 }
764
765 if (offset < len) {
766 skb->data_len -= skb->len - len;
767 skb->len = len;
768 } else {
769 if (len <= skb_headlen(skb)) {
770 skb->len = len;
771 skb->data_len = 0;
772 skb->tail = skb->data + len;
773 if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
774 skb_drop_fraglist(skb);
775 } else {
776 skb->data_len -= skb->len - len;
777 skb->len = len;
778 }
779 }
780
781 return 0;
782 }
783
784 /**
785 * __pskb_pull_tail - advance tail of skb header
786 * @skb: buffer to reallocate
787 * @delta: number of bytes to advance tail
788 *
789 * The function makes a sense only on a fragmented &sk_buff,
790 * it expands header moving its tail forward and copying necessary
791 * data from fragmented part.
792 *
793 * &sk_buff MUST have reference count of 1.
794 *
795 * Returns %NULL (and &sk_buff does not change) if pull failed
796 * or value of new tail of skb in the case of success.
797 *
798 * All the pointers pointing into skb header may change and must be
799 * reloaded after call to this function.
800 */
801
802 /* Moves tail of skb head forward, copying data from fragmented part,
803 * when it is necessary.
804 * 1. It may fail due to malloc failure.
805 * 2. It may change skb pointers.
806 *
807 * It is pretty complicated. Luckily, it is called only in exceptional cases.
808 */
809 unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
810 {
811 int i, k, eat;
812
813 /* If skb has not enough free space at tail, get new one
814 * plus 128 bytes for future expansions. If we have enough
815 * room at tail, reallocate without expansion only if skb is cloned.
816 */
817 eat = (skb->tail+delta) - skb->end;
818
819 if (eat > 0 || skb_cloned(skb)) {
820 if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
821 return NULL;
822 }
823
824 if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
825 BUG();
826
827 /* Optimization: no fragments, no reasons to preestimate
828 * size of pulled pages. Superb.
829 */
830 if (skb_shinfo(skb)->frag_list == NULL)
831 goto pull_pages;
832
833 /* Estimate size of pulled pages. */
834 eat = delta;
835 for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
836 if (skb_shinfo(skb)->frags[i].size >= eat)
837 goto pull_pages;
838 eat -= skb_shinfo(skb)->frags[i].size;
839 }
840
841 /* If we need update frag list, we are in troubles.
842 * Certainly, it possible to add an offset to skb data,
843 * but taking into account that pulling is expected to
844 * be very rare operation, it is worth to fight against
845 * further bloating skb head and crucify ourselves here instead.
846 * Pure masohism, indeed. 8)8)
847 */
848 if (eat) {
849 struct sk_buff *list = skb_shinfo(skb)->frag_list;
850 struct sk_buff *clone = NULL;
851 struct sk_buff *insp = NULL;
852
853 do {
854 if (list == NULL)
855 BUG();
856
857 if (list->len <= eat) {
858 /* Eaten as whole. */
859 eat -= list->len;
860 list = list->next;
861 insp = list;
862 } else {
863 /* Eaten partially. */
864
865 if (skb_shared(list)) {
866 /* Sucks! We need to fork list. :-( */
867 clone = skb_clone(list, GFP_ATOMIC);
868 if (clone == NULL)
869 return NULL;
870 insp = list->next;
871 list = clone;
872 } else {
873 /* This may be pulled without
874 * problems. */
875 insp = list;
876 }
877 if (pskb_pull(list, eat) == NULL) {
878 if (clone)
879 kfree_skb(clone);
880 return NULL;
881 }
882 break;
883 }
884 } while (eat);
885
886 /* Free pulled out fragments. */
887 while ((list = skb_shinfo(skb)->frag_list) != insp) {
888 skb_shinfo(skb)->frag_list = list->next;
889 kfree_skb(list);
890 }
891 /* And insert new clone at head. */
892 if (clone) {
893 clone->next = list;
894 skb_shinfo(skb)->frag_list = clone;
895 }
896 }
897 /* Success! Now we may commit changes to skb data. */
898
899 pull_pages:
900 eat = delta;
901 k = 0;
902 for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
903 if (skb_shinfo(skb)->frags[i].size <= eat) {
904 put_page(skb_shinfo(skb)->frags[i].page);
905 eat -= skb_shinfo(skb)->frags[i].size;
906 } else {
907 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
908 if (eat) {
909 skb_shinfo(skb)->frags[k].page_offset += eat;
910 skb_shinfo(skb)->frags[k].size -= eat;
911 eat = 0;
912 }
913 k++;
914 }
915 }
916 skb_shinfo(skb)->nr_frags = k;
917
918 skb->tail += delta;
919 skb->data_len -= delta;
920
921 return skb->tail;
922 }
923
924 /* Copy some data bits from skb to kernel buffer. */
925
926 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
927 {
928 int i, copy;
929 int start = skb->len - skb->data_len;
930
931 if (offset > (int)skb->len-len)
932 goto fault;
933
934 /* Copy header. */
935 if ((copy = start-offset) > 0) {
936 if (copy > len)
937 copy = len;
938 memcpy(to, skb->data + offset, copy);
939 if ((len -= copy) == 0)
940 return 0;
941 offset += copy;
942 to += copy;
943 }
944
945 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
946 int end;
947
948 BUG_TRAP(start <= offset+len);
949
950 end = start + skb_shinfo(skb)->frags[i].size;
951 if ((copy = end-offset) > 0) {
952 u8 *vaddr;
953
954 if (copy > len)
955 copy = len;
956
957 vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
958 memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
959 offset-start, copy);
960 kunmap_skb_frag(vaddr);
961
962 if ((len -= copy) == 0)
963 return 0;
964 offset += copy;
965 to += copy;
966 }
967 start = end;
968 }
969
970 if (skb_shinfo(skb)->frag_list) {
971 struct sk_buff *list;
972
973 for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
974 int end;
975
976 BUG_TRAP(start <= offset+len);
977
978 end = start + list->len;
979 if ((copy = end-offset) > 0) {
980 if (copy > len)
981 copy = len;
982 if (skb_copy_bits(list, offset-start, to, copy))
983 goto fault;
984 if ((len -= copy) == 0)
985 return 0;
986 offset += copy;
987 to += copy;
988 }
989 start = end;
990 }
991 }
992 if (len == 0)
993 return 0;
994
995 fault:
996 return -EFAULT;
997 }
998
999 /* Checksum skb data. */
1000
1001 unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
1002 {
1003 int i, copy;
1004 int start = skb->len - skb->data_len;
1005 int pos = 0;
1006
1007 /* Checksum header. */
1008 if ((copy = start-offset) > 0) {
1009 if (copy > len)
1010 copy = len;
1011 csum = csum_partial(skb->data+offset, copy, csum);
1012 if ((len -= copy) == 0)
1013 return csum;
1014 offset += copy;
1015 pos = copy;
1016 }
1017
1018 for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
1019 int end;
1020
1021 BUG_TRAP(start <= offset+len);
1022
1023 end = start + skb_shinfo(skb)->frags[i].size;
1024 if ((copy = end-offset) > 0) {
1025 unsigned int csum2;
1026 u8 *vaddr;
1027 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1028
1029 if (copy > len)
1030 copy = len;
1031 vaddr = kmap_skb_frag(frag);
1032 csum2 = csum_partial(vaddr + frag->page_offset +
1033 offset-start, copy, 0);
1034 kunmap_skb_frag(vaddr);
1035 csum = csum_block_add(csum, csum2, pos);
1036 if (!(len -= copy))
1037 return csum;
1038 offset += copy;
1039 pos += copy;
1040 }
1041 start = end;
1042 }
1043
1044 if (skb_shinfo(skb)->frag_list) {
1045 struct sk_buff *list;
1046
1047 for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
1048 int end;
1049
1050 BUG_TRAP(start <= offset+len);
1051
1052 end = start + list->len;
1053 if ((copy = end-offset) > 0) {
1054 unsigned int csum2;
1055 if (copy > len)
1056 copy = len;
1057 csum2 = skb_checksum(list, offset-start, copy, 0);
1058 csum = csum_block_add(csum, csum2, pos);
1059 if ((len -= copy) == 0)
1060 return csum;
1061 offset += copy;
1062 pos += copy;
1063 }
1064 start = end;
1065 }
1066 }
1067 if (len == 0)
1068 return csum;
1069
1070 BUG();
1071 return csum;
1072 }
1073
1074 /* Both of above in one bottle. */
1075
1076 unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
1077 {
1078 int i, copy;
1079 int start = skb->len - skb->data_len;
1080 int pos = 0;
1081
1082 /* Copy header. */
1083 if ((copy = start-offset) > 0) {
1084 if (copy > len)
1085 copy = len;
1086 csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
1087 if ((len -= copy) == 0)
1088 return csum;
1089 offset += copy;
1090 to += copy;
1091 pos = copy;
1092 }
1093
1094 for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
1095 int end;
1096
1097 BUG_TRAP(start <= offset+len);
1098
1099 end = start + skb_shinfo(skb)->frags[i].size;
1100 if ((copy = end-offset) > 0) {
1101 unsigned int csum2;
1102 u8 *vaddr;
1103 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1104
1105 if (copy > len)
1106 copy = len;
1107 vaddr = kmap_skb_frag(frag);
1108 csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
1109 offset-start, to, copy, 0);
1110 kunmap_skb_frag(vaddr);
1111 csum = csum_block_add(csum, csum2, pos);
1112 if (!(len -= copy))
1113 return csum;
1114 offset += copy;
1115 to += copy;
1116 pos += copy;
1117 }
1118 start = end;
1119 }
1120
1121 if (skb_shinfo(skb)->frag_list) {
1122 struct sk_buff *list;
1123
1124 for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
1125 unsigned int csum2;
1126 int end;
1127
1128 BUG_TRAP(start <= offset+len);
1129
1130 end = start + list->len;
1131 if ((copy = end-offset) > 0) {
1132 if (copy > len)
1133 copy = len;
1134 csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
1135 csum = csum_block_add(csum, csum2, pos);
1136 if ((len -= copy) == 0)
1137 return csum;
1138 offset += copy;
1139 to += copy;
1140 pos += copy;
1141 }
1142 start = end;
1143 }
1144 }
1145 if (len == 0)
1146 return csum;
1147
1148 BUG();
1149 return csum;
1150 }
1151
1152 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1153 {
1154 unsigned int csum;
1155 long csstart;
1156
1157 if (skb->ip_summed == CHECKSUM_HW)
1158 csstart = skb->h.raw - skb->data;
1159 else
1160 csstart = skb->len - skb->data_len;
1161
1162 if (csstart > skb->len - skb->data_len)
1163 BUG();
1164
1165 memcpy(to, skb->data, csstart);
1166
1167 csum = 0;
1168 if (csstart != skb->len)
1169 csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
1170 skb->len-csstart, 0);
1171
1172 if (skb->ip_summed == CHECKSUM_HW) {
1173 long csstuff = csstart + skb->csum;
1174
1175 *((unsigned short *)(to + csstuff)) = csum_fold(csum);
1176 }
1177 }
1178
#if 0
/* 
 * 	Tune the memory allocator for a new MTU size.
 *	(Currently compiled out.)
 */
void skb_add_mtu(int mtu)
{
	/* Must match allocation in alloc_skb: aligned payload followed
	 * by the shared info block. */
	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);

	kmem_add_cache_size(mtu);
}
#endif
1191
1192 void __init skb_init(void)
1193 {
1194 int i;
1195
1196 skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
1197 sizeof(struct sk_buff),
1198 0,
1199 SLAB_HWCACHE_ALIGN,
1200 skb_headerinit, NULL);
1201 if (!skbuff_head_cache)
1202 panic("cannot create skbuff cache");
1203
1204 for (i=0; i<NR_CPUS; i++)
1205 skb_queue_head_init(&skb_head_pool[i].list);
1206 }
1207