File: /usr/src/linux/net/core/skbuff.c

1     /*
2      *	Routines having to do with the 'struct sk_buff' memory handlers.
3      *
4      *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
5      *			Florian La Roche <rzsfl@rz.uni-sb.de>
6      *
7      *	Version:	$Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $
8      *
9      *	Fixes:	
10      *		Alan Cox	:	Fixed the worst of the load balancer bugs.
11      *		Dave Platt	:	Interrupt stacking fix.
12      *	Richard Kooijman	:	Timestamp fixes.
13      *		Alan Cox	:	Changed buffer format.
14      *		Alan Cox	:	destructor hook for AF_UNIX etc.
15      *		Linus Torvalds	:	Better skb_clone.
16      *		Alan Cox	:	Added skb_copy.
17      *		Alan Cox	:	Added all the changed routines Linus
18      *					only put in the headers
19      *		Ray VanTassle	:	Fixed --skb->lock in free
20      *		Alan Cox	:	skb_copy copy arp field
21      *		Andi Kleen	:	slabified it.
22      *
23      *	NOTE:
24      *		The __skb_ routines should be called with interrupts 
25      *	disabled, or you better be *real* sure that the operation is atomic 
26      *	with respect to whatever list is being frobbed (e.g. via lock_sock()
27      *	or via disabling bottom half handlers, etc).
28      *
29      *	This program is free software; you can redistribute it and/or
30      *	modify it under the terms of the GNU General Public License
31      *	as published by the Free Software Foundation; either version
32      *	2 of the License, or (at your option) any later version.
33      */
34     
35     /*
36      *	The functions in this file will not compile correctly with gcc 2.4.x
37      */
38     
39     #include <linux/config.h>
40     #include <linux/types.h>
41     #include <linux/kernel.h>
42     #include <linux/sched.h>
43     #include <linux/mm.h>
44     #include <linux/interrupt.h>
45     #include <linux/in.h>
46     #include <linux/inet.h>
47     #include <linux/slab.h>
48     #include <linux/netdevice.h>
49     #include <linux/string.h>
50     #include <linux/skbuff.h>
51     #include <linux/cache.h>
52     #include <linux/init.h>
53     #include <linux/highmem.h>
54     
55     #include <net/ip.h>
56     #include <net/protocol.h>
57     #include <net/dst.h>
58     #include <net/tcp.h>
59     #include <net/udp.h>
60     #include <net/sock.h>
61     
62     #include <asm/uaccess.h>
63     #include <asm/system.h>
64     
/*
 * Upper bound on the length of each per-CPU list of recycled sk_buff
 * heads: skb_head_to_pool() only queues a head while the list is shorter
 * than this value.
 */
int sysctl_hot_list_len = 128;

/* Slab cache that sk_buff heads are allocated from and freed back to. */
static kmem_cache_t *skbuff_head_cache;

/*
 * Lists of recycled sk_buff heads, one entry per CPU, indexed with
 * smp_processor_id(). The union with a SMP_CACHE_BYTES pad makes every
 * entry at least SMP_CACHE_BYTES bytes in size.
 */
static union {
	struct sk_buff_head	list;
	char			pad[SMP_CACHE_BYTES];
} skb_head_pool[NR_CPUS];
73     
74     /*
75      *	Keep out-of-line to prevent kernel bloat.
76      *	__builtin_return_address is not used because it is not always
77      *	reliable. 
78      */
79     
80     /**
81      *	skb_over_panic	- 	private function
82      *	@skb: buffer
83      *	@sz: size
84      *	@here: address
85      *
86      *	Out of line support code for skb_put(). Not user callable.
87      */
88      
89     void skb_over_panic(struct sk_buff *skb, int sz, void *here)
90     {
91     	printk("skput:over: %p:%d put:%d dev:%s", 
92     		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
93     	BUG();
94     }
95     
96     /**
97      *	skb_under_panic	- 	private function
98      *	@skb: buffer
99      *	@sz: size
100      *	@here: address
101      *
102      *	Out of line support code for skb_push(). Not user callable.
103      */
104      
105     
106     void skb_under_panic(struct sk_buff *skb, int sz, void *here)
107     {
108             printk("skput:under: %p:%d put:%d dev:%s",
109                     here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
110     	BUG();
111     }
112     
113     static __inline__ struct sk_buff *skb_head_from_pool(void)
114     {
115     	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
116     
117     	if (skb_queue_len(list)) {
118     		struct sk_buff *skb;
119     		unsigned long flags;
120     
121     		local_irq_save(flags);
122     		skb = __skb_dequeue(list);
123     		local_irq_restore(flags);
124     		return skb;
125     	}
126     	return NULL;
127     }
128     
129     static __inline__ void skb_head_to_pool(struct sk_buff *skb)
130     {
131     	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
132     
133     	if (skb_queue_len(list) < sysctl_hot_list_len) {
134     		unsigned long flags;
135     
136     		local_irq_save(flags);
137     		__skb_queue_head(list, skb);
138     		local_irq_restore(flags);
139     
140     		return;
141     	}
142     	kmem_cache_free(skbuff_head_cache, skb);
143     }
144     
145     
146     /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
147      *	'private' fields and also do memory statistics to find all the
148      *	[BEEP] leaks.
149      * 
150      */
151     
152     /**
153      *	alloc_skb	-	allocate a network buffer
154      *	@size: size to allocate
155      *	@gfp_mask: allocation mask
156      *
157      *	Allocate a new &sk_buff. The returned buffer has no headroom and a
158      *	tail room of size bytes. The object has a reference count of one.
159      *	The return is the buffer. On a failure the return is %NULL.
160      *
161      *	Buffers may only be allocated from interrupts using a @gfp_mask of
162      *	%GFP_ATOMIC.
163      */
164      
165     struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
166     {
167     	struct sk_buff *skb;
168     	u8 *data;
169     
170     	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
171     		static int count = 0;
172     		if (++count < 5) {
173     			printk(KERN_ERR "alloc_skb called nonatomically "
174     			       "from interrupt %p\n", NET_CALLER(size));
175      			BUG();
176     		}
177     		gfp_mask &= ~__GFP_WAIT;
178     	}
179     
180     	/* Get the HEAD */
181     	skb = skb_head_from_pool();
182     	if (skb == NULL) {
183     		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
184     		if (skb == NULL)
185     			goto nohead;
186     	}
187     
188     	/* Get the DATA. Size must match skb_add_mtu(). */
189     	size = SKB_DATA_ALIGN(size);
190     	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
191     	if (data == NULL)
192     		goto nodata;
193     
194     	/* XXX: does not include slab overhead */ 
195     	skb->truesize = size + sizeof(struct sk_buff);
196     
197     	/* Load the data pointers. */
198     	skb->head = data;
199     	skb->data = data;
200     	skb->tail = data;
201     	skb->end = data + size;
202     
203     	/* Set up other state */
204     	skb->len = 0;
205     	skb->cloned = 0;
206     	skb->data_len = 0;
207     
208     	atomic_set(&skb->users, 1); 
209     	atomic_set(&(skb_shinfo(skb)->dataref), 1);
210     	skb_shinfo(skb)->nr_frags = 0;
211     	skb_shinfo(skb)->frag_list = NULL;
212     	return skb;
213     
214     nodata:
215     	skb_head_to_pool(skb);
216     nohead:
217     	return NULL;
218     }
219     
220     
221     /*
222      *	Slab constructor for a skb head. 
223      */ 
224     static inline void skb_headerinit(void *p, kmem_cache_t *cache, 
225     				  unsigned long flags)
226     {
227     	struct sk_buff *skb = p;
228     
229     	skb->next = NULL;
230     	skb->prev = NULL;
231     	skb->list = NULL;
232     	skb->sk = NULL;
233     	skb->stamp.tv_sec=0;	/* No idea about time */
234     	skb->dev = NULL;
235     	skb->dst = NULL;
236     	memset(skb->cb, 0, sizeof(skb->cb));
237     	skb->pkt_type = PACKET_HOST;	/* Default type */
238     	skb->ip_summed = 0;
239     	skb->priority = 0;
240     	skb->security = 0;	/* By default packets are insecure */
241     	skb->destructor = NULL;
242     
243     #ifdef CONFIG_NETFILTER
244     	skb->nfmark = skb->nfcache = 0;
245     	skb->nfct = NULL;
246     #ifdef CONFIG_NETFILTER_DEBUG
247     	skb->nf_debug = 0;
248     #endif
249     #endif
250     #ifdef CONFIG_NET_SCHED
251     	skb->tc_index = 0;
252     #endif
253     }
254     
255     static void skb_drop_fraglist(struct sk_buff *skb)
256     {
257     	struct sk_buff *list = skb_shinfo(skb)->frag_list;
258     
259     	skb_shinfo(skb)->frag_list = NULL;
260     
261     	do {
262     		struct sk_buff *this = list;
263     		list = list->next;
264     		kfree_skb(this);
265     	} while (list);
266     }
267     
268     static void skb_clone_fraglist(struct sk_buff *skb)
269     {
270     	struct sk_buff *list;
271     
272     	for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
273     		skb_get(list);
274     }
275     
276     static void skb_release_data(struct sk_buff *skb)
277     {
278     	if (!skb->cloned ||
279     	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
280     		if (skb_shinfo(skb)->nr_frags) {
281     			int i;
282     			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
283     				put_page(skb_shinfo(skb)->frags[i].page);
284     		}
285     
286     		if (skb_shinfo(skb)->frag_list)
287     			skb_drop_fraglist(skb);
288     
289     		kfree(skb->head);
290     	}
291     }
292     
/*
 *	Free an skbuff by memory without cleaning the state.
 *
 *	The data area is released through skb_release_data() and the head
 *	is then handed to skb_head_to_pool(), which either recycles it on
 *	the per-CPU list or frees it to the slab cache.
 */
void kfree_skbmem(struct sk_buff *skb)
{
	skb_release_data(skb);
	skb_head_to_pool(skb);
}
301     
302     /**
303      *	__kfree_skb - private function 
304      *	@skb: buffer
305      *
306      *	Free an sk_buff. Release anything attached to the buffer. 
307      *	Clean the state. This is an internal helper function. Users should
308      *	always call kfree_skb
309      */
310     
311     void __kfree_skb(struct sk_buff *skb)
312     {
313     	if (skb->list) {
314     	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
315     		       "on a list (from %p).\n", NET_CALLER(skb));
316     		BUG();
317     	}
318     
319     	dst_release(skb->dst);
320     	if(skb->destructor) {
321     		if (in_irq()) {
322     			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
323     				NET_CALLER(skb));
324     		}
325     		skb->destructor(skb);
326     	}
327     #ifdef CONFIG_NETFILTER
328     	nf_conntrack_put(skb->nfct);
329     #endif
330     	skb_headerinit(skb, NULL, 0);  /* clean state */
331     	kfree_skbmem(skb);
332     }
333     
/**
 *	skb_clone	-	duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
 *	copies share the same packet data but not structure. The new
 *	buffer has a reference count of 1. If the allocation fails the 
 *	function returns %NULL otherwise the new buffer is returned.
 *
 *	On success both @skb and the new buffer are marked as cloned and
 *	the shared data reference count is incremented.
 *	
 *	If this function is called from an interrupt @gfp_mask must be
 *	%GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	/* Head only: recycled one if available, else from the slab cache. */
	n = skb_head_from_pool();
	if (!n) {
		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;
	}

/*
 * Copy one field from the original to the clone.
 * NOTE(review): this macro is not #undef'd at the end of the function,
 * so it stays defined for the rest of the file.
 */
#define C(x) n->x = skb->x

	/* The clone is on no list and belongs to no socket. */
	n->next = n->prev = NULL;
	n->list = NULL;
	n->sk = NULL;
	C(stamp);
	C(dev);
	C(h);
	C(nh);
	C(mac);
	C(dst);
	/* The clone holds its own reference on the route. */
	dst_clone(n->dst);
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	C(csum);
	n->cloned = 1;
	C(pkt_type);
	C(ip_summed);
	C(priority);
	atomic_set(&n->users, 1);
	C(protocol);
	C(security);
	C(truesize);
	/* Same data area: all four buffer pointers are copied verbatim. */
	C(head);
	C(data);
	C(tail);
	C(end);
	/* The destructor is not inherited. */
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	C(nfmark);
	C(nfcache);
	C(nfct);
#ifdef CONFIG_NETFILTER_DEBUG
	C(nf_debug);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
	C(private);
#endif
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#endif

	/* One more user of the shared data; the original is now a clone too. */
	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;
#ifdef CONFIG_NETFILTER
	/* Account for the nfct pointer copied above. */
	nf_conntrack_get(skb->nfct);
#endif
	return n;
}
410     
411     static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
412     {
413     	/*
414     	 *	Shift between the two data areas in bytes
415     	 */
416     	unsigned long offset = new->data - old->data;
417     
418     	new->list=NULL;
419     	new->sk=NULL;
420     	new->dev=old->dev;
421     	new->priority=old->priority;
422     	new->protocol=old->protocol;
423     	new->dst=dst_clone(old->dst);
424     	new->h.raw=old->h.raw+offset;
425     	new->nh.raw=old->nh.raw+offset;
426     	new->mac.raw=old->mac.raw+offset;
427     	memcpy(new->cb, old->cb, sizeof(old->cb));
428     	atomic_set(&new->users, 1);
429     	new->pkt_type=old->pkt_type;
430     	new->stamp=old->stamp;
431     	new->destructor = NULL;
432     	new->security=old->security;
433     #ifdef CONFIG_NETFILTER
434     	new->nfmark=old->nfmark;
435     	new->nfcache=old->nfcache;
436     	new->nfct=old->nfct;
437     	nf_conntrack_get(new->nfct);
438     #ifdef CONFIG_NETFILTER_DEBUG
439     	new->nf_debug=old->nf_debug;
440     #endif
441     #endif
442     #ifdef CONFIG_NET_SCHED
443     	new->tc_index = old->tc_index;
444     #endif
445     }
446     
447     /**
448      *	skb_copy	-	create private copy of an sk_buff
449      *	@skb: buffer to copy
450      *	@gfp_mask: allocation priority
451      *
452      *	Make a copy of both an &sk_buff and its data. This is used when the
453      *	caller wishes to modify the data and needs a private copy of the 
454      *	data to alter. Returns %NULL on failure or the pointer to the buffer
455      *	on success. The returned buffer has a reference count of 1.
456      *
457      *	As by-product this function converts non-linear &sk_buff to linear
458      *	one, so that &sk_buff becomes completely private and caller is allowed
459      *	to modify all the data of returned buffer. This means that this
460      *	function is not recommended for use in circumstances when only
461      *	header is going to be modified. Use pskb_copy() instead.
462      */
463      
464     struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
465     {
466     	struct sk_buff *n;
467     	int headerlen = skb->data-skb->head;
468     
469     	/*
470     	 *	Allocate the copy buffer
471     	 */
472     	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
473     	if(n==NULL)
474     		return NULL;
475     
476     	/* Set the data pointer */
477     	skb_reserve(n,headerlen);
478     	/* Set the tail pointer and length */
479     	skb_put(n,skb->len);
480     	n->csum = skb->csum;
481     	n->ip_summed = skb->ip_summed;
482     
483     	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
484     		BUG();
485     
486     	copy_skb_header(n, skb);
487     
488     	return n;
489     }
490     
491     /* Keep head the same: replace data */
492     int skb_linearize(struct sk_buff *skb, int gfp_mask)
493     {
494     	unsigned int size;
495     	u8 *data;
496     	long offset;
497     	int headerlen = skb->data - skb->head;
498     	int expand = (skb->tail+skb->data_len) - skb->end;
499     
500     	if (skb_shared(skb))
501     		BUG();
502     
503     	if (expand <= 0)
504     		expand = 0;
505     
506     	size = (skb->end - skb->head + expand);
507     	size = SKB_DATA_ALIGN(size);
508     	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
509     	if (data == NULL)
510     		return -ENOMEM;
511     
512     	/* Copy entire thing */
513     	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
514     		BUG();
515     
516     	/* Offset between the two in bytes */
517     	offset = data - skb->head;
518     
519     	/* Free old data. */
520     	skb_release_data(skb);
521     
522     	skb->head = data;
523     	skb->end  = data + size;
524     
525     	/* Set up new pointers */
526     	skb->h.raw += offset;
527     	skb->nh.raw += offset;
528     	skb->mac.raw += offset;
529     	skb->tail += offset;
530     	skb->data += offset;
531     
532     	/* Set up shinfo */
533     	atomic_set(&(skb_shinfo(skb)->dataref), 1);
534     	skb_shinfo(skb)->nr_frags = 0;
535     	skb_shinfo(skb)->frag_list = NULL;
536     
537     	/* We are no longer a clone, even if we were. */
538     	skb->cloned = 0;
539     
540     	skb->tail += skb->data_len;
541     	skb->data_len = 0;
542     	return 0;
543     }
544     
545     
546     /**
547      *	pskb_copy	-	create copy of an sk_buff with private head.
548      *	@skb: buffer to copy
549      *	@gfp_mask: allocation priority
550      *
551      *	Make a copy of both an &sk_buff and part of its data, located
552      *	in header. Fragmented data remain shared. This is used when
553      *	the caller wishes to modify only header of &sk_buff and needs
554      *	private copy of the header to alter. Returns %NULL on failure
555      *	or the pointer to the buffer on success.
556      *	The returned buffer has a reference count of 1.
557      */
558     
559     struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
560     {
561     	struct sk_buff *n;
562     
563     	/*
564     	 *	Allocate the copy buffer
565     	 */
566     	n=alloc_skb(skb->end - skb->head, gfp_mask);
567     	if(n==NULL)
568     		return NULL;
569     
570     	/* Set the data pointer */
571     	skb_reserve(n,skb->data-skb->head);
572     	/* Set the tail pointer and length */
573     	skb_put(n,skb_headlen(skb));
574     	/* Copy the bytes */
575     	memcpy(n->data, skb->data, n->len);
576     	n->csum = skb->csum;
577     	n->ip_summed = skb->ip_summed;
578     
579     	n->data_len = skb->data_len;
580     	n->len = skb->len;
581     
582     	if (skb_shinfo(skb)->nr_frags) {
583     		int i;
584     
585     		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
586     			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
587     			get_page(skb_shinfo(n)->frags[i].page);
588     		}
589     		skb_shinfo(n)->nr_frags = i;
590     	}
591     
592     	if (skb_shinfo(skb)->frag_list) {
593     		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
594     		skb_clone_fraglist(n);
595     	}
596     
597     	copy_skb_header(n, skb);
598     
599     	return n;
600     }
601     
/**
 *	pskb_expand_head - reallocate header of &sk_buff
 *	@skb: buffer to reallocate
 *	@nhead: room to add at head
 *	@ntail: room to add at tail
 *	@gfp_mask: allocation priority
 *
 *	Expands (or creates identical copy, if @nhead and @ntail are zero)
 *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
 *	reference count of 1. Returns zero on success or -ENOMEM if the
 *	new data area could not be allocated. In the last case, &sk_buff
 *	is not changed.
 *
 *	All the pointers pointing into skb header may change and must be
 *	reloaded after call to this function.
 */

int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
{
	int i;
	u8 *data;
	/* New linear area: old area plus the requested head and tail room. */
	int size = nhead + (skb->end - skb->head) + ntail;
	long off;

	if (skb_shared(skb))
		BUG();

	size = SKB_DATA_ALIGN(size);

	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (data == NULL)
		goto nodata;

	/* Copy only real data... and, alas, header. This should be
	 * optimized for the cases when header is void. */
	memcpy(data+nhead, skb->head, skb->tail-skb->head);
	/* The shared info block is carried over to the new end of buffer. */
	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));

	/* Take references for the new copy of the shared info before the
	 * old data area (and possibly its references) is released below. */
	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
		get_page(skb_shinfo(skb)->frags[i].page);

	if (skb_shinfo(skb)->frag_list)
		skb_clone_fraglist(skb);

	skb_release_data(skb);

	/* Shift to apply to pointers into the old area; includes nhead. */
	off = (data+nhead) - skb->head;

	skb->head = data;
	skb->end  = data+size;

	skb->data += off;
	skb->tail += off;
	skb->mac.raw += off;
	skb->h.raw += off;
	skb->nh.raw += off;
	/* The new data area is private to this sk_buff. */
	skb->cloned = 0;
	atomic_set(&skb_shinfo(skb)->dataref, 1);
	return 0;

nodata:
	return -ENOMEM;
}
664     
665     /* Make private copy of skb with writable head and some headroom */
666     
667     struct sk_buff *
668     skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
669     {
670     	struct sk_buff *skb2;
671     	int delta = headroom - skb_headroom(skb);
672     
673     	if (delta <= 0)
674     		return pskb_copy(skb, GFP_ATOMIC);
675     
676     	skb2 = skb_clone(skb, GFP_ATOMIC);
677     	if (skb2 == NULL ||
678     	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
679     		return skb2;
680     
681     	kfree_skb(skb2);
682     	return NULL;
683     }
684     
685     
686     /**
687      *	skb_copy_expand	-	copy and expand sk_buff
688      *	@skb: buffer to copy
689      *	@newheadroom: new free bytes at head
690      *	@newtailroom: new free bytes at tail
691      *	@gfp_mask: allocation priority
692      *
693      *	Make a copy of both an &sk_buff and its data and while doing so 
694      *	allocate additional space.
695      *
696      *	This is used when the caller wishes to modify the data and needs a 
697      *	private copy of the data to alter as well as more space for new fields.
698      *	Returns %NULL on failure or the pointer to the buffer
699      *	on success. The returned buffer has a reference count of 1.
700      *
701      *	You must pass %GFP_ATOMIC as the allocation priority if this function
702      *	is called from an interrupt.
703      */
704      
705     
706     struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
707     				int newheadroom,
708     				int newtailroom,
709     				int gfp_mask)
710     {
711     	struct sk_buff *n;
712     
713     	/*
714     	 *	Allocate the copy buffer
715     	 */
716      	 
717     	n=alloc_skb(newheadroom + skb->len + newtailroom,
718     		    gfp_mask);
719     	if(n==NULL)
720     		return NULL;
721     
722     	skb_reserve(n,newheadroom);
723     
724     	/* Set the tail pointer and length */
725     	skb_put(n,skb->len);
726     
727     	/* Copy the data only. */
728     	if (skb_copy_bits(skb, 0, n->data, skb->len))
729     		BUG();
730     
731     	copy_skb_header(n, skb);
732     	return n;
733     }
734     
735     /* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
736      * If realloc==0 and trimming is impossible without change of data,
737      * it is BUG().
738      */
739     
740     int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
741     {
742     	int offset = skb_headlen(skb);
743     	int nfrags = skb_shinfo(skb)->nr_frags;
744     	int i;
745     
746     	for (i=0; i<nfrags; i++) {
747     		int end = offset + skb_shinfo(skb)->frags[i].size;
748     		if (end > len) {
749     			if (skb_cloned(skb)) {
750     				if (!realloc)
751     					BUG();
752     				if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
753     					return -ENOMEM;
754     			}
755     			if (len <= offset) {
756     				put_page(skb_shinfo(skb)->frags[i].page);
757     				skb_shinfo(skb)->nr_frags--;
758     			} else {
759     				skb_shinfo(skb)->frags[i].size = len-offset;
760     			}
761     		}
762     		offset = end;
763     	}
764     
765     	if (offset < len) {
766     		skb->data_len -= skb->len - len;
767     		skb->len = len;
768     	} else {
769     		if (len <= skb_headlen(skb)) {
770     			skb->len = len;
771     			skb->data_len = 0;
772     			skb->tail = skb->data + len;
773     			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
774     				skb_drop_fraglist(skb);
775     		} else {
776     			skb->data_len -= skb->len - len;
777     			skb->len = len;
778     		}
779     	}
780     
781     	return 0;
782     }
783     
784     /**
785      *	__pskb_pull_tail - advance tail of skb header 
786      *	@skb: buffer to reallocate
787      *	@delta: number of bytes to advance tail
788      *
 *	The function makes sense only for a fragmented &sk_buff:
 *	it expands the header by moving its tail forward and copying the
 *	necessary data from the fragmented part.
792      *
793      *	&sk_buff MUST have reference count of 1.
794      *
795      *	Returns %NULL (and &sk_buff does not change) if pull failed
796      *	or value of new tail of skb in the case of success.
797      *
798      *	All the pointers pointing into skb header may change and must be
799      *	reloaded after call to this function.
800      */
801     
802     /* Moves tail of skb head forward, copying data from fragmented part,
803      * when it is necessary.
804      * 1. It may fail due to malloc failure.
805      * 2. It may change skb pointers.
806      *
807      * It is pretty complicated. Luckily, it is called only in exceptional cases.
808      */
unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
{
	int i, k, eat;

	/* If skb has not enough free space at tail, get new one
	 * plus 128 bytes for future expansions. If we have enough
	 * room at tail, reallocate without expansion only if skb is cloned.
	 */
	/* eat > 0 here means the pulled tail would run past skb->end. */
	eat = (skb->tail+delta) - skb->end;

	if (eat > 0 || skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
			return NULL;
	}

	/* Copy the first delta bytes that follow the linear part
	 * (offset skb_headlen()) to the current tail. The fragments
	 * themselves are only adjusted further down.
	 */
	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
		BUG();

	/* Optimization: no fragments, no reasons to preestimate
	 * size of pulled pages. Superb.
	 */
	if (skb_shinfo(skb)->frag_list == NULL)
		goto pull_pages;

	/* Estimate size of pulled pages. */
	/* If the page fragments alone cover delta, the fragment list
	 * does not have to be touched; otherwise eat ends up holding the
	 * number of bytes that must come out of the fragment list.
	 */
	eat = delta;
	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size >= eat)
			goto pull_pages;
		eat -= skb_shinfo(skb)->frags[i].size;
	}

	/* If we need to update the frag list, we are in trouble.
	 * Certainly, it is possible to add an offset to skb data,
	 * but taking into account that pulling is expected to
	 * be a very rare operation, it is worth fighting against
	 * further bloating of the skb head and crucifying ourselves
	 * here instead. Pure masochism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		/* Private clone of a shared, partially eaten element. */
		struct sk_buff *clone = NULL;
		/* First element of the old list that stays on the list. */
		struct sk_buff *insp = NULL;

		do {
			if (list == NULL)
				BUG();

			if (list->len <= eat) {
				/* Eaten as whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (clone == NULL)
						return NULL;
					/* The shared original gets dropped
					 * below; the clone takes its place. */
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems. */
					insp = list;
				}
				if (pskb_pull(list, eat) == NULL) {
					if (clone)
						kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		/* list equals insp here, i.e. the remainder of the old list. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	/* Compact the page fragment array: fragments that are consumed
	 * completely are released, the first surviving one is advanced by
	 * whatever is left of delta, and the rest are moved down. k counts
	 * the surviving fragments.
	 */
	eat = delta;
	k = 0;
	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		if (skb_shinfo(skb)->frags[i].size <= eat) {
			put_page(skb_shinfo(skb)->frags[i].page);
			eat -= skb_shinfo(skb)->frags[i].size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_shinfo(skb)->frags[k].size -= eat;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

	/* delta bytes moved from the fragmented part into the linear part. */
	skb->tail += delta;
	skb->data_len -= delta;

	return skb->tail;
}
923     
924     /* Copy some data bits from skb to kernel buffer. */
925     
926     int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
927     {
928     	int i, copy;
929     	int start = skb->len - skb->data_len;
930     
931     	if (offset > (int)skb->len-len)
932     		goto fault;
933     
934     	/* Copy header. */
935     	if ((copy = start-offset) > 0) {
936     		if (copy > len)
937     			copy = len;
938     		memcpy(to, skb->data + offset, copy);
939     		if ((len -= copy) == 0)
940     			return 0;
941     		offset += copy;
942     		to += copy;
943     	}
944     
945     	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
946     		int end;
947     
948     		BUG_TRAP(start <= offset+len);
949     
950     		end = start + skb_shinfo(skb)->frags[i].size;
951     		if ((copy = end-offset) > 0) {
952     			u8 *vaddr;
953     
954     			if (copy > len)
955     				copy = len;
956     
957     			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
958     			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
959     			       offset-start, copy);
960     			kunmap_skb_frag(vaddr);
961     
962     			if ((len -= copy) == 0)
963     				return 0;
964     			offset += copy;
965     			to += copy;
966     		}
967     		start = end;
968     	}
969     
970     	if (skb_shinfo(skb)->frag_list) {
971     		struct sk_buff *list;
972     
973     		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
974     			int end;
975     
976     			BUG_TRAP(start <= offset+len);
977     
978     			end = start + list->len;
979     			if ((copy = end-offset) > 0) {
980     				if (copy > len)
981     					copy = len;
982     				if (skb_copy_bits(list, offset-start, to, copy))
983     					goto fault;
984     				if ((len -= copy) == 0)
985     					return 0;
986     				offset += copy;
987     				to += copy;
988     			}
989     			start = end;
990     		}
991     	}
992     	if (len == 0)
993     		return 0;
994     
995     fault:
996     	return -EFAULT;
997     }
998     
/*
 * Checksum skb data.
 *
 * Folds the checksum of @len bytes, starting @offset bytes from
 * skb->data, into @csum and returns the result. The linear part, the
 * page fragments and the fragment list (recursively) are walked in that
 * order. Reaching the end of the buffer with bytes still outstanding
 * is a BUG().
 */

unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
{
	int i, copy;
	/* start..end is the packet range covered by the current piece. */
	int start = skb->len - skb->data_len;
	/* Number of bytes checksummed so far; passed to csum_block_add(). */
	int pos = 0;

	/* Checksum header. */
	if ((copy = start-offset) > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial(skb->data+offset, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		pos = copy;
	}

	/* Page fragments: each piece is summed from zero and then merged
	 * into the running checksum at position pos. */
	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset+len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end-offset) > 0) {
			unsigned int csum2;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (copy > len)
				copy = len;
			/* The page is mapped only while it is being summed. */
			vaddr = kmap_skb_frag(frag);
			csum2 = csum_partial(vaddr + frag->page_offset +
					     offset-start, copy, 0);
			kunmap_skb_frag(vaddr);
			csum = csum_block_add(csum, csum2, pos);
			if (!(len -= copy))
				return csum;
			offset += copy;
			pos += copy;
		}
		start = end;
	}

	/* Fragment list: every element is checksummed by a recursive call. */
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
			int end;

			BUG_TRAP(start <= offset+len);

			end = start + list->len;
			if ((copy = end-offset) > 0) {
				unsigned int csum2;
				if (copy > len)
					copy = len;
				csum2 = skb_checksum(list, offset-start, copy, 0);
				csum = csum_block_add(csum, csum2, pos);
				if ((len -= copy) == 0)
					return csum;
				offset += copy;
				pos += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return csum;

	/* The caller asked for more bytes than the buffer holds. */
	BUG();
	return csum;
}
1073     
1074     /*
          *	Both of above in one bottle.
          *
          *	Copies the len bytes of skb that start at byte offset into the
          *	buffer at to and, in the same pass, folds their checksum into
          *	csum, which is returned.  The data is walked in three stages:
          *	the linear area at skb->data (the first skb->len - skb->data_len
          *	bytes), then each page fragment in skb_shinfo(skb)->frags[], and
          *	finally every buffer chained on skb_shinfo(skb)->frag_list, which
          *	is handled by a recursive call.
          *
          *	offset and start are both measured from the beginning of the skb
          *	data.  pos counts the bytes handled so far and is handed to
          *	csum_block_add() whenever a partial sum is merged into csum.
          *	to is advanced by the same amount after every segment.
          *
          *	If the walk finishes with len still non-zero the function
          *	calls BUG().
          */
1075     
1076     unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
1077     {
1078     	int i, copy;
         	/* Size of the linear area, i.e. the offset where frags[0] begins. */
1079     	int start = skb->len - skb->data_len;
         	/* Number of bytes already copied and folded into csum. */
1080     	int pos = 0;
1081     
1082     	/* Copy header. */
         	/* copy > 0 means part of the request lies in the linear area. */
1083     	if ((copy = start-offset) > 0) {
1084     		if (copy > len)
1085     			copy = len;
         		/* The caller's csum is used directly as the seed here. */
1086     		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
1087     		if ((len -= copy) == 0)
1088     			return csum;
1089     		offset += copy;
1090     		to += copy;
1091     		pos = copy;
1092     	}
1093     
         	/* Page fragments: frags[i] covers skb offsets [start, end). */
1094     	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
1095     		int end;
1096     
         		/*
         		 * NOTE(review): BUG_TRAP is defined outside this view;
         		 * presumably it only complains when the walk has moved
         		 * past the requested range - confirm.
         		 */
1097     		BUG_TRAP(start <= offset+len);
1098     
1099     		end = start + skb_shinfo(skb)->frags[i].size;
1100     		if ((copy = end-offset) > 0) {
1101     			unsigned int csum2;
1102     			u8 *vaddr;
1103     			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1104     
1105     			if (copy > len)
1106     				copy = len;
         			/*
         			 * Assumes kmap_skb_frag() yields the address of
         			 * the fragment's page, to which page_offset is
         			 * then added - TODO confirm.
         			 */
1107     			vaddr = kmap_skb_frag(frag);
         			/* offset-start is the position inside this fragment. */
1108     			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
1109     						      offset-start, to, copy, 0);
1110     			kunmap_skb_frag(vaddr);
         			/* Merge the zero-seeded partial sum at byte position pos. */
1111     			csum = csum_block_add(csum, csum2, pos);
1112     			if (!(len -= copy))
1113     				return csum;
1114     			offset += copy;
1115     			to += copy;
1116     			pos += copy;
1117     		}
         		/* The next segment begins where this one ended. */
1118     		start = end;
1119     	}
1120     
         	/* Chained buffers: each one covers skb offsets [start, end). */
1121     	if (skb_shinfo(skb)->frag_list) {
1122     		struct sk_buff *list;
1123     
1124     		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
1125     			unsigned int csum2;
1126     			int end;
1127     
1128     			BUG_TRAP(start <= offset+len);
1129     
1130     			end = start + list->len;
1131     			if ((copy = end-offset) > 0) {
1132     				if (copy > len)
1133     					copy = len;
         				/* Recurse with the offset rebased onto the chained skb. */
1134     				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
1135     				csum = csum_block_add(csum, csum2, pos);
1136     				if ((len -= copy) == 0)
1137     					return csum;
1138     				offset += copy;
1139     				to += copy;
1140     				pos += copy;
1141     			}
1142     			start = end;
1143     		}
1144     	}
         	/*
         	 * Every branch above returns as soon as len drops to zero, so
         	 * len == 0 here means no segment covered offset and the request
         	 * was empty to begin with.
         	 */
1145     	if (len == 0)
1146     		return csum;
1147     
         	/* len is still non-zero: the walk could not satisfy the request. */
1148     	BUG();
1149     	return csum;
1150     }
1151     
         /*
          *	Copy the whole of skb into the buffer at to, checksumming the
          *	part that follows csstart on the way.
          *
          *	For CHECKSUM_HW buffers csstart is the distance from skb->data
          *	to skb->h.raw; for any other ip_summed value it is the size of
          *	the linear area (skb->len - skb->data_len).  The first csstart
          *	bytes are copied verbatim with memcpy(); the remaining
          *	skb->len - csstart bytes go through skb_copy_and_csum_bits().
          *
          *	For CHECKSUM_HW the folded sum is then stored as an unsigned
          *	short at to + csstart + skb->csum, so skb->csum is used here as
          *	an offset rather than as a checksum value.  For any other
          *	ip_summed value the computed sum is not used.
          *
          *	In total skb->len bytes are written to to.
          */
1152     void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1153     {
1154     	unsigned int csum;
1155     	long csstart;
1156     
1157     	if (skb->ip_summed == CHECKSUM_HW)
1158     		csstart = skb->h.raw - skb->data;
1159     	else
1160     		csstart = skb->len - skb->data_len;
1161     
         	/*
         	 * The memcpy() below reads csstart bytes straight from
         	 * skb->data, so csstart must not exceed the linear area.
         	 */
1162     	if (csstart > skb->len - skb->data_len)
1163     		BUG();
1164     
1165     	memcpy(to, skb->data, csstart);
1166     
1167     	csum = 0;
         	/* Skip the second pass when nothing follows csstart. */
1168     	if (csstart != skb->len)
1169     		csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
1170     				skb->len-csstart, 0);
1171     
1172     	if (skb->ip_summed == CHECKSUM_HW) {
1173     		long csstuff = csstart + skb->csum;
1174     
         		/*
         		 * NOTE(review): csstuff is neither range-checked against
         		 * skb->len nor checked for alignment before the store
         		 * through an unsigned short pointer - confirm callers
         		 * guarantee both.
         		 */
1175     		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
1176     	}
1177     }
1178     
1179     #if 0
1180     /* 
1181      * 	Tune the memory allocator for a new MTU size.
          *
          *	This function is compiled out by the enclosing #if 0.  As
          *	written it rounds mtu up with SKB_DATA_ALIGN(), adds the size
          *	of struct skb_shared_info and passes the result to
          *	kmem_add_cache_size().
          *
          *	NOTE(review): kmem_add_cache_size() is not defined in the
          *	visible part of this file - confirm it still exists before
          *	re-enabling this code.
1182      */
1183     void skb_add_mtu(int mtu)
1184     {
1185     	/* Must match allocation in alloc_skb */
1186     	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
1187     
1188     	kmem_add_cache_size(mtu);
1189     }
1190     #endif
1191     
         /*
          *	Initialisation routine for the skb allocator (marked __init).
          *
          *	Creates the "skbuff_head_cache" slab cache with objects of
          *	sizeof(struct sk_buff) and the SLAB_HWCACHE_ALIGN flag, and
          *	panics if the cache cannot be created.  It then initialises the
          *	list head of every skb_head_pool[] entry, one per possible CPU
          *	(NR_CPUS entries).
          */
1192     void __init skb_init(void)
1193     {
1194     	int i;
1195     
         	/*
         	 * NOTE(review): skb_headerinit and NULL are presumably the
         	 * constructor and destructor arguments of kmem_cache_create()
         	 * - confirm against the slab API of this kernel version.
         	 */
1196     	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
1197     					      sizeof(struct sk_buff),
1198     					      0,
1199     					      SLAB_HWCACHE_ALIGN,
1200     					      skb_headerinit, NULL);
         	/* Without the head cache no sk_buff can be allocated at all. */
1201     	if (!skbuff_head_cache)
1202     		panic("cannot create skbuff cache");
1203     
1204     	for (i=0; i<NR_CPUS; i++)
1205     		skb_queue_head_init(&skb_head_pool[i].list);
1206     }
1207