File: /usr/src/linux/mm/slab.c

1     /*
2      * linux/mm/slab.c
3      * Written by Mark Hemment, 1996/97.
4      * (markhe@nextd.demon.co.uk)
5      *
6      * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
7      *
8      * Major cleanup, different bufctl logic, per-cpu arrays
9      *	(c) 2000 Manfred Spraul
10      *
11      * An implementation of the Slab Allocator as described in outline in;
12      *	UNIX Internals: The New Frontiers by Uresh Vahalia
13      *	Pub: Prentice Hall	ISBN 0-13-101908-2
14      * or with a little more detail in;
15      *	The Slab Allocator: An Object-Caching Kernel Memory Allocator
16      *	Jeff Bonwick (Sun Microsystems).
17      *	Presented at: USENIX Summer 1994 Technical Conference
18      *
19      *
20      * The memory is organized in caches, one cache for each object type.
21      * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
22      * Each cache consists out of many slabs (they are small (usually one
23      * page long) and always contiguous), and each slab contains multiple
24      * initialized objects.
25      *
26      * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
27      * normal). If you need a special memory type, then you must create a new
28      * cache for that memory type.
29      *
30      * In order to reduce fragmentation, the slabs are sorted in 3 groups:
31      *   full slabs with 0 free objects
32      *   partial slabs
33      *   empty slabs with no allocated objects
34      *
35      * If partial slabs exist, then new allocations come from these slabs,
36      * otherwise from empty slabs or new slabs are allocated.
37      *
38      * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
39      * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
40      *
41      * On SMP systems, each cache has a short per-cpu head array, most allocs
42      * and frees go into that array, and if that array overflows, then 1/2
43      * of the entries in the array are given back into the global cache.
44      * This reduces the number of spinlock operations.
45      *
46      * The c_cpuarray may not be read with enabled local interrupts.
47      *
48      * SMP synchronization:
49      *  constructors and destructors are called without any locking.
50      *  Several members in kmem_cache_t and slab_t never change, they
51      *	are accessed without any locking.
52      *  The per-cpu arrays are never accessed from the wrong cpu, no locking.
53      *  The non-constant members are protected with a per-cache irq spinlock.
54      *
55      * Further notes from the original documentation:
56      *
57      * 11 April '97.  Started multi-threading - markhe
58      *	The global cache-chain is protected by the semaphore 'cache_chain_sem'.
59      *	The sem is only needed when accessing/extending the cache-chain, which
60      *	can never happen inside an interrupt (kmem_cache_create(),
61      *	kmem_cache_shrink() and kmem_cache_reap()).
62      *
63      *	To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
64      *	may be sleeping and therefore not holding the semaphore/lock), the
65      *	growing field is used.  This also prevents reaping from a cache.
66      *
67      *	At present, each engine can be growing a cache.  This should be blocked.
68      *
69      */
70     
71     #include	<linux/config.h>
72     #include	<linux/slab.h>
73     #include	<linux/interrupt.h>
74     #include	<linux/init.h>
75     #include	<linux/compiler.h>
76     #include	<asm/uaccess.h>
77     
/*
 * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
 *		  SLAB_RED_ZONE & SLAB_POISON.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * STATS	- 1 to collect stats for /proc/slabinfo.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
 *
 * All three switches are driven together by CONFIG_DEBUG_SLAB.
 */

#ifdef CONFIG_DEBUG_SLAB
#define	DEBUG		1
#define	STATS		1
#define	FORCED_DEBUG	1
#else
#define	DEBUG		0
#define	STATS		0
#define	FORCED_DEBUG	0
#endif

/*
 * Parameters for kmem_cache_reap
 */
#define REAP_SCANLEN	10
#define REAP_PERFECT	10

/* Shouldn't this be in a header file somewhere? */
#define	BYTES_PER_WORD		sizeof(void *)

/*
 * Legal flag mask for kmem_cache_create().
 * The debugging flags are only accepted when DEBUG is enabled; any flag
 * outside this mask makes kmem_cache_create() BUG().
 */
#if DEBUG
# define CREATE_MASK	(SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
			 SLAB_NO_REAP | SLAB_CACHE_DMA)
#else
# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA)
#endif

/*
 * kmem_bufctl_t:
 *
 * Bufctls are used for linking the objs within a slab, using
 * linked offsets.
 *
 * This implementation relies on "struct page" for locating the cache &
 * slab an object belongs to.
 * This allows the bufctl structure to be small (one int), but limits
 * the number of objects a slab (not a cache) can contain when off-slab
 * bufctls are used. The limit is the size of the largest general cache
 * that does not use off-slab slabs.
 * For 32bit archs with 4 kB pages, this is 56.
 * This is not serious, as it is only for large objects, when it is unwise
 * to have too many per slab.
 * Note: This limit can be raised by introducing a general cache whose size
 * is less than 512 (PAGE_SIZE>>3), but greater than 256.
 */

#define BUFCTL_END 0xffffFFFF	/* one above SLAB_LIMIT, so never a valid object count */
#define	SLAB_LIMIT 0xffffFFFE	/* upper bound applied to objs-per-slab in kmem_cache_estimate() */
typedef unsigned int kmem_bufctl_t;

/* Max number of objs-per-slab for caches which use off-slab slabs.
 * Needed to avoid a possible looping condition in kmem_cache_grow().
 * Raised in kmem_cache_sizes_init() as the on-slab general caches are created.
 */
static unsigned long offslab_limit;
144     
/*
 * slab_t
 *
 * Manages the objs in a slab. Placed either at the beginning of mem allocated
 * for a slab, or allocated from a general cache.
 * Slabs are chained into three lists: fully used, partial, fully free slabs.
 */
typedef struct slab_s {
	struct list_head	list;		/* link in one of the cache's slab lists */
	unsigned long		colouroff;	/* s_mem - colouroff is the address handed to kmem_freepages() */
	void			*s_mem;		/* including colour offset */
	unsigned int		inuse;		/* num of objs active in slab */
	kmem_bufctl_t		free;
} slab_t;

/*
 * The kmem_bufctl_t array of a slab is stored directly behind its slab_t;
 * this yields a pointer to the first entry.
 */
#define slab_bufctl(slabp) \
	((kmem_bufctl_t *)(((slab_t*)slabp)+1))
162     
/*
 * cpucache_t
 *
 * Per cpu structures
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 */
typedef struct cpucache_s {
	unsigned int avail;	/* number of object pointers currently held in the array */
	unsigned int limit;
} cpucache_t;

/* The array of object pointers starts directly behind the cpucache_t header. */
#define cc_entry(cpucache) \
	((void **)(((cpucache_t*)(cpucache))+1))
/* The cpucache_t slot of the executing cpu (indexed by smp_processor_id()). */
#define cc_data(cachep) \
	((cachep)->cpudata[smp_processor_id()])
/*
 * kmem_cache_t
 *
 * manages a cache.
 * The members are grouped by how often they are touched: 1) on each
 * alloc/free, 2) when slabs are added or removed, 3) on cache creation or
 * removal, 4) per-cpu data, followed by optional statistics.
 */

#define CACHE_NAMELEN	20	/* max name length for a slab cache */

struct kmem_cache_s {
/* 1) each alloc & free */
	/* full, partial first, then free */
	struct list_head	slabs_full;
	struct list_head	slabs_partial;
	struct list_head	slabs_free;
	unsigned int		objsize;	/* object size after alignment/red-zone adjustment */
	unsigned int	 	flags;	/* constant flags */
	unsigned int		num;	/* # of objs per slab */
	spinlock_t		spinlock;
#ifdef CONFIG_SMP
	unsigned int		batchcount;
#endif

/* 2) slab additions /removals */
	/* order of pgs per slab (2^n) */
	unsigned int		gfporder;

	/* force GFP flags, e.g. GFP_DMA */
	unsigned int		gfpflags;

	size_t			colour;		/* cache colouring range */
	unsigned int		colour_off;	/* colour offset */
	unsigned int		colour_next;	/* cache colouring */
	kmem_cache_t		*slabp_cache;	/* general cache providing off-slab slab_t's */
	unsigned int		growing;	/* non-zero stops __kmem_cache_shrink() */
	unsigned int		dflags;		/* dynamic flags */

	/* constructor func */
	void (*ctor)(void *, kmem_cache_t *, unsigned long);

	/* de-constructor func */
	void (*dtor)(void *, kmem_cache_t *, unsigned long);

	unsigned long		failures;

/* 3) cache creation/removal */
	char			name[CACHE_NAMELEN];
	struct list_head	next;		/* link in the global cache chain */
#ifdef CONFIG_SMP
/* 4) per-cpu data */
	cpucache_t		*cpudata[NR_CPUS];
#endif
#if STATS
	/* counters maintained through the STATS_* macros */
	unsigned long		num_active;
	unsigned long		num_allocations;
	unsigned long		high_mark;
	unsigned long		grown;
	unsigned long		reaped;
	unsigned long 		errors;
#ifdef CONFIG_SMP
	atomic_t		allochit;
	atomic_t		allocmiss;
	atomic_t		freehit;
	atomic_t		freemiss;
#endif
#endif
};
245     
/* internal c_flags */
#define	CFLGS_OFF_SLAB	0x010000UL	/* slab management in own cache */
#define	CFLGS_OPTIMIZE	0x020000UL	/* optimized slab lookup */

/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
#define	DFLGS_GROWN	0x000001UL	/* don't reap a recently grown */

/*
 * Flag tests; (x) is a pointer to a cache descriptor.  OFF_SLAB() and
 * OPTIMIZE() test the constant 'flags' word, GROWN() tests the dynamic
 * 'dflags' word (the member used to be misspelt 'dlags', which no
 * structure provides).
 */
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
#define	OPTIMIZE(x)	((x)->flags & CFLGS_OPTIMIZE)
#define	GROWN(x)	((x)->dflags & DFLGS_GROWN)
256     
/*
 * Statistics helpers; (x) is a cache descriptor pointer.
 * With STATS disabled every helper expands to an empty statement, so call
 * sites need no #if of their own.
 */
#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_INC_REAPED(x)	((x)->reaped++)
/* remember the largest num_active seen so far */
#define	STATS_SET_HIGH(x)	do { if ((x)->num_active > (x)->high_mark) \
					(x)->high_mark = (x)->num_active; \
				} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_INC_REAPED(x)	do { } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#endif

/* The hit/miss counters are atomic_t and only exist on SMP builds. */
#if STATS && defined(CONFIG_SMP)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif
288     
#if DEBUG
/* Magic nums for obj red zoning.
 * Placed in the first word before and the first word after an obj.
 *
 * NOTE(review): kmem_slab_destroy() expects RED_MAGIC1 in the objects of a
 * slab that is being released, which suggests the active/inactive labels
 * below may be swapped - confirm against the alloc/free paths.
 */
#define	RED_MAGIC1	0x5A2CF071UL	/* when obj is active */
#define	RED_MAGIC2	0x170FC2A5UL	/* when obj is inactive */

/* ...and for poisoning */
#define	POISON_BYTE	0x5a		/* byte value for poisoning */
#define	POISON_END	0xa5		/* end-byte of poisoning */

#endif

/* maximum size of an obj (in 2^order pages) */
#define	MAX_OBJ_ORDER	5	/* 32 pages */

/*
 * Do not go above this order unless 0 objects fit into the slab.
 * The LO value is the default; kmem_cache_sizes_init() switches to HI on
 * machines with more than 32MB of memory.
 */
#define	BREAK_GFP_ORDER_HI	2
#define	BREAK_GFP_ORDER_LO	1
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;

/*
 * Absolute limit for the gfp order
 */
#define	MAX_GFP_ORDER	5	/* 32 pages */


/* Macros for storing/retrieving the cachep and or slab from the
 * global 'mem_map'. These are used to find the slab an obj belongs to.
 * With kfree(), these are used to find the cache which an obj belongs to.
 * The pointers are kept in the otherwise unused list.next (cache) and
 * list.prev (slab) members of the page.
 */
#define	SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
#define	GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
#define	SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
#define	GET_PAGE_SLAB(pg)     ((slab_t *)(pg)->list.prev)
326     
/* Size description struct for general caches. */
typedef struct cache_sizes {
	size_t		 cs_size;	/* object size served by this pair of caches */
	kmem_cache_t	*cs_cachep;	/* "size-N" cache */
	kmem_cache_t	*cs_dmacachep;	/* "size-N(DMA)" cache */
} cache_sizes_t;

/*
 * Table of general cache sizes, in ascending order and terminated by a
 * zero cs_size entry.  The cache pointers are filled in by
 * kmem_cache_sizes_init().  The 32 byte entry only exists with 4 kB pages.
 */
static cache_sizes_t cache_sizes[] = {
#if PAGE_SIZE == 4096
	{    32,	NULL, NULL},
#endif
	{    64,	NULL, NULL},
	{   128,	NULL, NULL},
	{   256,	NULL, NULL},
	{   512,	NULL, NULL},
	{  1024,	NULL, NULL},
	{  2048,	NULL, NULL},
	{  4096,	NULL, NULL},
	{  8192,	NULL, NULL},
	{ 16384,	NULL, NULL},
	{ 32768,	NULL, NULL},
	{ 65536,	NULL, NULL},
	{131072,	NULL, NULL},
	{     0,	NULL, NULL}
};
352     
/* internal cache of cache description objs */
/* Statically initialised; num and colour are completed by kmem_cache_init(). */
static kmem_cache_t cache_cache = {
	slabs_full:	LIST_HEAD_INIT(cache_cache.slabs_full),
	slabs_partial:	LIST_HEAD_INIT(cache_cache.slabs_partial),
	slabs_free:	LIST_HEAD_INIT(cache_cache.slabs_free),
	objsize:	sizeof(kmem_cache_t),
	flags:		SLAB_NO_REAP,
	spinlock:	SPIN_LOCK_UNLOCKED,
	colour_off:	L1_CACHE_BYTES,
	name:		"kmem_cache",
};

/* Guard access to the cache-chain. */
static struct semaphore	cache_chain_sem;

/* Place maintainer for reaping. */
static kmem_cache_t *clock_searchp = &cache_cache;

/* The chain of all caches is headed by the 'next' member of cache_cache. */
#define cache_chain (cache_cache.next)

#ifdef CONFIG_SMP
/*
 * chicken and egg problem: delay the per-cpu array allocation
 * until the general caches are up.
 * Set to 1 by kmem_cpucache_init().
 */
static int g_cpucache_up;

static void enable_cpucache (kmem_cache_t *cachep);
static void enable_all_cpucaches (void);
#endif
383     
384     /* Cal the num objs, wastage, and bytes left over for a given slab size. */
385     static void kmem_cache_estimate (unsigned long gfporder, size_t size,
386     		 int flags, size_t *left_over, unsigned int *num)
387     {
388     	int i;
389     	size_t wastage = PAGE_SIZE<<gfporder;
390     	size_t extra = 0;
391     	size_t base = 0;
392     
393     	if (!(flags & CFLGS_OFF_SLAB)) {
394     		base = sizeof(slab_t);
395     		extra = sizeof(kmem_bufctl_t);
396     	}
397     	i = 0;
398     	while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
399     		i++;
400     	if (i > 0)
401     		i--;
402     
403     	if (i > SLAB_LIMIT)
404     		i = SLAB_LIMIT;
405     
406     	*num = i;
407     	wastage -= i*size;
408     	wastage -= L1_CACHE_ALIGN(base+i*extra);
409     	*left_over = wastage;
410     }
411     
412     /* Initialisation - setup the `cache' cache. */
413     void __init kmem_cache_init(void)
414     {
415     	size_t left_over;
416     
417     	init_MUTEX(&cache_chain_sem);
418     	INIT_LIST_HEAD(&cache_chain);
419     
420     	kmem_cache_estimate(0, cache_cache.objsize, 0,
421     			&left_over, &cache_cache.num);
422     	if (!cache_cache.num)
423     		BUG();
424     
425     	cache_cache.colour = left_over/cache_cache.colour_off;
426     	cache_cache.colour_next = 0;
427     }
428     
429     
430     /* Initialisation - setup remaining internal and general caches.
431      * Called after the gfp() functions have been enabled, and before smp_init().
432      */
433     void __init kmem_cache_sizes_init(void)
434     {
435     	cache_sizes_t *sizes = cache_sizes;
436     	char name[20];
437     	/*
438     	 * Fragmentation resistance on low memory - only use bigger
439     	 * page orders on machines with more than 32MB of memory.
440     	 */
441     	if (num_physpages > (32 << 20) >> PAGE_SHIFT)
442     		slab_break_gfp_order = BREAK_GFP_ORDER_HI;
443     	do {
444     		/* For performance, all the general caches are L1 aligned.
445     		 * This should be particularly beneficial on SMP boxes, as it
446     		 * eliminates "false sharing".
447     		 * Note for systems short on memory removing the alignment will
448     		 * allow tighter packing of the smaller caches. */
449     		sprintf(name,"size-%Zd",sizes->cs_size);
450     		if (!(sizes->cs_cachep =
451     			kmem_cache_create(name, sizes->cs_size,
452     					0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
453     			BUG();
454     		}
455     
456     		/* Inc off-slab bufctl limit until the ceiling is hit. */
457     		if (!(OFF_SLAB(sizes->cs_cachep))) {
458     			offslab_limit = sizes->cs_size-sizeof(slab_t);
459     			offslab_limit /= 2;
460     		}
461     		sprintf(name, "size-%Zd(DMA)",sizes->cs_size);
462     		sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
463     			      SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
464     		if (!sizes->cs_dmacachep)
465     			BUG();
466     		sizes++;
467     	} while (sizes->cs_size);
468     }
469     
/*
 * Initcall: on SMP, mark the per-cpu arrays as usable and enable them for
 * all caches.  Always returns 0.
 */
int __init kmem_cpucache_init(void)
{
#ifdef CONFIG_SMP
	/* set the flag first: kmem_cache_create() tests it before calling enable_cpucache() */
	g_cpucache_up = 1;
	enable_all_cpucaches();
#endif
	return 0;
}

__initcall(kmem_cpucache_init);
480     
481     /* Interface to system's page allocator. No need to hold the cache-lock.
482      */
483     static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
484     {
485     	void	*addr;
486     
487     	/*
488     	 * If we requested dmaable memory, we will get it. Even if we
489     	 * did not request dmaable memory, we might get it, but that
490     	 * would be relatively rare and ignorable.
491     	 */
492     	flags |= cachep->gfpflags;
493     	addr = (void*) __get_free_pages(flags, cachep->gfporder);
494     	/* Assume that now we have the pages no one else can legally
495     	 * messes with the 'struct page's.
496     	 * However vm_scan() might try to test the structure to see if
497     	 * it is a named-page or buffer-page.  The members it tests are
498     	 * of no interest here.....
499     	 */
500     	return addr;
501     }
502     
503     /* Interface to system's page release. */
504     static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
505     {
506     	unsigned long i = (1<<cachep->gfporder);
507     	struct page *page = virt_to_page(addr);
508     
509     	/* free_pages() does not clear the type bit - we do that.
510     	 * The pages have been unlinked from their cache-slab,
511     	 * but their 'struct page's might be accessed in
512     	 * vm_scan(). Shouldn't be a worry.
513     	 */
514     	while (i--) {
515     		PageClearSlab(page);
516     		page++;
517     	}
518     	free_pages((unsigned long)addr, cachep->gfporder);
519     }
520     
521     #if DEBUG
522     static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)
523     {
524     	int size = cachep->objsize;
525     	if (cachep->flags & SLAB_RED_ZONE) {
526     		addr += BYTES_PER_WORD;
527     		size -= 2*BYTES_PER_WORD;
528     	}
529     	memset(addr, POISON_BYTE, size);
530     	*(unsigned char *)(addr+size-1) = POISON_END;
531     }
532     
533     static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)
534     {
535     	int size = cachep->objsize;
536     	void *end;
537     	if (cachep->flags & SLAB_RED_ZONE) {
538     		addr += BYTES_PER_WORD;
539     		size -= 2*BYTES_PER_WORD;
540     	}
541     	end = memchr(addr, POISON_END, size);
542     	if (end != (addr+size-1))
543     		return 1;
544     	return 0;
545     }
546     #endif
547     
548     /* Destroy all the objs in a slab, and release the mem back to the system.
549      * Before calling the slab must have been unlinked from the cache.
550      * The cache-lock is not held/needed.
551      */
552     static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
553     {
554     	if (cachep->dtor
555     #if DEBUG
556     		|| cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
557     #endif
558     	) {
559     		int i;
560     		for (i = 0; i < cachep->num; i++) {
561     			void* objp = slabp->s_mem+cachep->objsize*i;
562     #if DEBUG
563     			if (cachep->flags & SLAB_RED_ZONE) {
564     				if (*((unsigned long*)(objp)) != RED_MAGIC1)
565     					BUG();
566     				if (*((unsigned long*)(objp + cachep->objsize
567     						-BYTES_PER_WORD)) != RED_MAGIC1)
568     					BUG();
569     				objp += BYTES_PER_WORD;
570     			}
571     #endif
572     			if (cachep->dtor)
573     				(cachep->dtor)(objp, cachep, 0);
574     #if DEBUG
575     			if (cachep->flags & SLAB_RED_ZONE) {
576     				objp -= BYTES_PER_WORD;
577     			}	
578     			if ((cachep->flags & SLAB_POISON)  &&
579     				kmem_check_poison_obj(cachep, objp))
580     				BUG();
581     #endif
582     		}
583     	}
584     
585     	kmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
586     	if (OFF_SLAB(cachep))
587     		kmem_cache_free(cachep->slabp_cache, slabp);
588     }
589     
590     /**
591      * kmem_cache_create - Create a cache.
592      * @name: A string which is used in /proc/slabinfo to identify this cache.
593      * @size: The size of objects to be created in this cache.
594      * @offset: The offset to use within the page.
595      * @flags: SLAB flags
596      * @ctor: A constructor for the objects.
597      * @dtor: A destructor for the objects.
598      *
599      * Returns a ptr to the cache on success, NULL on failure.
600      * Cannot be called within a int, but can be interrupted.
601      * The @ctor is run when new pages are allocated by the cache
602      * and the @dtor is run before the pages are handed back.
603      * The flags are
604      *
605      * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
606      * to catch references to uninitialised memory.
607      *
608      * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
609      * for buffer overruns.
610      *
611      * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
612      * memory pressure.
613      *
614      * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
615      * cacheline.  This can be beneficial if you're counting cycles as closely
616      * as davem.
617      */
618     kmem_cache_t *
619     kmem_cache_create (const char *name, size_t size, size_t offset,
620     	unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
621     	void (*dtor)(void*, kmem_cache_t *, unsigned long))
622     {
623     	const char *func_nm = KERN_ERR "kmem_create: ";
624     	size_t left_over, align, slab_size;
625     	kmem_cache_t *cachep = NULL;
626     
627     	/*
628     	 * Sanity checks... these are all serious usage bugs.
629     	 */
630     	if ((!name) ||
631     		((strlen(name) >= CACHE_NAMELEN - 1)) ||
632     		in_interrupt() ||
633     		(size < BYTES_PER_WORD) ||
634     		(size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
635     		(dtor && !ctor) ||
636     		(offset < 0 || offset > size))
637     			BUG();
638     
639     #if DEBUG
640     	if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
641     		/* No constructor, but inital state check requested */
642     		printk("%sNo con, but init state check requested - %s\n", func_nm, name);
643     		flags &= ~SLAB_DEBUG_INITIAL;
644     	}
645     
646     	if ((flags & SLAB_POISON) && ctor) {
647     		/* request for poisoning, but we can't do that with a constructor */
648     		printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
649     		flags &= ~SLAB_POISON;
650     	}
651     #if FORCED_DEBUG
652     	if (size < (PAGE_SIZE>>3))
653     		/*
654     		 * do not red zone large object, causes severe
655     		 * fragmentation.
656     		 */
657     		flags |= SLAB_RED_ZONE;
658     	if (!ctor)
659     		flags |= SLAB_POISON;
660     #endif
661     #endif
662     
663     	/*
664     	 * Always checks flags, a caller might be expecting debug
665     	 * support which isn't available.
666     	 */
667     	if (flags & ~CREATE_MASK)
668     		BUG();
669     
670     	/* Get cache's description obj. */
671     	cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
672     	if (!cachep)
673     		goto opps;
674     	memset(cachep, 0, sizeof(kmem_cache_t));
675     
676     	/* Check that size is in terms of words.  This is needed to avoid
677     	 * unaligned accesses for some archs when redzoning is used, and makes
678     	 * sure any on-slab bufctl's are also correctly aligned.
679     	 */
680     	if (size & (BYTES_PER_WORD-1)) {
681     		size += (BYTES_PER_WORD-1);
682     		size &= ~(BYTES_PER_WORD-1);
683     		printk("%sForcing size word alignment - %s\n", func_nm, name);
684     	}
685     	
686     #if DEBUG
687     	if (flags & SLAB_RED_ZONE) {
688     		/*
689     		 * There is no point trying to honour cache alignment
690     		 * when redzoning.
691     		 */
692     		flags &= ~SLAB_HWCACHE_ALIGN;
693     		size += 2*BYTES_PER_WORD;	/* words for redzone */
694     	}
695     #endif
696     	align = BYTES_PER_WORD;
697     	if (flags & SLAB_HWCACHE_ALIGN)
698     		align = L1_CACHE_BYTES;
699     
700     	/* Determine if the slab management is 'on' or 'off' slab. */
701     	if (size >= (PAGE_SIZE>>3))
702     		/*
703     		 * Size is large, assume best to place the slab management obj
704     		 * off-slab (should allow better packing of objs).
705     		 */
706     		flags |= CFLGS_OFF_SLAB;
707     
708     	if (flags & SLAB_HWCACHE_ALIGN) {
709     		/* Need to adjust size so that objs are cache aligned. */
710     		/* Small obj size, can get at least two per cache line. */
711     		/* FIXME: only power of 2 supported, was better */
712     		while (size < align/2)
713     			align /= 2;
714     		size = (size+align-1)&(~(align-1));
715     	}
716     
717     	/* Cal size (in pages) of slabs, and the num of objs per slab.
718     	 * This could be made much more intelligent.  For now, try to avoid
719     	 * using high page-orders for slabs.  When the gfp() funcs are more
720     	 * friendly towards high-order requests, this should be changed.
721     	 */
722     	do {
723     		unsigned int break_flag = 0;
724     cal_wastage:
725     		kmem_cache_estimate(cachep->gfporder, size, flags,
726     						&left_over, &cachep->num);
727     		if (break_flag)
728     			break;
729     		if (cachep->gfporder >= MAX_GFP_ORDER)
730     			break;
731     		if (!cachep->num)
732     			goto next;
733     		if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
734     			/* Oops, this num of objs will cause problems. */
735     			cachep->gfporder--;
736     			break_flag++;
737     			goto cal_wastage;
738     		}
739     
740     		/*
741     		 * Large num of objs is good, but v. large slabs are currently
742     		 * bad for the gfp()s.
743     		 */
744     		if (cachep->gfporder >= slab_break_gfp_order)
745     			break;
746     
747     		if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
748     			break;	/* Acceptable internal fragmentation. */
749     next:
750     		cachep->gfporder++;
751     	} while (1);
752     
753     	if (!cachep->num) {
754     		printk("kmem_cache_create: couldn't create cache %s.\n", name);
755     		kmem_cache_free(&cache_cache, cachep);
756     		cachep = NULL;
757     		goto opps;
758     	}
759     	slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));
760     
761     	/*
762     	 * If the slab has been placed off-slab, and we have enough space then
763     	 * move it on-slab. This is at the expense of any extra colouring.
764     	 */
765     	if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
766     		flags &= ~CFLGS_OFF_SLAB;
767     		left_over -= slab_size;
768     	}
769     
770     	/* Offset must be a multiple of the alignment. */
771     	offset += (align-1);
772     	offset &= ~(align-1);
773     	if (!offset)
774     		offset = L1_CACHE_BYTES;
775     	cachep->colour_off = offset;
776     	cachep->colour = left_over/offset;
777     
778     	/* init remaining fields */
779     	if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
780     		flags |= CFLGS_OPTIMIZE;
781     
782     	cachep->flags = flags;
783     	cachep->gfpflags = 0;
784     	if (flags & SLAB_CACHE_DMA)
785     		cachep->gfpflags |= GFP_DMA;
786     	spin_lock_init(&cachep->spinlock);
787     	cachep->objsize = size;
788     	INIT_LIST_HEAD(&cachep->slabs_full);
789     	INIT_LIST_HEAD(&cachep->slabs_partial);
790     	INIT_LIST_HEAD(&cachep->slabs_free);
791     
792     	if (flags & CFLGS_OFF_SLAB)
793     		cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
794     	cachep->ctor = ctor;
795     	cachep->dtor = dtor;
796     	/* Copy name over so we don't have problems with unloaded modules */
797     	strcpy(cachep->name, name);
798     
799     #ifdef CONFIG_SMP
800     	if (g_cpucache_up)
801     		enable_cpucache(cachep);
802     #endif
803     	/* Need the semaphore to access the chain. */
804     	down(&cache_chain_sem);
805     	{
806     		struct list_head *p;
807     
808     		list_for_each(p, &cache_chain) {
809     			kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
810     
811     			/* The name field is constant - no lock needed. */
812     			if (!strcmp(pc->name, name))
813     				BUG();
814     		}
815     	}
816     
817     	/* There is no reason to lock our new cache before we
818     	 * link it in - no one knows about it yet...
819     	 */
820     	list_add(&cachep->next, &cache_chain);
821     	up(&cache_chain_sem);
822     opps:
823     	return cachep;
824     }
825     
826     
#if DEBUG
/*
 * Report whether @cachep is linked into the global cache chain.
 * Walks the chain under cache_chain_sem; returns 1 if found, 0 otherwise.
 * Without DEBUG the check is compiled out and always yields 1.
 */
static int is_chained_kmem_cache(kmem_cache_t * cachep)
{
	struct list_head *pos;
	int found = 0;

	down(&cache_chain_sem);
	list_for_each(pos, &cache_chain) {
		if (pos != &cachep->next)
			continue;
		found = 1;
		break;
	}
	up(&cache_chain_sem);

	return found;
}
#else
#define is_chained_kmem_cache(x) 1
#endif
852     
853     #ifdef CONFIG_SMP
/*
 * Run func(arg) on every CPU: first on the calling CPU with local
 * interrupts disabled, then on the others through smp_call_function().
 * A failing smp_call_function() is a BUG().
 */
static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
{
	int failed;

	local_irq_disable();
	func(arg);
	local_irq_enable();

	failed = smp_call_function(func, arg, 1, 1);
	if (failed)
		BUG();
}
/*
 * Argument block for do_ccupdate_local(): the cache being updated and, per
 * cpu, the cpucache_t pointer to swap in (on return: the one swapped out).
 */
typedef struct ccupdate_struct_s
{
	kmem_cache_t *cachep;
	cpucache_t *new[NR_CPUS];
} ccupdate_struct_t;
871     
872     static void do_ccupdate_local(void *info)
873     {
874     	ccupdate_struct_t *new = (ccupdate_struct_t *)info;
875     	cpucache_t *old = cc_data(new->cachep);
876     	
877     	cc_data(new->cachep) = new->new[smp_processor_id()];
878     	new->new[smp_processor_id()] = old;
879     }
880     
881     static void free_block (kmem_cache_t* cachep, void** objpp, int len);
882     
883     static void drain_cpu_caches(kmem_cache_t *cachep)
884     {
885     	ccupdate_struct_t new;
886     	int i;
887     
888     	memset(&new.new,0,sizeof(new.new));
889     
890     	new.cachep = cachep;
891     
892     	down(&cache_chain_sem);
893     	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
894     
895     	for (i = 0; i < smp_num_cpus; i++) {
896     		cpucache_t* ccold = new.new[cpu_logical_map(i)];
897     		if (!ccold || (ccold->avail == 0))
898     			continue;
899     		local_irq_disable();
900     		free_block(cachep, cc_entry(ccold), ccold->avail);
901     		local_irq_enable();
902     		ccold->avail = 0;
903     	}
904     	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
905     	up(&cache_chain_sem);
906     }
907     
908     #else
909     #define drain_cpu_caches(cachep)	do { } while (0)
910     #endif
911     
912     static int __kmem_cache_shrink(kmem_cache_t *cachep)
913     {
914     	slab_t *slabp;
915     	int ret;
916     
917     	drain_cpu_caches(cachep);
918     
919     	spin_lock_irq(&cachep->spinlock);
920     
921     	/* If the cache is growing, stop shrinking. */
922     	while (!cachep->growing) {
923     		struct list_head *p;
924     
925     		p = cachep->slabs_free.prev;
926     		if (p == &cachep->slabs_free)
927     			break;
928     
929     		slabp = list_entry(cachep->slabs_free.prev, slab_t, list);
930     #if DEBUG
931     		if (slabp->inuse)
932     			BUG();
933     #endif
934     		list_del(&slabp->list);
935     
936     		spin_unlock_irq(&cachep->spinlock);
937     		kmem_slab_destroy(cachep, slabp);
938     		spin_lock_irq(&cachep->spinlock);
939     	}
940     	ret = !list_empty(&cachep->slabs_full) || !list_empty(&cachep->slabs_partial);
941     	spin_unlock_irq(&cachep->spinlock);
942     	return ret;
943     }
944     
945     /**
946      * kmem_cache_shrink - Shrink a cache.
947      * @cachep: The cache to shrink.
948      *
949      * Releases as many slabs as possible for a cache.
950      * To help debugging, a zero exit status indicates all slabs were released.
951      */
952     int kmem_cache_shrink(kmem_cache_t *cachep)
953     {
954     	if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep))
955     		BUG();
956     
957     	return __kmem_cache_shrink(cachep);
958     }
959     
960     /**
961      * kmem_cache_destroy - delete a cache
962      * @cachep: the cache to destroy
963      *
964      * Remove a kmem_cache_t object from the slab cache.
965      * Returns 0 on success.
966      *
967      * It is expected this function will be called by a module when it is
968      * unloaded.  This will remove the cache completely, and avoid a duplicate
969      * cache being allocated each time a module is loaded and unloaded, if the
970      * module doesn't have persistent in-kernel storage across loads and unloads.
971      *
972      * The caller must guarantee that noone will allocate memory from the cache
973      * during the kmem_cache_destroy().
974      */
975     int kmem_cache_destroy (kmem_cache_t * cachep)
976     {
977     	if (!cachep || in_interrupt() || cachep->growing)
978     		BUG();
979     
980     	/* Find the cache in the chain of caches. */
981     	down(&cache_chain_sem);
982     	/* the chain is never empty, cache_cache is never destroyed */
983     	if (clock_searchp == cachep)
984     		clock_searchp = list_entry(cachep->next.next,
985     						kmem_cache_t, next);
986     	list_del(&cachep->next);
987     	up(&cache_chain_sem);
988     
989     	if (__kmem_cache_shrink(cachep)) {
990     		printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
991     		       cachep);
992     		down(&cache_chain_sem);
993     		list_add(&cachep->next,&cache_chain);
994     		up(&cache_chain_sem);
995     		return 1;
996     	}
997     #ifdef CONFIG_SMP
998     	{
999     		int i;
1000     		for (i = 0; i < NR_CPUS; i++)
1001     			kfree(cachep->cpudata[i]);
1002     	}
1003     #endif
1004     	kmem_cache_free(&cache_cache, cachep);
1005     
1006     	return 0;
1007     }
1008     
1009     /* Get the memory for a slab management obj. */
1010     static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
1011     			void *objp, int colour_off, int local_flags)
1012     {
1013     	slab_t *slabp;
1014     	
1015     	if (OFF_SLAB(cachep)) {
1016     		/* Slab management obj is off-slab. */
1017     		slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
1018     		if (!slabp)
1019     			return NULL;
1020     	} else {
1021     		/* FIXME: change to
1022     			slabp = objp
1023     		 * if you enable OPTIMIZE
1024     		 */
1025     		slabp = objp+colour_off;
1026     		colour_off += L1_CACHE_ALIGN(cachep->num *
1027     				sizeof(kmem_bufctl_t) + sizeof(slab_t));
1028     	}
1029     	slabp->inuse = 0;
1030     	slabp->colouroff = colour_off;
1031     	slabp->s_mem = objp+colour_off;
1032     
1033     	return slabp;
1034     }
1035     
1036     static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
1037     			slab_t * slabp, unsigned long ctor_flags)
1038     {
1039     	int i;
1040     
1041     	for (i = 0; i < cachep->num; i++) {
1042     		void* objp = slabp->s_mem+cachep->objsize*i;
1043     #if DEBUG
1044     		if (cachep->flags & SLAB_RED_ZONE) {
1045     			*((unsigned long*)(objp)) = RED_MAGIC1;
1046     			*((unsigned long*)(objp + cachep->objsize -
1047     					BYTES_PER_WORD)) = RED_MAGIC1;
1048     			objp += BYTES_PER_WORD;
1049     		}
1050     #endif
1051     
1052     		/*
1053     		 * Constructors are not allowed to allocate memory from
1054     		 * the same cache which they are a constructor for.
1055     		 * Otherwise, deadlock. They must also be threaded.
1056     		 */
1057     		if (cachep->ctor)
1058     			cachep->ctor(objp, cachep, ctor_flags);
1059     #if DEBUG
1060     		if (cachep->flags & SLAB_RED_ZONE)
1061     			objp -= BYTES_PER_WORD;
1062     		if (cachep->flags & SLAB_POISON)
1063     			/* need to poison the objs */
1064     			kmem_poison_obj(cachep, objp);
1065     		if (cachep->flags & SLAB_RED_ZONE) {
1066     			if (*((unsigned long*)(objp)) != RED_MAGIC1)
1067     				BUG();
1068     			if (*((unsigned long*)(objp + cachep->objsize -
1069     					BYTES_PER_WORD)) != RED_MAGIC1)
1070     				BUG();
1071     		}
1072     #endif
1073     		slab_bufctl(slabp)[i] = i+1;
1074     	}
1075     	slab_bufctl(slabp)[i-1] = BUFCTL_END;
1076     	slabp->free = 0;
1077     }
1078     
1079     /*
1080      * Grow (by 1) the number of slabs within a cache.  This is called by
1081      * kmem_cache_alloc() when there are no active objs left in a cache.
1082      */
1083     static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
1084     {
1085     	slab_t	*slabp;
1086     	struct page	*page;
1087     	void		*objp;
1088     	size_t		 offset;
1089     	unsigned int	 i, local_flags;
1090     	unsigned long	 ctor_flags;
1091     	unsigned long	 save_flags;
1092     
1093     	/* Be lazy and only check for valid flags here,
1094      	 * keeping it out of the critical path in kmem_cache_alloc().
1095     	 */
1096     	if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
1097     		BUG();
1098     	if (flags & SLAB_NO_GROW)
1099     		return 0;
1100     
1101     	/*
1102     	 * The test for missing atomic flag is performed here, rather than
1103     	 * the more obvious place, simply to reduce the critical path length
1104     	 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
1105     	 * will eventually be caught here (where it matters).
1106     	 */
1107     	if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
1108     		BUG();
1109     
1110     	ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1111     	local_flags = (flags & SLAB_LEVEL_MASK);
1112     	if (local_flags == SLAB_ATOMIC)
1113     		/*
1114     		 * Not allowed to sleep.  Need to tell a constructor about
1115     		 * this - it might need to know...
1116     		 */
1117     		ctor_flags |= SLAB_CTOR_ATOMIC;
1118     
1119     	/* About to mess with non-constant members - lock. */
1120     	spin_lock_irqsave(&cachep->spinlock, save_flags);
1121     
1122     	/* Get colour for the slab, and cal the next value. */
1123     	offset = cachep->colour_next;
1124     	cachep->colour_next++;
1125     	if (cachep->colour_next >= cachep->colour)
1126     		cachep->colour_next = 0;
1127     	offset *= cachep->colour_off;
1128     	cachep->dflags |= DFLGS_GROWN;
1129     
1130     	cachep->growing++;
1131     	spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1132     
1133     	/* A series of memory allocations for a new slab.
1134     	 * Neither the cache-chain semaphore, or cache-lock, are
1135     	 * held, but the incrementing c_growing prevents this
1136     	 * cache from being reaped or shrunk.
1137     	 * Note: The cache could be selected in for reaping in
1138     	 * kmem_cache_reap(), but when the final test is made the
1139     	 * growing value will be seen.
1140     	 */
1141     
1142     	/* Get mem for the objs. */
1143     	if (!(objp = kmem_getpages(cachep, flags)))
1144     		goto failed;
1145     
1146     	/* Get slab management. */
1147     	if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
1148     		goto opps1;
1149     
1150     	/* Nasty!!!!!! I hope this is OK. */
1151     	i = 1 << cachep->gfporder;
1152     	page = virt_to_page(objp);
1153     	do {
1154     		SET_PAGE_CACHE(page, cachep);
1155     		SET_PAGE_SLAB(page, slabp);
1156     		PageSetSlab(page);
1157     		page++;
1158     	} while (--i);
1159     
1160     	kmem_cache_init_objs(cachep, slabp, ctor_flags);
1161     
1162     	spin_lock_irqsave(&cachep->spinlock, save_flags);
1163     	cachep->growing--;
1164     
1165     	/* Make slab active. */
1166     	list_add_tail(&slabp->list, &cachep->slabs_free);
1167     	STATS_INC_GROWN(cachep);
1168     	cachep->failures = 0;
1169     
1170     	spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1171     	return 1;
1172     opps1:
1173     	kmem_freepages(cachep, objp);
1174     failed:
1175     	spin_lock_irqsave(&cachep->spinlock, save_flags);
1176     	cachep->growing--;
1177     	spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1178     	return 0;
1179     }
1180     
/*
 * Perform extra freeing checks:
 * - detect double free
 * - detect bad pointers.
 * Called with the cache-lock held.
 *
 * Every failed check ends in BUG(); otherwise 0 is returned.
 */

#if DEBUG
static int kmem_extra_free_checks (kmem_cache_t * cachep,
			slab_t *slabp, void * objp)
{
	unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
	int i;

	/* The index must be in range ... */
	if (objnr >= cachep->num)
		BUG();
	/* ... and the pointer must sit exactly on an object boundary. */
	if (slabp->s_mem + objnr*cachep->objsize != objp)
		BUG();

	/* Check slab's freelist to see if this obj is there. */
	i = slabp->free;
	while (i != BUFCTL_END) {
		if (i == objnr)
			BUG();
		i = slab_bufctl(slabp)[i];
	}
	return 0;
}
#endif
1208     
1209     static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
1210     {
1211     	if (flags & SLAB_DMA) {
1212     		if (!(cachep->gfpflags & GFP_DMA))
1213     			BUG();
1214     	} else {
1215     		if (cachep->gfpflags & GFP_DMA)
1216     			BUG();
1217     	}
1218     }
1219     
1220     static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
1221     						slab_t *slabp)
1222     {
1223     	void *objp;
1224     
1225     	STATS_INC_ALLOCED(cachep);
1226     	STATS_INC_ACTIVE(cachep);
1227     	STATS_SET_HIGH(cachep);
1228     
1229     	/* get obj pointer */
1230     	slabp->inuse++;
1231     	objp = slabp->s_mem + slabp->free*cachep->objsize;
1232     	slabp->free=slab_bufctl(slabp)[slabp->free];
1233     
1234     	if (unlikely(slabp->free == BUFCTL_END)) {
1235     		list_del(&slabp->list);
1236     		list_add(&slabp->list, &cachep->slabs_full);
1237     	}
1238     #if DEBUG
1239     	if (cachep->flags & SLAB_POISON)
1240     		if (kmem_check_poison_obj(cachep, objp))
1241     			BUG();
1242     	if (cachep->flags & SLAB_RED_ZONE) {
1243     		/* Set alloc red-zone, and check old one. */
1244     		if (xchg((unsigned long *)objp, RED_MAGIC2) !=
1245     							 RED_MAGIC1)
1246     			BUG();
1247     		if (xchg((unsigned long *)(objp+cachep->objsize -
1248     			  BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
1249     			BUG();
1250     		objp += BYTES_PER_WORD;
1251     	}
1252     #endif
1253     	return objp;
1254     }
1255     
/*
 * Returns a ptr to an obj in the given cache: the first partial slab
 * is preferred, otherwise the first free slab is moved onto the
 * partial list and used.
 *
 * The caller must guarantee synchronization, and the expanding
 * function must provide a label `alloc_new_slab', which is jumped to
 * when neither list has a slab to offer.
 * #define for the goto optimization 8-)
 */
#define kmem_cache_alloc_one(cachep)				\
({								\
	struct list_head *partial_head = &(cachep)->slabs_partial; \
	struct list_head *pick = partial_head->next;		\
	slab_t *src;						\
								\
	if (unlikely(pick == partial_head)) {			\
		struct list_head *free_head = &(cachep)->slabs_free; \
								\
		pick = free_head->next;				\
		if (unlikely(pick == free_head))		\
			goto alloc_new_slab;			\
		list_del(pick);					\
		list_add(pick, partial_head);			\
	}							\
								\
	src = list_entry(pick, slab_t, list);			\
	kmem_cache_alloc_one_tail(cachep, src);			\
})
1281     
1282     #ifdef CONFIG_SMP
1283     void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags)
1284     {
1285     	int batchcount = cachep->batchcount;
1286     	cpucache_t* cc = cc_data(cachep);
1287     
1288     	spin_lock(&cachep->spinlock);
1289     	while (batchcount--) {
1290     		struct list_head * slabs_partial, * entry;
1291     		slab_t *slabp;
1292     		/* Get slab alloc is to come from. */
1293     		slabs_partial = &(cachep)->slabs_partial;
1294     		entry = slabs_partial->next;
1295     		if (unlikely(entry == slabs_partial)) {
1296     			struct list_head * slabs_free;
1297     			slabs_free = &(cachep)->slabs_free;
1298     			entry = slabs_free->next;
1299     			if (unlikely(entry == slabs_free))
1300     				break;
1301     			list_del(entry);
1302     			list_add(entry, slabs_partial);
1303     		}
1304     
1305     		slabp = list_entry(entry, slab_t, list);
1306     		cc_entry(cc)[cc->avail++] =
1307     				kmem_cache_alloc_one_tail(cachep, slabp);
1308     	}
1309     	spin_unlock(&cachep->spinlock);
1310     
1311     	if (cc->avail)
1312     		return cc_entry(cc)[--cc->avail];
1313     	return NULL;
1314     }
1315     #endif
1316     
/*
 * Common allocation path behind kmem_cache_alloc() and kmalloc().
 *
 * With local interrupts disabled, an object is taken from the per-cpu
 * array (SMP, when one is installed), refilled in a batch if that array
 * is empty, or taken directly from the slab lists.  If no slab has a
 * free object the cache is grown and the whole attempt is repeated.
 *
 * Returns the object, or NULL when kmem_cache_grow() fails.
 */
static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
{
	unsigned long save_flags;
	void* objp;

	/* Check that the DMA bits of flags and cache agree. */
	kmem_cache_alloc_head(cachep, flags);
try_again:
	local_irq_save(save_flags);
#ifdef CONFIG_SMP
	{
		cpucache_t *cc = cc_data(cachep);

		if (cc) {
			if (cc->avail) {
				/* Fast path: pop from the per-cpu array. */
				STATS_INC_ALLOCHIT(cachep);
				objp = cc_entry(cc)[--cc->avail];
			} else {
				/* Array empty: try to refill it in one batch. */
				STATS_INC_ALLOCMISS(cachep);
				objp = kmem_cache_alloc_batch(cachep,flags);
				if (!objp)
					goto alloc_new_slab_nolock;
			}
		} else {
			/* No per-cpu array: go to the slab lists under the lock.
			 * kmem_cache_alloc_one() may jump to alloc_new_slab
			 * with the spinlock still held.
			 */
			spin_lock(&cachep->spinlock);
			objp = kmem_cache_alloc_one(cachep);
			spin_unlock(&cachep->spinlock);
		}
	}
#else
	/* kmem_cache_alloc_one() may jump to alloc_new_slab. */
	objp = kmem_cache_alloc_one(cachep);
#endif
	local_irq_restore(save_flags);
	return objp;
alloc_new_slab:
#ifdef CONFIG_SMP
	/* Reached from inside kmem_cache_alloc_one(): drop the lock first. */
	spin_unlock(&cachep->spinlock);
alloc_new_slab_nolock:
#endif
	local_irq_restore(save_flags);
	if (kmem_cache_grow(cachep, flags))
		/* Someone may have stolen our objs.  Doesn't matter, we'll
		 * just come back here again.
		 */
		goto try_again;
	return NULL;
}
1363     
1364     /*
1365      * Release an obj back to its cache. If the obj has a constructed
1366      * state, it should be in this state _before_ it is released.
1367      * - caller is responsible for the synchronization
1368      */
1369     
#if DEBUG
/*
 * Debug-only checks on the struct page behind a pointer being freed.
 * Without DEBUG, CHECK_PAGE() expands to nothing and CHECK_NR() is
 * not defined.
 *
 * NOTE: the printk() calls below print a variable named `objp' that
 * must be in scope where the macro is expanded; it is not a macro
 * parameter.
 */

/* CHECK_NR: log and BUG() if @pg fails VALID_PAGE(). */
# define CHECK_NR(pg)						\
	do {							\
		if (!VALID_PAGE(pg)) {				\
			printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \
				(unsigned long)objp);		\
			BUG();					\
		} \
	} while (0)
/* CHECK_PAGE: CHECK_NR(), then log and BUG() if the page is not PageSlab(). */
# define CHECK_PAGE(page)					\
	do {							\
		CHECK_NR(page);					\
		if (!PageSlab(page)) {				\
			printk(KERN_ERR "kfree: bad ptr %lxh.\n", \
				(unsigned long)objp);		\
			BUG();					\
		}						\
	} while (0)

#else
# define CHECK_PAGE(pg)	do { } while (0)
#endif
1392     
1393     static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
1394     {
1395     	slab_t* slabp;
1396     
1397     	CHECK_PAGE(virt_to_page(objp));
1398     	/* reduces memory footprint
1399     	 *
1400     	if (OPTIMIZE(cachep))
1401     		slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
1402     	 else
1403     	 */
1404     	slabp = GET_PAGE_SLAB(virt_to_page(objp));
1405     
1406     #if DEBUG
1407     	if (cachep->flags & SLAB_DEBUG_INITIAL)
1408     		/* Need to call the slab's constructor so the
1409     		 * caller can perform a verify of its state (debugging).
1410     		 * Called without the cache-lock held.
1411     		 */
1412     		cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1413     
1414     	if (cachep->flags & SLAB_RED_ZONE) {
1415     		objp -= BYTES_PER_WORD;
1416     		if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
1417     			/* Either write before start, or a double free. */
1418     			BUG();
1419     		if (xchg((unsigned long *)(objp+cachep->objsize -
1420     				BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
1421     			/* Either write past end, or a double free. */
1422     			BUG();
1423     	}
1424     	if (cachep->flags & SLAB_POISON)
1425     		kmem_poison_obj(cachep, objp);
1426     	if (kmem_extra_free_checks(cachep, slabp, objp))
1427     		return;
1428     #endif
1429     	{
1430     		unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1431     
1432     		slab_bufctl(slabp)[objnr] = slabp->free;
1433     		slabp->free = objnr;
1434     	}
1435     	STATS_DEC_ACTIVE(cachep);
1436     	
1437     	/* fixup slab chains */
1438     	{
1439     		int inuse = slabp->inuse;
1440     		if (unlikely(!--slabp->inuse)) {
1441     			/* Was partial or full, now empty. */
1442     			list_del(&slabp->list);
1443     			list_add(&slabp->list, &cachep->slabs_free);
1444     		} else if (unlikely(inuse == cachep->num)) {
1445     			/* Was full. */
1446     			list_del(&slabp->list);
1447     			list_add(&slabp->list, &cachep->slabs_partial);
1448     		}
1449     	}
1450     }
1451     
1452     #ifdef CONFIG_SMP
1453     static inline void __free_block (kmem_cache_t* cachep,
1454     							void** objpp, int len)
1455     {
1456     	for ( ; len > 0; len--, objpp++)
1457     		kmem_cache_free_one(cachep, *objpp);
1458     }
1459     
1460     static void free_block (kmem_cache_t* cachep, void** objpp, int len)
1461     {
1462     	spin_lock(&cachep->spinlock);
1463     	__free_block(cachep, objpp, len);
1464     	spin_unlock(&cachep->spinlock);
1465     }
1466     #endif
1467     
1468     /*
1469      * __kmem_cache_free
1470      * called with disabled ints
1471      */
1472     static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
1473     {
1474     #ifdef CONFIG_SMP
1475     	cpucache_t *cc = cc_data(cachep);
1476     
1477     	CHECK_PAGE(virt_to_page(objp));
1478     	if (cc) {
1479     		int batchcount;
1480     		if (cc->avail < cc->limit) {
1481     			STATS_INC_FREEHIT(cachep);
1482     			cc_entry(cc)[cc->avail++] = objp;
1483     			return;
1484     		}
1485     		STATS_INC_FREEMISS(cachep);
1486     		batchcount = cachep->batchcount;
1487     		cc->avail -= batchcount;
1488     		free_block(cachep,
1489     					&cc_entry(cc)[cc->avail],batchcount);
1490     		cc_entry(cc)[cc->avail++] = objp;
1491     		return;
1492     	} else {
1493     		free_block(cachep, &objp, 1);
1494     	}
1495     #else
1496     	kmem_cache_free_one(cachep, objp);
1497     #endif
1498     }
1499     
1500     /**
1501      * kmem_cache_alloc - Allocate an object
1502      * @cachep: The cache to allocate from.
1503      * @flags: See kmalloc().
1504      *
1505      * Allocate an object from this cache.  The flags are only relevant
1506      * if the cache has no available objects.
1507      */
1508     void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1509     {
1510     	return __kmem_cache_alloc(cachep, flags);
1511     }
1512     
1513     /**
1514      * kmalloc - allocate memory
1515      * @size: how many bytes of memory are required.
1516      * @flags: the type of memory to allocate.
1517      *
1518      * kmalloc is the normal method of allocating memory
1519      * in the kernel.
1520      *
1521      * The @flags argument may be one of:
1522      *
1523      * %GFP_USER - Allocate memory on behalf of user.  May sleep.
1524      *
1525      * %GFP_KERNEL - Allocate normal kernel ram.  May sleep.
1526      *
1527      * %GFP_ATOMIC - Allocation will not sleep.  Use inside interrupt handlers.
1528      *
1529      * Additionally, the %GFP_DMA flag may be set to indicate the memory
1530      * must be suitable for DMA.  This can mean different things on different
1531      * platforms.  For example, on i386, it means that the memory must come
1532      * from the first 16MB.
1533      */
1534     void * kmalloc (size_t size, int flags)
1535     {
1536     	cache_sizes_t *csizep = cache_sizes;
1537     
1538     	for (; csizep->cs_size; csizep++) {
1539     		if (size > csizep->cs_size)
1540     			continue;
1541     		return __kmem_cache_alloc(flags & GFP_DMA ?
1542     			 csizep->cs_dmacachep : csizep->cs_cachep, flags);
1543     	}
1544     	return NULL;
1545     }
1546     
1547     /**
1548      * kmem_cache_free - Deallocate an object
1549      * @cachep: The cache the allocation was from.
1550      * @objp: The previously allocated object.
1551      *
1552      * Free an object which was previously allocated from this
1553      * cache.
1554      */
1555     void kmem_cache_free (kmem_cache_t *cachep, void *objp)
1556     {
1557     	unsigned long flags;
1558     #if DEBUG
1559     	CHECK_PAGE(virt_to_page(objp));
1560     	if (cachep != GET_PAGE_CACHE(virt_to_page(objp)))
1561     		BUG();
1562     #endif
1563     
1564     	local_irq_save(flags);
1565     	__kmem_cache_free(cachep, objp);
1566     	local_irq_restore(flags);
1567     }
1568     
1569     /**
1570      * kfree - free previously allocated memory
1571      * @objp: pointer returned by kmalloc.
1572      *
1573      * Don't free memory not originally allocated by kmalloc()
1574      * or you will run into trouble.
1575      */
1576     void kfree (const void *objp)
1577     {
1578     	kmem_cache_t *c;
1579     	unsigned long flags;
1580     
1581     	if (!objp)
1582     		return;
1583     	local_irq_save(flags);
1584     	CHECK_PAGE(virt_to_page(objp));
1585     	c = GET_PAGE_CACHE(virt_to_page(objp));
1586     	__kmem_cache_free(c, (void*)objp);
1587     	local_irq_restore(flags);
1588     }
1589     
1590     kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
1591     {
1592     	cache_sizes_t *csizep = cache_sizes;
1593     
1594     	/* This function could be moved to the header file, and
1595     	 * made inline so consumers can quickly determine what
1596     	 * cache pointer they require.
1597     	 */
1598     	for ( ; csizep->cs_size; csizep++) {
1599     		if (size > csizep->cs_size)
1600     			continue;
1601     		break;
1602     	}
1603     	return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
1604     }
1605     
1606     #ifdef CONFIG_SMP
1607     
1608     /* called with cache_chain_sem acquired.  */
1609     static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
1610     {
1611     	ccupdate_struct_t new;
1612     	int i;
1613     
1614     	/*
1615     	 * These are admin-provided, so we are more graceful.
1616     	 */
1617     	if (limit < 0)
1618     		return -EINVAL;
1619     	if (batchcount < 0)
1620     		return -EINVAL;
1621     	if (batchcount > limit)
1622     		return -EINVAL;
1623     	if (limit != 0 && !batchcount)
1624     		return -EINVAL;
1625     
1626     	memset(&new.new,0,sizeof(new.new));
1627     	if (limit) {
1628     		for (i = 0; i< smp_num_cpus; i++) {
1629     			cpucache_t* ccnew;
1630     
1631     			ccnew = kmalloc(sizeof(void*)*limit+
1632     					sizeof(cpucache_t), GFP_KERNEL);
1633     			if (!ccnew)
1634     				goto oom;
1635     			ccnew->limit = limit;
1636     			ccnew->avail = 0;
1637     			new.new[cpu_logical_map(i)] = ccnew;
1638     		}
1639     	}
1640     	new.cachep = cachep;
1641     	spin_lock_irq(&cachep->spinlock);
1642     	cachep->batchcount = batchcount;
1643     	spin_unlock_irq(&cachep->spinlock);
1644     
1645     	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
1646     
1647     	for (i = 0; i < smp_num_cpus; i++) {
1648     		cpucache_t* ccold = new.new[cpu_logical_map(i)];
1649     		if (!ccold)
1650     			continue;
1651     		local_irq_disable();
1652     		free_block(cachep, cc_entry(ccold), ccold->avail);
1653     		local_irq_enable();
1654     		kfree(ccold);
1655     	}
1656     	return 0;
1657     oom:
1658     	for (i--; i >= 0; i--)
1659     		kfree(new.new[cpu_logical_map(i)]);
1660     	return -ENOMEM;
1661     }
1662     
1663     static void enable_cpucache (kmem_cache_t *cachep)
1664     {
1665     	int err;
1666     	int limit;
1667     
1668     	/* FIXME: optimize */
1669     	if (cachep->objsize > PAGE_SIZE)
1670     		return;
1671     	if (cachep->objsize > 1024)
1672     		limit = 60;
1673     	else if (cachep->objsize > 256)
1674     		limit = 124;
1675     	else
1676     		limit = 252;
1677     
1678     	err = kmem_tune_cpucache(cachep, limit, limit/2);
1679     	if (err)
1680     		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
1681     					cachep->name, -err);
1682     }
1683     
1684     static void enable_all_cpucaches (void)
1685     {
1686     	struct list_head* p;
1687     
1688     	down(&cache_chain_sem);
1689     
1690     	p = &cache_cache.next;
1691     	do {
1692     		kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
1693     
1694     		enable_cpucache(cachep);
1695     		p = cachep->next.next;
1696     	} while (p != &cache_cache.next);
1697     
1698     	up(&cache_chain_sem);
1699     }
1700     #endif
1701     
/**
 * kmem_cache_reap - Reclaim memory from caches.
 * @gfp_mask: the type of memory required.
 *
 * Called from do_try_to_free_pages() and __alloc_pages()
 *
 * Scans up to REAP_SCANLEN caches starting at clock_searchp, picks the
 * one with the most (weighted) pages sitting in completely free slabs,
 * and destroys about half of that cache's free slabs.
 *
 * Returns the number of slabs destroyed multiplied by the pages per
 * slab (1 << gfporder) of the chosen cache, or 0 if nothing was reaped
 * or the cache chain semaphore could not be taken without sleeping.
 */
int kmem_cache_reap (int gfp_mask)
{
	slab_t *slabp;
	kmem_cache_t *searchp;
	kmem_cache_t *best_cachep;
	unsigned int best_pages;
	unsigned int best_len;
	unsigned int scan;
	int ret = 0;

	/* Only block on the semaphore if the caller is allowed to wait. */
	if (gfp_mask & __GFP_WAIT)
		down(&cache_chain_sem);
	else
		if (down_trylock(&cache_chain_sem))
			return 0;

	scan = REAP_SCANLEN;
	best_len = 0;
	best_pages = 0;
	best_cachep = NULL;
	searchp = clock_searchp;
	do {
		unsigned int pages;
		struct list_head* p;
		unsigned int full_free;

		/* It's safe to test this without holding the cache-lock. */
		if (searchp->flags & SLAB_NO_REAP)
			goto next;
		spin_lock_irq(&searchp->spinlock);
		if (searchp->growing)
			goto next_unlock;
		/* A cache that grew since the last pass is skipped once;
		 * the flag is cleared so it is considered next time.
		 */
		if (searchp->dflags & DFLGS_GROWN) {
			searchp->dflags &= ~DFLGS_GROWN;
			goto next_unlock;
		}
#ifdef CONFIG_SMP
		{
			/* Flush this CPU's head array back to the slabs
			 * (only the executing CPU's array is touched here).
			 */
			cpucache_t *cc = cc_data(searchp);
			if (cc && cc->avail) {
				__free_block(searchp, cc_entry(cc), cc->avail);
				cc->avail = 0;
			}
		}
#endif

		/* Count the completely free slabs of this cache. */
		full_free = 0;
		p = searchp->slabs_free.next;
		while (p != &searchp->slabs_free) {
			slabp = list_entry(p, slab_t, list);
#if DEBUG
			if (slabp->inuse)
				BUG();
#endif
			full_free++;
			p = p->next;
		}

		/*
		 * Try to avoid slabs with constructors and/or
		 * more than one page per slab (as it can be difficult
		 * to get high orders from gfp()).
		 */
		pages = full_free * (1<<searchp->gfporder);
		/* Each penalty scales the score to roughly 4/5. */
		if (searchp->ctor)
			pages = (pages*4+1)/5;
		if (searchp->gfporder)
			pages = (pages*4+1)/5;
		if (pages > best_pages) {
			best_cachep = searchp;
			best_len = full_free;
			best_pages = pages;
			if (pages >= REAP_PERFECT) {
				/* Good enough: stop scanning.  Note that
				 * searchp->spinlock (== best_cachep's lock)
				 * is still held when jumping to perfect.
				 */
				clock_searchp = list_entry(searchp->next.next,
							kmem_cache_t,next);
				goto perfect;
			}
		}
next_unlock:
		spin_unlock_irq(&searchp->spinlock);
next:
		searchp = list_entry(searchp->next.next,kmem_cache_t,next);
	} while (--scan && searchp != clock_searchp);

	/* Remember where to resume the scan next time. */
	clock_searchp = searchp;

	if (!best_cachep)
		/* couldn't find anything to reap */
		goto out;

	spin_lock_irq(&best_cachep->spinlock);
perfect:
	/* free only 50% of the free slabs */
	best_len = (best_len + 1)/2;
	for (scan = 0; scan < best_len; scan++) {
		struct list_head *p;

		if (best_cachep->growing)
			break;
		/* Take slabs from the tail of the free list. */
		p = best_cachep->slabs_free.prev;
		if (p == &best_cachep->slabs_free)
			break;
		slabp = list_entry(p,slab_t,list);
#if DEBUG
		if (slabp->inuse)
			BUG();
#endif
		list_del(&slabp->list);
		STATS_INC_REAPED(best_cachep);

		/* Safe to drop the lock. The slab is no longer linked to the
		 * cache.
		 */
		spin_unlock_irq(&best_cachep->spinlock);
		kmem_slab_destroy(best_cachep, slabp);
		spin_lock_irq(&best_cachep->spinlock);
	}
	spin_unlock_irq(&best_cachep->spinlock);
	/* scan now holds the number of slabs actually destroyed. */
	ret = scan * (1 << best_cachep->gfporder);
out:
	up(&cache_chain_sem);
	return ret;
}
1831     
1832     #ifdef CONFIG_PROC_FS
1833     /* /proc/slabinfo
1834      *	cache-name num-active-objs total-objs
1835      *	obj-size num-active-slabs total-slabs
1836      *	num-pages-per-slab
1837      */
/*
 * Offset bookkeeping for the /proc read handler.  Uses the variables
 * `len', `off' and `count' of the expanding function: while the
 * output generated so far lies entirely before the requested offset
 * it is discarded (off reduced, len reset); otherwise, once more than
 * `count' bytes past the offset are available, jump to label @t.
 */
#define FIXUP(t)				\
	do {					\
		if (len > off) {		\
			if (len-off > count)	\
				goto t;		\
		} else {			\
			off -= len;		\
			len = 0;		\
		}				\
	} while (0)
1848     
1849     static int proc_getdata (char*page, char**start, off_t off, int count)
1850     {
1851     	struct list_head *p;
1852     	int len = 0;
1853     
1854     	/* Output format version, so at least we can change it without _too_
1855     	 * many complaints.
1856     	 */
1857     	len += sprintf(page+len, "slabinfo - version: 1.1"
1858     #if STATS
1859     				" (statistics)"
1860     #endif
1861     #ifdef CONFIG_SMP
1862     				" (SMP)"
1863     #endif
1864     				"\n");
1865     	FIXUP(got_data);
1866     
1867     	down(&cache_chain_sem);
1868     	p = &cache_cache.next;
1869     	do {
1870     		kmem_cache_t	*cachep;
1871     		struct list_head *q;
1872     		slab_t		*slabp;
1873     		unsigned long	active_objs;
1874     		unsigned long	num_objs;
1875     		unsigned long	active_slabs = 0;
1876     		unsigned long	num_slabs;
1877     		cachep = list_entry(p, kmem_cache_t, next);
1878     
1879     		spin_lock_irq(&cachep->spinlock);
1880     		active_objs = 0;
1881     		num_slabs = 0;
1882     		list_for_each(q,&cachep->slabs_full) {
1883     			slabp = list_entry(q, slab_t, list);
1884     			if (slabp->inuse != cachep->num)
1885     				BUG();
1886     			active_objs += cachep->num;
1887     			active_slabs++;
1888     		}
1889     		list_for_each(q,&cachep->slabs_partial) {
1890     			slabp = list_entry(q, slab_t, list);
1891     			if (slabp->inuse == cachep->num || !slabp->inuse)
1892     				BUG();
1893     			active_objs += slabp->inuse;
1894     			active_slabs++;
1895     		}
1896     		list_for_each(q,&cachep->slabs_free) {
1897     			slabp = list_entry(q, slab_t, list);
1898     			if (slabp->inuse)
1899     				BUG();
1900     			num_slabs++;
1901     		}
1902     		num_slabs+=active_slabs;
1903     		num_objs = num_slabs*cachep->num;
1904     
1905     		len += sprintf(page+len, "%-17s %6lu %6lu %6u %4lu %4lu %4u",
1906     			cachep->name, active_objs, num_objs, cachep->objsize,
1907     			active_slabs, num_slabs, (1<<cachep->gfporder));
1908     
1909     #if STATS
1910     		{
1911     			unsigned long errors = cachep->errors;
1912     			unsigned long high = cachep->high_mark;
1913     			unsigned long grown = cachep->grown;
1914     			unsigned long reaped = cachep->reaped;
1915     			unsigned long allocs = cachep->num_allocations;
1916     
1917     			len += sprintf(page+len, " : %6lu %7lu %5lu %4lu %4lu",
1918     					high, allocs, grown, reaped, errors);
1919     		}
1920     #endif
1921     #ifdef CONFIG_SMP
1922     		{
1923     			unsigned int batchcount = cachep->batchcount;
1924     			unsigned int limit;
1925     
1926     			if (cc_data(cachep))
1927     				limit = cc_data(cachep)->limit;
1928     			 else
1929     				limit = 0;
1930     			len += sprintf(page+len, " : %4u %4u",
1931     					limit, batchcount);
1932     		}
1933     #endif
1934     #if STATS && defined(CONFIG_SMP)
1935     		{
1936     			unsigned long allochit = atomic_read(&cachep->allochit);
1937     			unsigned long allocmiss = atomic_read(&cachep->allocmiss);
1938     			unsigned long freehit = atomic_read(&cachep->freehit);
1939     			unsigned long freemiss = atomic_read(&cachep->freemiss);
1940     			len += sprintf(page+len, " : %6lu %6lu %6lu %6lu",
1941     					allochit, allocmiss, freehit, freemiss);
1942     		}
1943     #endif
1944     		len += sprintf(page+len,"\n");
1945     		spin_unlock_irq(&cachep->spinlock);
1946     		FIXUP(got_data_up);
1947     		p = cachep->next.next;
1948     	} while (p != &cache_cache.next);
1949     got_data_up:
1950     	up(&cache_chain_sem);
1951     
1952     got_data:
1953     	*start = page+off;
1954     	return len;
1955     }
1956     
1957     /**
1958      * slabinfo_read_proc - generates /proc/slabinfo
1959      * @page: scratch area, one page long
1960      * @start: pointer to the pointer to the output buffer
1961      * @off: offset within /proc/slabinfo the caller is interested in
1962      * @count: requested len in bytes
1963      * @eof: eof marker
1964      * @data: unused
1965      *
1966      * The contents of the buffer are
1967      * cache-name
1968      * num-active-objs
1969      * total-objs
1970      * object size
1971      * num-active-slabs
1972      * total-slabs
1973      * num-pages-per-slab
1974      * + further values on SMP and with statistics enabled
1975      */
1976     int slabinfo_read_proc (char *page, char **start, off_t off,
1977     				 int count, int *eof, void *data)
1978     {
1979     	int len = proc_getdata(page, start, off, count);
1980     	len -= (*start-page);
1981     	if (len <= count)
1982     		*eof = 1;
1983     	if (len>count) len = count;
1984     	if (len<0) len = 0;
1985     	return len;
1986     }
1987     
#define MAX_SLABINFO_WRITE 128
/**
 * slabinfo_write_proc - SMP tuning for the slab allocator
 * @file: unused
 * @buffer: user buffer
 * @count: data len
 * @data: unused
 *
 * The written text is parsed as "<cache-name> <limit> <batchcount>": the
 * name is everything up to the first space, the two numbers are read with
 * simple_strtol() in base 10.  The cache chain is then searched for a cache
 * whose name matches exactly, and kmem_tune_cpucache() is called on it.
 *
 * Returns @count when kmem_tune_cpucache() returns a non-negative value,
 * otherwise its negative result.  Returns -EINVAL if more than
 * MAX_SLABINFO_WRITE bytes are written, if the text contains no space, or
 * if no cache has the given name; -EFAULT if the user buffer cannot be
 * copied.  Without CONFIG_SMP the function always returns -EINVAL.
 */
int slabinfo_write_proc (struct file *file, const char *buffer,
				unsigned long count, void *data)
{
#ifdef CONFIG_SMP
	char kbuf[MAX_SLABINFO_WRITE+1], *tmp;
	int limit, batchcount, res;
	struct list_head *p;

	if (count > MAX_SLABINFO_WRITE)
		return -EINVAL;
	if (copy_from_user(kbuf, buffer, count))
		return -EFAULT;
	/* Terminate directly behind the bytes that were copied.  Only the
	 * first @count bytes of kbuf are initialised; terminating at the end
	 * of the array would let the parsing below run into stale stack
	 * contents whenever fewer than MAX_SLABINFO_WRITE bytes are written.
	 * count <= MAX_SLABINFO_WRITE, so the index is within the array.
	 */
	kbuf[count] = '\0';

	/* Split off the cache name at the first space. */
	tmp = strchr(kbuf, ' ');
	if (!tmp)
		return -EINVAL;
	*tmp = '\0';
	tmp++;
	limit = simple_strtol(tmp, &tmp, 10);
	while (*tmp == ' ')
		tmp++;
	batchcount = simple_strtol(tmp, &tmp, 10);

	/* Find the cache in the chain of caches. */
	down(&cache_chain_sem);
	res = -EINVAL;	/* stays -EINVAL if no cache matches */
	list_for_each(p,&cache_chain) {
		kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next);

		if (!strcmp(cachep->name, kbuf)) {
			res = kmem_tune_cpucache(cachep, limit, batchcount);
			break;
		}
	}
	up(&cache_chain_sem);
	if (res >= 0)
		res = count;
	return res;
#else
	return -EINVAL;
#endif
}
2039     #endif
2040