File: /usr/src/linux/fs/dcache.c

1     /*
2      * fs/dcache.c
3      *
4      * Complete reimplementation
5      * (C) 1997 Thomas Schoebel-Theuer,
6      * with heavy changes by Linus Torvalds
7      */
8     
9     /*
10      * Notes on the allocation strategy:
11      *
12      * The dcache is a master of the icache - whenever a dcache entry
13      * exists, the inode will always exist. "iput()" is done either when
14      * the dcache entry is deleted or garbage collected.
15      */
16     
17     #include <linux/config.h>
18     #include <linux/string.h>
19     #include <linux/mm.h>
20     #include <linux/fs.h>
21     #include <linux/slab.h>
22     #include <linux/init.h>
23     #include <linux/smp_lock.h>
24     #include <linux/cache.h>
25     #include <linux/module.h>
26     
27     #include <asm/uaccess.h>
28     
29     #define DCACHE_PARANOIA 1
30     /* #define DCACHE_DEBUG 1 */
31     
32     spinlock_t dcache_lock = SPIN_LOCK_UNLOCKED;
33     
34     /* Right now the dcache depends on the kernel lock */
35     #define check_lock()	if (!kernel_locked()) BUG()
36     
37     static kmem_cache_t *dentry_cache; 
38     
39     /*
40      * This is the single most critical data structure when it comes
41      * to the dcache: the hashtable for lookups. Somebody should try
42      * to make this good - I've just made it work.
43      *
44      * This hash-function tries to avoid losing too many bits of hash
45      * information, yet avoid using a prime hash-size or similar.
46      */
47     #define D_HASHBITS     d_hash_shift
48     #define D_HASHMASK     d_hash_mask
49     
50     static unsigned int d_hash_mask;
51     static unsigned int d_hash_shift;
52     static struct list_head *dentry_hashtable;
53     static LIST_HEAD(dentry_unused);
54     
55     /* Statistics gathering. */
56     struct dentry_stat_t dentry_stat = {0, 0, 45, 0,};
57     
58     /* no dcache_lock, please */
59     static inline void d_free(struct dentry *dentry)
60     {
61     	if (dentry->d_op && dentry->d_op->d_release)
62     		dentry->d_op->d_release(dentry);
63     	if (dname_external(dentry)) 
64     		kfree(dentry->d_name.name);
65     	kmem_cache_free(dentry_cache, dentry); 
66     	dentry_stat.nr_dentry--;
67     }
68     
69     /*
70      * Release the dentry's inode, using the fileystem
71      * d_iput() operation if defined.
72      * Called with dcache_lock held, drops it.
73      */
74     static inline void dentry_iput(struct dentry * dentry)
75     {
76     	struct inode *inode = dentry->d_inode;
77     	if (inode) {
78     		dentry->d_inode = NULL;
79     		list_del_init(&dentry->d_alias);
80     		spin_unlock(&dcache_lock);
81     		if (dentry->d_op && dentry->d_op->d_iput)
82     			dentry->d_op->d_iput(dentry, inode);
83     		else
84     			iput(inode);
85     	} else
86     		spin_unlock(&dcache_lock);
87     }
88     
89     /* 
90      * This is dput
91      *
92      * This is complicated by the fact that we do not want to put
93      * dentries that are no longer on any hash chain on the unused
94      * list: we'd much rather just get rid of them immediately.
95      *
96      * However, that implies that we have to traverse the dentry
97      * tree upwards to the parents which might _also_ now be
98      * scheduled for deletion (it may have been only waiting for
99      * its last child to go away).
100      *
101      * This tail recursion is done by hand as we don't want to depend
102      * on the compiler to always get this right (gcc generally doesn't).
103      * Real recursion would eat up our stack space.
104      */
105     
106     /*
107      * dput - release a dentry
108      * @dentry: dentry to release 
109      *
110      * Release a dentry. This will drop the usage count and if appropriate
111      * call the dentry unlink method as well as removing it from the queues and
112      * releasing its resources. If the parent dentries were scheduled for release
113      * they too may now get deleted.
114      *
115      * no dcache lock, please.
116      */
117     
118     void dput(struct dentry *dentry)
119     {
120     	if (!dentry)
121     		return;
122     
123     repeat:
124     	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
125     		return;
126     
127     	/* dput on a free dentry? */
128     	if (!list_empty(&dentry->d_lru))
129     		BUG();
130     	/*
131     	 * AV: ->d_delete() is _NOT_ allowed to block now.
132     	 */
133     	if (dentry->d_op && dentry->d_op->d_delete) {
134     		if (dentry->d_op->d_delete(dentry))
135     			goto unhash_it;
136     	}
137     	/* Unreachable? Get rid of it */
138     	if (list_empty(&dentry->d_hash))
139     		goto kill_it;
140     	list_add(&dentry->d_lru, &dentry_unused);
141     	dentry_stat.nr_unused++;
142     	spin_unlock(&dcache_lock);
143     	return;
144     
145     unhash_it:
146     	list_del_init(&dentry->d_hash);
147     
148     kill_it: {
149     		struct dentry *parent;
150     		list_del(&dentry->d_child);
151     		/* drops the lock, at that point nobody can reach this dentry */
152     		dentry_iput(dentry);
153     		parent = dentry->d_parent;
154     		d_free(dentry);
155     		if (dentry == parent)
156     			return;
157     		dentry = parent;
158     		goto repeat;
159     	}
160     }
161     
162     /**
163      * d_invalidate - invalidate a dentry
164      * @dentry: dentry to invalidate
165      *
166      * Try to invalidate the dentry if it turns out to be
167      * possible. If there are other dentries that can be
168      * reached through this one we can't delete it and we
169      * return -EBUSY. On success we return 0.
170      *
171      * no dcache lock.
172      */
173      
174     int d_invalidate(struct dentry * dentry)
175     {
176     	/*
177     	 * If it's already been dropped, return OK.
178     	 */
179     	spin_lock(&dcache_lock);
180     	if (list_empty(&dentry->d_hash)) {
181     		spin_unlock(&dcache_lock);
182     		return 0;
183     	}
184     	/*
185     	 * Check whether to do a partial shrink_dcache
186     	 * to get rid of unused child entries.
187     	 */
188     	if (!list_empty(&dentry->d_subdirs)) {
189     		spin_unlock(&dcache_lock);
190     		shrink_dcache_parent(dentry);
191     		spin_lock(&dcache_lock);
192     	}
193     
194     	/*
195     	 * Somebody else still using it?
196     	 *
197     	 * If it's a directory, we can't drop it
198     	 * for fear of somebody re-populating it
199     	 * with children (even though dropping it
200     	 * would make it unreachable from the root,
201     	 * we might still populate it if it was a
202     	 * working directory or similar).
203     	 */
204     	if (atomic_read(&dentry->d_count) > 1) {
205     		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
206     			spin_unlock(&dcache_lock);
207     			return -EBUSY;
208     		}
209     	}
210     
211     	list_del_init(&dentry->d_hash);
212     	spin_unlock(&dcache_lock);
213     	return 0;
214     }
215     
216     /* This should be called _only_ with dcache_lock held */
217     
218     static inline struct dentry * __dget_locked(struct dentry *dentry)
219     {
220     	atomic_inc(&dentry->d_count);
221     	if (atomic_read(&dentry->d_count) == 1) {
222     		dentry_stat.nr_unused--;
223     		list_del_init(&dentry->d_lru);
224     	}
225     	return dentry;
226     }
227     
228     struct dentry * dget_locked(struct dentry *dentry)
229     {
230     	return __dget_locked(dentry);
231     }
232     
233     /**
234      * d_find_alias - grab a hashed alias of inode
235      * @inode: inode in question
236      *
237      * If inode has a hashed alias - acquire the reference to alias and
238      * return it. Otherwise return NULL. Notice that if inode is a directory
239      * there can be only one alias and it can be unhashed only if it has
240      * no children.
241      */
242     
243     struct dentry * d_find_alias(struct inode *inode)
244     {
245     	struct list_head *head, *next, *tmp;
246     	struct dentry *alias;
247     
248     	spin_lock(&dcache_lock);
249     	head = &inode->i_dentry;
250     	next = inode->i_dentry.next;
251     	while (next != head) {
252     		tmp = next;
253     		next = tmp->next;
254     		alias = list_entry(tmp, struct dentry, d_alias);
255     		if (!list_empty(&alias->d_hash)) {
256     			__dget_locked(alias);
257     			spin_unlock(&dcache_lock);
258     			return alias;
259     		}
260     	}
261     	spin_unlock(&dcache_lock);
262     	return NULL;
263     }
264     
265     /*
266      *	Try to kill dentries associated with this inode.
267      * WARNING: you must own a reference to inode.
268      */
269     void d_prune_aliases(struct inode *inode)
270     {
271     	struct list_head *tmp, *head = &inode->i_dentry;
272     restart:
273     	spin_lock(&dcache_lock);
274     	tmp = head;
275     	while ((tmp = tmp->next) != head) {
276     		struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
277     		if (!atomic_read(&dentry->d_count)) {
278     			__dget_locked(dentry);
279     			spin_unlock(&dcache_lock);
280     			d_drop(dentry);
281     			dput(dentry);
282     			goto restart;
283     		}
284     	}
285     	spin_unlock(&dcache_lock);
286     }
287     
288     /*
289      * Throw away a dentry - free the inode, dput the parent.
290      * This requires that the LRU list has already been
291      * removed.
292      * Called with dcache_lock, drops it and then regains.
293      */
294     static inline void prune_one_dentry(struct dentry * dentry)
295     {
296     	struct dentry * parent;
297     
298     	list_del_init(&dentry->d_hash);
299     	list_del(&dentry->d_child);
300     	dentry_iput(dentry);
301     	parent = dentry->d_parent;
302     	d_free(dentry);
303     	if (parent != dentry)
304     		dput(parent);
305     	spin_lock(&dcache_lock);
306     }
307     
308     /**
309      * prune_dcache - shrink the dcache
310      * @count: number of entries to try and free
311      *
312      * Shrink the dcache. This is done when we need
313      * more memory, or simply when we need to unmount
314      * something (at which point we need to unuse
315      * all dentries).
316      *
317      * This function may fail to free any resources if
318      * all the dentries are in use.
319      */
320      
321     void prune_dcache(int count)
322     {
323     	spin_lock(&dcache_lock);
324     	for (;;) {
325     		struct dentry *dentry;
326     		struct list_head *tmp;
327     
328     		tmp = dentry_unused.prev;
329     
330     		if (tmp == &dentry_unused)
331     			break;
332     		list_del_init(tmp);
333     		dentry = list_entry(tmp, struct dentry, d_lru);
334     
335     		/* If the dentry was recently referenced, don't free it. */
336     		if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
337     			dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
338     			list_add(&dentry->d_lru, &dentry_unused);
339     			continue;
340     		}
341     		dentry_stat.nr_unused--;
342     
343     		/* Unused dentry with a count? */
344     		if (atomic_read(&dentry->d_count))
345     			BUG();
346     
347     		prune_one_dentry(dentry);
348     		if (!--count)
349     			break;
350     	}
351     	spin_unlock(&dcache_lock);
352     }
353     
354     /*
355      * Shrink the dcache for the specified super block.
356      * This allows us to unmount a device without disturbing
357      * the dcache for the other devices.
358      *
359      * This implementation makes just two traversals of the
360      * unused list.  On the first pass we move the selected
361      * dentries to the most recent end, and on the second
362      * pass we free them.  The second pass must restart after
363      * each dput(), but since the target dentries are all at
364      * the end, it's really just a single traversal.
365      */
366     
367     /**
368      * shrink_dcache_sb - shrink dcache for a superblock
369      * @sb: superblock
370      *
371      * Shrink the dcache for the specified super block. This
372      * is used to free the dcache before unmounting a file
373      * system
374      */
375     
376     void shrink_dcache_sb(struct super_block * sb)
377     {
378     	struct list_head *tmp, *next;
379     	struct dentry *dentry;
380     
381     	/*
382     	 * Pass one ... move the dentries for the specified
383     	 * superblock to the most recent end of the unused list.
384     	 */
385     	spin_lock(&dcache_lock);
386     	next = dentry_unused.next;
387     	while (next != &dentry_unused) {
388     		tmp = next;
389     		next = tmp->next;
390     		dentry = list_entry(tmp, struct dentry, d_lru);
391     		if (dentry->d_sb != sb)
392     			continue;
393     		list_del(tmp);
394     		list_add(tmp, &dentry_unused);
395     	}
396     
397     	/*
398     	 * Pass two ... free the dentries for this superblock.
399     	 */
400     repeat:
401     	next = dentry_unused.next;
402     	while (next != &dentry_unused) {
403     		tmp = next;
404     		next = tmp->next;
405     		dentry = list_entry(tmp, struct dentry, d_lru);
406     		if (dentry->d_sb != sb)
407     			continue;
408     		if (atomic_read(&dentry->d_count))
409     			continue;
410     		dentry_stat.nr_unused--;
411     		list_del_init(tmp);
412     		prune_one_dentry(dentry);
413     		goto repeat;
414     	}
415     	spin_unlock(&dcache_lock);
416     }
417     
418     /*
419      * Search for at least 1 mount point in the dentry's subdirs.
420      * We descend to the next level whenever the d_subdirs
421      * list is non-empty and continue searching.
422      */
423      
424     /**
425      * have_submounts - check for mounts over a dentry
426      * @parent: dentry to check.
427      *
428      * Return true if the parent or its subdirectories contain
429      * a mount point
430      */
431      
432     int have_submounts(struct dentry *parent)
433     {
434     	struct dentry *this_parent = parent;
435     	struct list_head *next;
436     
437     	spin_lock(&dcache_lock);
438     	if (d_mountpoint(parent))
439     		goto positive;
440     repeat:
441     	next = this_parent->d_subdirs.next;
442     resume:
443     	while (next != &this_parent->d_subdirs) {
444     		struct list_head *tmp = next;
445     		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
446     		next = tmp->next;
447     		/* Have we found a mount point ? */
448     		if (d_mountpoint(dentry))
449     			goto positive;
450     		if (!list_empty(&dentry->d_subdirs)) {
451     			this_parent = dentry;
452     			goto repeat;
453     		}
454     	}
455     	/*
456     	 * All done at this level ... ascend and resume the search.
457     	 */
458     	if (this_parent != parent) {
459     		next = this_parent->d_child.next; 
460     		this_parent = this_parent->d_parent;
461     		goto resume;
462     	}
463     	spin_unlock(&dcache_lock);
464     	return 0; /* No mount points found in tree */
465     positive:
466     	spin_unlock(&dcache_lock);
467     	return 1;
468     }
469     
470     /*
471      * Search the dentry child list for the specified parent,
472      * and move any unused dentries to the end of the unused
473      * list for prune_dcache(). We descend to the next level
474      * whenever the d_subdirs list is non-empty and continue
475      * searching.
476      */
477     static int select_parent(struct dentry * parent)
478     {
479     	struct dentry *this_parent = parent;
480     	struct list_head *next;
481     	int found = 0;
482     
483     	spin_lock(&dcache_lock);
484     repeat:
485     	next = this_parent->d_subdirs.next;
486     resume:
487     	while (next != &this_parent->d_subdirs) {
488     		struct list_head *tmp = next;
489     		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
490     		next = tmp->next;
491     		if (!atomic_read(&dentry->d_count)) {
492     			list_del(&dentry->d_lru);
493     			list_add(&dentry->d_lru, dentry_unused.prev);
494     			found++;
495     		}
496     		/*
497     		 * Descend a level if the d_subdirs list is non-empty.
498     		 */
499     		if (!list_empty(&dentry->d_subdirs)) {
500     			this_parent = dentry;
501     #ifdef DCACHE_DEBUG
502     printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
503     dentry->d_parent->d_name.name, dentry->d_name.name, found);
504     #endif
505     			goto repeat;
506     		}
507     	}
508     	/*
509     	 * All done at this level ... ascend and resume the search.
510     	 */
511     	if (this_parent != parent) {
512     		next = this_parent->d_child.next; 
513     		this_parent = this_parent->d_parent;
514     #ifdef DCACHE_DEBUG
515     printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
516     this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
517     #endif
518     		goto resume;
519     	}
520     	spin_unlock(&dcache_lock);
521     	return found;
522     }
523     
524     /**
525      * shrink_dcache_parent - prune dcache
526      * @parent: parent of entries to prune
527      *
528      * Prune the dcache to remove unused children of the parent dentry.
529      */
530      
531     void shrink_dcache_parent(struct dentry * parent)
532     {
533     	int found;
534     
535     	while ((found = select_parent(parent)) != 0)
536     		prune_dcache(found);
537     }
538     
539     /*
540      * This is called from kswapd when we think we need some
541      * more memory, but aren't really sure how much. So we
542      * carefully try to free a _bit_ of our dcache, but not
543      * too much.
544      *
545      * Priority:
546      *   0 - very urgent: shrink everything
547      *  ...
548      *   6 - base-level: try to shrink a bit.
549      */
550     int shrink_dcache_memory(int priority, unsigned int gfp_mask)
551     {
552     	int count = 0;
553     
554     	/*
555     	 * Nasty deadlock avoidance.
556     	 *
557     	 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
558     	 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->
559     	 * put_inode->ext2_discard_prealloc->ext2_free_blocks->lock_super->
560     	 * DEADLOCK.
561     	 *
562     	 * We should make sure we don't hold the superblock lock over
563     	 * block allocations, but for now:
564     	 */
565     	if (!(gfp_mask & __GFP_FS))
566     		return 0;
567     
568     	count = dentry_stat.nr_unused / priority;
569     
570     	prune_dcache(count);
571     	kmem_cache_shrink(dentry_cache);
572     	return 0;
573     }
574     
575     #define NAME_ALLOC_LEN(len)	((len+16) & ~15)
576     
577     /**
578      * d_alloc	-	allocate a dcache entry
579      * @parent: parent of entry to allocate
580      * @name: qstr of the name
581      *
582      * Allocates a dentry. It returns %NULL if there is insufficient memory
583      * available. On a success the dentry is returned. The name passed in is
584      * copied and the copy passed in may be reused after this call.
585      */
586      
587     struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
588     {
589     	char * str;
590     	struct dentry *dentry;
591     
592     	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 
593     	if (!dentry)
594     		return NULL;
595     
596     	if (name->len > DNAME_INLINE_LEN-1) {
597     		str = kmalloc(NAME_ALLOC_LEN(name->len), GFP_KERNEL);
598     		if (!str) {
599     			kmem_cache_free(dentry_cache, dentry); 
600     			return NULL;
601     		}
602     	} else
603     		str = dentry->d_iname; 
604     
605     	memcpy(str, name->name, name->len);
606     	str[name->len] = 0;
607     
608     	atomic_set(&dentry->d_count, 1);
609     	dentry->d_vfs_flags = 0;
610     	dentry->d_flags = 0;
611     	dentry->d_inode = NULL;
612     	dentry->d_parent = NULL;
613     	dentry->d_sb = NULL;
614     	dentry->d_name.name = str;
615     	dentry->d_name.len = name->len;
616     	dentry->d_name.hash = name->hash;
617     	dentry->d_op = NULL;
618     	dentry->d_fsdata = NULL;
619     	dentry->d_mounted = 0;
620     	INIT_LIST_HEAD(&dentry->d_hash);
621     	INIT_LIST_HEAD(&dentry->d_lru);
622     	INIT_LIST_HEAD(&dentry->d_subdirs);
623     	INIT_LIST_HEAD(&dentry->d_alias);
624     	if (parent) {
625     		dentry->d_parent = dget(parent);
626     		dentry->d_sb = parent->d_sb;
627     		spin_lock(&dcache_lock);
628     		list_add(&dentry->d_child, &parent->d_subdirs);
629     		spin_unlock(&dcache_lock);
630     	} else
631     		INIT_LIST_HEAD(&dentry->d_child);
632     
633     	dentry_stat.nr_dentry++;
634     	return dentry;
635     }
636     
637     /**
638      * d_instantiate - fill in inode information for a dentry
639      * @entry: dentry to complete
640      * @inode: inode to attach to this dentry
641      *
642      * Fill in inode information in the entry.
643      *
644      * This turns negative dentries into productive full members
645      * of society.
646      *
647      * NOTE! This assumes that the inode count has been incremented
648      * (or otherwise set) by the caller to indicate that it is now
649      * in use by the dcache.
650      */
651      
652     void d_instantiate(struct dentry *entry, struct inode * inode)
653     {
654     	if (!list_empty(&entry->d_alias)) BUG();
655     	spin_lock(&dcache_lock);
656     	if (inode)
657     		list_add(&entry->d_alias, &inode->i_dentry);
658     	entry->d_inode = inode;
659     	spin_unlock(&dcache_lock);
660     }
661     
662     /**
663      * d_alloc_root - allocate root dentry
664      * @root_inode: inode to allocate the root for
665      *
666      * Allocate a root ("/") dentry for the inode given. The inode is
667      * instantiated and returned. %NULL is returned if there is insufficient
668      * memory or the inode passed is %NULL.
669      */
670      
671     struct dentry * d_alloc_root(struct inode * root_inode)
672     {
673     	struct dentry *res = NULL;
674     
675     	if (root_inode) {
676     		res = d_alloc(NULL, &(const struct qstr) { "/", 1, 0 });
677     		if (res) {
678     			res->d_sb = root_inode->i_sb;
679     			res->d_parent = res;
680     			d_instantiate(res, root_inode);
681     		}
682     	}
683     	return res;
684     }
685     
686     static inline struct list_head * d_hash(struct dentry * parent, unsigned long hash)
687     {
688     	hash += (unsigned long) parent / L1_CACHE_BYTES;
689     	hash = hash ^ (hash >> D_HASHBITS);
690     	return dentry_hashtable + (hash & D_HASHMASK);
691     }
692     
693     /**
694      * d_lookup - search for a dentry
695      * @parent: parent dentry
696      * @name: qstr of name we wish to find
697      *
698      * Searches the children of the parent dentry for the name in question. If
699      * the dentry is found its reference count is incremented and the dentry
700      * is returned. The caller must use d_put to free the entry when it has
701      * finished using it. %NULL is returned on failure.
702      */
703      
704     struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
705     {
706     	unsigned int len = name->len;
707     	unsigned int hash = name->hash;
708     	const unsigned char *str = name->name;
709     	struct list_head *head = d_hash(parent,hash);
710     	struct list_head *tmp;
711     
712     	spin_lock(&dcache_lock);
713     	tmp = head->next;
714     	for (;;) {
715     		struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
716     		if (tmp == head)
717     			break;
718     		tmp = tmp->next;
719     		if (dentry->d_name.hash != hash)
720     			continue;
721     		if (dentry->d_parent != parent)
722     			continue;
723     		if (parent->d_op && parent->d_op->d_compare) {
724     			if (parent->d_op->d_compare(parent, &dentry->d_name, name))
725     				continue;
726     		} else {
727     			if (dentry->d_name.len != len)
728     				continue;
729     			if (memcmp(dentry->d_name.name, str, len))
730     				continue;
731     		}
732     		__dget_locked(dentry);
733     		dentry->d_vfs_flags |= DCACHE_REFERENCED;
734     		spin_unlock(&dcache_lock);
735     		return dentry;
736     	}
737     	spin_unlock(&dcache_lock);
738     	return NULL;
739     }
740     
741     /**
742      * d_validate - verify dentry provided from insecure source
743      * @dentry: The dentry alleged to be valid child of @dparent
744      * @dparent: The parent dentry (known to be valid)
745      * @hash: Hash of the dentry
746      * @len: Length of the name
747      *
748      * An insecure source has sent us a dentry, here we verify it and dget() it.
749      * This is used by ncpfs in its readdir implementation.
750      * Zero is returned in the dentry is invalid.
751      */
752      
753     int d_validate(struct dentry *dentry, struct dentry *dparent)
754     {
755     	unsigned long dent_addr = (unsigned long) dentry;
756     	unsigned long min_addr = PAGE_OFFSET;
757     	unsigned long align_mask = 0x0F;
758     	struct list_head *base, *lhp;
759     
760     	if (dent_addr < min_addr)
761     		goto out;
762     	if (dent_addr > (unsigned long)high_memory - sizeof(struct dentry))
763     		goto out;
764     	if (dent_addr & align_mask)
765     		goto out;
766     	if ((!kern_addr_valid(dent_addr)) || (!kern_addr_valid(dent_addr -1 +
767     						sizeof(struct dentry))))
768     		goto out;
769     
770     	if (dentry->d_parent != dparent)
771     		goto out;
772     
773     	spin_lock(&dcache_lock);
774     	lhp = base = d_hash(dparent, dentry->d_name.hash);
775     	while ((lhp = lhp->next) != base) {
776     		if (dentry == list_entry(lhp, struct dentry, d_hash)) {
777     			__dget_locked(dentry);
778     			spin_unlock(&dcache_lock);
779     			return 1;
780     		}
781     	}
782     	spin_unlock(&dcache_lock);
783     out:
784     	return 0;
785     }
786     
787     /*
788      * When a file is deleted, we have two options:
789      * - turn this dentry into a negative dentry
790      * - unhash this dentry and free it.
791      *
792      * Usually, we want to just turn this into
793      * a negative dentry, but if anybody else is
794      * currently using the dentry or the inode
795      * we can't do that and we fall back on removing
796      * it from the hash queues and waiting for
797      * it to be deleted later when it has no users
798      */
799      
800     /**
801      * d_delete - delete a dentry
802      * @dentry: The dentry to delete
803      *
804      * Turn the dentry into a negative dentry if possible, otherwise
805      * remove it from the hash queues so it can be deleted later
806      */
807      
808     void d_delete(struct dentry * dentry)
809     {
810     	/*
811     	 * Are we the only user?
812     	 */
813     	spin_lock(&dcache_lock);
814     	if (atomic_read(&dentry->d_count) == 1) {
815     		dentry_iput(dentry);
816     		return;
817     	}
818     	spin_unlock(&dcache_lock);
819     
820     	/*
821     	 * If not, just drop the dentry and let dput
822     	 * pick up the tab..
823     	 */
824     	d_drop(dentry);
825     }
826     
827     /**
828      * d_rehash	- add an entry back to the hash
829      * @entry: dentry to add to the hash
830      *
831      * Adds a dentry to the hash according to its name.
832      */
833      
834     void d_rehash(struct dentry * entry)
835     {
836     	struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
837     	if (!list_empty(&entry->d_hash)) BUG();
838     	spin_lock(&dcache_lock);
839     	list_add(&entry->d_hash, list);
840     	spin_unlock(&dcache_lock);
841     }
842     
843     #define do_switch(x,y) do { \
844     	__typeof__ (x) __tmp = x; \
845     	x = y; y = __tmp; } while (0)
846     
847     /*
848      * When switching names, the actual string doesn't strictly have to
849      * be preserved in the target - because we're dropping the target
850      * anyway. As such, we can just do a simple memcpy() to copy over
851      * the new name before we switch.
852      *
853      * Note that we have to be a lot more careful about getting the hash
854      * switched - we have to switch the hash value properly even if it
855      * then no longer matches the actual (corrupted) string of the target.
856      * The hash value has to match the hash queue that the dentry is on..
857      */
858     static inline void switch_names(struct dentry * dentry, struct dentry * target)
859     {
860     	const unsigned char *old_name, *new_name;
861     
862     	check_lock();
863     	memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); 
864     	old_name = target->d_name.name;
865     	new_name = dentry->d_name.name;
866     	if (old_name == target->d_iname)
867     		old_name = dentry->d_iname;
868     	if (new_name == dentry->d_iname)
869     		new_name = target->d_iname;
870     	target->d_name.name = new_name;
871     	dentry->d_name.name = old_name;
872     }
873     
874     /*
875      * We cannibalize "target" when moving dentry on top of it,
876      * because it's going to be thrown away anyway. We could be more
877      * polite about it, though.
878      *
879      * This forceful removal will result in ugly /proc output if
880      * somebody holds a file open that got deleted due to a rename.
881      * We could be nicer about the deleted file, and let it show
882      * up under the name it got deleted rather than the name that
883      * deleted it.
884      *
885      * Careful with the hash switch. The hash switch depends on
886      * the fact that any list-entry can be a head of the list.
887      * Think about it.
888      */
889      
890     /**
891      * d_move - move a dentry
892      * @dentry: entry to move
893      * @target: new dentry
894      *
895      * Update the dcache to reflect the move of a file name. Negative
896      * dcache entries should not be moved in this way.
897      */
898       
899     void d_move(struct dentry * dentry, struct dentry * target)
900     {
901     	check_lock();
902     
903     	if (!dentry->d_inode)
904     		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
905     
906     	spin_lock(&dcache_lock);
907     	/* Move the dentry to the target hash queue */
908     	list_del(&dentry->d_hash);
909     	list_add(&dentry->d_hash, &target->d_hash);
910     
911     	/* Unhash the target: dput() will then get rid of it */
912     	list_del_init(&target->d_hash);
913     
914     	list_del(&dentry->d_child);
915     	list_del(&target->d_child);
916     
917     	/* Switch the parents and the names.. */
918     	switch_names(dentry, target);
919     	do_switch(dentry->d_parent, target->d_parent);
920     	do_switch(dentry->d_name.len, target->d_name.len);
921     	do_switch(dentry->d_name.hash, target->d_name.hash);
922     
923     	/* And add them back to the (new) parent lists */
924     	list_add(&target->d_child, &target->d_parent->d_subdirs);
925     	list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
926     	spin_unlock(&dcache_lock);
927     }
928     
929     /**
930      * d_path - return the path of a dentry
931      * @dentry: dentry to report
932      * @vfsmnt: vfsmnt to which the dentry belongs
933      * @root: root dentry
934      * @rootmnt: vfsmnt to which the root dentry belongs
935      * @buffer: buffer to return value in
936      * @buflen: buffer length
937      *
938      * Convert a dentry into an ASCII path name. If the entry has been deleted
939      * the string " (deleted)" is appended. Note that this is ambiguous. Returns
940      * the buffer.
941      *
942      * "buflen" should be %PAGE_SIZE or more. Caller holds the dcache_lock.
943      */
944     char * __d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
945     		struct dentry *root, struct vfsmount *rootmnt,
946     		char *buffer, int buflen)
947     {
948     	char * end = buffer+buflen;
949     	char * retval;
950     	int namelen;
951     
952     	*--end = '\0';
953     	buflen--;
954     	if (!IS_ROOT(dentry) && list_empty(&dentry->d_hash)) {
955     		buflen -= 10;
956     		end -= 10;
957     		memcpy(end, " (deleted)", 10);
958     	}
959     
960     	/* Get '/' right */
961     	retval = end-1;
962     	*retval = '/';
963     
964     	for (;;) {
965     		struct dentry * parent;
966     
967     		if (dentry == root && vfsmnt == rootmnt)
968     			break;
969     		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
970     			/* Global root? */
971     			if (vfsmnt->mnt_parent == vfsmnt)
972     				goto global_root;
973     			dentry = vfsmnt->mnt_mountpoint;
974     			vfsmnt = vfsmnt->mnt_parent;
975     			continue;
976     		}
977     		parent = dentry->d_parent;
978     		namelen = dentry->d_name.len;
979     		buflen -= namelen + 1;
980     		if (buflen < 0)
981     			break;
982     		end -= namelen;
983     		memcpy(end, dentry->d_name.name, namelen);
984     		*--end = '/';
985     		retval = end;
986     		dentry = parent;
987     	}
988     	return retval;
989     global_root:
990     	namelen = dentry->d_name.len;
991     	buflen -= namelen;
992     	if (buflen >= 0) {
993     		retval -= namelen-1;	/* hit the slash */
994     		memcpy(retval, dentry->d_name.name, namelen);
995     	}
996     	return retval;
997     }
998     
999     /*
1000      * NOTE! The user-level library version returns a
1001      * character pointer. The kernel system call just
1002      * returns the length of the buffer filled (which
1003      * includes the ending '\0' character), or a negative
1004      * error value. So libc would do something like
1005      *
1006      *	char *getcwd(char * buf, size_t size)
1007      *	{
1008      *		int retval;
1009      *
1010      *		retval = sys_getcwd(buf, size);
1011      *		if (retval >= 0)
1012      *			return buf;
1013      *		errno = -retval;
1014      *		return NULL;
1015      *	}
1016      */
1017     asmlinkage long sys_getcwd(char *buf, unsigned long size)
1018     {
1019     	int error;
1020     	struct vfsmount *pwdmnt, *rootmnt;
1021     	struct dentry *pwd, *root;
1022     	char *page = (char *) __get_free_page(GFP_USER);
1023     
1024     	if (!page)
1025     		return -ENOMEM;
1026     
1027     	read_lock(&current->fs->lock);
1028     	pwdmnt = mntget(current->fs->pwdmnt);
1029     	pwd = dget(current->fs->pwd);
1030     	rootmnt = mntget(current->fs->rootmnt);
1031     	root = dget(current->fs->root);
1032     	read_unlock(&current->fs->lock);
1033     
1034     	error = -ENOENT;
1035     	/* Has the current directory has been unlinked? */
1036     	spin_lock(&dcache_lock);
1037     	if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash)) {
1038     		unsigned long len;
1039     		char * cwd;
1040     
1041     		cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
1042     		spin_unlock(&dcache_lock);
1043     
1044     		error = -ERANGE;
1045     		len = PAGE_SIZE + page - cwd;
1046     		if (len <= size) {
1047     			error = len;
1048     			if (copy_to_user(buf, cwd, len))
1049     				error = -EFAULT;
1050     		}
1051     	} else
1052     		spin_unlock(&dcache_lock);
1053     	dput(pwd);
1054     	mntput(pwdmnt);
1055     	dput(root);
1056     	mntput(rootmnt);
1057     	free_page((unsigned long) page);
1058     	return error;
1059     }
1060     
1061     /*
1062      * Test whether new_dentry is a subdirectory of old_dentry.
1063      *
1064      * Trivially implemented using the dcache structure
1065      */
1066     
1067     /**
1068      * is_subdir - is new dentry a subdirectory of old_dentry
1069      * @new_dentry: new dentry
1070      * @old_dentry: old dentry
1071      *
1072      * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
1073      * Returns 0 otherwise.
1074      */
1075       
1076     int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
1077     {
1078     	int result;
1079     
1080     	result = 0;
1081     	for (;;) {
1082     		if (new_dentry != old_dentry) {
1083     			struct dentry * parent = new_dentry->d_parent;
1084     			if (parent == new_dentry)
1085     				break;
1086     			new_dentry = parent;
1087     			continue;
1088     		}
1089     		result = 1;
1090     		break;
1091     	}
1092     	return result;
1093     }
1094     
1095     void d_genocide(struct dentry *root)
1096     {
1097     	struct dentry *this_parent = root;
1098     	struct list_head *next;
1099     
1100     	spin_lock(&dcache_lock);
1101     repeat:
1102     	next = this_parent->d_subdirs.next;
1103     resume:
1104     	while (next != &this_parent->d_subdirs) {
1105     		struct list_head *tmp = next;
1106     		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
1107     		next = tmp->next;
1108     		if (d_unhashed(dentry)||!dentry->d_inode)
1109     			continue;
1110     		if (!list_empty(&dentry->d_subdirs)) {
1111     			this_parent = dentry;
1112     			goto repeat;
1113     		}
1114     		atomic_dec(&dentry->d_count);
1115     	}
1116     	if (this_parent != root) {
1117     		next = this_parent->d_child.next; 
1118     		atomic_dec(&this_parent->d_count);
1119     		this_parent = this_parent->d_parent;
1120     		goto resume;
1121     	}
1122     	spin_unlock(&dcache_lock);
1123     }
1124     
1125     /**
1126      * find_inode_number - check for dentry with name
1127      * @dir: directory to check
1128      * @name: Name to find.
1129      *
1130      * Check whether a dentry already exists for the given name,
1131      * and return the inode number if it has an inode. Otherwise
1132      * 0 is returned.
1133      *
1134      * This routine is used to post-process directory listings for
1135      * filesystems using synthetic inode numbers, and is necessary
1136      * to keep getcwd() working.
1137      */
1138      
1139     ino_t find_inode_number(struct dentry *dir, struct qstr *name)
1140     {
1141     	struct dentry * dentry;
1142     	ino_t ino = 0;
1143     
1144     	/*
1145     	 * Check for a fs-specific hash function. Note that we must
1146     	 * calculate the standard hash first, as the d_op->d_hash()
1147     	 * routine may choose to leave the hash value unchanged.
1148     	 */
1149     	name->hash = full_name_hash(name->name, name->len);
1150     	if (dir->d_op && dir->d_op->d_hash)
1151     	{
1152     		if (dir->d_op->d_hash(dir, name) != 0)
1153     			goto out;
1154     	}
1155     
1156     	dentry = d_lookup(dir, name);
1157     	if (dentry)
1158     	{
1159     		if (dentry->d_inode)
1160     			ino = dentry->d_inode->i_ino;
1161     		dput(dentry);
1162     	}
1163     out:
1164     	return ino;
1165     }
1166     
1167     static void __init dcache_init(unsigned long mempages)
1168     {
1169     	struct list_head *d;
1170     	unsigned long order;
1171     	unsigned int nr_hash;
1172     	int i;
1173     
1174     	/* 
1175     	 * A constructor could be added for stable state like the lists,
1176     	 * but it is probably not worth it because of the cache nature
1177     	 * of the dcache. 
1178     	 * If fragmentation is too bad then the SLAB_HWCACHE_ALIGN
1179     	 * flag could be removed here, to hint to the allocator that
1180     	 * it should not try to get multiple page regions.  
1181     	 */
1182     	dentry_cache = kmem_cache_create("dentry_cache",
1183     					 sizeof(struct dentry),
1184     					 0,
1185     					 SLAB_HWCACHE_ALIGN,
1186     					 NULL, NULL);
1187     	if (!dentry_cache)
1188     		panic("Cannot create dentry cache");
1189     
1190     #if PAGE_SHIFT < 13
1191     	mempages >>= (13 - PAGE_SHIFT);
1192     #endif
1193     	mempages *= sizeof(struct list_head);
1194     	for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
1195     		;
1196     
1197     	do {
1198     		unsigned long tmp;
1199     
1200     		nr_hash = (1UL << order) * PAGE_SIZE /
1201     			sizeof(struct list_head);
1202     		d_hash_mask = (nr_hash - 1);
1203     
1204     		tmp = nr_hash;
1205     		d_hash_shift = 0;
1206     		while ((tmp >>= 1UL) != 0UL)
1207     			d_hash_shift++;
1208     
1209     		dentry_hashtable = (struct list_head *)
1210     			__get_free_pages(GFP_ATOMIC, order);
1211     	} while (dentry_hashtable == NULL && --order >= 0);
1212     
1213     	printk("Dentry-cache hash table entries: %d (order: %ld, %ld bytes)\n",
1214     			nr_hash, order, (PAGE_SIZE << order));
1215     
1216     	if (!dentry_hashtable)
1217     		panic("Failed to allocate dcache hash table\n");
1218     
1219     	d = dentry_hashtable;
1220     	i = nr_hash;
1221     	do {
1222     		INIT_LIST_HEAD(d);
1223     		d++;
1224     		i--;
1225     	} while (i);
1226     }
1227     
1228     static void init_buffer_head(void * foo, kmem_cache_t * cachep, unsigned long flags)
1229     {
1230     	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
1231     	    SLAB_CTOR_CONSTRUCTOR)
1232     	{
1233     		struct buffer_head * bh = (struct buffer_head *) foo;
1234     
1235     		memset(bh, 0, sizeof(*bh));
1236     		init_waitqueue_head(&bh->b_wait);
1237     	}
1238     }
1239     
1240     /* SLAB cache for __getname() consumers */
1241     kmem_cache_t *names_cachep;
1242     
1243     /* SLAB cache for file structures */
1244     kmem_cache_t *filp_cachep;
1245     
1246     /* SLAB cache for dquot structures */
1247     kmem_cache_t *dquot_cachep;
1248     
1249     /* SLAB cache for buffer_head structures */
1250     kmem_cache_t *bh_cachep;
1251     EXPORT_SYMBOL(bh_cachep);
1252     
1253     extern void bdev_cache_init(void);
1254     extern void cdev_cache_init(void);
1255     
1256     void __init vfs_caches_init(unsigned long mempages)
1257     {
1258     	bh_cachep = kmem_cache_create("buffer_head",
1259     			sizeof(struct buffer_head), 0,
1260     			SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
1261     	if(!bh_cachep)
1262     		panic("Cannot create buffer head SLAB cache");
1263     
1264     	names_cachep = kmem_cache_create("names_cache", 
1265     			PATH_MAX + 1, 0, 
1266     			SLAB_HWCACHE_ALIGN, NULL, NULL);
1267     	if (!names_cachep)
1268     		panic("Cannot create names SLAB cache");
1269     
1270     	filp_cachep = kmem_cache_create("filp", 
1271     			sizeof(struct file), 0,
1272     			SLAB_HWCACHE_ALIGN, NULL, NULL);
1273     	if(!filp_cachep)
1274     		panic("Cannot create filp SLAB cache");
1275     
1276     #if defined (CONFIG_QUOTA)
1277     	dquot_cachep = kmem_cache_create("dquot", 
1278     			sizeof(struct dquot), sizeof(unsigned long) * 4,
1279     			SLAB_HWCACHE_ALIGN, NULL, NULL);
1280     	if (!dquot_cachep)
1281     		panic("Cannot create dquot SLAB cache");
1282     #endif
1283     
1284     	dcache_init(mempages);
1285     	inode_init(mempages);
1286     	mnt_init(mempages);
1287     	bdev_cache_init();
1288     	cdev_cache_init();
1289     }
1290