File: /usr/src/linux/fs/namei.c

1     /*
2      *  linux/fs/namei.c
3      *
4      *  Copyright (C) 1991, 1992  Linus Torvalds
5      */
6     
7     /*
8      * Some corrections by tytso.
9      */
10     
11     /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
12      * lookup logic.
13      */
14     /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
15      */
16     
17     #include <linux/init.h>
18     #include <linux/slab.h>
19     #include <linux/fs.h>
20     #include <linux/quotaops.h>
21     #include <linux/pagemap.h>
22     #include <linux/dnotify.h>
23     #include <linux/smp_lock.h>
24     #include <linux/personality.h>
25     
26     #include <asm/namei.h>
27     #include <asm/uaccess.h>
28     
/*
 * Translate the two low access-mode bits of an open flag word into a
 * permission mask by indexing a four-byte table: 0 -> 0, 1 -> 04,
 * 2 -> 02, 3 -> 06.  open_namei() below uses the result as its acc_mode.
 * NOTE(review): 04/02 presumably correspond to MAY_READ/MAY_WRITE - confirm
 * against the definitions in <linux/fs.h>.
 */
#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
30     
31     /* [Feb-1997 T. Schoebel-Theuer]
32      * Fundamental changes in the pathname lookup mechanisms (namei)
33      * were necessary because of omirr.  The reason is that omirr needs
34      * to know the _real_ pathname, not the user-supplied one, in case
35      * of symlinks (and also when transname replacements occur).
36      *
37      * The new code replaces the old recursive symlink resolution with
38      * an iterative one (in case of non-nested symlink chains).  It does
39      * this with calls to <fs>_follow_link().
40      * As a side effect, dir_namei(), _namei() and follow_link() are now 
41      * replaced with a single function lookup_dentry() that can handle all 
42      * the special cases of the former code.
43      *
44      * With the new dcache, the pathname is stored at each inode, at least as
45      * long as the refcount of the inode is positive.  As a side effect, the
46      * size of the dcache depends on the inode cache and thus is dynamic.
47      *
48      * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
49      * resolution to correspond with current state of the code.
50      *
51      * Note that the symlink resolution is not *completely* iterative.
52      * There is still a significant amount of tail- and mid- recursion in
53      * the algorithm.  Also, note that <fs>_readlink() is not used in
54      * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
55      * may return different results than <fs>_follow_link().  Many virtual
56      * filesystems (including /proc) exhibit this behavior.
57      */
58     
59     /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
60      * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
61      * and the name already exists in form of a symlink, try to create the new
62      * name indicated by the symlink. The old code always complained that the
63      * name already exists, due to not following the symlink even if its target
64      * is nonexistent.  The new semantics affects also mknod() and link() when
65      * the name is a symlink pointing to a non-existent name.
66      *
67      * I don't know which semantics is the right one, since I have no access
68      * to standards. But I found by trial that HP-UX 9.0 has the full "new"
69      * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
70      * "old" one. Personally, I think the new semantics is much more logical.
71      * Note that "ln old new" where "new" is a symlink pointing to a non-existing
72      * file does succeed in both HP-UX and SunOS, but not in Solaris
73      * and in the old Linux semantics.
74      */
75     
76     /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
77      * semantics.  See the comments in "open_namei" and "do_link" below.
78      *
79      * [10-Sep-98 Alan Modra] Another symlink change.
80      */
81     
82     /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
83      *	inside the path - always follow.
84      *	in the last component in creation/removal/renaming - never follow.
85      *	if LOOKUP_FOLLOW passed - follow.
86      *	if the pathname has trailing slashes - follow.
87      *	otherwise - don't follow.
88      * (applied in that order).
89      *
90      * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
91      * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
92      * During the 2.4 we need to fix the userland stuff depending on it -
93      * hopefully we will be able to get rid of that wart in 2.5. So far only
94      * XEmacs seems to be relying on it...
95      */
96     
97     /* In order to reduce some races, while at the same time doing additional
98      * checking and hopefully speeding things up, we copy filenames to the
99      * kernel data space before using them..
100      *
101      * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
102      */
103     static inline int do_getname(const char *filename, char *page)
104     {
105     	int retval;
106     	unsigned long len = PATH_MAX + 1;
107     
108     	if ((unsigned long) filename >= TASK_SIZE) {
109     		if (!segment_eq(get_fs(), KERNEL_DS))
110     			return -EFAULT;
111     	} else if (TASK_SIZE - (unsigned long) filename < PATH_MAX + 1)
112     		len = TASK_SIZE - (unsigned long) filename;
113     
114     	retval = strncpy_from_user((char *)page, filename, len);
115     	if (retval > 0) {
116     		if (retval < len)
117     			return 0;
118     		return -ENAMETOOLONG;
119     	} else if (!retval)
120     		retval = -ENOENT;
121     	return retval;
122     }
123     
124     char * getname(const char * filename)
125     {
126     	char *tmp, *result;
127     
128     	result = ERR_PTR(-ENOMEM);
129     	tmp = __getname();
130     	if (tmp)  {
131     		int retval = do_getname(filename, tmp);
132     
133     		result = tmp;
134     		if (retval < 0) {
135     			putname(tmp);
136     			result = ERR_PTR(retval);
137     		}
138     	}
139     	return result;
140     }
141     
142     /*
143      *	vfs_permission()
144      *
145      * is used to check for read/write/execute permissions on a file.
146      * We use "fsuid" for this, letting us set arbitrary permissions
147      * for filesystem access without changing the "normal" uids which
148      * are used for other things..
149      */
150     int vfs_permission(struct inode * inode, int mask)
151     {
152     	umode_t			mode = inode->i_mode;
153     
154     	if (mask & MAY_WRITE) {
155     		/*
156     		 * Nobody gets write access to a read-only fs.
157     		 */
158     		if (IS_RDONLY(inode) &&
159     		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
160     			return -EROFS;
161     
162     		/*
163     		 * Nobody gets write access to an immutable file.
164     		 */
165     		if (IS_IMMUTABLE(inode))
166     			return -EACCES;
167     	}
168     
169     	if (current->fsuid == inode->i_uid)
170     		mode >>= 6;
171     	else if (in_group_p(inode->i_gid))
172     		mode >>= 3;
173     
174     	/*
175     	 * If the DACs are ok we don't need any capability check.
176     	 */
177     	if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
178     		return 0;
179     
180     	/*
181     	 * Read/write DACs are always overridable.
182     	 * Executable DACs are overridable if at least one exec bit is set.
183     	 */
184     	if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO))
185     		if (capable(CAP_DAC_OVERRIDE))
186     			return 0;
187     
188     	/*
189     	 * Searching includes executable on directories, else just read.
190     	 */
191     	if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
192     		if (capable(CAP_DAC_READ_SEARCH))
193     			return 0;
194     
195     	return -EACCES;
196     }
197     
198     int permission(struct inode * inode,int mask)
199     {
200     	if (inode->i_op && inode->i_op->permission) {
201     		int retval;
202     		lock_kernel();
203     		retval = inode->i_op->permission(inode, mask);
204     		unlock_kernel();
205     		return retval;
206     	}
207     	return vfs_permission(inode, mask);
208     }
209     
210     /*
211      * get_write_access() gets write permission for a file.
212      * put_write_access() releases this write permission.
213      * This is used for regular files.
214      * We cannot support write (and maybe mmap read-write shared) accesses and
215      * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
216      * can have the following values:
217      * 0: no writers, no VM_DENYWRITE mappings
218      * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
219      * > 0: (i_writecount) users are writing to the file.
220      *
221      * Normally we operate on that counter with atomic_{inc,dec} and it's safe
222      * except for the cases where we don't hold i_writecount yet. Then we need to
223      * use {get,deny}_write_access() - these functions check the sign and refuse
224      * to do the change if sign is wrong. Exclusion between them is provided by
225      * spinlock (arbitration_lock) and I'll rip the second arsehole to the first
226      * who will try to move it in struct inode - just leave it here.
227      */
228     static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;
229     int get_write_access(struct inode * inode)
230     {
231     	spin_lock(&arbitration_lock);
232     	if (atomic_read(&inode->i_writecount) < 0) {
233     		spin_unlock(&arbitration_lock);
234     		return -ETXTBSY;
235     	}
236     	atomic_inc(&inode->i_writecount);
237     	spin_unlock(&arbitration_lock);
238     	return 0;
239     }
240     int deny_write_access(struct file * file)
241     {
242     	spin_lock(&arbitration_lock);
243     	if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
244     		spin_unlock(&arbitration_lock);
245     		return -ETXTBSY;
246     	}
247     	atomic_dec(&file->f_dentry->d_inode->i_writecount);
248     	spin_unlock(&arbitration_lock);
249     	return 0;
250     }
251     
252     void path_release(struct nameidata *nd)
253     {
254     	dput(nd->dentry);
255     	mntput(nd->mnt);
256     }
257     
258     /*
259      * Internal lookup() using the new generic dcache.
260      * SMP-safe
261      */
262     static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
263     {
264     	struct dentry * dentry = d_lookup(parent, name);
265     
266     	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
267     		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
268     			dput(dentry);
269     			dentry = NULL;
270     		}
271     	}
272     	return dentry;
273     }
274     
275     /*
276      * This is called when everything else fails, and we actually have
277      * to go to the low-level filesystem to find out what we should do..
278      *
279      * We get the directory semaphore, and after getting that we also
280      * make sure that nobody added the entry to the dcache in the meantime..
281      * SMP-safe
282      */
283     static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
284     {
285     	struct dentry * result;
286     	struct inode *dir = parent->d_inode;
287     
288     	down(&dir->i_sem);
289     	/*
290     	 * First re-do the cached lookup just in case it was created
291     	 * while we waited for the directory semaphore..
292     	 *
293     	 * FIXME! This could use version numbering or similar to
294     	 * avoid unnecessary cache lookups.
295     	 */
296     	result = d_lookup(parent, name);
297     	if (!result) {
298     		struct dentry * dentry = d_alloc(parent, name);
299     		result = ERR_PTR(-ENOMEM);
300     		if (dentry) {
301     			lock_kernel();
302     			result = dir->i_op->lookup(dir, dentry);
303     			unlock_kernel();
304     			if (result)
305     				dput(dentry);
306     			else
307     				result = dentry;
308     		}
309     		up(&dir->i_sem);
310     		return result;
311     	}
312     
313     	/*
314     	 * Uhhuh! Nasty case: the cache was re-populated while
315     	 * we waited on the semaphore. Need to revalidate.
316     	 */
317     	up(&dir->i_sem);
318     	if (result->d_op && result->d_op->d_revalidate) {
319     		if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
320     			dput(result);
321     			result = ERR_PTR(-ENOENT);
322     		}
323     	}
324     	return result;
325     }
326     
327     static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
328     {
329     	int err;
330     	if (current->link_count >= 8)
331     		goto loop;
332     	if (current->need_resched) {
333     		current->state = TASK_RUNNING;
334     		schedule();
335     	}
336     	current->link_count++;
337     	UPDATE_ATIME(dentry->d_inode);
338     	err = dentry->d_inode->i_op->follow_link(dentry, nd);
339     	current->link_count--;
340     	return err;
341     loop:
342     	path_release(nd);
343     	return -ELOOP;
344     }
345     
346     static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
347     {
348     	struct vfsmount *parent;
349     	struct dentry *dentry;
350     	spin_lock(&dcache_lock);
351     	parent=(*mnt)->mnt_parent;
352     	if (parent == *mnt) {
353     		spin_unlock(&dcache_lock);
354     		return 0;
355     	}
356     	mntget(parent);
357     	dentry=dget((*mnt)->mnt_mountpoint);
358     	spin_unlock(&dcache_lock);
359     	dput(*base);
360     	*base = dentry;
361     	mntput(*mnt);
362     	*mnt = parent;
363     	return 1;
364     }
365     
/* Out-of-line entry point for __follow_up(); same arguments and result. */
int follow_up(struct vfsmount **mnt, struct dentry **dentry)
{
	int moved = __follow_up(mnt, dentry);

	return moved;
}
370     
371     static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
372     {
373     	struct vfsmount *mounted;
374     
375     	spin_lock(&dcache_lock);
376     	mounted = lookup_mnt(*mnt, *dentry);
377     	if (mounted) {
378     		*mnt = mntget(mounted);
379     		spin_unlock(&dcache_lock);
380     		dput(*dentry);
381     		mntput(mounted->mnt_parent);
382     		*dentry = dget(mounted->mnt_root);
383     		return 1;
384     	}
385     	spin_unlock(&dcache_lock);
386     	return 0;
387     }
388     
/* Out-of-line entry point for __follow_down(); same arguments and result. */
int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
	int moved = __follow_down(mnt, dentry);

	return moved;
}
393      
394     static inline void follow_dotdot(struct nameidata *nd)
395     {
396     	while(1) {
397     		struct vfsmount *parent;
398     		struct dentry *dentry;
399     		read_lock(&current->fs->lock);
400     		if (nd->dentry == current->fs->root &&
401     		    nd->mnt == current->fs->rootmnt)  {
402     			read_unlock(&current->fs->lock);
403     			break;
404     		}
405     		read_unlock(&current->fs->lock);
406     		spin_lock(&dcache_lock);
407     		if (nd->dentry != nd->mnt->mnt_root) {
408     			dentry = dget(nd->dentry->d_parent);
409     			spin_unlock(&dcache_lock);
410     			dput(nd->dentry);
411     			nd->dentry = dentry;
412     			break;
413     		}
414     		parent=nd->mnt->mnt_parent;
415     		if (parent == nd->mnt) {
416     			spin_unlock(&dcache_lock);
417     			break;
418     		}
419     		mntget(parent);
420     		dentry=dget(nd->mnt->mnt_mountpoint);
421     		spin_unlock(&dcache_lock);
422     		dput(nd->dentry);
423     		nd->dentry = dentry;
424     		mntput(nd->mnt);
425     		nd->mnt = parent;
426     	}
427     }
/*
 * Name resolution.
 *
 * This is the basic name resolution function, turning a pathname
 * into the final dentry.
 *
 * On entry nd->dentry/nd->mnt give the starting point ('base') and
 * nd->flags the LOOKUP_* flags.  We expect 'base' to be positive and a
 * directory.
 *
 * Returns 0 on success.  nd->dentry/nd->mnt then describe the result; with
 * LOOKUP_PARENT they describe the directory reached before the last
 * component, and that component is stored in nd->last with its kind in
 * nd->last_type.
 *
 * Returns a negative errno on failure.  Every failure that leaves through
 * the bottom of the loop passes nd to path_release().  A failing
 * do_follow_link() jumps to return_err instead, where nd is NOT released
 * here (do_follow_link() releases it itself on -ELOOP; NOTE(review):
 * presumably ->follow_link() implementations release nd on their own
 * errors - confirm).
 *
 * Beware: all labels except return_err live inside the for(;;) body and
 * are entered by goto, including once from before the loop.
 */
int path_walk(const char * name, struct nameidata *nd)
{
	struct dentry *dentry;
	struct inode *inode;
	int err;
	unsigned int lookup_flags = nd->flags;

	/* Leading slashes carry no information; only slashes means "base". */
	while (*name=='/')
		name++;
	if (!*name)
		goto return_base;

	inode = nd->dentry->d_inode;
	/*
	 * link_count is non-zero only while do_follow_link() is running, so
	 * here the caller's flags are replaced by plain LOOKUP_FOLLOW.
	 */
	if (current->link_count)
		lookup_flags = LOOKUP_FOLLOW;

	/* At this point we know we have a real path component. */
	for(;;) {
		unsigned long hash;
		struct qstr this;
		unsigned int c;

		/* Need MAY_EXEC on the directory we are about to search. */
		err = permission(inode, MAY_EXEC);
		dentry = ERR_PTR(err);
 		if (err)
			break;

		this.name = name;
		c = *(const unsigned char *)name;

		/* Hash the component, stopping at the next '/' or the NUL. */
		hash = init_name_hash();
		do {
			name++;
			hash = partial_name_hash(c, hash);
			c = *(const unsigned char *)name;
		} while (c && (c != '/'));
		this.len = name - (const char *) this.name;
		this.hash = end_name_hash(hash);

		/*
		 * NUL right after the component: it is the last one.
		 * Otherwise skip the run of slashes; if nothing follows them
		 * it is the last one too, but with trailing slashes.
		 */
		if (!c)
			goto last_component;
		while (*++name == '/');
		if (!*name)
			goto last_with_slashes;

		/*
		 * "." and ".." are special - ".." especially so because it has
		 * to be able to know about the current root directory and
		 * parent relationships.
		 */
		if (this.name[0] == '.') switch (this.len) {
			default:
				break;
			case 2:	
				if (this.name[1] != '.')
					break;
				follow_dotdot(nd);
				inode = nd->dentry->d_inode;
				/* fallthrough */
			case 1:
				continue;
		}
		/*
		 * See if the low-level filesystem might want
		 * to use its own hash..
		 */
		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
			err = nd->dentry->d_op->d_hash(nd->dentry, &this);
			if (err < 0)
				break;
		}
		/* This does the actual lookups.. */
		dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
		if (!dentry) {
			dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
			err = PTR_ERR(dentry);
			if (IS_ERR(dentry))
				break;
		}
		/* Check mountpoints.. */
		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
			;

		/* An intermediate component must exist and have inode ops. */
		err = -ENOENT;
		inode = dentry->d_inode;
		if (!inode)
			goto out_dput;
		err = -ENOTDIR; 
		if (!inode->i_op)
			goto out_dput;

		if (inode->i_op->follow_link) {
			/*
			 * Inside the path links are always followed.  Our
			 * reference on the link dentry is dropped afterwards;
			 * the walk continues from whatever nd holds now.
			 */
			err = do_follow_link(dentry, nd);
			dput(dentry);
			if (err)
				goto return_err;
			err = -ENOENT;
			inode = nd->dentry->d_inode;
			if (!inode)
				break;
			err = -ENOTDIR; 
			if (!inode->i_op)
				break;
		} else {
			/* Plain step: the new dentry replaces the old one in nd. */
			dput(nd->dentry);
			nd->dentry = dentry;
		}
		/* More components follow, so this one must support ->lookup(). */
		err = -ENOTDIR; 
		if (!inode->i_op->lookup)
			break;
		continue;
		/* here ends the main loop */

last_with_slashes:
		/* Trailing slashes: follow a final link and insist on a directory. */
		lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
		/* With LOOKUP_PARENT the last component is recorded, not looked up. */
		if (lookup_flags & LOOKUP_PARENT)
			goto lookup_parent;
		if (this.name[0] == '.') switch (this.len) {
			default:
				break;
			case 2:	
				if (this.name[1] != '.')
					break;
				follow_dotdot(nd);
				inode = nd->dentry->d_inode;
				/* fallthrough */
			case 1:
				goto return_base;
		}
		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
			err = nd->dentry->d_op->d_hash(nd->dentry, &this);
			if (err < 0)
				break;
		}
		/* Same lookup as above, but with flags 0 instead of LOOKUP_CONTINUE. */
		dentry = cached_lookup(nd->dentry, &this, 0);
		if (!dentry) {
			dentry = real_lookup(nd->dentry, &this, 0);
			err = PTR_ERR(dentry);
			if (IS_ERR(dentry))
				break;
		}
		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
			;
		inode = dentry->d_inode;
		/* A final link is followed only when LOOKUP_FOLLOW is set. */
		if ((lookup_flags & LOOKUP_FOLLOW)
		    && inode && inode->i_op && inode->i_op->follow_link) {
			err = do_follow_link(dentry, nd);
			dput(dentry);
			if (err)
				goto return_err;
			inode = nd->dentry->d_inode;
		} else {
			dput(nd->dentry);
			nd->dentry = dentry;
		}
		err = -ENOENT;
		if (!inode)
			goto no_inode;
		if (lookup_flags & LOOKUP_DIRECTORY) {
			err = -ENOTDIR; 
			if (!inode->i_op || !inode->i_op->lookup)
				break;
		}
		goto return_base;
no_inode:
		/*
		 * A negative result is an error only if the caller asked for
		 * a positive dentry or a directory; otherwise it is returned.
		 */
		err = -ENOENT;
		if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY))
			break;
		goto return_base;
lookup_parent:
		/* Record the last component and classify it as normal, "." or "..". */
		nd->last = this;
		nd->last_type = LAST_NORM;
		if (this.name[0] != '.')
			goto return_base;
		if (this.len == 1)
			nd->last_type = LAST_DOT;
		else if (this.len == 2 && this.name[1] == '.')
			nd->last_type = LAST_DOTDOT;
return_base:
		return 0;
out_dput:
		dput(dentry);
		break;
	}
	/* Every 'break' above lands here: release nd and report err. */
	path_release(nd);
return_err:
	return err;
}
626     
627     /* SMP-safe */
628     /* returns 1 if everything is done */
629     static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
630     {
631     	if (path_walk(name, nd))
632     		return 0;
633     
634     	if (!nd->dentry->d_inode) {
635     		struct nameidata nd_root;
636     		nd_root.last_type = LAST_ROOT;
637     		nd_root.flags = nd->flags;
638     		read_lock(&current->fs->lock);
639     		nd_root.mnt = mntget(current->fs->rootmnt);
640     		nd_root.dentry = dget(current->fs->root);
641     		read_unlock(&current->fs->lock);
642     		if (path_walk(name, &nd_root))
643     			return 1;
644     		if (nd_root.dentry->d_inode) {
645     			path_release(nd);
646     			nd->dentry = nd_root.dentry;
647     			nd->mnt = nd_root.mnt;
648     			nd->last = nd_root.last;
649     			return 1;
650     		}
651     		path_release(&nd_root);
652     	}
653     	return 1;
654     }
655     
656     void set_fs_altroot(void)
657     {
658     	char *emul = __emul_prefix();
659     	struct nameidata nd;
660     	struct vfsmount *mnt = NULL, *oldmnt;
661     	struct dentry *dentry = NULL, *olddentry;
662     	if (emul) {
663     		read_lock(&current->fs->lock);
664     		nd.mnt = mntget(current->fs->rootmnt);
665     		nd.dentry = dget(current->fs->root);
666     		read_unlock(&current->fs->lock);
667     		nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE;
668     		if (path_walk(emul,&nd) == 0) {
669     			mnt = nd.mnt;
670     			dentry = nd.dentry;
671     		}
672     	}
673     	write_lock(&current->fs->lock);
674     	oldmnt = current->fs->altrootmnt;
675     	olddentry = current->fs->altroot;
676     	current->fs->altrootmnt = mnt;
677     	current->fs->altroot = dentry;
678     	write_unlock(&current->fs->lock);
679     	if (olddentry) {
680     		dput(olddentry);
681     		mntput(oldmnt);
682     	}
683     }
684     
685     /* SMP-safe */
686     static inline int
687     walk_init_root(const char *name, struct nameidata *nd)
688     {
689     	read_lock(&current->fs->lock);
690     	if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
691     		nd->mnt = mntget(current->fs->altrootmnt);
692     		nd->dentry = dget(current->fs->altroot);
693     		read_unlock(&current->fs->lock);
694     		if (__emul_lookup_dentry(name,nd))
695     			return 0;
696     		read_lock(&current->fs->lock);
697     	}
698     	nd->mnt = mntget(current->fs->rootmnt);
699     	nd->dentry = dget(current->fs->root);
700     	read_unlock(&current->fs->lock);
701     	return 1;
702     }
703     
704     /* SMP-safe */
705     int path_init(const char *name, unsigned int flags, struct nameidata *nd)
706     {
707     	nd->last_type = LAST_ROOT; /* if there are only slashes... */
708     	nd->flags = flags;
709     	if (*name=='/')
710     		return walk_init_root(name,nd);
711     	read_lock(&current->fs->lock);
712     	nd->mnt = mntget(current->fs->pwdmnt);
713     	nd->dentry = dget(current->fs->pwd);
714     	read_unlock(&current->fs->lock);
715     	return 1;
716     }
717     
718     /*
719      * Restricted form of lookup. Doesn't follow links, single-component only,
720      * needs parent already locked. Doesn't follow mounts.
721      * SMP-safe.
722      */
723     struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
724     {
725     	struct dentry * dentry;
726     	struct inode *inode;
727     	int err;
728     
729     	inode = base->d_inode;
730     	err = permission(inode, MAY_EXEC);
731     	dentry = ERR_PTR(err);
732     	if (err)
733     		goto out;
734     
735     	/*
736     	 * See if the low-level filesystem might want
737     	 * to use its own hash..
738     	 */
739     	if (base->d_op && base->d_op->d_hash) {
740     		err = base->d_op->d_hash(base, name);
741     		dentry = ERR_PTR(err);
742     		if (err < 0)
743     			goto out;
744     	}
745     
746     	dentry = cached_lookup(base, name, 0);
747     	if (!dentry) {
748     		struct dentry *new = d_alloc(base, name);
749     		dentry = ERR_PTR(-ENOMEM);
750     		if (!new)
751     			goto out;
752     		lock_kernel();
753     		dentry = inode->i_op->lookup(inode, new);
754     		unlock_kernel();
755     		if (!dentry)
756     			dentry = new;
757     		else
758     			dput(new);
759     	}
760     out:
761     	return dentry;
762     }
763     
764     /* SMP-safe */
765     struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
766     {
767     	unsigned long hash;
768     	struct qstr this;
769     	unsigned int c;
770     
771     	this.name = name;
772     	this.len = len;
773     	if (!len)
774     		goto access;
775     
776     	hash = init_name_hash();
777     	while (len--) {
778     		c = *(const unsigned char *)name++;
779     		if (c == '/' || c == '\0')
780     			goto access;
781     		hash = partial_name_hash(c, hash);
782     	}
783     	this.hash = end_name_hash(hash);
784     
785     	return lookup_hash(&this, base);
786     access:
787     	return ERR_PTR(-EACCES);
788     }
789     
/*
 *	__user_walk()
 *
 * Resolve a pathname given as a user-space pointer: the name is copied
 * in with getname(), walked with path_init()/path_walk() using 'flags',
 * and the copy is handed back with putname().
 *
 * Returns 0 with the result in nd, or a negative errno from getname()
 * or path_walk().
 * SMP-safe
 */
int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *kname = getname(name);
	int err = 0;

	if (IS_ERR(kname))
		return PTR_ERR(kname);

	if (path_init(kname, flags, nd))
		err = path_walk(kname, nd);
	putname(kname);
	return err;
}
816     
817     /*
818      * It's inline, so penalty for filesystems that don't use sticky bit is
819      * minimal.
820      */
821     static inline int check_sticky(struct inode *dir, struct inode *inode)
822     {
823     	if (!(dir->i_mode & S_ISVTX))
824     		return 0;
825     	if (inode->i_uid == current->fsuid)
826     		return 0;
827     	if (dir->i_uid == current->fsuid)
828     		return 0;
829     	return !capable(CAP_FOWNER);
830     }
831     
832     /*
833      *	Check whether we can remove a link victim from directory dir, check
834      *  whether the type of victim is right.
835      *  1. We can't do it if dir is read-only (done in permission())
836      *  2. We should have write and exec permissions on dir
837      *  3. We can't remove anything from append-only dir
838      *  4. We can't do anything with immutable dir (done in permission())
839      *  5. If the sticky bit on dir is set we should either
840      *	a. be owner of dir, or
841      *	b. be owner of victim, or
842      *	c. have CAP_FOWNER capability
843      *  6. If the victim is append-only or immutable we can't do antyhing with
844      *     links pointing to it.
845      *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
846      *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
847      *  9. We can't remove a root or mountpoint.
848      */
849     static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
850     {
851     	int error;
852     	if (!victim->d_inode || victim->d_parent->d_inode != dir)
853     		return -ENOENT;
854     	error = permission(dir,MAY_WRITE | MAY_EXEC);
855     	if (error)
856     		return error;
857     	if (IS_APPEND(dir))
858     		return -EPERM;
859     	if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
860     	    IS_IMMUTABLE(victim->d_inode))
861     		return -EPERM;
862     	if (isdir) {
863     		if (!S_ISDIR(victim->d_inode->i_mode))
864     			return -ENOTDIR;
865     		if (IS_ROOT(victim))
866     			return -EBUSY;
867     	} else if (S_ISDIR(victim->d_inode->i_mode))
868     		return -EISDIR;
869     	return 0;
870     }
871     
872     /*	Check whether we can create an object with dentry child in directory
873      *  dir.
874      *  1. We can't do it if child already exists (open has special treatment for
875      *     this case, but since we are inlined it's OK)
876      *  2. We can't do it if dir is read-only (done in permission())
877      *  3. We should have write and exec permissions on dir
878      *  4. We can't do it if dir is immutable (done in permission())
879      */
880     static inline int may_create(struct inode *dir, struct dentry *child) {
881     	if (child->d_inode)
882     		return -EEXIST;
883     	if (IS_DEADDIR(dir))
884     		return -ENOENT;
885     	return permission(dir,MAY_WRITE | MAY_EXEC);
886     }
887     
888     /* 
889      * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
890      * reasons.
891      *
892      * O_DIRECTORY translates into forcing a directory lookup.
893      */
894     static inline int lookup_flags(unsigned int f)
895     {
896     	unsigned long retval = LOOKUP_FOLLOW;
897     
898     	if (f & O_NOFOLLOW)
899     		retval &= ~LOOKUP_FOLLOW;
900     	
901     	if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
902     		retval &= ~LOOKUP_FOLLOW;
903     	
904     	if (f & O_DIRECTORY)
905     		retval |= LOOKUP_DIRECTORY;
906     
907     	return retval;
908     }
909     
910     int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
911     {
912     	int error;
913     
914     	mode &= S_IALLUGO;
915     	mode |= S_IFREG;
916     
917     	down(&dir->i_zombie);
918     	error = may_create(dir, dentry);
919     	if (error)
920     		goto exit_lock;
921     
922     	error = -EACCES;	/* shouldn't it be ENOSYS? */
923     	if (!dir->i_op || !dir->i_op->create)
924     		goto exit_lock;
925     
926     	DQUOT_INIT(dir);
927     	lock_kernel();
928     	error = dir->i_op->create(dir, dentry, mode);
929     	unlock_kernel();
930     exit_lock:
931     	up(&dir->i_zombie);
932     	if (!error)
933     		inode_dir_notify(dir, DN_CREATE);
934     	return error;
935     }
936     
/*
 *	open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are 00 - no permissions needed
 *			  01 - read permission needed
 *			  10 - write permission needed
 *			  11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
 *
 * "mode" is only used when a file is created; the caller's umask is
 * applied to it here.
 *
 * Returns 0 on success, with nd->dentry set to the dentry that was
 * found or created.  Failures that leave through the "exit" label drop
 * the nameidata with path_release().  The early returns after
 * path_walk() and ->follow_link() do not release it here (presumably
 * those callees already dropped nd on failure - confirm).
 *
 * SMP-safe
 */
int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
{
	int acc_mode, error = 0;
	struct inode *inode;
	struct dentry *dentry;
	struct dentry *dir;
	int count = 0;	/* number of trips through do_link so far */

	/* Permission mask selected by the low access-mode bits of flag. */
	acc_mode = ACC_MODE(flag);

	/*
	 * The simplest case - just a plain lookup.
	 */
	if (!(flag & O_CREAT)) {
		/* path_walk() only runs when path_init() returns non-zero. */
		if (path_init(pathname, lookup_flags(flag), nd))
			error = path_walk(pathname, nd);
		if (error)
			return error;
		dentry = nd->dentry;
		goto ok;
	}

	/*
	 * Create - we need to know the parent.
	 */
	if (path_init(pathname, LOOKUP_PARENT, nd))
		error = path_walk(pathname, nd);
	if (error)
		return error;

	/*
	 * We have the parent and last component. First of all, check
	 * that we are not asked to creat(2) an obvious directory - that
	 * will not do.
	 */
	error = -EISDIR;
	/* A non-NUL byte right after the last component also counts. */
	if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
		goto exit;

	/* Look the last component up with the parent's i_sem held. */
	dir = nd->dentry;
	down(&dir->d_inode->i_sem);
	dentry = lookup_hash(&nd->last, nd->dentry);

	/*
	 * Entered with dir->d_inode->i_sem held and "dentry" holding the
	 * lookup result; also the re-entry point after a symlink has been
	 * followed by hand at do_link.
	 */
do_last:
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry)) {
		up(&dir->d_inode->i_sem);
		goto exit;
	}

	/* Negative dentry, just create the file */
	if (!dentry->d_inode) {
		error = vfs_create(dir->d_inode, dentry,
				   mode & ~current->fs->umask);
		up(&dir->d_inode->i_sem);
		/* nd now refers to the new file instead of its parent. */
		dput(nd->dentry);
		nd->dentry = dentry;
		if (error)
			goto exit;
		/* Don't check for write permission, don't truncate */
		acc_mode = 0;
		flag &= ~O_TRUNC;
		goto ok;
	}

	/*
	 * It already exists.
	 */
	up(&dir->d_inode->i_sem);

	error = -EEXIST;
	if (flag & O_EXCL)
		goto exit_dput;

	if (d_mountpoint(dentry)) {
		error = -ELOOP;
		if (flag & O_NOFOLLOW)
			goto exit_dput;
		/* Keep stepping down while the result is still a mountpoint. */
		while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
	}
	error = -ENOENT;
	if (!dentry->d_inode)
		goto exit_dput;
	/* An inode with ->follow_link() is resolved by hand at do_link. */
	if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
		goto do_link;

	/* Existing non-symlink: make it the result held in nd. */
	dput(nd->dentry);
	nd->dentry = dentry;
	error = -EISDIR;
	if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
		goto exit;
	/*
	 * Common tail: "dentry" is the object to open and is the one held
	 * in nd->dentry.  Validate it against the requested access.
	 */
ok:
	error = -ENOENT;
	inode = dentry->d_inode;
	if (!inode)
		goto exit;

	error = -ELOOP;
	if (S_ISLNK(inode->i_mode))
		goto exit;
	
	error = -EISDIR;
	if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
		goto exit;

	error = permission(inode,acc_mode);
	if (error)
		goto exit;

	/*
	 * FIFO's, sockets and device files are special: they don't
	 * actually live on the filesystem itself, and as such you
	 * can write to them even if the filesystem is read-only.
	 */
	if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
	    	flag &= ~O_TRUNC;
	} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
		error = -EACCES;
		if (nd->mnt->mnt_flags & MNT_NODEV)
			goto exit;

		flag &= ~O_TRUNC;
	} else {
		error = -EROFS;
		/* 2 is the "write permission needed" bit described above. */
		if (IS_RDONLY(inode) && (flag & 2))
			goto exit;
	}
	/*
	 * An append-only file must be opened in append mode for writing.
	 */
	error = -EPERM;
	if (IS_APPEND(inode)) {
		if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
			goto exit;
		if (flag & O_TRUNC)
			goto exit;
	}

	/*
	 * Ensure there are no outstanding leases on the file.
	 */
	error = get_lease(inode, flag);
	if (error)
		goto exit;

	if (flag & O_TRUNC) {
		/* Write access is held only for the duration of the truncate. */
		error = get_write_access(inode);
		if (error)
			goto exit;

		/*
		 * Refuse to truncate files with mandatory locks held on them.
		 */
		error = locks_verify_locked(inode);
		if (!error) {
			DQUOT_INIT(inode);
			
			error = do_truncate(dentry, 0);
		}
		put_write_access(inode);
		if (error)
			goto exit;
	} else
		if (flag & FMODE_WRITE)
			DQUOT_INIT(inode);

	return 0;

	/* exit_dput: "dentry" is an extra reference not yet stored in nd. */
exit_dput:
	dput(dentry);
exit:
	path_release(nd);
	return error;

do_link:
	error = -ELOOP;
	if (flag & O_NOFOLLOW)
		goto exit_dput;
	/*
	 * This is subtle. Instead of calling do_follow_link() we do the
	 * thing by hands. The reason is that this way we have zero link_count
	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
	 * After that we have the parent and last component, i.e.
	 * we are in the same situation as after the first path_walk().
	 * Well, almost - if the last component is normal we get its copy
	 * stored in nd->last.name and we will have to putname() it when we
	 * are done. Procfs-like symlinks just set LAST_BIND.
	 */
	UPDATE_ATIME(dentry->d_inode);
	error = dentry->d_inode->i_op->follow_link(dentry, nd);
	dput(dentry);
	if (error)
		return error;
	/* LAST_BIND: nd->dentry already is the final object. */
	if (nd->last_type == LAST_BIND) {
		dentry = nd->dentry;
		goto ok;
	}
	error = -EISDIR;
	/* Only LAST_NORM comes with a copied name that needs putname(). */
	if (nd->last_type != LAST_NORM)
		goto exit;
	if (nd->last.name[nd->last.len]) {
		putname(nd->last.name);
		goto exit;
	}
	error = -ELOOP;
	/* Give up once the counter has reached 32. */
	if (count++==32) {
		putname(nd->last.name);
		goto exit;
	}
	/* Same state as after the first parent lookup: retry from do_last. */
	dir = nd->dentry;
	down(&dir->d_inode->i_sem);
	dentry = lookup_hash(&nd->last, nd->dentry);
	putname(nd->last.name);
	goto do_last;
}
1166     
1167     /* SMP-safe */
1168     static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1169     {
1170     	struct dentry *dentry;
1171     
1172     	down(&nd->dentry->d_inode->i_sem);
1173     	dentry = ERR_PTR(-EEXIST);
1174     	if (nd->last_type != LAST_NORM)
1175     		goto fail;
1176     	dentry = lookup_hash(&nd->last, nd->dentry);
1177     	if (IS_ERR(dentry))
1178     		goto fail;
1179     	if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1180     		goto enoent;
1181     	return dentry;
1182     enoent:
1183     	dput(dentry);
1184     	dentry = ERR_PTR(-ENOENT);
1185     fail:
1186     	return dentry;
1187     }
1188     
1189     int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1190     {
1191     	int error = -EPERM;
1192     
1193     	down(&dir->i_zombie);
1194     	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1195     		goto exit_lock;
1196     
1197     	error = may_create(dir, dentry);
1198     	if (error)
1199     		goto exit_lock;
1200     
1201     	error = -EPERM;
1202     	if (!dir->i_op || !dir->i_op->mknod)
1203     		goto exit_lock;
1204     
1205     	DQUOT_INIT(dir);
1206     	lock_kernel();
1207     	error = dir->i_op->mknod(dir, dentry, mode, dev);
1208     	unlock_kernel();
1209     exit_lock:
1210     	up(&dir->i_zombie);
1211     	if (!error)
1212     		inode_dir_notify(dir, DN_CREATE);
1213     	return error;
1214     }
1215     
1216     asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
1217     {
1218     	int error = 0;
1219     	char * tmp;
1220     	struct dentry * dentry;
1221     	struct nameidata nd;
1222     
1223     	if (S_ISDIR(mode))
1224     		return -EPERM;
1225     	tmp = getname(filename);
1226     	if (IS_ERR(tmp))
1227     		return PTR_ERR(tmp);
1228     
1229     	if (path_init(tmp, LOOKUP_PARENT, &nd))
1230     		error = path_walk(tmp, &nd);
1231     	if (error)
1232     		goto out;
1233     	dentry = lookup_create(&nd, 0);
1234     	error = PTR_ERR(dentry);
1235     
1236     	mode &= ~current->fs->umask;
1237     	if (!IS_ERR(dentry)) {
1238     		switch (mode & S_IFMT) {
1239     		case 0: case S_IFREG:
1240     			error = vfs_create(nd.dentry->d_inode,dentry,mode);
1241     			break;
1242     		case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1243     			error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev);
1244     			break;
1245     		case S_IFDIR:
1246     			error = -EPERM;
1247     			break;
1248     		default:
1249     			error = -EINVAL;
1250     		}
1251     		dput(dentry);
1252     	}
1253     	up(&nd.dentry->d_inode->i_sem);
1254     	path_release(&nd);
1255     out:
1256     	putname(tmp);
1257     
1258     	return error;
1259     }
1260     
1261     int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1262     {
1263     	int error;
1264     
1265     	down(&dir->i_zombie);
1266     	error = may_create(dir, dentry);
1267     	if (error)
1268     		goto exit_lock;
1269     
1270     	error = -EPERM;
1271     	if (!dir->i_op || !dir->i_op->mkdir)
1272     		goto exit_lock;
1273     
1274     	DQUOT_INIT(dir);
1275     	mode &= (S_IRWXUGO|S_ISVTX);
1276     	lock_kernel();
1277     	error = dir->i_op->mkdir(dir, dentry, mode);
1278     	unlock_kernel();
1279     
1280     exit_lock:
1281     	up(&dir->i_zombie);
1282     	if (!error)
1283     		inode_dir_notify(dir, DN_CREATE);
1284     	return error;
1285     }
1286     
1287     asmlinkage long sys_mkdir(const char * pathname, int mode)
1288     {
1289     	int error = 0;
1290     	char * tmp;
1291     
1292     	tmp = getname(pathname);
1293     	error = PTR_ERR(tmp);
1294     	if (!IS_ERR(tmp)) {
1295     		struct dentry *dentry;
1296     		struct nameidata nd;
1297     
1298     		if (path_init(tmp, LOOKUP_PARENT, &nd))
1299     			error = path_walk(tmp, &nd);
1300     		if (error)
1301     			goto out;
1302     		dentry = lookup_create(&nd, 1);
1303     		error = PTR_ERR(dentry);
1304     		if (!IS_ERR(dentry)) {
1305     			error = vfs_mkdir(nd.dentry->d_inode, dentry,
1306     					  mode & ~current->fs->umask);
1307     			dput(dentry);
1308     		}
1309     		up(&nd.dentry->d_inode->i_sem);
1310     		path_release(&nd);
1311     out:
1312     		putname(tmp);
1313     	}
1314     
1315     	return error;
1316     }
1317     
1318     /*
1319      * We try to drop the dentry early: we should have
1320      * a usage count of 2 if we're the only user of this
1321      * dentry, and if that is true (possibly after pruning
1322      * the dcache), then we drop the dentry now.
1323      *
1324      * A low-level filesystem can, if it choses, legally
1325      * do a
1326      *
1327      *	if (!d_unhashed(dentry))
1328      *		return -EBUSY;
1329      *
1330      * if it cannot handle the case of removing a directory
1331      * that is still in use by something else..
1332      */
1333     static void d_unhash(struct dentry *dentry)
1334     {
1335     	dget(dentry);
1336     	switch (atomic_read(&dentry->d_count)) {
1337     	default:
1338     		shrink_dcache_parent(dentry);
1339     		if (atomic_read(&dentry->d_count) != 2)
1340     			break;
1341     	case 2:
1342     		d_drop(dentry);
1343     	}
1344     }
1345     
1346     int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1347     {
1348     	int error;
1349     
1350     	error = may_delete(dir, dentry, 1);
1351     	if (error)
1352     		return error;
1353     
1354     	if (!dir->i_op || !dir->i_op->rmdir)
1355     		return -EPERM;
1356     
1357     	DQUOT_INIT(dir);
1358     
1359     	double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
1360     	d_unhash(dentry);
1361     	if (IS_DEADDIR(dir))
1362     		error = -ENOENT;
1363     	else if (d_mountpoint(dentry))
1364     		error = -EBUSY;
1365     	else {
1366     		lock_kernel();
1367     		error = dir->i_op->rmdir(dir, dentry);
1368     		unlock_kernel();
1369     		if (!error)
1370     			dentry->d_inode->i_flags |= S_DEAD;
1371     	}
1372     	double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
1373     	if (!error) {
1374     		inode_dir_notify(dir, DN_DELETE);
1375     		d_delete(dentry);
1376     	}
1377     	dput(dentry);
1378     
1379     	return error;
1380     }
1381     
1382     asmlinkage long sys_rmdir(const char * pathname)
1383     {
1384     	int error = 0;
1385     	char * name;
1386     	struct dentry *dentry;
1387     	struct nameidata nd;
1388     
1389     	name = getname(pathname);
1390     	if(IS_ERR(name))
1391     		return PTR_ERR(name);
1392     
1393     	if (path_init(name, LOOKUP_PARENT, &nd))
1394     		error = path_walk(name, &nd);
1395     	if (error)
1396     		goto exit;
1397     
1398     	switch(nd.last_type) {
1399     		case LAST_DOTDOT:
1400     			error = -ENOTEMPTY;
1401     			goto exit1;
1402     		case LAST_DOT:
1403     			error = -EINVAL;
1404     			goto exit1;
1405     		case LAST_ROOT:
1406     			error = -EBUSY;
1407     			goto exit1;
1408     	}
1409     	down(&nd.dentry->d_inode->i_sem);
1410     	dentry = lookup_hash(&nd.last, nd.dentry);
1411     	error = PTR_ERR(dentry);
1412     	if (!IS_ERR(dentry)) {
1413     		error = vfs_rmdir(nd.dentry->d_inode, dentry);
1414     		dput(dentry);
1415     	}
1416     	up(&nd.dentry->d_inode->i_sem);
1417     exit1:
1418     	path_release(&nd);
1419     exit:
1420     	putname(name);
1421     	return error;
1422     }
1423     
1424     int vfs_unlink(struct inode *dir, struct dentry *dentry)
1425     {
1426     	int error;
1427     
1428     	down(&dir->i_zombie);
1429     	error = may_delete(dir, dentry, 0);
1430     	if (!error) {
1431     		error = -EPERM;
1432     		if (dir->i_op && dir->i_op->unlink) {
1433     			DQUOT_INIT(dir);
1434     			if (d_mountpoint(dentry))
1435     				error = -EBUSY;
1436     			else {
1437     				lock_kernel();
1438     				error = dir->i_op->unlink(dir, dentry);
1439     				unlock_kernel();
1440     				if (!error)
1441     					d_delete(dentry);
1442     			}
1443     		}
1444     	}
1445     	up(&dir->i_zombie);
1446     	if (!error)
1447     		inode_dir_notify(dir, DN_DELETE);
1448     	return error;
1449     }
1450     
1451     asmlinkage long sys_unlink(const char * pathname)
1452     {
1453     	int error = 0;
1454     	char * name;
1455     	struct dentry *dentry;
1456     	struct nameidata nd;
1457     
1458     	name = getname(pathname);
1459     	if(IS_ERR(name))
1460     		return PTR_ERR(name);
1461     
1462     	if (path_init(name, LOOKUP_PARENT, &nd))
1463     		error = path_walk(name, &nd);
1464     	if (error)
1465     		goto exit;
1466     	error = -EISDIR;
1467     	if (nd.last_type != LAST_NORM)
1468     		goto exit1;
1469     	down(&nd.dentry->d_inode->i_sem);
1470     	dentry = lookup_hash(&nd.last, nd.dentry);
1471     	error = PTR_ERR(dentry);
1472     	if (!IS_ERR(dentry)) {
1473     		/* Why not before? Because we want correct error value */
1474     		if (nd.last.name[nd.last.len])
1475     			goto slashes;
1476     		error = vfs_unlink(nd.dentry->d_inode, dentry);
1477     	exit2:
1478     		dput(dentry);
1479     	}
1480     	up(&nd.dentry->d_inode->i_sem);
1481     exit1:
1482     	path_release(&nd);
1483     exit:
1484     	putname(name);
1485     
1486     	return error;
1487     
1488     slashes:
1489     	error = !dentry->d_inode ? -ENOENT :
1490     		S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1491     	goto exit2;
1492     }
1493     
1494     int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1495     {
1496     	int error;
1497     
1498     	down(&dir->i_zombie);
1499     	error = may_create(dir, dentry);
1500     	if (error)
1501     		goto exit_lock;
1502     
1503     	error = -EPERM;
1504     	if (!dir->i_op || !dir->i_op->symlink)
1505     		goto exit_lock;
1506     
1507     	DQUOT_INIT(dir);
1508     	lock_kernel();
1509     	error = dir->i_op->symlink(dir, dentry, oldname);
1510     	unlock_kernel();
1511     
1512     exit_lock:
1513     	up(&dir->i_zombie);
1514     	if (!error)
1515     		inode_dir_notify(dir, DN_CREATE);
1516     	return error;
1517     }
1518     
1519     asmlinkage long sys_symlink(const char * oldname, const char * newname)
1520     {
1521     	int error = 0;
1522     	char * from;
1523     	char * to;
1524     
1525     	from = getname(oldname);
1526     	if(IS_ERR(from))
1527     		return PTR_ERR(from);
1528     	to = getname(newname);
1529     	error = PTR_ERR(to);
1530     	if (!IS_ERR(to)) {
1531     		struct dentry *dentry;
1532     		struct nameidata nd;
1533     
1534     		if (path_init(to, LOOKUP_PARENT, &nd))
1535     			error = path_walk(to, &nd);
1536     		if (error)
1537     			goto out;
1538     		dentry = lookup_create(&nd, 0);
1539     		error = PTR_ERR(dentry);
1540     		if (!IS_ERR(dentry)) {
1541     			error = vfs_symlink(nd.dentry->d_inode, dentry, from);
1542     			dput(dentry);
1543     		}
1544     		up(&nd.dentry->d_inode->i_sem);
1545     		path_release(&nd);
1546     out:
1547     		putname(to);
1548     	}
1549     	putname(from);
1550     	return error;
1551     }
1552     
1553     int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1554     {
1555     	struct inode *inode;
1556     	int error;
1557     
1558     	down(&dir->i_zombie);
1559     	error = -ENOENT;
1560     	inode = old_dentry->d_inode;
1561     	if (!inode)
1562     		goto exit_lock;
1563     
1564     	error = may_create(dir, new_dentry);
1565     	if (error)
1566     		goto exit_lock;
1567     
1568     	error = -EXDEV;
1569     	if (dir->i_dev != inode->i_dev)
1570     		goto exit_lock;
1571     
1572     	/*
1573     	 * A link to an append-only or immutable file cannot be created.
1574     	 */
1575     	error = -EPERM;
1576     	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1577     		goto exit_lock;
1578     	if (!dir->i_op || !dir->i_op->link)
1579     		goto exit_lock;
1580     
1581     	DQUOT_INIT(dir);
1582     	lock_kernel();
1583     	error = dir->i_op->link(old_dentry, dir, new_dentry);
1584     	unlock_kernel();
1585     
1586     exit_lock:
1587     	up(&dir->i_zombie);
1588     	if (!error)
1589     		inode_dir_notify(dir, DN_CREATE);
1590     	return error;
1591     }
1592     
1593     /*
1594      * Hardlinks are often used in delicate situations.  We avoid
1595      * security-related surprises by not following symlinks on the
1596      * newname.  --KAB
1597      *
1598      * We don't follow them on the oldname either to be compatible
1599      * with linux 2.0, and to avoid hard-linking to directories
1600      * and other special files.  --ADM
1601      */
1602     asmlinkage long sys_link(const char * oldname, const char * newname)
1603     {
1604     	int error;
1605     	char * from;
1606     	char * to;
1607     
1608     	from = getname(oldname);
1609     	if(IS_ERR(from))
1610     		return PTR_ERR(from);
1611     	to = getname(newname);
1612     	error = PTR_ERR(to);
1613     	if (!IS_ERR(to)) {
1614     		struct dentry *new_dentry;
1615     		struct nameidata nd, old_nd;
1616     
1617     		error = 0;
1618     		if (path_init(from, LOOKUP_POSITIVE, &old_nd))
1619     			error = path_walk(from, &old_nd);
1620     		if (error)
1621     			goto exit;
1622     		if (path_init(to, LOOKUP_PARENT, &nd))
1623     			error = path_walk(to, &nd);
1624     		if (error)
1625     			goto out;
1626     		error = -EXDEV;
1627     		if (old_nd.mnt != nd.mnt)
1628     			goto out_release;
1629     		new_dentry = lookup_create(&nd, 0);
1630     		error = PTR_ERR(new_dentry);
1631     		if (!IS_ERR(new_dentry)) {
1632     			error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
1633     			dput(new_dentry);
1634     		}
1635     		up(&nd.dentry->d_inode->i_sem);
1636     out_release:
1637     		path_release(&nd);
1638     out:
1639     		path_release(&old_nd);
1640     exit:
1641     		putname(to);
1642     	}
1643     	putname(from);
1644     
1645     	return error;
1646     }
1647     
/*
 * The worst of all namespace operations - renaming directory. "Perverted"
 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
 * Problems:
 *	a) we can get into loop creation. Check is done in is_subdir().
 *	b) race potential - two innocent renames can create a loop together.
 *	   That's where 4.4 screws up. Current fix: serialization on
 *	   sb->s_vfs_rename_sem. We might be more accurate, but that's another
 *	   story.
 *	c) we have to lock _three_ objects - parents and victim (if it exists).
 *	   And that - after we got ->i_sem on parents (until then we don't know
 *	   whether the target exists at all, let alone whether it is a directory
 *	   or not). Solution: ->i_zombie. Taken only after ->i_sem. Always taken
 *	   on link creation/removal of any kind. And taken (without ->i_sem) on
 *	   directory that will be removed (both in rmdir() and here).
 *	d) some filesystems don't support opened-but-unlinked directories,
 *	   either because of layout or because they are not ready to deal with
 *	   all cases correctly. The latter will be fixed (taking this sort of
 *	   stuff into VFS), but the former is not going away. Solution: the same
 *	   trick as in rmdir().
 *	e) conversion from fhandle to dentry may come in the wrong moment - when
 *	   we are removing the target. Solution: we will have to grab ->i_zombie
 *	   in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
 *	   ->i_sem on parents, which works but leads to some truly excessive
 *	   locking].
 *
 * Returns 0 on success (including the no-op case where both dentries
 * already share one inode) or a negative errno: -EXDEV across devices,
 * -EPERM without a ->rename() method, -EINVAL when the move would put a
 * directory under itself or onto its own parent, -ENOENT for a dead
 * parent, -EBUSY for a mountpoint, or whatever the permission checks or
 * ->rename() report.
 */
int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
	       struct inode *new_dir, struct dentry *new_dentry)
{
	int error;
	struct inode *target;	/* inode being replaced, NULL if none */

	/* Source and target are the same inode: nothing to do. */
	if (old_dentry->d_inode == new_dentry->d_inode)
		return 0;

	error = may_delete(old_dir, old_dentry, 1);
	if (error)
		return error;

	if (new_dir->i_dev != old_dir->i_dev)
		return -EXDEV;

	/* A missing target is created, an existing one is replaced. */
	if (!new_dentry->d_inode)
		error = may_create(new_dir, new_dentry);
	else
		error = may_delete(new_dir, new_dentry, 1);
	if (error)
		return error;

	if (!old_dir->i_op || !old_dir->i_op->rename)
		return -EPERM;

	/*
	 * If we are going to change the parent - check write permissions,
	 * we'll need to flip '..'.
	 */
	if (new_dir != old_dir) {
		error = permission(old_dentry->d_inode, MAY_WRITE);
	}
	/* error is still 0 here when the parent does not change. */
	if (error)
		return error;

	DQUOT_INIT(old_dir);
	DQUOT_INIT(new_dir);
	/* Serialize directory renames per superblock, see (b) above. */
	down(&old_dir->i_sb->s_vfs_rename_sem);
	error = -EINVAL;
	if (is_subdir(new_dentry, old_dentry))
		goto out_unlock;
	/* Don't eat your daddy, dear... */
	/* This also avoids locking issues */
	if (old_dentry->d_parent == new_dentry)
		goto out_unlock;
	target = new_dentry->d_inode;
	if (target) { /* Hastur! Hastur! Hastur! */
		triple_down(&old_dir->i_zombie,
			    &new_dir->i_zombie,
			    &target->i_zombie);
		/* Takes an extra reference, dropped by the dput() below. */
		d_unhash(new_dentry);
	} else
		double_down(&old_dir->i_zombie,
			    &new_dir->i_zombie);
	if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir))
		error = -ENOENT;
	else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
		error = -EBUSY;
	else 
		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
	if (target) {
		/* The replaced directory is dead once the rename succeeded. */
		if (!error)
			target->i_flags |= S_DEAD;
		triple_up(&old_dir->i_zombie,
			  &new_dir->i_zombie,
			  &target->i_zombie);
		/* Put the target back in the hash if d_unhash() dropped it. */
		if (d_unhashed(new_dentry))
			d_rehash(new_dentry);
		dput(new_dentry);
	} else
		double_up(&old_dir->i_zombie,
			  &new_dir->i_zombie);
		
	if (!error)
		d_move(old_dentry,new_dentry);
out_unlock:
	up(&old_dir->i_sb->s_vfs_rename_sem);
	return error;
}
1754     
1755     int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1756     	       struct inode *new_dir, struct dentry *new_dentry)
1757     {
1758     	int error;
1759     
1760     	if (old_dentry->d_inode == new_dentry->d_inode)
1761     		return 0;
1762     
1763     	error = may_delete(old_dir, old_dentry, 0);
1764     	if (error)
1765     		return error;
1766     
1767     	if (new_dir->i_dev != old_dir->i_dev)
1768     		return -EXDEV;
1769     
1770     	if (!new_dentry->d_inode)
1771     		error = may_create(new_dir, new_dentry);
1772     	else
1773     		error = may_delete(new_dir, new_dentry, 0);
1774     	if (error)
1775     		return error;
1776     
1777     	if (!old_dir->i_op || !old_dir->i_op->rename)
1778     		return -EPERM;
1779     
1780     	DQUOT_INIT(old_dir);
1781     	DQUOT_INIT(new_dir);
1782     	double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1783     	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1784     		error = -EBUSY;
1785     	else
1786     		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1787     	double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1788     	if (error)
1789     		return error;
1790     	/* The following d_move() should become unconditional */
1791     	if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) {
1792     		d_move(old_dentry, new_dentry);
1793     	}
1794     	return 0;
1795     }
1796     
1797     int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1798     	       struct inode *new_dir, struct dentry *new_dentry)
1799     {
1800     	int error;
1801     	if (S_ISDIR(old_dentry->d_inode->i_mode))
1802     		error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1803     	else
1804     		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1805     	if (!error) {
1806     		if (old_dir == new_dir)
1807     			inode_dir_notify(old_dir, DN_RENAME);
1808     		else {
1809     			inode_dir_notify(old_dir, DN_DELETE);
1810     			inode_dir_notify(new_dir, DN_CREATE);
1811     		}
1812     	}
1813     	return error;
1814     }
1815     
/*
 * Core of rename(2), working on the names passed in by sys_rename().
 *
 * Looks up the parents of both names, requires them to be on the same
 * vfsmount (-EXDEV) and both last components to be normal names
 * (-EBUSY), then locks both parent directories, looks up the two last
 * components and hands everything to vfs_rename() under lock_kernel().
 *
 * Returns 0 or a negative errno.  The exitN labels unwind in reverse
 * order of acquisition.
 */
static inline int do_rename(const char * oldname, const char * newname)
{
	int error = 0;
	struct dentry * old_dir, * new_dir;
	struct dentry * old_dentry, *new_dentry;
	struct nameidata oldnd, newnd;

	/* path_walk() only runs when path_init() returns non-zero. */
	if (path_init(oldname, LOOKUP_PARENT, &oldnd))
		error = path_walk(oldname, &oldnd);

	if (error)
		goto exit;

	if (path_init(newname, LOOKUP_PARENT, &newnd))
		error = path_walk(newname, &newnd);
	if (error)
		goto exit1;

	error = -EXDEV;
	if (oldnd.mnt != newnd.mnt)
		goto exit2;

	old_dir = oldnd.dentry;
	/* -EBUSY stays in effect for both last_type checks below. */
	error = -EBUSY;
	if (oldnd.last_type != LAST_NORM)
		goto exit2;

	new_dir = newnd.dentry;
	if (newnd.last_type != LAST_NORM)
		goto exit2;

	/* Released by the double_up() on both parents' i_sem at exit3. */
	double_lock(new_dir, old_dir);

	old_dentry = lookup_hash(&oldnd.last, old_dir);
	error = PTR_ERR(old_dentry);
	if (IS_ERR(old_dentry))
		goto exit3;
	/* source must exist */
	error = -ENOENT;
	if (!old_dentry->d_inode)
		goto exit4;
	/* unless the source is a directory trailing slashes give -ENOTDIR */
	if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
		error = -ENOTDIR;
		if (oldnd.last.name[oldnd.last.len])
			goto exit4;
		if (newnd.last.name[newnd.last.len])
			goto exit4;
	}
	new_dentry = lookup_hash(&newnd.last, new_dir);
	error = PTR_ERR(new_dentry);
	if (IS_ERR(new_dentry))
		goto exit4;

	lock_kernel();
	error = vfs_rename(old_dir->d_inode, old_dentry,
				   new_dir->d_inode, new_dentry);
	unlock_kernel();

	dput(new_dentry);
exit4:
	dput(old_dentry);
exit3:
	double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
exit2:
	path_release(&newnd);
exit1:
	path_release(&oldnd);
exit:
	return error;
}
1887     
1888     asmlinkage long sys_rename(const char * oldname, const char * newname)
1889     {
1890     	int error;
1891     	char * from;
1892     	char * to;
1893     
1894     	from = getname(oldname);
1895     	if(IS_ERR(from))
1896     		return PTR_ERR(from);
1897     	to = getname(newname);
1898     	error = PTR_ERR(to);
1899     	if (!IS_ERR(to)) {
1900     		error = do_rename(from,to);
1901     		putname(to);
1902     	}
1903     	putname(from);
1904     	return error;
1905     }
1906     
1907     int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link)
1908     {
1909     	int len;
1910     
1911     	len = PTR_ERR(link);
1912     	if (IS_ERR(link))
1913     		goto out;
1914     
1915     	len = strlen(link);
1916     	if (len > (unsigned) buflen)
1917     		len = buflen;
1918     	if (copy_to_user(buffer, link, len))
1919     		len = -EFAULT;
1920     out:
1921     	return len;
1922     }
1923     
/*
 * Continue the lookup in "nd" through the symlink body "link".
 *
 * "link" may be an ERR_PTR(); in that case nd is released and the error
 * is returned.  An absolute link restarts the walk via walk_init_root().
 *
 * When current->link_count is zero, the walk succeeded and the last
 * component is LAST_NORM, nd->last.name is replaced by a copy made with
 * __getname().  The caller then owns that copy (open_namei() disposes
 * of it with putname()).
 *
 * Returns 0 on success or a negative errno (-ENOMEM if the copy could
 * not be allocated).
 */
static inline int
__vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res = 0;
	char *name;
	if (IS_ERR(link))
		goto fail;

	if (*link == '/') {
		/* Absolute link: drop where we are and start over from the root. */
		path_release(nd);
		if (!walk_init_root(link, nd))
			/* weird __emul_prefix() stuff did it */
			goto out;
	}
	res = path_walk(link, nd);
out:
	/* Nested resolution, failure, or nothing to copy: we are done. */
	if (current->link_count || res || nd->last_type!=LAST_NORM)
		return res;
	/*
	 * If it is an iterative symlinks resolution in open_namei() we
	 * have to copy the last component. And all that crap because of
	 * bloody create() on broken symlinks. Furrfu...
	 */
	name = __getname();
	if (!name)
		return -ENOMEM;
	strcpy(name, nd->last.name);
	nd->last.name = name;
	return 0;
fail:
	path_release(nd);
	return PTR_ERR(link);
}
1957     
/*
 * Non-inline entry point for __vfs_follow_link(): continue the lookup
 * in "nd" through the symlink body "link" and return its result.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	return __vfs_follow_link(nd, link);
}
1962     
1963     /* get the link contents into pagecache */
1964     static char *page_getlink(struct dentry * dentry, struct page **ppage)
1965     {
1966     	struct page * page;
1967     	struct address_space *mapping = dentry->d_inode->i_mapping;
1968     	page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
1969     				NULL);
1970     	if (IS_ERR(page))
1971     		goto sync_fail;
1972     	wait_on_page(page);
1973     	if (!Page_Uptodate(page))
1974     		goto async_fail;
1975     	*ppage = page;
1976     	return kmap(page);
1977     
1978     async_fail:
1979     	page_cache_release(page);
1980     	return ERR_PTR(-EIO);
1981     
1982     sync_fail:
1983     	return (char*)page;
1984     }
1985     
1986     int page_readlink(struct dentry *dentry, char *buffer, int buflen)
1987     {
1988     	struct page *page = NULL;
1989     	char *s = page_getlink(dentry, &page);
1990     	int res = vfs_readlink(dentry,buffer,buflen,s);
1991     	if (page) {
1992     		kunmap(page);
1993     		page_cache_release(page);
1994     	}
1995     	return res;
1996     }
1997     
1998     int page_follow_link(struct dentry *dentry, struct nameidata *nd)
1999     {
2000     	struct page *page = NULL;
2001     	char *s = page_getlink(dentry, &page);
2002     	int res = __vfs_follow_link(nd, s);
2003     	if (page) {
2004     		kunmap(page);
2005     		page_cache_release(page);
2006     	}
2007     	return res;
2008     }
2009     
/*
 * Inode operations for symlinks that keep their body in the page cache;
 * only readlink and follow_link are provided.
 */
struct inode_operations page_symlink_inode_operations = {
	readlink:	page_readlink,
	follow_link:	page_follow_link,
};
2014