File: /usr/src/linux/fs/proc/base.c

/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own right. We don't
 *  use the rest of procfs anymore.
 */
15     
16     #include <asm/uaccess.h>
17     
18     #include <linux/config.h>
19     #include <linux/errno.h>
20     #include <linux/sched.h>
21     #include <linux/proc_fs.h>
22     #include <linux/stat.h>
23     #include <linux/init.h>
24     #include <linux/file.h>
25     #include <linux/string.h>
26     
/*
 * For hysterical raisins we keep the same inumbers as in the old procfs.
 * Feel free to change the macro below - just keep the range distinct from
 * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
 * As soon as we get a separate superblock we will be able to forget
 * about magical ranges too.
 */
34     
/* Build a per-process inode number: pid in the bits above 16, ino below. */
#define fake_ino(pid,ino) (((pid)<<16)|(ino))

/*
 * Content generators that are only declared here; their definitions are not
 * part of this file.  proc_pid_read_maps() is called from pid_maps_read();
 * the others are installed as proc_read handlers by proc_base_lookup() and
 * fill the buffer they are handed by proc_info_read().
 */
ssize_t proc_pid_read_maps(struct task_struct*,struct file*,char*,size_t,loff_t*);
int proc_pid_stat(struct task_struct*,char*);
int proc_pid_status(struct task_struct*,char*);
int proc_pid_statm(struct task_struct*,char*);
int proc_pid_cpu(struct task_struct*,char*);
42     
43     static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
44     {
45     	if (inode->u.proc_i.file) {
46     		*mnt = mntget(inode->u.proc_i.file->f_vfsmnt);
47     		*dentry = dget(inode->u.proc_i.file->f_dentry);
48     		return 0;
49     	}
50     	return -ENOENT;
51     }
52     
53     static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
54     {
55     	struct mm_struct * mm;
56     	struct vm_area_struct * vma;
57     	int result = -ENOENT;
58     	struct task_struct *task = inode->u.proc_i.task;
59     
60     	task_lock(task);
61     	mm = task->mm;
62     	if (mm)
63     		atomic_inc(&mm->mm_users);
64     	task_unlock(task);
65     	if (!mm)
66     		goto out;
67     	down_read(&mm->mmap_sem);
68     	vma = mm->mmap;
69     	while (vma) {
70     		if ((vma->vm_flags & VM_EXECUTABLE) && 
71     		    vma->vm_file) {
72     			*mnt = mntget(vma->vm_file->f_vfsmnt);
73     			*dentry = dget(vma->vm_file->f_dentry);
74     			result = 0;
75     			break;
76     		}
77     		vma = vma->vm_next;
78     	}
79     	up_read(&mm->mmap_sem);
80     	mmput(mm);
81     out:
82     	return result;
83     }
84     
85     static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
86     {
87     	struct fs_struct *fs;
88     	int result = -ENOENT;
89     	task_lock(inode->u.proc_i.task);
90     	fs = inode->u.proc_i.task->fs;
91     	if(fs)
92     		atomic_inc(&fs->count);
93     	task_unlock(inode->u.proc_i.task);
94     	if (fs) {
95     		read_lock(&fs->lock);
96     		*mnt = mntget(fs->pwdmnt);
97     		*dentry = dget(fs->pwd);
98     		read_unlock(&fs->lock);
99     		result = 0;
100     		put_fs_struct(fs);
101     	}
102     	return result;
103     }
104     
105     static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
106     {
107     	struct fs_struct *fs;
108     	int result = -ENOENT;
109     	task_lock(inode->u.proc_i.task);
110     	fs = inode->u.proc_i.task->fs;
111     	if(fs)
112     		atomic_inc(&fs->count);
113     	task_unlock(inode->u.proc_i.task);
114     	if (fs) {
115     		read_lock(&fs->lock);
116     		*mnt = mntget(fs->rootmnt);
117     		*dentry = dget(fs->root);
118     		read_unlock(&fs->lock);
119     		result = 0;
120     		put_fs_struct(fs);
121     	}
122     	return result;
123     }
124     
125     static int proc_pid_environ(struct task_struct *task, char * buffer)
126     {
127     	struct mm_struct *mm;
128     	int res = 0;
129     	task_lock(task);
130     	mm = task->mm;
131     	if (mm)
132     		atomic_inc(&mm->mm_users);
133     	task_unlock(task);
134     	if (mm) {
135     		int len = mm->env_end - mm->env_start;
136     		if (len > PAGE_SIZE)
137     			len = PAGE_SIZE;
138     		res = access_process_vm(task, mm->env_start, buffer, len, 0);
139     		mmput(mm);
140     	}
141     	return res;
142     }
143     
/*
 * proc_read handler for "cmdline".
 *
 * Copies up to one page of the task's argument area
 * [mm->arg_start, mm->arg_end) into buffer and returns the number of bytes
 * to expose, or 0 when the task has no mm.  If the final byte read is not
 * a NUL, the result is cut at the first NUL found; when the argument area
 * contains no NUL at all, the environment area is appended (still within
 * one page) before cutting.
 */
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
	struct mm_struct *mm;
	int res = 0;
	/* Take a user reference on the mm while the task is locked. */
	task_lock(task);
	mm = task->mm;
	if (mm)
		atomic_inc(&mm->mm_users);
	task_unlock(task);
	if (mm) {
		int len = mm->arg_end - mm->arg_start;
		if (len > PAGE_SIZE)
			len = PAGE_SIZE;
		res = access_process_vm(task, mm->arg_start, buffer, len, 0);
		/*
		 * If the nul at the end of args has been overwritten, then
		 * assume application is using setproctitle(3).
		 */
		if ( res > 0 && buffer[res-1] != '\0' )
		{
			len = strnlen( buffer, res );
			if ( len < res )
			{
			    /* A NUL exists inside what was read: stop there. */
			    res = len;
			}
			else
			{
				/*
				 * No NUL in the argument area: continue into the
				 * environment area, limited to the space left in
				 * the page, then cut at the first NUL (if any).
				 */
				len = mm->env_end - mm->env_start;
				if (len > PAGE_SIZE - res)
					len = PAGE_SIZE - res;
				res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
				res = strnlen( buffer, res );
			}
		}
		mmput(mm);
	}
	return res;
}
180     
181     /************************************************************************/
182     /*                       Here the fs part begins                        */
183     /************************************************************************/
184     
185     /* permission checks */
186     
187     static int standard_permission(struct inode *inode, int mask)
188     {
189     	int mode = inode->i_mode;
190     
191     	if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
192     	    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
193     		return -EROFS; /* Nobody gets write access to a read-only fs */
194     	else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
195     		return -EACCES; /* Nobody gets write access to an immutable file */
196     	else if (current->fsuid == inode->i_uid)
197     		mode >>= 6;
198     	else if (in_group_p(inode->i_gid))
199     		mode >>= 3;
200     	if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
201     		return 0;
202     	/* read and search access */
203     	if ((mask == S_IROTH) ||
204     	    (S_ISDIR(mode)  && !(mask & ~(S_IROTH | S_IXOTH))))
205     		if (capable(CAP_DAC_READ_SEARCH))
206     			return 0;
207     	return -EACCES;
208     }
209     
/*
 * Check that the root directory of the task behind @inode lies at or below
 * the caller's own root directory.
 *
 * Returns 0 if it does, -EACCES if it does not, and -ENOENT if the target
 * task's root cannot be obtained.
 */
static int proc_check_root(struct inode *inode)
{
	struct dentry *de, *base, *root;
	struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
	int res = 0;

	/* Counted references to the target task's root dentry and mount. */
	if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
		return -ENOENT;
	/* Counted references to the caller's root dentry and mount. */
	read_lock(&current->fs->lock);
	our_vfsmnt = mntget(current->fs->rootmnt);
	base = dget(current->fs->root);
	read_unlock(&current->fs->lock);

	spin_lock(&dcache_lock);
	de = root;
	/* vfsmnt is advanced below; mnt keeps the pointer we must mntput(). */
	mnt = vfsmnt;

	/*
	 * Climb from the target's mount towards the caller's root mount,
	 * stepping to each mountpoint dentry on the way.  A mount that is
	 * its own parent ends the chain without a match.
	 */
	while (vfsmnt != our_vfsmnt) {
		if (vfsmnt == vfsmnt->mnt_parent)
			goto out;
		de = vfsmnt->mnt_mountpoint;
		vfsmnt = vfsmnt->mnt_parent;
	}

	/* Same mount as our root: the dentry must be under our root dentry. */
	if (!is_subdir(de, base))
		goto out;
	spin_unlock(&dcache_lock);

exit:
	/* Drop every reference taken above. */
	dput(base);
	mntput(our_vfsmnt);
	dput(root);
	mntput(mnt);
	return res;
out:
	/* Failure path: still holding dcache_lock here. */
	spin_unlock(&dcache_lock);
	res = -EACCES;
	goto exit;
}
249     
250     static int proc_permission(struct inode *inode, int mask)
251     {
252     	if (standard_permission(inode, mask) != 0)
253     		return -EACCES;
254     	return proc_check_root(inode);
255     }
256     
257     static ssize_t pid_maps_read(struct file * file, char * buf,
258     			      size_t count, loff_t *ppos)
259     {
260     	struct inode * inode = file->f_dentry->d_inode;
261     	struct task_struct *task = inode->u.proc_i.task;
262     	ssize_t res;
263     
264     	res = proc_pid_read_maps(task, file, buf, count, ppos);
265     	return res;
266     }
267     
/* File operations for the "maps" entry (installed by proc_base_lookup()). */
static struct file_operations proc_maps_operations = {
	read:		pid_maps_read,
};
271     
272     #define PROC_BLOCK_SIZE	(3*1024)		/* 4K page size but our output routines use some slack for overruns */
273     
274     static ssize_t proc_info_read(struct file * file, char * buf,
275     			  size_t count, loff_t *ppos)
276     {
277     	struct inode * inode = file->f_dentry->d_inode;
278     	unsigned long page;
279     	ssize_t length;
280     	ssize_t end;
281     	struct task_struct *task = inode->u.proc_i.task;
282     
283     	if (count > PROC_BLOCK_SIZE)
284     		count = PROC_BLOCK_SIZE;
285     	if (!(page = __get_free_page(GFP_KERNEL)))
286     		return -ENOMEM;
287     
288     	length = inode->u.proc_i.op.proc_read(task, (char*)page);
289     
290     	if (length < 0) {
291     		free_page(page);
292     		return length;
293     	}
294     	/* Static 4kB (or whatever) block capacity */
295     	if (*ppos >= length) {
296     		free_page(page);
297     		return 0;
298     	}
299     	if (count + *ppos > length)
300     		count = length - *ppos;
301     	end = count + *ppos;
302     	copy_to_user(buf, (char *) page + *ppos, count);
303     	*ppos = end;
304     	free_page(page);
305     	return count;
306     }
307     
308     static struct file_operations proc_info_file_operations = {
309     	read:		proc_info_read,
310     };
311     
/*
 * True when the caller may access task p's memory: p is the caller itself,
 * or p is a stopped task whose p_pptr is the caller and which has
 * PT_PTRACED set.  The argument is parenthesised so that any pointer
 * expression expands correctly; note that it is still evaluated more than
 * once.
 */
#define MAY_PTRACE(p) \
((p)==current||((p)->p_pptr==current&&((p)->ptrace & PT_PTRACED)&&(p)->state==TASK_STOPPED))
314     
315     
316     static int mem_open(struct inode* inode, struct file* file)
317     {
318     	file->private_data = (void*)((long)current->self_exec_id);
319     	return 0;
320     }
321     
322     static ssize_t mem_read(struct file * file, char * buf,
323     			size_t count, loff_t *ppos)
324     {
325     	struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task;
326     	char *page;
327     	unsigned long src = *ppos;
328     	int copied = 0;
329     	struct mm_struct *mm;
330     
331     
332     	if (!MAY_PTRACE(task))
333     		return -ESRCH;
334     
335     	page = (char *)__get_free_page(GFP_USER);
336     	if (!page)
337     		return -ENOMEM;
338     
339     	task_lock(task);
340     	mm = task->mm;
341     	if (mm)
342     		atomic_inc(&mm->mm_users);
343     	task_unlock(task);
344     	if (!mm)
345     		return 0;
346     
347     	if (file->private_data != (void*)((long)current->self_exec_id) ) {
348     		mmput(mm);
349     		return -EIO;
350     	}
351     		
352     
353     	while (count > 0) {
354     		int this_len, retval;
355     
356     		this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
357     		retval = access_process_vm(task, src, page, this_len, 0);
358     		if (!retval) {
359     			if (!copied)
360     				copied = -EIO;
361     			break;
362     		}
363     		if (copy_to_user(buf, page, retval)) {
364     			copied = -EFAULT;
365     			break;
366     		}
367     		copied += retval;
368     		src += retval;
369     		buf += retval;
370     		count -= retval;
371     	}
372     	*ppos = src;
373     	mmput(mm);
374     	free_page((unsigned long) page);
375     	return copied;
376     }
377     
/*
 * Writing to "mem" is disabled: mem_write is defined as NULL, so the
 * function below is compiled out and proc_mem_operations gets a NULL
 * write method.
 */
#define mem_write NULL

#ifndef mem_write
/* This is a security hazard */
/*
 * write() for "mem": copy the user buffer into the target task's memory,
 * starting at address *ppos, in chunks of at most one page.  Returns the
 * number of bytes written, -ESRCH if the caller fails MAY_PTRACE(),
 * -ENOMEM if no scratch page is available, -EFAULT if the user buffer
 * cannot be read, or -EIO if nothing at all could be written.
 */
static ssize_t mem_write(struct file * file, const char * buf,
			 size_t count, loff_t *ppos)
{
	int copied = 0;
	char *page;
	struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task;
	unsigned long dst = *ppos;

	if (!MAY_PTRACE(task))
		return -ESRCH;

	page = (char *)__get_free_page(GFP_USER);
	if (!page)
		return -ENOMEM;

	while (count > 0) {
		int this_len, retval;

		this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
		if (copy_from_user(page, buf, this_len)) {
			copied = -EFAULT;
			break;
		}
		retval = access_process_vm(task, dst, page, this_len, 1);
		if (!retval) {
			/* Report an error only if nothing was written so far. */
			if (!copied)
				copied = -EIO;
			break;
		}
		copied += retval;
		buf += retval;
		dst += retval;
		count -= retval;			
	}
	*ppos = dst;
	free_page((unsigned long) page);
	return copied;
}
#endif
421     
/*
 * File operations for the "mem" entry.  mem_write is currently the macro
 * NULL, so no write method is installed.
 */
static struct file_operations proc_mem_operations = {
	read:		mem_read,
	write:		mem_write,
	open:		mem_open,
};
427     
/* Inode operations for the "mem" entry: only a permission check. */
static struct inode_operations proc_mem_inode_operations = {
	permission:	proc_permission,
};
431     
432     static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
433     {
434     	struct inode *inode = dentry->d_inode;
435     	int error = -EACCES;
436     
437     	/* We don't need a base pointer in the /proc filesystem */
438     	path_release(nd);
439     
440     	if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
441     		goto out;
442     	error = proc_check_root(inode);
443     	if (error)
444     		goto out;
445     
446     	error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
447     	nd->last_type = LAST_BIND;
448     out:
449     	return error;
450     }
451     
452     static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
453     			    char * buffer, int buflen)
454     {
455     	struct inode * inode;
456     	char * tmp = (char*)__get_free_page(GFP_KERNEL), *path;
457     	int len;
458     
459     	if (!tmp)
460     		return -ENOMEM;
461     		
462     	inode = dentry->d_inode;
463     	path = d_path(dentry, mnt, tmp, PAGE_SIZE);
464     	len = tmp + PAGE_SIZE - 1 - path;
465     
466     	if (len < buflen)
467     		buflen = len;
468     	copy_to_user(buffer, path, buflen);
469     	free_page((unsigned long)tmp);
470     	return buflen;
471     }
472     
473     static int proc_pid_readlink(struct dentry * dentry, char * buffer, int buflen)
474     {
475     	int error = -EACCES;
476     	struct inode *inode = dentry->d_inode;
477     	struct dentry *de;
478     	struct vfsmount *mnt = NULL;
479     
480     	if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
481     		goto out;
482     	error = proc_check_root(inode);
483     	if (error)
484     		goto out;
485     
486     	error = inode->u.proc_i.op.proc_get_link(inode, &de, &mnt);
487     	if (error)
488     		goto out;
489     
490     	error = do_proc_readlink(de, mnt, buffer, buflen);
491     	dput(de);
492     	mntput(mnt);
493     out:
494     	return error;
495     }
496     
/* Inode operations shared by all per-process symlinks (cwd, root, exe, fd/N). */
static struct inode_operations proc_pid_link_inode_operations = {
	readlink:	proc_pid_readlink,
	follow_link:	proc_pid_follow_link
};
501     
/* One row of the per-process directory table (see base_stuff[]). */
struct pid_entry {
	int type;	/* a pid_directory_inos value; also used to build the inode number */
	int len;	/* length of name, excluding the terminating NUL */
	char *name;	/* entry name; NULL marks the end of the table */
	mode_t mode;	/* file type and permission bits given to the inode */
};
508     
/*
 * Low 16 bits of the fake inode numbers used for per-process objects
 * (combined with the pid by fake_ino()).  Values from PROC_PID_FD_DIR
 * upwards are used for the fd entries, as PROC_PID_FD_DIR + fd.
 */
enum pid_directory_inos {
	PROC_PID_INO = 2,
	PROC_PID_STATUS,
	PROC_PID_MEM,
	PROC_PID_CWD,
	PROC_PID_ROOT,
	PROC_PID_EXE,
	PROC_PID_FD,
	PROC_PID_ENVIRON,
	PROC_PID_CMDLINE,
	PROC_PID_STAT,
	PROC_PID_STATM,
	PROC_PID_MAPS,
	PROC_PID_CPU,
	PROC_PID_FD_DIR = 0x8000,	/* 0x8000-0xffff */
};
525     
/*
 * Table of the entries found in every per-process directory.  It is walked
 * by proc_base_readdir() and searched by proc_base_lookup().  E() fills in
 * the name length from the string literal; the all-zero row terminates
 * the table.
 */
#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
static struct pid_entry base_stuff[] = {
  E(PROC_PID_FD,	"fd",		S_IFDIR|S_IRUSR|S_IXUSR),
  E(PROC_PID_ENVIRON,	"environ",	S_IFREG|S_IRUSR),
  E(PROC_PID_STATUS,	"status",	S_IFREG|S_IRUGO),
  E(PROC_PID_CMDLINE,	"cmdline",	S_IFREG|S_IRUGO),
  E(PROC_PID_STAT,	"stat",		S_IFREG|S_IRUGO),
  E(PROC_PID_STATM,	"statm",	S_IFREG|S_IRUGO),
#ifdef CONFIG_SMP
  E(PROC_PID_CPU,	"cpu",		S_IFREG|S_IRUGO),
#endif
  E(PROC_PID_MAPS,	"maps",		S_IFREG|S_IRUGO),
  E(PROC_PID_MEM,	"mem",		S_IFREG|S_IRUSR|S_IWUSR),
  E(PROC_PID_CWD,	"cwd",		S_IFLNK|S_IRWXUGO),
  E(PROC_PID_ROOT,	"root",		S_IFLNK|S_IRWXUGO),
  E(PROC_PID_EXE,	"exe",		S_IFLNK|S_IRWXUGO),
  {0,0,NULL,0}
};
#undef E
545     
/* Size of the scratch buffer used to format an fd number in decimal. */
#define NUMBUF 10

/*
 * readdir() for the per-process "fd" directory.
 *
 * Position 0 is ".", position 1 is "..", and position n + 2 corresponds to
 * descriptor n.  Emits one DT_LNK entry per descriptor for which
 * fcheck_files() returns a file.  The function always returns 0 (retval is
 * never changed after initialisation).
 */
static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct task_struct *p = inode->u.proc_i.task;
	unsigned int fd, pid, ino;
	int retval;
	char buf[NUMBUF];
	struct files_struct * files;

	retval = 0;
	pid = p->pid;

	fd = filp->f_pos;
	switch (fd) {
		case 0:
			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
				goto out;
			filp->f_pos++;
			/* fall through */
		case 1:
			ino = fake_ino(pid, PROC_PID_INO);
			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
				goto out;
			filp->f_pos++;
			/* fall through */
		default:
			/* Take a reference on the files_struct while the task is locked. */
			task_lock(p);
			files = p->files;
			if (files)
				atomic_inc(&files->count);
			task_unlock(p);
			if (!files)
				goto out;
			for (fd = filp->f_pos-2;
			     fd < files->max_fds;
			     fd++, filp->f_pos++) {
				unsigned int i,j;

				/* Skip descriptor slots with no file. */
				if (!fcheck_files(files, fd))
					continue;

				/* Format fd in decimal, right-aligned in buf (no NUL). */
				j = NUMBUF;
				i = fd;
				do {
					j--;
					buf[j] = '0' + (i % 10);
					i /= 10;
				} while (i);

				ino = fake_ino(pid, PROC_PID_FD_DIR + fd);
				if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0)
					break;
			}
			put_files_struct(files);
	}
out:
	return retval;
}
604     
605     static int proc_base_readdir(struct file * filp,
606     	void * dirent, filldir_t filldir)
607     {
608     	int i;
609     	int pid;
610     	struct inode *inode = filp->f_dentry->d_inode;
611     	struct pid_entry *p;
612     
613     	pid = inode->u.proc_i.task->pid;
614     	if (!pid)
615     		return -ENOENT;
616     	i = filp->f_pos;
617     	switch (i) {
618     		case 0:
619     			if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
620     				return 0;
621     			i++;
622     			filp->f_pos++;
623     			/* fall through */
624     		case 1:
625     			if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0)
626     				return 0;
627     			i++;
628     			filp->f_pos++;
629     			/* fall through */
630     		default:
631     			i -= 2;
632     			if (i>=sizeof(base_stuff)/sizeof(base_stuff[0]))
633     				return 1;
634     			p = base_stuff + i;
635     			while (p->name) {
636     				if (filldir(dirent, p->name, p->len, filp->f_pos,
637     					    fake_ino(pid, p->type), p->mode >> 12) < 0)
638     					return 0;
639     				filp->f_pos++;
640     				p++;
641     			}
642     	}
643     	return 1;
644     }
645     
646     /* building an inode */
647     
648     static int task_dumpable(struct task_struct *task)
649     {
650     	int dumpable = 0;
651     	struct mm_struct *mm;
652     
653     	task_lock(task);
654     	mm = task->mm;
655     	if (mm)
656     		dumpable = mm->dumpable;
657     	task_unlock(task);
658     	return dumpable;
659     }
660     
661     
662     static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
663     {
664     	struct inode * inode;
665     
666     	/* We need a new inode */
667     	
668     	inode = new_inode(sb);
669     	if (!inode)
670     		goto out;
671     
672     	/* Common stuff */
673     
674     	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
675     	inode->i_ino = fake_ino(task->pid, ino);
676     
677     	if (!task->pid)
678     		goto out_unlock;
679     
680     	/*
681     	 * grab the reference to task.
682     	 */
683     	get_task_struct(task);
684     	inode->u.proc_i.task = task;
685     	inode->i_uid = 0;
686     	inode->i_gid = 0;
687     	if (ino == PROC_PID_INO || task_dumpable(task)) {
688     		inode->i_uid = task->euid;
689     		inode->i_gid = task->egid;
690     	}
691     
692     out:
693     	return inode;
694     
695     out_unlock:
696     	iput(inode);
697     	return NULL;
698     }
699     
700     /* dentry stuff */
701     
/*
 * d_revalidate for fd entries: unconditionally reports the dentry as not
 * valid (returns 0).
 */
static int pid_fd_revalidate(struct dentry * dentry, int flags)
{
	const int still_valid = 0;

	return still_valid;
}
706     
707     /*
708      *	Exceptional case: normally we are not allowed to unhash a busy
709      * directory. In this case, however, we can do it - no aliasing problems
710      * due to the way we treat inodes.
711      */
712     static int pid_base_revalidate(struct dentry * dentry, int flags)
713     {
714     	if (dentry->d_inode->u.proc_i.task->pid)
715     		return 1;
716     	d_drop(dentry);
717     	return 0;
718     }
719     
/*
 * d_delete for all per-process dentries: always returns 1.
 */
static int pid_delete_dentry(struct dentry * dentry)
{
	const int discard = 1;

	return discard;
}
724     
/* Dentry operations for fd entries (installed by proc_lookupfd()). */
static struct dentry_operations pid_fd_dentry_operations =
{
	d_revalidate:	pid_fd_revalidate,
	d_delete:	pid_delete_dentry,
};
730     
/* Dentry operations for entries of a per-process directory (installed by proc_base_lookup()). */
static struct dentry_operations pid_dentry_operations =
{
	d_delete:	pid_delete_dentry,
};
735     
/* Dentry operations for the per-process directory itself (installed by proc_pid_lookup()). */
static struct dentry_operations pid_base_dentry_operations =
{
	d_revalidate:	pid_base_revalidate,
	d_delete:	pid_delete_dentry,
};
741     
742     /* Lookups */
743     #define MAX_MULBY10	((~0U-9)/10)
744     
745     static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
746     {
747     	unsigned int fd, c;
748     	struct task_struct *task = dir->u.proc_i.task;
749     	struct file * file;
750     	struct files_struct * files;
751     	struct inode *inode;
752     	const char *name;
753     	int len;
754     
755     	fd = 0;
756     	len = dentry->d_name.len;
757     	name = dentry->d_name.name;
758     	if (len > 1 && *name == '0') goto out;
759     	while (len-- > 0) {
760     		c = *name - '0';
761     		name++;
762     		if (c > 9)
763     			goto out;
764     		if (fd >= MAX_MULBY10)
765     			goto out;
766     		fd *= 10;
767     		fd += c;
768     	}
769     
770     	inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd);
771     	if (!inode)
772     		goto out;
773     	task_lock(task);
774     	files = task->files;
775     	if (files)
776     		atomic_inc(&files->count);
777     	task_unlock(task);
778     	if (!files)
779     		goto out_unlock;
780     	read_lock(&files->file_lock);
781     	file = inode->u.proc_i.file = fcheck_files(files, fd);
782     	if (!file)
783     		goto out_unlock2;
784     	get_file(file);
785     	read_unlock(&files->file_lock);
786     	put_files_struct(files);
787     	inode->i_op = &proc_pid_link_inode_operations;
788     	inode->i_size = 64;
789     	inode->i_mode = S_IFLNK;
790     	inode->u.proc_i.op.proc_get_link = proc_fd_link;
791     	if (file->f_mode & 1)
792     		inode->i_mode |= S_IRUSR | S_IXUSR;
793     	if (file->f_mode & 2)
794     		inode->i_mode |= S_IWUSR | S_IXUSR;
795     	dentry->d_op = &pid_fd_dentry_operations;
796     	d_add(dentry, inode);
797     	return NULL;
798     
799     out_unlock2:
800     	put_files_struct(files);
801     	read_unlock(&files->file_lock);
802     out_unlock:
803     	iput(inode);
804     out:
805     	return ERR_PTR(-ENOENT);
806     }
807     
/* File operations for the per-process "fd" directory. */
static struct file_operations proc_fd_operations = {
	read:		generic_read_dir,
	readdir:	proc_readfd,
};
812     
/*
 * proc directories can do almost nothing..
 *
 * Inode operations for the per-process "fd" directory: name lookup and a
 * permission check only.
 */
static struct inode_operations proc_fd_inode_operations = {
	lookup:		proc_lookupfd,
	permission:	proc_permission,
};
820     
821     static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
822     {
823     	struct inode *inode;
824     	int error;
825     	struct task_struct *task = dir->u.proc_i.task;
826     	struct pid_entry *p;
827     
828     	error = -ENOENT;
829     	inode = NULL;
830     
831     	for (p = base_stuff; p->name; p++) {
832     		if (p->len != dentry->d_name.len)
833     			continue;
834     		if (!memcmp(dentry->d_name.name, p->name, p->len))
835     			break;
836     	}
837     	if (!p->name)
838     		goto out;
839     
840     	error = -EINVAL;
841     	inode = proc_pid_make_inode(dir->i_sb, task, p->type);
842     	if (!inode)
843     		goto out;
844     
845     	inode->i_mode = p->mode;
846     	/*
847     	 * Yes, it does not scale. And it should not. Don't add
848     	 * new entries into /proc/<pid>/ without very good reasons.
849     	 */
850     	switch(p->type) {
851     		case PROC_PID_FD:
852     			inode->i_nlink = 2;
853     			inode->i_op = &proc_fd_inode_operations;
854     			inode->i_fop = &proc_fd_operations;
855     			break;
856     		case PROC_PID_EXE:
857     			inode->i_op = &proc_pid_link_inode_operations;
858     			inode->u.proc_i.op.proc_get_link = proc_exe_link;
859     			break;
860     		case PROC_PID_CWD:
861     			inode->i_op = &proc_pid_link_inode_operations;
862     			inode->u.proc_i.op.proc_get_link = proc_cwd_link;
863     			break;
864     		case PROC_PID_ROOT:
865     			inode->i_op = &proc_pid_link_inode_operations;
866     			inode->u.proc_i.op.proc_get_link = proc_root_link;
867     			break;
868     		case PROC_PID_ENVIRON:
869     			inode->i_fop = &proc_info_file_operations;
870     			inode->u.proc_i.op.proc_read = proc_pid_environ;
871     			break;
872     		case PROC_PID_STATUS:
873     			inode->i_fop = &proc_info_file_operations;
874     			inode->u.proc_i.op.proc_read = proc_pid_status;
875     			break;
876     		case PROC_PID_STAT:
877     			inode->i_fop = &proc_info_file_operations;
878     			inode->u.proc_i.op.proc_read = proc_pid_stat;
879     			break;
880     		case PROC_PID_CMDLINE:
881     			inode->i_fop = &proc_info_file_operations;
882     			inode->u.proc_i.op.proc_read = proc_pid_cmdline;
883     			break;
884     		case PROC_PID_STATM:
885     			inode->i_fop = &proc_info_file_operations;
886     			inode->u.proc_i.op.proc_read = proc_pid_statm;
887     			break;
888     		case PROC_PID_MAPS:
889     			inode->i_fop = &proc_maps_operations;
890     			break;
891     #ifdef CONFIG_SMP
892     		case PROC_PID_CPU:
893     			inode->i_fop = &proc_info_file_operations;
894     			inode->u.proc_i.op.proc_read = proc_pid_cpu;
895     			break;
896     #endif
897     		case PROC_PID_MEM:
898     			inode->i_op = &proc_mem_inode_operations;
899     			inode->i_fop = &proc_mem_operations;
900     			break;
901     		default:
902     			printk("procfs: impossible type (%d)",p->type);
903     			iput(inode);
904     			return ERR_PTR(-EINVAL);
905     	}
906     	dentry->d_op = &pid_dentry_operations;
907     	d_add(dentry, inode);
908     	return NULL;
909     
910     out:
911     	return ERR_PTR(error);
912     }
913     
/* File operations for a per-process directory. */
static struct file_operations proc_base_operations = {
	read:		generic_read_dir,
	readdir:	proc_base_readdir,
};
918     
/* Inode operations for a per-process directory: name lookup only. */
static struct inode_operations proc_base_inode_operations = {
	lookup:		proc_base_lookup,
};
922     
923     /*
924      * /proc/self:
925      */
926     static int proc_self_readlink(struct dentry *dentry, char *buffer, int buflen)
927     {
928     	char tmp[30];
929     	sprintf(tmp, "%d", current->pid);
930     	return vfs_readlink(dentry,buffer,buflen,tmp);
931     }
932     
933     static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
934     {
935     	char tmp[30];
936     	sprintf(tmp, "%d", current->pid);
937     	return vfs_follow_link(nd,tmp);
938     }	
939     
/* Inode operations for the "self" symlink (installed by proc_pid_lookup()). */
static struct inode_operations proc_self_inode_operations = {
	readlink:	proc_self_readlink,
	follow_link:	proc_self_follow_link,
};
944     
945     struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
946     {
947     	unsigned int pid, c;
948     	struct task_struct *task;
949     	const char *name;
950     	struct inode *inode;
951     	int len;
952     
953     	pid = 0;
954     	name = dentry->d_name.name;
955     	len = dentry->d_name.len;
956     	if (len == 4 && !memcmp(name, "self", 4)) {
957     		inode = new_inode(dir->i_sb);
958     		if (!inode)
959     			return ERR_PTR(-ENOMEM);
960     		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
961     		inode->i_ino = fake_ino(0, PROC_PID_INO);
962     		inode->u.proc_i.file = NULL;
963     		inode->u.proc_i.task = NULL;
964     		inode->i_mode = S_IFLNK|S_IRWXUGO;
965     		inode->i_uid = inode->i_gid = 0;
966     		inode->i_size = 64;
967     		inode->i_op = &proc_self_inode_operations;
968     		d_add(dentry, inode);
969     		return NULL;
970     	}
971     	while (len-- > 0) {
972     		c = *name - '0';
973     		name++;
974     		if (c > 9)
975     			goto out;
976     		if (pid >= MAX_MULBY10)
977     			goto out;
978     		pid *= 10;
979     		pid += c;
980     		if (!pid)
981     			goto out;
982     	}
983     
984     	read_lock(&tasklist_lock);
985     	task = find_task_by_pid(pid);
986     	if (task)
987     		get_task_struct(task);
988     	read_unlock(&tasklist_lock);
989     	if (!task)
990     		goto out;
991     
992     	inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO);
993     
994     	free_task_struct(task);
995     
996     	if (!inode)
997     		goto out;
998     	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
999     	inode->i_op = &proc_base_inode_operations;
1000     	inode->i_fop = &proc_base_operations;
1001     	inode->i_nlink = 3;
1002     	inode->i_flags|=S_IMMUTABLE;
1003     
1004     	dentry->d_op = &pid_base_dentry_operations;
1005     	d_add(dentry, inode);
1006     	return NULL;
1007     out:
1008     	return ERR_PTR(-ENOENT);
1009     }
1010     
1011     void proc_pid_delete_inode(struct inode *inode)
1012     {
1013     	if (inode->u.proc_i.file)
1014     		fput(inode->u.proc_i.file);
1015     	if (inode->u.proc_i.task)
1016     		free_task_struct(inode->u.proc_i.task);
1017     }
1018     
/* Size of the scratch buffer proc_pid_readdir() formats a pid into. */
#define PROC_NUMBUF 10
/* Maximum number of pids collected per get_pid_list() call. */
#define PROC_MAXPIDS 20
1021     
1022     /*
1023      * Get a few pid's to return for filldir - we need to hold the
1024      * tasklist lock while doing this, and we must release it before
1025      * we actually do the filldir itself, so we use a temp buffer..
1026      */
1027     static int get_pid_list(int index, unsigned int *pids)
1028     {
1029     	struct task_struct *p;
1030     	int nr_pids = 0;
1031     
1032     	index--;
1033     	read_lock(&tasklist_lock);
1034     	for_each_task(p) {
1035     		int pid = p->pid;
1036     		if (!pid)
1037     			continue;
1038     		if (--index >= 0)
1039     			continue;
1040     		pids[nr_pids] = pid;
1041     		nr_pids++;
1042     		if (nr_pids >= PROC_MAXPIDS)
1043     			break;
1044     	}
1045     	read_unlock(&tasklist_lock);
1046     	return nr_pids;
1047     }
1048     
1049     int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
1050     {
1051     	unsigned int pid_array[PROC_MAXPIDS];
1052     	char buf[PROC_NUMBUF];
1053     	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
1054     	unsigned int nr_pids, i;
1055     
1056     	if (!nr) {
1057     		ino_t ino = fake_ino(0,PROC_PID_INO);
1058     		if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
1059     			return 0;
1060     		filp->f_pos++;
1061     		nr++;
1062     	}
1063     
1064     	nr_pids = get_pid_list(nr, pid_array);
1065     
1066     	for (i = 0; i < nr_pids; i++) {
1067     		int pid = pid_array[i];
1068     		ino_t ino = fake_ino(pid,PROC_PID_INO);
1069     		unsigned long j = PROC_NUMBUF;
1070     
1071     		do buf[--j] = '0' + (pid % 10); while (pid/=10);
1072     
1073     		if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0)
1074     			break;
1075     		filp->f_pos++;
1076     	}
1077     	return 0;
1078     }
1079