File: /usr/src/linux/fs/proc/base.c
1 /*
2 * linux/fs/proc/base.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 *
6 * proc base directory handling functions
7 *
8 * 1999, Al Viro. Rewritten. Now it covers the whole per-process part.
9 * Instead of using magical inumbers to determine the kind of object
10 * we allocate and fill in-core inodes upon lookup. They don't even
11 * go into icache. We cache the reference to task_struct upon lookup too.
12 * Eventually it should become a filesystem in its own. We don't use the
13 * rest of procfs anymore.
14 */
15
16 #include <asm/uaccess.h>
17
18 #include <linux/config.h>
19 #include <linux/errno.h>
20 #include <linux/sched.h>
21 #include <linux/proc_fs.h>
22 #include <linux/stat.h>
23 #include <linux/init.h>
24 #include <linux/file.h>
25 #include <linux/string.h>
26
27 /*
28 * For hysterical raisins we keep the same inumbers as in the old procfs.
29 * Feel free to change the macro below - just keep the range distinct from
30 * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
31 * As soon as we'll get a separate superblock we will be able to forget
32 * about magical ranges too.
33 */
34
/* Build a per-process inode number: pid shifted left by 16 bits, OR-ed with the per-entry ino. */
#define fake_ino(pid,ino) (((pid)<<16)|(ino))
36
/*
 * Per-task output routines used by the read handlers in this file.
 * Only their prototypes appear here; the definitions are not in this file.
 * The int-returning ones are installed as proc_read callbacks and are
 * handed one page-sized buffer by proc_info_read().
 */
ssize_t proc_pid_read_maps(struct task_struct*,struct file*,char*,size_t,loff_t*);
int proc_pid_stat(struct task_struct*,char*);
int proc_pid_status(struct task_struct*,char*);
int proc_pid_statm(struct task_struct*,char*);
int proc_pid_cpu(struct task_struct*,char*);
42
43 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
44 {
45 if (inode->u.proc_i.file) {
46 *mnt = mntget(inode->u.proc_i.file->f_vfsmnt);
47 *dentry = dget(inode->u.proc_i.file->f_dentry);
48 return 0;
49 }
50 return -ENOENT;
51 }
52
53 static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
54 {
55 struct mm_struct * mm;
56 struct vm_area_struct * vma;
57 int result = -ENOENT;
58 struct task_struct *task = inode->u.proc_i.task;
59
60 task_lock(task);
61 mm = task->mm;
62 if (mm)
63 atomic_inc(&mm->mm_users);
64 task_unlock(task);
65 if (!mm)
66 goto out;
67 down_read(&mm->mmap_sem);
68 vma = mm->mmap;
69 while (vma) {
70 if ((vma->vm_flags & VM_EXECUTABLE) &&
71 vma->vm_file) {
72 *mnt = mntget(vma->vm_file->f_vfsmnt);
73 *dentry = dget(vma->vm_file->f_dentry);
74 result = 0;
75 break;
76 }
77 vma = vma->vm_next;
78 }
79 up_read(&mm->mmap_sem);
80 mmput(mm);
81 out:
82 return result;
83 }
84
85 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
86 {
87 struct fs_struct *fs;
88 int result = -ENOENT;
89 task_lock(inode->u.proc_i.task);
90 fs = inode->u.proc_i.task->fs;
91 if(fs)
92 atomic_inc(&fs->count);
93 task_unlock(inode->u.proc_i.task);
94 if (fs) {
95 read_lock(&fs->lock);
96 *mnt = mntget(fs->pwdmnt);
97 *dentry = dget(fs->pwd);
98 read_unlock(&fs->lock);
99 result = 0;
100 put_fs_struct(fs);
101 }
102 return result;
103 }
104
105 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
106 {
107 struct fs_struct *fs;
108 int result = -ENOENT;
109 task_lock(inode->u.proc_i.task);
110 fs = inode->u.proc_i.task->fs;
111 if(fs)
112 atomic_inc(&fs->count);
113 task_unlock(inode->u.proc_i.task);
114 if (fs) {
115 read_lock(&fs->lock);
116 *mnt = mntget(fs->rootmnt);
117 *dentry = dget(fs->root);
118 read_unlock(&fs->lock);
119 result = 0;
120 put_fs_struct(fs);
121 }
122 return result;
123 }
124
125 static int proc_pid_environ(struct task_struct *task, char * buffer)
126 {
127 struct mm_struct *mm;
128 int res = 0;
129 task_lock(task);
130 mm = task->mm;
131 if (mm)
132 atomic_inc(&mm->mm_users);
133 task_unlock(task);
134 if (mm) {
135 int len = mm->env_end - mm->env_start;
136 if (len > PAGE_SIZE)
137 len = PAGE_SIZE;
138 res = access_process_vm(task, mm->env_start, buffer, len, 0);
139 mmput(mm);
140 }
141 return res;
142 }
143
144 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
145 {
146 struct mm_struct *mm;
147 int res = 0;
148 task_lock(task);
149 mm = task->mm;
150 if (mm)
151 atomic_inc(&mm->mm_users);
152 task_unlock(task);
153 if (mm) {
154 int len = mm->arg_end - mm->arg_start;
155 if (len > PAGE_SIZE)
156 len = PAGE_SIZE;
157 res = access_process_vm(task, mm->arg_start, buffer, len, 0);
158 // If the nul at the end of args has been overwritten, then
159 // assume application is using setproctitle(3).
160 if ( res > 0 && buffer[res-1] != '\0' )
161 {
162 len = strnlen( buffer, res );
163 if ( len < res )
164 {
165 res = len;
166 }
167 else
168 {
169 len = mm->env_end - mm->env_start;
170 if (len > PAGE_SIZE - res)
171 len = PAGE_SIZE - res;
172 res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
173 res = strnlen( buffer, res );
174 }
175 }
176 mmput(mm);
177 }
178 return res;
179 }
180
181 /************************************************************************/
182 /* Here the fs part begins */
183 /************************************************************************/
184
185 /* permission checks */
186
187 static int standard_permission(struct inode *inode, int mask)
188 {
189 int mode = inode->i_mode;
190
191 if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
192 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
193 return -EROFS; /* Nobody gets write access to a read-only fs */
194 else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
195 return -EACCES; /* Nobody gets write access to an immutable file */
196 else if (current->fsuid == inode->i_uid)
197 mode >>= 6;
198 else if (in_group_p(inode->i_gid))
199 mode >>= 3;
200 if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
201 return 0;
202 /* read and search access */
203 if ((mask == S_IROTH) ||
204 (S_ISDIR(mode) && !(mask & ~(S_IROTH | S_IXOTH))))
205 if (capable(CAP_DAC_READ_SEARCH))
206 return 0;
207 return -EACCES;
208 }
209
210 static int proc_check_root(struct inode *inode)
211 {
212 struct dentry *de, *base, *root;
213 struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
214 int res = 0;
215
216 if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
217 return -ENOENT;
218 read_lock(¤t->fs->lock);
219 our_vfsmnt = mntget(current->fs->rootmnt);
220 base = dget(current->fs->root);
221 read_unlock(¤t->fs->lock);
222
223 spin_lock(&dcache_lock);
224 de = root;
225 mnt = vfsmnt;
226
227 while (vfsmnt != our_vfsmnt) {
228 if (vfsmnt == vfsmnt->mnt_parent)
229 goto out;
230 de = vfsmnt->mnt_mountpoint;
231 vfsmnt = vfsmnt->mnt_parent;
232 }
233
234 if (!is_subdir(de, base))
235 goto out;
236 spin_unlock(&dcache_lock);
237
238 exit:
239 dput(base);
240 mntput(our_vfsmnt);
241 dput(root);
242 mntput(mnt);
243 return res;
244 out:
245 spin_unlock(&dcache_lock);
246 res = -EACCES;
247 goto exit;
248 }
249
250 static int proc_permission(struct inode *inode, int mask)
251 {
252 if (standard_permission(inode, mask) != 0)
253 return -EACCES;
254 return proc_check_root(inode);
255 }
256
257 static ssize_t pid_maps_read(struct file * file, char * buf,
258 size_t count, loff_t *ppos)
259 {
260 struct inode * inode = file->f_dentry->d_inode;
261 struct task_struct *task = inode->u.proc_i.task;
262 ssize_t res;
263
264 res = proc_pid_read_maps(task, file, buf, count, ppos);
265 return res;
266 }
267
268 static struct file_operations proc_maps_operations = {
269 read: pid_maps_read,
270 };
271
/*
 * Upper bound applied to a single read request in proc_info_read().
 * 4K page size but our output routines use some slack for overruns.
 */
#define PROC_BLOCK_SIZE (3*1024)
273
274 static ssize_t proc_info_read(struct file * file, char * buf,
275 size_t count, loff_t *ppos)
276 {
277 struct inode * inode = file->f_dentry->d_inode;
278 unsigned long page;
279 ssize_t length;
280 ssize_t end;
281 struct task_struct *task = inode->u.proc_i.task;
282
283 if (count > PROC_BLOCK_SIZE)
284 count = PROC_BLOCK_SIZE;
285 if (!(page = __get_free_page(GFP_KERNEL)))
286 return -ENOMEM;
287
288 length = inode->u.proc_i.op.proc_read(task, (char*)page);
289
290 if (length < 0) {
291 free_page(page);
292 return length;
293 }
294 /* Static 4kB (or whatever) block capacity */
295 if (*ppos >= length) {
296 free_page(page);
297 return 0;
298 }
299 if (count + *ppos > length)
300 count = length - *ppos;
301 end = count + *ppos;
302 copy_to_user(buf, (char *) page + *ppos, count);
303 *ppos = end;
304 free_page(page);
305 return count;
306 }
307
308 static struct file_operations proc_info_file_operations = {
309 read: proc_info_read,
310 };
311
/*
 * True when the caller may inspect task p's memory: p is the caller
 * itself, or p has the caller as p_pptr, carries PT_PTRACED and is in
 * TASK_STOPPED.  The argument is parenthesized so that any pointer
 * expression can be passed safely.
 */
#define MAY_PTRACE(p) \
	((p) == current || \
	 ((p)->p_pptr == current && ((p)->ptrace & PT_PTRACED) && \
	  (p)->state == TASK_STOPPED))
314
315
316 static int mem_open(struct inode* inode, struct file* file)
317 {
318 file->private_data = (void*)((long)current->self_exec_id);
319 return 0;
320 }
321
322 static ssize_t mem_read(struct file * file, char * buf,
323 size_t count, loff_t *ppos)
324 {
325 struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task;
326 char *page;
327 unsigned long src = *ppos;
328 int copied = 0;
329 struct mm_struct *mm;
330
331
332 if (!MAY_PTRACE(task))
333 return -ESRCH;
334
335 page = (char *)__get_free_page(GFP_USER);
336 if (!page)
337 return -ENOMEM;
338
339 task_lock(task);
340 mm = task->mm;
341 if (mm)
342 atomic_inc(&mm->mm_users);
343 task_unlock(task);
344 if (!mm)
345 return 0;
346
347 if (file->private_data != (void*)((long)current->self_exec_id) ) {
348 mmput(mm);
349 return -EIO;
350 }
351
352
353 while (count > 0) {
354 int this_len, retval;
355
356 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
357 retval = access_process_vm(task, src, page, this_len, 0);
358 if (!retval) {
359 if (!copied)
360 copied = -EIO;
361 break;
362 }
363 if (copy_to_user(buf, page, retval)) {
364 copied = -EFAULT;
365 break;
366 }
367 copied += retval;
368 src += retval;
369 buf += retval;
370 count -= retval;
371 }
372 *ppos = src;
373 mmput(mm);
374 free_page((unsigned long) page);
375 return copied;
376 }
377
/*
 * Writing to /proc/<pid>/mem is disabled: mem_write is defined as NULL, so
 * the implementation below is compiled out and the write slot of
 * proc_mem_operations ends up NULL.
 */
#define mem_write NULL

#ifndef mem_write
/* This is a security hazard */
static ssize_t mem_write(struct file * file, const char * buf,
			 size_t count, loff_t *ppos)
{
	struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task;
	unsigned long dst = *ppos;
	char *page;
	int copied = 0;

	if (!MAY_PTRACE(task))
		return -ESRCH;

	page = (char *)__get_free_page(GFP_USER);
	if (!page)
		return -ENOMEM;

	while (count > 0) {
		int chunk = (count > PAGE_SIZE) ? PAGE_SIZE : count;
		int written;

		/* Stage the user data in the bounce page first. */
		if (copy_from_user(page, buf, chunk)) {
			copied = -EFAULT;
			break;
		}
		written = access_process_vm(task, dst, page, chunk, 1);
		if (!written) {
			if (!copied)
				copied = -EIO;
			break;
		}
		copied += written;
		buf += written;
		dst += written;
		count -= written;
	}
	*ppos = dst;
	free_page((unsigned long) page);
	return copied;
}
#endif
421
422 static struct file_operations proc_mem_operations = {
423 read: mem_read,
424 write: mem_write,
425 open: mem_open,
426 };
427
428 static struct inode_operations proc_mem_inode_operations = {
429 permission: proc_permission,
430 };
431
432 static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
433 {
434 struct inode *inode = dentry->d_inode;
435 int error = -EACCES;
436
437 /* We don't need a base pointer in the /proc filesystem */
438 path_release(nd);
439
440 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
441 goto out;
442 error = proc_check_root(inode);
443 if (error)
444 goto out;
445
446 error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
447 nd->last_type = LAST_BIND;
448 out:
449 return error;
450 }
451
452 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
453 char * buffer, int buflen)
454 {
455 struct inode * inode;
456 char * tmp = (char*)__get_free_page(GFP_KERNEL), *path;
457 int len;
458
459 if (!tmp)
460 return -ENOMEM;
461
462 inode = dentry->d_inode;
463 path = d_path(dentry, mnt, tmp, PAGE_SIZE);
464 len = tmp + PAGE_SIZE - 1 - path;
465
466 if (len < buflen)
467 buflen = len;
468 copy_to_user(buffer, path, buflen);
469 free_page((unsigned long)tmp);
470 return buflen;
471 }
472
473 static int proc_pid_readlink(struct dentry * dentry, char * buffer, int buflen)
474 {
475 int error = -EACCES;
476 struct inode *inode = dentry->d_inode;
477 struct dentry *de;
478 struct vfsmount *mnt = NULL;
479
480 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
481 goto out;
482 error = proc_check_root(inode);
483 if (error)
484 goto out;
485
486 error = inode->u.proc_i.op.proc_get_link(inode, &de, &mnt);
487 if (error)
488 goto out;
489
490 error = do_proc_readlink(de, mnt, buffer, buflen);
491 dput(de);
492 mntput(mnt);
493 out:
494 return error;
495 }
496
497 static struct inode_operations proc_pid_link_inode_operations = {
498 readlink: proc_pid_readlink,
499 follow_link: proc_pid_follow_link
500 };
501
/* One entry of the /proc/<pid>/ directory table (see base_stuff). */
struct pid_entry {
	int type;	/* a pid_directory_inos value; also used as the low part of the fake inode number */
	int len;	/* length of name, without the terminating NUL */
	char *name;	/* entry name; NULL terminates the table */
	mode_t mode;	/* file type and permission bits given to the inode */
};
508
/*
 * Per-entry identifiers for objects below /proc/<pid>/.  They are combined
 * with the pid by fake_ino() to form inode numbers.
 */
enum pid_directory_inos {
	PROC_PID_INO = 2,	/* the /proc/<pid> directory itself */
	PROC_PID_STATUS,
	PROC_PID_MEM,
	PROC_PID_CWD,
	PROC_PID_ROOT,
	PROC_PID_EXE,
	PROC_PID_FD,
	PROC_PID_ENVIRON,
	PROC_PID_CMDLINE,
	PROC_PID_STAT,
	PROC_PID_STATM,
	PROC_PID_MAPS,
	PROC_PID_CPU,
	PROC_PID_FD_DIR = 0x8000,	/* 0x8000-0xffff; base for fd/<n> entries (PROC_PID_FD_DIR + fd) */
};
525
/*
 * Table of the entries found in every /proc/<pid>/ directory, terminated
 * by an entry with a NULL name.  E() fills in the name length from the
 * string literal.  Used by proc_base_readdir() and proc_base_lookup().
 */
#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
static struct pid_entry base_stuff[] = {
	E(PROC_PID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR),
	E(PROC_PID_ENVIRON, "environ", S_IFREG|S_IRUSR),
	E(PROC_PID_STATUS, "status", S_IFREG|S_IRUGO),
	E(PROC_PID_CMDLINE, "cmdline", S_IFREG|S_IRUGO),
	E(PROC_PID_STAT, "stat", S_IFREG|S_IRUGO),
	E(PROC_PID_STATM, "statm", S_IFREG|S_IRUGO),
#ifdef CONFIG_SMP
	E(PROC_PID_CPU, "cpu", S_IFREG|S_IRUGO),
#endif
	E(PROC_PID_MAPS, "maps", S_IFREG|S_IRUGO),
	E(PROC_PID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
	E(PROC_PID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
	E(PROC_PID_ROOT, "root", S_IFLNK|S_IRWXUGO),
	E(PROC_PID_EXE, "exe", S_IFLNK|S_IRWXUGO),
	{0,0,NULL,0}
};
#undef E
545
/* Size of the scratch buffer proc_readfd() uses to format a descriptor number in decimal. */
#define NUMBUF 10
547
548 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
549 {
550 struct inode *inode = filp->f_dentry->d_inode;
551 struct task_struct *p = inode->u.proc_i.task;
552 unsigned int fd, pid, ino;
553 int retval;
554 char buf[NUMBUF];
555 struct files_struct * files;
556
557 retval = 0;
558 pid = p->pid;
559
560 fd = filp->f_pos;
561 switch (fd) {
562 case 0:
563 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
564 goto out;
565 filp->f_pos++;
566 case 1:
567 ino = fake_ino(pid, PROC_PID_INO);
568 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
569 goto out;
570 filp->f_pos++;
571 default:
572 task_lock(p);
573 files = p->files;
574 if (files)
575 atomic_inc(&files->count);
576 task_unlock(p);
577 if (!files)
578 goto out;
579 for (fd = filp->f_pos-2;
580 fd < files->max_fds;
581 fd++, filp->f_pos++) {
582 unsigned int i,j;
583
584 if (!fcheck_files(files, fd))
585 continue;
586
587 j = NUMBUF;
588 i = fd;
589 do {
590 j--;
591 buf[j] = '0' + (i % 10);
592 i /= 10;
593 } while (i);
594
595 ino = fake_ino(pid, PROC_PID_FD_DIR + fd);
596 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0)
597 break;
598 }
599 put_files_struct(files);
600 }
601 out:
602 return retval;
603 }
604
605 static int proc_base_readdir(struct file * filp,
606 void * dirent, filldir_t filldir)
607 {
608 int i;
609 int pid;
610 struct inode *inode = filp->f_dentry->d_inode;
611 struct pid_entry *p;
612
613 pid = inode->u.proc_i.task->pid;
614 if (!pid)
615 return -ENOENT;
616 i = filp->f_pos;
617 switch (i) {
618 case 0:
619 if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
620 return 0;
621 i++;
622 filp->f_pos++;
623 /* fall through */
624 case 1:
625 if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0)
626 return 0;
627 i++;
628 filp->f_pos++;
629 /* fall through */
630 default:
631 i -= 2;
632 if (i>=sizeof(base_stuff)/sizeof(base_stuff[0]))
633 return 1;
634 p = base_stuff + i;
635 while (p->name) {
636 if (filldir(dirent, p->name, p->len, filp->f_pos,
637 fake_ino(pid, p->type), p->mode >> 12) < 0)
638 return 0;
639 filp->f_pos++;
640 p++;
641 }
642 }
643 return 1;
644 }
645
646 /* building an inode */
647
648 static int task_dumpable(struct task_struct *task)
649 {
650 int dumpable = 0;
651 struct mm_struct *mm;
652
653 task_lock(task);
654 mm = task->mm;
655 if (mm)
656 dumpable = mm->dumpable;
657 task_unlock(task);
658 return dumpable;
659 }
660
661
662 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
663 {
664 struct inode * inode;
665
666 /* We need a new inode */
667
668 inode = new_inode(sb);
669 if (!inode)
670 goto out;
671
672 /* Common stuff */
673
674 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
675 inode->i_ino = fake_ino(task->pid, ino);
676
677 if (!task->pid)
678 goto out_unlock;
679
680 /*
681 * grab the reference to task.
682 */
683 get_task_struct(task);
684 inode->u.proc_i.task = task;
685 inode->i_uid = 0;
686 inode->i_gid = 0;
687 if (ino == PROC_PID_INO || task_dumpable(task)) {
688 inode->i_uid = task->euid;
689 inode->i_gid = task->egid;
690 }
691
692 out:
693 return inode;
694
695 out_unlock:
696 iput(inode);
697 return NULL;
698 }
699
700 /* dentry stuff */
701
/*
 * d_revalidate for fd/<n> dentries: unconditionally returns 0.
 * NOTE(review): presumably this makes every cached fd dentry count as
 * stale so it is looked up again - confirm against the dcache contract.
 */
static int pid_fd_revalidate(struct dentry * dentry, int flags)
{
	return 0;
}
706
707 /*
708 * Exceptional case: normally we are not allowed to unhash a busy
709 * directory. In this case, however, we can do it - no aliasing problems
710 * due to the way we treat inodes.
711 */
712 static int pid_base_revalidate(struct dentry * dentry, int flags)
713 {
714 if (dentry->d_inode->u.proc_i.task->pid)
715 return 1;
716 d_drop(dentry);
717 return 0;
718 }
719
/*
 * d_delete for all per-process dentries: unconditionally returns 1.
 * NOTE(review): presumably this asks the dcache to discard the dentry once
 * its last reference goes away - confirm against the dcache contract.
 */
static int pid_delete_dentry(struct dentry * dentry)
{
	return 1;
}
724
725 static struct dentry_operations pid_fd_dentry_operations =
726 {
727 d_revalidate: pid_fd_revalidate,
728 d_delete: pid_delete_dentry,
729 };
730
731 static struct dentry_operations pid_dentry_operations =
732 {
733 d_delete: pid_delete_dentry,
734 };
735
736 static struct dentry_operations pid_base_dentry_operations =
737 {
738 d_revalidate: pid_base_revalidate,
739 d_delete: pid_delete_dentry,
740 };
741
742 /* Lookups */
/*
 * Largest unsigned value that can be multiplied by 10 and have a digit
 * (at most 9) added without wrapping; the decimal parsers in the lookup
 * routines reject any accumulator >= this value.
 */
#define MAX_MULBY10 ((~0U-9)/10)
744
745 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
746 {
747 unsigned int fd, c;
748 struct task_struct *task = dir->u.proc_i.task;
749 struct file * file;
750 struct files_struct * files;
751 struct inode *inode;
752 const char *name;
753 int len;
754
755 fd = 0;
756 len = dentry->d_name.len;
757 name = dentry->d_name.name;
758 if (len > 1 && *name == '0') goto out;
759 while (len-- > 0) {
760 c = *name - '0';
761 name++;
762 if (c > 9)
763 goto out;
764 if (fd >= MAX_MULBY10)
765 goto out;
766 fd *= 10;
767 fd += c;
768 }
769
770 inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd);
771 if (!inode)
772 goto out;
773 task_lock(task);
774 files = task->files;
775 if (files)
776 atomic_inc(&files->count);
777 task_unlock(task);
778 if (!files)
779 goto out_unlock;
780 read_lock(&files->file_lock);
781 file = inode->u.proc_i.file = fcheck_files(files, fd);
782 if (!file)
783 goto out_unlock2;
784 get_file(file);
785 read_unlock(&files->file_lock);
786 put_files_struct(files);
787 inode->i_op = &proc_pid_link_inode_operations;
788 inode->i_size = 64;
789 inode->i_mode = S_IFLNK;
790 inode->u.proc_i.op.proc_get_link = proc_fd_link;
791 if (file->f_mode & 1)
792 inode->i_mode |= S_IRUSR | S_IXUSR;
793 if (file->f_mode & 2)
794 inode->i_mode |= S_IWUSR | S_IXUSR;
795 dentry->d_op = &pid_fd_dentry_operations;
796 d_add(dentry, inode);
797 return NULL;
798
799 out_unlock2:
800 put_files_struct(files);
801 read_unlock(&files->file_lock);
802 out_unlock:
803 iput(inode);
804 out:
805 return ERR_PTR(-ENOENT);
806 }
807
808 static struct file_operations proc_fd_operations = {
809 read: generic_read_dir,
810 readdir: proc_readfd,
811 };
812
813 /*
814 * proc directories can do almost nothing..
815 */
816 static struct inode_operations proc_fd_inode_operations = {
817 lookup: proc_lookupfd,
818 permission: proc_permission,
819 };
820
821 static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
822 {
823 struct inode *inode;
824 int error;
825 struct task_struct *task = dir->u.proc_i.task;
826 struct pid_entry *p;
827
828 error = -ENOENT;
829 inode = NULL;
830
831 for (p = base_stuff; p->name; p++) {
832 if (p->len != dentry->d_name.len)
833 continue;
834 if (!memcmp(dentry->d_name.name, p->name, p->len))
835 break;
836 }
837 if (!p->name)
838 goto out;
839
840 error = -EINVAL;
841 inode = proc_pid_make_inode(dir->i_sb, task, p->type);
842 if (!inode)
843 goto out;
844
845 inode->i_mode = p->mode;
846 /*
847 * Yes, it does not scale. And it should not. Don't add
848 * new entries into /proc/<pid>/ without very good reasons.
849 */
850 switch(p->type) {
851 case PROC_PID_FD:
852 inode->i_nlink = 2;
853 inode->i_op = &proc_fd_inode_operations;
854 inode->i_fop = &proc_fd_operations;
855 break;
856 case PROC_PID_EXE:
857 inode->i_op = &proc_pid_link_inode_operations;
858 inode->u.proc_i.op.proc_get_link = proc_exe_link;
859 break;
860 case PROC_PID_CWD:
861 inode->i_op = &proc_pid_link_inode_operations;
862 inode->u.proc_i.op.proc_get_link = proc_cwd_link;
863 break;
864 case PROC_PID_ROOT:
865 inode->i_op = &proc_pid_link_inode_operations;
866 inode->u.proc_i.op.proc_get_link = proc_root_link;
867 break;
868 case PROC_PID_ENVIRON:
869 inode->i_fop = &proc_info_file_operations;
870 inode->u.proc_i.op.proc_read = proc_pid_environ;
871 break;
872 case PROC_PID_STATUS:
873 inode->i_fop = &proc_info_file_operations;
874 inode->u.proc_i.op.proc_read = proc_pid_status;
875 break;
876 case PROC_PID_STAT:
877 inode->i_fop = &proc_info_file_operations;
878 inode->u.proc_i.op.proc_read = proc_pid_stat;
879 break;
880 case PROC_PID_CMDLINE:
881 inode->i_fop = &proc_info_file_operations;
882 inode->u.proc_i.op.proc_read = proc_pid_cmdline;
883 break;
884 case PROC_PID_STATM:
885 inode->i_fop = &proc_info_file_operations;
886 inode->u.proc_i.op.proc_read = proc_pid_statm;
887 break;
888 case PROC_PID_MAPS:
889 inode->i_fop = &proc_maps_operations;
890 break;
891 #ifdef CONFIG_SMP
892 case PROC_PID_CPU:
893 inode->i_fop = &proc_info_file_operations;
894 inode->u.proc_i.op.proc_read = proc_pid_cpu;
895 break;
896 #endif
897 case PROC_PID_MEM:
898 inode->i_op = &proc_mem_inode_operations;
899 inode->i_fop = &proc_mem_operations;
900 break;
901 default:
902 printk("procfs: impossible type (%d)",p->type);
903 iput(inode);
904 return ERR_PTR(-EINVAL);
905 }
906 dentry->d_op = &pid_dentry_operations;
907 d_add(dentry, inode);
908 return NULL;
909
910 out:
911 return ERR_PTR(error);
912 }
913
914 static struct file_operations proc_base_operations = {
915 read: generic_read_dir,
916 readdir: proc_base_readdir,
917 };
918
919 static struct inode_operations proc_base_inode_operations = {
920 lookup: proc_base_lookup,
921 };
922
923 /*
924 * /proc/self:
925 */
926 static int proc_self_readlink(struct dentry *dentry, char *buffer, int buflen)
927 {
928 char tmp[30];
929 sprintf(tmp, "%d", current->pid);
930 return vfs_readlink(dentry,buffer,buflen,tmp);
931 }
932
933 static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
934 {
935 char tmp[30];
936 sprintf(tmp, "%d", current->pid);
937 return vfs_follow_link(nd,tmp);
938 }
939
940 static struct inode_operations proc_self_inode_operations = {
941 readlink: proc_self_readlink,
942 follow_link: proc_self_follow_link,
943 };
944
945 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
946 {
947 unsigned int pid, c;
948 struct task_struct *task;
949 const char *name;
950 struct inode *inode;
951 int len;
952
953 pid = 0;
954 name = dentry->d_name.name;
955 len = dentry->d_name.len;
956 if (len == 4 && !memcmp(name, "self", 4)) {
957 inode = new_inode(dir->i_sb);
958 if (!inode)
959 return ERR_PTR(-ENOMEM);
960 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
961 inode->i_ino = fake_ino(0, PROC_PID_INO);
962 inode->u.proc_i.file = NULL;
963 inode->u.proc_i.task = NULL;
964 inode->i_mode = S_IFLNK|S_IRWXUGO;
965 inode->i_uid = inode->i_gid = 0;
966 inode->i_size = 64;
967 inode->i_op = &proc_self_inode_operations;
968 d_add(dentry, inode);
969 return NULL;
970 }
971 while (len-- > 0) {
972 c = *name - '0';
973 name++;
974 if (c > 9)
975 goto out;
976 if (pid >= MAX_MULBY10)
977 goto out;
978 pid *= 10;
979 pid += c;
980 if (!pid)
981 goto out;
982 }
983
984 read_lock(&tasklist_lock);
985 task = find_task_by_pid(pid);
986 if (task)
987 get_task_struct(task);
988 read_unlock(&tasklist_lock);
989 if (!task)
990 goto out;
991
992 inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO);
993
994 free_task_struct(task);
995
996 if (!inode)
997 goto out;
998 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
999 inode->i_op = &proc_base_inode_operations;
1000 inode->i_fop = &proc_base_operations;
1001 inode->i_nlink = 3;
1002 inode->i_flags|=S_IMMUTABLE;
1003
1004 dentry->d_op = &pid_base_dentry_operations;
1005 d_add(dentry, inode);
1006 return NULL;
1007 out:
1008 return ERR_PTR(-ENOENT);
1009 }
1010
1011 void proc_pid_delete_inode(struct inode *inode)
1012 {
1013 if (inode->u.proc_i.file)
1014 fput(inode->u.proc_i.file);
1015 if (inode->u.proc_i.task)
1016 free_task_struct(inode->u.proc_i.task);
1017 }
1018
/* Size of the scratch buffer proc_pid_readdir() uses to format a pid in decimal. */
#define PROC_NUMBUF 10
/* Maximum number of pids collected by one get_pid_list() call. */
#define PROC_MAXPIDS 20
1021
1022 /*
1023 * Get a few pid's to return for filldir - we need to hold the
1024 * tasklist lock while doing this, and we must release it before
1025 * we actually do the filldir itself, so we use a temp buffer..
1026 */
1027 static int get_pid_list(int index, unsigned int *pids)
1028 {
1029 struct task_struct *p;
1030 int nr_pids = 0;
1031
1032 index--;
1033 read_lock(&tasklist_lock);
1034 for_each_task(p) {
1035 int pid = p->pid;
1036 if (!pid)
1037 continue;
1038 if (--index >= 0)
1039 continue;
1040 pids[nr_pids] = pid;
1041 nr_pids++;
1042 if (nr_pids >= PROC_MAXPIDS)
1043 break;
1044 }
1045 read_unlock(&tasklist_lock);
1046 return nr_pids;
1047 }
1048
1049 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
1050 {
1051 unsigned int pid_array[PROC_MAXPIDS];
1052 char buf[PROC_NUMBUF];
1053 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
1054 unsigned int nr_pids, i;
1055
1056 if (!nr) {
1057 ino_t ino = fake_ino(0,PROC_PID_INO);
1058 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
1059 return 0;
1060 filp->f_pos++;
1061 nr++;
1062 }
1063
1064 nr_pids = get_pid_list(nr, pid_array);
1065
1066 for (i = 0; i < nr_pids; i++) {
1067 int pid = pid_array[i];
1068 ino_t ino = fake_ino(pid,PROC_PID_INO);
1069 unsigned long j = PROC_NUMBUF;
1070
1071 do buf[--j] = '0' + (pid % 10); while (pid/=10);
1072
1073 if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0)
1074 break;
1075 filp->f_pos++;
1076 }
1077 return 0;
1078 }
1079