File: /usr/src/linux/fs/proc/array.c
1 /*
2 * linux/fs/proc/array.c
3 *
4 * Copyright (C) 1992 by Linus Torvalds
5 * based on ideas by Darren Senn
6 *
7 * Fixes:
8 * Michael. K. Johnson: stat,statm extensions.
9 * <johnsonm@stolaf.edu>
10 *
11 * Pauline Middelink : Made cmdline,envline only break at '\0's, to
12 * make sure SET_PROCTITLE works. Also removed
13 * bad '!' which forced address recalculation for
14 * EVERY character on the current page.
15 * <middelin@polyware.iaf.nl>
16 *
17 * Danny ter Haar : added cpuinfo
18 * <dth@cistron.nl>
19 *
20 * Alessandro Rubini : profile extension.
21 * <rubini@ipvvis.unipv.it>
22 *
23 * Jeff Tranter : added BogoMips field to cpuinfo
24 * <Jeff_Tranter@Mitel.COM>
25 *
26 * Bruno Haible : remove 4K limit for the maps file
27 * <haible@ma2s2.mathematik.uni-karlsruhe.de>
28 *
29 * Yves Arrouye : remove removal of trailing spaces in get_array.
30 * <Yves.Arrouye@marin.fdn.fr>
31 *
32 * Jerome Forissier : added per-CPU time information to /proc/stat
33 * and /proc/<pid>/cpu extension
34 * <forissier@isia.cma.fr>
35 * - Incorporation and non-SMP safe operation
36 * of forissier patch in 2.1.78 by
37 * Hans Marcus <crowbar@concepts.nl>
38 *
39 * aeb@cwi.nl : /proc/partitions
40 *
41 *
42 * Alan Cox : security fixes.
43 * <Alan.Cox@linux.org>
44 *
45 * Al Viro : safe handling of mm_struct
46 *
47 * Gerhard Wichert : added BIGMEM support
48 * Siemens AG <Gerhard.Wichert@pdb.siemens.de>
49 *
50 * Al Viro & Jeff Garzik : moved most of the thing into base.c and
51 * : proc_misc.c. The rest may eventually go into
52 * : base.c too.
53 */
54
55 #include <linux/config.h>
56 #include <linux/types.h>
57 #include <linux/errno.h>
58 #include <linux/sched.h>
59 #include <linux/kernel.h>
60 #include <linux/kernel_stat.h>
61 #include <linux/tty.h>
62 #include <linux/string.h>
63 #include <linux/mman.h>
64 #include <linux/proc_fs.h>
65 #include <linux/ioport.h>
66 #include <linux/mm.h>
67 #include <linux/pagemap.h>
68 #include <linux/swap.h>
69 #include <linux/slab.h>
70 #include <linux/smp.h>
71 #include <linux/signal.h>
72 #include <linux/highmem.h>
73
74 #include <asm/uaccess.h>
75 #include <asm/pgtable.h>
76 #include <asm/io.h>
77 #include <asm/processor.h>
78
/*
 * ADDBUF - append a NUL-terminated string to a character cursor.
 *
 * @buffer: modifiable char pointer lvalue; advanced past the copied bytes.
 * @string: NUL-terminated string to copy; evaluated exactly once.
 *
 * The terminating NUL is not copied, so the caller is expected to keep
 * writing at the advanced cursor.
 *
 * Gcc optimizes away "strlen(x)" for constant x, so a string literal
 * costs no run-time length computation.
 */
#define ADDBUF(buffer, string) \
do { \
	const char *addbuf_src_ = (string); \
	size_t addbuf_len_ = strlen(addbuf_src_); \
	memcpy((buffer), addbuf_src_, addbuf_len_); \
	(buffer) += addbuf_len_; \
} while (0)
83
84 static inline char * task_name(struct task_struct *p, char * buf)
85 {
86 int i;
87 char * name;
88
89 ADDBUF(buf, "Name:\t");
90 name = p->comm;
91 i = sizeof(p->comm);
92 do {
93 unsigned char c = *name;
94 name++;
95 i--;
96 *buf = c;
97 if (!c)
98 break;
99 if (c == '\\') {
100 buf[1] = c;
101 buf += 2;
102 continue;
103 }
104 if (c == '\n') {
105 buf[0] = '\\';
106 buf[1] = 'n';
107 buf += 2;
108 continue;
109 }
110 buf++;
111 } while (i);
112 *buf = '\n';
113 return buf+1;
114 }
115
/*
 * Printable names for the task states.
 *
 * The task state is a strange "bitmap" of reasons to sleep: "running"
 * is zero, and combinations of the others can be tested with simple
 * bit tests.  The number next to each entry is the state value that
 * entry is annotated with.
 */
static const char *task_state_array[] = {
	"R (running)",		/*  0 */
	"S (sleeping)",		/*  1 */
	"D (disk sleep)",	/*  2 */
	"Z (zombie)",		/*  4 */
	"T (stopped)",		/*  8 */
	"W (paging)"		/* 16 */
};
130
131 static inline const char * get_task_state(struct task_struct *tsk)
132 {
133 unsigned int state = tsk->state & (TASK_RUNNING |
134 TASK_INTERRUPTIBLE |
135 TASK_UNINTERRUPTIBLE |
136 TASK_ZOMBIE |
137 TASK_STOPPED);
138 const char **p = &task_state_array[0];
139
140 while (state) {
141 p++;
142 state >>= 1;
143 }
144 return *p;
145 }
146
147 static inline char * task_state(struct task_struct *p, char *buffer)
148 {
149 int g;
150
151 read_lock(&tasklist_lock);
152 buffer += sprintf(buffer,
153 "State:\t%s\n"
154 "Pid:\t%d\n"
155 "PPid:\t%d\n"
156 "TracerPid:\t%d\n"
157 "Uid:\t%d\t%d\t%d\t%d\n"
158 "Gid:\t%d\t%d\t%d\t%d\n",
159 get_task_state(p),
160 p->pid, p->pid ? p->p_opptr->pid : 0, 0,
161 p->uid, p->euid, p->suid, p->fsuid,
162 p->gid, p->egid, p->sgid, p->fsgid);
163 read_unlock(&tasklist_lock);
164 task_lock(p);
165 buffer += sprintf(buffer,
166 "FDSize:\t%d\n"
167 "Groups:\t",
168 p->files ? p->files->max_fds : 0);
169 task_unlock(p);
170
171 for (g = 0; g < p->ngroups; g++)
172 buffer += sprintf(buffer, "%d ", p->groups[g]);
173
174 buffer += sprintf(buffer, "\n");
175 return buffer;
176 }
177
178 static inline char * task_mem(struct mm_struct *mm, char *buffer)
179 {
180 struct vm_area_struct * vma;
181 unsigned long data = 0, stack = 0;
182 unsigned long exec = 0, lib = 0;
183
184 down_read(&mm->mmap_sem);
185 for (vma = mm->mmap; vma; vma = vma->vm_next) {
186 unsigned long len = (vma->vm_end - vma->vm_start) >> 10;
187 if (!vma->vm_file) {
188 data += len;
189 if (vma->vm_flags & VM_GROWSDOWN)
190 stack += len;
191 continue;
192 }
193 if (vma->vm_flags & VM_WRITE)
194 continue;
195 if (vma->vm_flags & VM_EXEC) {
196 exec += len;
197 if (vma->vm_flags & VM_EXECUTABLE)
198 continue;
199 lib += len;
200 }
201 }
202 buffer += sprintf(buffer,
203 "VmSize:\t%8lu kB\n"
204 "VmLck:\t%8lu kB\n"
205 "VmRSS:\t%8lu kB\n"
206 "VmData:\t%8lu kB\n"
207 "VmStk:\t%8lu kB\n"
208 "VmExe:\t%8lu kB\n"
209 "VmLib:\t%8lu kB\n",
210 mm->total_vm << (PAGE_SHIFT-10),
211 mm->locked_vm << (PAGE_SHIFT-10),
212 mm->rss << (PAGE_SHIFT-10),
213 data - stack, stack,
214 exec - lib, lib);
215 up_read(&mm->mmap_sem);
216 return buffer;
217 }
218
219 static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
220 sigset_t *catch)
221 {
222 struct k_sigaction *k;
223 int i;
224
225 sigemptyset(ign);
226 sigemptyset(catch);
227
228 if (p->sig) {
229 k = p->sig->action;
230 for (i = 1; i <= _NSIG; ++i, ++k) {
231 if (k->sa.sa_handler == SIG_IGN)
232 sigaddset(ign, i);
233 else if (k->sa.sa_handler != SIG_DFL)
234 sigaddset(catch, i);
235 }
236 }
237 }
238
239 static inline char * task_sig(struct task_struct *p, char *buffer)
240 {
241 sigset_t ign, catch;
242
243 buffer += sprintf(buffer, "SigPnd:\t");
244 buffer = render_sigset_t(&p->pending.signal, buffer);
245 *buffer++ = '\n';
246 buffer += sprintf(buffer, "SigBlk:\t");
247 buffer = render_sigset_t(&p->blocked, buffer);
248 *buffer++ = '\n';
249
250 collect_sigign_sigcatch(p, &ign, &catch);
251 buffer += sprintf(buffer, "SigIgn:\t");
252 buffer = render_sigset_t(&ign, buffer);
253 *buffer++ = '\n';
254 buffer += sprintf(buffer, "SigCgt:\t"); /* Linux 2.0 uses "SigCgt" */
255 buffer = render_sigset_t(&catch, buffer);
256 *buffer++ = '\n';
257
258 return buffer;
259 }
260
261 static inline char *task_cap(struct task_struct *p, char *buffer)
262 {
263 return buffer + sprintf(buffer, "CapInh:\t%016x\n"
264 "CapPrm:\t%016x\n"
265 "CapEff:\t%016x\n",
266 cap_t(p->cap_inheritable),
267 cap_t(p->cap_permitted),
268 cap_t(p->cap_effective));
269 }
270
271
272 int proc_pid_status(struct task_struct *task, char * buffer)
273 {
274 char * orig = buffer;
275 struct mm_struct *mm;
276
277 buffer = task_name(task, buffer);
278 buffer = task_state(task, buffer);
279 task_lock(task);
280 mm = task->mm;
281 if(mm)
282 atomic_inc(&mm->mm_users);
283 task_unlock(task);
284 if (mm) {
285 buffer = task_mem(mm, buffer);
286 mmput(mm);
287 }
288 buffer = task_sig(task, buffer);
289 buffer = task_cap(task, buffer);
290 #if defined(CONFIG_ARCH_S390)
291 buffer = task_show_regs(task, buffer);
292 #endif
293 return buffer - orig;
294 }
295
296 int proc_pid_stat(struct task_struct *task, char * buffer)
297 {
298 unsigned long vsize, eip, esp, wchan;
299 long priority, nice;
300 int tty_pgrp = -1, tty_nr = 0;
301 sigset_t sigign, sigcatch;
302 char state;
303 int res;
304 pid_t ppid;
305 struct mm_struct *mm;
306
307 state = *get_task_state(task);
308 vsize = eip = esp = 0;
309 task_lock(task);
310 mm = task->mm;
311 if(mm)
312 atomic_inc(&mm->mm_users);
313 if (task->tty) {
314 tty_pgrp = task->tty->pgrp;
315 tty_nr = kdev_t_to_nr(task->tty->device);
316 }
317 task_unlock(task);
318 if (mm) {
319 struct vm_area_struct *vma;
320 down_read(&mm->mmap_sem);
321 vma = mm->mmap;
322 while (vma) {
323 vsize += vma->vm_end - vma->vm_start;
324 vma = vma->vm_next;
325 }
326 eip = KSTK_EIP(task);
327 esp = KSTK_ESP(task);
328 up_read(&mm->mmap_sem);
329 }
330
331 wchan = get_wchan(task);
332
333 collect_sigign_sigcatch(task, &sigign, &sigcatch);
334
335 /* scale priority and nice values from timeslices to -20..20 */
336 /* to make it look like a "normal" Unix priority/nice value */
337 priority = task->counter;
338 priority = 20 - (priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER;
339 nice = task->nice;
340
341 read_lock(&tasklist_lock);
342 ppid = task->pid ? task->p_opptr->pid : 0;
343 read_unlock(&tasklist_lock);
344 res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
345 %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \
346 %lu %lu %lu %lu %lu %lu %lu %lu %d %d\n",
347 task->pid,
348 task->comm,
349 state,
350 ppid,
351 task->pgrp,
352 task->session,
353 tty_nr,
354 tty_pgrp,
355 task->flags,
356 task->min_flt,
357 task->cmin_flt,
358 task->maj_flt,
359 task->cmaj_flt,
360 task->times.tms_utime,
361 task->times.tms_stime,
362 task->times.tms_cutime,
363 task->times.tms_cstime,
364 priority,
365 nice,
366 0UL /* removed */,
367 task->it_real_value,
368 task->start_time,
369 vsize,
370 mm ? mm->rss : 0, /* you might want to shift this left 3 */
371 task->rlim[RLIMIT_RSS].rlim_cur,
372 mm ? mm->start_code : 0,
373 mm ? mm->end_code : 0,
374 mm ? mm->start_stack : 0,
375 esp,
376 eip,
377 /* The signal information here is obsolete.
378 * It must be decimal for Linux 2.0 compatibility.
379 * Use /proc/#/status for real-time signals.
380 */
381 task->pending.signal.sig[0] & 0x7fffffffUL,
382 task->blocked.sig[0] & 0x7fffffffUL,
383 sigign .sig[0] & 0x7fffffffUL,
384 sigcatch .sig[0] & 0x7fffffffUL,
385 wchan,
386 task->nswap,
387 task->cnswap,
388 task->exit_signal,
389 task->processor);
390 if(mm)
391 mmput(mm);
392 return res;
393 }
394
395 static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size,
396 int * pages, int * shared, int * dirty, int * total)
397 {
398 pte_t * pte;
399 unsigned long end;
400
401 if (pmd_none(*pmd))
402 return;
403 if (pmd_bad(*pmd)) {
404 pmd_ERROR(*pmd);
405 pmd_clear(pmd);
406 return;
407 }
408 pte = pte_offset(pmd, address);
409 address &= ~PMD_MASK;
410 end = address + size;
411 if (end > PMD_SIZE)
412 end = PMD_SIZE;
413 do {
414 pte_t page = *pte;
415 struct page *ptpage;
416
417 address += PAGE_SIZE;
418 pte++;
419 if (pte_none(page))
420 continue;
421 ++*total;
422 if (!pte_present(page))
423 continue;
424 ptpage = pte_page(page);
425 if ((!VALID_PAGE(ptpage)) || PageReserved(ptpage))
426 continue;
427 ++*pages;
428 if (pte_dirty(page))
429 ++*dirty;
430 if (page_count(pte_page(page)) > 1)
431 ++*shared;
432 } while (address < end);
433 }
434
435 static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned long size,
436 int * pages, int * shared, int * dirty, int * total)
437 {
438 pmd_t * pmd;
439 unsigned long end;
440
441 if (pgd_none(*pgd))
442 return;
443 if (pgd_bad(*pgd)) {
444 pgd_ERROR(*pgd);
445 pgd_clear(pgd);
446 return;
447 }
448 pmd = pmd_offset(pgd, address);
449 address &= ~PGDIR_MASK;
450 end = address + size;
451 if (end > PGDIR_SIZE)
452 end = PGDIR_SIZE;
453 do {
454 statm_pte_range(pmd, address, end - address, pages, shared, dirty, total);
455 address = (address + PMD_SIZE) & PMD_MASK;
456 pmd++;
457 } while (address < end);
458 }
459
460 static void statm_pgd_range(pgd_t * pgd, unsigned long address, unsigned long end,
461 int * pages, int * shared, int * dirty, int * total)
462 {
463 while (address < end) {
464 statm_pmd_range(pgd, address, end - address, pages, shared, dirty, total);
465 address = (address + PGDIR_SIZE) & PGDIR_MASK;
466 pgd++;
467 }
468 }
469
470 int proc_pid_statm(struct task_struct *task, char * buffer)
471 {
472 struct mm_struct *mm;
473 int size=0, resident=0, share=0, trs=0, lrs=0, drs=0, dt=0;
474
475 task_lock(task);
476 mm = task->mm;
477 if(mm)
478 atomic_inc(&mm->mm_users);
479 task_unlock(task);
480 if (mm) {
481 struct vm_area_struct * vma;
482 down_read(&mm->mmap_sem);
483 vma = mm->mmap;
484 while (vma) {
485 pgd_t *pgd = pgd_offset(mm, vma->vm_start);
486 int pages = 0, shared = 0, dirty = 0, total = 0;
487
488 statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total);
489 resident += pages;
490 share += shared;
491 dt += dirty;
492 size += total;
493 if (vma->vm_flags & VM_EXECUTABLE)
494 trs += pages; /* text */
495 else if (vma->vm_flags & VM_GROWSDOWN)
496 drs += pages; /* stack */
497 else if (vma->vm_end > 0x60000000)
498 lrs += pages; /* library */
499 else
500 drs += pages;
501 vma = vma->vm_next;
502 }
503 up_read(&mm->mmap_sem);
504 mmput(mm);
505 }
506 return sprintf(buffer,"%d %d %d %d %d %d %d\n",
507 size, resident, share, trs, lrs, drs, dt);
508 }
509
510 /*
511 * The way we support synthetic files > 4K
512 * - without storing their contents in some buffer and
513 * - without walking through the entire synthetic file until we reach the
514 * position of the requested data
515 * is to cleverly encode the current position in the file's f_pos field.
516 * There is no requirement that a read() call which returns `count' bytes
517 * of data increases f_pos by exactly `count'.
518 *
519 * This idea is Linus' one. Bruno implemented it.
520 */
521
522 /*
523 * For the /proc/<pid>/maps file, we use fixed length records, each containing
524 * a single line.
525 *
526 * f_pos = (number of the vma in the task->mm->mmap list) * PAGE_SIZE
527 * + (index into the line)
528 */
/*
 * Layout of the fixed part of a /proc/<pid>/maps line (everything before
 * the path name).  Each *_MAX value is the sum of the field widths and
 * separators noted beside it.
 */

/* sizeof(void*) == 4: 8-digit hex addresses and offset */
#define MAPS_LINE_FORMAT4	"%08lx-%08lx %s %08lx %s %lu"
#define MAPS_LINE_MAX4		49 /* sum of 8 1 8 1 4 1 8 1 5 1 10 1 */

/* sizeof(void*) == 8: 16-digit hex addresses and offset */
#define MAPS_LINE_FORMAT8	"%016lx-%016lx %s %016lx %s %lu"
#define MAPS_LINE_MAX8		73 /* sum of 16 1 16 1 4 1 16 1 5 1 10 1 */

/* pick the variant matching this build's pointer size */
#define MAPS_LINE_FORMAT	(sizeof(void*) == 4 ? MAPS_LINE_FORMAT4 : MAPS_LINE_FORMAT8)
#define MAPS_LINE_MAX		(sizeof(void*) == 4 ? MAPS_LINE_MAX4 : MAPS_LINE_MAX8)
539
540 static int proc_pid_maps_get_line (char *buf, struct vm_area_struct *map)
541 {
542 /* produce the next line */
543 char *line;
544 char str[5];
545 int flags;
546 kdev_t dev;
547 unsigned long ino;
548 int len;
549
550 flags = map->vm_flags;
551
552 str[0] = flags & VM_READ ? 'r' : '-';
553 str[1] = flags & VM_WRITE ? 'w' : '-';
554 str[2] = flags & VM_EXEC ? 'x' : '-';
555 str[3] = flags & VM_MAYSHARE ? 's' : 'p';
556 str[4] = 0;
557
558 dev = 0;
559 ino = 0;
560 if (map->vm_file != NULL) {
561 dev = map->vm_file->f_dentry->d_inode->i_dev;
562 ino = map->vm_file->f_dentry->d_inode->i_ino;
563 line = d_path(map->vm_file->f_dentry,
564 map->vm_file->f_vfsmnt,
565 buf, PAGE_SIZE);
566 buf[PAGE_SIZE-1] = '\n';
567 line -= MAPS_LINE_MAX;
568 if(line < buf)
569 line = buf;
570 } else
571 line = buf;
572
573 len = sprintf(line,
574 MAPS_LINE_FORMAT,
575 map->vm_start, map->vm_end, str, map->vm_pgoff << PAGE_SHIFT,
576 kdevname(dev), ino);
577
578 if(map->vm_file) {
579 int i;
580 for(i = len; i < MAPS_LINE_MAX; i++)
581 line[i] = ' ';
582 len = buf + PAGE_SIZE - line;
583 memmove(buf, line, len);
584 } else
585 line[len++] = '\n';
586 return len;
587 }
588
589 ssize_t proc_pid_read_maps (struct task_struct *task, struct file * file, char * buf,
590 size_t count, loff_t *ppos)
591 {
592 struct mm_struct *mm;
593 struct vm_area_struct * map;
594 char *tmp, *kbuf;
595 long retval;
596 int off, lineno, loff;
597
598 /* reject calls with out of range parameters immediately */
599 retval = 0;
600 if (*ppos > LONG_MAX)
601 goto out;
602 if (count == 0)
603 goto out;
604 off = (long)*ppos;
605 /*
606 * We might sleep getting the page, so get it first.
607 */
608 retval = -ENOMEM;
609 kbuf = (char*)__get_free_page(GFP_KERNEL);
610 if (!kbuf)
611 goto out;
612
613 tmp = (char*)__get_free_page(GFP_KERNEL);
614 if (!tmp)
615 goto out_free1;
616
617 task_lock(task);
618 mm = task->mm;
619 if (mm)
620 atomic_inc(&mm->mm_users);
621 task_unlock(task);
622 retval = 0;
623 if (!mm)
624 goto out_free2;
625
626 down_read(&mm->mmap_sem);
627 map = mm->mmap;
628 lineno = 0;
629 loff = 0;
630 if (count > PAGE_SIZE)
631 count = PAGE_SIZE;
632 while (map) {
633 int len;
634 if (off > PAGE_SIZE) {
635 off -= PAGE_SIZE;
636 goto next;
637 }
638 len = proc_pid_maps_get_line(tmp, map);
639 len -= off;
640 if (len > 0) {
641 if (retval+len > count) {
642 /* only partial line transfer possible */
643 len = count - retval;
644 /* save the offset where the next read
645 * must start */
646 loff = len+off;
647 }
648 memcpy(kbuf+retval, tmp+off, len);
649 retval += len;
650 }
651 off = 0;
652 next:
653 if (!loff)
654 lineno++;
655 if (retval >= count)
656 break;
657 if (loff) BUG();
658 map = map->vm_next;
659 }
660 up_read(&mm->mmap_sem);
661 mmput(mm);
662
663 if (retval > count) BUG();
664 if (copy_to_user(buf, kbuf, retval))
665 retval = -EFAULT;
666 else
667 *ppos = (lineno << PAGE_SHIFT) + loff;
668
669 out_free2:
670 free_page((unsigned long)tmp);
671 out_free1:
672 free_page((unsigned long)kbuf);
673 out:
674 return retval;
675 }
676
#ifdef CONFIG_SMP
/*
 * Produce the contents of /proc/<pid>/cpu for task into buffer: one
 * "cpu" line with the task's total user and system times, followed by
 * one "cpu<i>" line per logical cpu i below smp_num_cpus, with the
 * per-cpu user and system times taken at index cpu_logical_map(i).
 *
 * Returns the number of bytes written.
 */
int proc_pid_cpu(struct task_struct *task, char * buffer)
{
	char *cursor = buffer;
	int cpu;

	cursor += sprintf(cursor,
			  "cpu %lu %lu\n",
			  task->times.tms_utime,
			  task->times.tms_stime);

	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
		cursor += sprintf(cursor, "cpu%d %lu %lu\n",
				  cpu,
				  task->per_cpu_utime[cpu_logical_map(cpu)],
				  task->per_cpu_stime[cpu_logical_map(cpu)]);
	}

	return cursor - buffer;
}
#endif
696