File: /usr/src/linux/kernel/sys.c

1     /*
2      *  linux/kernel/sys.c
3      *
4      *  Copyright (C) 1991, 1992  Linus Torvalds
5      */
6     
7     #include <linux/module.h>
8     #include <linux/mm.h>
9     #include <linux/utsname.h>
10     #include <linux/mman.h>
11     #include <linux/smp_lock.h>
12     #include <linux/notifier.h>
13     #include <linux/reboot.h>
14     #include <linux/prctl.h>
15     #include <linux/init.h>
16     #include <linux/highuid.h>
17     
18     #include <asm/uaccess.h>
19     #include <asm/io.h>
20     
21     /*
22      * this is where the system-wide overflow UID and GID are defined, for
23      * architectures that now have 32-bit UID/GID but didn't in the past
24      */
25     
int overflowuid = DEFAULT_OVERFLOWUID;	/* system-wide overflow UID, starts at the compile-time default */
int overflowgid = DEFAULT_OVERFLOWGID;	/* system-wide overflow GID, starts at the compile-time default */
28     
29     /*
30      * the same as above, but for filesystems which can only store a 16-bit
31      * UID and GID. as such, this is needed on all architectures
32      */
33     
34     int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
35     int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
36     
37     /*
38      * this indicates whether you can reboot with ctrl-alt-del: the default is yes
39      */
40     
int C_A_D = 1;		/* non-zero: ctrl_alt_del() schedules a restart; toggled via sys_reboot() CAD_ON/CAD_OFF */
int cad_pid = 1;	/* pid that ctrl_alt_del() sends SIGINT to when C_A_D is zero */
43     
44     
45     /*
46      *	Notifier list for kernel code which wants to be called
47      *	at shutdown. This is used to stop any idling DMA operations
48      *	and the like. 
49      */
50     
/* Head of the chain run by sys_reboot() and deferred_cad() before restart/halt/power-off. */
static struct notifier_block *reboot_notifier_list;
/* Taken for writing by notifier_chain_register() and notifier_chain_unregister(). */
rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
53     
54     /**
55      *	notifier_chain_register	- Add notifier to a notifier chain
56      *	@list: Pointer to root list pointer
57      *	@n: New entry in notifier chain
58      *
59      *	Adds a notifier to a notifier chain.
60      *
61      *	Currently always returns zero.
62      */
63      
64     int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
65     {
66     	write_lock(&notifier_lock);
67     	while(*list)
68     	{
69     		if(n->priority > (*list)->priority)
70     			break;
71     		list= &((*list)->next);
72     	}
73     	n->next = *list;
74     	*list=n;
75     	write_unlock(&notifier_lock);
76     	return 0;
77     }
78     
79     /**
80      *	notifier_chain_unregister - Remove notifier from a notifier chain
81      *	@nl: Pointer to root list pointer
82      *	@n: New entry in notifier chain
83      *
84      *	Removes a notifier from a notifier chain.
85      *
86      *	Returns zero on success, or %-ENOENT on failure.
87      */
88      
89     int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
90     {
91     	write_lock(&notifier_lock);
92     	while((*nl)!=NULL)
93     	{
94     		if((*nl)==n)
95     		{
96     			*nl=n->next;
97     			write_unlock(&notifier_lock);
98     			return 0;
99     		}
100     		nl=&((*nl)->next);
101     	}
102     	write_unlock(&notifier_lock);
103     	return -ENOENT;
104     }
105     
106     /**
107      *	notifier_call_chain - Call functions in a notifier chain
108      *	@n: Pointer to root pointer of notifier chain
109      *	@val: Value passed unmodified to notifier function
110      *	@v: Pointer passed unmodified to notifier function
111      *
112      *	Calls each function in a notifier chain in turn.
113      *
114      *	If the return value of the notifier can be and'd
115      *	with %NOTIFY_STOP_MASK, then notifier_call_chain
116      *	will return immediately, with the return value of
117      *	the notifier function which halted execution.
118      *	Otherwise, the return value is the return value
119      *	of the last notifier function called.
120      */
121      
122     int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
123     {
124     	int ret=NOTIFY_DONE;
125     	struct notifier_block *nb = *n;
126     
127     	while(nb)
128     	{
129     		ret=nb->notifier_call(nb,val,v);
130     		if(ret&NOTIFY_STOP_MASK)
131     		{
132     			return ret;
133     		}
134     		nb=nb->next;
135     	}
136     	return ret;
137     }
138     
139     /**
140      *	register_reboot_notifier - Register function to be called at reboot time
141      *	@nb: Info about notifier function to be called
142      *
143      *	Registers a function with the list of functions
144      *	to be called at reboot time.
145      *
146      *	Currently always returns zero, as notifier_chain_register
147      *	always returns zero.
148      */
149      
int register_reboot_notifier(struct notifier_block * nb)
{
	/* Thin wrapper: insert nb into the file-local reboot chain. */
	return notifier_chain_register(&reboot_notifier_list, nb);
}
154     
155     /**
156      *	unregister_reboot_notifier - Unregister previously registered reboot notifier
157      *	@nb: Hook to be unregistered
158      *
159      *	Unregisters a previously registered reboot
160      *	notifier function.
161      *
162      *	Returns zero on success, or %-ENOENT on failure.
163      */
164      
int unregister_reboot_notifier(struct notifier_block * nb)
{
	/* Thin wrapper: remove nb from the file-local reboot chain. */
	return notifier_chain_unregister(&reboot_notifier_list, nb);
}
169     
/*
 * Always fails with -ENOSYS.  Judging by the name ("ni"), this is
 * presumably the handler for system calls that are not implemented.
 */
asmlinkage long sys_ni_syscall(void)
{
	return -ENOSYS;
}
174     
175     static int proc_sel(struct task_struct *p, int which, int who)
176     {
177     	if(p->pid)
178     	{
179     		switch (which) {
180     			case PRIO_PROCESS:
181     				if (!who && p == current)
182     					return 1;
183     				return(p->pid == who);
184     			case PRIO_PGRP:
185     				if (!who)
186     					who = current->pgrp;
187     				return(p->pgrp == who);
188     			case PRIO_USER:
189     				if (!who)
190     					who = current->uid;
191     				return(p->uid == who);
192     		}
193     	}
194     	return 0;
195     }
196     
197     asmlinkage long sys_setpriority(int which, int who, int niceval)
198     {
199     	struct task_struct *p;
200     	int error;
201     
202     	if (which > 2 || which < 0)
203     		return -EINVAL;
204     
205     	/* normalize: avoid signed division (rounding problems) */
206     	error = -ESRCH;
207     	if (niceval < -20)
208     		niceval = -20;
209     	if (niceval > 19)
210     		niceval = 19;
211     
212     	read_lock(&tasklist_lock);
213     	for_each_task(p) {
214     		if (!proc_sel(p, which, who))
215     			continue;
216     		if (p->uid != current->euid &&
217     			p->uid != current->uid && !capable(CAP_SYS_NICE)) {
218     			error = -EPERM;
219     			continue;
220     		}
221     		if (error == -ESRCH)
222     			error = 0;
223     		if (niceval < p->nice && !capable(CAP_SYS_NICE))
224     			error = -EACCES;
225     		else
226     			p->nice = niceval;
227     	}
228     	read_unlock(&tasklist_lock);
229     
230     	return error;
231     }
232     
233     /*
234      * Ugh. To avoid negative return values, "getpriority()" will
235      * not return the normal nice-value, but a negated value that
236      * has been offset by 20 (ie it returns 40..1 instead of -20..19)
237      * to stay compatible.
238      */
239     asmlinkage long sys_getpriority(int which, int who)
240     {
241     	struct task_struct *p;
242     	long retval = -ESRCH;
243     
244     	if (which > 2 || which < 0)
245     		return -EINVAL;
246     
247     	read_lock(&tasklist_lock);
248     	for_each_task (p) {
249     		long niceval;
250     		if (!proc_sel(p, which, who))
251     			continue;
252     		niceval = 20 - p->nice;
253     		if (niceval > retval)
254     			retval = niceval;
255     	}
256     	read_unlock(&tasklist_lock);
257     
258     	return retval;
259     }
260     
261     
262     /*
263      * Reboot system call: for obvious reasons only root may call it,
264      * and even root needs to set up some magic numbers in the registers
265      * so that some mistake won't make this reboot the whole machine.
266      * You can also set the meaning of the ctrl-alt-del-key here.
267      *
268      * reboot doesn't sync: do that yourself before calling this.
269      */
asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg)
{
	/* Kernel copy of the user-supplied command string for LINUX_REBOOT_CMD_RESTART2. */
	char buffer[256];

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT))
		return -EPERM;

	/* For safety, we require "magic" arguments. */
	if (magic1 != LINUX_REBOOT_MAGIC1 ||
	    (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A &&
			magic2 != LINUX_REBOOT_MAGIC2B))
		return -EINVAL;

	/* Every return path below releases the big kernel lock again. */
	lock_kernel();
	switch (cmd) {
	case LINUX_REBOOT_CMD_RESTART:
		/* Run the reboot notifiers before handing over to the architecture code. */
		notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
		printk(KERN_EMERG "Restarting system.\n");
		machine_restart(NULL);
		break;

	case LINUX_REBOOT_CMD_CAD_ON:
		/* ctrl-alt-del restarts the machine (see ctrl_alt_del()). */
		C_A_D = 1;
		break;

	case LINUX_REBOOT_CMD_CAD_OFF:
		/* ctrl-alt-del only sends SIGINT to cad_pid (see ctrl_alt_del()). */
		C_A_D = 0;
		break;

	case LINUX_REBOOT_CMD_HALT:
		notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
		printk(KERN_EMERG "System halted.\n");
		machine_halt();
		/* Only reached if machine_halt() returns. */
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_POWER_OFF:
		notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
		printk(KERN_EMERG "Power down.\n");
		machine_power_off();
		/* Only reached if machine_power_off() returns. */
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_RESTART2:
		/* Copy at most sizeof(buffer) - 1 bytes of the command string from user space. */
		if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) {
			unlock_kernel();
			return -EFAULT;
		}
		/* Force termination in case the user string filled the whole buffer. */
		buffer[sizeof(buffer) - 1] = '\0';

		/* The command string is passed both to the notifiers and to machine_restart(). */
		notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
		printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
		machine_restart(buffer);
		break;

	default:
		/* Unknown command. */
		unlock_kernel();
		return -EINVAL;
	}
	unlock_kernel();
	return 0;
}
333     
/*
 * Task-queue routine queued by ctrl_alt_del(): runs the reboot
 * notifiers with SYS_RESTART and then restarts the machine.
 * The dummy argument is unused.
 */
static void deferred_cad(void *dummy)
{
	notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
	machine_restart(NULL);
}
339     
340     /*
341      * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
342      * As it's called within an interrupt, it may NOT sync: the only choice
343      * is whether to reboot at once, or just ignore the ctrl-alt-del.
344      */
void ctrl_alt_del(void)
{
	/* Static so the queue entry outlives this call; its routine is deferred_cad(). */
	static struct tq_struct cad_tq = {
		routine: deferred_cad,
	};

	if (C_A_D)
		/* Restart enabled: hand the actual restart to deferred_cad() via schedule_task(). */
		schedule_task(&cad_tq);
	else
		/* Restart disabled: notify the process registered in cad_pid instead. */
		kill_proc(cad_pid, SIGINT, 1);
}
356     	
357     
358     /*
359      * Unprivileged users may change the real gid to the effective gid
360      * or vice versa.  (BSD-style)
361      *
362      * If you set the real gid at all, or set the effective gid to a value not
363      * equal to the real gid, then the saved gid is set to the new effective gid.
364      *
365      * This makes it possible for a setgid program to completely drop its
366      * privileges, which is often a useful assertion to make when you are doing
367      * a security audit over a program.
368      *
369      * The general idea is that a program which uses just setregid() will be
370      * 100% compatible with BSD.  A program which uses just setgid() will be
371      * 100% compatible with POSIX with saved IDs. 
372      *
 * SMP: There are no races; the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
375      */
asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
{
	/* Snapshot the current IDs; the new_* values default to "unchanged". */
	int old_rgid = current->gid;
	int old_egid = current->egid;
	int new_rgid = old_rgid;
	int new_egid = old_egid;

	/* (gid_t) -1 means "leave this ID alone". */
	if (rgid != (gid_t) -1) {
		/* Without CAP_SETGID the real gid may only become the current real or effective gid. */
		if ((old_rgid == rgid) ||
		    (current->egid==rgid) ||
		    capable(CAP_SETGID))
			new_rgid = rgid;
		else
			return -EPERM;
	}
	if (egid != (gid_t) -1) {
		/* Without CAP_SETGID the effective gid may only become the real, effective or saved gid. */
		if ((old_rgid == egid) ||
		    (current->egid == egid) ||
		    (current->sgid == egid) ||
		    capable(CAP_SETGID))
			new_egid = egid;
		else {
			return -EPERM;
		}
	}
	/* A change of effective gid clears the dumpable flag before the IDs are updated. */
	if (new_egid != old_egid)
	{
		current->mm->dumpable = 0;
		wmb();
	}
	/*
	 * The saved gid follows the new effective gid when the real gid was
	 * given, or when the effective gid was set to something other than
	 * the old real gid.
	 */
	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && egid != old_rgid))
		current->sgid = new_egid;
	/* fsgid always follows the effective gid. */
	current->fsgid = new_egid;
	current->egid = new_egid;
	current->gid = new_rgid;
	return 0;
}
414     
415     /*
416      * setgid() is implemented like SysV w/ SAVED_IDS 
417      *
418      * SMP: Same implicit races as above.
419      */
420     asmlinkage long sys_setgid(gid_t gid)
421     {
422     	int old_egid = current->egid;
423     
424     	if (capable(CAP_SETGID))
425     	{
426     		if(old_egid != gid)
427     		{
428     			current->mm->dumpable=0;
429     			wmb();
430     		}
431     		current->gid = current->egid = current->sgid = current->fsgid = gid;
432     	}
433     	else if ((gid == current->gid) || (gid == current->sgid))
434     	{
435     		if(old_egid != gid)
436     		{
437     			current->mm->dumpable=0;
438     			wmb();
439     		}
440     		current->egid = current->fsgid = gid;
441     	}
442     	else
443     		return -EPERM;
444     	return 0;
445     }
446       
447     /* 
448      * cap_emulate_setxuid() fixes the effective / permitted capabilities of
449      * a process after a call to setuid, setreuid, or setresuid.
450      *
451      *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
452      *  {r,e,s}uid != 0, the permitted and effective capabilities are
453      *  cleared.
454      *
455      *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
456      *  capabilities of the process are cleared.
457      *
458      *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
459      *  capabilities are set to the permitted capabilities.
460      *
461      *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 
462      *  never happen.
463      *
464      *  -astor 
465      *
466      * cevans - New behaviour, Oct '99
467      * A process may, via prctl(), elect to keep its capabilities when it
468      * calls setuid() and switches away from uid==0. Both permitted and
469      * effective sets will be retained.
470      * Without this change, it was impossible for a daemon to drop only some
471      * of its privilege. The call to setuid(!=0) would drop all privileges!
472      * Keeping uid 0 is not an option because uid 0 owns too many vital
473      * files..
474      * Thanks to Olaf Kirch and Peter Benie for spotting this.
475      */
476     static inline void cap_emulate_setxuid(int old_ruid, int old_euid, 
477     				       int old_suid)
478     {
479     	if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
480     	    (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
481     	    !current->keep_capabilities) {
482     		cap_clear(current->cap_permitted);
483     		cap_clear(current->cap_effective);
484     	}
485     	if (old_euid == 0 && current->euid != 0) {
486     		cap_clear(current->cap_effective);
487     	}
488     	if (old_euid != 0 && current->euid == 0) {
489     		current->cap_effective = current->cap_permitted;
490     	}
491     }
492     
/*
 * Change the current task's real uid to new_ruid and move the task's
 * per-user accounting from its old user_struct to the one obtained
 * from alloc_uid(new_ruid).  When dumpclear is non-zero the mm's
 * dumpable flag is cleared before the uid is changed.
 *
 * Returns 0 on success, or -EAGAIN if alloc_uid() fails.
 */
static int set_user(uid_t new_ruid, int dumpclear)
{
	struct user_struct *new_user, *old_user;

	/* What if a process setreuid()'s and this brings the
	 * new uid over his NPROC rlimit?  We can check this now
	 * cheaply with the new uid cache, so if it matters
	 * we should be checking for it.  -DaveM
	 */
	new_user = alloc_uid(new_ruid);
	if (!new_user)
		return -EAGAIN;
	old_user = current->user;
	/* Move this task's count from the old user to the new one. */
	atomic_dec(&old_user->processes);
	atomic_inc(&new_user->processes);

	if(dumpclear)
	{
		current->mm->dumpable = 0;
		wmb();
	}
	current->uid = new_ruid;
	current->user = new_user;
	/* Drop the task's reference on the previous user_struct. */
	free_uid(old_user);
	return 0;
}
519     
520     /*
521      * Unprivileged users may change the real uid to the effective uid
522      * or vice versa.  (BSD-style)
523      *
524      * If you set the real uid at all, or set the effective uid to a value not
525      * equal to the real uid, then the saved uid is set to the new effective uid.
526      *
527      * This makes it possible for a setuid program to completely drop its
528      * privileges, which is often a useful assertion to make when you are doing
529      * a security audit over a program.
530      *
531      * The general idea is that a program which uses just setreuid() will be
532      * 100% compatible with BSD.  A program which uses just setuid() will be
533      * 100% compatible with POSIX with saved IDs. 
534      */
535     asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
536     {
537     	int old_ruid, old_euid, old_suid, new_ruid, new_euid;
538     
539     	new_ruid = old_ruid = current->uid;
540     	new_euid = old_euid = current->euid;
541     	old_suid = current->suid;
542     
543     	if (ruid != (uid_t) -1) {
544     		new_ruid = ruid;
545     		if ((old_ruid != ruid) &&
546     		    (current->euid != ruid) &&
547     		    !capable(CAP_SETUID))
548     			return -EPERM;
549     	}
550     
551     	if (euid != (uid_t) -1) {
552     		new_euid = euid;
553     		if ((old_ruid != euid) &&
554     		    (current->euid != euid) &&
555     		    (current->suid != euid) &&
556     		    !capable(CAP_SETUID))
557     			return -EPERM;
558     	}
559     
560     	if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
561     		return -EAGAIN;
562     
563     	if (new_euid != old_euid)
564     	{
565     		current->mm->dumpable=0;
566     		wmb();
567     	}
568     	current->fsuid = current->euid = new_euid;
569     	if (ruid != (uid_t) -1 ||
570     	    (euid != (uid_t) -1 && euid != old_ruid))
571     		current->suid = current->euid;
572     	current->fsuid = current->euid;
573     
574     	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
575     		cap_emulate_setxuid(old_ruid, old_euid, old_suid);
576     	}
577     
578     	return 0;
579     }
580     
581     
582     		
583     /*
584      * setuid() is implemented like SysV with SAVED_IDS 
585      * 
586      * Note that SAVED_ID's is deficient in that a setuid root program
587      * like sendmail, for example, cannot set its uid to be a normal 
588      * user and then switch back, because if you're root, setuid() sets
589      * the saved uid too.  If you don't like this, blame the bright people
590      * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
591      * will allow a root program to temporarily drop privileges and be able to
592      * regain them by swapping the real and effective uid.  
593      */
asmlinkage long sys_setuid(uid_t uid)
{
	/* Remember the old IDs for the capability fixup at the end. */
	int old_euid = current->euid;
	int old_ruid, old_suid, new_ruid, new_suid;

	old_ruid = new_ruid = current->uid;
	old_suid = current->suid;
	new_suid = old_suid;
	
	if (capable(CAP_SETUID)) {
		/* Privileged: the real uid (via set_user()) and the saved uid change as well. */
		if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
			return -EAGAIN;
		new_suid = uid;
	} else if ((uid != current->uid) && (uid != new_suid))
		/* Unprivileged: only the real or saved uid may be selected. */
		return -EPERM;

	/* A change of effective uid clears the dumpable flag first. */
	if (old_euid != uid)
	{
		current->mm->dumpable = 0;
		wmb();
	}
	/* fsuid always follows the effective uid. */
	current->fsuid = current->euid = uid;
	current->suid = new_suid;

	/* Adjust capabilities to match the uid transition unless fixups are disabled. */
	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
		cap_emulate_setxuid(old_ruid, old_euid, old_suid);
	}

	return 0;
}
624     
625     
626     /*
627      * This function implements a generic ability to update ruid, euid,
628      * and suid.  This allows you to implement the 4.4 compatible seteuid().
629      */
asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
{
	/* Remember the old IDs for the capability fixup at the end. */
	int old_ruid = current->uid;
	int old_euid = current->euid;
	int old_suid = current->suid;

	/*
	 * Without CAP_SETUID each requested ID must be (uid_t) -1 ("leave
	 * unchanged") or equal to one of the current real, effective or
	 * saved uids.
	 */
	if (!capable(CAP_SETUID)) {
		if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
		    (ruid != current->euid) && (ruid != current->suid))
			return -EPERM;
		if ((euid != (uid_t) -1) && (euid != current->uid) &&
		    (euid != current->euid) && (euid != current->suid))
			return -EPERM;
		if ((suid != (uid_t) -1) && (suid != current->uid) &&
		    (suid != current->euid) && (suid != current->suid))
			return -EPERM;
	}
	if (ruid != (uid_t) -1) {
		/* set_user() switches the real uid and the per-user accounting. */
		if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0)
			return -EAGAIN;
	}
	if (euid != (uid_t) -1) {
		/* A change of effective uid clears the dumpable flag first. */
		if (euid != current->euid)
		{
			current->mm->dumpable = 0;
			wmb();
		}
		current->euid = euid;
		/* fsuid always follows the effective uid. */
		current->fsuid = euid;
	}
	if (suid != (uid_t) -1)
		current->suid = suid;

	/* Adjust capabilities to match the uid transition unless fixups are disabled. */
	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
		cap_emulate_setxuid(old_ruid, old_euid, old_suid);
	}

	return 0;
}
669     
670     asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
671     {
672     	int retval;
673     
674     	if (!(retval = put_user(current->uid, ruid)) &&
675     	    !(retval = put_user(current->euid, euid)))
676     		retval = put_user(current->suid, suid);
677     
678     	return retval;
679     }
680     
681     /*
682      * Same as above, but for rgid, egid, sgid.
683      */
asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
{
	/*
	 * Without CAP_SETGID each requested ID must be (gid_t) -1 ("leave
	 * unchanged") or equal to one of the current real, effective or
	 * saved gids.
	 */
	if (!capable(CAP_SETGID)) {
		if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
		    (rgid != current->egid) && (rgid != current->sgid))
			return -EPERM;
		if ((egid != (gid_t) -1) && (egid != current->gid) &&
		    (egid != current->egid) && (egid != current->sgid))
			return -EPERM;
		if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
		    (sgid != current->egid) && (sgid != current->sgid))
			return -EPERM;
	}
	if (egid != (gid_t) -1) {
		/* A change of effective gid clears the dumpable flag first. */
		if (egid != current->egid)
		{
			current->mm->dumpable = 0;
			wmb();
		}
		current->egid = egid;
		/* fsgid always follows the effective gid. */
		current->fsgid = egid;
	}
	if (rgid != (gid_t) -1)
		current->gid = rgid;
	if (sgid != (gid_t) -1)
		current->sgid = sgid;
	return 0;
}
712     
713     asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
714     {
715     	int retval;
716     
717     	if (!(retval = put_user(current->gid, rgid)) &&
718     	    !(retval = put_user(current->egid, egid)))
719     		retval = put_user(current->sgid, sgid);
720     
721     	return retval;
722     }
723     
724     
725     /*
726      * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
727      * is used for "access()" and for the NFS daemon (letting nfsd stay at
728      * whatever uid it wants to). It normally shadows "euid", except when
729      * explicitly set by setfsuid() or for access..
730      */
asmlinkage long sys_setfsuid(uid_t uid)
{
	int old_fsuid;

	old_fsuid = current->fsuid;
	/*
	 * The change is allowed when uid equals any of the caller's current
	 * uids or the caller has CAP_SETUID.  A refused request is not an
	 * error: the fsuid simply stays as it was.
	 */
	if (uid == current->uid || uid == current->euid ||
	    uid == current->suid || uid == current->fsuid || 
	    capable(CAP_SETUID))
	{
		/* An actual change of fsuid clears the dumpable flag first. */
		if (uid != old_fsuid)
		{
			current->mm->dumpable = 0;
			wmb();
		}
		current->fsuid = uid;
	}

	/* We emulate fsuid by essentially doing a scaled-down version
	 * of what we did in setresuid and friends. However, we only
	 * operate on the fs-specific bits of the process' effective
	 * capabilities 
	 *
	 * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
	 *          if not, we might be a bit too harsh here.
	 */
	
	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
		/* fsuid left 0: drop the filesystem capabilities from the effective set. */
		if (old_fsuid == 0 && current->fsuid != 0) {
			cap_t(current->cap_effective) &= ~CAP_FS_MASK;
		}
		/* fsuid became 0: re-enable the permitted filesystem capabilities. */
		if (old_fsuid != 0 && current->fsuid == 0) {
			cap_t(current->cap_effective) |=
				(cap_t(current->cap_permitted) & CAP_FS_MASK);
		}
	}

	/* The previous fsuid is returned whether or not the change happened. */
	return old_fsuid;
}
769     
770     /*
771      * Samma på svenska..
772      */
773     asmlinkage long sys_setfsgid(gid_t gid)
774     {
775     	int old_fsgid;
776     
777     	old_fsgid = current->fsgid;
778     	if (gid == current->gid || gid == current->egid ||
779     	    gid == current->sgid || gid == current->fsgid || 
780     	    capable(CAP_SETGID))
781     	{
782     		if (gid != old_fsgid)
783     		{
784     			current->mm->dumpable = 0;
785     			wmb();
786     		}
787     		current->fsgid = gid;
788     	}
789     	return old_fsgid;
790     }
791     
792     asmlinkage long sys_times(struct tms * tbuf)
793     {
794     	/*
795     	 *	In the SMP world we might just be unlucky and have one of
796     	 *	the times increment as we use it. Since the value is an
797     	 *	atomically safe type this is just fine. Conceptually its
798     	 *	as if the syscall took an instant longer to occur.
799     	 */
800     	if (tbuf)
801     		if (copy_to_user(tbuf, &current->times, sizeof(struct tms)))
802     			return -EFAULT;
803     	return jiffies;
804     }
805     
806     /*
807      * This needs some heavy checking ...
808      * I just haven't the stomach for it. I also don't fully
809      * understand sessions/pgrp etc. Let somebody who does explain it.
810      *
811      * OK, I think I have the protection semantics right.... this is really
812      * only important on a multi-user system anyway, to make sure one user
813      * can't send a signal to a process owned by another.  -TYT, 12/12/91
814      *
815      * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
816      * LBT 04.03.94
817      */
818     
asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
{
	struct task_struct * p;
	int err = -EINVAL;

	/* pid 0 means the caller; pgid 0 means "use pid as the group id". */
	if (!pid)
		pid = current->pid;
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	read_lock(&tasklist_lock);

	err = -ESRCH;
	p = find_task_by_pid(pid);
	if (!p)
		goto out;

	if (p->p_pptr == current || p->p_opptr == current) {
		/* Target is a child of the caller: it must share our session and not have exec'd. */
		err = -EPERM;
		if (p->session != current->session)
			goto out;
		err = -EACCES;
		if (p->did_exec)
			goto out;
	} else if (p != current)
		/* Neither the caller nor one of its children: fails with -ESRCH. */
		goto out;
	err = -EPERM;
	/* A task with the leader flag set may not change its process group. */
	if (p->leader)
		goto out;
	if (pgid != pid) {
		/* Joining another group: that group must already have a member in the caller's session. */
		struct task_struct * tmp;
		for_each_task (tmp) {
			if (tmp->pgrp == pgid &&
			    tmp->session == current->session)
				goto ok_pgid;
		}
		goto out;
	}

ok_pgid:
	p->pgrp = pgid;
	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	read_unlock(&tasklist_lock);
	return err;
}
871     
872     asmlinkage long sys_getpgid(pid_t pid)
873     {
874     	if (!pid) {
875     		return current->pgrp;
876     	} else {
877     		int retval;
878     		struct task_struct *p;
879     
880     		read_lock(&tasklist_lock);
881     		p = find_task_by_pid(pid);
882     
883     		retval = -ESRCH;
884     		if (p)
885     			retval = p->pgrp;
886     		read_unlock(&tasklist_lock);
887     		return retval;
888     	}
889     }
890     
/* Return the calling task's process group id; takes no lock. */
asmlinkage long sys_getpgrp(void)
{
	/* SMP - assuming writes are word atomic this is fine */
	return current->pgrp;
}
896     
897     asmlinkage long sys_getsid(pid_t pid)
898     {
899     	if (!pid) {
900     		return current->session;
901     	} else {
902     		int retval;
903     		struct task_struct *p;
904     
905     		read_lock(&tasklist_lock);
906     		p = find_task_by_pid(pid);
907     
908     		retval = -ESRCH;
909     		if(p)
910     			retval = p->session;
911     		read_unlock(&tasklist_lock);
912     		return retval;
913     	}
914     }
915     
916     asmlinkage long sys_setsid(void)
917     {
918     	struct task_struct * p;
919     	int err = -EPERM;
920     
921     	read_lock(&tasklist_lock);
922     	for_each_task(p) {
923     		if (p->pgrp == current->pid)
924     			goto out;
925     	}
926     
927     	current->leader = 1;
928     	current->session = current->pgrp = current->pid;
929     	current->tty = NULL;
930     	current->tty_old_pgrp = 0;
931     	err = current->pgrp;
932     out:
933     	read_unlock(&tasklist_lock);
934     	return err;
935     }
936     
937     /*
938      * Supplementary group IDs
939      */
940     asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist)
941     {
942     	int i;
943     	
944     	/*
945     	 *	SMP: Nobody else can change our grouplist. Thus we are
946     	 *	safe.
947     	 */
948     
949     	if (gidsetsize < 0)
950     		return -EINVAL;
951     	i = current->ngroups;
952     	if (gidsetsize) {
953     		if (i > gidsetsize)
954     			return -EINVAL;
955     		if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i))
956     			return -EFAULT;
957     	}
958     	return i;
959     }
960     
961     /*
962      *	SMP: Our groups are not shared. We can copy to/from them safely
963      *	without another task interfering.
964      */
965      
966     asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
967     {
968     	if (!capable(CAP_SETGID))
969     		return -EPERM;
970     	if ((unsigned) gidsetsize > NGROUPS)
971     		return -EINVAL;
972     	if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)))
973     		return -EFAULT;
974     	current->ngroups = gidsetsize;
975     	return 0;
976     }
977     
978     static int supplemental_group_member(gid_t grp)
979     {
980     	int i = current->ngroups;
981     
982     	if (i) {
983     		gid_t *groups = current->groups;
984     		do {
985     			if (*groups == grp)
986     				return 1;
987     			groups++;
988     			i--;
989     		} while (i);
990     	}
991     	return 0;
992     }
993     
994     /*
995      * Check whether we're fsgid/egid or in the supplemental group..
996      */
997     int in_group_p(gid_t grp)
998     {
999     	int retval = 1;
1000     	if (grp != current->fsgid)
1001     		retval = supplemental_group_member(grp);
1002     	return retval;
1003     }
1004     
1005     int in_egroup_p(gid_t grp)
1006     {
1007     	int retval = 1;
1008     	if (grp != current->egid)
1009     		retval = supplemental_group_member(grp);
1010     	return retval;
1011     }
1012     
/*
 * Guards system_utsname: taken for reading by sys_newuname() and
 * sys_gethostname(), for writing by sys_sethostname() and
 * sys_setdomainname().
 */
DECLARE_RWSEM(uts_sem);
1014     
1015     asmlinkage long sys_newuname(struct new_utsname * name)
1016     {
1017     	int errno = 0;
1018     
1019     	down_read(&uts_sem);
1020     	if (copy_to_user(name,&system_utsname,sizeof *name))
1021     		errno = -EFAULT;
1022     	up_read(&uts_sem);
1023     	return errno;
1024     }
1025     
1026     asmlinkage long sys_sethostname(char *name, int len)
1027     {
1028     	int errno;
1029     
1030     	if (!capable(CAP_SYS_ADMIN))
1031     		return -EPERM;
1032     	if (len < 0 || len > __NEW_UTS_LEN)
1033     		return -EINVAL;
1034     	down_write(&uts_sem);
1035     	errno = -EFAULT;
1036     	if (!copy_from_user(system_utsname.nodename, name, len)) {
1037     		system_utsname.nodename[len] = 0;
1038     		errno = 0;
1039     	}
1040     	up_write(&uts_sem);
1041     	return errno;
1042     }
1043     
1044     asmlinkage long sys_gethostname(char *name, int len)
1045     {
1046     	int i, errno;
1047     
1048     	if (len < 0)
1049     		return -EINVAL;
1050     	down_read(&uts_sem);
1051     	i = 1 + strlen(system_utsname.nodename);
1052     	if (i > len)
1053     		i = len;
1054     	errno = 0;
1055     	if (copy_to_user(name, system_utsname.nodename, i))
1056     		errno = -EFAULT;
1057     	up_read(&uts_sem);
1058     	return errno;
1059     }
1060     
1061     /*
1062      * Only setdomainname; getdomainname can be implemented by calling
1063      * uname()
1064      */
1065     asmlinkage long sys_setdomainname(char *name, int len)
1066     {
1067     	int errno;
1068     
1069     	if (!capable(CAP_SYS_ADMIN))
1070     		return -EPERM;
1071     	if (len < 0 || len > __NEW_UTS_LEN)
1072     		return -EINVAL;
1073     
1074     	down_write(&uts_sem);
1075     	errno = -EFAULT;
1076     	if (!copy_from_user(system_utsname.domainname, name, len)) {
1077     		errno = 0;
1078     		system_utsname.domainname[len] = 0;
1079     	}
1080     	up_write(&uts_sem);
1081     	return errno;
1082     }
1083     
1084     asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim)
1085     {
1086     	if (resource >= RLIM_NLIMITS)
1087     		return -EINVAL;
1088     	else
1089     		return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
1090     			? -EFAULT : 0;
1091     }
1092     
1093     #if !defined(__ia64__) 
1094     
1095     /*
1096      *	Back compatibility for getrlimit. Needed for some apps.
1097      */
1098      
1099     asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim)
1100     {
1101     	struct rlimit x;
1102     	if (resource >= RLIM_NLIMITS)
1103     		return -EINVAL;
1104     
1105     	memcpy(&x, current->rlim + resource, sizeof(*rlim));
1106     	if(x.rlim_cur > 0x7FFFFFFF)
1107     		x.rlim_cur = 0x7FFFFFFF;
1108     	if(x.rlim_max > 0x7FFFFFFF)
1109     		x.rlim_max = 0x7FFFFFFF;
1110     	return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1111     }
1112     
1113     #endif
1114     
1115     asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim)
1116     {
1117     	struct rlimit new_rlim, *old_rlim;
1118     
1119     	if (resource >= RLIM_NLIMITS)
1120     		return -EINVAL;
1121     	if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1122     		return -EFAULT;
1123     	old_rlim = current->rlim + resource;
1124     	if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
1125     	     (new_rlim.rlim_max > old_rlim->rlim_max)) &&
1126     	    !capable(CAP_SYS_RESOURCE))
1127     		return -EPERM;
1128     	if (resource == RLIMIT_NOFILE) {
1129     		if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
1130     			return -EPERM;
1131     	}
1132     	*old_rlim = new_rlim;
1133     	return 0;
1134     }
1135     
1136     /*
1137      * It would make sense to put struct rusage in the task_struct,
1138      * except that would make the task_struct be *really big*.  After
1139      * task_struct gets moved into malloc'ed memory, it would
1140      * make sense to do this.  It will make moving the rest of the information
1141      * a lot simpler!  (Which we're not doing right now because we're not
1142      * measuring them yet).
1143      *
1144      * This is SMP safe.  Either we are called from sys_getrusage on ourselves
1145      * below (we know we aren't going to exit/disappear and only we change our
1146      * rusage counters), or we are called from wait4() on a process which is
1147      * either stopped or zombied.  In the zombied case the task won't get
1148      * reaped till shortly after the call to getrusage(), in both cases the
1149      * task being examined is in a frozen state so the counters won't change.
1150      *
1151      * FIXME! Get the fault counts properly!
1152      */
1153     int getrusage(struct task_struct *p, int who, struct rusage *ru)
1154     {
1155     	struct rusage r;
1156     
1157     	memset((char *) &r, 0, sizeof(r));
1158     	switch (who) {
1159     		case RUSAGE_SELF:
1160     			r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
1161     			r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
1162     			r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
1163     			r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
1164     			r.ru_minflt = p->min_flt;
1165     			r.ru_majflt = p->maj_flt;
1166     			r.ru_nswap = p->nswap;
1167     			break;
1168     		case RUSAGE_CHILDREN:
1169     			r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
1170     			r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
1171     			r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
1172     			r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
1173     			r.ru_minflt = p->cmin_flt;
1174     			r.ru_majflt = p->cmaj_flt;
1175     			r.ru_nswap = p->cnswap;
1176     			break;
1177     		default:
1178     			r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
1179     			r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
1180     			r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
1181     			r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
1182     			r.ru_minflt = p->min_flt + p->cmin_flt;
1183     			r.ru_majflt = p->maj_flt + p->cmaj_flt;
1184     			r.ru_nswap = p->nswap + p->cnswap;
1185     			break;
1186     	}
1187     	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1188     }
1189     
1190     asmlinkage long sys_getrusage(int who, struct rusage *ru)
1191     {
1192     	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
1193     		return -EINVAL;
1194     	return getrusage(current, who, ru);
1195     }
1196     
1197     asmlinkage long sys_umask(int mask)
1198     {
1199     	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1200     	return mask;
1201     }
1202         
1203     asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1204     			  unsigned long arg4, unsigned long arg5)
1205     {
1206     	int error = 0;
1207     	int sig;
1208     
1209     	switch (option) {
1210     		case PR_SET_PDEATHSIG:
1211     			sig = arg2;
1212     			if (sig < 0 || sig > _NSIG) {
1213     				error = -EINVAL;
1214     				break;
1215     			}
1216     			current->pdeath_signal = sig;
1217     			break;
1218     		case PR_GET_PDEATHSIG:
1219     			error = put_user(current->pdeath_signal, (int *)arg2);
1220     			break;
1221     		case PR_GET_DUMPABLE:
1222     			if (current->mm->dumpable)
1223     				error = 1;
1224     			break;
1225     		case PR_SET_DUMPABLE:
1226     			if (arg2 != 0 && arg2 != 1) {
1227     				error = -EINVAL;
1228     				break;
1229     			}
1230     			current->mm->dumpable = arg2;
1231     			break;
1232     	        case PR_SET_UNALIGN:
1233     #ifdef SET_UNALIGN_CTL
1234     			error = SET_UNALIGN_CTL(current, arg2);
1235     #else
1236     			error = -EINVAL;
1237     #endif
1238     			break;
1239     
1240     	        case PR_GET_UNALIGN:
1241     #ifdef GET_UNALIGN_CTL
1242     			error = GET_UNALIGN_CTL(current, arg2);
1243     #else
1244     			error = -EINVAL;
1245     #endif
1246     			break;
1247     
1248     		case PR_GET_KEEPCAPS:
1249     			if (current->keep_capabilities)
1250     				error = 1;
1251     			break;
1252     		case PR_SET_KEEPCAPS:
1253     			if (arg2 != 0 && arg2 != 1) {
1254     				error = -EINVAL;
1255     				break;
1256     			}
1257     			current->keep_capabilities = arg2;
1258     			break;
1259     		default:
1260     			error = -EINVAL;
1261     			break;
1262     	}
1263     	return error;
1264     }
1265     
/*
 * Functions from this file that are exported via EXPORT_SYMBOL
 * (presumably for use by loadable modules): the notifier chain helpers,
 * the reboot notifier registration pair, and the group membership tests.
 */
EXPORT_SYMBOL(notifier_chain_register);
EXPORT_SYMBOL(notifier_chain_unregister);
EXPORT_SYMBOL(notifier_call_chain);
EXPORT_SYMBOL(register_reboot_notifier);
EXPORT_SYMBOL(unregister_reboot_notifier);
EXPORT_SYMBOL(in_group_p);
EXPORT_SYMBOL(in_egroup_p);
1273