File: /usr/src/linux/fs/fcntl.c

1     /*
2      *  linux/fs/fcntl.c
3      *
4      *  Copyright (C) 1991, 1992  Linus Torvalds
5      */
6     
7     #include <linux/init.h>
8     #include <linux/mm.h>
9     #include <linux/file.h>
10     #include <linux/dnotify.h>
11     #include <linux/smp_lock.h>
12     #include <linux/slab.h>
13     #include <linux/iobuf.h>
14     
15     #include <asm/poll.h>
16     #include <asm/siginfo.h>
17     #include <asm/uaccess.h>
18     
       /*
        * Helpers defined outside this file.  do_fcntl() calls sock_fcntl()
        * for F_SETOWN on a socket and for unrecognised commands issued on a
        * socket; the two lease helpers back F_SETLEASE and F_GETLEASE.
        */
19     extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg);
20     extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
21     extern int fcntl_getlease(struct file *filp);
22     
23     /* expand_files - grow the fd bitmaps and/or the fd array so slot nr fits.
24      * Returns <0 on error, 0 when nothing had to grow, and 1 when something
25      * was expanded (in which case we may have blocked).
26      *
27      * Should be called with the files->file_lock spinlock held for write.
28      */
29     static int expand_files(struct files_struct *files, int nr)
30     {
31     	int rc = 0, grew = 0;
32     
33     #ifdef FDSET_DEBUG
34     	printk (KERN_ERR __FUNCTION__ " %d: nr = %d\n", current->pid, nr);
35     #endif
36     	if (files->max_fdset <= nr) {
37     		grew = 1;
38     		rc = expand_fdset(files, nr);
39     	}
40     	if (!rc && files->max_fds <= nr) {
41     		grew = 1;
42     		rc = expand_fd_array(files, nr);
43     	}
44     	if (!rc)
45     		rc = grew;	/* no failure: report whether anything grew */
46     
47     #ifdef FDSET_DEBUG
48     	if (rc)
49     		printk (KERN_ERR __FUNCTION__ " %d: return %d\n", current->pid, rc);
50     #endif
51     	return rc;
52     }
53     
54     /*
55      * locate_fd finds a free file descriptor in the open_fds fdset,
56      * expanding the fd arrays if necessary.  The files write lock will be
57      * held on exit to ensure that the fd can be entered atomically.
        *
        * The search starts at the larger of orig_start and files->next_fd.
        * The chosen descriptor is only returned, it is not marked in
        * open_fds here.  The file argument is not used by this function.
        *
        * Returns the descriptor number, -EMFILE when the candidate is at or
        * above current->rlim[RLIMIT_NOFILE].rlim_cur, or the negative error
        * from expand_files().  files->file_lock is taken for write on entry
        * and is still held on every return path, errors included, so the
        * caller has to release it.
58      */
59     
60     static int locate_fd(struct files_struct *files, 
61     			    struct file *file, int orig_start)
62     {
63     	unsigned int newfd;
64     	int error;
65     	int start;
66     
67     	write_lock(&files->file_lock);
68     	
69     repeat:
70     	/*
71     	 * Someone might have closed fd's in the range
72     	 * orig_start..files->next_fd
73     	 */
74     	start = orig_start;
75     	if (start < files->next_fd)
76     		start = files->next_fd;
77     
       	/*
       	 * If start already lies beyond the bitmap there is nothing to
       	 * scan: newfd stays at start and expand_files() below is asked
       	 * to make room for it.
       	 */
78     	newfd = start;
79     	if (start < files->max_fdset) {
80     		newfd = find_next_zero_bit(files->open_fds->fds_bits,
81     			files->max_fdset, start);
82     	}
83     	
84     	error = -EMFILE;
85     	if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
86     		goto out;
87     
88     	error = expand_files(files, newfd);
89     	if (error < 0)
90     		goto out;
91     
92     	/*
93     	 * If we needed to expand the fd array we
94     	 * might have blocked - try again.
95     	 */
96     	if (error)
97     		goto repeat;
98     
       	/*
       	 * start was clamped to at least files->next_fd above, so this
       	 * only fires when the search began exactly at next_fd; next_fd
       	 * then moves just past the descriptor that was chosen.
       	 */
99     	if (start <= files->next_fd)
100     		files->next_fd = newfd + 1;
101     	
102     	error = newfd;
103     	
104     out:
        	/* file_lock is deliberately still held here */
105     	return error;
106     }
107     
        /*
         * allocate_fd - claim descriptor fd for file.
         *
         * Marks fd in open_fds, clears its close_on_exec bit, releases
         * files->file_lock (write side) and only then installs file with
         * fd_install().  The lock is not taken here: dupfd() calls this
         * right after locate_fd(), which returns with the lock held.
         */
108     static inline void allocate_fd(struct files_struct *files, 
109     					struct file *file, int fd)
110     {
111     	FD_SET(fd, files->open_fds);
112     	FD_CLR(fd, files->close_on_exec);
113     	write_unlock(&files->file_lock);
114     	fd_install(fd, file);
115     }
116     
117     /* dupfd - install file on a free descriptor at or above start and return
118      * that descriptor, or a negative error.  Either way the caller's reference
119      * on file is used up: installed on success, fput() on failure. */
120     static int dupfd(struct file *file, int start)
121     {
122     	struct files_struct *fds = current->files;
123     	int newfd = locate_fd(fds, file, start);
124     
125     	if (newfd >= 0) {
126     		allocate_fd(fds, file, newfd);	/* also drops file_lock */
127     		return newfd;
128     	}
129     	write_unlock(&fds->file_lock);	/* locate_fd() left it held */
130     	fput(file);
131     	return newfd;
132     }
133     
        /*
         * dup2(2): make newfd refer to the same file as oldfd.
         *
         * Returns newfd on success, and also (without changing anything) when
         * newfd == oldfd and oldfd is open.  Returns -EBADF when oldfd is not
         * open or newfd is at or above current->rlim[RLIMIT_NOFILE].rlim_cur,
         * the negative error from expand_files(), or -EBUSY when newfd is
         * marked in open_fds but no file is installed in its slot.
         *
         * A file that was previously installed at newfd is closed with
         * filp_close() after files->file_lock has been released.
         */
134     asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
135     {
136     	int err = -EBADF;
137     	struct file * file, *tofree;
138     	struct files_struct * files = current->files;
139     
140     	write_lock(&files->file_lock);
141     	if (!(file = fcheck(oldfd)))
142     		goto out_unlock;
143     	err = newfd;
144     	if (newfd == oldfd)
145     		goto out_unlock;
146     	err = -EBADF;
147     	if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
148     		goto out_unlock;
149     	get_file(file);			/* We are now finished with oldfd */
150     
151     	err = expand_files(files, newfd);
152     	if (err < 0)
153     		goto out_fput;
154     
155     	/* To avoid races with open() and dup(), we will mark the fd as
156     	 * in-use in the open-file bitmap throughout the entire dup2()
157     	 * process.  This is quite safe: do_close() uses the fd array
158     	 * entry, not the bitmap, to decide what work needs to be
159     	 * done.  --sct */
160     	/* Doesn't work. open() might be there first. --AV */
161     
162     	/* Yes. It's a race. In user space. Nothing sane to do */
163     	err = -EBUSY;
164     	tofree = files->fd[newfd];
        	/* bitmap bit set but slot empty: the descriptor is reserved, refuse */
165     	if (!tofree && FD_ISSET(newfd, files->open_fds))
166     		goto out_fput;
167     
168     	files->fd[newfd] = file;
169     	FD_SET(newfd, files->open_fds);
170     	FD_CLR(newfd, files->close_on_exec);
171     	write_unlock(&files->file_lock);
172     
        	/* close the displaced file only after the lock has been dropped */
173     	if (tofree)
174     		filp_close(tofree, files);
175     	err = newfd;
176     out:
177     	return err;
178     out_unlock:
179     	write_unlock(&files->file_lock);
180     	goto out;
181     
        /* failure after get_file(): unlock, then give the reference back */
182     out_fput:
183     	write_unlock(&files->file_lock);
184     	fput(file);
185     	goto out;
186     }
187     
188     /* dup(2): duplicate fildes onto a free descriptor, searching from 0. */
189     asmlinkage long sys_dup(unsigned int fildes)
190     {
191     	struct file *filp = fget(fildes);
192     
193     	if (!filp)
194     		return -EBADF;
195     	return dupfd(filp, 0);	/* dupfd() uses up the file from fget() */
196     }
197     
        /*
         * The f_flags bits that setfl() copies from its argument; every bit
         * outside this mask keeps the value it already has in filp->f_flags.
         */
198     #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT)
199     
        /*
         * setfl - apply an F_SETFL argument to filp.
         *
         * fd is only passed through to f_op->fasync(); arg is the new flag
         * word, of which just the SETFL_MASK bits are stored.
         *
         * Returns 0 on success, -EPERM when O_APPEND would be cleared on an
         * append-only inode, or the negative error from f_op->fasync() or
         * alloc_kiovec().  do_fcntl() calls this with lock_kernel() held.
         *
         * NOTE(review): the fasync() callback runs before the O_DIRECT
         * allocation.  If alloc_kiovec() then fails we return without
         * storing FASYNC in f_flags even though the callback has already
         * been told about the change - confirm whether that matters.
         */
200     static int setfl(int fd, struct file * filp, unsigned long arg)
201     {
202     	struct inode * inode = filp->f_dentry->d_inode;
203     	int error;
204     
205     	/*
206     	 * In the case of an append-only file, O_APPEND
207     	 * cannot be cleared
208     	 */
209     	if (!(arg & O_APPEND) && IS_APPEND(inode))
210     		return -EPERM;
211     
212     	/* Did FASYNC state change? */
213     	if ((arg ^ filp->f_flags) & FASYNC) {
        		/* files without a fasync method just get the flag bit stored */
214     		if (filp->f_op && filp->f_op->fasync) {
215     			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
216     			if (error < 0)
217     				return error;
218     		}
219     	}
220     
221     	if (arg & O_DIRECT) {
222     		/*
223     		 * alloc_kiovec() can sleep and we are only serialized by
224     		 * the big kernel lock here, so abuse the i_sem to serialize
225     		 * this case too. We of course wouldn't need to go deep down
226     		 * to the inode layer, we could stay at the file layer, but
227     		 * we don't want to pay for the memory of a semaphore in each
228     		 * file structure too and we use the inode semaphore that we just
229     		 * pay for anyways.
230     		 */
231     		error = 0;
232     		down(&inode->i_sem);
        		/* allocate only once per file; an existing f_iobuf is reused */
233     		if (!filp->f_iobuf)
234     			error = alloc_kiovec(1, &filp->f_iobuf);
235     		up(&inode->i_sem);
236     		if (error < 0)
237     			return error;
238     	}
239     
240     	/* required for strict SunOS emulation */
        	/* compile-time constant test: only matters where the two flags differ */
241     	if (O_NONBLOCK != O_NDELAY)
242     	       if (arg & O_NDELAY)
243     		   arg |= O_NONBLOCK;
244     
        	/* take the SETFL_MASK bits from arg, keep all other bits as they were */
245     	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
246     	return 0;
247     }
248     
        /*
         * do_fcntl - carry out one fcntl command on an already looked-up file.
         *
         * Called by sys_fcntl() and sys_fcntl64(), which obtain filp with
         * fget() and release it with fput() afterwards.  Returns the
         * command's result or a negative error.  -EINVAL is what comes back
         * for an out-of-range F_DUPFD or F_SETSIG argument and for an
         * unrecognised command on something that is not a socket.
         */
249     static long do_fcntl(unsigned int fd, unsigned int cmd,
250     		     unsigned long arg, struct file * filp)
251     {
252     	long err = -EINVAL;
253     
254     	switch (cmd) {
255     		case F_DUPFD:
256     			if (arg < NR_OPEN) {
        				/* dupfd() uses up a reference, so take one for it */
257     				get_file(filp);
258     				err = dupfd(filp, arg);
259     			}
260     			break;
261     		case F_GETFD:
262     			err = get_close_on_exec(fd);
263     			break;
264     		case F_SETFD:
265     			err = 0;
        			/* only the lowest bit of arg is looked at */
266     			set_close_on_exec(fd, arg&1);
267     			break;
268     		case F_GETFL:
269     			err = filp->f_flags;
270     			break;
271     		case F_SETFL:
272     			lock_kernel();
273     			err = setfl(fd, filp, arg);
274     			unlock_kernel();
275     			break;
276     		case F_GETLK:
277     			err = fcntl_getlk(fd, (struct flock *) arg);
278     			break;
279     		case F_SETLK:
280     		case F_SETLKW:
281     			err = fcntl_setlk(fd, cmd, (struct flock *) arg);
282     			break;
283     		case F_GETOWN:
284     			/*
285     			 * XXX If f_owner is a process group, the
286     			 * negative return value will get converted
287     			 * into an error.  Oops.  If we keep the
288     			 * current syscall conventions, the only way
289     			 * to fix this will be in libc.
290     			 */
291     			err = filp->f_owner.pid;
292     			break;
293     		case F_SETOWN:
294     			lock_kernel();
        			/* remember who set the owner; send_sigio_to_task() checks these ids */
295     			filp->f_owner.pid = arg;
296     			filp->f_owner.uid = current->uid;
297     			filp->f_owner.euid = current->euid;
298     			err = 0;
299     			if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
300     				err = sock_fcntl (filp, F_SETOWN, arg);
301     			unlock_kernel();
302     			break;
303     		case F_GETSIG:
304     			err = filp->f_owner.signum;
305     			break;
306     		case F_SETSIG:
307     			/* arg == 0 restores default behaviour. */
        			/*
        			 * arg is unsigned, so a "negative" value shows up as a
        			 * huge one and is caught by the upper bound; a separate
        			 * "arg < 0" test could never be true.
        			 */
308     			if (arg > _NSIG) {
309     				break;
310     			}
311     			err = 0;
312     			filp->f_owner.signum = arg;
313     			break;
314     		case F_GETLEASE:
315     			err = fcntl_getlease(filp);
316     			break;
317     		case F_SETLEASE:
318     			err = fcntl_setlease(fd, filp, arg);
319     			break;
320     		case F_NOTIFY:
321     			err = fcntl_dirnotify(fd, filp, arg);
322     			break;
323     		default:
324     			/* sockets need a few special fcntls. */
325     			err = -EINVAL;
326     			if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
327     				err = sock_fcntl (filp, cmd, arg);
328     			break;
329     	}
330     
331     	return err;
332     }
333     
334     /*
335      * fcntl(2) entry point: look up fd, let do_fcntl() carry out cmd, then
336      * release the file again.  Returns -EBADF when fget() finds no file.
337      */
338     asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
339     {
340     	struct file *filp = fget(fd);
341     	long ret;
342     
343     	if (!filp)
344     		return -EBADF;
345     	ret = do_fcntl(fd, cmd, arg, filp);
346     	fput(filp);
347     	return ret;
348     }
349     
350     #if BITS_PER_LONG == 32
351     /*
352      * fcntl64(2): only built when BITS_PER_LONG is 32.  The three struct
353      * flock64 locking commands are handled here; every other command is
354      * passed on to do_fcntl().  Returns -EBADF when fget() finds no file.
355      */
356     asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
357     {
358     	struct file *filp = fget(fd);
359     	long ret;
360     
361     	if (!filp)
362     		return -EBADF;
363     	switch (cmd) {
364     	case F_GETLK64:
365     		ret = fcntl_getlk64(fd, (struct flock64 *) arg);
366     		break;
367     	case F_SETLK64:
368     	case F_SETLKW64:
369     		/* both set-lock commands share one helper; cmd tells it which */
370     		ret = fcntl_setlk64(fd, cmd, (struct flock64 *) arg);
371     		break;
372     	default:
373     		ret = do_fcntl(fd, cmd, arg, filp);
374     		break;
375     	}
376     	fput(filp);
377     	return ret;
378     }
379     #endif
380     
381     /* Table to convert sigio signal codes into poll band bitmaps.
         * send_sigio_to_task() indexes it with (reason - POLL_IN), so the
         * entries must stay in the order of the POLL_* codes named below. */
382     
383     static long band_table[NSIGPOLL] = {
384     	POLLIN | POLLRDNORM,			/* POLL_IN */
385     	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
386     	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
387     	POLLERR,				/* POLL_ERR */
388     	POLLPRI | POLLRDBAND,			/* POLL_PRI */
389     	POLLHUP | POLLERR			/* POLL_HUP */
390     };
391     
        /*
         * send_sigio_to_task - deliver the I/O signal described by fown to p.
         *
         * Nothing is sent unless fown->euid is 0 or one of fown->euid /
         * fown->uid equals p->suid or p->uid.  With a non-zero fown->signum
         * a siginfo carrying fd and the poll band for reason is sent through
         * send_sig_info(); if that returns non-zero, or if signum is 0, a
         * plain SIGIO is sent with send_sig() instead.
         *
         * reason must carry the __SI_POLL class bits, otherwise BUG() fires.
         */
392     static void send_sigio_to_task(struct task_struct *p,
393     			       struct fown_struct *fown, 
394     			       int fd,
395     			       int reason)
396     {
        	/* a ^ b is non-zero exactly when the two ids differ */
397     	if ((fown->euid != 0) &&
398     	    (fown->euid ^ p->suid) && (fown->euid ^ p->uid) &&
399     	    (fown->uid ^ p->suid) && (fown->uid ^ p->uid))
400     		return;
401     	switch (fown->signum) {
        		/* declared ahead of the first label; no initialiser, so nothing is skipped */
402     		siginfo_t si;
403     		default:
404     			/* Queue a rt signal with the appropriate fd as its
405     			   value.  We use SI_SIGIO as the source, not 
406     			   SI_KERNEL, since kernel signals always get 
407     			   delivered even if we can't queue.  Failure to
408     			   queue in this case _should_ be reported; we fall
409     			   back to SIGIO in that case. --sct */
410     			si.si_signo = fown->signum;
411     			si.si_errno = 0;
412     		        si.si_code  = reason & ~__SI_MASK;
413     			/* Make sure we are called with one of the POLL_*
414     			   reasons, otherwise we could leak kernel stack into
415     			   userspace.  */
416     			if ((reason & __SI_MASK) != __SI_POLL)
417     				BUG();
        			/*
        			 * NOTE(review): only the upper end of the index is
        			 * checked here.  If this subtraction is signed, a
        			 * reason below POLL_IN would index in front of
        			 * band_table - confirm no caller can pass one.
        			 */
418     			if (reason - POLL_IN >= NSIGPOLL)
419     				si.si_band  = ~0L;
420     			else
421     				si.si_band = band_table[reason - POLL_IN];
422     			si.si_fd    = fd;
423     			if (!send_sig_info(fown->signum, &si, p))
424     				break;
425     		/* fall-through: fall back on the old plain SIGIO signal */
426     		case 0:
427     			send_sig(SIGIO, p, 1);
428     	}
429     }
430     
431     /*
432      * send_sigio - signal the owner recorded in fown.  pid > 0 names one task;
433      * otherwise all tasks are scanned, a pid < 0 matching the negated pgrp.
434      */
435     void send_sigio(struct fown_struct *fown, int fd, int band)
436     {
437     	struct task_struct *p;
438     	int owner = fown->pid;
439     
440     	read_lock(&tasklist_lock);
441     	if (owner > 0 && (p = find_task_by_pid(owner)) != NULL) {
442     		send_sigio_to_task(p, fown, fd, band);
443     	} else {
444     		for_each_task(p) {
445     			int id = (owner < 0) ? -p->pgrp : p->pid;
446     			if (id == owner)
447     				send_sigio_to_task(p, fown, fd, band);
448     		}
449     	}
450     	read_unlock(&tasklist_lock);
451     }
452     
        /*
         * fasync_lock is taken for writing (with write_lock_irq) by
         * fasync_helper() while it edits a fasync list, and for reading by
         * kill_fasync() while a list is walked.  fasync_cache is the slab
         * cache created in fasync_init() that list entries come from.
         */
453     static rwlock_t fasync_lock = RW_LOCK_UNLOCKED;
454     static kmem_cache_t *fasync_cache;
455     
456     /*
457      * fasync_helper() is used by some character device drivers (mainly mice)
458      * to set up the fasync queue. It returns negative on error, 0 if it did
459      * no changes and positive if it added/deleted the entry.
         *
         * fd and filp identify the entry; a non-zero "on" adds it to the list
         * headed by *fapp, a zero "on" removes it.
         *
         * With "on" set and filp already on the list, only the stored fd is
         * refreshed and 0 is returned.  The new entry is allocated before
         * fasync_lock is taken and is freed again when it turns out not to
         * be needed.  The only error produced here is -ENOMEM.
460      */
461     int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
462     {
463     	struct fasync_struct *fa, **fp;
464     	struct fasync_struct *new = NULL;
465     	int result = 0;
466     
467     	if (on) {
468     		new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
469     		if (!new)
470     			return -ENOMEM;
471     	}
472     	write_lock_irq(&fasync_lock);
        	/* fp always points at the link that leads to fa, which makes unlinking easy */
473     	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
474     		if (fa->fa_file == filp) {
475     			if(on) {
        				/* already registered: refresh fd, drop the spare entry */
476     				fa->fa_fd = fd;
477     				kmem_cache_free(fasync_cache, new);
478     			} else {
479     				*fp = fa->fa_next;
480     				kmem_cache_free(fasync_cache, fa);
481     				result = 1;
482     			}
483     			goto out;
484     		}
485     	}
486     
        	/* filp was not on the list: push the new entry at the head */
487     	if (on) {
488     		new->magic = FASYNC_MAGIC;
489     		new->fa_file = filp;
490     		new->fa_fd = fd;
491     		new->fa_next = *fapp;
492     		*fapp = new;
493     		result = 1;
494     	}
495     out:
496     	write_unlock_irq(&fasync_lock);
497     	return result;
498     }
499     
500     /* Signal the owner of every entry on the list starting at fa. */
501     void __kill_fasync(struct fasync_struct *fa, int sig, int band)
502     {
503     	for (; fa != NULL; fa = fa->fa_next) {
504     		struct fown_struct *owner;
505     		if (fa->magic != FASYNC_MAGIC) {
506     			printk(KERN_ERR "kill_fasync: bad magic number in "
507     			       "fasync_struct!\n");
508     			return;
509     		}
510     		owner = &fa->fa_file->f_owner;
511     		if (!owner->pid)
512     			continue;	/* no owner set on this file */
513     		/* SIGURG has its own default signalling mechanism, so it is
514     		   only forwarded when a queued signum has been set. */
515     		if (sig != SIGURG || owner->signum != 0)
516     			send_sigio(owner, fa->fa_fd, band);
517     	}
518     }
518     
        /*
         * kill_fasync - signal every owner on the fasync list *fp.
         *
         * Holds fasync_lock for reading around __kill_fasync(), the same
         * lock fasync_helper() takes for writing when it edits a list.
         */
519     void kill_fasync(struct fasync_struct **fp, int sig, int band)
520     {
521     	read_lock(&fasync_lock);
522     	__kill_fasync(*fp, sig, band);
523     	read_unlock(&fasync_lock);
524     }
525     
        /*
         * Initialisation hook, registered with module_init() below: creates
         * the slab cache that fasync_helper() allocates struct fasync_struct
         * entries from.  Panics when the cache cannot be created; otherwise
         * returns 0.
         */
526     static int __init fasync_init(void)
527     {
528     	fasync_cache = kmem_cache_create("fasync cache",
529     		sizeof(struct fasync_struct), 0, 0, NULL, NULL);
530     	if (!fasync_cache)
531     		panic("cannot create fasync slab cache");
532     	return 0;
533     }
534     
535     module_init(fasync_init)
536