File: /usr/src/linux/fs/fcntl.c
1 /*
2 * linux/fs/fcntl.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7 #include <linux/init.h>
8 #include <linux/mm.h>
9 #include <linux/file.h>
10 #include <linux/dnotify.h>
11 #include <linux/smp_lock.h>
12 #include <linux/slab.h>
13 #include <linux/iobuf.h>
14
15 #include <asm/poll.h>
16 #include <asm/siginfo.h>
17 #include <asm/uaccess.h>
18
/*
 * Helpers implemented outside this file and called from do_fcntl().
 */

/* Socket-specific fcntl commands. */
extern int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg);

/* File lease get/set, used for F_GETLEASE / F_SETLEASE. */
extern int fcntl_getlease(struct file *filp);
extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
22
23 /* Expand files. Return <0 on error; 0 nothing done; 1 files expanded,
24 * we may have blocked.
25 *
26 * Should be called with the files->file_lock spinlock held for write.
27 */
28 static int expand_files(struct files_struct *files, int nr)
29 {
30 int err, expand = 0;
31 #ifdef FDSET_DEBUG
32 printk (KERN_ERR __FUNCTION__ " %d: nr = %d\n", current->pid, nr);
33 #endif
34
35 if (nr >= files->max_fdset) {
36 expand = 1;
37 if ((err = expand_fdset(files, nr)))
38 goto out;
39 }
40 if (nr >= files->max_fds) {
41 expand = 1;
42 if ((err = expand_fd_array(files, nr)))
43 goto out;
44 }
45 err = expand;
46 out:
47 #ifdef FDSET_DEBUG
48 if (err)
49 printk (KERN_ERR __FUNCTION__ " %d: return %d\n", current->pid, err);
50 #endif
51 return err;
52 }
53
54 /*
55 * locate_fd finds a free file descriptor in the open_fds fdset,
56 * expanding the fd arrays if necessary. The files write lock will be
57 * held on exit to ensure that the fd can be entered atomically.
58 */
59
60 static int locate_fd(struct files_struct *files,
61 struct file *file, int orig_start)
62 {
63 unsigned int newfd;
64 int error;
65 int start;
66
67 write_lock(&files->file_lock);
68
69 repeat:
70 /*
71 * Someone might have closed fd's in the range
72 * orig_start..files->next_fd
73 */
74 start = orig_start;
75 if (start < files->next_fd)
76 start = files->next_fd;
77
78 newfd = start;
79 if (start < files->max_fdset) {
80 newfd = find_next_zero_bit(files->open_fds->fds_bits,
81 files->max_fdset, start);
82 }
83
84 error = -EMFILE;
85 if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
86 goto out;
87
88 error = expand_files(files, newfd);
89 if (error < 0)
90 goto out;
91
92 /*
93 * If we needed to expand the fs array we
94 * might have blocked - try again.
95 */
96 if (error)
97 goto repeat;
98
99 if (start <= files->next_fd)
100 files->next_fd = newfd + 1;
101
102 error = newfd;
103
104 out:
105 return error;
106 }
107
108 static inline void allocate_fd(struct files_struct *files,
109 struct file *file, int fd)
110 {
111 FD_SET(fd, files->open_fds);
112 FD_CLR(fd, files->close_on_exec);
113 write_unlock(&files->file_lock);
114 fd_install(fd, file);
115 }
116
117 static int dupfd(struct file *file, int start)
118 {
119 struct files_struct * files = current->files;
120 int ret;
121
122 ret = locate_fd(files, file, start);
123 if (ret < 0)
124 goto out_putf;
125 allocate_fd(files, file, ret);
126 return ret;
127
128 out_putf:
129 write_unlock(&files->file_lock);
130 fput(file);
131 return ret;
132 }
133
134 asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
135 {
136 int err = -EBADF;
137 struct file * file, *tofree;
138 struct files_struct * files = current->files;
139
140 write_lock(&files->file_lock);
141 if (!(file = fcheck(oldfd)))
142 goto out_unlock;
143 err = newfd;
144 if (newfd == oldfd)
145 goto out_unlock;
146 err = -EBADF;
147 if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
148 goto out_unlock;
149 get_file(file); /* We are now finished with oldfd */
150
151 err = expand_files(files, newfd);
152 if (err < 0)
153 goto out_fput;
154
155 /* To avoid races with open() and dup(), we will mark the fd as
156 * in-use in the open-file bitmap throughout the entire dup2()
157 * process. This is quite safe: do_close() uses the fd array
158 * entry, not the bitmap, to decide what work needs to be
159 * done. --sct */
160 /* Doesn't work. open() might be there first. --AV */
161
162 /* Yes. It's a race. In user space. Nothing sane to do */
163 err = -EBUSY;
164 tofree = files->fd[newfd];
165 if (!tofree && FD_ISSET(newfd, files->open_fds))
166 goto out_fput;
167
168 files->fd[newfd] = file;
169 FD_SET(newfd, files->open_fds);
170 FD_CLR(newfd, files->close_on_exec);
171 write_unlock(&files->file_lock);
172
173 if (tofree)
174 filp_close(tofree, files);
175 err = newfd;
176 out:
177 return err;
178 out_unlock:
179 write_unlock(&files->file_lock);
180 goto out;
181
182 out_fput:
183 write_unlock(&files->file_lock);
184 fput(file);
185 goto out;
186 }
187
188 asmlinkage long sys_dup(unsigned int fildes)
189 {
190 int ret = -EBADF;
191 struct file * file = fget(fildes);
192
193 if (file)
194 ret = dupfd(file, 0);
195 return ret;
196 }
197
198 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT)
199
200 static int setfl(int fd, struct file * filp, unsigned long arg)
201 {
202 struct inode * inode = filp->f_dentry->d_inode;
203 int error;
204
205 /*
206 * In the case of an append-only file, O_APPEND
207 * cannot be cleared
208 */
209 if (!(arg & O_APPEND) && IS_APPEND(inode))
210 return -EPERM;
211
212 /* Did FASYNC state change? */
213 if ((arg ^ filp->f_flags) & FASYNC) {
214 if (filp->f_op && filp->f_op->fasync) {
215 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
216 if (error < 0)
217 return error;
218 }
219 }
220
221 if (arg & O_DIRECT) {
222 /*
223 * alloc_kiovec() can sleep and we are only serialized by
224 * the big kernel lock here, so abuse the i_sem to serialize
225 * this case too. We of course wouldn't need to go deep down
226 * to the inode layer, we could stay at the file layer, but
227 * we don't want to pay for the memory of a semaphore in each
228 * file structure too and we use the inode semaphore that we just
229 * pay for anyways.
230 */
231 error = 0;
232 down(&inode->i_sem);
233 if (!filp->f_iobuf)
234 error = alloc_kiovec(1, &filp->f_iobuf);
235 up(&inode->i_sem);
236 if (error < 0)
237 return error;
238 }
239
240 /* required for strict SunOS emulation */
241 if (O_NONBLOCK != O_NDELAY)
242 if (arg & O_NDELAY)
243 arg |= O_NONBLOCK;
244
245 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
246 return 0;
247 }
248
249 static long do_fcntl(unsigned int fd, unsigned int cmd,
250 unsigned long arg, struct file * filp)
251 {
252 long err = -EINVAL;
253
254 switch (cmd) {
255 case F_DUPFD:
256 if (arg < NR_OPEN) {
257 get_file(filp);
258 err = dupfd(filp, arg);
259 }
260 break;
261 case F_GETFD:
262 err = get_close_on_exec(fd);
263 break;
264 case F_SETFD:
265 err = 0;
266 set_close_on_exec(fd, arg&1);
267 break;
268 case F_GETFL:
269 err = filp->f_flags;
270 break;
271 case F_SETFL:
272 lock_kernel();
273 err = setfl(fd, filp, arg);
274 unlock_kernel();
275 break;
276 case F_GETLK:
277 err = fcntl_getlk(fd, (struct flock *) arg);
278 break;
279 case F_SETLK:
280 case F_SETLKW:
281 err = fcntl_setlk(fd, cmd, (struct flock *) arg);
282 break;
283 case F_GETOWN:
284 /*
285 * XXX If f_owner is a process group, the
286 * negative return value will get converted
287 * into an error. Oops. If we keep the
288 * current syscall conventions, the only way
289 * to fix this will be in libc.
290 */
291 err = filp->f_owner.pid;
292 break;
293 case F_SETOWN:
294 lock_kernel();
295 filp->f_owner.pid = arg;
296 filp->f_owner.uid = current->uid;
297 filp->f_owner.euid = current->euid;
298 err = 0;
299 if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
300 err = sock_fcntl (filp, F_SETOWN, arg);
301 unlock_kernel();
302 break;
303 case F_GETSIG:
304 err = filp->f_owner.signum;
305 break;
306 case F_SETSIG:
307 /* arg == 0 restores default behaviour. */
308 if (arg < 0 || arg > _NSIG) {
309 break;
310 }
311 err = 0;
312 filp->f_owner.signum = arg;
313 break;
314 case F_GETLEASE:
315 err = fcntl_getlease(filp);
316 break;
317 case F_SETLEASE:
318 err = fcntl_setlease(fd, filp, arg);
319 break;
320 case F_NOTIFY:
321 err = fcntl_dirnotify(fd, filp, arg);
322 break;
323 default:
324 /* sockets need a few special fcntls. */
325 err = -EINVAL;
326 if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
327 err = sock_fcntl (filp, cmd, arg);
328 break;
329 }
330
331 return err;
332 }
333
334 asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
335 {
336 struct file * filp;
337 long err = -EBADF;
338
339 filp = fget(fd);
340 if (!filp)
341 goto out;
342
343 err = do_fcntl(fd, cmd, arg, filp);
344
345 fput(filp);
346 out:
347 return err;
348 }
349
#if BITS_PER_LONG == 32
/*
 * fcntl64 system call (only built when BITS_PER_LONG == 32): handles
 * the 64-bit record locking commands itself and hands everything else
 * to do_fcntl().
 *
 * Returns -EBADF when fd is not open, otherwise the result of the
 * selected handler.
 */
asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp = fget(fd);
	long ret;

	if (!filp)
		return -EBADF;

	switch (cmd) {
	case F_GETLK64:
		ret = fcntl_getlk64(fd, (struct flock64 *) arg);
		break;
	case F_SETLK64:
	case F_SETLKW64:
		/* Both variants go through the same helper; it gets cmd. */
		ret = fcntl_setlk64(fd, cmd, (struct flock64 *) arg);
		break;
	default:
		ret = do_fcntl(fd, cmd, arg, filp);
		break;
	}

	fput(filp);
	return ret;
}
#endif
380
381 /* Table to convert sigio signal codes into poll band bitmaps */
382
383 static long band_table[NSIGPOLL] = {
384 POLLIN | POLLRDNORM, /* POLL_IN */
385 POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */
386 POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */
387 POLLERR, /* POLL_ERR */
388 POLLPRI | POLLRDBAND, /* POLL_PRI */
389 POLLHUP | POLLERR /* POLL_HUP */
390 };
391
392 static void send_sigio_to_task(struct task_struct *p,
393 struct fown_struct *fown,
394 int fd,
395 int reason)
396 {
397 if ((fown->euid != 0) &&
398 (fown->euid ^ p->suid) && (fown->euid ^ p->uid) &&
399 (fown->uid ^ p->suid) && (fown->uid ^ p->uid))
400 return;
401 switch (fown->signum) {
402 siginfo_t si;
403 default:
404 /* Queue a rt signal with the appropriate fd as its
405 value. We use SI_SIGIO as the source, not
406 SI_KERNEL, since kernel signals always get
407 delivered even if we can't queue. Failure to
408 queue in this case _should_ be reported; we fall
409 back to SIGIO in that case. --sct */
410 si.si_signo = fown->signum;
411 si.si_errno = 0;
412 si.si_code = reason & ~__SI_MASK;
413 /* Make sure we are called with one of the POLL_*
414 reasons, otherwise we could leak kernel stack into
415 userspace. */
416 if ((reason & __SI_MASK) != __SI_POLL)
417 BUG();
418 if (reason - POLL_IN >= NSIGPOLL)
419 si.si_band = ~0L;
420 else
421 si.si_band = band_table[reason - POLL_IN];
422 si.si_fd = fd;
423 if (!send_sig_info(fown->signum, &si, p))
424 break;
425 /* fall-through: fall back on the old plain SIGIO signal */
426 case 0:
427 send_sig(SIGIO, p, 1);
428 }
429 }
430
431 void send_sigio(struct fown_struct *fown, int fd, int band)
432 {
433 struct task_struct * p;
434 int pid = fown->pid;
435
436 read_lock(&tasklist_lock);
437 if ( (pid > 0) && (p = find_task_by_pid(pid)) ) {
438 send_sigio_to_task(p, fown, fd, band);
439 goto out;
440 }
441 for_each_task(p) {
442 int match = p->pid;
443 if (pid < 0)
444 match = -p->pgrp;
445 if (pid != match)
446 continue;
447 send_sigio_to_task(p, fown, fd, band);
448 }
449 out:
450 read_unlock(&tasklist_lock);
451 }
452
453 static rwlock_t fasync_lock = RW_LOCK_UNLOCKED;
454 static kmem_cache_t *fasync_cache;
455
456 /*
457 * fasync_helper() is used by some character device drivers (mainly mice)
458 * to set up the fasync queue. It returns negative on error, 0 if it did
459 * no changes and positive if it added/deleted the entry.
460 */
461 int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
462 {
463 struct fasync_struct *fa, **fp;
464 struct fasync_struct *new = NULL;
465 int result = 0;
466
467 if (on) {
468 new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
469 if (!new)
470 return -ENOMEM;
471 }
472 write_lock_irq(&fasync_lock);
473 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
474 if (fa->fa_file == filp) {
475 if(on) {
476 fa->fa_fd = fd;
477 kmem_cache_free(fasync_cache, new);
478 } else {
479 *fp = fa->fa_next;
480 kmem_cache_free(fasync_cache, fa);
481 result = 1;
482 }
483 goto out;
484 }
485 }
486
487 if (on) {
488 new->magic = FASYNC_MAGIC;
489 new->fa_file = filp;
490 new->fa_fd = fd;
491 new->fa_next = *fapp;
492 *fapp = new;
493 result = 1;
494 }
495 out:
496 write_unlock_irq(&fasync_lock);
497 return result;
498 }
499
500 void __kill_fasync(struct fasync_struct *fa, int sig, int band)
501 {
502 while (fa) {
503 struct fown_struct * fown;
504 if (fa->magic != FASYNC_MAGIC) {
505 printk(KERN_ERR "kill_fasync: bad magic number in "
506 "fasync_struct!\n");
507 return;
508 }
509 fown = &fa->fa_file->f_owner;
510 /* Don't send SIGURG to processes which have not set a
511 queued signum: SIGURG has its own default signalling
512 mechanism. */
513 if (fown->pid && !(sig == SIGURG && fown->signum == 0))
514 send_sigio(fown, fa->fa_fd, band);
515 fa = fa->fa_next;
516 }
517 }
518
519 void kill_fasync(struct fasync_struct **fp, int sig, int band)
520 {
521 read_lock(&fasync_lock);
522 __kill_fasync(*fp, sig, band);
523 read_unlock(&fasync_lock);
524 }
525
526 static int __init fasync_init(void)
527 {
528 fasync_cache = kmem_cache_create("fasync cache",
529 sizeof(struct fasync_struct), 0, 0, NULL, NULL);
530 if (!fasync_cache)
531 panic("cannot create fasync slab cache");
532 return 0;
533 }
534
535 module_init(fasync_init)
536