File: /usr/src/linux/mm/shmem.c
1 /*
2 * Resizable virtual memory filesystem for Linux.
3 *
4 * Copyright (C) 2000 Linus Torvalds.
5 * 2000 Transmeta Corp.
6 * 2000-2001 Christoph Rohland
7 * 2000-2001 SAP AG
8 *
9 * This file is released under the GPL.
10 */
11
12 /*
13 * This virtual memory filesystem is heavily based on the ramfs. It
14 * extends ramfs by the ability to use swap and honor resource limits
15 * which makes it a completely usable filesystem.
16 */
17
18 #include <linux/config.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/devfs_fs_kernel.h>
22 #include <linux/fs.h>
23 #include <linux/mm.h>
24 #include <linux/file.h>
25 #include <linux/swap.h>
26 #include <linux/pagemap.h>
27 #include <linux/string.h>
28 #include <linux/locks.h>
29 #include <asm/smplock.h>
30
31 #include <asm/uaccess.h>
32
33 /* This magic number is used in glibc for posix shared memory */
34 #define TMPFS_MAGIC 0x01021994
35
36 #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
37 #define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
38
39 static struct super_operations shmem_ops;
40 static struct address_space_operations shmem_aops;
41 static struct file_operations shmem_file_operations;
42 static struct inode_operations shmem_inode_operations;
43 static struct file_operations shmem_dir_operations;
44 static struct inode_operations shmem_dir_inode_operations;
45 static struct inode_operations shmem_symlink_inode_operations;
46 static struct vm_operations_struct shmem_vm_ops;
47
48 LIST_HEAD (shmem_inodes);
49 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
50
51 #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
52
53 /*
54 * shmem_recalc_inode - recalculate the size of an inode
55 *
56 * @inode: inode to recalc
57 *
58 * We have to calculate the free blocks since the mm can drop pages
59 * behind our back
60 *
61 * But we know that normally
62 * inodes->i_blocks/BLOCKS_PER_PAGE ==
63 * inode->i_mapping->nrpages + info->swapped
64 *
65 * So the mm freed
66 * inodes->i_blocks/BLOCKS_PER_PAGE -
67 * (inode->i_mapping->nrpages + info->swapped)
68 *
69 * It has to be called with the spinlock held.
70 */
71
72 static void shmem_recalc_inode(struct inode * inode)
73 {
74 unsigned long freed;
75
76 freed = (inode->i_blocks/BLOCKS_PER_PAGE) -
77 (inode->i_mapping->nrpages + inode->u.shmem_i.swapped);
78 if (freed){
79 struct shmem_sb_info * info = &inode->i_sb->u.shmem_sb;
80 inode->i_blocks -= freed*BLOCKS_PER_PAGE;
81 spin_lock (&info->stat_lock);
82 info->free_blocks += freed;
83 spin_unlock (&info->stat_lock);
84 }
85 }
86
87 static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index)
88 {
89 unsigned long offset;
90
91 if (index < SHMEM_NR_DIRECT)
92 return info->i_direct+index;
93
94 index -= SHMEM_NR_DIRECT;
95 offset = index % ENTRIES_PER_PAGE;
96 index /= ENTRIES_PER_PAGE;
97
98 if (index >= ENTRIES_PER_PAGE)
99 return ERR_PTR(-EFBIG);
100
101 if (!info->i_indirect) {
102 info->i_indirect = (swp_entry_t **) get_zeroed_page(GFP_USER);
103 if (!info->i_indirect)
104 return ERR_PTR(-ENOMEM);
105 }
106 if(!(info->i_indirect[index])) {
107 info->i_indirect[index] = (swp_entry_t *) get_zeroed_page(GFP_USER);
108 if (!info->i_indirect[index])
109 return ERR_PTR(-ENOMEM);
110 }
111
112 return info->i_indirect[index]+offset;
113 }
114
115 static int shmem_free_swp(swp_entry_t *dir, unsigned int count)
116 {
117 swp_entry_t *ptr, entry;
118 int freed = 0;
119
120 for (ptr = dir; ptr < dir + count; ptr++) {
121 if (!ptr->val)
122 continue;
123 entry = *ptr;
124 *ptr = (swp_entry_t){0};
125 freed++;
126
127 /* vmscan will do the actual page freeing later.. */
128 swap_free (entry);
129 }
130 return freed;
131 }
132
133 /*
134 * shmem_truncate_part - free a bunch of swap entries
135 *
136 * @dir: pointer to swp_entries
137 * @size: number of entries in dir
138 * @start: offset to start from
139 * @freed: counter for freed pages
140 *
141 * It frees the swap entries from dir+start til dir+size
142 *
143 * returns 0 if it truncated something, else (offset-size)
144 */
145
146 static unsigned long
147 shmem_truncate_part (swp_entry_t * dir, unsigned long size,
148 unsigned long start, unsigned long *freed) {
149 if (start > size)
150 return start - size;
151 if (dir)
152 *freed += shmem_free_swp (dir+start, size-start);
153
154 return 0;
155 }
156
157 static void shmem_truncate (struct inode * inode)
158 {
159 int clear_base;
160 unsigned long index, start;
161 unsigned long freed = 0;
162 swp_entry_t **base, **ptr, **last;
163 struct shmem_inode_info * info = &inode->u.shmem_i;
164
165 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
166 spin_lock (&info->lock);
167 index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
168 if (index > info->max_index)
169 goto out;
170
171 start = shmem_truncate_part (info->i_direct, SHMEM_NR_DIRECT, index, &freed);
172
173 if (!(base = info->i_indirect))
174 goto out;
175
176 clear_base = 1;
177 last = base + ((info->max_index - SHMEM_NR_DIRECT + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE);
178 for (ptr = base; ptr < last; ptr++) {
179 if (!start) {
180 if (!*ptr)
181 continue;
182 freed += shmem_free_swp (*ptr, ENTRIES_PER_PAGE);
183 free_page ((unsigned long) *ptr);
184 *ptr = 0;
185 continue;
186 }
187 clear_base = 0;
188 start = shmem_truncate_part (*ptr, ENTRIES_PER_PAGE, start, &freed);
189 }
190
191 if (clear_base) {
192 free_page ((unsigned long)base);
193 info->i_indirect = 0;
194 }
195
196 out:
197 /*
198 * We have no chance to give an error, so we limit it to max
199 * size here and the application will fail later
200 */
201 if (index > SHMEM_MAX_BLOCKS)
202 info->max_index = SHMEM_MAX_BLOCKS;
203 else
204 info->max_index = index;
205 info->swapped -= freed;
206 shmem_recalc_inode(inode);
207 spin_unlock (&info->lock);
208 }
209
210 static void shmem_delete_inode(struct inode * inode)
211 {
212 struct shmem_sb_info *info = &inode->i_sb->u.shmem_sb;
213
214 spin_lock (&shmem_ilock);
215 list_del (&inode->u.shmem_i.list);
216 spin_unlock (&shmem_ilock);
217 inode->i_size = 0;
218 shmem_truncate (inode);
219 spin_lock (&info->stat_lock);
220 info->free_inodes++;
221 spin_unlock (&info->stat_lock);
222 clear_inode(inode);
223 }
224
225 /*
226 * Move the page from the page cache to the swap cache.
227 *
228 * The page lock prevents multiple occurences of shmem_writepage at
229 * once. We still need to guard against racing with
230 * shmem_getpage_locked().
231 */
232 static int shmem_writepage(struct page * page)
233 {
234 int error;
235 struct shmem_inode_info *info;
236 swp_entry_t *entry, swap;
237 struct address_space *mapping;
238 unsigned long index;
239 struct inode *inode;
240
241 if (!PageLocked(page))
242 BUG();
243
244 mapping = page->mapping;
245 index = page->index;
246 inode = mapping->host;
247 info = &inode->u.shmem_i;
248
249 spin_lock(&info->lock);
250 entry = shmem_swp_entry(info, index);
251 if (IS_ERR(entry)) /* this had been allocated on page allocation */
252 BUG();
253 shmem_recalc_inode(inode);
254 if (entry->val)
255 BUG();
256
257 /* Remove it from the page cache */
258 lru_cache_del(page);
259 remove_inode_page(page);
260
261 swap_list_lock();
262 swap = get_swap_page();
263
264 if (!swap.val) {
265 swap_list_unlock();
266 /* Add it back to the page cache */
267 add_to_page_cache_locked(page, mapping, index);
268 activate_page(page);
269 SetPageDirty(page);
270 error = -ENOMEM;
271 goto out;
272 }
273
274 /* Add it to the swap cache */
275 add_to_swap_cache(page, swap);
276 swap_list_unlock();
277
278 set_page_dirty(page);
279 info->swapped++;
280 *entry = swap;
281 error = 0;
282 out:
283 spin_unlock(&info->lock);
284 UnlockPage(page);
285 page_cache_release(page);
286 return error;
287 }
288
289 /*
290 * shmem_getpage_locked - either get the page from swap or allocate a new one
291 *
292 * If we allocate a new one we do not mark it dirty. That's up to the
293 * vm. If we swap it in we mark it dirty since we also free the swap
294 * entry since a page cannot live in both the swap and page cache
295 *
296 * Called with the inode locked, so it cannot race with itself, but we
297 * still need to guard against racing with shm_writepage(), which might
298 * be trying to move the page to the swap cache as we run.
299 */
300 static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
301 {
302 struct address_space * mapping = inode->i_mapping;
303 struct shmem_inode_info *info;
304 struct page * page;
305 swp_entry_t *entry;
306
307 info = &inode->u.shmem_i;
308
309 repeat:
310 page = find_lock_page(mapping, idx);
311 if (page)
312 return page;
313
314 entry = shmem_swp_entry (info, idx);
315 if (IS_ERR(entry))
316 return (void *)entry;
317
318 spin_lock (&info->lock);
319
320 /* The shmem_swp_entry() call may have blocked, and
321 * shmem_writepage may have been moving a page between the page
322 * cache and swap cache. We need to recheck the page cache
323 * under the protection of the info->lock spinlock. */
324
325 page = find_get_page(mapping, idx);
326 if (page) {
327 if (TryLockPage(page))
328 goto wait_retry;
329 spin_unlock (&info->lock);
330 return page;
331 }
332
333 shmem_recalc_inode(inode);
334 if (entry->val) {
335 unsigned long flags;
336
337 /* Look it up and read it in.. */
338 page = find_get_page(&swapper_space, entry->val);
339 if (!page) {
340 swp_entry_t swap = *entry;
341 spin_unlock (&info->lock);
342 lock_kernel();
343 swapin_readahead(*entry);
344 page = read_swap_cache_async(*entry);
345 unlock_kernel();
346 if (!page) {
347 if (entry->val != swap.val)
348 goto repeat;
349 return ERR_PTR(-ENOMEM);
350 }
351 wait_on_page(page);
352 if (!Page_Uptodate(page) && entry->val == swap.val) {
353 page_cache_release(page);
354 return ERR_PTR(-EIO);
355 }
356
357 /* Too bad we can't trust this page, because we
358 * dropped the info->lock spinlock */
359 page_cache_release(page);
360 goto repeat;
361 }
362
363 /* We have to this with page locked to prevent races */
364 if (TryLockPage(page))
365 goto wait_retry;
366
367 swap_free(*entry);
368 *entry = (swp_entry_t) {0};
369 delete_from_swap_cache(page);
370 flags = page->flags & ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1);
371 page->flags = flags | (1 << PG_dirty);
372 add_to_page_cache_locked(page, mapping, idx);
373 info->swapped--;
374 spin_unlock (&info->lock);
375 } else {
376 spin_unlock (&info->lock);
377 spin_lock (&inode->i_sb->u.shmem_sb.stat_lock);
378 if (inode->i_sb->u.shmem_sb.free_blocks == 0)
379 goto no_space;
380 inode->i_sb->u.shmem_sb.free_blocks--;
381 spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
382
383 /* Ok, get a new page. We don't have to worry about the
384 * info->lock spinlock here: we cannot race against
385 * shm_writepage because we have already verified that
386 * there is no page present either in memory or in the
387 * swap cache, so we are guaranteed to be populating a
388 * new shm entry. The inode semaphore we already hold
389 * is enough to make this atomic. */
390 page = page_cache_alloc(mapping);
391 if (!page)
392 return ERR_PTR(-ENOMEM);
393 clear_highpage(page);
394 inode->i_blocks += BLOCKS_PER_PAGE;
395 add_to_page_cache (page, mapping, idx);
396 }
397
398 /* We have the page */
399 SetPageUptodate(page);
400 if (info->locked)
401 page_cache_get(page);
402 return page;
403 no_space:
404 spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
405 return ERR_PTR(-ENOSPC);
406
407 wait_retry:
408 spin_unlock (&info->lock);
409 wait_on_page(page);
410 page_cache_release(page);
411 goto repeat;
412 }
413
414 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
415 {
416 int error;
417
418 down (&inode->i_sem);
419 if (inode->i_size <= (loff_t) idx * PAGE_CACHE_SIZE)
420 goto sigbus;
421 *ptr = shmem_getpage_locked(inode, idx);
422 if (IS_ERR (*ptr))
423 goto failed;
424 UnlockPage(*ptr);
425 up (&inode->i_sem);
426 return 0;
427 failed:
428 up (&inode->i_sem);
429 error = PTR_ERR(*ptr);
430 *ptr = NOPAGE_OOM;
431 if (error != -EFBIG)
432 *ptr = NOPAGE_SIGBUS;
433 return error;
434 sigbus:
435 up (&inode->i_sem);
436 *ptr = NOPAGE_SIGBUS;
437 return -EFAULT;
438 }
439
440 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share)
441 {
442 struct page * page;
443 unsigned int idx;
444 struct inode * inode = vma->vm_file->f_dentry->d_inode;
445
446 idx = (address - vma->vm_start) >> PAGE_SHIFT;
447 idx += vma->vm_pgoff;
448
449 if (shmem_getpage(inode, idx, &page))
450 return page;
451
452 if (no_share) {
453 struct page *new_page = page_cache_alloc(inode->i_mapping);
454
455 if (new_page) {
456 copy_user_highpage(new_page, page, address);
457 flush_page_to_ram(new_page);
458 } else
459 new_page = NOPAGE_OOM;
460 page_cache_release(page);
461 return new_page;
462 }
463
464 flush_page_to_ram (page);
465 return(page);
466 }
467
468 void shmem_lock(struct file * file, int lock)
469 {
470 struct inode * inode = file->f_dentry->d_inode;
471 struct shmem_inode_info * info = &inode->u.shmem_i;
472 struct page * page;
473 unsigned long idx, size;
474
475 if (info->locked == lock)
476 return;
477 down(&inode->i_sem);
478 info->locked = lock;
479 size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
480 for (idx = 0; idx < size; idx++) {
481 page = find_lock_page(inode->i_mapping, idx);
482 if (!page)
483 continue;
484 if (!lock) {
485 /* release the extra count and our reference */
486 page_cache_release(page);
487 page_cache_release(page);
488 }
489 UnlockPage(page);
490 }
491 up(&inode->i_sem);
492 }
493
494 static int shmem_mmap(struct file * file, struct vm_area_struct * vma)
495 {
496 struct vm_operations_struct * ops;
497 struct inode *inode = file->f_dentry->d_inode;
498
499 ops = &shmem_vm_ops;
500 if (!inode->i_sb || !S_ISREG(inode->i_mode))
501 return -EACCES;
502 UPDATE_ATIME(inode);
503 vma->vm_ops = ops;
504 return 0;
505 }
506
507 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
508 {
509 struct inode * inode;
510
511 spin_lock (&sb->u.shmem_sb.stat_lock);
512 if (!sb->u.shmem_sb.free_inodes) {
513 spin_unlock (&sb->u.shmem_sb.stat_lock);
514 return NULL;
515 }
516 sb->u.shmem_sb.free_inodes--;
517 spin_unlock (&sb->u.shmem_sb.stat_lock);
518
519 inode = new_inode(sb);
520 if (inode) {
521 inode->i_mode = mode;
522 inode->i_uid = current->fsuid;
523 inode->i_gid = current->fsgid;
524 inode->i_blksize = PAGE_CACHE_SIZE;
525 inode->i_blocks = 0;
526 inode->i_rdev = NODEV;
527 inode->i_mapping->a_ops = &shmem_aops;
528 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
529 spin_lock_init (&inode->u.shmem_i.lock);
530 switch (mode & S_IFMT) {
531 default:
532 init_special_inode(inode, mode, dev);
533 break;
534 case S_IFREG:
535 inode->i_op = &shmem_inode_operations;
536 inode->i_fop = &shmem_file_operations;
537 break;
538 case S_IFDIR:
539 inode->i_nlink++;
540 inode->i_op = &shmem_dir_inode_operations;
541 inode->i_fop = &shmem_dir_operations;
542 break;
543 case S_IFLNK:
544 inode->i_op = &shmem_symlink_inode_operations;
545 break;
546 }
547 spin_lock (&shmem_ilock);
548 list_add (&inode->u.shmem_i.list, &shmem_inodes);
549 spin_unlock (&shmem_ilock);
550 }
551 return inode;
552 }
553
554 static int shmem_set_size(struct shmem_sb_info *info,
555 unsigned long max_blocks, unsigned long max_inodes)
556 {
557 int error;
558 unsigned long blocks, inodes;
559
560 spin_lock(&info->stat_lock);
561 blocks = info->max_blocks - info->free_blocks;
562 inodes = info->max_inodes - info->free_inodes;
563 error = -EINVAL;
564 if (max_blocks < blocks)
565 goto out;
566 if (max_inodes < inodes)
567 goto out;
568 error = 0;
569 info->max_blocks = max_blocks;
570 info->free_blocks = max_blocks - blocks;
571 info->max_inodes = max_inodes;
572 info->free_inodes = max_inodes - inodes;
573 out:
574 spin_unlock(&info->stat_lock);
575 return error;
576 }
577
578 #ifdef CONFIG_TMPFS
579 static ssize_t
580 shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
581 {
582 struct inode *inode = file->f_dentry->d_inode;
583 unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
584 loff_t pos;
585 struct page *page;
586 unsigned long written;
587 long status;
588 int err;
589
590
591 down(&inode->i_sem);
592
593 pos = *ppos;
594 err = -EINVAL;
595 if (pos < 0)
596 goto out;
597
598 err = file->f_error;
599 if (err) {
600 file->f_error = 0;
601 goto out;
602 }
603
604 written = 0;
605
606 if (file->f_flags & O_APPEND)
607 pos = inode->i_size;
608
609 /*
610 * Check whether we've reached the file size limit.
611 */
612 err = -EFBIG;
613 if (limit != RLIM_INFINITY) {
614 if (pos >= limit) {
615 send_sig(SIGXFSZ, current, 0);
616 goto out;
617 }
618 if (count > limit - pos) {
619 send_sig(SIGXFSZ, current, 0);
620 count = limit - pos;
621 }
622 }
623
624 status = 0;
625 if (count) {
626 remove_suid(inode);
627 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
628 }
629
630 while (count) {
631 unsigned long bytes, index, offset;
632 char *kaddr;
633 int deactivate = 1;
634
635 /*
636 * Try to find the page in the cache. If it isn't there,
637 * allocate a free page.
638 */
639 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
640 index = pos >> PAGE_CACHE_SHIFT;
641 bytes = PAGE_CACHE_SIZE - offset;
642 if (bytes > count) {
643 bytes = count;
644 deactivate = 0;
645 }
646
647 /*
648 * Bring in the user page that we will copy from _first_.
649 * Otherwise there's a nasty deadlock on copying from the
650 * same page as we're writing to, without it being marked
651 * up-to-date.
652 */
653 { volatile unsigned char dummy;
654 __get_user(dummy, buf);
655 __get_user(dummy, buf+bytes-1);
656 }
657
658 page = shmem_getpage_locked(inode, index);
659 status = PTR_ERR(page);
660 if (IS_ERR(page))
661 break;
662
663 /* We have exclusive IO access to the page.. */
664 if (!PageLocked(page)) {
665 PAGE_BUG(page);
666 }
667
668 kaddr = kmap(page);
669 // can this do a truncated write? cr
670 status = copy_from_user(kaddr+offset, buf, bytes);
671 kunmap(page);
672 if (status)
673 goto fail_write;
674
675 flush_dcache_page(page);
676 if (bytes > 0) {
677 SetPageDirty(page);
678 written += bytes;
679 count -= bytes;
680 pos += bytes;
681 buf += bytes;
682 if (pos > inode->i_size)
683 inode->i_size = pos;
684 if (inode->u.shmem_i.max_index <= index)
685 inode->u.shmem_i.max_index = index+1;
686
687 }
688 unlock:
689 /* Mark it unlocked again and drop the page.. */
690 UnlockPage(page);
691 if (deactivate)
692 deactivate_page(page);
693 page_cache_release(page);
694
695 if (status < 0)
696 break;
697 }
698 *ppos = pos;
699
700 err = written ? written : status;
701 out:
702 up(&inode->i_sem);
703 return err;
704 fail_write:
705 status = -EFAULT;
706 ClearPageUptodate(page);
707 kunmap(page);
708 goto unlock;
709 }
710
711 static void do_shmem_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc)
712 {
713 struct inode *inode = filp->f_dentry->d_inode;
714 struct address_space *mapping = inode->i_mapping;
715 unsigned long index, offset;
716 int nr = 1;
717
718 index = *ppos >> PAGE_CACHE_SHIFT;
719 offset = *ppos & ~PAGE_CACHE_MASK;
720
721 while (nr && desc->count) {
722 struct page *page;
723 unsigned long end_index, nr;
724
725 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
726 if (index > end_index)
727 break;
728 nr = PAGE_CACHE_SIZE;
729 if (index == end_index) {
730 nr = inode->i_size & ~PAGE_CACHE_MASK;
731 if (nr <= offset)
732 break;
733 }
734
735 nr = nr - offset;
736
737 if ((desc->error = shmem_getpage(inode, index, &page)))
738 break;
739
740 if (mapping->i_mmap_shared != NULL)
741 flush_dcache_page(page);
742
743 /*
744 * Ok, we have the page, and it's up-to-date, so
745 * now we can copy it to user space...
746 *
747 * The actor routine returns how many bytes were actually used..
748 * NOTE! This may not be the same as how much of a user buffer
749 * we filled up (we may be padding etc), so we can only update
750 * "pos" here (the actor routine has to update the user buffer
751 * pointers and the remaining count).
752 */
753 nr = file_read_actor(desc, page, offset, nr);
754 offset += nr;
755 index += offset >> PAGE_CACHE_SHIFT;
756 offset &= ~PAGE_CACHE_MASK;
757
758 page_cache_release(page);
759 }
760
761 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
762 UPDATE_ATIME(inode);
763 }
764
765 static ssize_t shmem_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
766 {
767 ssize_t retval;
768
769 retval = -EFAULT;
770 if (access_ok(VERIFY_WRITE, buf, count)) {
771 retval = 0;
772
773 if (count) {
774 read_descriptor_t desc;
775
776 desc.written = 0;
777 desc.count = count;
778 desc.buf = buf;
779 desc.error = 0;
780 do_shmem_file_read(filp, ppos, &desc);
781
782 retval = desc.written;
783 if (!retval)
784 retval = desc.error;
785 }
786 }
787 return retval;
788 }
789
790 static int shmem_statfs(struct super_block *sb, struct statfs *buf)
791 {
792 buf->f_type = TMPFS_MAGIC;
793 buf->f_bsize = PAGE_CACHE_SIZE;
794 spin_lock (&sb->u.shmem_sb.stat_lock);
795 buf->f_blocks = sb->u.shmem_sb.max_blocks;
796 buf->f_bavail = buf->f_bfree = sb->u.shmem_sb.free_blocks;
797 buf->f_files = sb->u.shmem_sb.max_inodes;
798 buf->f_ffree = sb->u.shmem_sb.free_inodes;
799 spin_unlock (&sb->u.shmem_sb.stat_lock);
800 buf->f_namelen = 255;
801 return 0;
802 }
803
804 /*
805 * Lookup the data. This is trivial - if the dentry didn't already
806 * exist, we know it is negative.
807 */
808 static struct dentry * shmem_lookup(struct inode *dir, struct dentry *dentry)
809 {
810 d_add(dentry, NULL);
811 return NULL;
812 }
813
814 /*
815 * File creation. Allocate an inode, and we're done..
816 */
817 static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
818 {
819 struct inode * inode = shmem_get_inode(dir->i_sb, mode, dev);
820 int error = -ENOSPC;
821
822 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
823 if (inode) {
824 d_instantiate(dentry, inode);
825 dget(dentry); /* Extra count - pin the dentry in core */
826 error = 0;
827 }
828 return error;
829 }
830
831 static int shmem_mkdir(struct inode * dir, struct dentry * dentry, int mode)
832 {
833 int error;
834
835 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
836 return error;
837 dir->i_nlink++;
838 return 0;
839 }
840
841 static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
842 {
843 return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
844 }
845
846 /*
847 * Link a file..
848 */
849 static int shmem_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry)
850 {
851 struct inode *inode = old_dentry->d_inode;
852
853 if (S_ISDIR(inode->i_mode))
854 return -EPERM;
855
856 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
857 inode->i_nlink++;
858 atomic_inc(&inode->i_count); /* New dentry reference */
859 dget(dentry); /* Extra pinning count for the created dentry */
860 d_instantiate(dentry, inode);
861 return 0;
862 }
863
864 static inline int shmem_positive(struct dentry *dentry)
865 {
866 return dentry->d_inode && !d_unhashed(dentry);
867 }
868
869 /*
870 * Check that a directory is empty (this works
871 * for regular files too, they'll just always be
872 * considered empty..).
873 *
874 * Note that an empty directory can still have
875 * children, they just all have to be negative..
876 */
877 static int shmem_empty(struct dentry *dentry)
878 {
879 struct list_head *list;
880
881 spin_lock(&dcache_lock);
882 list = dentry->d_subdirs.next;
883
884 while (list != &dentry->d_subdirs) {
885 struct dentry *de = list_entry(list, struct dentry, d_child);
886
887 if (shmem_positive(de)) {
888 spin_unlock(&dcache_lock);
889 return 0;
890 }
891 list = list->next;
892 }
893 spin_unlock(&dcache_lock);
894 return 1;
895 }
896
897 static int shmem_unlink(struct inode * dir, struct dentry *dentry)
898 {
899 struct inode *inode = dentry->d_inode;
900 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
901 inode->i_nlink--;
902 dput(dentry); /* Undo the count from "create" - this does all the work */
903 return 0;
904 }
905
906 static int shmem_rmdir(struct inode * dir, struct dentry *dentry)
907 {
908 if (!shmem_empty(dentry))
909 return -ENOTEMPTY;
910
911 dir->i_nlink--;
912 return shmem_unlink(dir, dentry);
913 }
914
915 /*
916 * The VFS layer already does all the dentry stuff for rename,
917 * we just have to decrement the usage count for the target if
918 * it exists so that the VFS layer correctly free's it when it
919 * gets overwritten.
920 */
921 static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry)
922 {
923 int error = -ENOTEMPTY;
924
925 if (shmem_empty(new_dentry)) {
926 struct inode *inode = new_dentry->d_inode;
927 if (inode) {
928 inode->i_ctime = CURRENT_TIME;
929 inode->i_nlink--;
930 dput(new_dentry);
931 }
932 error = 0;
933 old_dentry->d_inode->i_ctime = old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
934 }
935 return error;
936 }
937
938 static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
939 {
940 int error;
941 int len;
942 struct inode *inode;
943 struct page *page;
944 char *kaddr;
945
946 error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0);
947 if (error)
948 return error;
949
950 len = strlen(symname);
951 if (len > PAGE_SIZE)
952 return -ENAMETOOLONG;
953
954 inode = dentry->d_inode;
955 down(&inode->i_sem);
956 page = shmem_getpage_locked(inode, 0);
957 if (IS_ERR(page))
958 goto fail;
959 kaddr = kmap(page);
960 memcpy(kaddr, symname, len);
961 kunmap(page);
962 inode->i_size = len;
963 SetPageDirty(page);
964 UnlockPage(page);
965 page_cache_release(page);
966 up(&inode->i_sem);
967 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
968 return 0;
969 fail:
970 up(&inode->i_sem);
971 return PTR_ERR(page);
972 }
973
974 static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
975 {
976 struct page * page;
977 int res = shmem_getpage(dentry->d_inode, 0, &page);
978
979 if (res)
980 return res;
981
982 res = vfs_readlink(dentry,buffer,buflen, kmap(page));
983 kunmap(page);
984 page_cache_release(page);
985 return res;
986 }
987
988 static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
989 {
990 struct page * page;
991 int res = shmem_getpage(dentry->d_inode, 0, &page);
992 if (res)
993 return res;
994
995 res = vfs_follow_link(nd, kmap(page));
996 kunmap(page);
997 page_cache_release(page);
998 return res;
999 }
1000
1001 static int shmem_parse_options(char *options, int *mode, unsigned long * blocks, unsigned long *inodes)
1002 {
1003 char *this_char, *value;
1004
1005 this_char = NULL;
1006 if ( options )
1007 this_char = strtok(options,",");
1008 for ( ; this_char; this_char = strtok(NULL,",")) {
1009 if ((value = strchr(this_char,'=')) != NULL)
1010 *value++ = 0;
1011 if (!strcmp(this_char,"size")) {
1012 unsigned long long size;
1013 if (!value || !*value || !blocks)
1014 return 1;
1015 size = memparse(value,&value);
1016 if (*value)
1017 return 1;
1018 *blocks = size >> PAGE_CACHE_SHIFT;
1019 } else if (!strcmp(this_char,"nr_blocks")) {
1020 if (!value || !*value || !blocks)
1021 return 1;
1022 *blocks = memparse(value,&value);
1023 if (*value)
1024 return 1;
1025 } else if (!strcmp(this_char,"nr_inodes")) {
1026 if (!value || !*value || !inodes)
1027 return 1;
1028 *inodes = memparse(value,&value);
1029 if (*value)
1030 return 1;
1031 } else if (!strcmp(this_char,"mode")) {
1032 if (!value || !*value || !mode)
1033 return 1;
1034 *mode = simple_strtoul(value,&value,8);
1035 if (*value)
1036 return 1;
1037 }
1038 else
1039 return 1;
1040 }
1041 return 0;
1042 }
1043
1044 static int shmem_remount_fs (struct super_block *sb, int *flags, char *data)
1045 {
1046 struct shmem_sb_info *info = &sb->u.shmem_sb;
1047 unsigned long max_blocks = info->max_blocks;
1048 unsigned long max_inodes = info->max_inodes;
1049
1050 if (shmem_parse_options (data, NULL, &max_blocks, &max_inodes))
1051 return -EINVAL;
1052 return shmem_set_size(info, max_blocks, max_inodes);
1053 }
1054
/* fsync operation: performs no work and always reports success. */
int shmem_sync_file(struct file * file, struct dentry *dentry, int datasync)
{
	int ret = 0;

	return ret;
}
1059 #endif
1060
1061 static struct super_block *shmem_read_super(struct super_block * sb, void * data, int silent)
1062 {
1063 struct inode * inode;
1064 struct dentry * root;
1065 unsigned long blocks, inodes;
1066 int mode = S_IRWXUGO | S_ISVTX;
1067 struct sysinfo si;
1068
1069 /*
1070 * Per default we only allow half of the physical ram per
1071 * tmpfs instance
1072 */
1073 si_meminfo(&si);
1074 blocks = inodes = si.totalram / 2;
1075
1076 #ifdef CONFIG_TMPFS
1077 if (shmem_parse_options (data, &mode, &blocks, &inodes)) {
1078 printk(KERN_ERR "tmpfs invalid option\n");
1079 return NULL;
1080 }
1081 #endif
1082
1083 spin_lock_init (&sb->u.shmem_sb.stat_lock);
1084 sb->u.shmem_sb.max_blocks = blocks;
1085 sb->u.shmem_sb.free_blocks = blocks;
1086 sb->u.shmem_sb.max_inodes = inodes;
1087 sb->u.shmem_sb.free_inodes = inodes;
1088 sb->s_maxbytes = (unsigned long long)SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT;
1089 sb->s_blocksize = PAGE_CACHE_SIZE;
1090 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1091 sb->s_magic = TMPFS_MAGIC;
1092 sb->s_op = &shmem_ops;
1093 inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
1094 if (!inode)
1095 return NULL;
1096
1097 root = d_alloc_root(inode);
1098 if (!root) {
1099 iput(inode);
1100 return NULL;
1101 }
1102 sb->s_root = root;
1103 return sb;
1104 }
1105
1106
1107
1108 static struct address_space_operations shmem_aops = {
1109 writepage: shmem_writepage
1110 };
1111
1112 static struct file_operations shmem_file_operations = {
1113 mmap: shmem_mmap,
1114 #ifdef CONFIG_TMPFS
1115 read: shmem_file_read,
1116 write: shmem_file_write,
1117 fsync: shmem_sync_file,
1118 #endif
1119 };
1120
1121 static struct inode_operations shmem_inode_operations = {
1122 truncate: shmem_truncate,
1123 };
1124
1125 static struct inode_operations shmem_symlink_inode_operations = {
1126 truncate: shmem_truncate,
1127 #ifdef CONFIG_TMPFS
1128 readlink: shmem_readlink,
1129 follow_link: shmem_follow_link,
1130 #endif
1131 };
1132
1133 static struct file_operations shmem_dir_operations = {
1134 read: generic_read_dir,
1135 readdir: dcache_readdir,
1136 #ifdef CONFIG_TMPFS
1137 fsync: shmem_sync_file,
1138 #endif
1139 };
1140
1141 static struct inode_operations shmem_dir_inode_operations = {
1142 #ifdef CONFIG_TMPFS
1143 create: shmem_create,
1144 lookup: shmem_lookup,
1145 link: shmem_link,
1146 unlink: shmem_unlink,
1147 symlink: shmem_symlink,
1148 mkdir: shmem_mkdir,
1149 rmdir: shmem_rmdir,
1150 mknod: shmem_mknod,
1151 rename: shmem_rename,
1152 #endif
1153 };
1154
1155 static struct super_operations shmem_ops = {
1156 #ifdef CONFIG_TMPFS
1157 statfs: shmem_statfs,
1158 remount_fs: shmem_remount_fs,
1159 #endif
1160 delete_inode: shmem_delete_inode,
1161 put_inode: force_delete,
1162 };
1163
1164 static struct vm_operations_struct shmem_vm_ops = {
1165 nopage: shmem_nopage,
1166 };
1167
1168 #ifdef CONFIG_TMPFS
1169 /* type "shm" will be tagged obsolete in 2.5 */
1170 static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
1171 static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER);
1172 #else
1173 static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
1174 #endif
1175 static struct vfsmount *shm_mnt;
1176
1177 static int __init init_shmem_fs(void)
1178 {
1179 int error;
1180 struct vfsmount * res;
1181
1182 if ((error = register_filesystem(&tmpfs_fs_type))) {
1183 printk (KERN_ERR "Could not register tmpfs\n");
1184 return error;
1185 }
1186 #ifdef CONFIG_TMPFS
1187 if ((error = register_filesystem(&shmem_fs_type))) {
1188 printk (KERN_ERR "Could not register shm fs\n");
1189 return error;
1190 }
1191 devfs_mk_dir (NULL, "shm", NULL);
1192 #endif
1193 res = kern_mount(&tmpfs_fs_type);
1194 if (IS_ERR (res)) {
1195 printk (KERN_ERR "could not kern_mount tmpfs\n");
1196 unregister_filesystem(&tmpfs_fs_type);
1197 return PTR_ERR(res);
1198 }
1199 shm_mnt = res;
1200
1201 /* The internal instance should not do size checking */
1202 if ((error = shmem_set_size(&res->mnt_sb->u.shmem_sb, ULONG_MAX, ULONG_MAX)))
1203 printk (KERN_ERR "could not set limits on internal tmpfs\n");
1204
1205 return 0;
1206 }
1207
/*
 * exit_shmem_fs - undo init_shmem_fs()
 *
 * Unregisters the "shm" type (only built with CONFIG_TMPFS) and the
 * "tmpfs" type, then calls mntput() on the internal mount stored in
 * shm_mnt.
 */
1208 static void __exit exit_shmem_fs(void)
1209 {
1210 #ifdef CONFIG_TMPFS
1211 unregister_filesystem(&shmem_fs_type);
1212 #endif
1213 unregister_filesystem(&tmpfs_fs_type);
1214 mntput(shm_mnt);
1215 }
1216
/* Hook init_shmem_fs()/exit_shmem_fs() up as this file's init and exit entry points. */
1217 module_init(init_shmem_fs)
1218 module_exit(exit_shmem_fs)
1219
1220 static int shmem_clear_swp (swp_entry_t entry, swp_entry_t *ptr, int size) {
1221 swp_entry_t *test;
1222
1223 for (test = ptr; test < ptr + size; test++) {
1224 if (test->val == entry.val) {
1225 swap_free (entry);
1226 *test = (swp_entry_t) {0};
1227 return test - ptr;
1228 }
1229 }
1230 return -1;
1231 }
1232
1233 static int shmem_unuse_inode (struct inode *inode, swp_entry_t entry, struct page *page)
1234 {
1235 swp_entry_t **base, **ptr;
1236 unsigned long idx;
1237 int offset;
1238 struct shmem_inode_info *info = &inode->u.shmem_i;
1239
1240 idx = 0;
1241 spin_lock (&info->lock);
1242 if ((offset = shmem_clear_swp (entry,info->i_direct, SHMEM_NR_DIRECT)) >= 0)
1243 goto found;
1244
1245 idx = SHMEM_NR_DIRECT;
1246 if (!(base = info->i_indirect))
1247 goto out;
1248
1249 for (ptr = base; ptr < base + ENTRIES_PER_PAGE; ptr++) {
1250 if (*ptr &&
1251 (offset = shmem_clear_swp (entry, *ptr, ENTRIES_PER_PAGE)) >= 0)
1252 goto found;
1253 idx += ENTRIES_PER_PAGE;
1254 }
1255 out:
1256 spin_unlock (&info->lock);
1257 return 0;
1258 found:
1259 add_to_page_cache(page, inode->i_mapping, offset + idx);
1260 SetPageDirty(page);
1261 SetPageUptodate(page);
1262 UnlockPage(page);
1263 info->swapped--;
1264 spin_unlock(&info->lock);
1265 return 1;
1266 }
1267
1268 /*
1269 * unuse_shmem() search for an eventually swapped out shmem page.
1270 */
1271 void shmem_unuse(swp_entry_t entry, struct page *page)
1272 {
1273 struct list_head *p;
1274 struct inode * inode;
1275
1276 spin_lock (&shmem_ilock);
1277 list_for_each(p, &shmem_inodes) {
1278 inode = list_entry(p, struct inode, u.shmem_i.list);
1279
1280 if (shmem_unuse_inode(inode, entry, page))
1281 break;
1282 }
1283 spin_unlock (&shmem_ilock);
1284 }
1285
1286
1287 /*
1288 * shmem_file_setup - get an unlinked file living in shmem fs
1289 *
1290 * @name: name for dentry (to be seen in /proc/<pid>/maps
1291 * @size: size to be set for the file
1292 *
1293 */
1294 struct file *shmem_file_setup(char * name, loff_t size)
1295 {
1296 int error;
1297 struct file *file;
1298 struct inode * inode;
1299 struct dentry *dentry, *root;
1300 struct qstr this;
1301 int vm_enough_memory(long pages);
1302
1303 if (size > (unsigned long long) SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT)
1304 return ERR_PTR(-EINVAL);
1305
1306 if (!vm_enough_memory((size) >> PAGE_SHIFT))
1307 return ERR_PTR(-ENOMEM);
1308
1309 this.name = name;
1310 this.len = strlen(name);
1311 this.hash = 0; /* will go */
1312 root = shm_mnt->mnt_root;
1313 dentry = d_alloc(root, &this);
1314 if (!dentry)
1315 return ERR_PTR(-ENOMEM);
1316
1317 error = -ENFILE;
1318 file = get_empty_filp();
1319 if (!file)
1320 goto put_dentry;
1321
1322 error = -ENOSPC;
1323 inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
1324 if (!inode)
1325 goto close_file;
1326
1327 d_instantiate(dentry, inode);
1328 dentry->d_inode->i_size = size;
1329 shmem_truncate(inode);
1330 file->f_vfsmnt = mntget(shm_mnt);
1331 file->f_dentry = dentry;
1332 file->f_op = &shmem_file_operations;
1333 file->f_mode = FMODE_WRITE | FMODE_READ;
1334 inode->i_nlink = 0; /* It is unlinked */
1335 return(file);
1336
1337 close_file:
1338 put_filp(file);
1339 put_dentry:
1340 dput (dentry);
1341 return ERR_PTR(error);
1342 }
1343 /*
1344 * shmem_zero_setup - setup a shared anonymous mapping
1345 *
1346 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
1347 */
1348 int shmem_zero_setup(struct vm_area_struct *vma)
1349 {
1350 struct file *file;
1351 loff_t size = vma->vm_end - vma->vm_start;
1352
1353 file = shmem_file_setup("dev/zero", size);
1354 if (IS_ERR(file))
1355 return PTR_ERR(file);
1356
1357 if (vma->vm_file)
1358 fput (vma->vm_file);
1359 vma->vm_file = file;
1360 vma->vm_ops = &shmem_vm_ops;
1361 return 0;
1362 }
1363
/* Export shmem_file_setup() so that code outside this file can call it. */
1364 EXPORT_SYMBOL(shmem_file_setup);
1365