File: /usr/src/linux/mm/shmem.c

1     /*
2      * Resizable virtual memory filesystem for Linux.
3      *
4      * Copyright (C) 2000 Linus Torvalds.
5      *		 2000 Transmeta Corp.
6      *		 2000-2001 Christoph Rohland
7      *		 2000-2001 SAP AG
8      * 
9      * This file is released under the GPL.
10      */
11     
12     /*
13      * This virtual memory filesystem is heavily based on the ramfs. It
14      * extends ramfs by the ability to use swap and honor resource limits
15      * which makes it a completely usable filesystem.
16      */
17     
18     #include <linux/config.h>
19     #include <linux/module.h>
20     #include <linux/init.h>
21     #include <linux/devfs_fs_kernel.h>
22     #include <linux/fs.h>
23     #include <linux/mm.h>
24     #include <linux/file.h>
25     #include <linux/swap.h>
26     #include <linux/pagemap.h>
27     #include <linux/string.h>
28     #include <linux/locks.h>
29     #include <asm/smplock.h>
30     
31     #include <asm/uaccess.h>
32     
33     /* This magic number is used in glibc for posix shared memory */
34     #define TMPFS_MAGIC	0x01021994
35     
36     #define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
37     #define SHMEM_MAX_BLOCKS (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
38     
39     static struct super_operations shmem_ops;
40     static struct address_space_operations shmem_aops;
41     static struct file_operations shmem_file_operations;
42     static struct inode_operations shmem_inode_operations;
43     static struct file_operations shmem_dir_operations;
44     static struct inode_operations shmem_dir_inode_operations;
45     static struct inode_operations shmem_symlink_inode_operations;
46     static struct vm_operations_struct shmem_vm_ops;
47     
48     LIST_HEAD (shmem_inodes);
49     static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
50     
51     #define BLOCKS_PER_PAGE (PAGE_SIZE/512)
52     
53     /*
54      * shmem_recalc_inode - recalculate the size of an inode
55      *
56      * @inode: inode to recalc
57      *
58      * We have to calculate the free blocks since the mm can drop pages
59      * behind our back
60      *
61      * But we know that normally
62      * inodes->i_blocks/BLOCKS_PER_PAGE == 
63      * 			inode->i_mapping->nrpages + info->swapped
64      *
65      * So the mm freed 
66      * inodes->i_blocks/BLOCKS_PER_PAGE - 
67      *			(inode->i_mapping->nrpages + info->swapped)
68      *
69      * It has to be called with the spinlock held.
70      */
71     
72     static void shmem_recalc_inode(struct inode * inode)
73     {
74     	unsigned long freed;
75     
76     	freed = (inode->i_blocks/BLOCKS_PER_PAGE) -
77     		(inode->i_mapping->nrpages + inode->u.shmem_i.swapped);
78     	if (freed){
79     		struct shmem_sb_info * info = &inode->i_sb->u.shmem_sb;
80     		inode->i_blocks -= freed*BLOCKS_PER_PAGE;
81     		spin_lock (&info->stat_lock);
82     		info->free_blocks += freed;
83     		spin_unlock (&info->stat_lock);
84     	}
85     }
86     
87     static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index) 
88     {
89     	unsigned long offset;
90     
91     	if (index < SHMEM_NR_DIRECT)
92     		return info->i_direct+index;
93     
94     	index -= SHMEM_NR_DIRECT;
95     	offset = index % ENTRIES_PER_PAGE;
96     	index /= ENTRIES_PER_PAGE;
97     
98     	if (index >= ENTRIES_PER_PAGE)
99     		return ERR_PTR(-EFBIG);
100     
101     	if (!info->i_indirect) {
102     		info->i_indirect = (swp_entry_t **) get_zeroed_page(GFP_USER);
103     		if (!info->i_indirect)
104     			return ERR_PTR(-ENOMEM);
105     	}
106     	if(!(info->i_indirect[index])) {
107     		info->i_indirect[index] = (swp_entry_t *) get_zeroed_page(GFP_USER);
108     		if (!info->i_indirect[index])
109     			return ERR_PTR(-ENOMEM);
110     	}
111     	
112     	return info->i_indirect[index]+offset;
113     }
114     
115     static int shmem_free_swp(swp_entry_t *dir, unsigned int count)
116     {
117     	swp_entry_t *ptr, entry;
118     	int freed = 0;
119     
120     	for (ptr = dir; ptr < dir + count; ptr++) {
121     		if (!ptr->val)
122     			continue;
123     		entry = *ptr;
124     		*ptr = (swp_entry_t){0};
125     		freed++;
126     
127     		/* vmscan will do the actual page freeing later.. */
128     		swap_free (entry);
129     	}
130     	return freed;
131     }
132     
133     /*
134      * shmem_truncate_part - free a bunch of swap entries
135      *
136      * @dir:	pointer to swp_entries 
137      * @size:	number of entries in dir
138      * @start:	offset to start from
139      * @freed:	counter for freed pages
140      *
141      * It frees the swap entries from dir+start til dir+size
142      *
143      * returns 0 if it truncated something, else (offset-size)
144      */
145     
146     static unsigned long 
147     shmem_truncate_part (swp_entry_t * dir, unsigned long size, 
148     		     unsigned long start, unsigned long *freed) {
149     	if (start > size)
150     		return start - size;
151     	if (dir)
152     		*freed += shmem_free_swp (dir+start, size-start);
153     	
154     	return 0;
155     }
156     
157     static void shmem_truncate (struct inode * inode)
158     {
159     	int clear_base;
160     	unsigned long index, start;
161     	unsigned long freed = 0;
162     	swp_entry_t **base, **ptr, **last;
163     	struct shmem_inode_info * info = &inode->u.shmem_i;
164     
165     	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
166     	spin_lock (&info->lock);
167     	index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
168     	if (index > info->max_index)
169     		goto out;
170     
171     	start = shmem_truncate_part (info->i_direct, SHMEM_NR_DIRECT, index, &freed);
172     
173     	if (!(base = info->i_indirect))
174     		goto out;
175     
176     	clear_base = 1;
177     	last = base + ((info->max_index - SHMEM_NR_DIRECT + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE);
178     	for (ptr = base; ptr < last; ptr++) {
179     		if (!start) {
180     			if (!*ptr)
181     				continue;
182     			freed += shmem_free_swp (*ptr, ENTRIES_PER_PAGE);
183     			free_page ((unsigned long) *ptr);
184     			*ptr = 0;
185     			continue;
186     		}
187     		clear_base = 0;
188     		start = shmem_truncate_part (*ptr, ENTRIES_PER_PAGE, start, &freed);
189     	}
190     
191     	if (clear_base) {
192     		free_page ((unsigned long)base);
193     		info->i_indirect = 0;
194     	}
195     
196     out:
197     	/*
198     	 * We have no chance to give an error, so we limit it to max
199     	 * size here and the application will fail later
200     	 */
201     	if (index > SHMEM_MAX_BLOCKS) 
202     		info->max_index = SHMEM_MAX_BLOCKS;
203     	else
204     		info->max_index = index;
205     	info->swapped -= freed;
206     	shmem_recalc_inode(inode);
207     	spin_unlock (&info->lock);
208     }
209     
210     static void shmem_delete_inode(struct inode * inode)
211     {
212     	struct shmem_sb_info *info = &inode->i_sb->u.shmem_sb;
213     
214     	spin_lock (&shmem_ilock);
215     	list_del (&inode->u.shmem_i.list);
216     	spin_unlock (&shmem_ilock);
217     	inode->i_size = 0;
218     	shmem_truncate (inode);
219     	spin_lock (&info->stat_lock);
220     	info->free_inodes++;
221     	spin_unlock (&info->stat_lock);
222     	clear_inode(inode);
223     }
224     
225     /*
226      * Move the page from the page cache to the swap cache.
227      *
228      * The page lock prevents multiple occurences of shmem_writepage at
229      * once.  We still need to guard against racing with
230      * shmem_getpage_locked().  
231      */
232     static int shmem_writepage(struct page * page)
233     {
234     	int error;
235     	struct shmem_inode_info *info;
236     	swp_entry_t *entry, swap;
237     	struct address_space *mapping;
238     	unsigned long index;
239     	struct inode *inode;
240     
241     	if (!PageLocked(page))
242     		BUG();
243     
244     	mapping = page->mapping;
245     	index = page->index;
246     	inode = mapping->host;
247     	info = &inode->u.shmem_i;
248     
249     	spin_lock(&info->lock);
250     	entry = shmem_swp_entry(info, index);
251     	if (IS_ERR(entry))	/* this had been allocated on page allocation */
252     		BUG();
253     	shmem_recalc_inode(inode);
254     	if (entry->val)
255     		BUG();
256     
257     	/* Remove it from the page cache */
258     	lru_cache_del(page);
259     	remove_inode_page(page);
260     
261     	swap_list_lock();
262     	swap = get_swap_page();
263     
264     	if (!swap.val) {
265     		swap_list_unlock();
266     		/* Add it back to the page cache */
267     		add_to_page_cache_locked(page, mapping, index);
268     		activate_page(page);
269     		SetPageDirty(page);
270     		error = -ENOMEM;
271     		goto out;
272     	}
273     
274     	/* Add it to the swap cache */
275     	add_to_swap_cache(page, swap);
276     	swap_list_unlock();
277     
278     	set_page_dirty(page);
279     	info->swapped++;
280     	*entry = swap;
281     	error = 0;
282     out:
283     	spin_unlock(&info->lock);
284     	UnlockPage(page);
285     	page_cache_release(page);
286     	return error;
287     }
288     
289     /*
290      * shmem_getpage_locked - either get the page from swap or allocate a new one
291      *
292      * If we allocate a new one we do not mark it dirty. That's up to the
293      * vm. If we swap it in we mark it dirty since we also free the swap
294      * entry since a page cannot live in both the swap and page cache
295      *
296      * Called with the inode locked, so it cannot race with itself, but we
297      * still need to guard against racing with shm_writepage(), which might
298      * be trying to move the page to the swap cache as we run.
299      */
300     static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
301     {
302     	struct address_space * mapping = inode->i_mapping;
303     	struct shmem_inode_info *info;
304     	struct page * page;
305     	swp_entry_t *entry;
306     
307     	info = &inode->u.shmem_i;
308     
309     repeat:
310     	page = find_lock_page(mapping, idx);
311     	if (page)
312     		return page;
313     
314     	entry = shmem_swp_entry (info, idx);
315     	if (IS_ERR(entry))
316     		return (void *)entry;
317     
318     	spin_lock (&info->lock);
319     	
320     	/* The shmem_swp_entry() call may have blocked, and
321     	 * shmem_writepage may have been moving a page between the page
322     	 * cache and swap cache.  We need to recheck the page cache
323     	 * under the protection of the info->lock spinlock. */
324     
325     	page = find_get_page(mapping, idx);
326     	if (page) {
327     		if (TryLockPage(page))
328     			goto wait_retry;
329     		spin_unlock (&info->lock);
330     		return page;
331     	}
332     	
333     	shmem_recalc_inode(inode);
334     	if (entry->val) {
335     		unsigned long flags;
336     
337     		/* Look it up and read it in.. */
338     		page = find_get_page(&swapper_space, entry->val);
339     		if (!page) {
340     			swp_entry_t swap = *entry;
341     			spin_unlock (&info->lock);
342     			lock_kernel();
343     			swapin_readahead(*entry);
344     			page = read_swap_cache_async(*entry);
345     			unlock_kernel();
346     			if (!page) {
347     				if (entry->val != swap.val)
348     					goto repeat;
349     				return ERR_PTR(-ENOMEM);
350     			}
351     			wait_on_page(page);
352     			if (!Page_Uptodate(page) && entry->val == swap.val) {
353     				page_cache_release(page);
354     				return ERR_PTR(-EIO);
355     			}
356     			
357     			/* Too bad we can't trust this page, because we
358     			 * dropped the info->lock spinlock */
359     			page_cache_release(page);
360     			goto repeat;
361     		}
362     
363     		/* We have to this with page locked to prevent races */
364     		if (TryLockPage(page)) 
365     			goto wait_retry;
366     
367     		swap_free(*entry);
368     		*entry = (swp_entry_t) {0};
369     		delete_from_swap_cache(page);
370     		flags = page->flags & ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1);
371     		page->flags = flags | (1 << PG_dirty);
372     		add_to_page_cache_locked(page, mapping, idx);
373     		info->swapped--;
374     		spin_unlock (&info->lock);
375     	} else {
376     		spin_unlock (&info->lock);
377     		spin_lock (&inode->i_sb->u.shmem_sb.stat_lock);
378     		if (inode->i_sb->u.shmem_sb.free_blocks == 0)
379     			goto no_space;
380     		inode->i_sb->u.shmem_sb.free_blocks--;
381     		spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
382     
383     		/* Ok, get a new page.  We don't have to worry about the
384     		 * info->lock spinlock here: we cannot race against
385     		 * shm_writepage because we have already verified that
386     		 * there is no page present either in memory or in the
387     		 * swap cache, so we are guaranteed to be populating a
388     		 * new shm entry.  The inode semaphore we already hold
389     		 * is enough to make this atomic. */
390     		page = page_cache_alloc(mapping);
391     		if (!page)
392     			return ERR_PTR(-ENOMEM);
393     		clear_highpage(page);
394     		inode->i_blocks += BLOCKS_PER_PAGE;
395     		add_to_page_cache (page, mapping, idx);
396     	}
397     
398     	/* We have the page */
399     	SetPageUptodate(page);
400     	if (info->locked)
401     		page_cache_get(page);
402     	return page;
403     no_space:
404     	spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
405     	return ERR_PTR(-ENOSPC);
406     
407     wait_retry:
408     	spin_unlock (&info->lock);
409     	wait_on_page(page);
410     	page_cache_release(page);
411     	goto repeat;
412     }
413     
414     static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
415     {
416     	int error;
417     
418     	down (&inode->i_sem);
419     	if (inode->i_size <= (loff_t) idx * PAGE_CACHE_SIZE)
420     		goto sigbus;
421     	*ptr = shmem_getpage_locked(inode, idx);
422     	if (IS_ERR (*ptr))
423     		goto failed;
424     	UnlockPage(*ptr);
425     	up (&inode->i_sem);
426     	return 0;
427     failed:
428     	up (&inode->i_sem);
429     	error = PTR_ERR(*ptr);
430     	*ptr = NOPAGE_OOM;
431     	if (error != -EFBIG)
432     		*ptr = NOPAGE_SIGBUS;
433     	return error;
434     sigbus:
435     	up (&inode->i_sem);
436     	*ptr = NOPAGE_SIGBUS;
437     	return -EFAULT;
438     }
439     
440     struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share)
441     {
442     	struct page * page;
443     	unsigned int idx;
444     	struct inode * inode = vma->vm_file->f_dentry->d_inode;
445     
446     	idx = (address - vma->vm_start) >> PAGE_SHIFT;
447     	idx += vma->vm_pgoff;
448     
449     	if (shmem_getpage(inode, idx, &page))
450     		return page;
451     
452     	if (no_share) {
453     		struct page *new_page = page_cache_alloc(inode->i_mapping);
454     
455     		if (new_page) {
456     			copy_user_highpage(new_page, page, address);
457     			flush_page_to_ram(new_page);
458     		} else
459     			new_page = NOPAGE_OOM;
460     		page_cache_release(page);
461     		return new_page;
462     	}
463     
464     	flush_page_to_ram (page);
465     	return(page);
466     }
467     
468     void shmem_lock(struct file * file, int lock)
469     {
470     	struct inode * inode = file->f_dentry->d_inode;
471     	struct shmem_inode_info * info = &inode->u.shmem_i;
472     	struct page * page;
473     	unsigned long idx, size;
474     
475     	if (info->locked == lock)
476     		return;
477     	down(&inode->i_sem);
478     	info->locked = lock;
479     	size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
480     	for (idx = 0; idx < size; idx++) {
481     		page = find_lock_page(inode->i_mapping, idx);
482     		if (!page)
483     			continue;
484     		if (!lock) {
485     			/* release the extra count and our reference */
486     			page_cache_release(page);
487     			page_cache_release(page);
488     		}
489     		UnlockPage(page);
490     	}
491     	up(&inode->i_sem);
492     }
493     
494     static int shmem_mmap(struct file * file, struct vm_area_struct * vma)
495     {
496     	struct vm_operations_struct * ops;
497     	struct inode *inode = file->f_dentry->d_inode;
498     
499     	ops = &shmem_vm_ops;
500     	if (!inode->i_sb || !S_ISREG(inode->i_mode))
501     		return -EACCES;
502     	UPDATE_ATIME(inode);
503     	vma->vm_ops = ops;
504     	return 0;
505     }
506     
507     struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
508     {
509     	struct inode * inode;
510     
511     	spin_lock (&sb->u.shmem_sb.stat_lock);
512     	if (!sb->u.shmem_sb.free_inodes) {
513     		spin_unlock (&sb->u.shmem_sb.stat_lock);
514     		return NULL;
515     	}
516     	sb->u.shmem_sb.free_inodes--;
517     	spin_unlock (&sb->u.shmem_sb.stat_lock);
518     
519     	inode = new_inode(sb);
520     	if (inode) {
521     		inode->i_mode = mode;
522     		inode->i_uid = current->fsuid;
523     		inode->i_gid = current->fsgid;
524     		inode->i_blksize = PAGE_CACHE_SIZE;
525     		inode->i_blocks = 0;
526     		inode->i_rdev = NODEV;
527     		inode->i_mapping->a_ops = &shmem_aops;
528     		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
529     		spin_lock_init (&inode->u.shmem_i.lock);
530     		switch (mode & S_IFMT) {
531     		default:
532     			init_special_inode(inode, mode, dev);
533     			break;
534     		case S_IFREG:
535     			inode->i_op = &shmem_inode_operations;
536     			inode->i_fop = &shmem_file_operations;
537     			break;
538     		case S_IFDIR:
539     			inode->i_nlink++;
540     			inode->i_op = &shmem_dir_inode_operations;
541     			inode->i_fop = &shmem_dir_operations;
542     			break;
543     		case S_IFLNK:
544     			inode->i_op = &shmem_symlink_inode_operations;
545     			break;
546     		}
547     		spin_lock (&shmem_ilock);
548     		list_add (&inode->u.shmem_i.list, &shmem_inodes);
549     		spin_unlock (&shmem_ilock);
550     	}
551     	return inode;
552     }
553     
554     static int shmem_set_size(struct shmem_sb_info *info,
555     			  unsigned long max_blocks, unsigned long max_inodes)
556     {
557     	int error;
558     	unsigned long blocks, inodes;
559     
560     	spin_lock(&info->stat_lock);
561     	blocks = info->max_blocks - info->free_blocks;
562     	inodes = info->max_inodes - info->free_inodes;
563     	error = -EINVAL;
564     	if (max_blocks < blocks)
565     		goto out;
566     	if (max_inodes < inodes)
567     		goto out;
568     	error = 0;
569     	info->max_blocks  = max_blocks;
570     	info->free_blocks = max_blocks - blocks;
571     	info->max_inodes  = max_inodes;
572     	info->free_inodes = max_inodes - inodes;
573     out:
574     	spin_unlock(&info->stat_lock);
575     	return error;
576     }
577     
578     #ifdef CONFIG_TMPFS
579     static ssize_t
580     shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
581     {
582     	struct inode	*inode = file->f_dentry->d_inode; 
583     	unsigned long	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
584     	loff_t		pos;
585     	struct page	*page;
586     	unsigned long	written;
587     	long		status;
588     	int		err;
589     
590     
591     	down(&inode->i_sem);
592     
593     	pos = *ppos;
594     	err = -EINVAL;
595     	if (pos < 0)
596     		goto out;
597     
598     	err = file->f_error;
599     	if (err) {
600     		file->f_error = 0;
601     		goto out;
602     	}
603     
604     	written = 0;
605     
606     	if (file->f_flags & O_APPEND)
607     		pos = inode->i_size;
608     
609     	/*
610     	 * Check whether we've reached the file size limit.
611     	 */
612     	err = -EFBIG;
613     	if (limit != RLIM_INFINITY) {
614     		if (pos >= limit) {
615     			send_sig(SIGXFSZ, current, 0);
616     			goto out;
617     		}
618     		if (count > limit - pos) {
619     			send_sig(SIGXFSZ, current, 0);
620     			count = limit - pos;
621     		}
622     	}
623     
624     	status	= 0;
625     	if (count) {
626     		remove_suid(inode);
627     		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
628     	}
629     
630     	while (count) {
631     		unsigned long bytes, index, offset;
632     		char *kaddr;
633     		int deactivate = 1;
634     
635     		/*
636     		 * Try to find the page in the cache. If it isn't there,
637     		 * allocate a free page.
638     		 */
639     		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
640     		index = pos >> PAGE_CACHE_SHIFT;
641     		bytes = PAGE_CACHE_SIZE - offset;
642     		if (bytes > count) {
643     			bytes = count;
644     			deactivate = 0;
645     		}
646     
647     		/*
648     		 * Bring in the user page that we will copy from _first_.
649     		 * Otherwise there's a nasty deadlock on copying from the
650     		 * same page as we're writing to, without it being marked
651     		 * up-to-date.
652     		 */
653     		{ volatile unsigned char dummy;
654     			__get_user(dummy, buf);
655     			__get_user(dummy, buf+bytes-1);
656     		}
657     
658     		page = shmem_getpage_locked(inode, index);
659     		status = PTR_ERR(page);
660     		if (IS_ERR(page))
661     			break;
662     
663     		/* We have exclusive IO access to the page.. */
664     		if (!PageLocked(page)) {
665     			PAGE_BUG(page);
666     		}
667     
668     		kaddr = kmap(page);
669     // can this do a truncated write? cr
670     		status = copy_from_user(kaddr+offset, buf, bytes);
671     		kunmap(page);
672     		if (status)
673     			goto fail_write;
674     
675     		flush_dcache_page(page);
676     		if (bytes > 0) {
677     			SetPageDirty(page);
678     			written += bytes;
679     			count -= bytes;
680     			pos += bytes;
681     			buf += bytes;
682     			if (pos > inode->i_size) 
683     				inode->i_size = pos;
684     			if (inode->u.shmem_i.max_index <= index)
685     				inode->u.shmem_i.max_index = index+1;
686     
687     		}
688     unlock:
689     		/* Mark it unlocked again and drop the page.. */
690     		UnlockPage(page);
691     		if (deactivate)
692     			deactivate_page(page);
693     		page_cache_release(page);
694     
695     		if (status < 0)
696     			break;
697     	}
698     	*ppos = pos;
699     
700     	err = written ? written : status;
701     out:
702     	up(&inode->i_sem);
703     	return err;
704     fail_write:
705     	status = -EFAULT;
706     	ClearPageUptodate(page);
707     	kunmap(page);
708     	goto unlock;
709     }
710     
711     static void do_shmem_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc)
712     {
713     	struct inode *inode = filp->f_dentry->d_inode;
714     	struct address_space *mapping = inode->i_mapping;
715     	unsigned long index, offset;
716     	int nr = 1;
717     
718     	index = *ppos >> PAGE_CACHE_SHIFT;
719     	offset = *ppos & ~PAGE_CACHE_MASK;
720     
721     	while (nr && desc->count) {
722     		struct page *page;
723     		unsigned long end_index, nr;
724     
725     		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
726     		if (index > end_index)
727     			break;
728     		nr = PAGE_CACHE_SIZE;
729     		if (index == end_index) {
730     			nr = inode->i_size & ~PAGE_CACHE_MASK;
731     			if (nr <= offset)
732     				break;
733     		}
734     
735     		nr = nr - offset;
736     
737     		if ((desc->error = shmem_getpage(inode, index, &page)))
738     			break;
739     
740     		if (mapping->i_mmap_shared != NULL)
741     			flush_dcache_page(page);
742     
743     		/*
744     		 * Ok, we have the page, and it's up-to-date, so
745     		 * now we can copy it to user space...
746     		 *
747     		 * The actor routine returns how many bytes were actually used..
748     		 * NOTE! This may not be the same as how much of a user buffer
749     		 * we filled up (we may be padding etc), so we can only update
750     		 * "pos" here (the actor routine has to update the user buffer
751     		 * pointers and the remaining count).
752     		 */
753     		nr = file_read_actor(desc, page, offset, nr);
754     		offset += nr;
755     		index += offset >> PAGE_CACHE_SHIFT;
756     		offset &= ~PAGE_CACHE_MASK;
757     	
758     		page_cache_release(page);
759     	}
760     
761     	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
762     	UPDATE_ATIME(inode);
763     }
764     
765     static ssize_t shmem_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
766     {
767     	ssize_t retval;
768     
769     	retval = -EFAULT;
770     	if (access_ok(VERIFY_WRITE, buf, count)) {
771     		retval = 0;
772     
773     		if (count) {
774     			read_descriptor_t desc;
775     
776     			desc.written = 0;
777     			desc.count = count;
778     			desc.buf = buf;
779     			desc.error = 0;
780     			do_shmem_file_read(filp, ppos, &desc);
781     
782     			retval = desc.written;
783     			if (!retval)
784     				retval = desc.error;
785     		}
786     	}
787     	return retval;
788     }
789     
790     static int shmem_statfs(struct super_block *sb, struct statfs *buf)
791     {
792     	buf->f_type = TMPFS_MAGIC;
793     	buf->f_bsize = PAGE_CACHE_SIZE;
794     	spin_lock (&sb->u.shmem_sb.stat_lock);
795     	buf->f_blocks = sb->u.shmem_sb.max_blocks;
796     	buf->f_bavail = buf->f_bfree = sb->u.shmem_sb.free_blocks;
797     	buf->f_files = sb->u.shmem_sb.max_inodes;
798     	buf->f_ffree = sb->u.shmem_sb.free_inodes;
799     	spin_unlock (&sb->u.shmem_sb.stat_lock);
800     	buf->f_namelen = 255;
801     	return 0;
802     }
803     
804     /*
805      * Lookup the data. This is trivial - if the dentry didn't already
806      * exist, we know it is negative.
807      */
808     static struct dentry * shmem_lookup(struct inode *dir, struct dentry *dentry)
809     {
810     	d_add(dentry, NULL);
811     	return NULL;
812     }
813     
814     /*
815      * File creation. Allocate an inode, and we're done..
816      */
817     static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
818     {
819     	struct inode * inode = shmem_get_inode(dir->i_sb, mode, dev);
820     	int error = -ENOSPC;
821     
822     	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
823     	if (inode) {
824     		d_instantiate(dentry, inode);
825     		dget(dentry); /* Extra count - pin the dentry in core */
826     		error = 0;
827     	}
828     	return error;
829     }
830     
831     static int shmem_mkdir(struct inode * dir, struct dentry * dentry, int mode)
832     {
833     	int error;
834     
835     	if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
836     		return error;
837     	dir->i_nlink++;
838     	return 0;
839     }
840     
841     static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
842     {
843     	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
844     }
845     
846     /*
847      * Link a file..
848      */
849     static int shmem_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry)
850     {
851     	struct inode *inode = old_dentry->d_inode;
852     
853     	if (S_ISDIR(inode->i_mode))
854     		return -EPERM;
855     
856     	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
857     	inode->i_nlink++;
858     	atomic_inc(&inode->i_count);	/* New dentry reference */
859     	dget(dentry);		/* Extra pinning count for the created dentry */
860     	d_instantiate(dentry, inode);
861     	return 0;
862     }
863     
864     static inline int shmem_positive(struct dentry *dentry)
865     {
866     	return dentry->d_inode && !d_unhashed(dentry);
867     }
868     
869     /*
870      * Check that a directory is empty (this works
871      * for regular files too, they'll just always be
872      * considered empty..).
873      *
874      * Note that an empty directory can still have
875      * children, they just all have to be negative..
876      */
877     static int shmem_empty(struct dentry *dentry)
878     {
879     	struct list_head *list;
880     
881     	spin_lock(&dcache_lock);
882     	list = dentry->d_subdirs.next;
883     
884     	while (list != &dentry->d_subdirs) {
885     		struct dentry *de = list_entry(list, struct dentry, d_child);
886     
887     		if (shmem_positive(de)) {
888     			spin_unlock(&dcache_lock);
889     			return 0;
890     		}
891     		list = list->next;
892     	}
893     	spin_unlock(&dcache_lock);
894     	return 1;
895     }
896     
897     static int shmem_unlink(struct inode * dir, struct dentry *dentry)
898     {
899     	struct inode *inode = dentry->d_inode;
900     	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
901     	inode->i_nlink--;
902     	dput(dentry);	/* Undo the count from "create" - this does all the work */
903     	return 0;
904     }
905     
906     static int shmem_rmdir(struct inode * dir, struct dentry *dentry)
907     {
908     	if (!shmem_empty(dentry))
909     		return -ENOTEMPTY;
910     
911     	dir->i_nlink--;
912     	return shmem_unlink(dir, dentry);
913     }
914     
915     /*
916      * The VFS layer already does all the dentry stuff for rename,
917      * we just have to decrement the usage count for the target if
918      * it exists so that the VFS layer correctly free's it when it
919      * gets overwritten.
920      */
921     static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry)
922     {
923     	int error = -ENOTEMPTY;
924     
925     	if (shmem_empty(new_dentry)) {
926     		struct inode *inode = new_dentry->d_inode;
927     		if (inode) {
928     			inode->i_ctime = CURRENT_TIME;
929     			inode->i_nlink--;
930     			dput(new_dentry);
931     		}
932     		error = 0;
933     		old_dentry->d_inode->i_ctime = old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
934     	}
935     	return error;
936     }
937     
938     static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
939     {
940     	int error;
941     	int len;
942     	struct inode *inode;
943     	struct page *page;
944     	char *kaddr;
945     
946     	error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0);
947     	if (error)
948     		return error;
949     
950     	len = strlen(symname);
951     	if (len > PAGE_SIZE)
952     		return -ENAMETOOLONG;
953     		
954     	inode = dentry->d_inode;
955     	down(&inode->i_sem);
956     	page = shmem_getpage_locked(inode, 0);
957     	if (IS_ERR(page))
958     		goto fail;
959     	kaddr = kmap(page);
960     	memcpy(kaddr, symname, len);
961     	kunmap(page);
962     	inode->i_size = len;
963     	SetPageDirty(page);
964     	UnlockPage(page);
965     	page_cache_release(page);
966     	up(&inode->i_sem);
967     	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
968     	return 0;
969     fail:
970     	up(&inode->i_sem);
971     	return PTR_ERR(page);
972     }
973     
974     static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
975     {
976     	struct page * page;
977     	int res = shmem_getpage(dentry->d_inode, 0, &page);
978     
979     	if (res)
980     		return res;
981     
982     	res = vfs_readlink(dentry,buffer,buflen, kmap(page));
983     	kunmap(page);
984     	page_cache_release(page);
985     	return res;
986     }
987     
988     static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
989     {
990     	struct page * page;
991     	int res = shmem_getpage(dentry->d_inode, 0, &page);
992     	if (res)
993     		return res;
994     
995     	res = vfs_follow_link(nd, kmap(page));
996     	kunmap(page);
997     	page_cache_release(page);
998     	return res;
999     }
1000     
1001     static int shmem_parse_options(char *options, int *mode, unsigned long * blocks, unsigned long *inodes)
1002     {
1003     	char *this_char, *value;
1004     
1005     	this_char = NULL;
1006     	if ( options )
1007     		this_char = strtok(options,",");
1008     	for ( ; this_char; this_char = strtok(NULL,",")) {
1009     		if ((value = strchr(this_char,'=')) != NULL)
1010     			*value++ = 0;
1011     		if (!strcmp(this_char,"size")) {
1012     			unsigned long long size;
1013     			if (!value || !*value || !blocks)
1014     				return 1;
1015     			size = memparse(value,&value);
1016     			if (*value)
1017     				return 1;
1018     			*blocks = size >> PAGE_CACHE_SHIFT;
1019     		} else if (!strcmp(this_char,"nr_blocks")) {
1020     			if (!value || !*value || !blocks)
1021     				return 1;
1022     			*blocks = memparse(value,&value);
1023     			if (*value)
1024     				return 1;
1025     		} else if (!strcmp(this_char,"nr_inodes")) {
1026     			if (!value || !*value || !inodes)
1027     				return 1;
1028     			*inodes = memparse(value,&value);
1029     			if (*value)
1030     				return 1;
1031     		} else if (!strcmp(this_char,"mode")) {
1032     			if (!value || !*value || !mode)
1033     				return 1;
1034     			*mode = simple_strtoul(value,&value,8);
1035     			if (*value)
1036     				return 1;
1037     		}
1038     		else
1039     			return 1;
1040     	}
1041     	return 0;
1042     }
1043     
1044     static int shmem_remount_fs (struct super_block *sb, int *flags, char *data)
1045     {
1046     	struct shmem_sb_info *info = &sb->u.shmem_sb;
1047     	unsigned long max_blocks = info->max_blocks;
1048     	unsigned long max_inodes = info->max_inodes;
1049     
1050     	if (shmem_parse_options (data, NULL, &max_blocks, &max_inodes))
1051     		return -EINVAL;
1052     	return shmem_set_size(info, max_blocks, max_inodes);
1053     }
1054     
/*
 * shmem_sync_file - fsync method for shmem files and directories
 *
 * Does nothing and always reports success; none of the arguments are
 * looked at.
 */
int shmem_sync_file(struct file *file, struct dentry *dentry, int datasync)
{
	return 0;
}
1059     #endif
1060     
1061     static struct super_block *shmem_read_super(struct super_block * sb, void * data, int silent)
1062     {
1063     	struct inode * inode;
1064     	struct dentry * root;
1065     	unsigned long blocks, inodes;
1066     	int mode   = S_IRWXUGO | S_ISVTX;
1067     	struct sysinfo si;
1068     
1069     	/*
1070     	 * Per default we only allow half of the physical ram per
1071     	 * tmpfs instance
1072     	 */
1073     	si_meminfo(&si);
1074     	blocks = inodes = si.totalram / 2;
1075     
1076     #ifdef CONFIG_TMPFS
1077     	if (shmem_parse_options (data, &mode, &blocks, &inodes)) {
1078     		printk(KERN_ERR "tmpfs invalid option\n");
1079     		return NULL;
1080     	}
1081     #endif
1082     
1083     	spin_lock_init (&sb->u.shmem_sb.stat_lock);
1084     	sb->u.shmem_sb.max_blocks = blocks;
1085     	sb->u.shmem_sb.free_blocks = blocks;
1086     	sb->u.shmem_sb.max_inodes = inodes;
1087     	sb->u.shmem_sb.free_inodes = inodes;
1088     	sb->s_maxbytes = (unsigned long long)SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT;
1089     	sb->s_blocksize = PAGE_CACHE_SIZE;
1090     	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1091     	sb->s_magic = TMPFS_MAGIC;
1092     	sb->s_op = &shmem_ops;
1093     	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
1094     	if (!inode)
1095     		return NULL;
1096     
1097     	root = d_alloc_root(inode);
1098     	if (!root) {
1099     		iput(inode);
1100     		return NULL;
1101     	}
1102     	sb->s_root = root;
1103     	return sb;
1104     }
1105     
1106     
1107     
1108     static struct address_space_operations shmem_aops = {
1109     	writepage: shmem_writepage
1110     };
1111     
1112     static struct file_operations shmem_file_operations = {
1113     	mmap:	shmem_mmap,
1114     #ifdef CONFIG_TMPFS
1115     	read:	shmem_file_read,
1116     	write:	shmem_file_write,
1117     	fsync:	shmem_sync_file,
1118     #endif
1119     };
1120     
1121     static struct inode_operations shmem_inode_operations = {
1122     	truncate:	shmem_truncate,
1123     };
1124     
1125     static struct inode_operations shmem_symlink_inode_operations = {
1126     	truncate:	shmem_truncate,
1127     #ifdef CONFIG_TMPFS
1128     	readlink:	shmem_readlink,
1129     	follow_link:	shmem_follow_link,
1130     #endif
1131     };
1132     
1133     static struct file_operations shmem_dir_operations = {
1134     	read:		generic_read_dir,
1135     	readdir:	dcache_readdir,
1136     #ifdef CONFIG_TMPFS
1137     	fsync:		shmem_sync_file,
1138     #endif
1139     };
1140     
1141     static struct inode_operations shmem_dir_inode_operations = {
1142     #ifdef CONFIG_TMPFS
1143     	create:		shmem_create,
1144     	lookup:		shmem_lookup,
1145     	link:		shmem_link,
1146     	unlink:		shmem_unlink,
1147     	symlink:	shmem_symlink,
1148     	mkdir:		shmem_mkdir,
1149     	rmdir:		shmem_rmdir,
1150     	mknod:		shmem_mknod,
1151     	rename:		shmem_rename,
1152     #endif
1153     };
1154     
1155     static struct super_operations shmem_ops = {
1156     #ifdef CONFIG_TMPFS
1157     	statfs:		shmem_statfs,
1158     	remount_fs:	shmem_remount_fs,
1159     #endif
1160     	delete_inode:	shmem_delete_inode,
1161     	put_inode:	force_delete,	
1162     };
1163     
1164     static struct vm_operations_struct shmem_vm_ops = {
1165     	nopage:	shmem_nopage,
1166     };
1167     
/*
 * Filesystem types, all served by shmem_read_super().  With CONFIG_TMPFS
 * both "shm" and "tmpfs" are declared; without it only "tmpfs" exists and
 * additionally carries FS_NOMOUNT.
 */
#ifdef CONFIG_TMPFS
/* type "shm" will be tagged obsolete in 2.5 */
static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER);
#else
static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
#endif
/*
 * Internal tmpfs mount created by init_shmem_fs(); shmem_file_setup()
 * creates its files on it.
 */
static struct vfsmount *shm_mnt;
1176     
1177     static int __init init_shmem_fs(void)
1178     {
1179     	int error;
1180     	struct vfsmount * res;
1181     
1182     	if ((error = register_filesystem(&tmpfs_fs_type))) {
1183     		printk (KERN_ERR "Could not register tmpfs\n");
1184     		return error;
1185     	}
1186     #ifdef CONFIG_TMPFS
1187     	if ((error = register_filesystem(&shmem_fs_type))) {
1188     		printk (KERN_ERR "Could not register shm fs\n");
1189     		return error;
1190     	}
1191     	devfs_mk_dir (NULL, "shm", NULL);
1192     #endif
1193     	res = kern_mount(&tmpfs_fs_type);
1194     	if (IS_ERR (res)) {
1195     		printk (KERN_ERR "could not kern_mount tmpfs\n");
1196     		unregister_filesystem(&tmpfs_fs_type);
1197     		return PTR_ERR(res);
1198     	}
1199     	shm_mnt = res;
1200     
1201     	/* The internal instance should not do size checking */
1202     	if ((error = shmem_set_size(&res->mnt_sb->u.shmem_sb, ULONG_MAX, ULONG_MAX)))
1203     		printk (KERN_ERR "could not set limits on internal tmpfs\n");
1204     
1205     	return 0;
1206     }
1207     
/*
 * exit_shmem_fs - undo init_shmem_fs()
 *
 * Unregisters the filesystem types ("shm" only exists with CONFIG_TMPFS)
 * and then drops the reference on the internal mount held in shm_mnt.
 */
static void __exit exit_shmem_fs(void)
{
#ifdef CONFIG_TMPFS
	unregister_filesystem(&shmem_fs_type);
#endif
	unregister_filesystem(&tmpfs_fs_type);
	mntput(shm_mnt);
}
1216     
/* Hook the setup and teardown routines above into module init/exit. */
module_init(init_shmem_fs)
module_exit(exit_shmem_fs)
1219     
1220     static int shmem_clear_swp (swp_entry_t entry, swp_entry_t *ptr, int size) {
1221     	swp_entry_t *test;
1222     
1223     	for (test = ptr; test < ptr + size; test++) {
1224     		if (test->val == entry.val) {
1225     			swap_free (entry);
1226     			*test = (swp_entry_t) {0};
1227     			return test - ptr;
1228     		}
1229     	}
1230     	return -1;
1231     }
1232     
1233     static int shmem_unuse_inode (struct inode *inode, swp_entry_t entry, struct page *page)
1234     {
1235     	swp_entry_t **base, **ptr;
1236     	unsigned long idx;
1237     	int offset;
1238     	struct shmem_inode_info *info = &inode->u.shmem_i;
1239     	
1240     	idx = 0;
1241     	spin_lock (&info->lock);
1242     	if ((offset = shmem_clear_swp (entry,info->i_direct, SHMEM_NR_DIRECT)) >= 0)
1243     		goto found;
1244     
1245     	idx = SHMEM_NR_DIRECT;
1246     	if (!(base = info->i_indirect))
1247     		goto out;
1248     
1249     	for (ptr = base; ptr < base + ENTRIES_PER_PAGE; ptr++) {
1250     		if (*ptr &&
1251     		    (offset = shmem_clear_swp (entry, *ptr, ENTRIES_PER_PAGE)) >= 0)
1252     			goto found;
1253     		idx += ENTRIES_PER_PAGE;
1254     	}
1255     out:
1256     	spin_unlock (&info->lock);
1257     	return 0;
1258     found:
1259     	add_to_page_cache(page, inode->i_mapping, offset + idx);
1260     	SetPageDirty(page);
1261     	SetPageUptodate(page);
1262     	UnlockPage(page);
1263     	info->swapped--;
1264     	spin_unlock(&info->lock);
1265     	return 1;
1266     }
1267     
1268     /*
1269      * unuse_shmem() search for an eventually swapped out shmem page.
1270      */
1271     void shmem_unuse(swp_entry_t entry, struct page *page)
1272     {
1273     	struct list_head *p;
1274     	struct inode * inode;
1275     
1276     	spin_lock (&shmem_ilock);
1277     	list_for_each(p, &shmem_inodes) {
1278     		inode = list_entry(p, struct inode, u.shmem_i.list);
1279     
1280     		if (shmem_unuse_inode(inode, entry, page))
1281     			break;
1282     	}
1283     	spin_unlock (&shmem_ilock);
1284     }
1285     
1286     
1287     /*
1288      * shmem_file_setup - get an unlinked file living in shmem fs
1289      *
1290      * @name: name for dentry (to be seen in /proc/<pid>/maps
1291      * @size: size to be set for the file
1292      *
1293      */
1294     struct file *shmem_file_setup(char * name, loff_t size)
1295     {
1296     	int error;
1297     	struct file *file;
1298     	struct inode * inode;
1299     	struct dentry *dentry, *root;
1300     	struct qstr this;
1301     	int vm_enough_memory(long pages);
1302     
1303     	if (size > (unsigned long long) SHMEM_MAX_BLOCKS << PAGE_CACHE_SHIFT)
1304     		return ERR_PTR(-EINVAL);
1305     
1306     	if (!vm_enough_memory((size) >> PAGE_SHIFT))
1307     		return ERR_PTR(-ENOMEM);
1308     
1309     	this.name = name;
1310     	this.len = strlen(name);
1311     	this.hash = 0; /* will go */
1312     	root = shm_mnt->mnt_root;
1313     	dentry = d_alloc(root, &this);
1314     	if (!dentry)
1315     		return ERR_PTR(-ENOMEM);
1316     
1317     	error = -ENFILE;
1318     	file = get_empty_filp();
1319     	if (!file)
1320     		goto put_dentry;
1321     
1322     	error = -ENOSPC;
1323     	inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
1324     	if (!inode) 
1325     		goto close_file;
1326     
1327     	d_instantiate(dentry, inode);
1328     	dentry->d_inode->i_size = size;
1329     	shmem_truncate(inode);
1330     	file->f_vfsmnt = mntget(shm_mnt);
1331     	file->f_dentry = dentry;
1332     	file->f_op = &shmem_file_operations;
1333     	file->f_mode = FMODE_WRITE | FMODE_READ;
1334     	inode->i_nlink = 0;	/* It is unlinked */
1335     	return(file);
1336     
1337     close_file:
1338     	put_filp(file);
1339     put_dentry:
1340     	dput (dentry);
1341     	return ERR_PTR(error);	
1342     }
1343     /*
1344      * shmem_zero_setup - setup a shared anonymous mapping
1345      *
1346      * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
1347      */
1348     int shmem_zero_setup(struct vm_area_struct *vma)
1349     {
1350     	struct file *file;
1351     	loff_t size = vma->vm_end - vma->vm_start;
1352     	
1353     	file = shmem_file_setup("dev/zero", size);
1354     	if (IS_ERR(file))
1355     		return PTR_ERR(file);
1356     
1357     	if (vma->vm_file)
1358     		fput (vma->vm_file);
1359     	vma->vm_file = file;
1360     	vma->vm_ops = &shmem_vm_ops;
1361     	return 0;
1362     }
1363     
/* Make shmem_file_setup() available to code outside this file. */
EXPORT_SYMBOL(shmem_file_setup);
1365