File: /usr/src/linux/mm/memory.c

1     /*
2      *  linux/mm/memory.c
3      *
4      *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
5      */
6     
7     /*
8      * demand-loading started 01.12.91 - seems it is high on the list of
9      * things wanted, and it should be easy to implement. - Linus
10      */
11     
12     /*
13      * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
14      * pages started 02.12.91, seems to work. - Linus.
15      *
16      * Tested sharing by executing about 30 /bin/sh: under the old kernel it
17      * would have taken more than the 6M I have free, but it worked well as
18      * far as I could see.
19      *
20      * Also corrected some "invalidate()"s - I wasn't doing enough of them.
21      */
22     
23     /*
24      * Real VM (paging to/from disk) started 18.12.91. Much more work and
25      * thought has to go into this. Oh, well..
26      * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
27      *		Found it. Everything seems to work now.
28      * 20.12.91  -  Ok, making the swap-device changeable like the root.
29      */
30     
31     /*
32      * 05.04.94  -  Multi-page memory management added for v1.1.
33      * 		Idea by Alex Bligh (alex@cconcepts.co.uk)
34      *
35      * 16.07.99  -  Support of BIGMEM added by Gerhard Wichert, Siemens AG
36      *		(Gerhard.Wichert@pdb.siemens.de)
37      */
38     
39     #include <linux/mm.h>
40     #include <linux/mman.h>
41     #include <linux/swap.h>
42     #include <linux/smp_lock.h>
43     #include <linux/swapctl.h>
44     #include <linux/iobuf.h>
45     #include <linux/highmem.h>
46     #include <linux/pagemap.h>
47     
48     #include <asm/pgalloc.h>
49     #include <asm/uaccess.h>
50     #include <asm/tlb.h>
51     
/*
 * Global memory-layout variables defined by this file.  None of them is
 * referenced by name in the part of the file visible here, so the notes
 * below are inferred from the identifiers only -- TODO confirm against
 * the architecture setup code that initialises them.
 */
unsigned long max_mapnr;		/* presumably the number of valid mem_map entries */
unsigned long num_physpages;		/* presumably the count of physical pages */
void * high_memory;			/* presumably the end of directly mapped memory */
struct page *highmem_start_page;	/* presumably the first high-memory page */
56     
/*
 * Fill the fresh page "to" with the contents a copy-on-write fault at
 * "address" has to see.
 *
 * The C-O-W ZERO_PAGE is special-cased because it is such a common
 * occurrence: there is no need to read a page that is known to hold
 * only zeroes, so the destination is cleared instead of copied (better
 * for the cache and memory subsystem).
 */
static inline void copy_cow_page(struct page * from, struct page * to, unsigned long address)
{
	if (from != ZERO_PAGE(address))
		copy_user_highpage(to, from, address);
	else
		clear_user_highpage(to, address);
}
70     
/* presumably the base of the array of page descriptors -- TODO confirm */
mem_map_t * mem_map;
72     
73     /*
74      * Called by TLB shootdown 
75      */
76     void __free_pte(pte_t pte)
77     {
78     	struct page *page = pte_page(pte);
79     	if ((!VALID_PAGE(page)) || PageReserved(page))
80     		return;
81     	/*
82     	 * free_page() used to be able to clear swap cache
83     	 * entries.  We may now have to do it manually.
84     	 */
85     	if (page->mapping) {
86     		if (pte_dirty(pte))
87     			set_page_dirty(page);
88     	}
89     		
90     	free_page_and_swap_cache(page);
91     }
92     
93     
94     /*
95      * Note: this doesn't free the actual pages themselves. That
96      * has been handled earlier when unmapping all the memory regions.
97      */
98     static inline void free_one_pmd(pmd_t * dir)
99     {
100     	pte_t * pte;
101     
102     	if (pmd_none(*dir))
103     		return;
104     	if (pmd_bad(*dir)) {
105     		pmd_ERROR(*dir);
106     		pmd_clear(dir);
107     		return;
108     	}
109     	pte = pte_offset(dir, 0);
110     	pmd_clear(dir);
111     	pte_free(pte);
112     }
113     
114     static inline void free_one_pgd(pgd_t * dir)
115     {
116     	int j;
117     	pmd_t * pmd;
118     
119     	if (pgd_none(*dir))
120     		return;
121     	if (pgd_bad(*dir)) {
122     		pgd_ERROR(*dir);
123     		pgd_clear(dir);
124     		return;
125     	}
126     	pmd = pmd_offset(dir, 0);
127     	pgd_clear(dir);
128     	for (j = 0; j < PTRS_PER_PMD ; j++) {
129     		prefetchw(pmd+j+(PREFETCH_STRIDE/16));
130     		free_one_pmd(pmd+j);
131     	}
132     	pmd_free(pmd);
133     }
134     
/*
 * Low and high watermarks for the page table cache.
 * The system should try to keep
 *	pgt_cache_water[0] <= cache elements <= pgt_cache_water[1]
 */
int pgt_cache_water[2] = { 25, 50 };

/*
 * Hand both watermarks to do_check_pgt_cache().
 * Returns the number of pages freed.
 */
int check_pgt_cache(void)
{
	const int low = pgt_cache_water[0];
	const int high = pgt_cache_water[1];

	return do_check_pgt_cache(low, high);
}
145     
146     
147     /*
148      * This function clears all user-level page tables of a process - this
149      * is needed by execve(), so that old pages aren't in the way.
150      */
151     void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
152     {
153     	pgd_t * page_dir = mm->pgd;
154     
155     	spin_lock(&mm->page_table_lock);
156     	page_dir += first;
157     	do {
158     		free_one_pgd(page_dir);
159     		page_dir++;
160     	} while (--nr);
161     	spin_unlock(&mm->page_table_lock);
162     
163     	/* keep the page table cache within bounds */
164     	check_pgt_cache();
165     }
166     
/*
 * Bits of a table-entry pointer that select the entry inside its pte/pmd
 * table.  The copy loops below run until "(unsigned long)ptr & MASK"
 * becomes zero, i.e. until the incremented pointer has stepped past the
 * last entry of the table -- this presumably relies on the tables being
 * aligned to their own size (TODO confirm per architecture).
 */
#define PTE_TABLE_MASK	((PTRS_PER_PTE-1) * sizeof(pte_t))
#define PMD_TABLE_MASK	((PTRS_PER_PMD-1) * sizeof(pmd_t))

/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
 *
 * 08Jan98 Merged into one routine from several inline routines to reduce
 *         variable count and make things faster. -jj
 *
 * dst->page_table_lock is held on entry and exit,
 * but may be dropped within pmd_alloc() and pte_alloc().
 *
 * src->page_table_lock is taken here around the copy of each pte table.
 *
 * Returns 0 on success, or -ENOMEM when pmd_alloc() or pte_alloc() fails
 * for the destination.
 */
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
			struct vm_area_struct *vma)
{
	pgd_t * src_pgd, * dst_pgd;
	unsigned long address = vma->vm_start;
	unsigned long end = vma->vm_end;
	/* COW applies when the mapping may be written but is not shared. */
	unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

	/* Start one entry early: the loop increments before it looks. */
	src_pgd = pgd_offset(src, address)-1;
	dst_pgd = pgd_offset(dst, address)-1;

	for (;;) {
		pmd_t * src_pmd, * dst_pmd;

		src_pgd++; dst_pgd++;
		
		/* copy_pmd_range */
		
		if (pgd_none(*src_pgd))
			goto skip_copy_pmd_range;
		if (pgd_bad(*src_pgd)) {
			pgd_ERROR(*src_pgd);
			pgd_clear(src_pgd);
			/*
			 * Shared by the "none" and "bad" cases: advance to the
			 * next pgd boundary; a zero address means we wrapped.
			 */
skip_copy_pmd_range:	address = (address + PGDIR_SIZE) & PGDIR_MASK;
			if (!address || (address >= end))
				goto out;
			continue;
		}

		src_pmd = pmd_offset(src_pgd, address);
		dst_pmd = pmd_alloc(dst, dst_pgd, address);
		if (!dst_pmd)
			goto nomem;

		do {
			pte_t * src_pte, * dst_pte;
		
			/* copy_pte_range */
		
			if (pmd_none(*src_pmd))
				goto skip_copy_pte_range;
			if (pmd_bad(*src_pmd)) {
				pmd_ERROR(*src_pmd);
				pmd_clear(src_pmd);
				/* Shared by "none" and "bad": skip to the next pmd boundary. */
skip_copy_pte_range:		address = (address + PMD_SIZE) & PMD_MASK;
				if (address >= end)
					goto out;
				goto cont_copy_pmd_range;
			}

			src_pte = pte_offset(src_pmd, address);
			dst_pte = pte_alloc(dst, dst_pmd, address);
			if (!dst_pte)
				goto nomem;

			/* The source lock is held only while one pte table is copied. */
			spin_lock(&src->page_table_lock);			
			do {
				pte_t pte = *src_pte;
				struct page *ptepage;
				
				/* copy_one_pte */

				if (pte_none(pte))
					goto cont_copy_pte_range_noset;
				if (!pte_present(pte)) {
					/* Swapped out: take another reference on the swap entry. */
					swap_duplicate(pte_to_swp_entry(pte));
					goto cont_copy_pte_range;
				}
				ptepage = pte_page(pte);
				/*
				 * Invalid or reserved pages are copied verbatim:
				 * no write-protect, no page reference, no rss.
				 */
				if ((!VALID_PAGE(ptepage)) || 
				    PageReserved(ptepage))
					goto cont_copy_pte_range;

				/* If it's a COW mapping, write protect it both in the parent and the child */
				if (cow) {
					ptep_set_wrprotect(src_pte);
					/* Re-read so the child gets the protected value. */
					pte = *src_pte;
				}

				/* If it's a shared mapping, mark it clean in the child */
				if (vma->vm_flags & VM_SHARED)
					pte = pte_mkclean(pte);
				pte = pte_mkold(pte);
				get_page(ptepage);
				dst->rss++;

cont_copy_pte_range:		set_pte(dst_pte, pte);
cont_copy_pte_range_noset:	address += PAGE_SIZE;
				if (address >= end)
					goto out_unlock;
				src_pte++;
				dst_pte++;
			} while ((unsigned long)src_pte & PTE_TABLE_MASK);
			spin_unlock(&src->page_table_lock);
		
cont_copy_pmd_range:	src_pmd++;
			dst_pmd++;
		} while ((unsigned long)src_pmd & PMD_TABLE_MASK);
	}
	/* Reached only from inside the pte loop, with the source lock held. */
out_unlock:
	spin_unlock(&src->page_table_lock);
out:
	return 0;
nomem:
	return -ENOMEM;
}
287     
288     /*
289      * Return indicates whether a page was freed so caller can adjust rss
290      */
291     static inline void forget_pte(pte_t page)
292     {
293     	if (!pte_none(page)) {
294     		printk("forget_pte: old mapping existed!\n");
295     		BUG();
296     	}
297     }
298     
299     static inline int zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd, unsigned long address, unsigned long size)
300     {
301     	unsigned long offset;
302     	pte_t * ptep;
303     	int freed = 0;
304     
305     	if (pmd_none(*pmd))
306     		return 0;
307     	if (pmd_bad(*pmd)) {
308     		pmd_ERROR(*pmd);
309     		pmd_clear(pmd);
310     		return 0;
311     	}
312     	ptep = pte_offset(pmd, address);
313     	offset = address & ~PMD_MASK;
314     	if (offset + size > PMD_SIZE)
315     		size = PMD_SIZE - offset;
316     	size &= PAGE_MASK;
317     	for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
318     		pte_t pte = *ptep;
319     		if (pte_none(pte))
320     			continue;
321     		if (pte_present(pte)) {
322     			freed ++;
323     			/* This will eventually call __free_pte on the pte. */
324     			tlb_remove_page(tlb, ptep, address + offset);
325     		} else {
326     			swap_free(pte_to_swp_entry(pte));
327     			pte_clear(ptep);
328     		}
329     	}
330     
331     	return freed;
332     }
333     
334     static inline int zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir, unsigned long address, unsigned long size)
335     {
336     	pmd_t * pmd;
337     	unsigned long end;
338     	int freed;
339     
340     	if (pgd_none(*dir))
341     		return 0;
342     	if (pgd_bad(*dir)) {
343     		pgd_ERROR(*dir);
344     		pgd_clear(dir);
345     		return 0;
346     	}
347     	pmd = pmd_offset(dir, address);
348     	end = address + size;
349     	if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
350     		end = ((address + PGDIR_SIZE) & PGDIR_MASK);
351     	freed = 0;
352     	do {
353     		freed += zap_pte_range(tlb, pmd, address, end - address);
354     		address = (address + PMD_SIZE) & PMD_MASK; 
355     		pmd++;
356     	} while (address < end);
357     	return freed;
358     }
359     
360     /*
361      * remove user pages in a given range.
362      */
363     void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
364     {
365     	mmu_gather_t *tlb;
366     	pgd_t * dir;
367     	unsigned long start = address, end = address + size;
368     	int freed = 0;
369     
370     	dir = pgd_offset(mm, address);
371     
372     	/*
373     	 * This is a long-lived spinlock. That's fine.
374     	 * There's no contention, because the page table
375     	 * lock only protects against kswapd anyway, and
376     	 * even if kswapd happened to be looking at this
377     	 * process we _want_ it to get stuck.
378     	 */
379     	if (address >= end)
380     		BUG();
381     	spin_lock(&mm->page_table_lock);
382     	flush_cache_range(mm, address, end);
383     	tlb = tlb_gather_mmu(mm);
384     
385     	do {
386     		freed += zap_pmd_range(tlb, dir, address, end - address);
387     		address = (address + PGDIR_SIZE) & PGDIR_MASK;
388     		dir++;
389     	} while (address && (address < end));
390     
391     	/* this will flush any remaining tlb entries */
392     	tlb_finish_mmu(tlb, start, end);
393     
394     	/*
395     	 * Update rss for the mm_struct (not necessarily current->mm)
396     	 * Notice that rss is an unsigned long.
397     	 */
398     	if (mm->rss > freed)
399     		mm->rss -= freed;
400     	else
401     		mm->rss = 0;
402     	spin_unlock(&mm->page_table_lock);
403     }
404     
405     
406     /*
407      * Do a quick page-table lookup for a single page. 
408      */
409     static struct page * follow_page(unsigned long address, int write) 
410     {
411     	pgd_t *pgd;
412     	pmd_t *pmd;
413     	pte_t *ptep, pte;
414     
415     	pgd = pgd_offset(current->mm, address);
416     	if (pgd_none(*pgd) || pgd_bad(*pgd))
417     		goto out;
418     
419     	pmd = pmd_offset(pgd, address);
420     	if (pmd_none(*pmd) || pmd_bad(*pmd))
421     		goto out;
422     
423     	ptep = pte_offset(pmd, address);
424     	if (!ptep)
425     		goto out;
426     
427     	pte = *ptep;
428     	if (pte_present(pte)) {
429     		if (!write ||
430     		    (pte_write(pte) && pte_dirty(pte)))
431     			return pte_page(pte);
432     	}
433     
434     out:
435     	return 0;
436     }
437     
438     /* 
439      * Given a physical address, is there a useful struct page pointing to
440      * it?  This may become more complex in the future if we start dealing
441      * with IO-aperture pages in kiobufs.
442      */
443     
444     static inline struct page * get_page_map(struct page *page)
445     {
446     	if (!VALID_PAGE(page))
447     		return 0;
448     	return page;
449     }
450     
451     /*
452      * Force in an entire range of pages from the current process's user VA,
453      * and pin them in physical memory.  
454      */
455     
456     #define dprintk(x...)
457     int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
458     {
459     	unsigned long		ptr, end;
460     	int			err;
461     	struct mm_struct *	mm;
462     	struct vm_area_struct *	vma = 0;
463     	struct page *		map;
464     	int			i;
465     	int			datain = (rw == READ);
466     	
467     	/* Make sure the iobuf is not already mapped somewhere. */
468     	if (iobuf->nr_pages)
469     		return -EINVAL;
470     
471     	mm = current->mm;
472     	dprintk ("map_user_kiobuf: begin\n");
473     	
474     	ptr = va & PAGE_MASK;
475     	end = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
476     	err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT);
477     	if (err)
478     		return err;
479     
480     	down_read(&mm->mmap_sem);
481     
482     	err = -EFAULT;
483     	iobuf->locked = 0;
484     	iobuf->offset = va & ~PAGE_MASK;
485     	iobuf->length = len;
486     	
487     	i = 0;
488     	
489     	/* 
490     	 * First of all, try to fault in all of the necessary pages
491     	 */
492     	while (ptr < end) {
493     		if (!vma || ptr >= vma->vm_end) {
494     			vma = find_vma(current->mm, ptr);
495     			if (!vma) 
496     				goto out_unlock;
497     			if (vma->vm_start > ptr) {
498     				if (!(vma->vm_flags & VM_GROWSDOWN))
499     					goto out_unlock;
500     				if (expand_stack(vma, ptr))
501     					goto out_unlock;
502     			}
503     			if (((datain) && (!(vma->vm_flags & VM_WRITE))) ||
504     					(!(vma->vm_flags & VM_READ))) {
505     				err = -EACCES;
506     				goto out_unlock;
507     			}
508     		}
509     		spin_lock(&mm->page_table_lock);
510     		while (!(map = follow_page(ptr, datain))) {
511     			int ret;
512     
513     			spin_unlock(&mm->page_table_lock);
514     			ret = handle_mm_fault(current->mm, vma, ptr, datain);
515     			if (ret <= 0) {
516     				if (!ret)
517     					goto out_unlock;
518     				else {
519     					err = -ENOMEM;
520     					goto out_unlock;
521     				}
522     			}
523     			spin_lock(&mm->page_table_lock);
524     		}			
525     		map = get_page_map(map);
526     		if (map) {
527     			flush_dcache_page(map);
528     			atomic_inc(&map->count);
529     		} else
530     			printk (KERN_INFO "Mapped page missing [%d]\n", i);
531     		spin_unlock(&mm->page_table_lock);
532     		iobuf->maplist[i] = map;
533     		iobuf->nr_pages = ++i;
534     		
535     		ptr += PAGE_SIZE;
536     	}
537     
538     	up_read(&mm->mmap_sem);
539     	dprintk ("map_user_kiobuf: end OK\n");
540     	return 0;
541     
542      out_unlock:
543     	up_read(&mm->mmap_sem);
544     	unmap_kiobuf(iobuf);
545     	dprintk ("map_user_kiobuf: end %d\n", err);
546     	return err;
547     }
548     
549     /*
550      * Mark all of the pages in a kiobuf as dirty 
551      *
552      * We need to be able to deal with short reads from disk: if an IO error
553      * occurs, the number of bytes read into memory may be less than the
554      * size of the kiobuf, so we have to stop marking pages dirty once the
555      * requested byte count has been reached.
556      */
557     
558     void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes)
559     {
560     	int index, offset, remaining;
561     	struct page *page;
562     	
563     	index = iobuf->offset >> PAGE_SHIFT;
564     	offset = iobuf->offset & ~PAGE_MASK;
565     	remaining = bytes;
566     	if (remaining > iobuf->length)
567     		remaining = iobuf->length;
568     	
569     	while (remaining > 0 && index < iobuf->nr_pages) {
570     		page = iobuf->maplist[index];
571     		
572     		if (!PageReserved(page))
573     			SetPageDirty(page);
574     
575     		remaining -= (PAGE_SIZE - offset);
576     		offset = 0;
577     		index++;
578     	}
579     }
580     
581     /*
582      * Unmap all of the pages referenced by a kiobuf.  We release the pages,
583      * and unlock them if they were locked. 
584      */
585     
586     void unmap_kiobuf (struct kiobuf *iobuf) 
587     {
588     	int i;
589     	struct page *map;
590     	
591     	for (i = 0; i < iobuf->nr_pages; i++) {
592     		map = iobuf->maplist[i];
593     		if (map) {
594     			if (iobuf->locked)
595     				UnlockPage(map);
596     			__free_page(map);
597     		}
598     	}
599     	
600     	iobuf->nr_pages = 0;
601     	iobuf->locked = 0;
602     }
603     
604     
605     /*
606      * Lock down all of the pages of a kiovec for IO.
607      *
608      * If any page is mapped twice in the kiovec, we return the error -EINVAL.
609      *
610      * The optional wait parameter causes the lock call to block until all
611      * pages can be locked if set.  If wait==0, the lock operation is
612      * aborted if any locked pages are found and -EAGAIN is returned.
613      */
614     
615     int lock_kiovec(int nr, struct kiobuf *iovec[], int wait)
616     {
617     	struct kiobuf *iobuf;
618     	int i, j;
619     	struct page *page, **ppage;
620     	int doublepage = 0;
621     	int repeat = 0;
622     	
623      repeat:
624     	
625     	for (i = 0; i < nr; i++) {
626     		iobuf = iovec[i];
627     
628     		if (iobuf->locked)
629     			continue;
630     
631     		ppage = iobuf->maplist;
632     		for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
633     			page = *ppage;
634     			if (!page)
635     				continue;
636     			
637     			if (TryLockPage(page)) {
638     				while (j--) {
639     					struct page *tmp = *--ppage;
640     					if (tmp)
641     						UnlockPage(tmp);
642     				}
643     				goto retry;
644     			}
645     		}
646     		iobuf->locked = 1;
647     	}
648     
649     	return 0;
650     	
651      retry:
652     	
653     	/* 
654     	 * We couldn't lock one of the pages.  Undo the locking so far,
655     	 * wait on the page we got to, and try again.  
656     	 */
657     	
658     	unlock_kiovec(nr, iovec);
659     	if (!wait)
660     		return -EAGAIN;
661     	
662     	/* 
663     	 * Did the release also unlock the page we got stuck on?
664     	 */
665     	if (!PageLocked(page)) {
666     		/* 
667     		 * If so, we may well have the page mapped twice
668     		 * in the IO address range.  Bad news.  Of
669     		 * course, it _might_ just be a coincidence,
670     		 * but if it happens more than once, chances
671     		 * are we have a double-mapped page. 
672     		 */
673     		if (++doublepage >= 3) 
674     			return -EINVAL;
675     		
676     		/* Try again...  */
677     		wait_on_page(page);
678     	}
679     	
680     	if (++repeat < 16)
681     		goto repeat;
682     	return -EAGAIN;
683     }
684     
685     /*
686      * Unlock all of the pages of a kiovec after IO.
687      */
688     
689     int unlock_kiovec(int nr, struct kiobuf *iovec[])
690     {
691     	struct kiobuf *iobuf;
692     	int i, j;
693     	struct page *page, **ppage;
694     	
695     	for (i = 0; i < nr; i++) {
696     		iobuf = iovec[i];
697     
698     		if (!iobuf->locked)
699     			continue;
700     		iobuf->locked = 0;
701     		
702     		ppage = iobuf->maplist;
703     		for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
704     			page = *ppage;
705     			if (!page)
706     				continue;
707     			UnlockPage(page);
708     		}
709     	}
710     	return 0;
711     }
712     
713     static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
714                                          unsigned long size, pgprot_t prot)
715     {
716     	unsigned long end;
717     
718     	address &= ~PMD_MASK;
719     	end = address + size;
720     	if (end > PMD_SIZE)
721     		end = PMD_SIZE;
722     	do {
723     		pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
724     		pte_t oldpage = ptep_get_and_clear(pte);
725     		set_pte(pte, zero_pte);
726     		forget_pte(oldpage);
727     		address += PAGE_SIZE;
728     		pte++;
729     	} while (address && (address < end));
730     }
731     
732     static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
733                                         unsigned long size, pgprot_t prot)
734     {
735     	unsigned long end;
736     
737     	address &= ~PGDIR_MASK;
738     	end = address + size;
739     	if (end > PGDIR_SIZE)
740     		end = PGDIR_SIZE;
741     	do {
742     		pte_t * pte = pte_alloc(mm, pmd, address);
743     		if (!pte)
744     			return -ENOMEM;
745     		zeromap_pte_range(pte, address, end - address, prot);
746     		address = (address + PMD_SIZE) & PMD_MASK;
747     		pmd++;
748     	} while (address && (address < end));
749     	return 0;
750     }
751     
752     int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
753     {
754     	int error = 0;
755     	pgd_t * dir;
756     	unsigned long beg = address;
757     	unsigned long end = address + size;
758     	struct mm_struct *mm = current->mm;
759     
760     	dir = pgd_offset(mm, address);
761     	flush_cache_range(mm, beg, end);
762     	if (address >= end)
763     		BUG();
764     
765     	spin_lock(&mm->page_table_lock);
766     	do {
767     		pmd_t *pmd = pmd_alloc(mm, dir, address);
768     		error = -ENOMEM;
769     		if (!pmd)
770     			break;
771     		error = zeromap_pmd_range(mm, pmd, address, end - address, prot);
772     		if (error)
773     			break;
774     		address = (address + PGDIR_SIZE) & PGDIR_MASK;
775     		dir++;
776     	} while (address && (address < end));
777     	spin_unlock(&mm->page_table_lock);
778     	flush_tlb_range(mm, beg, end);
779     	return error;
780     }
781     
782     /*
783      * maps a range of physical memory into the requested pages. the old
784      * mappings are removed. any references to nonexistent pages results
785      * in null mappings (currently treated as "copy-on-access")
786      */
787     static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
788     	unsigned long phys_addr, pgprot_t prot)
789     {
790     	unsigned long end;
791     
792     	address &= ~PMD_MASK;
793     	end = address + size;
794     	if (end > PMD_SIZE)
795     		end = PMD_SIZE;
796     	do {
797     		struct page *page;
798     		pte_t oldpage;
799     		oldpage = ptep_get_and_clear(pte);
800     
801     		page = virt_to_page(__va(phys_addr));
802     		if ((!VALID_PAGE(page)) || PageReserved(page))
803      			set_pte(pte, mk_pte_phys(phys_addr, prot));
804     		forget_pte(oldpage);
805     		address += PAGE_SIZE;
806     		phys_addr += PAGE_SIZE;
807     		pte++;
808     	} while (address && (address < end));
809     }
810     
811     static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
812     	unsigned long phys_addr, pgprot_t prot)
813     {
814     	unsigned long end;
815     
816     	address &= ~PGDIR_MASK;
817     	end = address + size;
818     	if (end > PGDIR_SIZE)
819     		end = PGDIR_SIZE;
820     	phys_addr -= address;
821     	do {
822     		pte_t * pte = pte_alloc(mm, pmd, address);
823     		if (!pte)
824     			return -ENOMEM;
825     		remap_pte_range(pte, address, end - address, address + phys_addr, prot);
826     		address = (address + PMD_SIZE) & PMD_MASK;
827     		pmd++;
828     	} while (address && (address < end));
829     	return 0;
830     }
831     
832     /*  Note: this is only safe if the mm semaphore is held when called. */
833     int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long size, pgprot_t prot)
834     {
835     	int error = 0;
836     	pgd_t * dir;
837     	unsigned long beg = from;
838     	unsigned long end = from + size;
839     	struct mm_struct *mm = current->mm;
840     
841     	phys_addr -= from;
842     	dir = pgd_offset(mm, from);
843     	flush_cache_range(mm, beg, end);
844     	if (from >= end)
845     		BUG();
846     
847     	spin_lock(&mm->page_table_lock);
848     	do {
849     		pmd_t *pmd = pmd_alloc(mm, dir, from);
850     		error = -ENOMEM;
851     		if (!pmd)
852     			break;
853     		error = remap_pmd_range(mm, pmd, from, end - from, phys_addr + from, prot);
854     		if (error)
855     			break;
856     		from = (from + PGDIR_SIZE) & PGDIR_MASK;
857     		dir++;
858     	} while (from && (from < end));
859     	spin_unlock(&mm->page_table_lock);
860     	flush_tlb_range(mm, beg, end);
861     	return error;
862     }
863     
864     /*
865      * Establish a new mapping:
866      *  - flush the old one
867      *  - update the page tables
868      *  - inform the TLB about the new one
869      *
870      * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
871      */
872     static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry)
873     {
874     	set_pte(page_table, entry);
875     	flush_tlb_page(vma, address);
876     	update_mmu_cache(vma, address, entry);
877     }
878     
879     /*
880      * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
881      */
882     static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, 
883     		pte_t *page_table)
884     {
885     	flush_page_to_ram(new_page);
886     	flush_cache_page(vma, address);
887     	establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
888     }
889     
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 *
 * We hold the mm semaphore and the page_table_lock on entry and exit.
 * The page_table_lock is dropped while the copy is made and retaken
 * before returning, also on the out-of-memory path.
 *
 * Returns 1 (minor fault) when the page was made writable in place or
 * the fault was otherwise handled, and -1 when the pte refers to an
 * invalid page or no new page could be allocated.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
	unsigned long address, pte_t *page_table, pte_t pte)
{
	struct page *old_page, *new_page;

	old_page = pte_page(pte);
	if (!VALID_PAGE(old_page))
		goto bad_wp_page;
	
	/*
	 * We can avoid the copy if:
	 * - we're the only user (count == 1)
	 * - the only other user is the swap cache,
	 *   and the only swap cache user is itself,
	 *   in which case we can just continue to
	 *   use the same swap cache (it will be
	 *   marked dirty).
	 */
	switch (page_count(old_page)) {
	int can_reuse;
	case 3:
		/* A third reference is only tolerated when the page has buffers. */
		if (!old_page->buffers)
			break;
		/* FallThrough */
	case 2:
		if (!PageSwapCache(old_page))
			break;
		/* Never sleep here: if the page is locked, just copy it. */
		if (TryLockPage(old_page))
			break;
		/* Recheck swapcachedness once the page is locked */
		can_reuse = exclusive_swap_page(old_page);
		if (can_reuse)
			delete_from_swap_cache(old_page);
		UnlockPage(old_page);
		if (!can_reuse)
			break;
		/* FallThrough */
	case 1:
		/* Reserved pages are never reused in place; they get copied. */
		if (PageReserved(old_page))
			break;
		flush_cache_page(vma, address);
		establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
		return 1;	/* Minor fault */
	}

	/*
	 * Ok, we need to copy. Oh, well..
	 *
	 * Take an extra reference on the old page so it stays around
	 * while the page_table_lock is dropped for the allocation and
	 * the copy.
	 */
	page_cache_get(old_page);
	spin_unlock(&mm->page_table_lock);

	new_page = alloc_page(GFP_HIGHUSER);
	if (!new_page)
		goto no_mem;
	copy_cow_page(old_page,new_page,address);
	page_cache_release(old_page);

	/*
	 * Re-check the pte - we dropped the lock
	 */
	spin_lock(&mm->page_table_lock);
	if (pte_same(*page_table, pte)) {
		/*
		 * A reserved page is replaced by an ordinary one here, so rss
		 * grows - presumably because reserved pages were not counted
		 * in rss when they were mapped (TODO confirm).
		 */
		if (PageReserved(old_page))
			++mm->rss;
		break_cow(vma, new_page, address, page_table);

		/* Free the old page.. */
		new_page = old_page;
	}
	/*
	 * Drops the mapping's reference to the old page if the new page was
	 * installed, or the unused copy if the pte changed under us.
	 */
	page_cache_release(new_page);
	return 1;	/* Minor fault */

bad_wp_page:
	printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page);
	return -1;
no_mem:
	page_cache_release(old_page);
	/* Retake the lock: the caller expects it held on exit. */
	spin_lock(&mm->page_table_lock);
	return -1;
}
989     
990     static void vmtruncate_list(struct vm_area_struct *mpnt, unsigned long pgoff)
991     {
992     	do {
993     		struct mm_struct *mm = mpnt->vm_mm;
994     		unsigned long start = mpnt->vm_start;
995     		unsigned long end = mpnt->vm_end;
996     		unsigned long len = end - start;
997     		unsigned long diff;
998     
999     		/* mapping wholly truncated? */
1000     		if (mpnt->vm_pgoff >= pgoff) {
1001     			zap_page_range(mm, start, len);
1002     			continue;
1003     		}
1004     
1005     		/* mapping wholly unaffected? */
1006     		len = len >> PAGE_SHIFT;
1007     		diff = pgoff - mpnt->vm_pgoff;
1008     		if (diff >= len)
1009     			continue;
1010     
1011     		/* Ok, partially affected.. */
1012     		start += diff << PAGE_SHIFT;
1013     		len = (len - diff) << PAGE_SHIFT;
1014     		zap_page_range(mm, start, len);
1015     	} while ((mpnt = mpnt->vm_next_share) != NULL);
1016     }
1017     
1018     /*
1019      * Handle all mappings that got truncated by a "truncate()"
1020      * system call.
1021      *
1022      * NOTE! We have to be ready to update the memory sharing
1023      * between the file and the memory map for a potential last
1024      * incomplete page.  Ugly, but necessary.
1025      */
1026     int vmtruncate(struct inode * inode, loff_t offset)
1027     {
1028     	unsigned long pgoff;
1029     	struct address_space *mapping = inode->i_mapping;
1030     	unsigned long limit;
1031     
1032     	if (inode->i_size < offset)
1033     		goto do_expand;
1034     	inode->i_size = offset;
1035     	spin_lock(&mapping->i_shared_lock);
1036     	if (!mapping->i_mmap && !mapping->i_mmap_shared)
1037     		goto out_unlock;
1038     
1039     	pgoff = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1040     	if (mapping->i_mmap != NULL)
1041     		vmtruncate_list(mapping->i_mmap, pgoff);
1042     	if (mapping->i_mmap_shared != NULL)
1043     		vmtruncate_list(mapping->i_mmap_shared, pgoff);
1044     
1045     out_unlock:
1046     	spin_unlock(&mapping->i_shared_lock);
1047     	truncate_inode_pages(mapping, offset);
1048     	goto out_truncate;
1049     
1050     do_expand:
1051     	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
1052     	if (limit != RLIM_INFINITY) {
1053     		if (inode->i_size >= limit) {
1054     			send_sig(SIGXFSZ, current, 0);
1055     			goto out;
1056     		}
1057     		if (offset > limit) {
1058     			send_sig(SIGXFSZ, current, 0);
1059     			offset = limit;
1060     		}
1061     	}
1062     	inode->i_size = offset;
1063     
1064     out_truncate:
1065     	if (inode->i_op && inode->i_op->truncate) {
1066     		lock_kernel();
1067     		inode->i_op->truncate(inode);
1068     		unlock_kernel();
1069     	}
1070     out:
1071     	return 0;
1072     }
1073     
1074     /* 
1075      * Primitive swap readahead code. We simply read an aligned block of
1076      * (1 << page_cluster) entries in the swap area. This method is chosen
1077      * because it doesn't cost us any seek time.  We also make sure to queue
1078      * the 'original' request together with the readahead ones...  
1079      */
1080     void swapin_readahead(swp_entry_t entry)
1081     {
1082     	int i, num;
1083     	struct page *new_page;
1084     	unsigned long offset;
1085     
1086     	/*
1087     	 * Get the number of handles we should do readahead io to.
1088     	 */
1089     	num = valid_swaphandles(entry, &offset);
1090     	for (i = 0; i < num; offset++, i++) {
1091     		/* Don't block on I/O for read-ahead */
1092     		if (atomic_read(&nr_async_pages) >=
1093     		    pager_daemon.swap_cluster << page_cluster)
1094     			break;
1095     		/* Ok, do the async read-ahead now */
1096     		new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset));
1097     		if (!new_page)
1098     			break;
1099     		page_cache_release(new_page);
1100     	}
1101     	return;
1102     }
1103     
1104     /*
1105      * We hold the mm semaphore and the page_table_lock on entry and exit.
1106      */
1107     static int do_swap_page(struct mm_struct * mm,
1108     	struct vm_area_struct * vma, unsigned long address,
1109     	pte_t * page_table, pte_t orig_pte, int write_access)
1110     {
1111     	struct page *page;
1112     	swp_entry_t entry = pte_to_swp_entry(orig_pte);
1113     	pte_t pte;
1114     	int ret = 1;
1115     
1116     	spin_unlock(&mm->page_table_lock);
1117     	page = lookup_swap_cache(entry);
1118     	if (!page) {
1119     		lock_kernel();
1120     		swapin_readahead(entry);
1121     		page = read_swap_cache_async(entry);
1122     		unlock_kernel();
1123     		if (!page) {
1124     			spin_lock(&mm->page_table_lock);
1125     			/*
1126     			 * Back out if somebody else faulted in this pte while
1127     			 * we released the page table lock.
1128     			 */
1129     			return pte_same(*page_table, orig_pte) ? -1 : 1;
1130     		}
1131     
1132     		/* Had to read the page from swap area: Major fault */
1133     		ret = 2;
1134     	}
1135     
1136     	/*
1137     	 * Freeze the "shared"ness of the page, ie page_count + swap_count.
1138     	 * Must lock page before transferring our swap count to already
1139     	 * obtained page count.
1140     	 */
1141     	lock_page(page);
1142     
1143     	/*
1144     	 * Back out if somebody else faulted in this pte while we
1145     	 * released the page table lock.
1146     	 */
1147     	spin_lock(&mm->page_table_lock);
1148     	if (!pte_same(*page_table, orig_pte)) {
1149     		UnlockPage(page);
1150     		page_cache_release(page);
1151     		return 1;
1152     	}
1153     		
1154     	/* The page isn't present yet, go ahead with the fault. */
1155     	mm->rss++;
1156     	pte = mk_pte(page, vma->vm_page_prot);
1157     
1158     	swap_free(entry);
1159     	mark_page_accessed(page);
1160     	if (exclusive_swap_page(page)) {
1161     		if (vma->vm_flags & VM_WRITE)
1162     			pte = pte_mkwrite(pte);
1163     		pte = pte_mkdirty(pte);
1164     		delete_from_swap_cache(page);
1165     	}
1166     	UnlockPage(page);
1167     
1168     	flush_page_to_ram(page);
1169     	flush_icache_page(vma, page);
1170     	set_pte(page_table, pte);
1171     
1172     	/* No need to invalidate - it was non-present before */
1173     	update_mmu_cache(vma, address, pte);
1174     	return ret;
1175     }
1176     
1177     /*
1178      * We are called with the MM semaphore and page_table_lock
1179      * spinlock held to protect against concurrent faults in
1180      * multithreaded programs. 
1181      */
1182     static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
1183     {
1184     	pte_t entry;
1185     
1186     	/* Read-only mapping of ZERO_PAGE. */
1187     	entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
1188     
1189     	/* ..except if it's a write access */
1190     	if (write_access) {
1191     		struct page *page;
1192     
1193     		/* Allocate our own private page. */
1194     		spin_unlock(&mm->page_table_lock);
1195     
1196     		page = alloc_page(GFP_HIGHUSER);
1197     		if (!page)
1198     			goto no_mem;
1199     		clear_user_highpage(page, addr);
1200     
1201     		spin_lock(&mm->page_table_lock);
1202     		if (!pte_none(*page_table)) {
1203     			page_cache_release(page);
1204     			return 1;
1205     		}
1206     		mm->rss++;
1207     		flush_page_to_ram(page);
1208     		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
1209     	}
1210     
1211     	set_pte(page_table, entry);
1212     
1213     	/* No need to invalidate - it was non-present before */
1214     	update_mmu_cache(vma, addr, entry);
1215     	return 1;	/* Minor fault */
1216     
1217     no_mem:
1218     	spin_lock(&mm->page_table_lock);
1219     	return -1;
1220     }
1221     
1222     /*
1223      * do_no_page() tries to create a new page mapping. It aggressively
1224      * tries to share with existing pages, but makes a separate copy if
1225      * the "write_access" parameter is true in order to avoid the next
1226      * page fault.
1227      *
1228      * As this is called only for pages that do not currently exist, we
1229      * do not need to flush old virtual caches or the TLB.
1230      *
1231      * This is called with the MM semaphore held and the page table
1232      * spinlock held.
1233      */
1234     static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
1235     	unsigned long address, int write_access, pte_t *page_table)
1236     {
1237     	struct page * new_page;
1238     	pte_t entry;
1239     
1240     	if (!vma->vm_ops || !vma->vm_ops->nopage)
1241     		return do_anonymous_page(mm, vma, page_table, write_access, address);
1242     	spin_unlock(&mm->page_table_lock);
1243     
1244     	/*
1245     	 * The third argument is "no_share", which tells the low-level code
1246     	 * to copy, not share the page even if sharing is possible.  It's
1247     	 * essentially an early COW detection.
1248     	 */
1249     	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
1250     
1251     	spin_lock(&mm->page_table_lock);
1252     	if (new_page == NULL)	/* no page was available -- SIGBUS */
1253     		return 0;
1254     	if (new_page == NOPAGE_OOM)
1255     		return -1;
1256     	/*
1257     	 * This silly early PAGE_DIRTY setting removes a race
1258     	 * due to the bad i386 page protection. But it's valid
1259     	 * for other architectures too.
1260     	 *
1261     	 * Note that if write_access is true, we either now have
1262     	 * an exclusive copy of the page, or this is a shared mapping,
1263     	 * so we can make it writable and dirty to avoid having to
1264     	 * handle that later.
1265     	 */
1266     	/* Only go through if we didn't race with anybody else... */
1267     	if (pte_none(*page_table)) {
1268     		++mm->rss;
1269     		flush_page_to_ram(new_page);
1270     		flush_icache_page(vma, new_page);
1271     		entry = mk_pte(new_page, vma->vm_page_prot);
1272     		if (write_access) {
1273     			entry = pte_mkwrite(pte_mkdirty(entry));
1274     		} else if (page_count(new_page) > 1 &&
1275     			   !(vma->vm_flags & VM_SHARED))
1276     			entry = pte_wrprotect(entry);
1277     		set_pte(page_table, entry);
1278     	} else {
1279     		/* One of our sibling threads was faster, back out. */
1280     		page_cache_release(new_page);
1281     		return 1;
1282     	}
1283     
1284     	/* no need to invalidate: a not-present page shouldn't be cached */
1285     	update_mmu_cache(vma, address, entry);
1286     	return 2;	/* Major fault */
1287     }
1288     
1289     /*
1290      * These routines also need to handle stuff like marking pages dirty
1291      * and/or accessed for architectures that don't do it in hardware (most
1292      * RISC architectures).  The early dirtying is also good on the i386.
1293      *
1294      * There is also a hook called "update_mmu_cache()" that architectures
1295      * with external mmu caches can use to update those (ie the Sparc or
1296      * PowerPC hashed page tables that act as extended TLBs).
1297      *
1298      * Note the "page_table_lock". It is to protect against kswapd removing
1299      * pages from under us. Note that kswapd only ever _removes_ pages, never
1300      * adds them. As such, once we have noticed that the page is not present,
1301      * we can drop the lock early.
1302      *
1303      * The adding of pages is protected by the MM semaphore (which we hold),
1304      * so we don't need to worry about a page being suddenly been added into
1305      * our VM.
1306      */
1307     static inline int handle_pte_fault(struct mm_struct *mm,
1308     	struct vm_area_struct * vma, unsigned long address,
1309     	int write_access, pte_t * pte)
1310     {
1311     	pte_t entry;
1312     
1313     	entry = *pte;
1314     	if (!pte_present(entry)) {
1315     		/*
1316     		 * If it truly wasn't present, we know that kswapd
1317     		 * and the PTE updates will not touch it later. So
1318     		 * drop the lock.
1319     		 */
1320     		if (pte_none(entry))
1321     			return do_no_page(mm, vma, address, write_access, pte);
1322     		return do_swap_page(mm, vma, address, pte, entry, write_access);
1323     	}
1324     
1325     	if (write_access) {
1326     		if (!pte_write(entry))
1327     			return do_wp_page(mm, vma, address, pte, entry);
1328     
1329     		entry = pte_mkdirty(entry);
1330     	}
1331     	entry = pte_mkyoung(entry);
1332     	establish_pte(vma, address, pte, entry);
1333     	return 1;
1334     }
1335     
1336     /*
1337      * By the time we get here, we already hold the mm semaphore
1338      */
1339     int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
1340     	unsigned long address, int write_access)
1341     {
1342     	int ret = -1;
1343     	pgd_t *pgd;
1344     	pmd_t *pmd;
1345     
1346     	current->state = TASK_RUNNING;
1347     	pgd = pgd_offset(mm, address);
1348     
1349     	/*
1350     	 * We need the page table lock to synchronize with kswapd
1351     	 * and the SMP-safe atomic PTE updates.
1352     	 */
1353     	spin_lock(&mm->page_table_lock);
1354     	pmd = pmd_alloc(mm, pgd, address);
1355     
1356     	if (pmd) {
1357     		pte_t * pte = pte_alloc(mm, pmd, address);
1358     		if (pte)
1359     			ret = handle_pte_fault(mm, vma, address, write_access, pte);
1360     	}
1361     	spin_unlock(&mm->page_table_lock);
1362     	return ret;
1363     }
1364     
1365     /*
1366      * Allocate page middle directory.
1367      *
1368      * We've already handled the fast-path in-line, and we own the
1369      * page table lock.
1370      *
1371      * On a two-level page table, this ends up actually being entirely
1372      * optimized away.
1373      */
1374     pmd_t *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
1375     {
1376     	pmd_t *new;
1377     
1378     	/* "fast" allocation can happen without dropping the lock.. */
1379     	new = pmd_alloc_one_fast(mm, address);
1380     	if (!new) {
1381     		spin_unlock(&mm->page_table_lock);
1382     		new = pmd_alloc_one(mm, address);
1383     		spin_lock(&mm->page_table_lock);
1384     		if (!new)
1385     			return NULL;
1386     
1387     		/*
1388     		 * Because we dropped the lock, we should re-check the
1389     		 * entry, as somebody else could have populated it..
1390     		 */
1391     		if (!pgd_none(*pgd)) {
1392     			pmd_free(new);
1393     			goto out;
1394     		}
1395     	}
1396     	pgd_populate(mm, pgd, new);
1397     out:
1398     	return pmd_offset(pgd, address);
1399     }
1400     
1401     /*
1402      * Allocate the page table directory.
1403      *
1404      * We've already handled the fast-path in-line, and we own the
1405      * page table lock.
1406      */
1407     pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
1408     {
1409     	if (pmd_none(*pmd)) {
1410     		pte_t *new;
1411     
1412     		/* "fast" allocation can happen without dropping the lock.. */
1413     		new = pte_alloc_one_fast(mm, address);
1414     		if (!new) {
1415     			spin_unlock(&mm->page_table_lock);
1416     			new = pte_alloc_one(mm, address);
1417     			spin_lock(&mm->page_table_lock);
1418     			if (!new)
1419     				return NULL;
1420     
1421     			/*
1422     			 * Because we dropped the lock, we should re-check the
1423     			 * entry, as somebody else could have populated it..
1424     			 */
1425     			if (!pmd_none(*pmd)) {
1426     				pte_free(new);
1427     				goto out;
1428     			}
1429     		}
1430     		pmd_populate(mm, pmd, new);
1431     	}
1432     out:
1433     	return pte_offset(pmd, address);
1434     }
1435     
1436     /*
1437      * Simplistic page force-in..
1438      */
1439     int make_pages_present(unsigned long addr, unsigned long end)
1440     {
1441     	int write;
1442     	struct mm_struct *mm = current->mm;
1443     	struct vm_area_struct * vma;
1444     
1445     	vma = find_vma(mm, addr);
1446     	write = (vma->vm_flags & VM_WRITE) != 0;
1447     	if (addr >= end)
1448     		BUG();
1449     	do {
1450     		if (handle_mm_fault(mm, vma, addr, write) < 0)
1451     			return -1;
1452     		addr += PAGE_SIZE;
1453     	} while (addr < end);
1454     	return 0;
1455     }
1456