File: /usr/src/linux/arch/cris/mm/fault.c

1     /*
2      *  linux/arch/cris/mm/fault.c
3      *
4      *  Copyright (C) 2000, 2001  Axis Communications AB
5      *
6      *  Authors:  Bjorn Wesen 
7      * 
8      *  $Log: fault.c,v $
9      *  Revision 1.18  2001/07/18 22:14:32  bjornw
10      *  Enable interrupts in the bulk of do_page_fault
11      *
12      *  Revision 1.17  2001/07/18 13:07:23  bjornw
13      *  * Detect non-existent PTE's in vmalloc pmd synchronization
14      *  * Remove comment about fast-paths for VMALLOC_START etc, because all that
15      *    was totally bogus anyway it turned out :)
16      *  * Fix detection of vmalloc-area synchronization
17      *  * Add some comments
18      *
19      *  Revision 1.16  2001/06/13 00:06:08  bjornw
20      *  current_pgd should be volatile
21      *
22      *  Revision 1.15  2001/06/13 00:02:23  bjornw
23      *  Use a separate variable to store the current pgd to avoid races in schedule
24      *
25      *  Revision 1.14  2001/05/16 17:41:07  hp
26      *  Last comment tweak further tweaked.
27      *
28      *  Revision 1.13  2001/05/15 00:58:44  hp
29      *  Expand a bit on the comment why we compare address >= TASK_SIZE rather
30      *  than >= VMALLOC_START.
31      *
32      *  Revision 1.12  2001/04/04 10:51:14  bjornw
33      *  mmap_sem is grabbed for reading
34      *
35      *  Revision 1.11  2001/03/23 07:36:07  starvik
36      *  Corrected according to review remarks
37      *
38      *  Revision 1.10  2001/03/21 16:10:11  bjornw
39      *  CRIS_FRAME_FIXUP not needed anymore, use FRAME_NORMAL
40      *
41      *  Revision 1.9  2001/03/05 13:22:20  bjornw
42      *  Spell-fix and fix in vmalloc_fault handling
43      *
44      *  Revision 1.8  2000/11/22 14:45:31  bjornw
45      *  * 2.4.0-test10 removed the set_pgdir instantaneous kernel global mapping
46      *    into all processes. Instead we fill in the missing PTE entries on demand.
47      *
48      *  Revision 1.7  2000/11/21 16:39:09  bjornw
49      *  fixup switches frametype
50      *
51      *  Revision 1.6  2000/11/17 16:54:08  bjornw
52      *  More detailed siginfo reporting
53      *
54      *
55      */
56     
57     #include <linux/signal.h>
58     #include <linux/sched.h>
59     #include <linux/kernel.h>
60     #include <linux/errno.h>
61     #include <linux/string.h>
62     #include <linux/types.h>
63     #include <linux/ptrace.h>
64     #include <linux/mman.h>
65     #include <linux/mm.h>
66     #include <linux/interrupt.h>
67     
68     #include <asm/system.h>
69     #include <asm/segment.h>
70     #include <asm/pgtable.h>
71     #include <asm/uaccess.h>
72     #include <asm/svinto.h>
73     
74     extern void die_if_kernel(const char *,struct pt_regs *,long);
75     /* forward declarations; do_page_fault is defined further down in this file */
76     asmlinkage void do_invalid_op (struct pt_regs *, unsigned long);
77     asmlinkage void do_page_fault(unsigned long address, struct pt_regs *regs,
78     			      int error_code);
79     /* both debug macros below expand to nothing, so their arguments are compiled out */
80     /* debug of low-level TLB reload */
81     #define D(x)
82     /* debug of higher-level faults */
83     #define DPG(x)
84     
85     /* current active page directory; the fault handlers walk this one instead */
86     /* of the pgd reached through the task's mm (avoids races in schedule) */
87     volatile pgd_t *current_pgd;
88     
89     /* Fast TLB-fill fault handler, called from entry.S with interrupts disabled.
90      * regs: register frame of the faulting context (passed on to do_page_fault).
91      * A miss with a present pte only reloads R_TLB_LO; everything else, apart
92      * from a bad pgdir entry (cleared and dropped), goes to do_page_fault. */
93     void
94     handle_mmu_bus_fault(struct pt_regs *regs)
95     {
96     	int cause, select;
97     	int index;	/* index, page_id, acc and inv are only used by D() debug code */
98     	int page_id;
99     	int miss, we, acc, inv;
100     	pmd_t *pmd;
101     	pte_t pte;
102     	int errcode;
103     	unsigned long address;
104     
105     	cause = *R_MMU_CAUSE;
106     	select = *R_TLB_SELECT;
107     
108     	address = cause & PAGE_MASK; /* get faulting address */
109     
110     	D(page_id = IO_EXTRACT(R_MMU_CAUSE,  page_id,   cause));
111     	D(acc     = IO_EXTRACT(R_MMU_CAUSE,  acc_excp,  cause));
112     	D(inv     = IO_EXTRACT(R_MMU_CAUSE,  inv_excp,  cause));
113     	D(index  =  IO_EXTRACT(R_TLB_SELECT, index,     select));
114     	miss    = IO_EXTRACT(R_MMU_CAUSE,  miss_excp, cause);
115     	we      = IO_EXTRACT(R_MMU_CAUSE,  we_excp,   cause);
116     
117     	/* Note: the reason we don't set errcode's r/w flag here
118     	 * using the 'we' flag, is because the latter is only given
119     	 * if there is a write-protection exception, not given as a
120     	 * general r/w access mode flag. It is currently not possible
121     	 * to get this from the MMU (TODO: check if this is the case
122     	 * for LXv2).
123     	 * 
124     	 * The page-fault code won't care, but there will be two page-
125     	 * faults instead of one for the case of a write to a non-tabled
126     	 * page (miss, then write-protection).
127     	 */
128     	/* errcode for the miss path: no page found, read access */
129     	errcode = 0;
130     
131     	D(printk("bus_fault from IRP 0x%x: addr 0x%x, miss %d, inv %d, we %d, acc %d, "
132     		 "idx %d pid %d\n",
133     		 regs->irp, address, miss, inv, we, acc, index, page_id));
134     
135     	/* for a miss, we need to reload the TLB entry */
136     
137     	if(miss) {
138     
139     		/* see if the pte exists at all
140     		 * refer through current_pgd, dont use mm->pgd
141     		 */
142     
143     		pmd = (pmd_t *)(current_pgd + pgd_index(address));
144     		if(pmd_none(*pmd))
145     			goto dofault;
146     		if(pmd_bad(*pmd)) {
147     			printk("bad pgdir entry 0x%lx at 0x%p\n", pmd_val(*pmd), pmd);
148     			pmd_clear(pmd);
149     			return;	/* the bad entry is cleared; this fault is not passed on */
150     		}
151     		pte = *pte_offset(pmd, address);
152     		if(!pte_present(pte))
153     			goto dofault;
154     
155     		D(printk(" found pte %x pg %x ", pte_val(pte), pte_page(pte)));
156     		D(
157     		  {
158     			  if(pte_val(pte) & _PAGE_SILENT_WRITE)
159     				  printk("Silent-W ");
160     			  if(pte_val(pte) & _PAGE_KERNEL)
161     				  printk("Kernel ");
162     			  if(pte_val(pte) & _PAGE_SILENT_READ)
163     				  printk("Silent-R ");
164     			  if(pte_val(pte) & _PAGE_GLOBAL)
165     				  printk("Global ");
166     			  if(pte_val(pte) & _PAGE_PRESENT)
167     				  printk("Present ");
168     			  if(pte_val(pte) & _PAGE_ACCESSED)
169     				  printk("Accessed ");
170     			  if(pte_val(pte) & _PAGE_MODIFIED)
171     				  printk("Modified ");
172     			  if(pte_val(pte) & _PAGE_READ)
173     				  printk("Readable ");
174     			  if(pte_val(pte) & _PAGE_WRITE)
175     				  printk("Writeable ");
176     			  printk("\n");
177     		  });
178     
179     		/* load up the chosen TLB entry
180     		 * this assumes the pte format is the same as the TLB_LO layout.
181     		 *
182     		 * the write to R_TLB_LO also writes the vpn and page_id fields from
183     		 * R_MMU_CAUSE, which we in this case obviously want to keep
184     		 */
185     
186     		*R_TLB_LO = pte_val(pte);
187     
188     		return;
189     	}
190     	/* not a miss: report a protection fault (bit 0); bit 1 comes from we_excp */
191     	errcode = 1 | (we << 1);
192     
193      dofault:
194     	/* leave it to the MM system fault handler below */
195     	D(printk("do_page_fault %p errcode %d\n", address, errcode));
196     	do_page_fault(address, regs, errcode);
197     }
198     
199     /*
200      * This routine handles page faults.  It determines the address,
201      * and the problem, and then passes it off to one of the appropriate
202      * routines.
203      *
204      * address:    the faulting address.  Notice that it is aligned to the page
205      *             the fault occurred in, since we only get the PFN in
206      *             R_MMU_CAUSE, not the complete address.
207      * regs:       register frame of the faulting context.
208      * error_code:
209      *	bit 0 == 0 means no page found, 1 means protection fault
210      *	bit 1 == 0 means read, 1 means write
211      *
212      * Nothing is returned.  A bad access ends in a SIGSEGV or SIGBUS for the
213      * task, in a jump to an exception-table fixup, or in an oops.
214      */
215     
216     asmlinkage void
217     do_page_fault(unsigned long address, struct pt_regs *regs,
218     	      int error_code)
219     {
220     	struct task_struct *tsk;
221     	struct mm_struct *mm;
222     	struct vm_area_struct * vma;
223     	int writeaccess;
224     	unsigned long fixup;
225     	siginfo_t info;
226     
227     
228     	tsk = current;
229     
230     	/*
231     	 * We fault-in kernel-space virtual memory on-demand. The
232     	 * 'reference' page table is init_mm.pgd.
233     	 *
234     	 * NOTE! We MUST NOT take any locks for this case. We may
235     	 * be in an interrupt or a critical region, and should
236     	 * only copy the information from the master page table,
237     	 * nothing more.
238     	 *
239     	 * NOTE2: This is done so that, when updating the vmalloc
240     	 * mappings we don't have to walk all processes pgdirs and
241     	 * add the high mappings all at once. Instead we do it as they
242     	 * are used. However vmalloc'ed page entries have the PAGE_GLOBAL
243     	 * bit set so sometimes the TLB can use a lingering entry.
244     	 *
245     	 * This verifies that the fault happens in kernel space
246     	 * and that the fault was not a protection error (error_code & 1).
247     	 */
248     
249     	if (address >= VMALLOC_START &&
250     	    !(error_code & 1) &&
251     	    !user_mode(regs))
252     		goto vmalloc_fault;
253     
254     	/* we can and should enable interrupts at this point */
255     	sti();
256     
257     	mm = tsk->mm;
258     	writeaccess = error_code & 2;
259     	info.si_code = SEGV_MAPERR;
260     
261     	/*
262     	 * If we're in an interrupt or have no user
263     	 * context, we must not take the fault..
264     	 */
265     
266     	if (in_interrupt() || !mm)
267     		goto no_context;
268     
269     	down_read(&mm->mmap_sem);
270     	vma = find_vma(mm, address);
271     	if (!vma)
272     		goto bad_area;
273     	if (vma->vm_start <= address)
274     		goto good_area;
275     	if (!(vma->vm_flags & VM_GROWSDOWN))
276     		goto bad_area;
277     	if (user_mode(regs)) {
278     		/*
279     		 * accessing the stack below usp is always a bug.
280     		 * we get page-aligned addresses so we can only check
281     		 * if we're within a page from usp, but that might be
282     		 * enough to catch brutal errors at least.
283     		 */
284     		if (address + PAGE_SIZE < rdusp())
285     			goto bad_area;
286     	}
287     	if (expand_stack(vma, address))
288     		goto bad_area;
289     
290     	/*
291     	 * Ok, we have a good vm_area for this memory access, so
292     	 * we can handle it..
293     	 */
294     
295      good_area:
296     	info.si_code = SEGV_ACCERR;
297     
298     	/* first do some preliminary protection checks */
299     
300     	if (writeaccess) {
301     		if (!(vma->vm_flags & VM_WRITE))
302     			goto bad_area;
303     	} else {
304     		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
305     			goto bad_area;
306     	}
307     
308     	/*
309     	 * If for any reason at all we couldn't handle the fault,
310     	 * make sure we exit gracefully rather than endlessly redo
311     	 * the fault.
312     	 */
313     
314     	switch (handle_mm_fault(mm, vma, address, writeaccess)) {
315     	case 1:
316     		tsk->min_flt++;
317     		break;
318     	case 2:
319     		tsk->maj_flt++;
320     		break;
321     	case 0:
322     		goto do_sigbus;
323     	default:
324     		goto out_of_memory;
325     	}
326     
327     	up_read(&mm->mmap_sem);
328     	return;
329     
330     	/*
331     	 * Something tried to access memory that isn't in our memory map..
332     	 * Fix it, but check if it's kernel or user first..
333     	 */
334     
335      bad_area:
336     
337     	up_read(&mm->mmap_sem);
338     
339      bad_area_nosemaphore:
340     	DPG(show_registers(regs));
341     
342     	/* User mode accesses just cause a SIGSEGV */
343     
344     	if (user_mode(regs)) {
345     		info.si_signo = SIGSEGV;
346     		info.si_errno = 0;
347     		/* info.si_code has been set above */
348     		info.si_addr = (void *)address;
349     		force_sig_info(SIGSEGV, &info, tsk);
350     		return;
351     	}
352     
353      no_context:
354     
355     	/* Are we prepared to handle this kernel fault?
356     	 *
357     	 * (The kernel has valid exception-points in the source 
358     	 *  when it accesses user-memory. When it fails in one
359     	 *  of those points, we find it in a table and do a jump
360     	 *  to some fixup code that loads an appropriate error
361     	 *  code)
362     	 */
363     
364     	if ((fixup = search_exception_table(regs->irp)) != 0) {
365     		/* Adjust the instruction pointer in the stackframe */
366     
367     		regs->irp = fixup;
368     
369     		/* We do not want to return by restoring the CPU-state
370     		 * anymore, so switch frame-types (see ptrace.h)
371     		 */
372     
373     		regs->frametype = CRIS_FRAME_NORMAL;
374     
375     		D(printk("doing fixup to 0x%x\n", fixup));
376     		return;
377     	}
378     
379     	/*
380     	 * Oops. The kernel tried to access some bad page. We'll have to
381     	 * terminate things with extreme prejudice.
382     	 */
383     
384     	if ((unsigned long) (address) < PAGE_SIZE)
385     		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
386     	else
387     		printk(KERN_ALERT "Unable to handle kernel access");
388     	printk(" at virtual address %08lx\n",address);
389     
390     	die_if_kernel("Oops", regs, error_code);
391     
392     	do_exit(SIGKILL);
393     
394     	/*
395     	 * We ran out of memory, or some other thing happened to us that made
396     	 * us unable to handle the page fault gracefully.
397     	 */
398     
399      out_of_memory:
400     	up_read(&mm->mmap_sem);
401     	printk("VM: killing process %s\n", tsk->comm);
402     	if(user_mode(regs))
403     		do_exit(SIGKILL);
404     	goto no_context;
405     
406      do_sigbus:
407     	up_read(&mm->mmap_sem);
408     
409     	/*
410     	 * Send a sigbus, regardless of whether we were in kernel
411     	 * or user mode.
412     	 */
413     	info.si_signo = SIGBUS;
414     	info.si_errno = 0;
415     	info.si_code = BUS_ADRERR;
416     	info.si_addr = (void *)address;
417     	force_sig_info(SIGBUS, &info, tsk);
418     
419     	/* Kernel mode? Handle exceptions or die */
420     	if (!user_mode(regs))
421     		goto no_context;
422     	return;
423     
424     vmalloc_fault:
425     	{
426     		/*
427     		 * Synchronize this task's top level page-table
428     		 * with the 'reference' page table.
429     		 *
430     		 * Use current_pgd instead of tsk->active_mm->pgd
431     		 * since the latter might be unavailable if this
432     		 * code is executed in a misfortunately run irq
433     		 * (like inside schedule() between switch_mm and
434     		 *  switch_to...).
435     		 */
436     
437     		int offset = pgd_index(address);
438     		pgd_t *pgd, *pgd_k;
439     		pmd_t *pmd, *pmd_k;
440     		pte_t *pte_k;
441     
442     		pgd = current_pgd + offset;
443     		pgd_k = init_mm.pgd + offset;
444     
445     		/* Since we're two-level, we don't need to do both
446     		 * set_pgd and set_pmd (they do the same thing). If
447     		 * we go three-level at some point, do the right thing
448     		 * with pgd_present and set_pgd here. 
449     		 * 
450     		 * Also, since the vmalloc area is global, we don't
451     		 * need to copy individual PTE's, it is enough to
452     		 * copy the pgd pointer into the pte page of the
453     		 * root task. If that is there, we'll find our pte if
454     		 * it exists.
455     		 */
456     
457     		pmd = pmd_offset(pgd, address);
458     		pmd_k = pmd_offset(pgd_k, address);
459     
460     		if (!pmd_present(*pmd_k))
461     			goto bad_area_nosemaphore;
462     
463     		set_pmd(pmd, *pmd_k);
464     
465     		/* Make sure the actual PTE exists as well to
466     		 * catch kernel vmalloc-area accesses to non-mapped
467     		 * addresses. If we don't do this, this will just
468     		 * silently loop forever.
469     		 */
470     
471     		pte_k = pte_offset(pmd_k, address);
472     		if (!pte_present(*pte_k))
473     			goto no_context;
474     
475     		return;
476     	}
477     }
478