File: /usr/src/linux/arch/s390x/mm/fault.c

1     /*
2      *  arch/s390/mm/fault.c
3      *
4      *  S390 version
5      *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
6      *    Author(s): Hartmut Penner (hp@de.ibm.com)
7      *
8      *  Derived from "arch/i386/mm/fault.c"
9      *    Copyright (C) 1995  Linus Torvalds
10      */
11     
12     #include <linux/config.h>
13     #include <linux/signal.h>
14     #include <linux/sched.h>
15     #include <linux/kernel.h>
16     #include <linux/errno.h>
17     #include <linux/string.h>
18     #include <linux/types.h>
19     #include <linux/ptrace.h>
20     #include <linux/mman.h>
21     #include <linux/mm.h>
22     #include <linux/smp.h>
23     #include <linux/smp_lock.h>
24     #include <linux/init.h>
25     
26     #include <asm/system.h>
27     #include <asm/uaccess.h>
28     #include <asm/pgtable.h>
29     #include <asm/hardirq.h>
30     
31     #ifdef CONFIG_SYSCTL
32     extern int sysctl_userprocess_debug;
33     #endif
34     
35     extern void die(const char *,struct pt_regs *,long);
36     
37     extern spinlock_t timerlist_lock;
38     
39     /*
40      * Unlock any spinlocks which will prevent us from getting the
41      * message out
42      */
43     void bust_spinlocks(int yes)
44     {
45             spin_lock_init(&timerlist_lock);
46             if (yes) {
47                     oops_in_progress = 1;
48     #ifdef CONFIG_SMP
49                     atomic_set(&global_irq_lock,0);
50     #endif
51             } else {
52                     int loglevel_save = console_loglevel;
53                     oops_in_progress = 0;
54                     /*
55                      * OK, the message is on the console.  Now we call printk()
56                      * without oops_in_progress set so that printk will give klogd
57                      * a poke.  Hold onto your hats...
58                      */
59                     console_loglevel = 15;          /* NMI oopser may have shut the console up */
60                     printk(" ");
61                     console_loglevel = loglevel_save;
62             }
63     }
64     
65     /*
66      * This routine handles page faults.  It determines the address,
67      * and the problem, and then passes it off to one of the appropriate
68      * routines.
69      *
70      * error_code:
71      *             ****0004       Protection           ->  Write-Protection  (suprression)
72      *             ****0010       Segment translation  ->  Not present       (nullification)
73      *             ****0011       Page translation     ->  Not present       (nullification)
74      *             ****003B       Region third exception ->  Not present       (nullification)
75      */
76     asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
77     {
78             struct task_struct *tsk;
79             struct mm_struct *mm;
80             struct vm_area_struct * vma;
81             unsigned long address;
82             unsigned long fixup;
83             int write;
84     	int si_code = SEGV_MAPERR;
85     	int kernel_address = 0;
86     
87             /* 
88              * get the failing address 
89              * more specific the segment and page table portion of 
90              * the address 
91              */
92     
93             address = S390_lowcore.trans_exc_code&-4096L;
94     
95             tsk = current;
96             mm = tsk->mm;
97     
98             if (in_interrupt() || !mm)
99                     goto no_context;
100     
101     	/*
102     	 * Check which address space the address belongs to
103     	 */
104     	switch (S390_lowcore.trans_exc_code & 3)
105     	{
106     	case 0: /* Primary Segment Table Descriptor */
107     		kernel_address = 1;
108     		goto no_context;
109     
110     	case 1: /* STD determined via access register */
111     		if (S390_lowcore.exc_access_id == 0)
112     		{
113     			kernel_address = 1;
114     			goto no_context;
115     		}
116     		if (regs && S390_lowcore.exc_access_id < NUM_ACRS)
117     		{
118     			if (regs->acrs[S390_lowcore.exc_access_id] == 0)
119     			{
120     				kernel_address = 1;
121     				goto no_context;
122     			}
123     			if (regs->acrs[S390_lowcore.exc_access_id] == 1)
124     			{
125     				/* user space address */
126     				break;
127     			}
128     		}
129     		die("page fault via unknown access register", regs, error_code);
130     		break;
131     
132     	case 2: /* Secondary Segment Table Descriptor */
133     	case 3: /* Home Segment Table Descriptor */
134     		/* user space address */
135     		break;
136     	}
137     
138     
139     	/*
140     	 * When we get here, the fault happened in the current
141     	 * task's user address space, so we search the VMAs
142     	 */
143     
144             down_read(&mm->mmap_sem);
145     
146             vma = find_vma(mm, address);
147             if (!vma) {
148     	        printk("no vma for address %lX\n",address);
149                     goto bad_area;
150             }
151             if (vma->vm_start <= address) 
152                     goto good_area;
153             if (!(vma->vm_flags & VM_GROWSDOWN))
154                     goto bad_area;
155             if (expand_stack(vma, address))
156                     goto bad_area;
157     /*
158      * Ok, we have a good vm_area for this memory access, so
159      * we can handle it..
160      */
161     good_area:
162             write = 0;
163     	si_code = SEGV_ACCERR;
164     
165             switch (error_code & 0xFF) {
166                     case 0x04:                                /* write, present*/
167                             write = 1;
168                             break;
169                     case 0x10:                                   /* not present*/
170                     case 0x11:                                   /* not present*/
171                     case 0x3B:                                   /* not present*/
172                             if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
173                                     goto bad_area;
174                             break;
175                     default:
176                            printk("code should be 4, 10 or 11 (%lX) \n",error_code&0xFF);  
177                            goto bad_area;
178             }
179     
180     	/*
181     	 * If for any reason at all we couldn't handle the fault,
182     	 * make sure we exit gracefully rather than endlessly redo
183     	 * the fault.
184     	 */
185     	switch (handle_mm_fault(mm, vma, address, write)) {
186     	case 1:
187     		tsk->min_flt++;
188     		break;
189     	case 2:
190     		tsk->maj_flt++;
191     		break;
192     	case 0:
193     		goto do_sigbus;
194     	default:
195     		goto out_of_memory;
196     	}
197     
198             up_read(&mm->mmap_sem);
199             return;
200     
201     /*
202      * Something tried to access memory that isn't in our memory map..
203      * Fix it, but check if it's kernel or user first..
204      */
205     bad_area:
206             up_read(&mm->mmap_sem);
207     
208             /* User mode accesses just cause a SIGSEGV */
209             if (regs->psw.mask & PSW_PROBLEM_STATE) {
210     		struct siginfo si;
211                     tsk->thread.prot_addr = address;
212                     tsk->thread.trap_no = error_code;
213     #ifndef CONFIG_SYSCTL
214     #ifdef CONFIG_PROCESS_DEBUG
215                     printk("User process fault: interruption code 0x%lX\n",error_code);
216                     printk("failing address: %lX\n",address);
217     		show_regs(regs);
218     #endif
219     #else
220     		if (sysctl_userprocess_debug) {
221     			printk("User process fault: interruption code 0x%lX\n",
222     			       error_code);
223     			printk("failing address: %lX\n", address);
224     			show_regs(regs);
225     		}
226     #endif
227     		si.si_signo = SIGSEGV;
228     		si.si_code = si_code;
229     		si.si_addr = (void*) address;
230     		force_sig_info(SIGSEGV, &si, tsk);
231                     return;
232     	}
233     
234     no_context:
235             /* Are we prepared to handle this kernel fault?  */
236             if ((fixup = search_exception_table(regs->psw.addr)) != 0) {
237                     regs->psw.addr = fixup;
238                     return;
239             }
240     
241     /*
242      * Oops. The kernel tried to access some bad page. We'll have to
243      * terminate things with extreme prejudice.
244      */
245             if (kernel_address)
246                     printk(KERN_ALERT "Unable to handle kernel pointer dereference"
247             	       " at virtual kernel address %016lx\n", address);
248             else
249                     printk(KERN_ALERT "Unable to handle kernel paging request"
250     		       " at virtual user address %016lx\n", address);
251     
252     /*
253      * need to define, which information is useful here
254      */
255     
256             die("Oops", regs, error_code);
257             do_exit(SIGKILL);
258     
259     
260     /*
261      * We ran out of memory, or some other thing happened to us that made
262      * us unable to handle the page fault gracefully.
263     */
264     out_of_memory:
265     	up_read(&mm->mmap_sem);
266     	printk("VM: killing process %s\n", tsk->comm);
267     	if (regs->psw.mask & PSW_PROBLEM_STATE)
268     		do_exit(SIGKILL);
269     	goto no_context;
270     
271     do_sigbus:
272     	up_read(&mm->mmap_sem);
273     
274     	/*
275     	 * Send a sigbus, regardless of whether we were in kernel
276     	 * or user mode.
277     	 */
278             tsk->thread.prot_addr = address;
279             tsk->thread.trap_no = error_code;
280     	force_sig(SIGBUS, tsk);
281     
282     	/* Kernel mode? Handle exceptions or die */
283     	if (!(regs->psw.mask & PSW_PROBLEM_STATE))
284     		goto no_context;
285     }
286     
287     #ifdef CONFIG_PFAULT
288     /*
289      * 'pfault' pseudo page fault routines.
290      */
291     static int pfault_disable = 0;
292     
293     static int __init nopfault(char *str)
294     {
295     	pfault_disable = 1;
296     	return 1;
297     }
298     
299     __setup("nopfault", nopfault);
300     
301     typedef struct {
302     	__u16 refdiagc;
303     	__u16 reffcode;
304     	__u16 refdwlen;
305     	__u16 refversn;
306     	__u64 refgaddr;
307     	__u64 refselmk;
308     	__u64 refcmpmk;
309     	__u64 reserved;
310     } __attribute__ ((packed)) pfault_refbk_t;
311     
312     typedef struct _pseudo_wait_t {
313            struct _pseudo_wait_t *next;
314            wait_queue_head_t queue;
315            unsigned long address;
316            int resolved;
317     } pseudo_wait_t;
318     
319     static pseudo_wait_t *pseudo_lock_queue = NULL;
320     static spinlock_t pseudo_wait_spinlock; /* spinlock to protect lock queue */
321     
322     int pfault_init(void)
323     {
324     	pfault_refbk_t refbk =
325     	{ 0x258, 0, 5, 2, __LC_KERNEL_STACK, 1ULL << 48, 1ULL << 48, 0ULL };
326             int rc;
327     
328     	if (pfault_disable)
329     		return -1;
330             __asm__ __volatile__(
331                     "    diag  %1,%0,0x258\n"
332     		"0:  j     2f\n"
333     		"1:  la    %0,8\n"
334     		"2:\n"
335     		".section __ex_table,\"a\"\n"
336     		"   .align 4\n"
337     		"   .quad  0b,1b\n"
338     		".previous"
339                     : "=d" (rc) : "a" (&refbk) : "cc" );
340     	__ctl_set_bit(0, 9);
341             return rc;
342     }
343     
344     void pfault_fini(void)
345     {
346     	pfault_refbk_t refbk =
347     	{ 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL };
348     
349     	if (pfault_disable)
350     		return;
351     	__ctl_clear_bit(0, 9);
352             __asm__ __volatile__(
353                     "    diag  %0,0,0x258\n"
354     		"0:\n"
355     		".section __ex_table,\"a\"\n"
356     		"   .align 4\n"
357     		"   .quad  0b,0b\n"
358     		".previous"
359     		: : "a" (&refbk) : "cc" );
360     }
361     
362     asmlinkage void
363     pfault_interrupt(struct pt_regs *regs, __u16 error_code)
364     {
365             DECLARE_WAITQUEUE(wait, current);
366     	struct task_struct *tsk;
367     	wait_queue_head_t queue;
368     	wait_queue_head_t *qp;
369     	__u16 subcode;
370     
371     	/*
372     	 * Get the external interruption subcode & pfault
373     	 * initial/completion signal bit. VM stores this 
374     	 * in the 'cpu address' field associated with the
375              * external interrupt. 
376     	 */
377     	subcode = S390_lowcore.cpu_addr;
378     	if ((subcode & 0xff00) != 0x06)
379     		return;
380     
381     	/*
382     	 * Get the token (= address of kernel stack of affected task).
383     	 */
384     	tsk = (struct task_struct *)
385     		(*((unsigned long *) __LC_PFAULT_INTPARM) - THREAD_SIZE);
386     
387     	if (subcode & 0x0080) {
388     		/* signal bit is set -> a page has been swapped in by VM */
389     		qp = (wait_queue_head_t *)
390     			xchg(&tsk->thread.pfault_wait, -1);
391     		if (qp != NULL) {
392     			/* Initial interrupt was faster than the completion
393     			 * interrupt. pfault_wait is valid. Set pfault_wait
394     			 * back to zero and wake up the process. This can
395     			 * safely be done because the task is still sleeping
396     			 * and can't procude new pfaults. */
397     			tsk->thread.pfault_wait = 0ULL;
398     			wake_up(qp);
399     		}
400     	} else {
401     		/* signal bit not set -> a real page is missing. */
402                     init_waitqueue_head (&queue);
403     		qp = (wait_queue_head_t *)
404     			xchg(&tsk->thread.pfault_wait, (addr_t) &queue);
405     		if (qp != NULL) {
406     			/* Completion interrupt was faster than the initial
407     			 * interrupt (swapped in a -1 for pfault_wait). Set
408     			 * pfault_wait back to zero and exit. This can be
409     			 * done safely because tsk is running in kernel 
410     			 * mode and can't produce new pfaults. */
411     			tsk->thread.pfault_wait = 0ULL;
412     		}
413     
414                     /* go to sleep */
415                     wait_event(queue, tsk->thread.pfault_wait == 0ULL);
416     	}
417     }
418     #endif
419     
420