File: /usr/src/linux/arch/s390x/mm/fault.c
1 /*
2 * arch/s390/mm/fault.c
3 *
4 * S390 version
5 * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
6 * Author(s): Hartmut Penner (hp@de.ibm.com)
7 *
8 * Derived from "arch/i386/mm/fault.c"
9 * Copyright (C) 1995 Linus Torvalds
10 */
11
12 #include <linux/config.h>
13 #include <linux/signal.h>
14 #include <linux/sched.h>
15 #include <linux/kernel.h>
16 #include <linux/errno.h>
17 #include <linux/string.h>
18 #include <linux/types.h>
19 #include <linux/ptrace.h>
20 #include <linux/mman.h>
21 #include <linux/mm.h>
22 #include <linux/smp.h>
23 #include <linux/smp_lock.h>
24 #include <linux/init.h>
25
26 #include <asm/system.h>
27 #include <asm/uaccess.h>
28 #include <asm/pgtable.h>
29 #include <asm/hardirq.h>
30
31 #ifdef CONFIG_SYSCTL
32 extern int sysctl_userprocess_debug;
33 #endif
34
35 extern void die(const char *,struct pt_regs *,long);
36
37 extern spinlock_t timerlist_lock;
38
39 /*
40 * Unlock any spinlocks which will prevent us from getting the
41 * message out
42 */
43 void bust_spinlocks(int yes)
44 {
45 spin_lock_init(&timerlist_lock);
46 if (yes) {
47 oops_in_progress = 1;
48 #ifdef CONFIG_SMP
49 atomic_set(&global_irq_lock,0);
50 #endif
51 } else {
52 int loglevel_save = console_loglevel;
53 oops_in_progress = 0;
54 /*
55 * OK, the message is on the console. Now we call printk()
56 * without oops_in_progress set so that printk will give klogd
57 * a poke. Hold onto your hats...
58 */
59 console_loglevel = 15; /* NMI oopser may have shut the console up */
60 printk(" ");
61 console_loglevel = loglevel_save;
62 }
63 }
64
65 /*
66 * This routine handles page faults. It determines the address,
67 * and the problem, and then passes it off to one of the appropriate
68 * routines.
69 *
70 * error_code:
71 * ****0004 Protection -> Write-Protection (suprression)
72 * ****0010 Segment translation -> Not present (nullification)
73 * ****0011 Page translation -> Not present (nullification)
74 * ****003B Region third exception -> Not present (nullification)
75 */
76 asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
77 {
78 struct task_struct *tsk;
79 struct mm_struct *mm;
80 struct vm_area_struct * vma;
81 unsigned long address;
82 unsigned long fixup;
83 int write;
84 int si_code = SEGV_MAPERR;
85 int kernel_address = 0;
86
87 /*
88 * get the failing address
89 * more specific the segment and page table portion of
90 * the address
91 */
92
93 address = S390_lowcore.trans_exc_code&-4096L;
94
95 tsk = current;
96 mm = tsk->mm;
97
98 if (in_interrupt() || !mm)
99 goto no_context;
100
101 /*
102 * Check which address space the address belongs to
103 */
104 switch (S390_lowcore.trans_exc_code & 3)
105 {
106 case 0: /* Primary Segment Table Descriptor */
107 kernel_address = 1;
108 goto no_context;
109
110 case 1: /* STD determined via access register */
111 if (S390_lowcore.exc_access_id == 0)
112 {
113 kernel_address = 1;
114 goto no_context;
115 }
116 if (regs && S390_lowcore.exc_access_id < NUM_ACRS)
117 {
118 if (regs->acrs[S390_lowcore.exc_access_id] == 0)
119 {
120 kernel_address = 1;
121 goto no_context;
122 }
123 if (regs->acrs[S390_lowcore.exc_access_id] == 1)
124 {
125 /* user space address */
126 break;
127 }
128 }
129 die("page fault via unknown access register", regs, error_code);
130 break;
131
132 case 2: /* Secondary Segment Table Descriptor */
133 case 3: /* Home Segment Table Descriptor */
134 /* user space address */
135 break;
136 }
137
138
139 /*
140 * When we get here, the fault happened in the current
141 * task's user address space, so we search the VMAs
142 */
143
144 down_read(&mm->mmap_sem);
145
146 vma = find_vma(mm, address);
147 if (!vma) {
148 printk("no vma for address %lX\n",address);
149 goto bad_area;
150 }
151 if (vma->vm_start <= address)
152 goto good_area;
153 if (!(vma->vm_flags & VM_GROWSDOWN))
154 goto bad_area;
155 if (expand_stack(vma, address))
156 goto bad_area;
157 /*
158 * Ok, we have a good vm_area for this memory access, so
159 * we can handle it..
160 */
161 good_area:
162 write = 0;
163 si_code = SEGV_ACCERR;
164
165 switch (error_code & 0xFF) {
166 case 0x04: /* write, present*/
167 write = 1;
168 break;
169 case 0x10: /* not present*/
170 case 0x11: /* not present*/
171 case 0x3B: /* not present*/
172 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
173 goto bad_area;
174 break;
175 default:
176 printk("code should be 4, 10 or 11 (%lX) \n",error_code&0xFF);
177 goto bad_area;
178 }
179
180 /*
181 * If for any reason at all we couldn't handle the fault,
182 * make sure we exit gracefully rather than endlessly redo
183 * the fault.
184 */
185 switch (handle_mm_fault(mm, vma, address, write)) {
186 case 1:
187 tsk->min_flt++;
188 break;
189 case 2:
190 tsk->maj_flt++;
191 break;
192 case 0:
193 goto do_sigbus;
194 default:
195 goto out_of_memory;
196 }
197
198 up_read(&mm->mmap_sem);
199 return;
200
201 /*
202 * Something tried to access memory that isn't in our memory map..
203 * Fix it, but check if it's kernel or user first..
204 */
205 bad_area:
206 up_read(&mm->mmap_sem);
207
208 /* User mode accesses just cause a SIGSEGV */
209 if (regs->psw.mask & PSW_PROBLEM_STATE) {
210 struct siginfo si;
211 tsk->thread.prot_addr = address;
212 tsk->thread.trap_no = error_code;
213 #ifndef CONFIG_SYSCTL
214 #ifdef CONFIG_PROCESS_DEBUG
215 printk("User process fault: interruption code 0x%lX\n",error_code);
216 printk("failing address: %lX\n",address);
217 show_regs(regs);
218 #endif
219 #else
220 if (sysctl_userprocess_debug) {
221 printk("User process fault: interruption code 0x%lX\n",
222 error_code);
223 printk("failing address: %lX\n", address);
224 show_regs(regs);
225 }
226 #endif
227 si.si_signo = SIGSEGV;
228 si.si_code = si_code;
229 si.si_addr = (void*) address;
230 force_sig_info(SIGSEGV, &si, tsk);
231 return;
232 }
233
234 no_context:
235 /* Are we prepared to handle this kernel fault? */
236 if ((fixup = search_exception_table(regs->psw.addr)) != 0) {
237 regs->psw.addr = fixup;
238 return;
239 }
240
241 /*
242 * Oops. The kernel tried to access some bad page. We'll have to
243 * terminate things with extreme prejudice.
244 */
245 if (kernel_address)
246 printk(KERN_ALERT "Unable to handle kernel pointer dereference"
247 " at virtual kernel address %016lx\n", address);
248 else
249 printk(KERN_ALERT "Unable to handle kernel paging request"
250 " at virtual user address %016lx\n", address);
251
252 /*
253 * need to define, which information is useful here
254 */
255
256 die("Oops", regs, error_code);
257 do_exit(SIGKILL);
258
259
260 /*
261 * We ran out of memory, or some other thing happened to us that made
262 * us unable to handle the page fault gracefully.
263 */
264 out_of_memory:
265 up_read(&mm->mmap_sem);
266 printk("VM: killing process %s\n", tsk->comm);
267 if (regs->psw.mask & PSW_PROBLEM_STATE)
268 do_exit(SIGKILL);
269 goto no_context;
270
271 do_sigbus:
272 up_read(&mm->mmap_sem);
273
274 /*
275 * Send a sigbus, regardless of whether we were in kernel
276 * or user mode.
277 */
278 tsk->thread.prot_addr = address;
279 tsk->thread.trap_no = error_code;
280 force_sig(SIGBUS, tsk);
281
282 /* Kernel mode? Handle exceptions or die */
283 if (!(regs->psw.mask & PSW_PROBLEM_STATE))
284 goto no_context;
285 }
286
287 #ifdef CONFIG_PFAULT
288 /*
289 * 'pfault' pseudo page faults routines.
290 */
291 static int pfault_disable = 0;
292
293 static int __init nopfault(char *str)
294 {
295 pfault_disable = 1;
296 return 1;
297 }
298
299 __setup("nopfault", nopfault);
300
301 typedef struct {
302 __u16 refdiagc;
303 __u16 reffcode;
304 __u16 refdwlen;
305 __u16 refversn;
306 __u64 refgaddr;
307 __u64 refselmk;
308 __u64 refcmpmk;
309 __u64 reserved;
310 } __attribute__ ((packed)) pfault_refbk_t;
311
312 typedef struct _pseudo_wait_t {
313 struct _pseudo_wait_t *next;
314 wait_queue_head_t queue;
315 unsigned long address;
316 int resolved;
317 } pseudo_wait_t;
318
319 static pseudo_wait_t *pseudo_lock_queue = NULL;
320 static spinlock_t pseudo_wait_spinlock; /* spinlock to protect lock queue */
321
322 int pfault_init(void)
323 {
324 pfault_refbk_t refbk =
325 { 0x258, 0, 5, 2, __LC_KERNEL_STACK, 1ULL << 48, 1ULL << 48, 0ULL };
326 int rc;
327
328 if (pfault_disable)
329 return -1;
330 __asm__ __volatile__(
331 " diag %1,%0,0x258\n"
332 "0: j 2f\n"
333 "1: la %0,8\n"
334 "2:\n"
335 ".section __ex_table,\"a\"\n"
336 " .align 4\n"
337 " .quad 0b,1b\n"
338 ".previous"
339 : "=d" (rc) : "a" (&refbk) : "cc" );
340 __ctl_set_bit(0, 9);
341 return rc;
342 }
343
344 void pfault_fini(void)
345 {
346 pfault_refbk_t refbk =
347 { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL };
348
349 if (pfault_disable)
350 return;
351 __ctl_clear_bit(0, 9);
352 __asm__ __volatile__(
353 " diag %0,0,0x258\n"
354 "0:\n"
355 ".section __ex_table,\"a\"\n"
356 " .align 4\n"
357 " .quad 0b,0b\n"
358 ".previous"
359 : : "a" (&refbk) : "cc" );
360 }
361
362 asmlinkage void
363 pfault_interrupt(struct pt_regs *regs, __u16 error_code)
364 {
365 DECLARE_WAITQUEUE(wait, current);
366 struct task_struct *tsk;
367 wait_queue_head_t queue;
368 wait_queue_head_t *qp;
369 __u16 subcode;
370
371 /*
372 * Get the external interruption subcode & pfault
373 * initial/completion signal bit. VM stores this
374 * in the 'cpu address' field associated with the
375 * external interrupt.
376 */
377 subcode = S390_lowcore.cpu_addr;
378 if ((subcode & 0xff00) != 0x06)
379 return;
380
381 /*
382 * Get the token (= address of kernel stack of affected task).
383 */
384 tsk = (struct task_struct *)
385 (*((unsigned long *) __LC_PFAULT_INTPARM) - THREAD_SIZE);
386
387 if (subcode & 0x0080) {
388 /* signal bit is set -> a page has been swapped in by VM */
389 qp = (wait_queue_head_t *)
390 xchg(&tsk->thread.pfault_wait, -1);
391 if (qp != NULL) {
392 /* Initial interrupt was faster than the completion
393 * interrupt. pfault_wait is valid. Set pfault_wait
394 * back to zero and wake up the process. This can
395 * safely be done because the task is still sleeping
396 * and can't procude new pfaults. */
397 tsk->thread.pfault_wait = 0ULL;
398 wake_up(qp);
399 }
400 } else {
401 /* signal bit not set -> a real page is missing. */
402 init_waitqueue_head (&queue);
403 qp = (wait_queue_head_t *)
404 xchg(&tsk->thread.pfault_wait, (addr_t) &queue);
405 if (qp != NULL) {
406 /* Completion interrupt was faster than the initial
407 * interrupt (swapped in a -1 for pfault_wait). Set
408 * pfault_wait back to zero and exit. This can be
409 * done safely because tsk is running in kernel
410 * mode and can't produce new pfaults. */
411 tsk->thread.pfault_wait = 0ULL;
412 }
413
414 /* go to sleep */
415 wait_event(queue, tsk->thread.pfault_wait == 0ULL);
416 }
417 }
418 #endif
419
420