File: /usr/src/linux/arch/ia64/kernel/unaligned.c
1 /*
2 * Architecture-specific unaligned trap handling.
3 *
4 * Copyright (C) 1999-2001 Hewlett-Packard Co
5 * Copyright (C) 1999-2000 Stephane Eranian <eranian@hpl.hp.com>
6 * Copyright (C) 2001 David Mosberger-Tang <davidm@hpl.hp.com>
7 *
8 * 2001/01/17 Add support emulation of unaligned kernel accesses.
9 */
10 #include <linux/kernel.h>
11 #include <linux/sched.h>
12 #include <linux/smp_lock.h>
13
14 #include <asm/uaccess.h>
15 #include <asm/rse.h>
16 #include <asm/processor.h>
17 #include <asm/unaligned.h>
18
/*
 * Provided elsewhere in the kernel; declared here because this file calls it
 * from emulate_load_updates().
 *
 * NOTE(review): the name, and the fact that emulate_load_updates() carries on
 * with the base-register update after calling it, suggest that this function
 * returns when the fault did not come from kernel mode.  The former
 * __attribute__((noreturn)) is therefore dropped so the compiler cannot
 * discard the code that follows a call -- confirm against the definition.
 */
extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
20
/* Change the #undef into a #define to compile the debug output in. */
#undef DEBUG_UNALIGNED_TRAP

#ifdef DEBUG_UNALIGNED_TRAP
/* printk() a message prefixed with "<function>.<line>: ". */
# define DPRINT(a...)	do { printk("%s.%u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
/* Hex-dump LEN bytes starting at VP, preceded by the label STR. */
# define DDUMP(str,vp,len)	dump(str, vp, len)

/*
 * Print STR followed by LEN bytes read from VP, each as a two-digit hex
 * value, and terminate the line.
 */
static void
dump (const char *str, void *vp, size_t len)
{
	unsigned char *cp = vp;
	size_t i;	/* same type as len, so the loop bound is never sign-converted */

	printk("%s", str);
	for (i = 0; i < len; ++i)
		printk (" %02x", *cp++);
	printk("\n");
}
#else
/* Debugging disabled: both helpers expand to nothing. */
# define DPRINT(a...)
# define DDUMP(str,vp,len)
#endif
42
/* Lowest general register number handled through the register stack (r32). */
#define IA64_FIRST_STACKED_GR	32

/* Lowest FP register number kept in thread.fph (see setfpreg/getfpreg). */
#define IA64_FIRST_ROTATING_FR	32

/*
 * OR-mask with bits [63:8] set.  The emulation code builds the low 8 bits of
 * a 9-bit immediate and ORs this in when the sign bit (ld.m) is set.
 */
#define SIGN_EXT9		0xffffffffffffff00ul
46
/*
 * M-unit instruction decoding:
 *
 *   opcode  |  m   |   x6    |
 *  ---------|------|---------|
 *   [40:37] | [36] | [35:30] |
 *  ---------|------|---------|
 *      4    |  1   |    6    | = 11 bits
 *
 * Bits [31:30] do not help to tell loads from stores, so only [35:32] of x6
 * are used.  Together with the opcode this gives a 9-bit field ([40:32]).
 * The 'e' in the mask clears the m bit ([36]): the m bit is examined later,
 * inside the load/store emulation itself.
 */
#define IA64_OPCODE_MASK	0x1ef
#define IA64_OPCODE_SHIFT	32
63
/*
 * Table C-28: Integer Load/Store
 *
 * Encodings with [35:32] = 0x6, 0x7, 0xE, 0xF are ignored.
 *
 * ld8.fill and st8.fill MUST be aligned because the RNaT bit they use is
 * selected by the address (bits [8:3]), so emulation has to fail for them.
 */
#define LD_OP			0x080
#define LDS_OP			0x081
#define LDA_OP			0x082
#define LDSA_OP			0x083
#define LDBIAS_OP		0x084
#define LDACQ_OP		0x085
/* 0x086 and 0x087 are not relevant */
#define LDCCLR_OP		0x088
#define LDCNC_OP		0x089
#define LDCCLRACQ_OP		0x08a
#define ST_OP			0x08c
#define STREL_OP		0x08d
/* 0x08e and 0x08f are not relevant */
85
86 /*
87 * Table C-29 Integer Load +Reg
88 *
89 * we use the ld->m (bit [36:36]) field to determine whether or not we have
90 * a load/store of this form.
91 */
92
/*
 * Table C-30: Integer Load/Store +Imm
 *
 * Encodings with [35:32] = 0x6, 0x7, 0xE, 0xF are ignored.
 *
 * ld8.fill and st8.fill must be aligned because the NaT bit they use depends
 * on the address; emulation must fail and the program has to be fixed.
 */
#define LD_IMM_OP		0x0a0
#define LDS_IMM_OP		0x0a1
#define LDA_IMM_OP		0x0a2
#define LDSA_IMM_OP		0x0a3
#define LDBIAS_IMM_OP		0x0a4
#define LDACQ_IMM_OP		0x0a5
/* 0x0a6 and 0x0a7 are not relevant */
#define LDCCLR_IMM_OP		0x0a8
#define LDCNC_IMM_OP		0x0a9
#define LDCCLRACQ_IMM_OP	0x0aa
#define ST_IMM_OP		0x0ac
#define STREL_IMM_OP		0x0ad
/* 0x0ae and 0x0af are not relevant */
114
/*
 * Table C-32: Floating-point Load/Store
 */
#define LDF_OP			0x0c0
#define LDFS_OP			0x0c1
#define LDFA_OP			0x0c2
#define LDFSA_OP		0x0c3
/* 0x0c6 is irrelevant */
#define LDFCCLR_OP		0x0c8
#define LDFCNC_OP		0x0c9
/* 0x0cb is irrelevant */
#define STF_OP			0x0cc
127
128 /*
129 * Table C-33 Floating-point Load +Reg
130 *
131 * we use the ld->m (bit [36:36]) field to determine whether or not we have
132 * a load/store of this form.
133 */
134
/*
 * Table C-34: Floating-point Load/Store +Imm
 */
#define LDF_IMM_OP		0x0e0
#define LDFS_IMM_OP		0x0e1
#define LDFA_IMM_OP		0x0e2
#define LDFSA_IMM_OP		0x0e3
/* 0x0e6 is irrelevant */
#define LDFCCLR_IMM_OP		0x0e8
#define LDFCNC_IMM_OP		0x0e9
#define STF_IMM_OP		0x0ec
146
/*
 * Bit-field view of a 41-bit M-unit load/store instruction held in the low
 * bits of a 64-bit word.  Several fields are reused depending on the
 * instruction form; the emulation routines in this file read them as follows:
 *
 *   r1   - target register of a load, or low 7 bits of the store immediate
 *   imm  - low 7 bits of the load immediate, or the r2 / second-FP register
 *   r3   - base register
 *   x    - bit 7 of the 9-bit immediate
 *   m    - sign bit of the immediate, or "register update" flag
 */
typedef struct {
	unsigned long qp:6;	/* [0:5]   */
	unsigned long r1:7;	/* [6:12]  */
	unsigned long imm:7;	/* [13:19] */
	unsigned long r3:7;	/* [20:26] */
	unsigned long x:1;	/* [27:27] */
	unsigned long hint:2;	/* [28:29] */
	unsigned long x6_sz:2;	/* [30:31] access-size selector */
	unsigned long x6_op:4;	/* [32:35] x6 = x6_sz|x6_op */
	unsigned long m:1;	/* [36:36] */
	unsigned long op:4;	/* [37:40] */
	unsigned long pad:23;	/* [41:63] */
} load_store_t;
160
161
/* Kind of base-register update requested from emulate_load_updates(). */
typedef enum {
	UPD_IMMEDIATE,		/* ldXZ r1=[r3],imm(9) */
	UPD_REG			/* ldXZ r1=[r3],r2 */
} update_t;
166
/*
 * Register save-area lookup.
 *
 * Instead of large switch/case statements, tables record where each register
 * lives in the saved state.  Every table entry packs two things:
 *
 *   bit 0      - 1 if the register is in switch_stack, 0 if it is in pt_regs
 *   bits 15:1  - byte offset of the register inside that structure
 *
 * A 2-byte entry should be enough to hold any such offset.
 *
 * If the calling convention (and therefore pt_regs/switch_stack) changes,
 * simply swap RSW for RPT or vice versa in the tables.
 */

/* Byte offset of member x inside pt_regs / switch_stack. */
#define RPO(x)		((size_t) &((struct pt_regs *)0)->x)
#define RSO(x)		((size_t) &((struct switch_stack *)0)->x)

/* Table entry for a register saved in pt_regs / switch_stack. */
#define RPT(x)		(RPO(x) << 1)
#define RSW(x)		(1| RSO(x)<<1)

/* Decode a gr_info[] entry: byte offset, and "is in switch_stack" flag. */
#define GR_OFFS(x)	(gr_info[x]>>1)
#define GR_IN_SW(x)	(gr_info[x] & 0x1)

/* Decode an fr_info[] entry: byte offset, and "is in switch_stack" flag. */
#define FR_OFFS(x)	(fr_info[x]>>1)
#define FR_IN_SW(x)	(fr_info[x] & 0x1)
190
191 static u16 gr_info[32]={
192 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
193
194 RPT(r1), RPT(r2), RPT(r3),
195
196 RSW(r4), RSW(r5), RSW(r6), RSW(r7),
197
198 RPT(r8), RPT(r9), RPT(r10), RPT(r11),
199 RPT(r12), RPT(r13), RPT(r14), RPT(r15),
200
201 RPT(r16), RPT(r17), RPT(r18), RPT(r19),
202 RPT(r20), RPT(r21), RPT(r22), RPT(r23),
203 RPT(r24), RPT(r25), RPT(r26), RPT(r27),
204 RPT(r28), RPT(r29), RPT(r30), RPT(r31)
205 };
206
207 static u16 fr_info[32]={
208 0, /* constant : WE SHOULD NEVER GET THIS */
209 0, /* constant : WE SHOULD NEVER GET THIS */
210
211 RSW(f2), RSW(f3), RSW(f4), RSW(f5),
212
213 RPT(f6), RPT(f7), RPT(f8), RPT(f9),
214
215 RSW(f10), RSW(f11), RSW(f12), RSW(f13), RSW(f14),
216 RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
217 RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
218 RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
219 RSW(f30), RSW(f31)
220 };
221
/*
 * Invalidate the ALAT entry for integer register REGNO.
 *
 * The register operand of invala.e is emitted as a literal in the assembly
 * text, so there is one switch arm per register (r0-r127).  A REGNO outside
 * 0..127 matches no arm and does nothing.
 */
static void
invala_gr (int regno)
{
	/* One arm: invalidate the entry of register n. */
#	define INVALA_GR(n)	case (n): __asm__ __volatile__ ("invala.e r%0" :: "i"(n)); break;
	/* Eight consecutive arms starting at register b. */
#	define INVALA_GR8(b)							\
		INVALA_GR((b) + 0) INVALA_GR((b) + 1)				\
		INVALA_GR((b) + 2) INVALA_GR((b) + 3)				\
		INVALA_GR((b) + 4) INVALA_GR((b) + 5)				\
		INVALA_GR((b) + 6) INVALA_GR((b) + 7)

	switch (regno) {
		INVALA_GR8(  0)
		INVALA_GR8(  8)
		INVALA_GR8( 16)
		INVALA_GR8( 24)
		INVALA_GR8( 32)
		INVALA_GR8( 40)
		INVALA_GR8( 48)
		INVALA_GR8( 56)
		INVALA_GR8( 64)
		INVALA_GR8( 72)
		INVALA_GR8( 80)
		INVALA_GR8( 88)
		INVALA_GR8( 96)
		INVALA_GR8(104)
		INVALA_GR8(112)
		INVALA_GR8(120)
	}
#	undef INVALA_GR8
#	undef INVALA_GR
}
248
/*
 * Invalidate the ALAT entry for floating-point register REGNO.
 *
 * The register operand of invala.e is emitted as a literal in the assembly
 * text, so there is one switch arm per register (f0-f127).  A REGNO outside
 * 0..127 matches no arm and does nothing.
 */
static void
invala_fr (int regno)
{
	/* One arm: invalidate the entry of register n. */
#	define INVALA_FR(n)	case (n): __asm__ __volatile__ ("invala.e f%0" :: "i"(n)); break;
	/* Eight consecutive arms starting at register b. */
#	define INVALA_FR8(b)							\
		INVALA_FR((b) + 0) INVALA_FR((b) + 1)				\
		INVALA_FR((b) + 2) INVALA_FR((b) + 3)				\
		INVALA_FR((b) + 4) INVALA_FR((b) + 5)				\
		INVALA_FR((b) + 6) INVALA_FR((b) + 7)

	switch (regno) {
		INVALA_FR8(  0)
		INVALA_FR8(  8)
		INVALA_FR8( 16)
		INVALA_FR8( 24)
		INVALA_FR8( 32)
		INVALA_FR8( 40)
		INVALA_FR8( 48)
		INVALA_FR8( 56)
		INVALA_FR8( 64)
		INVALA_FR8( 72)
		INVALA_FR8( 80)
		INVALA_FR8( 88)
		INVALA_FR8( 96)
		INVALA_FR8(104)
		INVALA_FR8(112)
		INVALA_FR8(120)
	}
#	undef INVALA_FR8
#	undef INVALA_FR
}
275
276 static void
277 set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
278 {
279 struct switch_stack *sw = (struct switch_stack *) regs - 1;
280 unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
281 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
282 unsigned long rnats, nat_mask;
283 unsigned long on_kbs;
284 long sof = (regs->cr_ifs) & 0x7f;
285
286 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld\n",
287 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f);
288
289 if ((r1 - 32) >= sof) {
290 /* this should never happen, as the "rsvd register fault" has higher priority */
291 DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
292 return;
293 }
294
295 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
296 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + (r1 - 32));
297 if (addr >= kbs) {
298 /* the register is on the kernel backing store: easy... */
299 rnat_addr = ia64_rse_rnat_addr(addr);
300 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
301 rnat_addr = &sw->ar_rnat;
302 nat_mask = 1UL << ia64_rse_slot_num(addr);
303
304 *addr = val;
305 if (nat)
306 *rnat_addr |= nat_mask;
307 else
308 *rnat_addr &= ~nat_mask;
309 return;
310 }
311
312 /*
313 * Avoid using user_mode() here: with "epc", we cannot use the privilege level to
314 * infer whether the interrupt task was running on the kernel backing store.
315 */
316 if (regs->r12 >= TASK_SIZE) {
317 DPRINT("ignoring kernel write to r%lu; register isn't on the RBS!", r1);
318 return;
319 }
320
321 bspstore = (unsigned long *) regs->ar_bspstore;
322 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
323 bsp = ia64_rse_skip_regs(ubs_end, -sof);
324 addr = ia64_rse_skip_regs(bsp, r1 - 32);
325
326 DPRINT("ubs_end=%p bsp=%p addr=%px\n", (void *) ubs_end, (void *) bsp, (void *) addr);
327
328 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
329
330 rnat_addr = ia64_rse_rnat_addr(addr);
331
332 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
333 DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
334 (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
335
336 nat_mask = 1UL << ia64_rse_slot_num(addr);
337 if (nat)
338 rnats |= nat_mask;
339 else
340 rnats &= ~nat_mask;
341 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
342
343 DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
344 }
345
346
347 static void
348 get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
349 {
350 struct switch_stack *sw = (struct switch_stack *) regs - 1;
351 unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
352 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
353 unsigned long rnats, nat_mask;
354 unsigned long on_kbs;
355 long sof = (regs->cr_ifs) & 0x7f;
356
357 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld\n",
358 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f);
359
360 if ((r1 - 32) >= sof) {
361 /* this should never happen, as the "rsvd register fault" has higher priority */
362 DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
363 return;
364 }
365
366 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
367 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + (r1 - 32));
368 if (addr >= kbs) {
369 /* the register is on the kernel backing store: easy... */
370 *val = *addr;
371 if (nat) {
372 rnat_addr = ia64_rse_rnat_addr(addr);
373 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
374 rnat_addr = &sw->ar_rnat;
375 nat_mask = 1UL << ia64_rse_slot_num(addr);
376 *nat = (*rnat_addr & nat_mask) != 0;
377 }
378 return;
379 }
380
381 /*
382 * Avoid using user_mode() here: with "epc", we cannot use the privilege level to
383 * infer whether the interrupt task was running on the kernel backing store.
384 */
385 if (regs->r12 >= TASK_SIZE) {
386 DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
387 return;
388 }
389
390 bspstore = (unsigned long *)regs->ar_bspstore;
391 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
392 bsp = ia64_rse_skip_regs(ubs_end, -sof);
393 addr = ia64_rse_skip_regs(bsp, r1 - 32);
394
395 DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
396
397 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
398
399 if (nat) {
400 rnat_addr = ia64_rse_rnat_addr(addr);
401 nat_mask = 1UL << ia64_rse_slot_num(addr);
402
403 DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
404
405 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
406 *nat = (rnats & nat_mask) != 0;
407 }
408 }
409
410
411 static void
412 setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
413 {
414 struct switch_stack *sw = (struct switch_stack *) regs - 1;
415 unsigned long addr;
416 unsigned long bitmask;
417 unsigned long *unat;
418
419 /*
420 * First takes care of stacked registers
421 */
422 if (regnum >= IA64_FIRST_STACKED_GR) {
423 set_rse_reg(regs, regnum, val, nat);
424 return;
425 }
426
427 /*
428 * Using r0 as a target raises a General Exception fault which has higher priority
429 * than the Unaligned Reference fault.
430 */
431
432 /*
433 * Now look at registers in [0-31] range and init correct UNAT
434 */
435 if (GR_IN_SW(regnum)) {
436 addr = (unsigned long)sw;
437 unat = &sw->ar_unat;
438 } else {
439 addr = (unsigned long)regs;
440 unat = &sw->caller_unat;
441 }
442 DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
443 addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
444 /*
445 * add offset from base of struct
446 * and do it !
447 */
448 addr += GR_OFFS(regnum);
449
450 *(unsigned long *)addr = val;
451
452 /*
453 * We need to clear the corresponding UNAT bit to fully emulate the load
454 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
455 */
456 bitmask = 1UL << (addr >> 3 & 0x3f);
457 DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
458 if (nat) {
459 *unat |= bitmask;
460 } else {
461 *unat &= ~bitmask;
462 }
463 DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
464 }
465
/*
 * Index into thread.fph[] for FP register r (r >= IA64_FIRST_ROTATING_FR).
 * The argument is parenthesized so that an expression argument is subtracted
 * from as a whole.
 */
#define IA64_FPH_OFFS(r) ((r) - IA64_FIRST_ROTATING_FR)
467
468 static void
469 setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
470 {
471 struct switch_stack *sw = (struct switch_stack *)regs - 1;
472 unsigned long addr;
473
474 /*
475 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
476 * Fault. Thus, when we get here, we know the partition is enabled.
477 * To update f32-f127, there are three choices:
478 *
479 * (1) save f32-f127 to thread.fph and update the values there
480 * (2) use a gigantic switch statement to directly access the registers
481 * (3) generate code on the fly to update the desired register
482 *
483 * For now, we are using approach (1).
484 */
485 if (regnum >= IA64_FIRST_ROTATING_FR) {
486 ia64_sync_fph(current);
487 current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
488 } else {
489 /*
490 * pt_regs or switch_stack ?
491 */
492 if (FR_IN_SW(regnum)) {
493 addr = (unsigned long)sw;
494 } else {
495 addr = (unsigned long)regs;
496 }
497
498 DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
499
500 addr += FR_OFFS(regnum);
501 *(struct ia64_fpreg *)addr = *fpval;
502
503 /*
504 * mark the low partition as being used now
505 *
506 * It is highly unlikely that this bit is not already set, but
507 * let's do it for safety.
508 */
509 regs->cr_ipsr |= IA64_PSR_MFL;
510 }
511 }
512
/*
 * float_spill_f0() and float_spill_f1() produce the spilled form of the two
 * constant floating-point registers, so that they can be used with stfX.
 *
 * Spill f0 into *FINAL.
 */
static inline void
float_spill_f0 (struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("stf.spill [%0]=f0"
			      : /* no outputs */
			      : "r"(final)
			      : "memory");
}
522
/* Spill the constant register f1 into *FINAL. */
static inline void
float_spill_f1 (struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("stf.spill [%0]=f1"
			      : /* no outputs */
			      : "r"(final)
			      : "memory");
}
528
529 static void
530 getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
531 {
532 struct switch_stack *sw = (struct switch_stack *) regs - 1;
533 unsigned long addr;
534
535 /*
536 * From EAS-2.5: FPDisableFault has higher priority than
537 * Unaligned Fault. Thus, when we get here, we know the partition is
538 * enabled.
539 *
540 * When regnum > 31, the register is still live and we need to force a save
541 * to current->thread.fph to get access to it. See discussion in setfpreg()
542 * for reasons and other ways of doing this.
543 */
544 if (regnum >= IA64_FIRST_ROTATING_FR) {
545 ia64_flush_fph(current);
546 *fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
547 } else {
548 /*
549 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
550 * not saved, we must generate their spilled form on the fly
551 */
552 switch(regnum) {
553 case 0:
554 float_spill_f0(fpval);
555 break;
556 case 1:
557 float_spill_f1(fpval);
558 break;
559 default:
560 /*
561 * pt_regs or switch_stack ?
562 */
563 addr = FR_IN_SW(regnum) ? (unsigned long)sw
564 : (unsigned long)regs;
565
566 DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
567 FR_IN_SW(regnum), addr, FR_OFFS(regnum));
568
569 addr += FR_OFFS(regnum);
570 *fpval = *(struct ia64_fpreg *)addr;
571 }
572 }
573 }
574
575
576 static void
577 getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
578 {
579 struct switch_stack *sw = (struct switch_stack *) regs - 1;
580 unsigned long addr, *unat;
581
582 if (regnum >= IA64_FIRST_STACKED_GR) {
583 get_rse_reg(regs, regnum, val, nat);
584 return;
585 }
586
587 /*
588 * take care of r0 (read-only always evaluate to 0)
589 */
590 if (regnum == 0) {
591 *val = 0;
592 if (nat)
593 *nat = 0;
594 return;
595 }
596
597 /*
598 * Now look at registers in [0-31] range and init correct UNAT
599 */
600 if (GR_IN_SW(regnum)) {
601 addr = (unsigned long)sw;
602 unat = &sw->ar_unat;
603 } else {
604 addr = (unsigned long)regs;
605 unat = &sw->caller_unat;
606 }
607
608 DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
609
610 addr += GR_OFFS(regnum);
611
612 *val = *(unsigned long *)addr;
613
614 /*
615 * do it only when requested
616 */
617 if (nat)
618 *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
619 }
620
621 static void
622 emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
623 {
624 /*
625 * IMPORTANT:
626 * Given the way we handle unaligned speculative loads, we should
627 * not get to this point in the code but we keep this sanity check,
628 * just in case.
629 */
630 if (ld.x6_op == 1 || ld.x6_op == 3) {
631 printk(KERN_ERR __FUNCTION__": register update on speculative load, error\n");
632 die_if_kernel("unaligned reference on specualtive load with register update\n",
633 regs, 30);
634 }
635
636
637 /*
638 * at this point, we know that the base register to update is valid i.e.,
639 * it's not r0
640 */
641 if (type == UPD_IMMEDIATE) {
642 unsigned long imm;
643
644 /*
645 * Load +Imm: ldXZ r1=[r3],imm(9)
646 *
647 *
648 * form imm9: [13:19] contain the first 7 bits
649 */
650 imm = ld.x << 7 | ld.imm;
651
652 /*
653 * sign extend (1+8bits) if m set
654 */
655 if (ld.m) imm |= SIGN_EXT9;
656
657 /*
658 * ifa == r3 and we know that the NaT bit on r3 was clear so
659 * we can directly use ifa.
660 */
661 ifa += imm;
662
663 setreg(ld.r3, ifa, 0, regs);
664
665 DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
666
667 } else if (ld.m) {
668 unsigned long r2;
669 int nat_r2;
670
671 /*
672 * Load +Reg Opcode: ldXZ r1=[r3],r2
673 *
674 * Note: that we update r3 even in the case of ldfX.a
675 * (where the load does not happen)
676 *
677 * The way the load algorithm works, we know that r3 does not
678 * have its NaT bit set (would have gotten NaT consumption
679 * before getting the unaligned fault). So we can use ifa
680 * which equals r3 at this point.
681 *
682 * IMPORTANT:
683 * The above statement holds ONLY because we know that we
684 * never reach this code when trying to do a ldX.s.
685 * If we ever make it to here on an ldfX.s then
686 */
687 getreg(ld.imm, &r2, &nat_r2, regs);
688
689 ifa += r2;
690
691 /*
692 * propagate Nat r2 -> r3
693 */
694 setreg(ld.r3, ifa, nat_r2, regs);
695
696 DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
697 }
698 }
699
700
701 static int
702 emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
703 {
704 unsigned int len = 1 << ld.x6_sz;
705
706 /*
707 * r0, as target, doesn't need to be checked because Illegal Instruction
708 * faults have higher priority than unaligned faults.
709 *
710 * r0 cannot be found as the base as it would never generate an
711 * unaligned reference.
712 */
713
714 /*
715 * ldX.a we don't try to emulate anything but we must invalidate the ALAT entry.
716 * See comment below for explanation on how we handle ldX.a
717 */
718 if (ld.x6_op != 0x2) {
719 unsigned long val = 0;
720
721 if (len != 2 && len != 4 && len != 8) {
722 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
723 return -1;
724 }
725 /* this assumes little-endian byte-order: */
726 if (copy_from_user(&val, (void *) ifa, len))
727 return -1;
728 setreg(ld.r1, val, 0, regs);
729 }
730
731 /*
732 * check for updates on any kind of loads
733 */
734 if (ld.op == 0x5 || ld.m)
735 emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
736
737 /*
738 * handling of various loads (based on EAS2.4):
739 *
740 * ldX.acq (ordered load):
741 * - acquire semantics would have been used, so force fence instead.
742 *
743 * ldX.c.clr (check load and clear):
744 * - if we get to this handler, it's because the entry was not in the ALAT.
745 * Therefore the operation reverts to a normal load
746 *
747 * ldX.c.nc (check load no clear):
748 * - same as previous one
749 *
750 * ldX.c.clr.acq (ordered check load and clear):
751 * - same as above for c.clr part. The load needs to have acquire semantics. So
752 * we use the fence semantics which is stronger and thus ensures correctness.
753 *
754 * ldX.a (advanced load):
755 * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
756 * address doesn't match requested size alignement. This means that we would
757 * possibly need more than one load to get the result.
758 *
759 * The load part can be handled just like a normal load, however the difficult
760 * part is to get the right thing into the ALAT. The critical piece of information
761 * in the base address of the load & size. To do that, a ld.a must be executed,
762 * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
763 * if we use the same target register, we will be okay for the check.a instruction.
764 * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
765 * which would overlap within [r3,r3+X] (the size of the load was store in the
766 * ALAT). If such an entry is found the entry is invalidated. But this is not good
767 * enough, take the following example:
768 * r3=3
769 * ld4.a r1=[r3]
770 *
771 * Could be emulated by doing:
772 * ld1.a r1=[r3],1
773 * store to temporary;
774 * ld1.a r1=[r3],1
775 * store & shift to temporary;
776 * ld1.a r1=[r3],1
777 * store & shift to temporary;
778 * ld1.a r1=[r3]
779 * store & shift to temporary;
780 * r1=temporary
781 *
782 * So int this case, you would get the right value is r1 but the wrong info in
783 * the ALAT. Notice that you could do it in reverse to finish with address 3
784 * but you would still get the size wrong. To get the size right, one needs to
785 * execute exactly the same kind of load. You could do it from a aligned
786 * temporary location, but you would get the address wrong.
787 *
788 * So no matter what, it is not possible to emulate an advanced load
789 * correctly. But is that really critical ?
790 *
791 *
792 * Now one has to look at how ld.a is used, one must either do a ld.c.* or
793 * chck.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
794 * entry found in ALAT), and that's perfectly ok because:
795 *
796 * - ld.c.*, if the entry is not present a normal load is executed
797 * - chk.a.*, if the entry is not present, execution jumps to recovery code
798 *
799 * In either case, the load can be potentially retried in another form.
800 *
801 * So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
802 * must be invalidated for the register (so that's chck.a.*,ld.c.* don't pick up
803 * a stale entry later) The register base update MUST also be performed.
804 *
805 * Now what is the content of the register and its NaT bit in the case we don't
806 * do the load ? EAS2.4, says (in case an actual load is needed)
807 *
808 * - r1 = [r3], Nat = 0 if succeeds
809 * - r1 = 0 Nat = 0 if trying to access non-speculative memory
810 *
811 * For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
812 * retry and thus eventually reload the register thereby changing Nat and
813 * register content.
814 */
815
816 /*
817 * when the load has the .acq completer then
818 * use ordering fence.
819 */
820 if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
821 mb();
822
823 /*
824 * invalidate ALAT entry in case of advanced load
825 */
826 if (ld.x6_op == 0x2)
827 invala_gr(ld.r1);
828
829 return 0;
830 }
831
832 static int
833 emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
834 {
835 unsigned long r2;
836 unsigned int len = 1 << ld.x6_sz;
837
838 /*
839 * if we get to this handler, Nat bits on both r3 and r2 have already
840 * been checked. so we don't need to do it
841 *
842 * extract the value to be stored
843 */
844 getreg(ld.imm, &r2, 0, regs);
845
846 /*
847 * we rely on the macros in unaligned.h for now i.e.,
848 * we let the compiler figure out how to read memory gracefully.
849 *
850 * We need this switch/case because the way the inline function
851 * works. The code is optimized by the compiler and looks like
852 * a single switch/case.
853 */
854 DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
855
856 if (len != 2 && len != 4 && len != 8) {
857 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
858 return -1;
859 }
860
861 /* this assumes little-endian byte-order: */
862 if (copy_to_user((void *) ifa, &r2, len))
863 return -1;
864
865 /*
866 * stX [r3]=r2,imm(9)
867 *
868 * NOTE:
869 * ld.r3 can never be r0, because r0 would not generate an
870 * unaligned access.
871 */
872 if (ld.op == 0x5) {
873 unsigned long imm;
874
875 /*
876 * form imm9: [12:6] contain first 7bits
877 */
878 imm = ld.x << 7 | ld.r1;
879 /*
880 * sign extend (8bits) if m set
881 */
882 if (ld.m) imm |= SIGN_EXT9;
883 /*
884 * ifa == r3 (NaT is necessarily cleared)
885 */
886 ifa += imm;
887
888 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
889
890 setreg(ld.r3, ifa, 0, regs);
891 }
892 /*
893 * we don't have alat_invalidate_multiple() so we need
894 * to do the complete flush :-<<
895 */
896 ia64_invala();
897
898 /*
899 * stX.rel: use fence instead of release
900 */
901 if (ld.x6_op == 0xd)
902 mb();
903
904 return 0;
905 }
906
/*
 * Size in bytes of a floating-point memory operand, indexed by the x6_sz
 * field of the instruction.
 */
static const unsigned char float_fsz[4]={
	[0] = 16,	/* extended precision (e) */
	[1] = 8,	/* integer (8) */
	[2] = 4,	/* single precision (s) */
	[3] = 8		/* double precision (d) */
};
916
/*
 * Convert an extended-precision memory image at INIT into spill format at
 * FINAL: load it with ldfe into f6, then spill f6.
 */
static inline void
mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfe f6=[%0];;"
			      " stf.spill [%1]=f6"
			      : /* no outputs */
			      : "r"(init), "r"(final)
			      : "f6","memory");
}
923
/*
 * Convert an 8-byte integer memory image at INIT into spill format at FINAL:
 * load it with ldf8 into f6, then spill f6.
 */
static inline void
mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf8 f6=[%0];;"
			      " stf.spill [%1]=f6"
			      : /* no outputs */
			      : "r"(init), "r"(final)
			      : "f6","memory");
}
930
/*
 * Convert a single-precision memory image at INIT into spill format at
 * FINAL: load it with ldfs into f6, then spill f6.
 */
static inline void
mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfs f6=[%0];;"
			      " stf.spill [%1]=f6"
			      : /* no outputs */
			      : "r"(init), "r"(final)
			      : "f6","memory");
}
937
/*
 * Convert a double-precision memory image at INIT into spill format at
 * FINAL: load it with ldfd into f6, then spill f6.
 */
static inline void
mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldfd f6=[%0];;"
			      " stf.spill [%1]=f6"
			      : /* no outputs */
			      : "r"(init), "r"(final)
			      : "f6","memory");
}
944
/*
 * Convert a spill-format value at INIT into its extended-precision memory
 * image at FINAL: fill f6 from INIT, then store it with stfe.
 */
static inline void
float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];;"
			      " stfe [%1]=f6"
			      : /* no outputs */
			      : "r"(init), "r"(final)
			      : "f6","memory");
}
951
/*
 * Convert a spill-format value at INIT into its 8-byte integer memory image
 * at FINAL: fill f6 from INIT, then store it with stf8.
 */
static inline void
float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];;"
			      " stf8 [%1]=f6"
			      : /* no outputs */
			      : "r"(init), "r"(final)
			      : "f6","memory");
}
958
/*
 * Convert a spill-format value at INIT into its single-precision memory
 * image at FINAL: fill f6 from INIT, then store it with stfs.
 */
static inline void
float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];;"
			      " stfs [%1]=f6"
			      : /* no outputs */
			      : "r"(init), "r"(final)
			      : "f6","memory");
}
965
/*
 * Convert a spill-format value at INIT into its double-precision memory
 * image at FINAL: fill f6 from INIT, then store it with stfd.
 */
static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	__asm__ __volatile__ ("ldf.fill f6=[%0];;"
			      " stfd [%1]=f6"
			      : /* no outputs */
			      : "r"(init), "r"(final)
			      : "f6","memory");
}
972
973 static int
974 emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
975 {
976 struct ia64_fpreg fpr_init[2];
977 struct ia64_fpreg fpr_final[2];
978 unsigned long len = float_fsz[ld.x6_sz];
979
980 /*
981 * fr0 & fr1 don't need to be checked because Illegal Instruction
982 * faults have higher priority than unaligned faults.
983 *
984 * r0 cannot be found as the base as it would never generate an
985 * unaligned reference.
986 */
987
988 /*
989 * make sure we get clean buffers
990 */
991 memset(&fpr_init, 0, sizeof(fpr_init));
992 memset(&fpr_final, 0, sizeof(fpr_final));
993
994 /*
995 * ldfpX.a: we don't try to emulate anything but we must
996 * invalidate the ALAT entry and execute updates, if any.
997 */
998 if (ld.x6_op != 0x2) {
999 /* this assumes little-endian byte-order: */
1000
1001 if (copy_from_user(&fpr_init[0], (void *) ifa, len)
1002 || copy_from_user(&fpr_init[1], (void *) (ifa + len), len))
1003 return -1;
1004
1005 DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
1006 DDUMP("frp_init =", &fpr_init, 2*len);
1007 /*
1008 * XXX fixme
1009 * Could optimize inlines by using ldfpX & 2 spills
1010 */
1011 switch( ld.x6_sz ) {
1012 case 0:
1013 mem2float_extended(&fpr_init[0], &fpr_final[0]);
1014 mem2float_extended(&fpr_init[1], &fpr_final[1]);
1015 break;
1016 case 1:
1017 mem2float_integer(&fpr_init[0], &fpr_final[0]);
1018 mem2float_integer(&fpr_init[1], &fpr_final[1]);
1019 break;
1020 case 2:
1021 mem2float_single(&fpr_init[0], &fpr_final[0]);
1022 mem2float_single(&fpr_init[1], &fpr_final[1]);
1023 break;
1024 case 3:
1025 mem2float_double(&fpr_init[0], &fpr_final[0]);
1026 mem2float_double(&fpr_init[1], &fpr_final[1]);
1027 break;
1028 }
1029 DDUMP("fpr_final =", &fpr_final, 2*len);
1030 /*
1031 * XXX fixme
1032 *
1033 * A possible optimization would be to drop fpr_final and directly
1034 * use the storage from the saved context i.e., the actual final
1035 * destination (pt_regs, switch_stack or thread structure).
1036 */
1037 setfpreg(ld.r1, &fpr_final[0], regs);
1038 setfpreg(ld.imm, &fpr_final[1], regs);
1039 }
1040
1041 /*
1042 * Check for updates: only immediate updates are available for this
1043 * instruction.
1044 */
1045 if (ld.m) {
1046 /*
1047 * the immediate is implicit given the ldsz of the operation:
1048 * single: 8 (2x4) and for all others it's 16 (2x8)
1049 */
1050 ifa += len<<1;
1051
1052 /*
1053 * IMPORTANT:
1054 * the fact that we force the NaT of r3 to zero is ONLY valid
1055 * as long as we don't come here with a ldfpX.s.
1056 * For this reason we keep this sanity check
1057 */
1058 if (ld.x6_op == 1 || ld.x6_op == 3)
1059 printk(KERN_ERR __FUNCTION__": register update on speculative load pair, "
1060 "error\n");
1061
1062 setreg(ld.r3, ifa, 0, regs);
1063 }
1064
1065 /*
1066 * Invalidate ALAT entries, if any, for both registers.
1067 */
1068 if (ld.x6_op == 0x2) {
1069 invala_fr(ld.r1);
1070 invala_fr(ld.imm);
1071 }
1072 return 0;
1073 }
1074
1075
1076 static int
1077 emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1078 {
1079 struct ia64_fpreg fpr_init;
1080 struct ia64_fpreg fpr_final;
1081 unsigned long len = float_fsz[ld.x6_sz];
1082
1083 /*
1084 * fr0 & fr1 don't need to be checked because Illegal Instruction
1085 * faults have higher priority than unaligned faults.
1086 *
1087 * r0 cannot be found as the base as it would never generate an
1088 * unaligned reference.
1089 */
1090
1091 /*
1092 * make sure we get clean buffers
1093 */
1094 memset(&fpr_init,0, sizeof(fpr_init));
1095 memset(&fpr_final,0, sizeof(fpr_final));
1096
1097 /*
1098 * ldfX.a we don't try to emulate anything but we must
1099 * invalidate the ALAT entry.
1100 * See comments in ldX for descriptions on how the various loads are handled.
1101 */
1102 if (ld.x6_op != 0x2) {
1103 if (copy_from_user(&fpr_init, (void *) ifa, len))
1104 return -1;
1105
1106 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1107 DDUMP("fpr_init =", &fpr_init, len);
1108 /*
1109 * we only do something for x6_op={0,8,9}
1110 */
1111 switch( ld.x6_sz ) {
1112 case 0:
1113 mem2float_extended(&fpr_init, &fpr_final);
1114 break;
1115 case 1:
1116 mem2float_integer(&fpr_init, &fpr_final);
1117 break;
1118 case 2:
1119 mem2float_single(&fpr_init, &fpr_final);
1120 break;
1121 case 3:
1122 mem2float_double(&fpr_init, &fpr_final);
1123 break;
1124 }
1125 DDUMP("fpr_final =", &fpr_final, len);
1126 /*
1127 * XXX fixme
1128 *
1129 * A possible optimization would be to drop fpr_final and directly
1130 * use the storage from the saved context i.e., the actual final
1131 * destination (pt_regs, switch_stack or thread structure).
1132 */
1133 setfpreg(ld.r1, &fpr_final, regs);
1134 }
1135
1136 /*
1137 * check for updates on any loads
1138 */
1139 if (ld.op == 0x7 || ld.m)
1140 emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1141
1142 /*
1143 * invalidate ALAT entry in case of advanced floating point loads
1144 */
1145 if (ld.x6_op == 0x2)
1146 invala_fr(ld.r1);
1147
1148 return 0;
1149 }
1150
1151
1152 static int
1153 emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1154 {
1155 struct ia64_fpreg fpr_init;
1156 struct ia64_fpreg fpr_final;
1157 unsigned long len = float_fsz[ld.x6_sz];
1158
1159 /*
1160 * make sure we get clean buffers
1161 */
1162 memset(&fpr_init,0, sizeof(fpr_init));
1163 memset(&fpr_final,0, sizeof(fpr_final));
1164
1165 /*
1166 * if we get to this handler, Nat bits on both r3 and r2 have already
1167 * been checked. so we don't need to do it
1168 *
1169 * extract the value to be stored
1170 */
1171 getfpreg(ld.imm, &fpr_init, regs);
1172 /*
1173 * during this step, we extract the spilled registers from the saved
1174 * context i.e., we refill. Then we store (no spill) to temporary
1175 * aligned location
1176 */
1177 switch( ld.x6_sz ) {
1178 case 0:
1179 float2mem_extended(&fpr_init, &fpr_final);
1180 break;
1181 case 1:
1182 float2mem_integer(&fpr_init, &fpr_final);
1183 break;
1184 case 2:
1185 float2mem_single(&fpr_init, &fpr_final);
1186 break;
1187 case 3:
1188 float2mem_double(&fpr_init, &fpr_final);
1189 break;
1190 }
1191 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1192 DDUMP("fpr_init =", &fpr_init, len);
1193 DDUMP("fpr_final =", &fpr_final, len);
1194
1195 if (copy_to_user((void *) ifa, &fpr_final, len))
1196 return -1;
1197
1198 /*
1199 * stfX [r3]=r2,imm(9)
1200 *
1201 * NOTE:
1202 * ld.r3 can never be r0, because r0 would not generate an
1203 * unaligned access.
1204 */
1205 if (ld.op == 0x7) {
1206 unsigned long imm;
1207
1208 /*
1209 * form imm9: [12:6] contain first 7bits
1210 */
1211 imm = ld.x << 7 | ld.r1;
1212 /*
1213 * sign extend (8bits) if m set
1214 */
1215 if (ld.m)
1216 imm |= SIGN_EXT9;
1217 /*
1218 * ifa == r3 (NaT is necessarily cleared)
1219 */
1220 ifa += imm;
1221
1222 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1223
1224 setreg(ld.r3, ifa, 0, regs);
1225 }
1226 /*
1227 * we don't have alat_invalidate_multiple() so we need
1228 * to do the complete flush :-<<
1229 */
1230 ia64_invala();
1231
1232 return 0;
1233 }
1234
1235 /*
1236 * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1237 * eventually fix the program. However, we don't want to do that for every access so we
1238 * pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be
1239 * either...
1240 */
1241 static int
1242 within_logging_rate_limit (void)
1243 {
1244 static unsigned long count, last_time;
1245
1246 if (jiffies - last_time > 5*HZ)
1247 count = 0;
1248 if (++count < 5) {
1249 last_time = jiffies;
1250 return 1;
1251 }
1252 return 0;
1253
1254 }
1255
1256 void
1257 ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1258 {
1259 struct exception_fixup fix = { 0 };
1260 struct ia64_psr *ipsr = ia64_psr(regs);
1261 mm_segment_t old_fs = get_fs();
1262 unsigned long bundle[2];
1263 unsigned long opcode;
1264 struct siginfo si;
1265 union {
1266 unsigned long l;
1267 load_store_t insn;
1268 } u;
1269 int ret = -1;
1270
1271 if (ia64_psr(regs)->be) {
1272 /* we don't support big-endian accesses */
1273 die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
1274 goto force_sigbus;
1275 }
1276
1277 /*
1278 * Treat kernel accesses for which there is an exception handler entry the same as
1279 * user-level unaligned accesses. Otherwise, a clever program could trick this
1280 * handler into reading an arbitrary kernel addresses...
1281 */
1282 if (!user_mode(regs)) {
1283 #ifdef GAS_HAS_LOCAL_TAGS
1284 fix = search_exception_table(regs->cr_iip + ia64_psr(regs)->ri);
1285 #else
1286 fix = search_exception_table(regs->cr_iip);
1287 #endif
1288 }
1289 if (user_mode(regs) || fix.cont) {
1290 if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1291 goto force_sigbus;
1292
1293 if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
1294 && within_logging_rate_limit())
1295 {
1296 char buf[200]; /* comm[] is at most 16 bytes... */
1297 size_t len;
1298
1299 len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1300 "ip=0x%016lx\n\r", current->comm, current->pid,
1301 ifa, regs->cr_iip + ipsr->ri);
1302 /*
1303 * Don't call tty_write_message() if we're in the kernel; we might
1304 * be holding locks...
1305 */
1306 if (user_mode(regs))
1307 tty_write_message(current->tty, buf);
1308 buf[len-1] = '\0'; /* drop '\r' */
1309 printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */
1310 }
1311 } else {
1312 if (within_logging_rate_limit())
1313 printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1314 ifa, regs->cr_iip + ipsr->ri);
1315 set_fs(KERNEL_DS);
1316 }
1317
1318 DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1319 regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1320
1321 if (__copy_from_user(bundle, (void *) regs->cr_iip, 16))
1322 goto failure;
1323
1324 /*
1325 * extract the instruction from the bundle given the slot number
1326 */
1327 switch (ipsr->ri) {
1328 case 0: u.l = (bundle[0] >> 5); break;
1329 case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1330 case 2: u.l = (bundle[1] >> 23); break;
1331 }
1332 opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1333
1334 DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1335 "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1336 u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1337
1338 /*
1339 * IMPORTANT:
1340 * Notice that the swictch statement DOES not cover all possible instructions
1341 * that DO generate unaligned references. This is made on purpose because for some
1342 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
1343 * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e.,
1344 * the program will get a signal and die:
1345 *
1346 * load/store:
1347 * - ldX.spill
1348 * - stX.spill
1349 * Reason: RNATs are based on addresses
1350 *
1351 * synchronization:
1352 * - cmpxchg
1353 * - fetchadd
1354 * - xchg
1355 * Reason: ATOMIC operations cannot be emulated properly using multiple
1356 * instructions.
1357 *
1358 * speculative loads:
1359 * - ldX.sZ
1360 * Reason: side effects, code must be ready to deal with failure so simpler
1361 * to let the load fail.
1362 * ---------------------------------------------------------------------------------
1363 * XXX fixme
1364 *
1365 * I would like to get rid of this switch case and do something
1366 * more elegant.
1367 */
1368 switch (opcode) {
1369 case LDS_OP:
1370 case LDSA_OP:
1371 case LDS_IMM_OP:
1372 case LDSA_IMM_OP:
1373 case LDFS_OP:
1374 case LDFSA_OP:
1375 case LDFS_IMM_OP:
1376 /*
1377 * The instruction will be retried with deferred exceptions turned on, and
1378 * we should get Nat bit installed
1379 *
1380 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1381 * are actually executed even though the operation failed. So we don't
1382 * need to take care of this.
1383 */
1384 DPRINT("forcing PSR_ED\n");
1385 regs->cr_ipsr |= IA64_PSR_ED;
1386 goto done;
1387
1388 case LD_OP:
1389 case LDA_OP:
1390 case LDBIAS_OP:
1391 case LDACQ_OP:
1392 case LDCCLR_OP:
1393 case LDCNC_OP:
1394 case LDCCLRACQ_OP:
1395 case LD_IMM_OP:
1396 case LDA_IMM_OP:
1397 case LDBIAS_IMM_OP:
1398 case LDACQ_IMM_OP:
1399 case LDCCLR_IMM_OP:
1400 case LDCNC_IMM_OP:
1401 case LDCCLRACQ_IMM_OP:
1402 ret = emulate_load_int(ifa, u.insn, regs);
1403 break;
1404
1405 case ST_OP:
1406 case STREL_OP:
1407 case ST_IMM_OP:
1408 case STREL_IMM_OP:
1409 ret = emulate_store_int(ifa, u.insn, regs);
1410 break;
1411
1412 case LDF_OP:
1413 case LDFA_OP:
1414 case LDFCCLR_OP:
1415 case LDFCNC_OP:
1416 case LDF_IMM_OP:
1417 case LDFA_IMM_OP:
1418 case LDFCCLR_IMM_OP:
1419 case LDFCNC_IMM_OP:
1420 if (u.insn.x)
1421 ret = emulate_load_floatpair(ifa, u.insn, regs);
1422 else
1423 ret = emulate_load_float(ifa, u.insn, regs);
1424 break;
1425
1426 case STF_OP:
1427 case STF_IMM_OP:
1428 ret = emulate_store_float(ifa, u.insn, regs);
1429 break;
1430
1431 default:
1432 goto failure;
1433 }
1434 DPRINT("ret=%d\n", ret);
1435 if (ret)
1436 goto failure;
1437
1438 if (ipsr->ri == 2)
1439 /*
1440 * given today's architecture this case is not likely to happen because a
1441 * memory access instruction (M) can never be in the last slot of a
1442 * bundle. But let's keep it for now.
1443 */
1444 regs->cr_iip += 16;
1445 ipsr->ri = (ipsr->ri + 1) & 0x3;
1446
1447 DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1448 done:
1449 set_fs(old_fs); /* restore original address limit */
1450 return;
1451
1452 failure:
1453 /* something went wrong... */
1454 if (!user_mode(regs)) {
1455 if (fix.cont) {
1456 handle_exception(regs, fix);
1457 goto done;
1458 }
1459 die_if_kernel("error during unaligned kernel access\n", regs, ret);
1460 /* NOT_REACHED */
1461 }
1462 force_sigbus:
1463 si.si_signo = SIGBUS;
1464 si.si_errno = 0;
1465 si.si_code = BUS_ADRALN;
1466 si.si_addr = (void *) ifa;
1467 force_sig_info(SIGBUS, &si, current);
1468 goto done;
1469 }
1470