File: /usr/src/linux/arch/ia64/kernel/unaligned.c

1     /*
2      * Architecture-specific unaligned trap handling.
3      *
4      * Copyright (C) 1999-2001 Hewlett-Packard Co
5      * Copyright (C) 1999-2000 Stephane Eranian <eranian@hpl.hp.com>
6      * Copyright (C) 2001 David Mosberger-Tang <davidm@hpl.hp.com>
7      *
8      * 2001/01/17	Add support emulation of unaligned kernel accesses.
9      */
10     #include <linux/kernel.h>
11     #include <linux/sched.h>
12     #include <linux/smp_lock.h>
13     
14     #include <asm/uaccess.h>
15     #include <asm/rse.h>
16     #include <asm/processor.h>
17     #include <asm/unaligned.h>
18     
19     extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
20     
/*
 * Debug tracing.  Turn the #undef below into a #define to have every
 * emulation step reported through printk().
 */
#undef DEBUG_UNALIGNED_TRAP

#ifdef DEBUG_UNALIGNED_TRAP
/* printk() a message prefixed with the name and line of the calling function. */
# define DPRINT(a...)	do { printk("%s.%u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
/* Hex-dump LEN bytes starting at VP, preceded by the string STR. */
# define DDUMP(str,vp,len)	dump(str, vp, len)

/*
 * Print STR, then each of the LEN bytes at VP as a two-digit hex number,
 * then a newline.
 */
static void
dump (const char *str, void *vp, size_t len)
{
	const unsigned char *cp = vp;
	size_t i;	/* same type as len: no signed/unsigned comparison */

	printk("%s", str);
	for (i = 0; i < len; ++i)
		printk (" %02x", cp[i]);
	printk("\n");
}
#else
/* Tracing disabled: both macros expand to nothing. */
# define DPRINT(a...)
# define DDUMP(str,vp,len)
#endif
42     
/* Register numbers at and above these limits need special handling below. */
#define IA64_FIRST_STACKED_GR	32	/* r32+: accessed via the register backing store */
#define IA64_FIRST_ROTATING_FR	32	/* f32+: accessed via current->thread.fph */

/* OR-ed into an 8-bit immediate to sign-extend it (sets bits 8 through 63). */
#define SIGN_EXT9		0xffffffffffffff00ul
46     
/*
 * Opcode layout for M-unit instructions:
 *
 *  opcode |   m  |   x6    |
 * --------|------|---------|
 * [40-37] | [36] | [35:30] |
 * --------|------|---------|
 *     4   |   1  |    6    | = 11 bits
 * --------------------------
 * Bits [31:30] are of no direct use for telling loads from stores, so only
 * [35:32] of x6 are looked at.  That leaves the 9-bit field [40:32].  The
 * m-bit is masked out of it (this is where the 'e' in the mask comes from)
 * because it is only checked later, during the load/store emulation.
 */
#define IA64_OPCODE_SHIFT	32
#define IA64_OPCODE_MASK	0x1ef
63     
/*
 * Table C-28 Integer Load/Store
 *
 * Sub-opcodes [35:32] = 0x6, 0x7, 0xE and 0xF are ignored.
 *
 * ld8.fill and st8.fill MUST be aligned because the RNATs are based on
 * the address (bits [8:3]), so we must fail for them.
 */
#define LD_OP		0x080	/* ldX           */
#define LDS_OP		0x081	/* ldX.s         */
#define LDA_OP		0x082	/* ldX.a         */
#define LDSA_OP		0x083	/* ldX.sa        */
#define LDBIAS_OP	0x084	/* ldX.bias      */
#define LDACQ_OP	0x085	/* ldX.acq       */
/* 0x086 and 0x087 are not relevant */
#define LDCCLR_OP	0x088	/* ldX.c.clr     */
#define LDCNC_OP	0x089	/* ldX.c.nc      */
#define LDCCLRACQ_OP	0x08a	/* ldX.c.clr.acq */
#define ST_OP		0x08c	/* stX           */
#define STREL_OP	0x08d	/* stX.rel       */
/* 0x08e and 0x08f are not relevant */
85     
86     /*
87      * Table C-29 Integer Load +Reg
88      *
89      * we use the ld->m (bit [36:36]) field to determine whether or not we have
90      * a load/store of this form.
91      */
92     
/*
 * Table C-30 Integer Load/Store +Imm
 *
 * Sub-opcodes [35:32] = 0x6, 0x7, 0xE and 0xF are ignored.
 *
 * ld8.fill and st8.fill must be aligned because the NaT registers are based
 * on the address, so we must fail and the program must be fixed.
 */
#define LD_IMM_OP		0x0a0	/* ldX           */
#define LDS_IMM_OP		0x0a1	/* ldX.s         */
#define LDA_IMM_OP		0x0a2	/* ldX.a         */
#define LDSA_IMM_OP		0x0a3	/* ldX.sa        */
#define LDBIAS_IMM_OP		0x0a4	/* ldX.bias      */
#define LDACQ_IMM_OP		0x0a5	/* ldX.acq       */
/* 0x0a6 and 0x0a7 are not relevant */
#define LDCCLR_IMM_OP		0x0a8	/* ldX.c.clr     */
#define LDCNC_IMM_OP		0x0a9	/* ldX.c.nc      */
#define LDCCLRACQ_IMM_OP	0x0aa	/* ldX.c.clr.acq */
#define ST_IMM_OP		0x0ac	/* stX           */
#define STREL_IMM_OP		0x0ad	/* stX.rel       */
/* 0x0ae and 0x0af are not relevant */
114     
/*
 * Table C-32 Floating-point Load/Store
 */
#define LDF_OP		0x0c0	/* ldfX       */
#define LDFS_OP		0x0c1	/* ldfX.s     */
#define LDFA_OP		0x0c2	/* ldfX.a     */
#define LDFSA_OP	0x0c3	/* ldfX.sa    */
/* 0x0c6 is irrelevant */
#define LDFCCLR_OP	0x0c8	/* ldfX.c.clr */
#define LDFCNC_OP	0x0c9	/* ldfX.c.nc  */
/* 0x0cb is irrelevant */
#define STF_OP		0x0cc	/* stfX       */
127     
128     /*
129      * Table C-33 Floating-point Load +Reg
130      *
131      * we use the ld->m (bit [36:36]) field to determine whether or not we have
132      * a load/store of this form.
133      */
134     
/*
 * Table C-34 Floating-point Load/Store +Imm
 */
#define LDF_IMM_OP	0x0e0	/* ldfX       */
#define LDFS_IMM_OP	0x0e1	/* ldfX.s     */
#define LDFA_IMM_OP	0x0e2	/* ldfX.a     */
#define LDFSA_IMM_OP	0x0e3	/* ldfX.sa    */
/* 0x0e6 is irrelevant */
#define LDFCCLR_IMM_OP	0x0e8	/* ldfX.c.clr */
#define LDFCNC_IMM_OP	0x0e9	/* ldfX.c.nc  */
#define STF_IMM_OP	0x0ec	/* stfX       */
146     
/*
 * Bit-field view of a load/store instruction.  The emulation code below
 * reads the fields as follows:
 *
 *	r1	target register of a load; low 7 bits of imm9 for a store
 *	imm	low 7 bits of imm9 for a load; also the register number of r2
 *	r3	base register (the one that gets updated)
 *	x	bit 7 of imm9
 *	x6_sz	access size is (1 << x6_sz) bytes
 *	x6_op	kind of load/store (.a, .acq, .rel, ...)
 *	m	sign of imm9, or "register update" flag for the +Reg form
 *	op	major opcode (0x5 selects the +Imm form)
 */
typedef struct {
	unsigned long qp    :  6;	/* [0:5]   */
	unsigned long r1    :  7;	/* [6:12]  */
	unsigned long imm   :  7;	/* [13:19] */
	unsigned long r3    :  7;	/* [20:26] */
	unsigned long x     :  1;	/* [27:27] */
	unsigned long hint  :  2;	/* [28:29] */
	unsigned long x6_sz :  2;	/* [30:31] */
	unsigned long x6_op :  4;	/* [32:35], x6 = x6_sz|x6_op */
	unsigned long m     :  1;	/* [36:36] */
	unsigned long op    :  4;	/* [37:40] */
	unsigned long pad   : 23;	/* [41:63] */
} load_store_t;
160     
161     
/* How the base register of a load is post-incremented. */
typedef enum {
	UPD_IMMEDIATE = 0,	/* by a 9-bit immediate: ldXZ r1=[r3],imm(9) */
	UPD_REG       = 1	/* by another register:  ldXZ r1=[r3],r2     */
} update_t;
166     
167     /*
168      * We use tables to keep track of the offsets of registers in the saved state.
169      * This way we save having big switch/case statements.
170      *
171      * We use bit 0 to indicate switch_stack or pt_regs.
172      * The offset is simply shifted by 1 bit.
173      * A 2-byte value should be enough to hold any kind of offset
174      *
175      * In case the calling convention changes (and thus pt_regs/switch_stack)
176      * simply use RSW instead of RPT or vice-versa.
177      */
178     
/* Byte offset of member x inside struct pt_regs / struct switch_stack. */
#define RPO(x)	((size_t) &((struct pt_regs *)0)->x)
#define RSO(x)	((size_t) &((struct switch_stack *)0)->x)

/*
 * Table entries: the offset is stored shifted left by one; bit 0 is set
 * when the register lives in switch_stack and clear when it is in pt_regs.
 */
#define RPT(x)		(RPO(x) << 1)
#define RSW(x)		((RSO(x) << 1) | 1)

/* Decode a gr_info[] entry: which structure, and the offset within it. */
#define GR_IN_SW(x)	(gr_info[x] & 0x1)
#define GR_OFFS(x)	(gr_info[x] >> 1)

/* Decode an fr_info[] entry: which structure, and the offset within it. */
#define FR_IN_SW(x)	(fr_info[x] & 0x1)
#define FR_OFFS(x)	(fr_info[x] >> 1)
190     
191     static u16 gr_info[32]={
192     	0,			/* r0 is read-only : WE SHOULD NEVER GET THIS */
193     
194     	RPT(r1), RPT(r2), RPT(r3),
195     
196     	RSW(r4), RSW(r5), RSW(r6), RSW(r7),
197     
198     	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
199     	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
200     
201     	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
202     	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
203     	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
204     	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
205     };
206     
207     static u16 fr_info[32]={
208     	0,			/* constant : WE SHOULD NEVER GET THIS */
209     	0,			/* constant : WE SHOULD NEVER GET THIS */
210     
211     	RSW(f2), RSW(f3), RSW(f4), RSW(f5),
212     
213     	RPT(f6), RPT(f7), RPT(f8), RPT(f9),
214     
215     	RSW(f10), RSW(f11), RSW(f12), RSW(f13), RSW(f14),
216     	RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
217     	RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
218     	RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
219     	RSW(f30), RSW(f31)
220     };
221     
/*
 * Invalidate the ALAT entry for integer register REGNO.
 *
 * The register is handed to invala.e as an immediate operand ("i"
 * constraint), so each of the 128 register numbers gets its own copy of the
 * instruction.  A REGNO outside 0-127 matches no case and does nothing.
 */
static void
invala_gr (int regno)
{
#	define GR1(n)	case n: __asm__ __volatile__ ("invala.e r%0" :: "i"(n)); break
#	define GR8(n)	GR1(n);     GR1(n + 1); GR1(n + 2); GR1(n + 3); \
			GR1(n + 4); GR1(n + 5); GR1(n + 6); GR1(n + 7)

	switch (regno) {
		GR8(  0); GR8(  8); GR8( 16); GR8( 24);
		GR8( 32); GR8( 40); GR8( 48); GR8( 56);
		GR8( 64); GR8( 72); GR8( 80); GR8( 88);
		GR8( 96); GR8(104); GR8(112); GR8(120);
	}
#	undef GR8
#	undef GR1
}
248     
/*
 * Invalidate the ALAT entry for floating-point register REGNO.
 *
 * The register is handed to invala.e as an immediate operand ("i"
 * constraint), so each of the 128 register numbers gets its own copy of the
 * instruction.  A REGNO outside 0-127 matches no case and does nothing.
 */
static void
invala_fr (int regno)
{
#	define FR1(n)	case n: __asm__ __volatile__ ("invala.e f%0" :: "i"(n)); break
#	define FR8(n)	FR1(n);     FR1(n + 1); FR1(n + 2); FR1(n + 3); \
			FR1(n + 4); FR1(n + 5); FR1(n + 6); FR1(n + 7)

	switch (regno) {
		FR8(  0); FR8(  8); FR8( 16); FR8( 24);
		FR8( 32); FR8( 40); FR8( 48); FR8( 56);
		FR8( 64); FR8( 72); FR8( 80); FR8( 88);
		FR8( 96); FR8(104); FR8(112); FR8(120);
	}
#	undef FR8
#	undef FR1
}
275     
276     static void
277     set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
278     {
279     	struct switch_stack *sw = (struct switch_stack *) regs - 1;
280     	unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
281     	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
282     	unsigned long rnats, nat_mask;
283     	unsigned long on_kbs;
284     	long sof = (regs->cr_ifs) & 0x7f;
285     
286     	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld\n",
287     	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f);
288     
289     	if ((r1 - 32) >= sof) {
290     		/* this should never happen, as the "rsvd register fault" has higher priority */
291     		DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
292     		return;
293     	}
294     
295     	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
296     	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + (r1 - 32));
297     	if (addr >= kbs) {
298     		/* the register is on the kernel backing store: easy... */
299     		rnat_addr = ia64_rse_rnat_addr(addr);
300     		if ((unsigned long) rnat_addr >= sw->ar_bspstore)
301     			rnat_addr = &sw->ar_rnat;
302     		nat_mask = 1UL << ia64_rse_slot_num(addr);
303     
304     		*addr = val;
305     		if (nat)
306     			*rnat_addr |=  nat_mask;
307     		else
308     			*rnat_addr &= ~nat_mask;
309     		return;
310     	}
311     
312     	/*
313     	 * Avoid using user_mode() here: with "epc", we cannot use the privilege level to
314     	 * infer whether the interrupt task was running on the kernel backing store.
315     	 */
316     	if (regs->r12 >= TASK_SIZE) {
317     		DPRINT("ignoring kernel write to r%lu; register isn't on the RBS!", r1);
318     		return;
319     	}
320     
321     	bspstore = (unsigned long *) regs->ar_bspstore;
322     	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
323     	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
324     	addr    = ia64_rse_skip_regs(bsp, r1 - 32);
325     
326     	DPRINT("ubs_end=%p bsp=%p addr=%px\n", (void *) ubs_end, (void *) bsp, (void *) addr);
327     
328     	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
329     
330     	rnat_addr = ia64_rse_rnat_addr(addr);
331     
332     	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
333     	DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
334     	       (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
335     
336     	nat_mask = 1UL << ia64_rse_slot_num(addr);
337     	if (nat)
338     		rnats |=  nat_mask;
339     	else
340     		rnats &= ~nat_mask;
341     	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
342     
343     	DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
344     }
345     
346     
347     static void
348     get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
349     {
350     	struct switch_stack *sw = (struct switch_stack *) regs - 1;
351     	unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
352     	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
353     	unsigned long rnats, nat_mask;
354     	unsigned long on_kbs;
355     	long sof = (regs->cr_ifs) & 0x7f;
356     
357     	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld\n",
358     	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f);
359     
360     	if ((r1 - 32) >= sof) {
361     		/* this should never happen, as the "rsvd register fault" has higher priority */
362     		DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
363     		return;
364     	}
365     
366     	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
367     	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + (r1 - 32));
368     	if (addr >= kbs) {
369     		/* the register is on the kernel backing store: easy... */
370     		*val = *addr;
371     		if (nat) {
372     			rnat_addr = ia64_rse_rnat_addr(addr);
373     			if ((unsigned long) rnat_addr >= sw->ar_bspstore)
374     				rnat_addr = &sw->ar_rnat;
375     			nat_mask = 1UL << ia64_rse_slot_num(addr);
376     			*nat = (*rnat_addr & nat_mask) != 0;
377     		}
378     		return;
379     	}
380     
381     	/*
382     	 * Avoid using user_mode() here: with "epc", we cannot use the privilege level to
383     	 * infer whether the interrupt task was running on the kernel backing store.
384     	 */
385     	if (regs->r12 >= TASK_SIZE) {
386     		DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
387     		return;
388     	}
389     
390     	bspstore = (unsigned long *)regs->ar_bspstore;
391     	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
392     	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
393     	addr    = ia64_rse_skip_regs(bsp, r1 - 32);
394     
395     	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
396     
397     	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
398     
399     	if (nat) {
400     		rnat_addr = ia64_rse_rnat_addr(addr);
401     		nat_mask = 1UL << ia64_rse_slot_num(addr);
402     
403     		DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
404     
405     		ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
406     		*nat = (rnats & nat_mask) != 0;
407     	}
408     }
409     
410     
411     static void
412     setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
413     {
414     	struct switch_stack *sw = (struct switch_stack *) regs - 1;
415     	unsigned long addr;
416     	unsigned long bitmask;
417     	unsigned long *unat;
418     
419     	/*
420     	 * First takes care of stacked registers
421     	 */
422     	if (regnum >= IA64_FIRST_STACKED_GR) {
423     		set_rse_reg(regs, regnum, val, nat);
424     		return;
425     	}
426     
427     	/*
428     	 * Using r0 as a target raises a General Exception fault which has higher priority
429     	 * than the Unaligned Reference fault.
430     	 */
431     
432     	/*
433     	 * Now look at registers in [0-31] range and init correct UNAT
434     	 */
435     	if (GR_IN_SW(regnum)) {
436     		addr = (unsigned long)sw;
437     		unat = &sw->ar_unat;
438     	} else {
439     		addr = (unsigned long)regs;
440     		unat = &sw->caller_unat;
441     	}
442     	DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
443     	       addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
444     	/*
445     	 * add offset from base of struct
446     	 * and do it !
447     	 */
448     	addr += GR_OFFS(regnum);
449     
450     	*(unsigned long *)addr = val;
451     
452     	/*
453     	 * We need to clear the corresponding UNAT bit to fully emulate the load
454     	 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
455     	 */
456     	bitmask   = 1UL << (addr >> 3 & 0x3f);
457     	DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
458     	if (nat) {
459     		*unat |= bitmask;
460     	} else {
461     		*unat &= ~bitmask;
462     	}
463     	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
464     }
465     
/* Index into thread.fph[] of floating-point register r (r >= IA64_FIRST_ROTATING_FR). */
#define IA64_FPH_OFFS(r) ((r) - IA64_FIRST_ROTATING_FR)
467     
468     static void
469     setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
470     {
471     	struct switch_stack *sw = (struct switch_stack *)regs - 1;
472     	unsigned long addr;
473     
474     	/*
475     	 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
476     	 * Fault. Thus, when we get here, we know the partition is enabled.
477     	 * To update f32-f127, there are three choices:
478     	 *
479     	 *	(1) save f32-f127 to thread.fph and update the values there
480     	 *	(2) use a gigantic switch statement to directly access the registers
481     	 *	(3) generate code on the fly to update the desired register
482     	 *
483     	 * For now, we are using approach (1).
484     	 */
485     	if (regnum >= IA64_FIRST_ROTATING_FR) {
486     		ia64_sync_fph(current);
487     		current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
488     	} else {
489     		/*
490     		 * pt_regs or switch_stack ?
491     		 */
492     		if (FR_IN_SW(regnum)) {
493     			addr = (unsigned long)sw;
494     		} else {
495     			addr = (unsigned long)regs;
496     		}
497     
498     		DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
499     
500     		addr += FR_OFFS(regnum);
501     		*(struct ia64_fpreg *)addr = *fpval;
502     
503     		/*
504     		 * mark the low partition as being used now
505     		 *
506     		 * It is highly unlikely that this bit is not already set, but
507     		 * let's do it for safety.
508     		 */
509     		regs->cr_ipsr |= IA64_PSR_MFL;
510     	}
511     }
512     
/*
 * float_spill_f0() and float_spill_f1() generate the spilled versions of the
 * constant floating-point registers, which can then be used with stfX.
 */

/* Write the spilled form of f0 to *FINAL. */
static inline void
float_spill_f0 (struct ia64_fpreg *final)
{
	__asm__ __volatile__ (
		"stf.spill [%0]=f0"
		: /* no outputs */
		: "r"(final)
		: "memory");
}
522     
/* Write the spilled form of f1 to *FINAL. */
static inline void
float_spill_f1 (struct ia64_fpreg *final)
{
	__asm__ __volatile__ (
		"stf.spill [%0]=f1"
		: /* no outputs */
		: "r"(final)
		: "memory");
}
528     
529     static void
530     getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
531     {
532     	struct switch_stack *sw = (struct switch_stack *) regs - 1;
533     	unsigned long addr;
534     
535     	/*
536     	 * From EAS-2.5: FPDisableFault has higher priority than
537     	 * Unaligned Fault. Thus, when we get here, we know the partition is
538     	 * enabled.
539     	 *
540     	 * When regnum > 31, the register is still live and we need to force a save
541     	 * to current->thread.fph to get access to it.  See discussion in setfpreg()
542     	 * for reasons and other ways of doing this.
543     	 */
544     	if (regnum >= IA64_FIRST_ROTATING_FR) {
545     		ia64_flush_fph(current);
546     		*fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
547     	} else {
548     		/*
549     		 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
550     		 * not saved, we must generate their spilled form on the fly
551     		 */
552     		switch(regnum) {
553     		case 0:
554     			float_spill_f0(fpval);
555     			break;
556     		case 1:
557     			float_spill_f1(fpval);
558     			break;
559     		default:
560     			/*
561     			 * pt_regs or switch_stack ?
562     			 */
563     			addr =  FR_IN_SW(regnum) ? (unsigned long)sw
564     						 : (unsigned long)regs;
565     
566     			DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
567     			       FR_IN_SW(regnum), addr, FR_OFFS(regnum));
568     
569     			addr  += FR_OFFS(regnum);
570     			*fpval = *(struct ia64_fpreg *)addr;
571     		}
572     	}
573     }
574     
575     
576     static void
577     getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
578     {
579     	struct switch_stack *sw = (struct switch_stack *) regs - 1;
580     	unsigned long addr, *unat;
581     
582     	if (regnum >= IA64_FIRST_STACKED_GR) {
583     		get_rse_reg(regs, regnum, val, nat);
584     		return;
585     	}
586     
587     	/*
588     	 * take care of r0 (read-only always evaluate to 0)
589     	 */
590     	if (regnum == 0) {
591     		*val = 0;
592     		if (nat)
593     			*nat = 0;
594     		return;
595     	}
596     
597     	/*
598     	 * Now look at registers in [0-31] range and init correct UNAT
599     	 */
600     	if (GR_IN_SW(regnum)) {
601     		addr = (unsigned long)sw;
602     		unat = &sw->ar_unat;
603     	} else {
604     		addr = (unsigned long)regs;
605     		unat = &sw->caller_unat;
606     	}
607     
608     	DPRINT("addr_base=%lx offset=0x%x\n", addr,  GR_OFFS(regnum));
609     
610     	addr += GR_OFFS(regnum);
611     
612     	*val  = *(unsigned long *)addr;
613     
614     	/*
615     	 * do it only when requested
616     	 */
617     	if (nat)
618     		*nat  = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
619     }
620     
621     static void
622     emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
623     {
624     	/*
625     	 * IMPORTANT:
626     	 * Given the way we handle unaligned speculative loads, we should
627     	 * not get to this point in the code but we keep this sanity check,
628     	 * just in case.
629     	 */
630     	if (ld.x6_op == 1 || ld.x6_op == 3) {
631     		printk(KERN_ERR __FUNCTION__": register update on speculative load, error\n");
632     		die_if_kernel("unaligned reference on specualtive load with register update\n",
633     			      regs, 30);
634     	}
635     
636     
637     	/*
638     	 * at this point, we know that the base register to update is valid i.e.,
639     	 * it's not r0
640     	 */
641     	if (type == UPD_IMMEDIATE) {
642     		unsigned long imm;
643     
644     		/*
645     		 * Load +Imm: ldXZ r1=[r3],imm(9)
646     		 *
647     		 *
648     		 * form imm9: [13:19] contain the first 7 bits
649     		 */
650     		imm = ld.x << 7 | ld.imm;
651     
652     		/*
653     		 * sign extend (1+8bits) if m set
654     		 */
655     		if (ld.m) imm |= SIGN_EXT9;
656     
657     		/*
658     		 * ifa == r3 and we know that the NaT bit on r3 was clear so
659     		 * we can directly use ifa.
660     		 */
661     		ifa += imm;
662     
663     		setreg(ld.r3, ifa, 0, regs);
664     
665     		DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
666     
667     	} else if (ld.m) {
668     		unsigned long r2;
669     		int nat_r2;
670     
671     		/*
672     		 * Load +Reg Opcode: ldXZ r1=[r3],r2
673     		 *
674     		 * Note: that we update r3 even in the case of ldfX.a
675     		 * (where the load does not happen)
676     		 *
677     		 * The way the load algorithm works, we know that r3 does not
678     		 * have its NaT bit set (would have gotten NaT consumption
679     		 * before getting the unaligned fault). So we can use ifa
680     		 * which equals r3 at this point.
681     		 *
682     		 * IMPORTANT:
683     		 * The above statement holds ONLY because we know that we
684     		 * never reach this code when trying to do a ldX.s.
685     		 * If we ever make it to here on an ldfX.s then
686     		 */
687     		getreg(ld.imm, &r2, &nat_r2, regs);
688     
689     		ifa += r2;
690     
691     		/*
692     		 * propagate Nat r2 -> r3
693     		 */
694     		setreg(ld.r3, ifa, nat_r2, regs);
695     
696     		DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
697     	}
698     }
699     
700     
701     static int
702     emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
703     {
704     	unsigned int len = 1 << ld.x6_sz;
705     
706     	/*
707     	 * r0, as target, doesn't need to be checked because Illegal Instruction
708     	 * faults have higher priority than unaligned faults.
709     	 *
710     	 * r0 cannot be found as the base as it would never generate an
711     	 * unaligned reference.
712     	 */
713     
714     	/*
715     	 * ldX.a we don't try to emulate anything but we must invalidate the ALAT entry.
716     	 * See comment below for explanation on how we handle ldX.a
717     	 */
718     	if (ld.x6_op != 0x2) {
719     		unsigned long val = 0;
720     
721     		if (len != 2 && len != 4 && len != 8) {
722     			DPRINT("unknown size: x6=%d\n", ld.x6_sz);
723     			return -1;
724     		}
725     		/* this assumes little-endian byte-order: */
726     		if (copy_from_user(&val, (void *) ifa, len))
727     		    return -1;
728     		setreg(ld.r1, val, 0, regs);
729     	}
730     
731     	/*
732     	 * check for updates on any kind of loads
733     	 */
734     	if (ld.op == 0x5 || ld.m)
735     		emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
736     
737     	/*
738     	 * handling of various loads (based on EAS2.4):
739     	 *
740     	 * ldX.acq (ordered load):
741     	 *	- acquire semantics would have been used, so force fence instead.
742     	 *
743     	 * ldX.c.clr (check load and clear):
744     	 *	- if we get to this handler, it's because the entry was not in the ALAT.
745     	 *	  Therefore the operation reverts to a normal load
746     	 *
747     	 * ldX.c.nc (check load no clear):
748     	 *	- same as previous one
749     	 *
750     	 * ldX.c.clr.acq (ordered check load and clear):
751     	 *	- same as above for c.clr part. The load needs to have acquire semantics. So
752     	 *	  we use the fence semantics which is stronger and thus ensures correctness.
753     	 *
754     	 * ldX.a (advanced load):
755     	 *	- suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
756     	 *	  address doesn't match requested size alignement. This means that we would
757     	 *	  possibly need more than one load to get the result.
758     	 *
759     	 *	  The load part can be handled just like a normal load, however the difficult
760     	 *	  part is to get the right thing into the ALAT. The critical piece of information
761     	 *	  in the base address of the load & size. To do that, a ld.a must be executed,
762     	 *	  clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
763     	 *	  if we use the same target register, we will be okay for the check.a instruction.
764     	 *	  If we look at the store, basically a stX [r3]=r1 checks the ALAT  for any entry
765     	 *	  which would overlap within [r3,r3+X] (the size of the load was store in the
766     	 *	  ALAT). If such an entry is found the entry is invalidated. But this is not good
767     	 *	  enough, take the following example:
768     	 *		r3=3
769     	 *		ld4.a r1=[r3]
770     	 *
771     	 *	  Could be emulated by doing:
772     	 *		ld1.a r1=[r3],1
773     	 *		store to temporary;
774     	 *		ld1.a r1=[r3],1
775     	 *		store & shift to temporary;
776     	 *		ld1.a r1=[r3],1
777     	 *		store & shift to temporary;
778     	 *		ld1.a r1=[r3]
779     	 *		store & shift to temporary;
780     	 *		r1=temporary
781     	 *
782     	 *	  So int this case, you would get the right value is r1 but the wrong info in
783     	 *	  the ALAT.  Notice that you could do it in reverse to finish with address 3
784     	 *	  but you would still get the size wrong.  To get the size right, one needs to
785     	 *	  execute exactly the same kind of load. You could do it from a aligned
786     	 *	  temporary location, but you would get the address wrong.
787     	 *
788     	 *	  So no matter what, it is not possible to emulate an advanced load
789     	 *	  correctly. But is that really critical ?
790     	 *
791     	 *
792     	 *	  Now one has to look at how ld.a is used, one must either do a ld.c.* or
793     	 *	  chck.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
794     	 *	  entry found in ALAT), and that's perfectly ok because:
795     	 *
796     	 *		- ld.c.*, if the entry is not present a  normal load is executed
797     	 *		- chk.a.*, if the entry is not present, execution jumps to recovery code
798     	 *
799     	 *	  In either case, the load can be potentially retried in another form.
800     	 *
801     	 *	  So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
802     	 *	  must be invalidated for the register (so that's chck.a.*,ld.c.* don't pick up
803     	 *	  a stale entry later) The register base update MUST also be performed.
804     	 *
805     	 *	  Now what is the content of the register and its NaT bit in the case we don't
806     	 *	  do the load ?  EAS2.4, says (in case an actual load is needed)
807     	 *
808     	 *		- r1 = [r3], Nat = 0 if succeeds
809     	 *		- r1 = 0 Nat = 0 if trying to access non-speculative memory
810     	 *
811     	 *	  For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
812     	 *	  retry and thus eventually reload the register thereby changing Nat and
813     	 *	  register content.
814     	 */
815     
816     	/*
817     	 * when the load has the .acq completer then
818     	 * use ordering fence.
819     	 */
820     	if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
821     		mb();
822     
823     	/*
824     	 * invalidate ALAT entry in case of advanced load
825     	 */
826     	if (ld.x6_op == 0x2)
827     		invala_gr(ld.r1);
828     
829     	return 0;
830     }
831     
832     static int
833     emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
834     {
        	/*
        	 * Emulate an unaligned integer store of 2, 4 or 8 bytes to ifa.
        	 *
        	 *	ifa:	address the store was targeting
        	 *	ld:	decoded instruction; ld.imm selects the source register
        	 *	regs:	register state handed to getreg()/setreg()
        	 *
        	 * Returns 0 on success, -1 if the size is not 2, 4 or 8 bytes or if
        	 * copy_to_user() fails.
        	 */
835     	unsigned long r2;
836     	unsigned int len = 1 << ld.x6_sz;	/* access size in bytes */
837     
838     	/*
839     	 * if we get to this handler, Nat bits on both r3 and r2 have already
840     	 * been checked, so we don't need to do it here.
841     	 *
842     	 * extract the value to be stored
843     	 */
844     	getreg(ld.imm, &r2, 0, regs);
845     
846     	/*
847     	 * The store is carried out by copy_to_user() below: the first len
848     	 * bytes of r2 are copied to ifa, which (on a little-endian layout)
849     	 * are its low-order bytes.
850     	 *
851     	 * Sizes other than 2, 4 and 8 bytes are rejected before anything
852     	 * is written to memory.
853     	 */
854     	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
855     
856     	if (len != 2 && len != 4 && len != 8) {
857     		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
858     		return -1;
859     	}
860     
861     	/* this assumes little-endian byte-order: */
862     	if (copy_to_user((void *) ifa, &r2, len))
863     		return -1;
864     
865     	/*
866     	 * stX [r3]=r2,imm(9): the base register is updated after the store.
867     	 *
868     	 * NOTE:
869     	 * ld.r3 can never be r0, because r0 would not generate an
870     	 * unaligned access.
871     	 */
872     	if (ld.op == 0x5) {
873     		unsigned long imm;
874     
875     		/*
876     		 * form imm9: ld.r1 (bits [12:6]) holds the low 7 bits, ld.x is bit 7
877     		 */
878     		imm = ld.x << 7 | ld.r1;
879     		/*
880     		 * sign extend: if m is set, SIGN_EXT9 fills bit 8 and above with ones
881     		 */
882     		if (ld.m) imm |= SIGN_EXT9;
883     		/*
884     		 * ifa == r3 (NaT is necessarily cleared)
885     		 */
886     		ifa += imm;
887     
888     		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
889     
890     		setreg(ld.r3, ifa, 0, regs);
891     	}
892     	/*
893     	 * we don't have alat_invalidate_multiple() so we need
894     	 * to do the complete flush :-<<
895     	 */
896     	ia64_invala();
897     
898     	/*
899     	 * stX.rel (x6_op == 0xd): use a full fence instead of release semantics
900     	 */
901     	if (ld.x6_op == 0xd)
902     		mb();
903     
904     	return 0;
905     }
906     
907     /*
908      * Sizes in bytes of floating-point memory operands, indexed by the x6_sz
909      * field of the instruction (see the float_fsz[ld.x6_sz] lookups below).
        */
910     static const unsigned char float_fsz[4]={
911     	16, /* x6_sz 0: extended precision (e) */
912     	8,  /* x6_sz 1: integer (8)            */
913     	4,  /* x6_sz 2: single precision (s)   */
914     	8   /* x6_sz 3: double precision (d)   */
915     };
916     
917     static inline void
918     mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
919     {
        	/*
        	 * Load *init into f6 with ldfe (extended-precision memory format),
        	 * then write f6 to *final with stf.spill.  f6 and memory are listed
        	 * as clobbered.
        	 */
920     	__asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
921     			      :: "r"(init), "r"(final) : "f6","memory");
922     }
923     
924     static inline void
925     mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
926     {
        	/*
        	 * Load *init into f6 with ldf8 (8-byte integer memory format),
        	 * then write f6 to *final with stf.spill.  f6 and memory are listed
        	 * as clobbered.
        	 */
927     	__asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
928     			      :: "r"(init), "r"(final) : "f6","memory");
929     }
930     
931     static inline void
932     mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
933     {
        	/*
        	 * Load *init into f6 with ldfs (single-precision memory format),
        	 * then write f6 to *final with stf.spill.  f6 and memory are listed
        	 * as clobbered.
        	 */
934     	__asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
935     			      :: "r"(init), "r"(final) : "f6","memory");
936     }
937     
938     static inline void
939     mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
940     {
        	/*
        	 * Load *init into f6 with ldfd (double-precision memory format),
        	 * then write f6 to *final with stf.spill.  f6 and memory are listed
        	 * as clobbered.
        	 */
941     	__asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
942     			      :: "r"(init), "r"(final) : "f6","memory");
943     }
944     
945     static inline void
946     float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
947     {
        	/*
        	 * Reload f6 from the spilled image at *init with ldf.fill, then
        	 * store it to *final with stfe (extended-precision memory format).
        	 * f6 and memory are listed as clobbered.
        	 */
948     	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
949     			      :: "r"(init), "r"(final) : "f6","memory");
950     }
951     
952     static inline void
953     float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
954     {
        	/*
        	 * Reload f6 from the spilled image at *init with ldf.fill, then
        	 * store it to *final with stf8 (8-byte integer memory format).
        	 * f6 and memory are listed as clobbered.
        	 */
955     	__asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
956     			      :: "r"(init), "r"(final) : "f6","memory");
957     }
958     
959     static inline void
960     float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
961     {
        	/*
        	 * Reload f6 from the spilled image at *init with ldf.fill, then
        	 * store it to *final with stfs (single-precision memory format).
        	 * f6 and memory are listed as clobbered.
        	 */
962     	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6"
963     			      :: "r"(init), "r"(final) : "f6","memory");
964     }
965     
966     static inline void
967     float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
968     {
        	/*
        	 * Reload f6 from the spilled image at *init with ldf.fill, then
        	 * store it to *final with stfd (double-precision memory format).
        	 * f6 and memory are listed as clobbered.
        	 */
969     	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6"
970     			      :: "r"(init), "r"(final) : "f6","memory");
971     }
972     
973     static int
974     emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
975     {
        	/*
        	 * Emulate an unaligned floating-point load pair: two consecutive
        	 * values of float_fsz[ld.x6_sz] bytes each are read starting at ifa
        	 * and placed in the FP registers named by ld.r1 and ld.imm.
        	 *
        	 * For the advanced form (x6_op == 0x2) nothing is loaded; only the
        	 * base update (if any) and the ALAT invalidation are carried out.
        	 *
        	 * Returns 0 on success, -1 if either copy_from_user() fails.
        	 */
976     	struct ia64_fpreg fpr_init[2];
977     	struct ia64_fpreg fpr_final[2];
978     	unsigned long len = float_fsz[ld.x6_sz];
979     
980     	/*
981     	 * fr0 & fr1 don't need to be checked because Illegal Instruction
982     	 * faults have higher priority than unaligned faults.
983     	 *
984     	 * r0 cannot be found as the base as it would never generate an
985     	 * unaligned reference.
986     	 */
987     
988     	/*
989     	 * make sure we get clean buffers
990     	 */
991     	memset(&fpr_init, 0, sizeof(fpr_init));
992     	memset(&fpr_final, 0, sizeof(fpr_final));
993     
994     	/*
995     	 * ldfpX.a: we don't try to emulate anything but we must
996     	 * invalidate the ALAT entry and execute updates, if any.
997     	 */
998     	if (ld.x6_op != 0x2) {
999     		/* this assumes little-endian byte-order: */
1000     
1001     		if (copy_from_user(&fpr_init[0], (void *) ifa, len)
1002     		    || copy_from_user(&fpr_init[1], (void *) (ifa + len), len))
1003     			return -1;
1004     
1005     		DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
1006     		DDUMP("frp_init =", &fpr_init, 2*len);
1007     		/*
1008     		 * XXX fixme
1009     		 * Could optimize inlines by using ldfpX & 2 spills
1010     		 */
1011     		switch( ld.x6_sz ) {
1012     			case 0:
1013     				mem2float_extended(&fpr_init[0], &fpr_final[0]);
1014     				mem2float_extended(&fpr_init[1], &fpr_final[1]);
1015     				break;
1016     			case 1:
1017     				mem2float_integer(&fpr_init[0], &fpr_final[0]);
1018     				mem2float_integer(&fpr_init[1], &fpr_final[1]);
1019     				break;
1020     			case 2:
1021     				mem2float_single(&fpr_init[0], &fpr_final[0]);
1022     				mem2float_single(&fpr_init[1], &fpr_final[1]);
1023     				break;
1024     			case 3:
1025     				mem2float_double(&fpr_init[0], &fpr_final[0]);
1026     				mem2float_double(&fpr_init[1], &fpr_final[1]);
1027     				break;
1028     		}
1029     		DDUMP("fpr_final =", &fpr_final, 2*len);
1030     		/*
1031     		 * XXX fixme
1032     		 *
1033     		 * A possible optimization would be to drop fpr_final and directly
1034     		 * use the storage from the saved context i.e., the actual final
1035     		 * destination (pt_regs, switch_stack or thread structure).
1036     		 */
1037     		setfpreg(ld.r1, &fpr_final[0], regs);
1038     		setfpreg(ld.imm, &fpr_final[1], regs);
1039     	}
1040     
1041     	/*
1042     	 * Check for updates: only immediate updates are available for this
1043     	 * instruction.
1044     	 */
1045     	if (ld.m) {
1046     		/*
1047     		 * the immediate is implicit given the ldsz of the operation, i.e.
1048     		 * twice the element size: single: 8 (2x4) and for all others it's 16 (2x8)
1049     		 */
1050     		ifa += len<<1;
1051     
1052     		/*
1053     		 * IMPORTANT:
1054     		 * the fact that we force the NaT of r3 to zero is ONLY valid
1055     		 * as long as we don't come here with a ldfpX.s.
1056     		 * For this reason we keep this sanity check.
        		 *
        		 * The function name is passed through "%s" rather than pasted
        		 * onto the format string, since __FUNCTION__ is not a string
        		 * literal on newer compilers.
1057     		 */
1058     		if (ld.x6_op == 1 || ld.x6_op == 3)
1059     			printk(KERN_ERR "%s: register update on speculative load pair, "
1060     			       "error\n", __FUNCTION__);
1061     
1062     		setreg(ld.r3, ifa, 0, regs);
1063     	}
1064     
1065     	/*
1066     	 * Invalidate ALAT entries, if any, for both registers.
1067     	 */
1068     	if (ld.x6_op == 0x2) {
1069     		invala_fr(ld.r1);
1070     		invala_fr(ld.imm);
1071     	}
1072     	return 0;
1073     }
1074     
1075     
1076     static int
1077     emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1078     {
        	/*
        	 * Emulate an unaligned floating-point load of float_fsz[ld.x6_sz]
        	 * bytes from ifa into the FP register named by ld.r1.
        	 *
        	 * For the advanced form (x6_op == 0x2) nothing is loaded; only the
        	 * base update (if any) and the ALAT invalidation are carried out.
        	 *
        	 * Returns 0 on success, -1 if copy_from_user() fails.
        	 */
1079     	struct ia64_fpreg fpr_init;
1080     	struct ia64_fpreg fpr_final;
1081     	unsigned long len = float_fsz[ld.x6_sz];
1082     
1083     	/*
1084     	 * fr0 & fr1 don't need to be checked because Illegal Instruction
1085     	 * faults have higher priority than unaligned faults.
1086     	 *
1087     	 * r0 cannot be found as the base as it would never generate an
1088     	 * unaligned reference.
1089     	 */
1090     
1091     	/*
1092     	 * make sure we get clean buffers
1093     	 */
1094     	memset(&fpr_init,0, sizeof(fpr_init));
1095     	memset(&fpr_final,0, sizeof(fpr_final));
1096     
1097     	/*
1098     	 * ldfX.a: we don't try to emulate anything but we must
1099     	 * invalidate the ALAT entry.
1100     	 * See comments in ldX for descriptions on how the various loads are handled.
1101     	 */
1102     	if (ld.x6_op != 0x2) {
1103     		if (copy_from_user(&fpr_init, (void *) ifa, len))
1104     			return -1;
1105     
1106     		DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1107     		DDUMP("fpr_init =", &fpr_init, len);
1108     		/*
1109     		 * convert from the memory format selected by x6_sz to the spilled
        		 * register format.  The original note here read "we only do
        		 * something for x6_op={0,8,9}"; the only x6_op test visible in this
        		 * function is the != 0x2 check above -- NOTE(review): confirm
        		 * against the opcode table.
1110     		 */
1111     		switch( ld.x6_sz ) {
1112     			case 0:
1113     				mem2float_extended(&fpr_init, &fpr_final);
1114     				break;
1115     			case 1:
1116     				mem2float_integer(&fpr_init, &fpr_final);
1117     				break;
1118     			case 2:
1119     				mem2float_single(&fpr_init, &fpr_final);
1120     				break;
1121     			case 3:
1122     				mem2float_double(&fpr_init, &fpr_final);
1123     				break;
1124     		}
1125     		DDUMP("fpr_final =", &fpr_final, len);
1126     		/*
1127     		 * XXX fixme
1128     		 *
1129     		 * A possible optimization would be to drop fpr_final and directly
1130     		 * use the storage from the saved context i.e., the actual final
1131     		 * destination (pt_regs, switch_stack or thread structure).
1132     		 */
1133     		setfpreg(ld.r1, &fpr_final, regs);
1134     	}
1135     
1136     	/*
1137     	 * check for updates on any loads: op == 0x7 selects the immediate
        	 * update form, otherwise m selects the register update form
1138     	 */
1139     	if (ld.op == 0x7 || ld.m)
1140     		emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1141     
1142     	/*
1143     	 * invalidate ALAT entry in case of advanced floating point loads
1144     	 */
1145     	if (ld.x6_op == 0x2)
1146     		invala_fr(ld.r1);
1147     
1148     	return 0;
1149     }
1150     
1151     
1152     static int
1153     emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1154     {
        	/*
        	 * Emulate an unaligned floating-point store of float_fsz[ld.x6_sz]
        	 * bytes to ifa; the FP register to store is named by ld.imm.
        	 *
        	 * Returns 0 on success, -1 if copy_to_user() fails.
        	 */
1155     	struct ia64_fpreg fpr_init;
1156     	struct ia64_fpreg fpr_final;
1157     	unsigned long len = float_fsz[ld.x6_sz];
1158     
1159     	/*
1160     	 * make sure we get clean buffers
1161     	 */
1162     	memset(&fpr_init,0, sizeof(fpr_init));
1163     	memset(&fpr_final,0, sizeof(fpr_final));
1164     
1165     	/*
1166     	 * if we get to this handler, Nat bits on both r3 and r2 have already
1167     	 * been checked, so we don't need to do it here.
1168     	 *
1169     	 * extract the value to be stored
1170     	 */
1171     	getfpreg(ld.imm, &fpr_init, regs);
1172     	/*
1173     	 * during this step, we extract the spilled registers from the saved
1174     	 * context i.e., we refill. Then we store (no spill) to a temporary
1175     	 * aligned location (fpr_final) in the memory format selected by x6_sz.
1176     	 */
1177     	switch( ld.x6_sz ) {
1178     		case 0:
1179     			float2mem_extended(&fpr_init, &fpr_final);
1180     			break;
1181     		case 1:
1182     			float2mem_integer(&fpr_init, &fpr_final);
1183     			break;
1184     		case 2:
1185     			float2mem_single(&fpr_init, &fpr_final);
1186     			break;
1187     		case 3:
1188     			float2mem_double(&fpr_init, &fpr_final);
1189     			break;
1190     	}
1191     	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1192     	DDUMP("fpr_init =", &fpr_init, len);
1193     	DDUMP("fpr_final =", &fpr_final, len);
1194     
        	/* copy the converted value from the aligned buffer to the real target */
1195     	if (copy_to_user((void *) ifa, &fpr_final, len))
1196     		return -1;
1197     
1198     	/*
1199     	 * stfX [r3]=r2,imm(9): the base register is updated after the store.
1200     	 *
1201     	 * NOTE:
1202     	 * ld.r3 can never be r0, because r0 would not generate an
1203     	 * unaligned access.
1204     	 */
1205     	if (ld.op == 0x7) {
1206     		unsigned long imm;
1207     
1208     		/*
1209     		 * form imm9: ld.r1 (bits [12:6]) holds the low 7 bits, ld.x is bit 7
1210     		 */
1211     		imm = ld.x << 7 | ld.r1;
1212     		/*
1213     		 * sign extend: if m is set, SIGN_EXT9 fills bit 8 and above with ones
1214     		 */
1215     		if (ld.m)
1216     			imm |= SIGN_EXT9;
1217     		/*
1218     		 * ifa == r3 (NaT is necessarily cleared)
1219     		 */
1220     		ifa += imm;
1221     
1222     		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1223     
1224     		setreg(ld.r3, ifa, 0, regs);
1225     	}
1226     	/*
1227     	 * we don't have alat_invalidate_multiple() so we need
1228     	 * to do the complete flush :-<<
1229     	 */
1230     	ia64_invala();
1231     
1232     	return 0;
1233     }
1234     
1235     /*
1236      * Rate limiter for the unaligned-access log messages.  The messages let the
1237      * user/sysadmin notice the problem and eventually fix the program, but one
1238      * line per access would be far too many, so they are paced using jiffies.
1239      * Returns 1 if the caller may log now, 0 if it should stay quiet.  This isn't
1240      * really MP-safe, but it doesn't really have to be either...
1241      */
1242     static int
1243     within_logging_rate_limit (void)
1244     {
1245     	static unsigned long count, last_time;
1246     	/* more than 5 seconds since the last logged message: start over */
1247     	if (jiffies - last_time > 5*HZ)
1248     		count = 0;
1249     	/* every call is counted; the 5th and later ones are suppressed */
1250     	if (++count >= 5)
1251     		return 0;
1252     	last_time = jiffies;
1253     	return 1;
1254     }
1255     
1256     void
1257     ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1258     {
        	/*
        	 * Handle an unaligned data reference trap.
        	 *
        	 *	ifa:	the misaligned address that was accessed
        	 *	regs:	saved state of the interrupted context; cr_iip and the
        	 *		ri field of its PSR identify the faulting instruction
        	 *
        	 * The instruction is fetched and decoded, and the access is then
        	 * emulated by one of the emulate_*() helpers.  On success the saved
        	 * instruction pointer is moved to the next slot.  On failure a kernel
        	 * access either goes through its exception fixup or ends in
        	 * die_if_kernel(); a user access gets SIGBUS with BUS_ADRALN.
        	 */
1259     	struct exception_fixup fix = { 0 };
1260     	struct ia64_psr *ipsr = ia64_psr(regs);
1261     	mm_segment_t old_fs = get_fs();
1262     	unsigned long bundle[2];
1263     	unsigned long opcode;
1264     	struct siginfo si;
1265     	union {
1266     		unsigned long l;
1267     		load_store_t insn;
1268     	} u;
1269     	int ret = -1;
1270     
1271     	if (ia64_psr(regs)->be) {
1272     		/* we don't support big-endian accesses */
1273     		die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
1274     		goto force_sigbus;
1275     	}
1276     
1277     	/*
1278     	 * Treat kernel accesses for which there is an exception handler entry the same as
1279     	 * user-level unaligned accesses.  Otherwise, a clever program could trick this
1280     	 * handler into reading arbitrary kernel addresses...
1281     	 */
1282     	if (!user_mode(regs)) {
1283     #ifdef GAS_HAS_LOCAL_TAGS
1284     		fix = search_exception_table(regs->cr_iip + ia64_psr(regs)->ri);
1285     #else
1286     		fix = search_exception_table(regs->cr_iip);
1287     #endif
1288     	}
1289     	if (user_mode(regs) || fix.cont) {
1290     		if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1291     			goto force_sigbus;
1292     
1293     		if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
1294     		    && within_logging_rate_limit())
1295     		{
1296     			char buf[200];	/* comm[] is at most 16 bytes... */
1297     			size_t len;
1298     
1299     			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1300     				      "ip=0x%016lx\n\r", current->comm, current->pid,
1301     				      ifa, regs->cr_iip + ipsr->ri);
1302     			/*
1303     			 * Don't call tty_write_message() if we're in the kernel; we might
1304     			 * be holding locks...
1305     			 */
1306     			if (user_mode(regs))
1307     				tty_write_message(current->tty, buf);
1308     			buf[len-1] = '\0';	/* drop '\r' */
1309     			printk(KERN_WARNING "%s", buf);	/* watch for command names containing %s */
1310     		}
1311     	} else {
1312     		if (within_logging_rate_limit())
1313     			printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1314     			       ifa, regs->cr_iip + ipsr->ri);
        		/* plain kernel access: let the *_user() copies reach kernel addresses */
1315     		set_fs(KERNEL_DS);
1316     	}
1317     
1318     	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1319     	       regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1320     
        	/* fetch the 16-byte bundle that holds the faulting instruction */
1321     	if (__copy_from_user(bundle, (void *) regs->cr_iip, 16))
1322     		goto failure;
1323     
1324     	/*
1325     	 * extract the instruction from the bundle given the slot number
1326     	 */
1327     	switch (ipsr->ri) {
1328     	      case 0: u.l = (bundle[0] >>  5); break;
1329     	      case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1330     	      case 2: u.l = (bundle[1] >> 23); break;
        	      /* no slot 3 in a bundle: don't decode an uninitialized u.l */
        	      default: goto failure;
1331     	}
1332     	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1333     
1334     	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1335     	       "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1336     	       u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1337     
1338     	/*
1339     	 * IMPORTANT:
1340     	 * Notice that the switch statement DOES not cover all possible instructions
1341     	 * that DO generate unaligned references. This is made on purpose because for some
1342     	 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
1343     	 * is WRONG to try and emulate. Here is a list of instructions we don't emulate i.e.,
1344     	 * the program will get a signal and die:
1345     	 *
1346     	 *	load/store:
1347     	 *		- ldX.spill
1348     	 *		- stX.spill
1349     	 *	Reason: RNATs are based on addresses
1350     	 *
1351     	 *	synchronization:
1352     	 *		- cmpxchg
1353     	 *		- fetchadd
1354     	 *		- xchg
1355     	 *	Reason: ATOMIC operations cannot be emulated properly using multiple
1356     	 *	        instructions.
1357     	 *
1358     	 *	speculative loads:
1359     	 *		- ldX.sZ
1360     	 *	Reason: side effects, code must be ready to deal with failure so simpler
1361     	 *		to let the load fail.
1362     	 * ---------------------------------------------------------------------------------
1363     	 * XXX fixme
1364     	 *
1365     	 * I would like to get rid of this switch case and do something
1366     	 * more elegant.
1367     	 */
1368     	switch (opcode) {
1369     	      case LDS_OP:
1370     	      case LDSA_OP:
1371     	      case LDS_IMM_OP:
1372     	      case LDSA_IMM_OP:
1373     	      case LDFS_OP:
1374     	      case LDFSA_OP:
1375     	      case LDFS_IMM_OP:
1376     		/*
1377     		 * The instruction will be retried with deferred exceptions turned on, and
1378     		 * we should get Nat bit installed
1379     		 *
1380     		 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1381     		 * are actually executed even though the operation failed. So we don't
1382     		 * need to take care of this.
1383     		 */
1384     		DPRINT("forcing PSR_ED\n");
1385     		regs->cr_ipsr |= IA64_PSR_ED;
1386     		goto done;
1387     
1388     	      case LD_OP:
1389     	      case LDA_OP:
1390     	      case LDBIAS_OP:
1391     	      case LDACQ_OP:
1392     	      case LDCCLR_OP:
1393     	      case LDCNC_OP:
1394     	      case LDCCLRACQ_OP:
1395     	      case LD_IMM_OP:
1396     	      case LDA_IMM_OP:
1397     	      case LDBIAS_IMM_OP:
1398     	      case LDACQ_IMM_OP:
1399     	      case LDCCLR_IMM_OP:
1400     	      case LDCNC_IMM_OP:
1401     	      case LDCCLRACQ_IMM_OP:
1402     		ret = emulate_load_int(ifa, u.insn, regs);
1403     		break;
1404     
1405     	      case ST_OP:
1406     	      case STREL_OP:
1407     	      case ST_IMM_OP:
1408     	      case STREL_IMM_OP:
1409     		ret = emulate_store_int(ifa, u.insn, regs);
1410     		break;
1411     
1412     	      case LDF_OP:
1413     	      case LDFA_OP:
1414     	      case LDFCCLR_OP:
1415     	      case LDFCNC_OP:
1416     	      case LDF_IMM_OP:
1417     	      case LDFA_IMM_OP:
1418     	      case LDFCCLR_IMM_OP:
1419     	      case LDFCNC_IMM_OP:
        		/* the x bit distinguishes the load-pair form from the single load */
1420     		if (u.insn.x)
1421     			ret = emulate_load_floatpair(ifa, u.insn, regs);
1422     		else
1423     			ret = emulate_load_float(ifa, u.insn, regs);
1424     		break;
1425     
1426     	      case STF_OP:
1427     	      case STF_IMM_OP:
1428     		ret = emulate_store_float(ifa, u.insn, regs);
1429     		break;
1430     
1431     	      default:
1432     		goto failure;
1433     	}
1434     	DPRINT("ret=%d\n", ret);
1435     	if (ret)
1436     		goto failure;
1437     
        	/* emulation succeeded: step past the emulated instruction */
1438     	if (ipsr->ri == 2)
1439     		/*
1440     		 * given today's architecture this case is not likely to happen because a
1441     		 * memory access instruction (M) can never be in the last slot of a
1442     		 * bundle. But let's keep it for now.
1443     		 */
1444     		regs->cr_iip += 16;
1445     	ipsr->ri = (ipsr->ri + 1) & 0x3;
1446     
1447     	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1448       done:
1449     	set_fs(old_fs);		/* restore original address limit */
1450     	return;
1451     
1452       failure:
1453     	/* something went wrong... */
1454     	if (!user_mode(regs)) {
1455     		if (fix.cont) {
1456     			handle_exception(regs, fix);
1457     			goto done;
1458     		}
1459     		die_if_kernel("error during unaligned kernel access\n", regs, ret);
1460     		/* NOT_REACHED */
1461     	}
1462       force_sigbus:
        	/*
        	 * Clear the whole siginfo first so that the fields not set below do
        	 * not carry leftover stack contents.
        	 */
        	memset(&si, 0, sizeof(si));
1463     	si.si_signo = SIGBUS;
1464     	si.si_errno = 0;
1465     	si.si_code = BUS_ADRALN;
1466     	si.si_addr = (void *) ifa;
1467     	force_sig_info(SIGBUS, &si, current);
1468     	goto done;
1469     }
1470