File: /usr/src/linux/arch/cris/lib/usercopy.c
1 /*
2 * User address space access functions.
3 * The non-inlined parts of asm-cris/uaccess.h are here.
4 *
5 * Copyright (C) 2000, Axis Communications AB.
6 *
7 * Written by Hans-Peter Nilsson.
8 * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
9 */
10
11 #include <asm/uaccess.h>
12
13 /* Asm:s have been tweaked (within the domain of correctness) to give
14 satisfactory results for "gcc version 2.96 20000427 (experimental)".
15
16 Check regularly...
17
18 Note that the PC saved at a bus-fault is the address *after* the
19 faulting instruction, which means the branch-target for instructions in
20 delay-slots for taken branches. Note also that the postincrement in
21 the instruction is performed regardless of bus-fault; the register is
22 seen updated in fault handlers.
23
24 Oh, and on the code formatting issue, to whomever feels like "fixing
25 it" to Conformity: I'm too "lazy", but why don't you go ahead and "fix"
26 string.c too. I just don't think too many people will hack this file
27 for the code format to be an issue. */
28
29
30 /* Copy to userspace. This is based on the memcpy used for
31 kernel-to-kernel copying; see "string.c".

   PDST is the destination, PSRC the source and PN the byte count.

   The return value is RETN.  It starts at zero and is only modified by
   the __asm_copy_to_user_* helpers (defined outside this file,
   presumably in <asm/uaccess.h>) and by the fixup code of the movem
   loop below, whose own comment describes r10 as the count of bytes
   that failed to be copied.

   NOTE(review): PN is unsigned long but the working count N is a plain
   int.  A PN above INT_MAX would presumably make N negative, in which
   case every size test below is false and 0 is returned without
   copying anything.  Presumably callers never pass such sizes; confirm. */
32
33 unsigned long
34 __copy_user (void *pdst, const void *psrc, unsigned long pn)
35 {
36 /* We want the parameters put in special registers.
37 Make sure the compiler is able to make something useful of this.
38 As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
39
40 FIXME: Comment for old gcc version. Check.
41 If gcc was all right, it really would need no temporaries, and no
42 stack space to save stuff on.

   The movem asm further down names r10-r13 literally, so the four
   register bindings declared here and that asm must stay in agreement. */
43
44 register char *dst __asm__ ("r13") = pdst;
45 register const char *src __asm__ ("r11") = psrc;
46 register int n __asm__ ("r12") = pn;
47 register int retn __asm__ ("r10") = 0;
48
49
50 /* When src is aligned but not dst, this makes a few extra needless
51 cycles. I believe it would take as many to check that the
52 re-alignment was unnecessary.

   Only the alignment of dst is tested; src is not examined here. */
53 if (((unsigned long) dst & 3) != 0
54 /* Don't align if we wouldn't copy more than a few bytes; so we
55 don't have to check further for overflows. */
56 && n >= 3)
57 {
   /* Odd destination address: move a single byte first.  The test of
      bit 1 that follows assumes the helper has advanced dst;
      TODO confirm against the helper definition. */
58 if ((unsigned long) dst & 1)
59 {
60 __asm_copy_to_user_1 (dst, src, retn);
61 n--;
62 }
63
   /* Destination still not dword-aligned: move two more bytes. */
64 if ((unsigned long) dst & 2)
65 {
66 __asm_copy_to_user_2 (dst, src, retn);
67 n -= 2;
68 }
69 }
70
71 /* Decide which copying method to use. */
72 if (n >= 44*2) /* Break even between movem and
73 move16 is at 38.7*2, but modulo 44. */
74 {
75 /* For large copies we use 'movem'. */
76
77 /* It is not optimal to tell the compiler about clobbering any
78 registers; that will move the saving/restoring of those registers
79 to the function prologue/epilogue, and make non-movem sizes
80 suboptimal.
81
82 This method is not foolproof; it assumes that the "asm reg"
83 declarations at the beginning of the function really are used
84 here (beware: they may be moved to temporary registers).
85 This way, we do not have to save/move the registers around into
86 temporaries; we can safely use them straight away.
87
88 If you want to check that the allocation was right; then
89 check the equalities in the first comment. It should say
90 "r13=r13, r11=r11, r12=r12 r10=r10".

   Outline of the asm that follows:

   - r0-r10 are stored in an 11-dword stack frame on entry and reloaded
     on exit, which is how the asm gets away without a clobber list.

   - Loop 0 loads 44 bytes from [r11+] into r0-r10 and stores them to
     [r13+].  r12 is decremented by 44 once before the loop and once per
     pass, so the loop branch tests it directly; the addq at label 1
     undoes the final underflow, leaving the remaining byte count (the
     comment after the asm states it is below 44) for the C code.

   - __ex_table pairs label 0 with fixup 3 and label 1 with fixup 4.
     Fixup 3 adds 44 to the dword at [sp] (presumably the saved r10,
     i.e. retn; this depends on the movem store order, TODO confirm)
     and jumps back to label 0.  Fixup 4 reloads the saved registers,
     adds 44 to both r10 and r12 and resumes at label 2.

   The matching constraints "0".."3" tie each input to the register of
   the corresponding output, so dst, src, n and retn are both read and
   updated by the asm. */
91 __asm__ volatile ("
92 ;; Check that the following is true (same register names on
93 ;; both sides of equal sign, as in r8=r8):
94 ;; %0=r13, %1=r11, %2=r12 %3=r10
95 ;;
96 ;; Save the registers we'll use in the movem process
97 ;; on the stack.
98 subq 11*4,sp
99 movem r10,[sp]
100
101 ;; Now we've got this:
102 ;; r11 - src
103 ;; r13 - dst
104 ;; r12 - n
105
106 ;; Update n for the first loop
107 subq 44,r12
108
109 ; Since the noted PC of a faulting instruction in a delay-slot of a taken
110 ; branch, is that of the branch target, we actually point at the from-movem
111 ; for this case. There is no ambiguity here; if there was a fault in that
112 ; instruction (meaning a kernel oops), the faulted PC would be the address
113 ; after *that* movem.
114
115 0:
116 movem [r11+],r10
117 subq 44,r12
118 bge 0b
119 movem r10,[r13+]
120 1:
121 addq 44,r12 ;; compensate for last loop underflowing n
122
123 ;; Restore registers from stack
124 movem [sp+],r10
125 2:
126 .section .fixup,\"ax\"
127
128 ; To provide a correct count in r10 of bytes that failed to be copied,
129 ; we jump back into the loop if the loop-branch was taken. There is no
130 ; performance penalty for sany use; the program will segfault soon enough.
131
132 3:
133 move.d [sp],r10
134 addq 44,r10
135 move.d r10,[sp]
136 jump 0b
137 4:
138 movem [sp+],r10
139 addq 44,r10
140 addq 44,r12
141 jump 2b
142
143 .previous
144 .section __ex_table,\"a\"
145 .dword 0b,3b
146 .dword 1b,4b
147 .previous"
148
149 /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
150 /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
151
152 }
153
154 /* Either we directly start copying, using dword copying in a loop, or
155 we copy as much as possible with 'movem' and then the last block (<44
156 bytes) is copied here. This will work since 'movem' will have
157 updated SRC, DST and N. */
158
159 while (n >= 16)
160 {
161 __asm_copy_to_user_16 (dst, src, retn);
162 n -= 16;
163 }
164
165 /* Having a separate by-four loops cuts down on cache footprint.
166 FIXME: Test with and without; increasing switch to be 0..15. */
167 while (n >= 4)
168 {
169 __asm_copy_to_user_4 (dst, src, retn);
170 n -= 4;
171 }
172
   /* n is below 4 here; finish with a single 1-, 2- or 3-byte helper. */
173 switch (n)
174 {
175 case 0:
176 break;
177 case 1:
178 __asm_copy_to_user_1 (dst, src, retn);
179 break;
180 case 2:
181 __asm_copy_to_user_2 (dst, src, retn);
182 break;
183 case 3:
184 __asm_copy_to_user_3 (dst, src, retn);
185 break;
186 }
187
188 return retn;
189 }
190
191 /* Copy from user to kernel, zeroing the bytes that were inaccessible in
192 userland.

   PDST is the destination, PSRC the source and PN the byte count.

   The return value is RETN.  It starts at zero and is only modified by
   the __asm_copy_from_user_* helpers (defined outside this file,
   presumably in <asm/uaccess.h>) and by the fixup code of the movem
   loop below, which adds to it the number of bytes it clears instead
   of copying.

   NOTE(review): as in __copy_user, the working count N is a plain int
   initialised from the unsigned long PN; a PN above INT_MAX would
   presumably make every size test below false.  Confirm callers never
   pass such sizes. */
193
194 unsigned long
195 __copy_user_zeroing (void *pdst, const void *psrc, unsigned long pn)
196 {
197 /* We want the parameters put in special registers.
198 Make sure the compiler is able to make something useful of this.
199 As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
200
201 FIXME: Comment for old gcc version. Check.
202 If gcc was all right, it really would need no temporaries, and no
203 stack space to save stuff on.

   The movem asm further down names r10-r13 literally, so the four
   register bindings declared here and that asm must stay in agreement. */
204
205 register char *dst __asm__ ("r13") = pdst;
206 register const char *src __asm__ ("r11") = psrc;
207 register int n __asm__ ("r12") = pn;
208 register int retn __asm__ ("r10") = 0;
209
210 /* When src is aligned but not dst, this makes a few extra needless
211 cycles. I believe it would take as many to check that the
212 re-alignment was unnecessary.

   Only the alignment of dst is tested; src is not examined here. */
213 if (((unsigned long) dst & 3) != 0
214 /* Don't align if we wouldn't copy more than a few bytes; so we
215 don't have to check further for overflows. */
216 && n >= 3)
217 {
   /* Odd destination address: move a single byte first.  The test of
      bit 1 that follows assumes the helper has advanced dst;
      TODO confirm against the helper definition. */
218 if ((unsigned long) dst & 1)
219 {
220 __asm_copy_from_user_1 (dst, src, retn);
221 n--;
222 }
223
   /* Destination still not dword-aligned: move two more bytes. */
224 if ((unsigned long) dst & 2)
225 {
226 __asm_copy_from_user_2 (dst, src, retn);
227 n -= 2;
228 }
229 }
230
231 /* Decide which copying method to use. */
232 if (n >= 44*2) /* Break even between movem and
233 move16 is at 38.7*2, but modulo 44. */
234 {
235 /* For large copies we use 'movem' */
236
237 /* It is not optimal to tell the compiler about clobbering any
238 registers; that will move the saving/restoring of those registers
239 to the function prologue/epilogue, and make non-movem sizes
240 suboptimal.
241
242 This method is not foolproof; it assumes that the "asm reg"
243 declarations at the beginning of the function really are used
244 here (beware: they may be moved to temporary registers).
245 This way, we do not have to save/move the registers around into
246 temporaries; we can safely use them straight away.
247
248 If you want to check that the allocation was right; then
249 check the equalities in the first comment. It should say
250 "r13=r13, r11=r11, r12=r12 r10=r10"

   Outline of the asm that follows:

   - r0-r10 are stored in an 11-dword stack frame on entry and reloaded
     at label 8, which is how the asm gets away without a clobber list.

   - Loop 0 loads 44 bytes from [r11+] into r0-r10 and stores them to
     [r13+], with r12 pre-decremented by 44 so the loop branch can test
     it directly.

   - The only __ex_table entry pairs label 1, placed directly after the
     loading movem, with fixup 3.

   - Unlike __copy_user, fixup 3 never re-enters the copy loop.  It adds
     r12 + 44 to the dword at [sp] (presumably the saved r10, i.e. retn;
     this depends on the movem store order, TODO confirm), zeroes
     r0-r10, and then clears the rest of the destination: 44-byte
     blocks at label 4, dwords at label 5, then up to three single
     bytes, before jumping to label 8.

   - Every path into label 7 has r12 equal to zero (each beq is taken
     on a zero result, and the last path executes clear.d r12), so
     after a fault the C loops following the asm see n == 0. */
251 __asm__ volatile ("
252 ;; Check that the following is true (same register names on
253 ;; both sides of equal sign, as in r8=r8):
254 ;; %0=r13, %1=r11, %2=r12 %3=r10
255 ;;
256 ;; Save the registers we'll use in the movem process
257 ;; on the stack.
258 subq 11*4,sp
259 movem r10,[sp]
260
261 ;; Now we've got this:
262 ;; r11 - src
263 ;; r13 - dst
264 ;; r12 - n
265
266 ;; Update n for the first loop
267 subq 44,r12
268 0:
269 movem [r11+],r10
270 1:
271 subq 44,r12
272 bge 0b
273 movem r10,[r13+]
274
275 addq 44,r12 ;; compensate for last loop underflowing n
276 8:
277 ;; Restore registers from stack
278 movem [sp+],r10
279
280 .section .fixup,\"ax\"
281
282 ;; Do not jump back into the loop if we fail. For some uses, we get a
283 ;; page fault but for performance reasons we care to not get further
284 ;; faults. For example, fs/super.c at one time did
285 ;; i = size - copy_from_user((void *)page, data, size);
286 ;; which would cause repeated faults while clearing the remainder of
287 ;; the SIZE bytes at PAGE after the first fault.
288
289 3:
290 move.d [sp],r10
291
292 ;; Number of remaining bytes, cleared but not copied, is r12 + 44.
293
294 add.d r12,r10
295 addq 44,r10
296
297 move.d r10,[sp]
298 clear.d r0
299 clear.d r1
300 clear.d r2
301 clear.d r3
302 clear.d r4
303 clear.d r5
304 clear.d r6
305 clear.d r7
306 clear.d r8
307 clear.d r9
308 clear.d r10
309
310 ;; Perform clear similar to the copy-loop.
311
312 4:
313 subq 44,r12
314 bge 4b
315 movem r10,[r13+]
316
317 ;; Clear by four for the remaining multiples.
318
319 addq 40,r12
320 bmi 6f
321 nop
322 5:
323 subq 4,r12
324 bpl 5b
325 clear.d [r13+]
326 6:
327 addq 4,r12
328 beq 7f
329 nop
330
331 subq 1,r12
332 beq 7f
333 clear.b [r13+]
334
335 subq 1,r12
336 beq 7f
337 clear.b [r13+]
338
339 clear.d r12
340 clear.b [r13+]
341 7:
342 jump 8b
343
344 .previous
345 .section __ex_table,\"a\"
346 .dword 1b,3b
347 .previous"
348
349 /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
350 /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
351
352 }
353
354 /* Either we directly start copying here, using dword copying in a loop,
355 or we copy as much as possible with 'movem' and then the last block
356 (<44 bytes) is copied here. This will work since 'movem' will have
357 updated src, dst and n. */
358
359 while (n >= 16)
360 {
361 __asm_copy_from_user_16 (dst, src, retn);
362 n -= 16;
363 }
364
365 /* Having a separate by-four loops cuts down on cache footprint.
366 FIXME: Test with and without; increasing switch to be 0..15. */
367 while (n >= 4)
368 {
369 __asm_copy_from_user_4 (dst, src, retn);
370 n -= 4;
371 }
372
   /* n is below 4 here; finish with a single 1-, 2- or 3-byte helper. */
373 switch (n)
374 {
375 case 0:
376 break;
377 case 1:
378 __asm_copy_from_user_1 (dst, src, retn);
379 break;
380 case 2:
381 __asm_copy_from_user_2 (dst, src, retn);
382 break;
383 case 3:
384 __asm_copy_from_user_3 (dst, src, retn);
385 break;
386 }
387
388 return retn;
389 }
390
391 /* Zero userspace.

   PTO is the start of the area to clear and PN its length in bytes.

   The return value is RETN.  It starts at zero and is only modified by
   the __asm_clear_* helpers (defined outside this file, presumably in
   <asm/uaccess.h>) and by the fixup code of the movem loop below, which
   adds 48 to it for each faulting 48-byte store.

   NOTE(review): the working count N is a plain int initialised from the
   unsigned long PN; a PN above INT_MAX would presumably make every size
   test below false, so nothing would be cleared and 0 returned.
   Confirm callers never pass such sizes. */
392
393 unsigned long
394 __do_clear_user (void *pto, unsigned long pn)
395 {
396 /* We want the parameters put in special registers.
397 Make sure the compiler is able to make something useful of this.
398 As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
399
400 FIXME: Comment for old gcc version. Check.
401 If gcc was all right, it really would need no temporaries, and no
402 stack space to save stuff on.

   NOTE(review): the register mapping quoted above looks copied from the
   copy routines.  This function has no src; the bindings actually
   declared below are dst in r13, n in r12 and retn in r10, and r11 is
   used by the asm as an extra zero register. */
403
404 register char *dst __asm__ ("r13") = pto;
405 register int n __asm__ ("r12") = pn;
406 register int retn __asm__ ("r10") = 0;
407
408
409 if (((unsigned long) dst & 3) != 0
410 /* Don't align if we wouldn't copy more than a few bytes. */
411 && n >= 3)
412 {
   /* Odd destination address: clear a single byte first.  The test of
      bit 1 that follows assumes the helper has advanced dst;
      TODO confirm against the helper definition. */
413 if ((unsigned long) dst & 1)
414 {
415 __asm_clear_1 (dst, retn);
416 n--;
417 }
418
   /* Destination still not dword-aligned: clear two more bytes. */
419 if ((unsigned long) dst & 2)
420 {
421 __asm_clear_2 (dst, retn);
422 n -= 2;
423 }
424 }
425
426 /* Decide which clearing method to use.
427 FIXME: This number is from the "ordinary" kernel memset. */
428 if (n >= (1*48))
429 {
430 /* For large clears we use 'movem' */
431
432 /* It is not optimal to tell the compiler about clobbering any
433 call-saved registers; that will move the saving/restoring of
434 those registers to the function prologue/epilogue, and make
435 non-movem sizes suboptimal.
436
437 This method is not foolproof; it assumes that the "asm reg"
438 declarations at the beginning of the function really are used
439 here (beware: they may be moved to temporary registers).
440 This way, we do not have to save/move the registers around into
441 temporaries; we can safely use them straight away.
442
443 If you want to check that the allocation was right; then
444 check the equalities in the first comment. It should say
445 something like "r13=r13, r12=r12 r10=r10".

   Outline of the asm that follows:

   - r0-r10 are stored in an 11-dword stack frame on entry and reloaded
     on exit.  r11 is not saved that way; it is the one register listed
     in the clobber section of the asm.

   - r0-r11 are zeroed, and each pass of loop 0 stores those twelve
     registers (48 bytes) to [r13+].  r12 is decremented by 48 once
     before the loop and once per pass, so the loop branch tests it
     directly; the addq at label 1 undoes the final underflow.

   - __ex_table pairs label 0 with fixup 3 and label 1 with fixup 4.
     Fixup 3 adds 48 to the dword at [sp] (presumably the saved r10,
     i.e. retn; this depends on the movem store order, TODO confirm),
     zeroes r10 again since it was used as scratch, and jumps back to
     label 0.  Fixup 4 reloads the saved registers, adds 48 to both r10
     and r12 and resumes at label 2. */
446 __asm__ volatile ("
447 ;; Check that the following is true (same register names on
448 ;; both sides of equal sign, as in r8=r8):
449 ;; %0=r13, %1=r12 %2=r10
450 ;;
451 ;; Save the registers we'll clobber in the movem process
452 ;; on the stack. Don't mention them to gcc, it will only be
453 ;; upset.
454 subq 11*4,sp
455 movem r10,[sp]
456
457 clear.d r0
458 clear.d r1
459 clear.d r2
460 clear.d r3
461 clear.d r4
462 clear.d r5
463 clear.d r6
464 clear.d r7
465 clear.d r8
466 clear.d r9
467 clear.d r10
468 clear.d r11
469
470 ;; Now we've got this:
471 ;; r13 - dst
472 ;; r12 - n
473
474 ;; Update n for the first loop
475 subq 12*4,r12
476 0:
477 subq 12*4,r12
478 bge 0b
479 movem r11,[r13+]
480 1:
481 addq 12*4,r12 ;; compensate for last loop underflowing n
482
483 ;; Restore registers from stack
484 movem [sp+],r10
485 2:
486 .section .fixup,\"ax\"
487 3:
488 move.d [sp],r10
489 addq 12*4,r10
490 move.d r10,[sp]
491 clear.d r10
492 jump 0b
493
494 4:
495 movem [sp+],r10
496 addq 12*4,r10
497 addq 12*4,r12
498 jump 2b
499
500 .previous
501 .section __ex_table,\"a\"
502 .dword 0b,3b
503 .dword 1b,4b
504 .previous"
505
506 /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
507 /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
508 /* Clobber */ : "r11");
509 }
510
   /* Clear what is left (everything, if the movem path was not taken)
      in 16-byte steps. */
511 while (n >= 16)
512 {
513 __asm_clear_16 (dst, retn);
514 n -= 16;
515 }
516
517 /* Having a separate by-four loops cuts down on cache footprint.
518 FIXME: Test with and without; increasing switch to be 0..15. */
519 while (n >= 4)
520 {
521 __asm_clear_4 (dst, retn);
522 n -= 4;
523 }
524
   /* n is below 4 here; finish with a single 1-, 2- or 3-byte helper. */
525 switch (n)
526 {
527 case 0:
528 break;
529 case 1:
530 __asm_clear_1 (dst, retn);
531 break;
532 case 2:
533 __asm_clear_2 (dst, retn);
534 break;
535 case 3:
536 __asm_clear_3 (dst, retn);
537 break;
538 }
539
540 return retn;
541 }
542