File: /usr/src/linux/arch/cris/lib/usercopy.c

1     /*
2      * User address space access functions.
3      * The non-inlined parts of asm-cris/uaccess.h are here.
4      *
5      * Copyright (C) 2000, Axis Communications AB.
6      *
7      * Written by Hans-Peter Nilsson.
8      * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
9      */
10     
11     #include <asm/uaccess.h>
12     
13     /* Asm:s have been tweaked (within the domain of correctness) to give
14        satisfactory results for "gcc version 2.96 20000427 (experimental)".
15     
16        Check regularly...
17     
18        Note that the PC saved at a bus-fault is the address *after* the
19        faulting instruction, which means the branch-target for instructions in
20        delay-slots for taken branches.  Note also that the postincrement in
21        the instruction is performed regardless of bus-fault; the register is
22        seen updated in fault handlers.
23     
24        Oh, and on the code formatting issue, to whomever feels like "fixing
25        it" to Conformity: I'm too "lazy", but why don't you go ahead and "fix"
26        string.c too.  I just don't think too many people will hack this file
27        for the code format to be an issue.  */
28     
29     
30     /* Copy PN bytes from PSRC to the userspace address PDST.  This is based on
31        the memcpy used for kernel-to-kernel copying; see "string.c".  */
32     /* Returns retn; per the fixup notes below, bytes that failed to be copied.  */
33     unsigned long
34     __copy_user (void *pdst, const void *psrc, unsigned long pn)
35     {
36       /* We want the parameters put in special registers.
37          Make sure the compiler is able to make something useful of this.
38          As it is now (incoming register -> pinned variable):
39          r10 -> r13 (dst); r11 -> r11 (src, nop); r12 -> r12 (n, nop).
40          FIXME: Comment for old gcc version.  Check.
41          If gcc was all right, it really would need no temporaries, and no
42          stack space to save stuff on. */
43     
44       register char *dst __asm__ ("r13") = pdst;
45       register const char *src __asm__ ("r11") = psrc;
46       register int n __asm__ ("r12") = pn;
47       register int retn __asm__ ("r10") = 0;
48       /* NOTE(review): n is signed; a huge pn goes negative and copies nothing.  */
49     
50       /* Align dst to a four-byte boundary first.  When src is aligned but
51          not dst, this makes a few extra needless cycles.  I believe it
52          would take as many to check that the re-alignment was unnecessary.  */
53       if (((unsigned long) dst & 3) != 0
54           /* Don't align if we wouldn't copy more than a few bytes; so we
55     	 don't have to check further for overflows.  */
56           && n >= 3)
57       {
58         if ((unsigned long) dst & 1)
59         {
60           __asm_copy_to_user_1 (dst, src, retn);
61           n--;
62         }
63         /* Assumes the macro above advanced dst and src (it is defined elsewhere).  */
64         if ((unsigned long) dst & 2)
65         {
66           __asm_copy_to_user_2 (dst, src, retn);
67           n -= 2;
68         }
69       }
70     
71       /* Decide which copying method to use; 44 bytes is one movem block.  */
72       if (n >= 44*2)		/* Break even between movem and
73     				   move16 is at 38.7*2, but modulo 44. */
74       {
75         /* For large copies we use 'movem'.  */
76     
77         /* It is not optimal to tell the compiler about clobbering any
78            registers; that will move the saving/restoring of those registers
79            to the function prologue/epilogue, and make non-movem sizes
80            suboptimal.  Instead the asm saves 11*4 bytes of registers on the
81            stack itself and restores them before leaving.
82            This method is not foolproof; it assumes that the "asm reg"
83            declarations at the beginning of the function really are used
84            here (beware: they may be moved to temporary registers).
85            This way, we do not have to save/move the registers around into
86            temporaries; we can safely use them straight away.
87     
88            If you want to check that the allocation was right, check the
89            equalities in the first comment of the asm below.  It should
90            say "r13=r13, r11=r11, r12=r12, r10=r10".  */
91         __asm__ volatile ("
92     	;; Check that the following is true (same register names on
93     	;; both sides of equal sign, as in r8=r8):
94     	;; %0=r13, %1=r11, %2=r12 %3=r10
95     	;;
96     	;; Save the registers we'll use in the movem process
97     	;; on the stack.
98     	subq	11*4,sp
99     	movem	r10,[sp]
100     
101     	;; Now we've got this:
102     	;; r11 - src
103     	;; r13 - dst
104     	;; r12 - n
105     
106     	;; Update n for the first loop
107     	subq	44,r12
108     
109     ; Since the noted PC of a faulting instruction in a delay-slot of a taken
110     ; branch, is that of the branch target, we actually point at the from-movem
111     ; for this case.  There is no ambiguity here; if there was a fault in that
112     ; instruction (meaning a kernel oops), the faulted PC would be the address
113     ; after *that* movem.
114     
115     0:
116     	movem	[r11+],r10
117     	subq   44,r12
118     	bge	0b
119     	movem	r10,[r13+]
120     1:
121     	addq   44,r12  ;; compensate for last loop underflowing n
122     
123     	;; Restore registers from stack
124     	movem [sp+],r10
125     2:
126     	.section .fixup,\"ax\"
127     
128     ; To provide a correct count in r10 of bytes that failed to be copied,
129     ; we jump back into the loop if the loop-branch was taken.  There is no
130     ; performance penalty for sany use; the program will segfault soon enough.
131     
132     3:
133     	move.d [sp],r10
134     	addq 44,r10
135     	move.d r10,[sp]
136     	jump 0b
137     4:
138     	movem [sp+],r10
139     	addq 44,r10
140     	addq 44,r12
141     	jump 2b
142     
143     	.previous
144     	.section __ex_table,\"a\"
145     	.dword 0b,3b
146     	.dword 1b,4b
147     	.previous"
148          /* __ex_table: a fault recorded at 0b runs fixup 3b, one at 1b runs 4b.  */
149          /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
150          /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
151         /* Per the fixup comment, each fault adds one 44-byte block to retn.  */
152       }
153     
154       /* Either we directly start copying, using dword copying in a loop, or
155          we copy as much as possible with 'movem' and then the last block (<44
156          bytes) is copied here.  This will work since 'movem' will have
157          updated SRC, DST and N.  */
158     
159       while (n >= 16)
160       {
161         __asm_copy_to_user_16 (dst, src, retn);
162         n -= 16;
163       }
164     
165       /* Having a separate by-four loop cuts down on cache footprint.
166          FIXME:  Test with and without; increasing switch to be 0..15.  */
167       while (n >= 4)
168       {
169         __asm_copy_to_user_4 (dst, src, retn);
170         n -= 4;
171       }
172       /* n is now below 4; copy the 1..3 byte tail, if any.  */
173       switch (n)
174       {
175         case 0:
176           break;
177         case 1:
178           __asm_copy_to_user_1 (dst, src, retn);
179           break;
180         case 2:
181           __asm_copy_to_user_2 (dst, src, retn);
182           break;
183         case 3:
184           __asm_copy_to_user_3 (dst, src, retn);
185           break;
186       }
187     
188       return retn;
189     }
190     
191     /* Copy PN bytes from the user address PSRC to PDST, zeroing the bytes
192        that were inaccessible in userland.  */
193     /* Returns retn; the movem fixup adds the bytes it cleared but did not copy.  */
194     unsigned long
195     __copy_user_zeroing (void *pdst, const void *psrc, unsigned long pn)
196     {
197       /* We want the parameters put in special registers.
198          Make sure the compiler is able to make something useful of this.
199          As it is now (incoming register -> pinned variable):
200          r10 -> r13 (dst); r11 -> r11 (src, nop); r12 -> r12 (n, nop).
201          FIXME: Comment for old gcc version.  Check.
202          If gcc was all right, it really would need no temporaries, and no
203          stack space to save stuff on.  */
204     
205       register char *dst __asm__ ("r13") = pdst;
206       register const char *src __asm__ ("r11") = psrc;
207       register int n __asm__ ("r12") = pn;
208       register int retn __asm__ ("r10") = 0;
209       /* NOTE(review): n is signed; a huge pn goes negative and copies nothing.  */
210       /* Align dst to a four-byte boundary first.  When src is aligned but
211          not dst, this makes a few extra needless cycles.  I believe it
212          would take as many to check that the re-alignment was unnecessary.  */
213       if (((unsigned long) dst & 3) != 0
214           /* Don't align if we wouldn't copy more than a few bytes; so we
215     	 don't have to check further for overflows.  */
216           && n >= 3)
217       {
218         if ((unsigned long) dst & 1)
219         {
220           __asm_copy_from_user_1 (dst, src, retn);
221           n--;
222         }
223         /* Assumes the macro above advanced dst and src (it is defined elsewhere).  */
224         if ((unsigned long) dst & 2)
225         {
226           __asm_copy_from_user_2 (dst, src, retn);
227           n -= 2;
228         }
229       }
230     
231       /* Decide which copying method to use; 44 bytes is one movem block.  */
232       if (n >= 44*2)		/* Break even between movem and
233     				   move16 is at 38.7*2, but modulo 44. */
234       {
235         /* For large copies we use 'movem' */
236     
237         /* It is not optimal to tell the compiler about clobbering any
238            registers; that will move the saving/restoring of those registers
239            to the function prologue/epilogue, and make non-movem sizes
240            suboptimal.  Instead the asm saves 11*4 bytes of registers on the
241            stack itself and restores them at label 8.
242            This method is not foolproof; it assumes that the "asm reg"
243            declarations at the beginning of the function really are used
244            here (beware: they may be moved to temporary registers).
245            This way, we do not have to save/move the registers around into
246            temporaries; we can safely use them straight away.
247     
248            If you want to check that the allocation was right, check the
249            equalities in the first comment of the asm below.  It should
250            say "r13=r13, r11=r11, r12=r12, r10=r10".  */
251         __asm__ volatile ("
252     	;; Check that the following is true (same register names on
253     	;; both sides of equal sign, as in r8=r8):
254     	;; %0=r13, %1=r11, %2=r12 %3=r10
255     	;;
256     	;; Save the registers we'll use in the movem process
257     	;; on the stack.
258     	subq	11*4,sp
259     	movem	r10,[sp]
260     
261     	;; Now we've got this:
262     	;; r11 - src
263     	;; r13 - dst
264     	;; r12 - n
265     
266     	;; Update n for the first loop
267     	subq	44,r12
268     0:
269     	movem	[r11+],r10
270     1:
271     	subq   44,r12
272     	bge	0b
273     	movem	r10,[r13+]
274     
275     	addq   44,r12  ;; compensate for last loop underflowing n
276     8:
277     	;; Restore registers from stack
278     	movem [sp+],r10
279     
280     	.section .fixup,\"ax\"
281     
282     ;; Do not jump back into the loop if we fail.  For some uses, we get a
283     ;; page fault but for performance reasons we care to not get further
284     ;; faults.  For example, fs/super.c at one time did
285     ;;  i = size - copy_from_user((void *)page, data, size);
286     ;; which would cause repeated faults while clearing the remainder of
287     ;; the SIZE bytes at PAGE after the first fault.
288     
289     3:
290     	move.d [sp],r10
291     
292     ;; Number of remaining bytes, cleared but not copied, is r12 + 44.
293     
294     	add.d r12,r10
295     	addq 44,r10
296     
297     	move.d r10,[sp]
298     	clear.d r0
299     	clear.d r1
300     	clear.d r2
301     	clear.d r3
302     	clear.d r4
303     	clear.d r5
304     	clear.d r6
305     	clear.d r7
306     	clear.d r8
307     	clear.d r9
308     	clear.d r10
309     
310     ;; Perform clear similar to the copy-loop.
311     
312     4:
313     	subq 44,r12
314     	bge 4b
315     	movem r10,[r13+]
316     
317     ;; Clear by four for the remaining multiples.
318     
319     	addq 40,r12
320     	bmi 6f
321     	nop
322     5:
323     	subq 4,r12
324     	bpl 5b
325     	clear.d [r13+]
326     6:
327     	addq 4,r12
328     	beq 7f
329     	nop
330     
331     	subq 1,r12
332     	beq 7f
333     	clear.b [r13+]
334     
335     	subq 1,r12
336     	beq 7f
337     	clear.b [r13+]
338     
339     	clear.d r12
340     	clear.b [r13+]
341     7:
342     	jump 8b
343     
344     	.previous
345     	.section __ex_table,\"a\"
346     	.dword 1b,3b
347     	.previous"
348          /* __ex_table: a fault recorded at 1b (just after the load movem) runs 3b.  */
349          /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
350          /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));
351         /* After the fixup r12 (n) is zero, so the tail loops below do nothing.  */
352       }
353     
354       /* Either we directly start copying here, using dword copying in a loop,
355          or we copy as much as possible with 'movem' and then the last block
356          (<44 bytes) is copied here.  This will work since 'movem' will have
357          updated src, dst and n. */
358     
359       while (n >= 16)
360       {
361         __asm_copy_from_user_16 (dst, src, retn);
362         n -= 16;
363       }
364     
365       /* Having a separate by-four loop cuts down on cache footprint.
366          FIXME:  Test with and without; increasing switch to be 0..15.  */
367       while (n >= 4)
368       {
369         __asm_copy_from_user_4 (dst, src, retn);
370         n -= 4;
371       }
372       /* n is now below 4; copy the 1..3 byte tail, if any.  */
373       switch (n)
374       {
375         case 0:
376           break;
377         case 1:
378           __asm_copy_from_user_1 (dst, src, retn);
379           break;
380         case 2:
381           __asm_copy_from_user_2 (dst, src, retn);
382           break;
383         case 3:
384           __asm_copy_from_user_3 (dst, src, retn);
385           break;
386       }
387     
388       return retn;
389     }
390     
391     /* Zero PN bytes of userspace starting at PTO.  */
392     /* Returns retn; each fault in the movem loop adds 12*4 to it via the fixups.  */
393     unsigned long
394     __do_clear_user (void *pto, unsigned long pn)
395     {
396       /* We want the parameters put in special registers.
397          Make sure the compiler is able to make something useful of this.
398          As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
399          NOTE(review): that mapping is worded for the three-argument copy
400          routines; here it is presumably r10 -> r13 (dst), r11 -> r12 (n).
401          FIXME: Comment for old gcc version.  Check.  If gcc was all right, it
402          really would need no temporaries, and no stack space to save stuff on. */
403     
404       register char *dst __asm__ ("r13") = pto;
405       register int n __asm__ ("r12") = pn;
406       register int retn __asm__ ("r10") = 0;
407       /* NOTE(review): n is signed; a huge pn goes negative and clears nothing.  */
408     
409       if (((unsigned long) dst & 3) != 0
410          /* Don't align if we wouldn't clear more than a few bytes.  */
411           && n >= 3)
412       {
413         if ((unsigned long) dst & 1)
414         {
415           __asm_clear_1 (dst, retn);
416           n--;
417         }
418         /* Assumes the macro above advanced dst (it is defined elsewhere).  */
419         if ((unsigned long) dst & 2)
420         {
421           __asm_clear_2 (dst, retn);
422           n -= 2;
423         }
424       }
425     
426       /* Decide which clearing method to use; 48 bytes is one movem block.
427          FIXME: This number is from the "ordinary" kernel memset.  */
428       if (n >= (1*48))
429       {
430         /* For large clears we use 'movem' */
431     
432         /* It is not optimal to tell the compiler about clobbering any
433            call-saved registers; that will move the saving/restoring of
434            those registers to the function prologue/epilogue, and make
435            non-movem sizes suboptimal.  The asm therefore saves and restores
436            r0-r10 itself (11*4 bytes of stack); only r11 is a declared clobber.
437            This method is not foolproof; it assumes that the "asm reg"
438            declarations at the beginning of the function really are used
439            here (beware: they may be moved to temporary registers).
440            This way, we do not have to save/move the registers around into
441            temporaries; we can safely use them straight away.
442            Twelve zeroed registers (r0-r11) are stored per 12*4-byte block.
443            If you want to check that the allocation was right, check the
444            equalities in the first comment of the asm below.  It should
445            say "r13=r13, r12=r12, r10=r10".  */
446         __asm__ volatile ("
447     	;; Check that the following is true (same register names on
448     	;; both sides of equal sign, as in r8=r8):
449     	;; %0=r13, %1=r12 %2=r10
450     	;;
451     	;; Save the registers we'll clobber in the movem process
452     	;; on the stack.  Don't mention them to gcc, it will only be
453     	;; upset.
454     	subq	11*4,sp
455     	movem	r10,[sp]
456     
457     	clear.d r0
458     	clear.d r1
459     	clear.d r2
460     	clear.d r3
461     	clear.d r4
462     	clear.d r5
463     	clear.d r6
464     	clear.d r7
465     	clear.d r8
466     	clear.d r9
467     	clear.d r10
468     	clear.d r11
469     
470     	;; Now we've got this:
471     	;; r13 - dst
472     	;; r12 - n
473     
474     	;; Update n for the first loop
475     	subq	12*4,r12
476     0:
477     	subq   12*4,r12
478     	bge	0b
479     	movem	r11,[r13+]
480     1:
481     	addq   12*4,r12	 ;; compensate for last loop underflowing n
482     
483     	;; Restore registers from stack
484     	movem [sp+],r10
485     2:
486     	.section .fixup,\"ax\"
487     3:
488     	move.d [sp],r10
489     	addq 12*4,r10
490     	move.d r10,[sp]
491     	clear.d r10
492     	jump 0b
493     
494     4:
495     	movem [sp+],r10
496     	addq 12*4,r10
497     	addq 12*4,r12
498     	jump 2b
499     
500     	.previous
501     	.section __ex_table,\"a\"
502     	.dword 0b,3b
503     	.dword 1b,4b
504     	.previous"
505          /* __ex_table: a fault recorded at 0b runs fixup 3b, one at 1b runs 4b.  */
506          /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
507          /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
508          /* Clobber */ : "r11");
509       }
510       /* Clear what is left (everything, for small n) in 16-byte steps.  */
511       while (n >= 16)
512       {
513         __asm_clear_16 (dst, retn);
514         n -= 16;
515       }
516     
517       /* Having a separate by-four loop cuts down on cache footprint.
518          FIXME:  Test with and without; increasing switch to be 0..15.  */
519       while (n >= 4)
520       {
521         __asm_clear_4 (dst, retn);
522         n -= 4;
523       }
524       /* n is now below 4; clear the 1..3 byte tail, if any.  */
525       switch (n)
526       {
527         case 0:
528           break;
529         case 1:
530           __asm_clear_1 (dst, retn);
531           break;
532         case 2:
533           __asm_clear_2 (dst, retn);
534           break;
535         case 3:
536           __asm_clear_3 (dst, retn);
537           break;
538       }
539     
540       return retn;
541     }
542