File: /usr/src/linux/arch/cris/lib/string.c

1     /*#************************************************************************#*/
2     /*#-------------------------------------------------------------------------*/
3     /*#                                                                         */
4     /*# FUNCTION NAME: memcpy()                                                 */
5     /*#                                                                         */
6     /*# PARAMETERS:  void*       pdst;  Destination address.                    */
7     /*#              const void* psrc;  Source address.                         */
8     /*#              size_t      pn;    Number of bytes to copy.                */
9     /*#                                                                         */
10     /*# RETURNS:     dst.                                                       */
11     /*#                                                                         */
12     /*# DESCRIPTION: Copies len bytes of memory from src to dst.  No guarantees */
13     /*#              about copying of overlapping memory areas. This routine is */
14     /*#              very sensitive to compiler changes in register allocation. */
15     /*#              Should really be rewritten to avoid this problem.          */
16     /*#                                                                         */
17     /*#-------------------------------------------------------------------------*/
18     /*#                                                                         */
19     /*# HISTORY                                                                 */
20     /*#                                                                         */
21     /*# DATE      NAME            CHANGES                                       */
22     /*# ----      ----            -------                                       */
23     /*# 941007    Kenny R         Creation                                      */
24     /*# 941011    Kenny R         Lots of optimizations and inlining.           */
25     /*# 941129    Ulf A           Adapted for use in libc.                      */
26     /*# 950216    HP              N==0 forgotten if non-aligned src/dst.        */
27     /*#                           Added some optimizations.                     */
28     /*# 001025    HP              Make src and dst char *.  Align dst to	    */
29     /*#			      dword, not just word-if-both-src-and-dst-	    */
30     /*#			      are-misaligned.				    */
31     /*#                                                                         */
32     /*#-------------------------------------------------------------------------*/
33     
34     #include <linux/types.h>
35     
36     void *memcpy(void *pdst,
37                  const void *psrc,
38                  size_t pn)
39     {
40       /* Pin the parameters to specific registers so that the inline asm
41          further down can use them directly.  Make sure the compiler is
42          able to make something useful of this.
43          As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
44          If gcc were all right, it really would need no temporaries, and no
45          stack space to save stuff on. */
46     
47       register void *return_dst __asm__ ("r10") = pdst; /* Unmodified copy of pdst; returned at the end. */
48       register char *dst __asm__ ("r13") = pdst; /* Write cursor, advanced as bytes are stored. */
49       register const char *src __asm__ ("r11") = psrc; /* Read cursor, advanced as bytes are loaded. */
50       register int n __asm__ ("r12") = pn; /* Bytes left.  NOTE(review): size_t is narrowed to int -- confirm huge lengths never occur. */
51       
52      
53       /* Align dst to a dword.  When src is aligned but dst is not, this
54          costs a few needless cycles, but checking whether the re-alignment
55          was unnecessary would presumably take as many.  */
56       if (((unsigned long) dst & 3) != 0
57           /* Aligning consumes at most 3 bytes (1 + 2 below), so with this
58              guard the two steps need no further length checks.  */
59           && n >= 3)
60       {
61         if ((unsigned long) dst & 1) /* Odd address: copy a single byte. */
62         {
63           n--;
64           *(char*)dst = *(char*)src;
65           src++;
66           dst++;
67         }
68     
69         if ((unsigned long) dst & 2) /* Even but not dword-aligned: copy one short. */
70         {
71           n -= 2;
72           *(short*)dst = *(short*)src; /* NOTE(review): src may be misaligned here; presumably tolerated by this CPU -- confirm. */
73           src += 2;
74           dst += 2;
75         }
76       }
77     
78       /* Decide which copying method to use. */
79       if (n >= 44*2)                /* Break even between movem and
80                                        move16 is at 38.7*2, but modulo 44. */
81       {
82         /* For large copies we use 'movem' */
83     
84       /* It is not optimal to tell the compiler about clobbering any
85          registers; that will move the saving/restoring of those registers
86          to the function prologue/epilogue, and make non-movem sizes
87          suboptimal.  Instead the asm itself reserves 11*4 bytes of stack
88          for saving registers, and decrements n by 44 per loop iteration.
89           This method is not foolproof; it assumes that the "asm reg"
90          declarations at the beginning of the function really are used
91          here (beware: they may be moved to temporary registers).
92           This way, we do not have to save/move the registers around into
93          temporaries; we can safely use them straight away.
94     
95           If you want to check that the allocation was right, then
96           check the equalities in the first comment of the asm below.
97           It should say "r13=r13, r11=r11, r12=r12" */
98         __asm__ volatile ("
99             ;; Check that the following is true (same register names on
100             ;; both sides of equal sign, as in r8=r8):
101             ;; %0=r13, %1=r11, %2=r12
102             ;;
103     	;; Save the registers we'll use in the movem process
104     	;; on the stack.
105     	subq 	11*4,sp
106     	movem	r10,[sp]
107     
108             ;; Now we've got this:
109     	;; r11 - src
110     	;; r13 - dst
111     	;; r12 - n
112     	
113             ;; Update n for the first loop
114             subq    44,r12
115     0:
116     	movem	[r11+],r10
117             subq   44,r12
118             bge     0b
119     	movem	r10,[r13+]
120     
121             addq   44,r12  ;; compensate for last loop underflowing n
122     
123     	;; Restore registers from stack
124             movem [sp+],r10" 
125     
126          /* Outputs: the updated cursors and remaining count */ : "=r" (dst), "=r" (src), "=r" (n) 
127          /* Inputs, each tied to the matching output operand */ : "0" (dst), "1" (src), "2" (n));
128         
129       }
130     
131       /* Either we start copying directly, using dword copying in a
132          loop, or we have copied as much as possible with 'movem' and
133          the last block (<44 bytes) is copied here.
134          This works since the 'movem' asm has updated src, dst and n. */
135     
136       while ( n >= 16 ) /* Four dwords (16 bytes) per iteration. */
137       {
138         *((long*)dst)++ = *((long*)src)++; /* NOTE(review): cast-as-lvalue is an old GCC extension -- confirm the toolchain accepts it. */
139         *((long*)dst)++ = *((long*)src)++;
140         *((long*)dst)++ = *((long*)src)++;
141         *((long*)dst)++ = *((long*)src)++;
142         n -= 16;
143       }
144     
145       /* Copy the last n < 16 bytes: dwords first, then a short, then a byte.
146        * A switch() is definitely the fastest although it takes a LOT of code,
147        * particularly if this code is inlined.  */
148       switch (n)
149       {
150         case 0:
151           break;
152         case 1:
153           *(char*)dst = *(char*)src;
154           break;
155         case 2:
156           *(short*)dst = *(short*)src;
157           break;
158         case 3:
159           *((short*)dst)++ = *((short*)src)++;
160           *(char*)dst = *(char*)src;
161           break;
162         case 4:
163           *((long*)dst)++ = *((long*)src)++;
164           break;
165         case 5:
166           *((long*)dst)++ = *((long*)src)++;
167           *(char*)dst = *(char*)src;
168           break;
169         case 6:
170           *((long*)dst)++ = *((long*)src)++;
171           *(short*)dst = *(short*)src;
172           break;
173         case 7:
174           *((long*)dst)++ = *((long*)src)++;
175           *((short*)dst)++ = *((short*)src)++;
176           *(char*)dst = *(char*)src;
177           break;
178         case 8:
179           *((long*)dst)++ = *((long*)src)++;
180           *((long*)dst)++ = *((long*)src)++;
181           break;
182         case 9:
183           *((long*)dst)++ = *((long*)src)++;
184           *((long*)dst)++ = *((long*)src)++;
185           *(char*)dst = *(char*)src;
186           break;
187         case 10:
188           *((long*)dst)++ = *((long*)src)++;
189           *((long*)dst)++ = *((long*)src)++;
190           *(short*)dst = *(short*)src;
191           break;
192         case 11:
193           *((long*)dst)++ = *((long*)src)++;
194           *((long*)dst)++ = *((long*)src)++;
195           *((short*)dst)++ = *((short*)src)++;
196           *(char*)dst = *(char*)src;
197           break;
198         case 12:
199           *((long*)dst)++ = *((long*)src)++;
200           *((long*)dst)++ = *((long*)src)++;
201           *((long*)dst)++ = *((long*)src)++;
202           break;
203         case 13:
204           *((long*)dst)++ = *((long*)src)++;
205           *((long*)dst)++ = *((long*)src)++;
206           *((long*)dst)++ = *((long*)src)++;
207           *(char*)dst = *(char*)src;
208           break;
209         case 14:
210           *((long*)dst)++ = *((long*)src)++;
211           *((long*)dst)++ = *((long*)src)++;
212           *((long*)dst)++ = *((long*)src)++;
213           *(short*)dst = *(short*)src;
214           break;
215         case 15:
216           *((long*)dst)++ = *((long*)src)++;
217           *((long*)dst)++ = *((long*)src)++;
218           *((long*)dst)++ = *((long*)src)++;
219           *((short*)dst)++ = *((short*)src)++;
220           *(char*)dst = *(char*)src;
221           break;
222       }
223     
224       return return_dst; /* The original pdst, untouched by the copy. */
225     } /* memcpy() */
226