]> pilppa.org Git - familiar-h63xx-build.git/blob - org.handhelds.familiar/packages/glibc/glibc-cvs/arm-memcpy.patch
OE tree imported from monotone branch org.openembedded.oz354fam083 at revision 8b12e3...
[familiar-h63xx-build.git] / org.handhelds.familiar / packages / glibc / glibc-cvs / arm-memcpy.patch
1 --- /dev/null   2004-02-02 20:32:13.000000000 +0000
2 +++ sysdeps/arm/memmove.S       2004-03-20 18:37:23.000000000 +0000
3 @@ -0,0 +1,251 @@
4 +/*
5 + *   Optimized memmove implementation for ARM processors
6 + *
7 + *     Author:         Nicolas Pitre
8 + *     Created:        Dec 23, 2003
9 + *     Copyright:      (C) MontaVista Software, Inc.
10 + *
11 + *   This file is free software; you can redistribute it and/or
12 + *   modify it under the terms of the GNU Lesser General Public
13 + *   License as published by the Free Software Foundation; either
14 + *   version 2.1 of the License, or (at your option) any later version.
15 + *
16 + *   This file is distributed in the hope that it will be useful,
17 + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 + *   Lesser General Public License for more details.
20 + */
21 +
22 +#include <sysdep.h>
23 +
24 +
25 +/*
26 + * Endian independent macros for shifting bytes within registers: without __ARMEB__ (little-endian) pull = lsr and push = lsl, with it they are swapped.
27 + */
28 +#ifndef __ARMEB__
29 +#define pull            lsr
30 +#define push            lsl
31 +#else
32 +#define pull            lsl
33 +#define push            lsr
34 +#endif
35 +
36 +/*
37 + * Enable data preload for architectures that support it (ARMv5TE and above): PLD(x) emits x when pld is usable and nothing otherwise.
38 + */
39 +#if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_3__) && !defined(__ARM_ARCH_3M__) && \
40 +    !defined(__ARM_ARCH_4__) && !defined(__ARM_ARCH_4T__) && \
41 +    !defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5T__)
42 +#define PLD(code...)   code
43 +#else
44 +#define PLD(code...)
45 +#endif
46 +
47 +
48 +/* void *memmove (void *dst, const void *src, size_t n): r0 = dst, r1 = src, r2 = n; returns the original dst in r0. */
49 +ENTRY(memmove)
50 +               subs    ip, r0, r1              @ ip = dst - src
51 +               cmphi   r2, ip                  @ only if dst > src: compare n against that distance
52 +               bls     memcpy(PLT)             @ dst <= src, or n <= distance: hand over to memcpy
53 +
54 +               stmfd   sp!, {r0, r4, lr}       @ keep the original dst for the return value
55 +               add     r1, r1, r2              @ r1 = src + n
56 +               add     r0, r0, r2              @ r0 = dst + n, everything below copies downwards
57 +               subs    r2, r2, #4
58 +               blt     25f                     @ n < 4: only the byte tail
59 +               ands    ip, r0, #3              @ ip = low two bits of the dst end
60 +       PLD(    pld     [r1, #-4]               )
61 +               bne     26f                     @ dst end is not word aligned
62 +               ands    ip, r1, #3              @ ip = low two bits of the src end
63 +               bne     27f                     @ src end is not word aligned
64 +
65 +19:            subs    r2, r2, #4              @ here r2 = bytes left - 4 and both pointers are word aligned
66 +               blt     24f                     @ fewer than 8 bytes left
67 +               subs    r2, r2, #8
68 +               blt     23f                     @ fewer than 16 bytes left
69 +               subs    r2, r2, #16
70 +               blt     22f                     @ fewer than 32 bytes left
71 +
72 +       PLD(    pld     [r1, #-32]              )
73 +       PLD(    subs    r2, r2, #96             )       @ bias the count: the preloading loop at 20 stops 96 bytes early
74 +               stmfd   sp!, {r5 - r8}
75 +       PLD(    blt     21f                     )       @ too short for preloading, use the plain loop
76 +
77 +       PLD(    @ cache alignment               )
78 +       PLD(    ands    ip, r1, #31             )       @ ip = bytes above the 32-byte boundary below r1
79 +       PLD(    pld     [r1, #-64]              )
80 +       PLD(    beq     20f                     )       @ src already on a 32-byte boundary
81 +       PLD(    cmp     r2, ip                  )
82 +       PLD(    pld     [r1, #-96]              )
83 +       PLD(    blt     20f                     )       @ biased count smaller than ip: skip the alignment step
84 +       PLD(    cmp     ip, #16                 )
85 +       PLD(    sub     r2, r2, ip              )
86 +       PLD(    ldmgedb r1!, {r3 - r6}          )       @ ip >= 16: move 16 bytes
87 +       PLD(    stmgedb r0!, {r3 - r6}          )
88 +       PLD(    beq     20f                     )       @ ip was exactly 16
89 +       PLD(    and     ip, ip, #15             )
90 +       PLD(    cmp     ip, #8                  )       @ 4, 8 or 12 bytes remain to the boundary
91 +       PLD(    ldr     r3, [r1, #-4]!          )
92 +       PLD(    ldrge   r4, [r1, #-4]!          )
93 +       PLD(    ldrgt   r5, [r1, #-4]!          )
94 +       PLD(    str     r3, [r0, #-4]!          )
95 +       PLD(    strge   r4, [r0, #-4]!          )
96 +       PLD(    strgt   r5, [r0, #-4]!          )
97 +
98 +20:    PLD(    pld     [r1, #-96]              )
99 +       PLD(    pld     [r1, #-128]             )
100 +21:            ldmdb   r1!, {r3, r4, ip, lr}   @ each pass moves 32 bytes, or 64 when GE after the subs
101 +               subs    r2, r2, #32
102 +               stmdb   r0!, {r3, r4, ip, lr}
103 +               ldmdb   r1!, {r3, r4, ip, lr}   @ when LT this block is written by the final stmdb
104 +               stmgedb r0!, {r3, r4, ip, lr}
105 +               ldmgedb r1!, {r3, r4, ip, lr}
106 +               stmgedb r0!, {r3, r4, ip, lr}
107 +               ldmgedb r1!, {r3, r4, ip, lr}
108 +               subges  r2, r2, #32             @ account for the second 32 bytes
109 +               stmdb   r0!, {r3, r4, ip, lr}
110 +               bge     20b
111 +       PLD(    cmn     r2, #96                 )       @ r2 + 96 >= 0: finish in the loop without preloads
112 +       PLD(    bge     21b                     )
113 +       PLD(    add     r2, r2, #96             )       @ remove the bias again
114 +               tst     r2, #31                 @ low five bits of r2 = bytes still to copy
115 +               ldmfd   sp!, {r5 - r8}
116 +               ldmeqfd sp!, {r0, r4, pc}       @ nothing left: return the original dst
117 +
118 +               tst     r2, #16
119 +22:            ldmnedb r1!, {r3, r4, ip, lr}   @ via blt 22f: r2 is negative so NE holds and 16 bytes are moved
120 +               stmnedb r0!, {r3, r4, ip, lr}
121 +
122 +               tst     r2, #8
123 +23:            ldmnedb r1!, {r3, r4}           @ via blt 23f: NE holds, 8 bytes are moved
124 +               stmnedb r0!, {r3, r4}
125 +
126 +               tst     r2, #4
127 +24:            ldrne   r3, [r1, #-4]!          @ via blt 24f: NE holds, one word is moved
128 +               strne   r3, [r0, #-4]!
129 +
130 +25:            ands    r2, r2, #3              @ r2 = 0..3 trailing bytes
131 +               ldmeqfd sp!, {r0, r4, pc}       @ none: return the original dst
132 +
133 +               cmp     r2, #2
134 +               ldrb    r3, [r1, #-1]
135 +               ldrgeb  r4, [r1, #-2]           @ two or three bytes
136 +               ldrgtb  ip, [r1, #-3]           @ three bytes
137 +               strb    r3, [r0, #-1]
138 +               strgeb  r4, [r0, #-2]
139 +               strgtb  ip, [r0, #-3]
140 +               ldmfd   sp!, {r0, r4, pc}
141 +
142 +26:            cmp     ip, #2                  @ ip = 1..3 bytes to move so that the dst end becomes word aligned
143 +               ldrb    r3, [r1, #-1]!
144 +               ldrgeb  r4, [r1, #-1]!
145 +               ldrgtb  lr, [r1, #-1]!
146 +               strb    r3, [r0, #-1]!
147 +               strgeb  r4, [r0, #-1]!
148 +               strgtb  lr, [r0, #-1]!
149 +               subs    r2, r2, ip
150 +               blt     25b                     @ fewer than 4 bytes left
151 +               ands    ip, r1, #3
152 +               beq     19b                     @ src is word aligned as well: use the aligned path
153 +
154 +27:            bic     r1, r1, #3              @ round src down to a word boundary
155 +               cmp     ip, #2                  @ ip = src & 3 selects the shift variant
156 +               ldr     r3, [r1]                @ r3 = word containing the topmost ip source bytes
157 +               beq     35f                     @ ip == 2
158 +               blt     36f                     @ ip == 1, while ip == 3 falls through
159 +
160 +/* backward_copy_shift: downward word-at-a-time copy for a src that is 1..3 bytes off word alignment (dst is aligned); r3 carries the partly consumed source word from step to step.  cpp rewrites the parameter names push/pull to lsl/lsr (or the reverse) here, in the body and at the invocations alike, so they keep matching. */
161 +               .macro  backward_copy_shift push pull
162 +
163 +               cmp     r2, #12                 @ r2 = bytes left - 4
164 +       PLD(    pld     [r1, #-4]               )
165 +               blt     33f                     @ fewer than 16 bytes: single word loop
166 +               subs    r2, r2, #28
167 +               stmfd   sp!, {r5 - r9}
168 +               blt     31f                     @ fewer than 32 bytes: one 16-byte step
169 +
170 +       PLD(    subs    r2, r2, #96             )       @ same count bias as in the aligned path
171 +       PLD(    pld     [r1, #-32]              )
172 +       PLD(    blt     30f                     )
173 +       PLD(    pld     [r1, #-64]              )
174 +
175 +       PLD(    @ cache alignment               )
176 +       PLD(    ands    ip, r1, #31             )
177 +       PLD(    pld     [r1, #-96]              )
178 +       PLD(    beq     29f                     )
179 +       PLD(    cmp     r2, ip                  )
180 +       PLD(    pld     [r1, #-128]             )
181 +       PLD(    blt     29f                     )
182 +       PLD(    sub     r2, r2, ip              )
183 +28:    PLD(    mov     r4, r3, push #\push     )       @ one word per pass until r1 is 32-byte aligned
184 +       PLD(    ldr     r3, [r1, #-4]!          )
185 +       PLD(    subs    ip, ip, #4              )
186 +       PLD(    orr     r4, r4, r3, pull #\pull )
187 +       PLD(    str     r4, [r0, #-4]!          )
188 +       PLD(    bgt     28b                     )
189 +
190 +29:    PLD(    pld     [r1, #-128]             )
191 +30:            mov     lr, r3, push #\push     @ 32 bytes per pass: start with the leftover of the previous word
192 +               ldmdb   r1!, {r3 - r9, ip}
193 +               subs    r2, r2, #32
194 +               orr     lr, lr, ip, pull #\pull
195 +               mov     ip, ip, push #\push
196 +               orr     ip, ip, r9, pull #\pull
197 +               mov     r9, r9, push #\push
198 +               orr     r9, r9, r8, pull #\pull
199 +               mov     r8, r8, push #\push
200 +               orr     r8, r8, r7, pull #\pull
201 +               mov     r7, r7, push #\push
202 +               orr     r7, r7, r6, pull #\pull
203 +               mov     r6, r6, push #\push
204 +               orr     r6, r6, r5, pull #\pull
205 +               mov     r5, r5, push #\push
206 +               orr     r5, r5, r4, pull #\pull
207 +               mov     r4, r4, push #\push
208 +               orr     r4, r4, r3, pull #\pull  @ r3 stays as the carry for the next step
209 +               stmdb   r0!, {r4 - r9, ip, lr}
210 +               bge     29b
211 +       PLD(    cmn     r2, #96                 )
212 +       PLD(    bge     30b                     )
213 +       PLD(    add     r2, r2, #96             )
214 +               cmn     r2, #16
215 +               blt     32f                     @ fewer than 16 bytes left
216 +31:            mov     r7, r3, push #\push     @ one 16-byte step
217 +               ldmdb   r1!, {r3 - r6}
218 +               sub     r2, r2, #16
219 +               orr     r7, r7, r6, pull #\pull
220 +               mov     r6, r6, push #\push
221 +               orr     r6, r6, r5, pull #\pull
222 +               mov     r5, r5, push #\push
223 +               orr     r5, r5, r4, pull #\pull
224 +               mov     r4, r4, push #\push
225 +               orr     r4, r4, r3, pull #\pull
226 +               stmdb   r0!, {r4 - r7}
227 +32:            adds    r2, r2, #28             @ back to r2 = bytes left - 4
228 +               ldmfd   sp!, {r5 - r9}
229 +               blt     34f                     @ fewer than 4 bytes left
230 +33:            mov     r4, r3, push #\push     @ single word loop
231 +               ldr     r3, [r1, #-4]!
232 +               subs    r2, r2, #4
233 +               orr     r4, r4, r3, pull #\pull
234 +               str     r4, [r0, #-4]!
235 +               bge     33b
236 +34:
237 +               .endm
238 +
239 +
240 +               backward_copy_shift     push=8  pull=24
241 +               add     r1, r1, #3              @ src & 3 was 3: turn r1 back into the byte pointer
242 +               b       25b
243 +
244 +35:            backward_copy_shift     push=16 pull=16
245 +               add     r1, r1, #2              @ src & 3 was 2
246 +               b       25b
247 +
248 +36:            backward_copy_shift     push=24 pull=8
249 +               add     r1, r1, #1              @ src & 3 was 1
250 +               b       25b
251 +
252 +               .size   memmove, . - memmove
253 +END(memmove)
254 +libc_hidden_builtin_def (memmove)
255 --- /dev/null   2004-02-02 20:32:13.000000000 +0000
256 +++ sysdeps/arm/bcopy.S 2004-03-20 18:37:48.000000000 +0000
257 @@ -0,0 +1,255 @@
258 +/*
259 + *   Optimized bcopy implementation for ARM processors
260 + *
261 + *     Author:         Nicolas Pitre
262 + *     Created:        Dec 23, 2003
263 + *     Copyright:      (C) MontaVista Software, Inc.
264 + *
265 + *   This file is free software; you can redistribute it and/or
266 + *   modify it under the terms of the GNU Lesser General Public
267 + *   License as published by the Free Software Foundation; either
268 + *   version 2.1 of the License, or (at your option) any later version.
269 + *
270 + *   This file is distributed in the hope that it will be useful,
271 + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
272 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
273 + *   Lesser General Public License for more details.
274 + */
275 +
276 +#include <sysdep.h>
277 +
278 +
279 +/*
280 + * Endian independent macros for shifting bytes within registers: without __ARMEB__ (little-endian) pull = lsr and push = lsl, with it they are swapped.
281 + */
282 +#ifndef __ARMEB__
283 +#define pull            lsr
284 +#define push            lsl
285 +#else
286 +#define pull            lsl
287 +#define push            lsr
288 +#endif
289 +
290 +/*
291 + * Enable data preload for architectures that support it (ARMv5TE and above): PLD(x) emits x when pld is usable and nothing otherwise.
292 + */
293 +#if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_3__) && !defined(__ARM_ARCH_3M__) && \
294 +    !defined(__ARM_ARCH_4__) && !defined(__ARM_ARCH_4T__) && \
295 +    !defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5T__)
296 +#define PLD(code...)   code
297 +#else
298 +#define PLD(code...)
299 +#endif
300 +
301 +dst            .req    r1
302 +src            .req    r0
303 +
304 +/* void bcopy (const void *src, void *dst, size_t size): r0 = src, r1 = dst, r2 = size; nothing is returned, so only r4 and lr are saved and every exit pops exactly {r4, pc}. */
305 +ENTRY(bcopy)
306 +               subs    ip, dst, src            @ ip = dst - src
307 +               cmphi   r2, ip                  @ only if dst > src: compare size against that distance
308 +               movls   r3, r0                  @ dst <= src, or size <= distance: swap r0 and r1 into
309 +               movls   r0, r1                  @ the memcpy argument order (dst first) ...
310 +               movls   r1, r3
311 +               bls     memcpy(PLT)             @ ... and hand over to memcpy
312 +
313 +               stmfd   sp!, {r4, lr}           @ two words pushed, all returns below must pop two words
314 +               add     src, src, r2            @ src = src + size
315 +               add     dst, dst, r2            @ dst = dst + size, everything below copies downwards
316 +               subs    r2, r2, #4
317 +               blt     25f                     @ size < 4: only the byte tail
318 +               ands    ip, dst, #3             @ ip = low two bits of the dst end
319 +       PLD(    pld     [src, #-4]              )
320 +               bne     26f                     @ dst end is not word aligned
321 +               ands    ip, src, #3             @ ip = low two bits of the src end
322 +               bne     27f                     @ src end is not word aligned
323 +
324 +19:            subs    r2, r2, #4              @ here r2 = bytes left - 4 and both pointers are word aligned
325 +               blt     24f                     @ fewer than 8 bytes left
326 +               subs    r2, r2, #8
327 +               blt     23f                     @ fewer than 16 bytes left
328 +               subs    r2, r2, #16
329 +               blt     22f                     @ fewer than 32 bytes left
330 +
331 +       PLD(    pld     [src, #-32]             )
332 +       PLD(    subs    r2, r2, #96             )       @ bias the count: the preloading loop at 20 stops 96 bytes early
333 +               stmfd   sp!, {r5 - r8}
334 +       PLD(    blt     21f                     )       @ too short for preloading, use the plain loop
335 +
336 +       PLD(    @ cache alignment               )
337 +       PLD(    ands    ip, src, #31            )       @ ip = bytes above the 32-byte boundary below src
338 +       PLD(    pld     [src, #-64]             )
339 +       PLD(    beq     20f                     )       @ src already on a 32-byte boundary
340 +       PLD(    cmp     r2, ip                  )
341 +       PLD(    pld     [src, #-96]             )
342 +       PLD(    blt     20f                     )       @ biased count smaller than ip: skip the alignment step
343 +       PLD(    cmp     ip, #16                 )
344 +       PLD(    sub     r2, r2, ip              )
345 +       PLD(    ldmgedb src!, {r3 - r6}         )       @ ip >= 16: move 16 bytes
346 +       PLD(    stmgedb dst!, {r3 - r6}         )
347 +       PLD(    beq     20f                     )       @ ip was exactly 16
348 +       PLD(    and     ip, ip, #15             )
349 +       PLD(    cmp     ip, #8                  )       @ 4, 8 or 12 bytes remain to the boundary
350 +       PLD(    ldr     r3, [src, #-4]!         )
351 +       PLD(    ldrge   r4, [src, #-4]!         )
352 +       PLD(    ldrgt   r5, [src, #-4]!         )
353 +       PLD(    str     r3, [dst, #-4]!         )
354 +       PLD(    strge   r4, [dst, #-4]!         )
355 +       PLD(    strgt   r5, [dst, #-4]!         )
356 +
357 +20:    PLD(    pld     [src, #-96]             )
358 +       PLD(    pld     [src, #-128]            )
359 +21:            ldmdb   src!, {r3, r4, ip, lr}  @ each pass moves 32 bytes, or 64 when GE after the subs
360 +               subs    r2, r2, #32
361 +               stmdb   dst!, {r3, r4, ip, lr}
362 +               ldmdb   src!, {r3, r4, ip, lr}  @ when LT this block is written by the final stmdb
363 +               stmgedb dst!, {r3, r4, ip, lr}
364 +               ldmgedb src!, {r3, r4, ip, lr}
365 +               stmgedb dst!, {r3, r4, ip, lr}
366 +               ldmgedb src!, {r3, r4, ip, lr}
367 +               subges  r2, r2, #32             @ account for the second 32 bytes
368 +               stmdb   dst!, {r3, r4, ip, lr}
369 +               bge     20b
370 +       PLD(    cmn     r2, #96                 )       @ r2 + 96 >= 0: finish in the loop without preloads
371 +       PLD(    bge     21b                     )
372 +       PLD(    add     r2, r2, #96             )       @ remove the bias again
373 +               tst     r2, #31                 @ low five bits of r2 = bytes still to copy
374 +               ldmfd   sp!, {r5 - r8}
375 +               ldmeqfd sp!, {r4, pc}           @ nothing left: return
376 +
377 +               tst     r2, #16
378 +22:            ldmnedb src!, {r3, r4, ip, lr}  @ via blt 22f: r2 is negative so NE holds and 16 bytes are moved
379 +               stmnedb dst!, {r3, r4, ip, lr}
380 +
381 +               tst     r2, #8
382 +23:            ldmnedb src!, {r3, r4}          @ via blt 23f: NE holds, 8 bytes are moved
383 +               stmnedb dst!, {r3, r4}
384 +
385 +               tst     r2, #4
386 +24:            ldrne   r3, [src, #-4]!         @ via blt 24f: NE holds, one word is moved
387 +               strne   r3, [dst, #-4]!
388 +
389 +25:            ands    r2, r2, #3              @ r2 = 0..3 trailing bytes
390 +               ldmeqfd sp!, {r4, pc}           @ none: return (must match the two-word stmfd at entry)
391 +
392 +               cmp     r2, #2
393 +               ldrb    r3, [src, #-1]
394 +               ldrgeb  r4, [src, #-2]          @ two or three bytes
395 +               ldrgtb  ip, [src, #-3]          @ three bytes
396 +               strb    r3, [dst, #-1]
397 +               strgeb  r4, [dst, #-2]
398 +               strgtb  ip, [dst, #-3]
399 +               ldmfd   sp!, {r4, pc}           @ return (must match the two-word stmfd at entry)
400 +
401 +26:            cmp     ip, #2                  @ ip = 1..3 bytes to move so that the dst end becomes word aligned
402 +               ldrb    r3, [src, #-1]!
403 +               ldrgeb  r4, [src, #-1]!
404 +               ldrgtb  lr, [src, #-1]!
405 +               strb    r3, [dst, #-1]!
406 +               strgeb  r4, [dst, #-1]!
407 +               strgtb  lr, [dst, #-1]!
408 +               subs    r2, r2, ip
409 +               blt     25b                     @ fewer than 4 bytes left
410 +               ands    ip, src, #3
411 +               beq     19b                     @ src is word aligned as well: use the aligned path
412 +
413 +27:            bic     src, src, #3            @ round src down to a word boundary
414 +               cmp     ip, #2                  @ ip = src & 3 selects the shift variant
415 +               ldr     r3, [src]               @ r3 = word containing the topmost ip source bytes
416 +               beq     35f                     @ ip == 2
417 +               blt     36f                     @ ip == 1, while ip == 3 falls through
418 +
419 +/* backward_copy_shift: downward word-at-a-time copy for a src that is 1..3 bytes off word alignment (dst is aligned); r3 carries the partly consumed source word from step to step.  cpp rewrites the parameter names push/pull to lsl/lsr (or the reverse) here, in the body and at the invocations alike, so they keep matching. */
420 +               .macro  backward_copy_shift push pull
421 +
422 +               cmp     r2, #12                 @ r2 = bytes left - 4
423 +       PLD(    pld     [src, #-4]              )
424 +               blt     33f                     @ fewer than 16 bytes: single word loop
425 +               subs    r2, r2, #28
426 +               stmfd   sp!, {r5 - r9}
427 +               blt     31f                     @ fewer than 32 bytes: one 16-byte step
428 +
429 +       PLD(    subs    r2, r2, #96             )       @ same count bias as in the aligned path
430 +       PLD(    pld     [src, #-32]             )
431 +       PLD(    blt     30f                     )
432 +       PLD(    pld     [src, #-64]             )
433 +
434 +       PLD(    @ cache alignment               )
435 +       PLD(    ands    ip, src, #31            )
436 +       PLD(    pld     [src, #-96]             )
437 +       PLD(    beq     29f                     )
438 +       PLD(    cmp     r2, ip                  )
439 +       PLD(    pld     [src, #-128]            )
440 +       PLD(    blt     29f                     )
441 +       PLD(    sub     r2, r2, ip              )
442 +28:    PLD(    mov     r4, r3, push #\push     )       @ one word per pass until src is 32-byte aligned
443 +       PLD(    ldr     r3, [src, #-4]!         )
444 +       PLD(    subs    ip, ip, #4              )
445 +       PLD(    orr     r4, r4, r3, pull #\pull )
446 +       PLD(    str     r4, [dst, #-4]!         )
447 +       PLD(    bgt     28b                     )
448 +
449 +29:    PLD(    pld     [src, #-128]            )
450 +30:            mov     lr, r3, push #\push     @ 32 bytes per pass: start with the leftover of the previous word
451 +               ldmdb   src!, {r3 - r9, ip}
452 +               subs    r2, r2, #32
453 +               orr     lr, lr, ip, pull #\pull
454 +               mov     ip, ip, push #\push
455 +               orr     ip, ip, r9, pull #\pull
456 +               mov     r9, r9, push #\push
457 +               orr     r9, r9, r8, pull #\pull
458 +               mov     r8, r8, push #\push
459 +               orr     r8, r8, r7, pull #\pull
460 +               mov     r7, r7, push #\push
461 +               orr     r7, r7, r6, pull #\pull
462 +               mov     r6, r6, push #\push
463 +               orr     r6, r6, r5, pull #\pull
464 +               mov     r5, r5, push #\push
465 +               orr     r5, r5, r4, pull #\pull
466 +               mov     r4, r4, push #\push
467 +               orr     r4, r4, r3, pull #\pull  @ r3 stays as the carry for the next step
468 +               stmdb   dst!, {r4 - r9, ip, lr}
469 +               bge     29b
470 +       PLD(    cmn     r2, #96                 )
471 +       PLD(    bge     30b                     )
472 +       PLD(    add     r2, r2, #96             )
473 +               cmn     r2, #16
474 +               blt     32f                     @ fewer than 16 bytes left
475 +31:            mov     r7, r3, push #\push     @ one 16-byte step
476 +               ldmdb   src!, {r3 - r6}
477 +               sub     r2, r2, #16
478 +               orr     r7, r7, r6, pull #\pull
479 +               mov     r6, r6, push #\push
480 +               orr     r6, r6, r5, pull #\pull
481 +               mov     r5, r5, push #\push
482 +               orr     r5, r5, r4, pull #\pull
483 +               mov     r4, r4, push #\push
484 +               orr     r4, r4, r3, pull #\pull
485 +               stmdb   dst!, {r4 - r7}
486 +32:            adds    r2, r2, #28             @ back to r2 = bytes left - 4
487 +               ldmfd   sp!, {r5 - r9}
488 +               blt     34f                     @ fewer than 4 bytes left
489 +33:            mov     r4, r3, push #\push     @ single word loop
490 +               ldr     r3, [src, #-4]!
491 +               subs    r2, r2, #4
492 +               orr     r4, r4, r3, pull #\pull
493 +               str     r4, [dst, #-4]!
494 +               bge     33b
495 +34:
496 +               .endm
497 +
498 +
499 +               backward_copy_shift     push=8  pull=24
500 +               add     src, src, #3            @ src & 3 was 3: turn src back into the byte pointer
501 +               b       25b
502 +
503 +35:            backward_copy_shift     push=16 pull=16
504 +               add     src, src, #2            @ src & 3 was 2
505 +               b       25b
506 +
507 +36:            backward_copy_shift     push=24 pull=8
508 +               add     src, src, #1            @ src & 3 was 1
509 +               b       25b
510 +
511 +               .size   bcopy, . - bcopy
512 +END(bcopy)
513
514 --- /dev/null   2004-02-02 20:32:13.000000000 +0000
515 +++ sysdeps/arm/memcpy.S        2004-05-02 14:33:22.000000000 +0100
516 @@ -0,0 +1,242 @@
517 +/*
518 + *   Optimized memcpy implementation for ARM processors
519 + *
520 + *     Author:         Nicolas Pitre
521 + *     Created:        Dec 23, 2003
522 + *     Copyright:      (C) MontaVista Software, Inc.
523 + *
524 + *   This file is free software; you can redistribute it and/or
525 + *   modify it under the terms of the GNU Lesser General Public
526 + *   License as published by the Free Software Foundation; either
527 + *   version 2.1 of the License, or (at your option) any later version.
528 + *
529 + *   This file is distributed in the hope that it will be useful,
530 + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
531 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
532 + *   Lesser General Public License for more details.
533 + */
534 +
535 +#include <sysdep.h>
536 +
537 +
538 +/*
539 + * Endian independent macros for shifting bytes within registers: without __ARMEB__ (little-endian) pull = lsr and push = lsl, with it they are swapped.
540 + */
541 +#ifndef __ARMEB__
542 +#define pull            lsr
543 +#define push            lsl
544 +#else
545 +#define pull            lsl
546 +#define push            lsr
547 +#endif
548 +
549 +/*
550 + * Enable data preload for architectures that support it (ARMv5TE and above): PLD(x) emits x when pld is usable and nothing otherwise.
551 + */
552 +#if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_3__) && !defined(__ARM_ARCH_3M__) && \
553 +    !defined(__ARM_ARCH_4__) && !defined(__ARM_ARCH_4T__) && \
554 +    !defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5T__)
555 +#define PLD(code...)   code
556 +#else
557 +#define PLD(code...)
558 +#endif
559 +
560 +
561 +/* void *memcpy (void *dst, const void *src, size_t n): r0 = dst, r1 = src, r2 = n; copies upwards and returns the original dst in r0. */
562 +
563 +ENTRY(memcpy)
564 +               subs    r2, r2, #4
565 +               stmfd   sp!, {r0, r4, lr}       @ keep the original dst for the return value
566 +               blt     7f                      @ n < 4: only the byte tail
567 +               ands    ip, r0, #3              @ ip = low two bits of dst
568 +       PLD(    pld     [r1, #0]                )
569 +               bne     8f                      @ dst is not word aligned
570 +               ands    ip, r1, #3              @ ip = low two bits of src
571 +               bne     9f                      @ src is not word aligned
572 +
573 +1:             subs    r2, r2, #4              @ here r2 = bytes left - 4 and both pointers are word aligned
574 +               blt     6f                      @ fewer than 8 bytes left
575 +               subs    r2, r2, #8
576 +               blt     5f                      @ fewer than 16 bytes left
577 +               subs    r2, r2, #16
578 +               blt     4f                      @ fewer than 32 bytes left
579 +
580 +       PLD(    subs    r2, r2, #65             )       @ bias the count: the preloading loop at 2 stops 65 bytes early
581 +               stmfd   sp!, {r5 - r8}
582 +       PLD(    blt     3f                      )       @ too short for preloading, use the plain loop
583 +       PLD(    pld     [r1, #32]               )
584 +
585 +       PLD(    @ cache alignment               )
586 +       PLD(    ands    ip, r1, #31             )
587 +       PLD(    pld     [r1, #64]               )
588 +       PLD(    beq     2f                      )       @ src already on a 32-byte boundary
589 +       PLD(    rsb     ip, ip, #32             )       @ ip = bytes up to the next 32-byte boundary
590 +       PLD(    cmp     r2, ip                  )
591 +       PLD(    pld     [r1, #96]               )
592 +       PLD(    blt     2f                      )       @ biased count smaller than ip: skip the alignment step
593 +       PLD(    cmp     ip, #16                 )
594 +       PLD(    sub     r2, r2, ip              )
595 +       PLD(    ldmgeia r1!, {r3 - r6}          )       @ ip >= 16: move 16 bytes
596 +       PLD(    stmgeia r0!, {r3 - r6}          )
597 +       PLD(    beq     2f                      )       @ ip was exactly 16
598 +       PLD(    and     ip, ip, #15             )
599 +       PLD(    cmp     ip, #8                  )       @ 4, 8 or 12 bytes remain to the boundary
600 +       PLD(    ldr     r3, [r1], #4            )
601 +       PLD(    ldrge   r4, [r1], #4            )
602 +       PLD(    ldrgt   r5, [r1], #4            )
603 +       PLD(    str     r3, [r0], #4            )
604 +       PLD(    strge   r4, [r0], #4            )
605 +       PLD(    strgt   r5, [r0], #4            )
606 +
607 +2:     PLD(    pld     [r1, #96]               )
608 +3:             ldmia   r1!, {r3 - r8, ip, lr}  @ 32 bytes per pass
609 +               subs    r2, r2, #32
610 +               stmia   r0!, {r3 - r8, ip, lr}
611 +               bge     2b
612 +       PLD(    cmn     r2, #65                 )       @ r2 + 65 >= 0: finish in the loop without preloads
613 +       PLD(    bge     3b                      )
614 +       PLD(    add     r2, r2, #65             )       @ remove the bias again
615 +               tst     r2, #31                 @ low five bits of r2 = bytes still to copy
616 +               ldmfd   sp!, {r5 - r8}
617 +               ldmeqfd sp!, {r0, r4, pc}       @ nothing left: return the original dst
618 +
619 +               tst     r2, #16
620 +4:             ldmneia r1!, {r3, r4, ip, lr}   @ via blt 4f: r2 is negative so NE holds and 16 bytes are moved
621 +               stmneia r0!, {r3, r4, ip, lr}
622 +
623 +               tst     r2, #8
624 +5:             ldmneia r1!, {r3, r4}           @ via blt 5f: NE holds, 8 bytes are moved
625 +               stmneia r0!, {r3, r4}
626 +
627 +               tst     r2, #4
628 +6:             ldrne   r3, [r1], #4            @ via blt 6f: NE holds, one word is moved
629 +               strne   r3, [r0], #4
630 +
631 +7:             ands    r2, r2, #3              @ r2 = 0..3 trailing bytes
632 +               ldmeqfd sp!, {r0, r4, pc}       @ none: return the original dst
633 +
634 +               cmp     r2, #2
635 +               ldrb    r3, [r1], #1
636 +               ldrgeb  r4, [r1], #1            @ two or three bytes
637 +               ldrgtb  ip, [r1]                @ three bytes
638 +               strb    r3, [r0], #1
639 +               strgeb  r4, [r0], #1
640 +               strgtb  ip, [r0]
641 +               ldmfd   sp!, {r0, r4, pc}
642 +
643 +8:             rsb     ip, ip, #4              @ ip = 1..3 bytes to move so that dst becomes word aligned
644 +               cmp     ip, #2
645 +               ldrb    r3, [r1], #1
646 +               ldrgeb  r4, [r1], #1
647 +               ldrgtb  lr, [r1], #1
648 +               strb    r3, [r0], #1
649 +               strgeb  r4, [r0], #1
650 +               strgtb  lr, [r0], #1
651 +               subs    r2, r2, ip
652 +               blt     7b                      @ fewer than 4 bytes left
653 +               ands    ip, r1, #3
654 +               beq     1b                      @ src is word aligned as well: use the aligned path
655 +
656 +9:             bic     r1, r1, #3              @ round src down to a word boundary
657 +               cmp     ip, #2                  @ ip = src & 3 selects the shift variant
658 +               ldr     lr, [r1], #4            @ lr = word containing the first 4 - ip source bytes
659 +               beq     17f                     @ ip == 2
660 +               bgt     18f                     @ ip == 3, while ip == 1 falls through
661 +
662 +/* forward_copy_shift: upward word-at-a-time copy for a src that is 1..3 bytes off word alignment (dst is aligned); lr carries the partly consumed source word from step to step.  cpp rewrites the parameter names pull/push to lsr/lsl (or the reverse) here, in the body and at the invocations alike, so they keep matching. */
663 +               .macro  forward_copy_shift pull push
664 +
665 +               cmp     r2, #12                 @ r2 = bytes left - 4
666 +       PLD(    pld     [r1, #0]                )
667 +               blt     15f                     @ fewer than 16 bytes: single word loop
668 +               subs    r2, r2, #28
669 +               stmfd   sp!, {r5 - r9}
670 +               blt     13f                     @ fewer than 32 bytes: one 16-byte step
671 +
672 +       PLD(    subs    r2, r2, #97             )       @ count bias for the preloading loop
673 +       PLD(    blt     12f                     )
674 +       PLD(    pld     [r1, #32]               )
675 +
676 +       PLD(    @ cache alignment               )
677 +       PLD(    rsb     ip, r1, #36             )
678 +       PLD(    pld     [r1, #64]               )
679 +       PLD(    ands    ip, ip, #31             )       @ ip = (36 - r1) & 31 bytes are moved one word at a time first
680 +       PLD(    pld     [r1, #96]               )
681 +       PLD(    beq     11f                     )
682 +       PLD(    cmp     r2, ip                  )
683 +       PLD(    pld     [r1, #128]              )
684 +       PLD(    blt     11f                     )
685 +       PLD(    sub     r2, r2, ip              )
686 +10:    PLD(    mov     r3, lr, pull #\pull     )
687 +       PLD(    ldr     lr, [r1], #4            )
688 +       PLD(    subs    ip, ip, #4              )
689 +       PLD(    orr     r3, r3, lr, push #\push )
690 +       PLD(    str     r3, [r0], #4            )
691 +       PLD(    bgt     10b                     )
692 +
693 +11:    PLD(    pld     [r1, #128]              )
694 +12:            mov     r3, lr, pull #\pull     @ 32 bytes per pass: start with the leftover of the previous word
695 +               ldmia   r1!, {r4 - r9, ip, lr}
696 +               subs    r2, r2, #32
697 +               orr     r3, r3, r4, push #\push
698 +               mov     r4, r4, pull #\pull
699 +               orr     r4, r4, r5, push #\push
700 +               mov     r5, r5, pull #\pull
701 +               orr     r5, r5, r6, push #\push
702 +               mov     r6, r6, pull #\pull
703 +               orr     r6, r6, r7, push #\push
704 +               mov     r7, r7, pull #\pull
705 +               orr     r7, r7, r8, push #\push
706 +               mov     r8, r8, pull #\pull
707 +               orr     r8, r8, r9, push #\push
708 +               mov     r9, r9, pull #\pull
709 +               orr     r9, r9, ip, push #\push
710 +               mov     ip, ip, pull #\pull
711 +               orr     ip, ip, lr, push #\push  @ lr stays as the carry for the next step
712 +               stmia   r0!, {r3 - r9, ip}
713 +               bge     11b
714 +       PLD(    cmn     r2, #97                 )
715 +       PLD(    bge     12b                     )
716 +       PLD(    add     r2, r2, #97             )
717 +               cmn     r2, #16
718 +               blt     14f                     @ fewer than 16 bytes left
719 +13:            mov     r3, lr, pull #\pull     @ one 16-byte step
720 +               ldmia   r1!, {r4 - r6, lr}
721 +               sub     r2, r2, #16
722 +               orr     r3, r3, r4, push #\push
723 +               mov     r4, r4, pull #\pull
724 +               orr     r4, r4, r5, push #\push
725 +               mov     r5, r5, pull #\pull
726 +               orr     r5, r5, r6, push #\push
727 +               mov     r6, r6, pull #\pull
728 +               orr     r6, r6, lr, push #\push
729 +               stmia   r0!, {r3 - r6}
730 +14:            adds    r2, r2, #28             @ back to r2 = bytes left - 4
731 +               ldmfd   sp!, {r5 - r9}
732 +               blt     16f                     @ fewer than 4 bytes left
733 +15:            mov     r3, lr, pull #\pull     @ single word loop
734 +               ldr     lr, [r1], #4
735 +               subs    r2, r2, #4
736 +               orr     r3, r3, lr, push #\push
737 +               str     r3, [r0], #4
738 +               bge     15b
739 +16:
740 +               .endm
741 +
742 +
743 +               forward_copy_shift      pull=8  push=24
744 +               sub     r1, r1, #3              @ src & 3 was 1: turn r1 back into the byte pointer
745 +               b       7b
746 +
747 +17:            forward_copy_shift      pull=16 push=16
748 +               sub     r1, r1, #2              @ src & 3 was 2
749 +               b       7b
750 +
751 +18:            forward_copy_shift      pull=24 push=8
752 +               sub     r1, r1, #1              @ src & 3 was 3
753 +               b       7b
754 +
755 +               .size   memcpy, . - memcpy
756 +END(memcpy)
757 +libc_hidden_builtin_def (memcpy)
758 +