]> pilppa.org Git - familiar-h63xx-build.git/blob - org.handhelds.familiar/packages/glibc/glibc-2.3.2/arm-memcpy.patch
OE tree imported from monotone branch org.openembedded.oz354fam083 at revision 8b12e3...
[familiar-h63xx-build.git] / org.handhelds.familiar / packages / glibc / glibc-2.3.2 / arm-memcpy.patch
1 --- /dev/null   2004-02-02 20:32:13.000000000 +0000
2 +++ sysdeps/arm/memcpy.S        2004-03-20 13:25:27.000000000 +0000
3 @@ -0,0 +1,241 @@
4 +/*
5 + *   Optimized memcpy implementation for ARM processors
6 + *
7 + *     Author:         Nicolas Pitre
8 + *     Created:        Dec 23, 2003
9 + *     Copyright:      (C) MontaVista Software, Inc.
10 + *
11 + *   This file is free software; you can redistribute it and/or
12 + *   modify it under the terms of the GNU Lesser General Public
13 + *   License as published by the Free Software Foundation; either
14 + *   version 2.1 of the License, or (at your option) any later version.
15 + *
16 + *   This file is distributed in the hope that it will be useful,
17 + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 + *   Lesser General Public License for more details.
20 + */
21 +
22 +#include <sysdep.h>
23 +
24 +
25 +/*
26 + * Endian independent shift names: "pull" moves bytes toward the lowest-addressed end of a word, "push" toward the highest.
27 + */
28 +#ifndef __ARMEB__
29 +#define pull            lsr
30 +#define push            lsl
31 +#else
32 +#define pull            lsl
33 +#define push            lsr
34 +#endif
35 +
36 +/*
37 + * Enable data preload only where pld exists: ARMv5TE and later.  Plain
38 + * ARMv5 and ARMv5T lack it, so they are excluded along with older cores. */
39 +#if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_3__) && !defined(__ARM_ARCH_3M__) && \
40 +    !defined(__ARM_ARCH_4__) && !defined(__ARM_ARCH_4T__) && \
41 +    !defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5T__)
42 +#define PLD(code...)   code
43 +#else
44 +#define PLD(code...)
45 +#endif
46 +
47 +
48 +/* void *memcpy (void *dst, const void *src, size_t n): r0 = dst, r1 = src, r2 = n; returns dst in r0 */
49 +
50 +ENTRY(memcpy)
51 +               subs    r2, r2, #4  @ r2 = n - 4, negative when n < 4
52 +               stmfd   sp!, {r0, r4, lr}  @ save dst (the return value), r4 and lr
53 +               blt     7f  @ fewer than 4 bytes: byte tail only
54 +               ands    ip, r0, #3  @ ip = dst misalignment within a word
55 +       PLD(    pld     [r1, #0]                )
56 +               bne     8f  @ dst unaligned: copy leading bytes first
57 +               ands    ip, r1, #3  @ ip = src misalignment within a word
58 +               bne     9f  @ dst aligned, src not: shifted copy
59 +
60 +1:             subs    r2, r2, #4  @ both pointers are word aligned from here on
61 +               blt     6f  @ fewer than 8 bytes remain
62 +               subs    r2, r2, #8
63 +               blt     5f  @ fewer than 16 bytes remain
64 +               subs    r2, r2, #16
65 +               blt     4f  @ fewer than 32 bytes remain
66 +
67 +       PLD(    subs    r2, r2, #65             )
68 +               stmfd   sp!, {r5 - r8}  @ free r5-r8 for the 32-byte block loop
69 +       PLD(    blt     3f                      )
70 +       PLD(    pld     [r1, #32]               )
71 +
72 +       PLD(    @ cache alignment               )
73 +       PLD(    ands    ip, r1, #31             )
74 +       PLD(    pld     [r1, #64]               )
75 +       PLD(    beq     2f                      )
76 +       PLD(    rsb     ip, ip, #32             )
77 +       PLD(    cmp     r2, ip                  )
78 +       PLD(    pld     [r1, #96]               )
79 +       PLD(    blt     2f                      )
80 +       PLD(    cmp     ip, #16                 )
81 +       PLD(    sub     r2, r2, ip              )
82 +       PLD(    ldmgeia r1!, {r3 - r6}          )
83 +       PLD(    stmgeia r0!, {r3 - r6}          )
84 +       PLD(    beq     2f                      )
85 +       PLD(    and     ip, ip, #15             )
86 +       PLD(    cmp     ip, #8                  )
87 +       PLD(    ldr     r3, [r1], #4            )
88 +       PLD(    ldrge   r4, [r1], #4            )
89 +       PLD(    ldrgt   r5, [r1], #4            )
90 +       PLD(    str     r3, [r0], #4            )
91 +       PLD(    strge   r4, [r0], #4            )
92 +       PLD(    strgt   r5, [r0], #4            )
93 +
94 +2:     PLD(    pld     [r1, #96]               )
95 +3:             ldmia   r1!, {r3 - r8, ip, lr}  @ move 32 bytes per iteration
96 +               subs    r2, r2, #32
97 +               stmia   r0!, {r3 - r8, ip, lr}
98 +               bge     2b
99 +       PLD(    cmn     r2, #65                 )
100 +       PLD(    bge     3b                      )
101 +       PLD(    add     r2, r2, #65             )
102 +               tst     r2, #31  @ any bytes left below a full 32-byte block?
103 +               ldmfd   sp!, {r5 - r8}
104 +               ldmeqfd sp!, {r0, r4, pc}  @ none left: return dst
105 +
106 +               tst     r2, #16  @ bit 4 of the count set: one 16-byte chunk
107 +4:             ldmneia r1!, {r3, r4, ip, lr}
108 +               stmneia r0!, {r3, r4, ip, lr}
109 +
110 +               tst     r2, #8  @ bit 3 set: one 8-byte chunk
111 +5:             ldmneia r1!, {r3, r4}
112 +               stmneia r0!, {r3, r4}
113 +
114 +               tst     r2, #4  @ bit 2 set: one word
115 +6:             ldrne   r3, [r1], #4
116 +               strne   r3, [r0], #4
117 +
118 +7:             ands    r2, r2, #3  @ r2 = trailing byte count (0..3)
119 +               ldmeqfd sp!, {r0, r4, pc}  @ nothing left: return dst
120 +
121 +               cmp     r2, #2  @ ge: at least 2 bytes, gt: 3 bytes
122 +               ldrb    r3, [r1], #1
123 +               ldrgeb  r4, [r1], #1
124 +               ldrgtb  ip, [r1]
125 +               strb    r3, [r0], #1
126 +               strgeb  r4, [r0], #1
127 +               strgtb  ip, [r0]
128 +               ldmfd   sp!, {r0, r4, pc}  @ return dst
129 +
130 +8:             rsb     ip, ip, #4  @ ip = bytes needed to word align dst (1..3)
131 +               cmp     ip, #2  @ ge: copy 2 bytes, gt: copy 3
132 +               ldrb    r3, [r1], #1
133 +               ldrgeb  r4, [r1], #1
134 +               ldrgtb  lr, [r1], #1
135 +               strb    r3, [r0], #1
136 +               strgeb  r4, [r0], #1
137 +               strgtb  lr, [r0], #1
138 +               subs    r2, r2, ip  @ deduct the alignment bytes from the count
139 +               blt     7b  @ fewer than 4 bytes remain
140 +               ands    ip, r1, #3  @ recheck src now that dst is aligned
141 +               beq     1b  @ src aligned as well: plain word copy
142 +
143 +9:             bic     r1, r1, #3  @ round src down to a word boundary
144 +               cmp     ip, #2  @ pick the shift variant from the src byte offset
145 +               ldr     lr, [r1], #4  @ lr = first aligned source word
146 +               beq     17f  @ offset 2
147 +               bgt     18f  @ offset 3, offset 1 falls through
148 +
149 +/* forward_copy_shift: word-aligned dst, misaligned src; merges adjacent aligned source words using the two shift counts. */
150 +               .macro  forward_copy_shift pull push
151 +
152 +               cmp     r2, #12  @ r2 = remaining bytes - 4
153 +       PLD(    pld     [r1, #0]                )
154 +               blt     15f  @ fewer than 16 bytes remain: word loop only
155 +               subs    r2, r2, #28
156 +               stmfd   sp!, {r5 - r9}  @ extra scratch registers for the block steps
157 +               blt     13f  @ fewer than 32 bytes remain
158 +
159 +       PLD(    subs    r2, r2, #97             )
160 +       PLD(    blt     12f                     )
161 +       PLD(    pld     [r1, #32]               )
162 +
163 +       PLD(    @ cache alignment               )
164 +       PLD(    rsb     ip, r1, #36             )
165 +       PLD(    pld     [r1, #64]               )
166 +       PLD(    ands    ip, ip, #31             )
167 +       PLD(    pld     [r1, #96]               )
168 +       PLD(    beq     11f                     )
169 +       PLD(    cmp     r2, ip                  )
170 +       PLD(    pld     [r1, #128]              )
171 +       PLD(    blt     11f                     )
172 +       PLD(    sub     r2, r2, ip              )
173 +10:    PLD(    mov     r3, lr, pull #\pull     )
174 +       PLD(    ldr     lr, [r1], #4            )
175 +       PLD(    subs    ip, ip, #4              )
176 +       PLD(    orr     r3, r3, lr, push #\push )
177 +       PLD(    str     r3, [r0], #4            )
178 +       PLD(    bgt     10b                     )
179 +
180 +11:    PLD(    pld     [r1, #128]              )
181 +12:            mov     r3, lr, pull #\pull  @ start from the leftover of the previous word
182 +               ldmia   r1!, {r4 - r9, ip, lr}  @ load 8 more aligned source words
183 +               subs    r2, r2, #32
184 +               orr     r3, r3, r4, push #\push
185 +               mov     r4, r4, pull #\pull
186 +               orr     r4, r4, r5, push #\push
187 +               mov     r5, r5, pull #\pull
188 +               orr     r5, r5, r6, push #\push
189 +               mov     r6, r6, pull #\pull
190 +               orr     r6, r6, r7, push #\push
191 +               mov     r7, r7, pull #\pull
192 +               orr     r7, r7, r8, push #\push
193 +               mov     r8, r8, pull #\pull
194 +               orr     r8, r8, r9, push #\push
195 +               mov     r9, r9, pull #\pull
196 +               orr     r9, r9, ip, push #\push
197 +               mov     ip, ip, pull #\pull
198 +               orr     ip, ip, lr, push #\push
199 +               stmia   r0!, {r3 - r9, ip}  @ store 32 realigned bytes, lr carries over
200 +               bge     11b
201 +       PLD(    cmn     r2, #97                 )
202 +       PLD(    bge     12b                     )
203 +       PLD(    add     r2, r2, #97             )
204 +               cmn     r2, #16
205 +               blt     14f  @ fewer than 16 bytes remain: skip the 16-byte step
206 +13:            mov     r3, lr, pull #\pull  @ same merge for a single 16-byte step
207 +               ldmia   r1!, {r4 - r6, lr}
208 +               sub     r2, r2, #16
209 +               orr     r3, r3, r4, push #\push
210 +               mov     r4, r4, pull #\pull
211 +               orr     r4, r4, r5, push #\push
212 +               mov     r5, r5, pull #\pull
213 +               orr     r5, r5, r6, push #\push
214 +               mov     r6, r6, pull #\pull
215 +               orr     r6, r6, lr, push #\push
216 +               stmia   r0!, {r3 - r6}
217 +14:            adds    r2, r2, #28  @ back to r2 = remaining bytes - 4
218 +               ldmfd   sp!, {r5 - r9}
219 +               blt     16f  @ fewer than 4 bytes remain
220 +15:            mov     r3, lr, pull #\pull  @ one output word per iteration
221 +               ldr     lr, [r1], #4
222 +               subs    r2, r2, #4
223 +               orr     r3, r3, lr, push #\push
224 +               str     r3, [r0], #4
225 +               bge     15b
226 +16:
227 +               .endm
228 +
229 +
230 +               forward_copy_shift      pull=8  push=24  @ src offset 1
231 +               sub     r1, r1, #3  @ undo the word read-ahead: r1 = next unread source byte
232 +               b       7b  @ finish with the byte tail
233 +
234 +17:            forward_copy_shift      pull=16 push=16  @ src offset 2
235 +               sub     r1, r1, #2  @ r1 = next unread source byte
236 +               b       7b
237 +
238 +18:            forward_copy_shift      pull=24 push=8  @ src offset 3
239 +               sub     r1, r1, #1  @ r1 = next unread source byte
240 +               b       7b
241 +
242 +               .size   memcpy, . - memcpy
243 +END(memcpy)
244 +libc_hidden_builtin_def (memcpy)
245 --- /dev/null   2004-02-02 20:32:13.000000000 +0000
246 +++ sysdeps/arm/memmove.S       2004-03-20 18:37:23.000000000 +0000
247 @@ -0,0 +1,251 @@
248 +/*
249 + *   Optimized memmove implementation for ARM processors
250 + *
251 + *     Author:         Nicolas Pitre
252 + *     Created:        Dec 23, 2003
253 + *     Copyright:      (C) MontaVista Software, Inc.
254 + *
255 + *   This file is free software; you can redistribute it and/or
256 + *   modify it under the terms of the GNU Lesser General Public
257 + *   License as published by the Free Software Foundation; either
258 + *   version 2.1 of the License, or (at your option) any later version.
259 + *
260 + *   This file is distributed in the hope that it will be useful,
261 + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
262 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
263 + *   Lesser General Public License for more details.
264 + */
265 +
266 +#include <sysdep.h>
267 +
268 +
269 +/*
270 + * Endian independent shift names: "pull" moves bytes toward the lowest-addressed end of a word, "push" toward the highest.
271 + */
272 +#ifndef __ARMEB__
273 +#define pull            lsr
274 +#define push            lsl
275 +#else
276 +#define pull            lsl
277 +#define push            lsr
278 +#endif
279 +
280 +/*
281 + * Enable data preload only where pld exists: ARMv5TE and later.  Plain
282 + * ARMv5 and ARMv5T lack it, so they are excluded along with older cores. */
283 +#if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_3__) && !defined(__ARM_ARCH_3M__) && \
284 +    !defined(__ARM_ARCH_4__) && !defined(__ARM_ARCH_4T__) && \
285 +    !defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5T__)
286 +#define PLD(code...)   code
287 +#else
288 +#define PLD(code...)
289 +#endif
290 +
291 +
292 +/* void *memmove (void *dst, const void *src, size_t n): r0 = dst, r1 = src, r2 = n; returns dst in r0 */
293 +ENTRY(memmove)
294 +               subs    ip, r0, r1  @ ip = dst - src
295 +               cmphi   r2, ip  @ only when dst > src: compare n with that distance
296 +               bls     memcpy(PLT)  @ dst <= src or n <= distance: tail call memcpy
297 +
298 +               stmfd   sp!, {r0, r4, lr}  @ save dst (the return value), r4 and lr
299 +               add     r1, r1, r2  @ r1 = one past the end of src
300 +               add     r0, r0, r2  @ r0 = one past the end of dst, the copy runs downward
301 +               subs    r2, r2, #4  @ r2 = n - 4, negative when n < 4
302 +               blt     25f  @ fewer than 4 bytes: byte tail only
303 +               ands    ip, r0, #3  @ ip = bytes above the last word boundary of dst
304 +       PLD(    pld     [r1, #-4]               )
305 +               bne     26f  @ dst end unaligned: copy trailing bytes first
306 +               ands    ip, r1, #3  @ ip = src misalignment within a word
307 +               bne     27f  @ dst aligned, src not: shifted copy
308 +
309 +19:            subs    r2, r2, #4  @ both pointers are word aligned from here on
310 +               blt     24f  @ fewer than 8 bytes remain
311 +               subs    r2, r2, #8
312 +               blt     23f  @ fewer than 16 bytes remain
313 +               subs    r2, r2, #16
314 +               blt     22f  @ fewer than 32 bytes remain
315 +
316 +       PLD(    pld     [r1, #-32]              )
317 +       PLD(    subs    r2, r2, #96             )
318 +               stmfd   sp!, {r5 - r8}  @ saved here, the PLD alignment code uses r5 and r6
319 +       PLD(    blt     21f                     )
320 +
321 +       PLD(    @ cache alignment               )
322 +       PLD(    ands    ip, r1, #31             )
323 +       PLD(    pld     [r1, #-64]              )
324 +       PLD(    beq     20f                     )
325 +       PLD(    cmp     r2, ip                  )
326 +       PLD(    pld     [r1, #-96]              )
327 +       PLD(    blt     20f                     )
328 +       PLD(    cmp     ip, #16                 )
329 +       PLD(    sub     r2, r2, ip              )
330 +       PLD(    ldmgedb r1!, {r3 - r6}          )
331 +       PLD(    stmgedb r0!, {r3 - r6}          )
332 +       PLD(    beq     20f                     )
333 +       PLD(    and     ip, ip, #15             )
334 +       PLD(    cmp     ip, #8                  )
335 +       PLD(    ldr     r3, [r1, #-4]!          )
336 +       PLD(    ldrge   r4, [r1, #-4]!          )
337 +       PLD(    ldrgt   r5, [r1, #-4]!          )
338 +       PLD(    str     r3, [r0, #-4]!          )
339 +       PLD(    strge   r4, [r0, #-4]!          )
340 +       PLD(    strgt   r5, [r0, #-4]!          )
341 +
342 +20:    PLD(    pld     [r1, #-96]              )
343 +       PLD(    pld     [r1, #-128]             )
344 +21:            ldmdb   r1!, {r3, r4, ip, lr}  @ each pass moves 32 bytes, or 64 when the count allows
345 +               subs    r2, r2, #32
346 +               stmdb   r0!, {r3, r4, ip, lr}
347 +               ldmdb   r1!, {r3, r4, ip, lr}
348 +               stmgedb r0!, {r3, r4, ip, lr}  @ ge: at least 32 more bytes, keep going
349 +               ldmgedb r1!, {r3, r4, ip, lr}
350 +               stmgedb r0!, {r3, r4, ip, lr}
351 +               ldmgedb r1!, {r3, r4, ip, lr}
352 +               subges  r2, r2, #32
353 +               stmdb   r0!, {r3, r4, ip, lr}  @ store the last 16 bytes loaded on either path
354 +               bge     20b
355 +       PLD(    cmn     r2, #96                 )
356 +       PLD(    bge     21b                     )
357 +       PLD(    add     r2, r2, #96             )
358 +               tst     r2, #31  @ any bytes left below a full 32-byte block?
359 +               ldmfd   sp!, {r5 - r8}
360 +               ldmeqfd sp!, {r0, r4, pc}  @ none left: return dst
361 +
362 +               tst     r2, #16  @ bit 4 of the count set: one 16-byte chunk
363 +22:            ldmnedb r1!, {r3, r4, ip, lr}
364 +               stmnedb r0!, {r3, r4, ip, lr}
365 +
366 +               tst     r2, #8  @ bit 3 set: one 8-byte chunk
367 +23:            ldmnedb r1!, {r3, r4}
368 +               stmnedb r0!, {r3, r4}
369 +
370 +               tst     r2, #4  @ bit 2 set: one word
371 +24:            ldrne   r3, [r1, #-4]!
372 +               strne   r3, [r0, #-4]!
373 +
374 +25:            ands    r2, r2, #3  @ r2 = trailing byte count (0..3)
375 +               ldmeqfd sp!, {r0, r4, pc}  @ nothing left: return dst
376 +
377 +               cmp     r2, #2  @ ge: at least 2 bytes, gt: 3 bytes
378 +               ldrb    r3, [r1, #-1]
379 +               ldrgeb  r4, [r1, #-2]
380 +               ldrgtb  ip, [r1, #-3]
381 +               strb    r3, [r0, #-1]
382 +               strgeb  r4, [r0, #-2]
383 +               strgtb  ip, [r0, #-3]
384 +               ldmfd   sp!, {r0, r4, pc}  @ return dst
385 +
386 +26:            cmp     ip, #2  @ ip = bytes to copy to bring dst down to a word boundary
387 +               ldrb    r3, [r1, #-1]!
388 +               ldrgeb  r4, [r1, #-1]!
389 +               ldrgtb  lr, [r1, #-1]!
390 +               strb    r3, [r0, #-1]!
391 +               strgeb  r4, [r0, #-1]!
392 +               strgtb  lr, [r0, #-1]!
393 +               subs    r2, r2, ip  @ deduct the alignment bytes from the count
394 +               blt     25b  @ fewer than 4 bytes remain
395 +               ands    ip, r1, #3  @ recheck src now that dst is aligned
396 +               beq     19b  @ src aligned as well: plain word copy
397 +
398 +27:            bic     r1, r1, #3  @ round src down to a word boundary
399 +               cmp     ip, #2  @ pick the shift variant from the src byte offset
400 +               ldr     r3, [r1]  @ r3 = aligned word containing the last source bytes
401 +               beq     35f  @ offset 2
402 +               blt     36f  @ offset 1, offset 3 falls through
403 +
404 +/* backward_copy_shift: word-aligned dst, misaligned src, descending; merges adjacent aligned source words using the two shift counts. */
405 +               .macro  backward_copy_shift push pull
406 +
407 +               cmp     r2, #12  @ r2 = remaining bytes - 4
408 +       PLD(    pld     [r1, #-4]               )
409 +               blt     33f  @ fewer than 16 bytes remain: word loop only
410 +               subs    r2, r2, #28
411 +               stmfd   sp!, {r5 - r9}  @ extra scratch registers for the block steps
412 +               blt     31f  @ fewer than 32 bytes remain
413 +
414 +       PLD(    subs    r2, r2, #96             )
415 +       PLD(    pld     [r1, #-32]              )
416 +       PLD(    blt     30f                     )
417 +       PLD(    pld     [r1, #-64]              )
418 +
419 +       PLD(    @ cache alignment               )
420 +       PLD(    ands    ip, r1, #31             )
421 +       PLD(    pld     [r1, #-96]              )
422 +       PLD(    beq     29f                     )
423 +       PLD(    cmp     r2, ip                  )
424 +       PLD(    pld     [r1, #-128]             )
425 +       PLD(    blt     29f                     )
426 +       PLD(    sub     r2, r2, ip              )
427 +28:    PLD(    mov     r4, r3, push #\push     )
428 +       PLD(    ldr     r3, [r1, #-4]!          )
429 +       PLD(    subs    ip, ip, #4              )
430 +       PLD(    orr     r4, r4, r3, pull #\pull )
431 +       PLD(    str     r4, [r0, #-4]!          )
432 +       PLD(    bgt     28b                     )
433 +
434 +29:    PLD(    pld     [r1, #-128]             )
435 +30:            mov     lr, r3, push #\push  @ start from the leftover of the previous word
436 +               ldmdb   r1!, {r3 - r9, ip}  @ load the 8 aligned source words below
437 +               subs    r2, r2, #32
438 +               orr     lr, lr, ip, pull #\pull
439 +               mov     ip, ip, push #\push
440 +               orr     ip, ip, r9, pull #\pull
441 +               mov     r9, r9, push #\push
442 +               orr     r9, r9, r8, pull #\pull
443 +               mov     r8, r8, push #\push
444 +               orr     r8, r8, r7, pull #\pull
445 +               mov     r7, r7, push #\push
446 +               orr     r7, r7, r6, pull #\pull
447 +               mov     r6, r6, push #\push
448 +               orr     r6, r6, r5, pull #\pull
449 +               mov     r5, r5, push #\push
450 +               orr     r5, r5, r4, pull #\pull
451 +               mov     r4, r4, push #\push
452 +               orr     r4, r4, r3, pull #\pull
453 +               stmdb   r0!, {r4 - r9, ip, lr}  @ store 32 realigned bytes, r3 carries over
454 +               bge     29b
455 +       PLD(    cmn     r2, #96                 )
456 +       PLD(    bge     30b                     )
457 +       PLD(    add     r2, r2, #96             )
458 +               cmn     r2, #16
459 +               blt     32f  @ fewer than 16 bytes remain: skip the 16-byte step
460 +31:            mov     r7, r3, push #\push  @ same merge for a single 16-byte step
461 +               ldmdb   r1!, {r3 - r6}
462 +               sub     r2, r2, #16
463 +               orr     r7, r7, r6, pull #\pull
464 +               mov     r6, r6, push #\push
465 +               orr     r6, r6, r5, pull #\pull
466 +               mov     r5, r5, push #\push
467 +               orr     r5, r5, r4, pull #\pull
468 +               mov     r4, r4, push #\push
469 +               orr     r4, r4, r3, pull #\pull
470 +               stmdb   r0!, {r4 - r7}
471 +32:            adds    r2, r2, #28  @ back to r2 = remaining bytes - 4
472 +               ldmfd   sp!, {r5 - r9}
473 +               blt     34f  @ fewer than 4 bytes remain
474 +33:            mov     r4, r3, push #\push  @ one output word per iteration
475 +               ldr     r3, [r1, #-4]!
476 +               subs    r2, r2, #4
477 +               orr     r4, r4, r3, pull #\pull
478 +               str     r4, [r0, #-4]!
479 +               bge     33b
480 +34:
481 +               .endm
482 +
483 +
484 +               backward_copy_shift     push=8  pull=24  @ src offset 3
485 +               add     r1, r1, #3  @ r1 = one past the next source byte still to copy
486 +               b       25b  @ finish with the byte tail
487 +
488 +35:            backward_copy_shift     push=16 pull=16  @ src offset 2
489 +               add     r1, r1, #2  @ r1 = one past the next source byte still to copy
490 +               b       25b
491 +
492 +36:            backward_copy_shift     push=24 pull=8  @ src offset 1
493 +               add     r1, r1, #1  @ r1 = one past the next source byte still to copy
494 +               b       25b
495 +
496 +               .size   memmove, . - memmove
497 +END(memmove)
498 +libc_hidden_builtin_def (memmove)
499 --- /dev/null   2004-02-02 20:32:13.000000000 +0000
500 +++ sysdeps/arm/bcopy.S 2004-03-20 18:37:48.000000000 +0000
501 @@ -0,0 +1,255 @@
502 +/*
503 + *   Optimized bcopy implementation for ARM processors
504 + *
505 + *     Author:         Nicolas Pitre
506 + *     Created:        Dec 23, 2003
507 + *     Copyright:      (C) MontaVista Software, Inc.
508 + *
509 + *   This file is free software; you can redistribute it and/or
510 + *   modify it under the terms of the GNU Lesser General Public
511 + *   License as published by the Free Software Foundation; either
512 + *   version 2.1 of the License, or (at your option) any later version.
513 + *
514 + *   This file is distributed in the hope that it will be useful,
515 + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
516 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
517 + *   Lesser General Public License for more details.
518 + */
519 +
520 +#include <sysdep.h>
521 +
522 +
523 +/*
524 + * Endian independent shift names: "pull" moves bytes toward the lowest-addressed end of a word, "push" toward the highest.
525 + */
526 +#ifndef __ARMEB__
527 +#define pull            lsr
528 +#define push            lsl
529 +#else
530 +#define pull            lsl
531 +#define push            lsr
532 +#endif
533 +
534 +/*
535 + * Enable data preload only where pld exists: ARMv5TE and later.  Plain
536 + * ARMv5 and ARMv5T lack it, so they are excluded along with older cores. */
537 +#if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_3__) && !defined(__ARM_ARCH_3M__) && \
538 +    !defined(__ARM_ARCH_4__) && !defined(__ARM_ARCH_4T__) && \
539 +    !defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5T__)
540 +#define PLD(code...)   code
541 +#else
542 +#define PLD(code...)
543 +#endif
544 +
545 +dst            .req    r1  @ bcopy takes (src, dst, n): dst is the second argument
546 +src            .req    r0  @ and src is the first
547 +
548 +/* void bcopy (const void *src, void *dst, size_t n): r0 = src, r1 = dst, r2 = n; no return value */
549 +ENTRY(bcopy)
550 +               subs    ip, dst, src  @ ip = dst - src
551 +               cmphi   r2, ip  @ only when dst > src: compare n with that distance
552 +               movls   r3, r0  @ forward copy is safe: swap r0 and r1 into
553 +               movls   r0, r1  @ memcpy argument order (dst, src)
554 +               movls   r1, r3
555 +               bls     memcpy(PLT)  @ tail call memcpy
556 +
557 +               stmfd   sp!, {r4, lr}  @ two-word frame: every exit must pop exactly {r4, pc}
558 +               add     src, src, r2  @ src = one past the end of the source
559 +               add     dst, dst, r2  @ dst = one past the end of the destination, the copy runs downward
560 +               subs    r2, r2, #4  @ r2 = n - 4, negative when n < 4
561 +               blt     25f  @ fewer than 4 bytes: byte tail only
562 +               ands    ip, dst, #3  @ ip = bytes above the last word boundary of dst
563 +       PLD(    pld     [src, #-4]              )
564 +               bne     26f  @ dst end unaligned: copy trailing bytes first
565 +               ands    ip, src, #3  @ ip = src misalignment within a word
566 +               bne     27f  @ dst aligned, src not: shifted copy
567 +
568 +19:            subs    r2, r2, #4  @ both pointers are word aligned from here on
569 +               blt     24f  @ fewer than 8 bytes remain
570 +               subs    r2, r2, #8
571 +               blt     23f  @ fewer than 16 bytes remain
572 +               subs    r2, r2, #16
573 +               blt     22f  @ fewer than 32 bytes remain
574 +
575 +       PLD(    pld     [src, #-32]             )
576 +       PLD(    subs    r2, r2, #96             )
577 +               stmfd   sp!, {r5 - r8}  @ saved here, the PLD alignment code uses r5 and r6
578 +       PLD(    blt     21f                     )
579 +
580 +       PLD(    @ cache alignment               )
581 +       PLD(    ands    ip, src, #31            )
582 +       PLD(    pld     [src, #-64]             )
583 +       PLD(    beq     20f                     )
584 +       PLD(    cmp     r2, ip                  )
585 +       PLD(    pld     [src, #-96]             )
586 +       PLD(    blt     20f                     )
587 +       PLD(    cmp     ip, #16                 )
588 +       PLD(    sub     r2, r2, ip              )
589 +       PLD(    ldmgedb src!, {r3 - r6}         )
590 +       PLD(    stmgedb dst!, {r3 - r6}         )
591 +       PLD(    beq     20f                     )
592 +       PLD(    and     ip, ip, #15             )
593 +       PLD(    cmp     ip, #8                  )
594 +       PLD(    ldr     r3, [src, #-4]!         )
595 +       PLD(    ldrge   r4, [src, #-4]!         )
596 +       PLD(    ldrgt   r5, [src, #-4]!         )
597 +       PLD(    str     r3, [dst, #-4]!         )
598 +       PLD(    strge   r4, [dst, #-4]!         )
599 +       PLD(    strgt   r5, [dst, #-4]!         )
600 +
601 +20:    PLD(    pld     [src, #-96]             )
602 +       PLD(    pld     [src, #-128]            )
603 +21:            ldmdb   src!, {r3, r4, ip, lr}  @ each pass moves 32 bytes, or 64 when the count allows
604 +               subs    r2, r2, #32
605 +               stmdb   dst!, {r3, r4, ip, lr}
606 +               ldmdb   src!, {r3, r4, ip, lr}
607 +               stmgedb dst!, {r3, r4, ip, lr}  @ ge: at least 32 more bytes, keep going
608 +               ldmgedb src!, {r3, r4, ip, lr}
609 +               stmgedb dst!, {r3, r4, ip, lr}
610 +               ldmgedb src!, {r3, r4, ip, lr}
611 +               subges  r2, r2, #32
612 +               stmdb   dst!, {r3, r4, ip, lr}  @ store the last 16 bytes loaded on either path
613 +               bge     20b
614 +       PLD(    cmn     r2, #96                 )
615 +       PLD(    bge     21b                     )
616 +       PLD(    add     r2, r2, #96             )
617 +               tst     r2, #31  @ any bytes left below a full 32-byte block?
618 +               ldmfd   sp!, {r5 - r8}
619 +               ldmeqfd sp!, {r4, pc}  @ none left: return
620 +
621 +               tst     r2, #16  @ bit 4 of the count set: one 16-byte chunk
622 +22:            ldmnedb src!, {r3, r4, ip, lr}
623 +               stmnedb dst!, {r3, r4, ip, lr}
624 +
625 +               tst     r2, #8  @ bit 3 set: one 8-byte chunk
626 +23:            ldmnedb src!, {r3, r4}
627 +               stmnedb dst!, {r3, r4}
628 +
629 +               tst     r2, #4  @ bit 2 set: one word
630 +24:            ldrne   r3, [src, #-4]!
631 +               strne   r3, [dst, #-4]!
632 +
633 +25:            ands    r2, r2, #3  @ r2 = trailing byte count (0..3)
634 +               ldmeqfd sp!, {r4, pc}  @ nothing left: pop exactly the two words pushed at entry
635 +
636 +               cmp     r2, #2  @ ge: at least 2 bytes, gt: 3 bytes
637 +               ldrb    r3, [src, #-1]
638 +               ldrgeb  r4, [src, #-2]
639 +               ldrgtb  ip, [src, #-3]
640 +               strb    r3, [dst, #-1]
641 +               strgeb  r4, [dst, #-2]
642 +               strgtb  ip, [dst, #-3]
643 +               ldmfd   sp!, {r4, pc}  @ return, frame is {r4, lr} only (bcopy returns nothing)
644 +
645 +26:            cmp     ip, #2  @ ip = bytes to copy to bring dst down to a word boundary
646 +               ldrb    r3, [src, #-1]!
647 +               ldrgeb  r4, [src, #-1]!
648 +               ldrgtb  lr, [src, #-1]!
649 +               strb    r3, [dst, #-1]!
650 +               strgeb  r4, [dst, #-1]!
651 +               strgtb  lr, [dst, #-1]!
652 +               subs    r2, r2, ip  @ deduct the alignment bytes from the count
653 +               blt     25b  @ fewer than 4 bytes remain
654 +               ands    ip, src, #3  @ recheck src now that dst is aligned
655 +               beq     19b  @ src aligned as well: plain word copy
656 +
657 +27:            bic     src, src, #3  @ round src down to a word boundary
658 +               cmp     ip, #2  @ pick the shift variant from the src byte offset
659 +               ldr     r3, [src]  @ r3 = aligned word containing the last source bytes
660 +               beq     35f  @ offset 2
661 +               blt     36f  @ offset 1, offset 3 falls through
662 +
663 +/* backward_copy_shift: word-aligned dst, misaligned src, descending; merges adjacent aligned source words using the two shift counts. */
664 +               .macro  backward_copy_shift push pull
665 +
666 +               cmp     r2, #12  @ r2 = remaining bytes - 4
667 +       PLD(    pld     [src, #-4]              )
668 +               blt     33f  @ fewer than 16 bytes remain: word loop only
669 +               subs    r2, r2, #28
670 +               stmfd   sp!, {r5 - r9}  @ extra scratch registers for the block steps
671 +               blt     31f  @ fewer than 32 bytes remain
672 +
673 +       PLD(    subs    r2, r2, #96             )
674 +       PLD(    pld     [src, #-32]             )
675 +       PLD(    blt     30f                     )
676 +       PLD(    pld     [src, #-64]             )
677 +
678 +       PLD(    @ cache alignment               )
679 +       PLD(    ands    ip, src, #31            )
680 +       PLD(    pld     [src, #-96]             )
681 +       PLD(    beq     29f                     )
682 +       PLD(    cmp     r2, ip                  )
683 +       PLD(    pld     [src, #-128]            )
684 +       PLD(    blt     29f                     )
685 +       PLD(    sub     r2, r2, ip              )
686 +28:    PLD(    mov     r4, r3, push #\push     )
687 +       PLD(    ldr     r3, [src, #-4]!         )
688 +       PLD(    subs    ip, ip, #4              )
689 +       PLD(    orr     r4, r4, r3, pull #\pull )
690 +       PLD(    str     r4, [dst, #-4]!         )
691 +       PLD(    bgt     28b                     )
692 +
693 +29:    PLD(    pld     [src, #-128]            )
694 +30:            mov     lr, r3, push #\push  @ start from the leftover of the previous word
695 +               ldmdb   src!, {r3 - r9, ip}  @ load the 8 aligned source words below
696 +               subs    r2, r2, #32
697 +               orr     lr, lr, ip, pull #\pull
698 +               mov     ip, ip, push #\push
699 +               orr     ip, ip, r9, pull #\pull
700 +               mov     r9, r9, push #\push
701 +               orr     r9, r9, r8, pull #\pull
702 +               mov     r8, r8, push #\push
703 +               orr     r8, r8, r7, pull #\pull
704 +               mov     r7, r7, push #\push
705 +               orr     r7, r7, r6, pull #\pull
706 +               mov     r6, r6, push #\push
707 +               orr     r6, r6, r5, pull #\pull
708 +               mov     r5, r5, push #\push
709 +               orr     r5, r5, r4, pull #\pull
710 +               mov     r4, r4, push #\push
711 +               orr     r4, r4, r3, pull #\pull
712 +               stmdb   dst!, {r4 - r9, ip, lr}  @ store 32 realigned bytes, r3 carries over
713 +               bge     29b
714 +       PLD(    cmn     r2, #96                 )
715 +       PLD(    bge     30b                     )
716 +       PLD(    add     r2, r2, #96             )
717 +               cmn     r2, #16
718 +               blt     32f  @ fewer than 16 bytes remain: skip the 16-byte step
719 +31:            mov     r7, r3, push #\push  @ same merge for a single 16-byte step
720 +               ldmdb   src!, {r3 - r6}
721 +               sub     r2, r2, #16
722 +               orr     r7, r7, r6, pull #\pull
723 +               mov     r6, r6, push #\push
724 +               orr     r6, r6, r5, pull #\pull
725 +               mov     r5, r5, push #\push
726 +               orr     r5, r5, r4, pull #\pull
727 +               mov     r4, r4, push #\push
728 +               orr     r4, r4, r3, pull #\pull
729 +               stmdb   dst!, {r4 - r7}
730 +32:            adds    r2, r2, #28  @ back to r2 = remaining bytes - 4
731 +               ldmfd   sp!, {r5 - r9}
732 +               blt     34f  @ fewer than 4 bytes remain
733 +33:            mov     r4, r3, push #\push  @ one output word per iteration
734 +               ldr     r3, [src, #-4]!
735 +               subs    r2, r2, #4
736 +               orr     r4, r4, r3, pull #\pull
737 +               str     r4, [dst, #-4]!
738 +               bge     33b
739 +34:
740 +               .endm
741 +
742 +
743 +               backward_copy_shift     push=8  pull=24  @ src offset 3
744 +               add     src, src, #3  @ src = one past the next source byte still to copy
745 +               b       25b  @ finish with the byte tail
746 +
747 +35:            backward_copy_shift     push=16 pull=16  @ src offset 2
748 +               add     src, src, #2  @ src = one past the next source byte still to copy
749 +               b       25b
750 +
751 +36:            backward_copy_shift     push=24 pull=8  @ src offset 1
752 +               add     src, src, #1  @ src = one past the next source byte still to copy
753 +               b       25b
754 +
755 +               .size   bcopy, . - bcopy
756 +END(bcopy)