]> pilppa.org Git - familiar-h63xx-build.git/blob - org.handhelds.familiar/packages/glibc/glibc-2.2.5/arm-memcpy.patch
OE tree imported from monotone branch org.openembedded.oz354fam083 at revision 8b12e3...
[familiar-h63xx-build.git] / org.handhelds.familiar / packages / glibc / glibc-2.2.5 / arm-memcpy.patch
1
2 #
3 # Patch managed by http://www.mn-logistik.de/unsupported/pxa250/patcher
4 #
5
6 --- /dev/null   2004-06-02 16:28:12.000000000 -0500
7 +++ glibc-2.2.5/sysdeps/arm/memcpy.S    2004-09-03 19:00:39.000000000 -0500
8 @@ -0,0 +1,241 @@
9 +/*
10 + *   Optimized memcpy implementation for ARM processors
11 + *
12 + *     Author:         Nicolas Pitre
13 + *     Created:        Dec 23, 2003
14 + *     Copyright:      (C) MontaVista Software, Inc.
15 + *
16 + *   This file is free software; you can redistribute it and/or
17 + *   modify it under the terms of the GNU Lesser General Public
18 + *   License as published by the Free Software Foundation; either
19 + *   version 2.1 of the License, or (at your option) any later version.
20 + *
21 + *   This file is distributed in the hope that it will be useful,
22 + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
23 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24 + *   Lesser General Public License for more details.
25 + */
26 +
27 +#include <sysdep.h>
28 +
29 +
30 +/*
31 + * Endian independent macros for shifting bytes within registers.
32 + */
33 +#ifndef __ARMEB__
34 +#define pull            lsr
35 +#define push            lsl
36 +#else
37 +#define pull            lsl
38 +#define push            lsr
39 +#endif
40 +
41 +/*
42 + * Enable data preload for architectures that support it (ARMv5 and above)
43 + */
44 +#if defined(__ARM_ARCH_5__) || \
45 +    defined(__ARM_ARCH_5T__) || \
46 +    defined(__ARM_ARCH_5TE__)
47 +#define PLD(code...)   code
48 +#else
49 +#define PLD(code...)
50 +#endif
51 +
52 +
53 +/* void *memcpy (void *dst, const void *src, size_t n) */
54 +
55 +ENTRY(memcpy)  /* forward copy: r0 = dst, r1 = src, r2 = byte count */
56 +               subs    r2, r2, #4  /* r2 = count - 4 */
57 +               stmfd   sp!, {r0, r4, lr}  /* r0 is reloaded from here on every exit path */
58 +               blt     7f  /* fewer than 4 bytes: byte tail only */
59 +               ands    ip, r0, #3  /* ip = dst & 3 */
60 +       PLD(    pld     [r1, #0]                )
61 +               bne     8f  /* dst is not word aligned */
62 +               ands    ip, r1, #3  /* ip = src & 3 */
63 +               bne     9f  /* src is not word aligned: shifted copy */
64 +
65 +1:             subs    r2, r2, #4  /* both pointers word aligned; on entry r2 = remaining - 4 */
66 +               blt     6f  /* remaining < 8: one word, then the byte tail */
67 +               subs    r2, r2, #8
68 +               blt     5f  /* remaining < 16 */
69 +               subs    r2, r2, #16
70 +               blt     4f  /* remaining < 32 */
71 +
72 +       PLD(    subs    r2, r2, #65             )
73 +               stmfd   sp!, {r5 - r8}  /* extra scratch registers for the 32-byte loop */
74 +       PLD(    blt     3f                      )
75 +       PLD(    pld     [r1, #32]               )
76 +
77 +       PLD(    @ cache alignment               )
78 +       PLD(    ands    ip, r1, #31             )
79 +       PLD(    pld     [r1, #64]               )
80 +       PLD(    beq     2f                      )
81 +       PLD(    rsb     ip, ip, #32             )
82 +       PLD(    cmp     r2, ip                  )
83 +       PLD(    pld     [r1, #96]               )
84 +       PLD(    blt     2f                      )
85 +       PLD(    cmp     ip, #16                 )
86 +       PLD(    sub     r2, r2, ip              )
87 +       PLD(    ldmgeia r1!, {r3 - r6}          )
88 +       PLD(    stmgeia r0!, {r3 - r6}          )
89 +       PLD(    beq     2f                      )
90 +       PLD(    and     ip, ip, #15             )
91 +       PLD(    cmp     ip, #8                  )
92 +       PLD(    ldr     r3, [r1], #4            )
93 +       PLD(    ldrge   r4, [r1], #4            )
94 +       PLD(    ldrgt   r5, [r1], #4            )
95 +       PLD(    str     r3, [r0], #4            )
96 +       PLD(    strge   r4, [r0], #4            )
97 +       PLD(    strgt   r5, [r0], #4            )
98 +
99 +2:     PLD(    pld     [r1, #96]               )
100 +3:             ldmia   r1!, {r3 - r8, ip, lr}  /* load 8 words (32 bytes) */
101 +               subs    r2, r2, #32
102 +               stmia   r0!, {r3 - r8, ip, lr}  /* store them; stm leaves the flags from subs intact */
103 +               bge     2b  /* loop while the adjusted count is still >= 0 */
104 +       PLD(    cmn     r2, #65                 )
105 +       PLD(    bge     3b                      )
106 +       PLD(    add     r2, r2, #65             )
107 +               tst     r2, #31  /* any bytes left below a full 32-byte block? */
108 +               ldmfd   sp!, {r5 - r8}
109 +               ldmeqfd sp!, {r0, r4, pc}  /* none: restore r0 and return */
110 +
111 +               tst     r2, #16
112 +4:             ldmneia r1!, {r3, r4, ip, lr}  /* 16 bytes; the blt into 4 arrives with NE set (negative result) */
113 +               stmneia r0!, {r3, r4, ip, lr}
114 +
115 +               tst     r2, #8
116 +5:             ldmneia r1!, {r3, r4}  /* 8 bytes */
117 +               stmneia r0!, {r3, r4}
118 +
119 +               tst     r2, #4
120 +6:             ldrne   r3, [r1], #4  /* 4 bytes */
121 +               strne   r3, [r0], #4
122 +
123 +7:             ands    r2, r2, #3  /* r2 = trailing byte count (0-3) */
124 +               ldmeqfd sp!, {r0, r4, pc}  /* nothing left: return */
125 +
126 +               cmp     r2, #2  /* one byte always, a second if r2 >= 2, a third if r2 == 3 */
127 +               ldrb    r3, [r1], #1
128 +               ldrgeb  r4, [r1], #1
129 +               ldrgtb  ip, [r1]
130 +               strb    r3, [r0], #1
131 +               strgeb  r4, [r0], #1
132 +               strgtb  ip, [r0]
133 +               ldmfd   sp!, {r0, r4, pc}
134 +
135 +8:             rsb     ip, ip, #4  /* ip = bytes needed to word-align dst (1-3) */
136 +               cmp     ip, #2
137 +               ldrb    r3, [r1], #1
138 +               ldrgeb  r4, [r1], #1
139 +               ldrgtb  lr, [r1], #1
140 +               strb    r3, [r0], #1
141 +               strgeb  r4, [r0], #1
142 +               strgtb  lr, [r0], #1
143 +               subs    r2, r2, ip  /* account for the alignment bytes just copied */
144 +               blt     7b  /* fewer than 4 bytes remain */
145 +               ands    ip, r1, #3  /* ip = src & 3 now that dst is aligned */
146 +               beq     1b  /* src is aligned too: use the aligned path */
147 +
148 +9:             bic     r1, r1, #3  /* round src down to a word boundary */
149 +               cmp     ip, #2
150 +               ldr     lr, [r1], #4  /* lr = first aligned source word */
151 +               beq     17f  /* src & 3 == 2 */
152 +               bgt     18f  /* src & 3 == 3; src & 3 == 1 falls through */
153 +
154 +
155 +               .macro  forward_copy_shift pull push  /* upward copy, src misaligned; lr = last source word loaded */
156 +
157 +               cmp     r2, #12
158 +       PLD(    pld     [r1, #0]                )
159 +               blt     15f  /* too short for the block steps: word loop */
160 +               subs    r2, r2, #28
161 +               stmfd   sp!, {r5 - r9}  /* extra scratch registers, restored at 14 */
162 +               blt     13f  /* not enough for a 32-byte step */
163 +
164 +       PLD(    subs    r2, r2, #97             )
165 +       PLD(    blt     12f                     )
166 +       PLD(    pld     [r1, #32]               )
167 +
168 +       PLD(    @ cache alignment               )
169 +       PLD(    rsb     ip, r1, #36             )
170 +       PLD(    pld     [r1, #64]               )
171 +       PLD(    ands    ip, ip, #31             )
172 +       PLD(    pld     [r1, #96]               )
173 +       PLD(    beq     11f                     )
174 +       PLD(    cmp     r2, ip                  )
175 +       PLD(    pld     [r1, #128]              )
176 +       PLD(    blt     11f                     )
177 +       PLD(    sub     r2, r2, ip              )
178 +10:    PLD(    mov     r3, lr, pull #\pull     )
179 +       PLD(    ldr     lr, [r1], #4            )
180 +       PLD(    subs    ip, ip, #4              )
181 +       PLD(    orr     r3, r3, lr, push #\push )
182 +       PLD(    str     r3, [r0], #4            )
183 +       PLD(    bgt     10b                     )
184 +
185 +11:    PLD(    pld     [r1, #128]              )
186 +12:            mov     r3, lr, pull #\pull  /* unused part of the previously loaded word */
187 +               ldmia   r1!, {r4 - r9, ip, lr}  /* next 8 source words */
188 +               subs    r2, r2, #32
189 +               orr     r3, r3, r4, push #\push  /* each output word merges two neighbouring source words */
190 +               mov     r4, r4, pull #\pull
191 +               orr     r4, r4, r5, push #\push
192 +               mov     r5, r5, pull #\pull
193 +               orr     r5, r5, r6, push #\push
194 +               mov     r6, r6, pull #\pull
195 +               orr     r6, r6, r7, push #\push
196 +               mov     r7, r7, pull #\pull
197 +               orr     r7, r7, r8, push #\push
198 +               mov     r8, r8, pull #\pull
199 +               orr     r8, r8, r9, push #\push
200 +               mov     r9, r9, pull #\pull
201 +               orr     r9, r9, ip, push #\push
202 +               mov     ip, ip, pull #\pull
203 +               orr     ip, ip, lr, push #\push
204 +               stmia   r0!, {r3 - r9, ip}  /* store 8 merged words; lr carries over to the next step */
205 +               bge     11b  /* flags still come from the subs above */
206 +       PLD(    cmn     r2, #97                 )
207 +       PLD(    bge     12b                     )
208 +       PLD(    add     r2, r2, #97             )
209 +               cmn     r2, #16
210 +               blt     14f  /* not enough left for a 16-byte step */
211 +13:            mov     r3, lr, pull #\pull  /* single 16-byte step, same merge scheme */
212 +               ldmia   r1!, {r4 - r6, lr}
213 +               sub     r2, r2, #16
214 +               orr     r3, r3, r4, push #\push
215 +               mov     r4, r4, pull #\pull
216 +               orr     r4, r4, r5, push #\push
217 +               mov     r5, r5, pull #\pull
218 +               orr     r5, r5, r6, push #\push
219 +               mov     r6, r6, pull #\pull
220 +               orr     r6, r6, lr, push #\push
221 +               stmia   r0!, {r3 - r6}
222 +14:            adds    r2, r2, #28  /* undo the earlier bias of 28 */
223 +               ldmfd   sp!, {r5 - r9}
224 +               blt     16f  /* no whole word left */
225 +15:            mov     r3, lr, pull #\pull  /* one merged word per iteration */
226 +               ldr     lr, [r1], #4
227 +               subs    r2, r2, #4
228 +               orr     r3, r3, lr, push #\push
229 +               str     r3, [r0], #4
230 +               bge     15b
231 +16:  /* macro exit: the code after each expansion fixes up r1 */
232 +               .endm
233 +
234 +
235 +               forward_copy_shift      pull=8  push=24  /* fall-through case: src & 3 == 1 */
236 +               sub     r1, r1, #3  /* r1 is one word past lr; 3 bytes of lr are still uncopied */
237 +               b       7b  /* finish with the byte tail */
238 +
239 +17:            forward_copy_shift      pull=16 push=16  /* src & 3 == 2 */
240 +               sub     r1, r1, #2  /* 2 bytes of lr are still uncopied */
241 +               b       7b
242 +
243 +18:            forward_copy_shift      pull=24 push=8  /* src & 3 == 3 */
244 +               sub     r1, r1, #1  /* 1 byte of lr is still uncopied */
245 +               b       7b
246 +
247 +               .size   memcpy, . - memcpy
248 +END(memcpy)
249 +libc_hidden_builtin_def (memcpy)
250 --- /dev/null   2004-06-02 16:28:12.000000000 -0500
251 +++ glibc-2.2.5/sysdeps/arm/memmove.S   2004-09-03 19:00:39.000000000 -0500
252 @@ -0,0 +1,251 @@
253 +/*
254 + *   Optimized memmove implementation for ARM processors
255 + *
256 + *     Author:         Nicolas Pitre
257 + *     Created:        Dec 23, 2003
258 + *     Copyright:      (C) MontaVista Software, Inc.
259 + *
260 + *   This file is free software; you can redistribute it and/or
261 + *   modify it under the terms of the GNU Lesser General Public
262 + *   License as published by the Free Software Foundation; either
263 + *   version 2.1 of the License, or (at your option) any later version.
264 + *
265 + *   This file is distributed in the hope that it will be useful,
266 + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
267 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
268 + *   Lesser General Public License for more details.
269 + */
270 +
271 +#include <sysdep.h>
272 +
273 +
274 +/*
275 + * Endian independent macros for shifting bytes within registers.
276 + */
277 +#ifndef __ARMEB__
278 +#define pull            lsr
279 +#define push            lsl
280 +#else
281 +#define pull            lsl
282 +#define push            lsr
283 +#endif
284 +
285 +/*
286 + * Enable data preload for architectures that support it (ARMv5 and above)
287 + */
288 +#if defined(__ARM_ARCH_5__) || \
289 +    defined(__ARM_ARCH_5T__) || \
290 +    defined(__ARM_ARCH_5TE__)
291 +#define PLD(code...)   code
292 +#else
293 +#define PLD(code...)
294 +#endif
295 +
296 +
297 +/* void *memmove (void *dst, const void *src, size_t n) */
298 +ENTRY(memmove)  /* r0 = dst, r1 = src, r2 = byte count */
299 +               subs    ip, r0, r1  /* ip = dst - src */
300 +               cmphi   r2, ip  /* only when dst > src: compare count with the gap */
301 +               bls     memcpy(PLT)  /* dst <= src or count <= gap: branch to memcpy */
302 +
303 +               stmfd   sp!, {r0, r4, lr}  /* r0 is reloaded from here on every exit path */
304 +               add     r1, r1, r2  /* r1 = src + count */
305 +               add     r0, r0, r2  /* r0 = dst + count; the copy runs downwards from the ends */
306 +               subs    r2, r2, #4  /* r2 = count - 4 */
307 +               blt     25f  /* fewer than 4 bytes: byte tail only */
308 +               ands    ip, r0, #3  /* ip = (end of dst) & 3 */
309 +       PLD(    pld     [r1, #-4]               )
310 +               bne     26f  /* end of dst is not word aligned */
311 +               ands    ip, r1, #3  /* ip = (end of src) & 3 */
312 +               bne     27f  /* end of src is not word aligned: shifted copy */
313 +
314 +19:            subs    r2, r2, #4  /* both ends word aligned; on entry r2 = remaining - 4 */
315 +               blt     24f  /* remaining < 8 */
316 +               subs    r2, r2, #8
317 +               blt     23f  /* remaining < 16 */
318 +               subs    r2, r2, #16
319 +               blt     22f  /* remaining < 32 */
320 +
321 +       PLD(    pld     [r1, #-32]              )
322 +       PLD(    subs    r2, r2, #96             )
323 +               stmfd   sp!, {r5 - r8}  /* restored after the loop */
324 +       PLD(    blt     21f                     )
325 +
326 +       PLD(    @ cache alignment               )
327 +       PLD(    ands    ip, r1, #31             )
328 +       PLD(    pld     [r1, #-64]              )
329 +       PLD(    beq     20f                     )
330 +       PLD(    cmp     r2, ip                  )
331 +       PLD(    pld     [r1, #-96]              )
332 +       PLD(    blt     20f                     )
333 +       PLD(    cmp     ip, #16                 )
334 +       PLD(    sub     r2, r2, ip              )
335 +       PLD(    ldmgedb r1!, {r3 - r6}          )
336 +       PLD(    stmgedb r0!, {r3 - r6}          )
337 +       PLD(    beq     20f                     )
338 +       PLD(    and     ip, ip, #15             )
339 +       PLD(    cmp     ip, #8                  )
340 +       PLD(    ldr     r3, [r1, #-4]!          )
341 +       PLD(    ldrge   r4, [r1, #-4]!          )
342 +       PLD(    ldrgt   r5, [r1, #-4]!          )
343 +       PLD(    str     r3, [r0, #-4]!          )
344 +       PLD(    strge   r4, [r0, #-4]!          )
345 +       PLD(    strgt   r5, [r0, #-4]!          )
346 +
347 +20:    PLD(    pld     [r1, #-96]              )
348 +       PLD(    pld     [r1, #-128]             )
349 +21:            ldmdb   r1!, {r3, r4, ip, lr}  /* 16 bytes per ldm/stm pair, moving downwards */
350 +               subs    r2, r2, #32
351 +               stmdb   r0!, {r3, r4, ip, lr}
352 +               ldmdb   r1!, {r3, r4, ip, lr}  /* second 16 bytes; its store is the unconditional stmdb below */
353 +               stmgedb r0!, {r3, r4, ip, lr}  /* GE: at least 32 more bytes, so copy another 32 in this pass */
354 +               ldmgedb r1!, {r3, r4, ip, lr}
355 +               stmgedb r0!, {r3, r4, ip, lr}
356 +               ldmgedb r1!, {r3, r4, ip, lr}
357 +               subges  r2, r2, #32
358 +               stmdb   r0!, {r3, r4, ip, lr}  /* stores whichever 16 bytes were loaded last */
359 +               bge     20b
360 +       PLD(    cmn     r2, #96                 )
361 +       PLD(    bge     21b                     )
362 +       PLD(    add     r2, r2, #96             )
363 +               tst     r2, #31  /* any bytes left below a full 32-byte block? */
364 +               ldmfd   sp!, {r5 - r8}
365 +               ldmeqfd sp!, {r0, r4, pc}  /* none: restore r0 and return */
366 +
367 +               tst     r2, #16
368 +22:            ldmnedb r1!, {r3, r4, ip, lr}  /* 16 bytes; the blt into 22 arrives with NE set (negative result) */
369 +               stmnedb r0!, {r3, r4, ip, lr}
370 +
371 +               tst     r2, #8
372 +23:            ldmnedb r1!, {r3, r4}  /* 8 bytes */
373 +               stmnedb r0!, {r3, r4}
374 +
375 +               tst     r2, #4
376 +24:            ldrne   r3, [r1, #-4]!  /* 4 bytes */
377 +               strne   r3, [r0, #-4]!
378 +
379 +25:            ands    r2, r2, #3  /* r2 = trailing byte count (0-3) */
380 +               ldmeqfd sp!, {r0, r4, pc}  /* nothing left: return */
381 +
382 +               cmp     r2, #2  /* one byte always, a second if r2 >= 2, a third if r2 == 3 */
383 +               ldrb    r3, [r1, #-1]
384 +               ldrgeb  r4, [r1, #-2]
385 +               ldrgtb  ip, [r1, #-3]
386 +               strb    r3, [r0, #-1]
387 +               strgeb  r4, [r0, #-2]
388 +               strgtb  ip, [r0, #-3]
389 +               ldmfd   sp!, {r0, r4, pc}
390 +
391 +26:            cmp     ip, #2  /* ip = bytes to copy so the end of dst becomes word aligned (1-3) */
392 +               ldrb    r3, [r1, #-1]!
393 +               ldrgeb  r4, [r1, #-1]!
394 +               ldrgtb  lr, [r1, #-1]!
395 +               strb    r3, [r0, #-1]!
396 +               strgeb  r4, [r0, #-1]!
397 +               strgtb  lr, [r0, #-1]!
398 +               subs    r2, r2, ip  /* account for the alignment bytes just copied */
399 +               blt     25b  /* fewer than 4 bytes remain */
400 +               ands    ip, r1, #3  /* ip = (end of src) & 3 now that dst is aligned */
401 +               beq     19b  /* src is aligned too: use the aligned path */
402 +
403 +27:            bic     r1, r1, #3  /* round the src end pointer down to a word boundary */
404 +               cmp     ip, #2
405 +               ldr     r3, [r1]  /* r3 = the word at that boundary */
406 +               beq     35f  /* (end of src) & 3 == 2 */
407 +               blt     36f  /* (end of src) & 3 == 1; == 3 falls through */
408 +
409 +
410 +               .macro  backward_copy_shift push pull  /* downward copy, src misaligned; r3 = last source word loaded */
411 +
412 +               cmp     r2, #12
413 +       PLD(    pld     [r1, #-4]               )
414 +               blt     33f  /* too short for the block steps: word loop */
415 +               subs    r2, r2, #28
416 +               stmfd   sp!, {r5 - r9}  /* extra scratch registers, restored at 32 */
417 +               blt     31f  /* not enough for a 32-byte step */
418 +
419 +       PLD(    subs    r2, r2, #96             )
420 +       PLD(    pld     [r1, #-32]              )
421 +       PLD(    blt     30f                     )
422 +       PLD(    pld     [r1, #-64]              )
423 +
424 +       PLD(    @ cache alignment               )
425 +       PLD(    ands    ip, r1, #31             )
426 +       PLD(    pld     [r1, #-96]              )
427 +       PLD(    beq     29f                     )
428 +       PLD(    cmp     r2, ip                  )
429 +       PLD(    pld     [r1, #-128]             )
430 +       PLD(    blt     29f                     )
431 +       PLD(    sub     r2, r2, ip              )
432 +28:    PLD(    mov     r4, r3, push #\push     )
433 +       PLD(    ldr     r3, [r1, #-4]!          )
434 +       PLD(    subs    ip, ip, #4              )
435 +       PLD(    orr     r4, r4, r3, pull #\pull )
436 +       PLD(    str     r4, [r0, #-4]!          )
437 +       PLD(    bgt     28b                     )
438 +
439 +29:    PLD(    pld     [r1, #-128]             )
440 +30:            mov     lr, r3, push #\push  /* unused part of the previously loaded word */
441 +               ldmdb   r1!, {r3 - r9, ip}  /* next 8 source words, going down */
442 +               subs    r2, r2, #32
443 +               orr     lr, lr, ip, pull #\pull  /* each output word merges two neighbouring source words */
444 +               mov     ip, ip, push #\push
445 +               orr     ip, ip, r9, pull #\pull
446 +               mov     r9, r9, push #\push
447 +               orr     r9, r9, r8, pull #\pull
448 +               mov     r8, r8, push #\push
449 +               orr     r8, r8, r7, pull #\pull
450 +               mov     r7, r7, push #\push
451 +               orr     r7, r7, r6, pull #\pull
452 +               mov     r6, r6, push #\push
453 +               orr     r6, r6, r5, pull #\pull
454 +               mov     r5, r5, push #\push
455 +               orr     r5, r5, r4, pull #\pull
456 +               mov     r4, r4, push #\push
457 +               orr     r4, r4, r3, pull #\pull
458 +               stmdb   r0!, {r4 - r9, ip, lr}  /* store 8 merged words; r3 carries over to the next step */
459 +               bge     29b  /* flags still come from the subs above */
460 +       PLD(    cmn     r2, #96                 )
461 +       PLD(    bge     30b                     )
462 +       PLD(    add     r2, r2, #96             )
463 +               cmn     r2, #16
464 +               blt     32f  /* not enough left for a 16-byte step */
465 +31:            mov     r7, r3, push #\push  /* single 16-byte step, same merge scheme */
466 +               ldmdb   r1!, {r3 - r6}
467 +               sub     r2, r2, #16
468 +               orr     r7, r7, r6, pull #\pull
469 +               mov     r6, r6, push #\push
470 +               orr     r6, r6, r5, pull #\pull
471 +               mov     r5, r5, push #\push
472 +               orr     r5, r5, r4, pull #\pull
473 +               mov     r4, r4, push #\push
474 +               orr     r4, r4, r3, pull #\pull
475 +               stmdb   r0!, {r4 - r7}
476 +32:            adds    r2, r2, #28  /* undo the earlier bias of 28 */
477 +               ldmfd   sp!, {r5 - r9}
478 +               blt     34f  /* no whole word left */
479 +33:            mov     r4, r3, push #\push  /* one merged word per iteration */
480 +               ldr     r3, [r1, #-4]!
481 +               subs    r2, r2, #4
482 +               orr     r4, r4, r3, pull #\pull
483 +               str     r4, [r0, #-4]!
484 +               bge     33b
485 +34:  /* macro exit: the code after each expansion fixes up r1 */
486 +               .endm
487 +
488 +
489 +               backward_copy_shift     push=8  pull=24  /* fall-through case: (end of src) & 3 == 3 */
490 +               add     r1, r1, #3  /* r1 addresses the word in r3; 3 of its bytes are still uncopied */
491 +               b       25b  /* finish with the byte tail */
492 +
493 +35:            backward_copy_shift     push=16 pull=16  /* (end of src) & 3 == 2 */
494 +               add     r1, r1, #2  /* 2 bytes of r3 are still uncopied */
495 +               b       25b
496 +
497 +36:            backward_copy_shift     push=24 pull=8  /* (end of src) & 3 == 1 */
498 +               add     r1, r1, #1  /* 1 byte of r3 is still uncopied */
499 +               b       25b
500 +
501 +               .size   memmove, . - memmove
502 +END(memmove)
503 +libc_hidden_builtin_def (memmove)
504 --- /dev/null   2004-06-02 16:28:12.000000000 -0500
505 +++ glibc-2.2.5/sysdeps/arm/bcopy.S     2004-09-03 19:00:39.000000000 -0500
506 @@ -0,0 +1,255 @@
507 +/*
508 + *   Optimized bcopy implementation for ARM processors
509 + *
510 + *     Author:         Nicolas Pitre
511 + *     Created:        Dec 23, 2003
512 + *     Copyright:      (C) MontaVista Software, Inc.
513 + *
514 + *   This file is free software; you can redistribute it and/or
515 + *   modify it under the terms of the GNU Lesser General Public
516 + *   License as published by the Free Software Foundation; either
517 + *   version 2.1 of the License, or (at your option) any later version.
518 + *
519 + *   This file is distributed in the hope that it will be useful,
520 + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
521 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
522 + *   Lesser General Public License for more details.
523 + */
524 +
525 +#include <sysdep.h>
526 +
527 +
528 +/*
529 + * Endian independent macros for shifting bytes within registers.
530 + */
531 +#ifndef __ARMEB__
532 +#define pull            lsr
533 +#define push            lsl
534 +#else
535 +#define pull            lsl
536 +#define push            lsr
537 +#endif
538 +
539 +/*
540 + * Enable data preload for architectures that support it (ARMv5 and above)
541 + */
542 +#if defined(__ARM_ARCH_5__) || \
543 +    defined(__ARM_ARCH_5T__) || \
544 +    defined(__ARM_ARCH_5TE__)
545 +#define PLD(code...)   code
546 +#else
547 +#define PLD(code...)
548 +#endif
549 +
550 +dst            .req    r1
551 +src            .req    r0
552 +
553 +/* void bcopy (const void *src, void *dst, size_t size) */
554 +ENTRY(bcopy)  /* src (r0), dst (r1), r2 = byte count; note the argument order is the reverse of memcpy */
555 +               subs    ip, dst, src  /* ip = dst - src */
556 +               cmphi   r2, ip  /* only when dst > src: compare count with the gap */
557 +               movls   r3, r0  /* no downward copy needed: swap r0/r1 into memcpy order */
558 +               movls   r0, r1
559 +               movls   r1, r3
560 +               bls     memcpy(PLT)  /* ... and branch to memcpy */
561 +
562 +               stmfd   sp!, {r4, lr}  /* two words only; every exit below must pop exactly {r4, pc} */
563 +               add     src, src, r2  /* src = end of source */
564 +               add     dst, dst, r2  /* dst = end of destination; the copy runs downwards */
565 +               subs    r2, r2, #4  /* r2 = count - 4 */
566 +               blt     25f  /* fewer than 4 bytes: byte tail only */
567 +               ands    ip, dst, #3  /* ip = (end of dst) & 3 */
568 +       PLD(    pld     [src, #-4]              )
569 +               bne     26f  /* end of dst is not word aligned */
570 +               ands    ip, src, #3  /* ip = (end of src) & 3 */
571 +               bne     27f  /* end of src is not word aligned: shifted copy */
572 +
573 +19:            subs    r2, r2, #4  /* both ends word aligned; on entry r2 = remaining - 4 */
574 +               blt     24f  /* remaining < 8 */
575 +               subs    r2, r2, #8
576 +               blt     23f  /* remaining < 16 */
577 +               subs    r2, r2, #16
578 +               blt     22f  /* remaining < 32 */
579 +
580 +       PLD(    pld     [src, #-32]             )
581 +       PLD(    subs    r2, r2, #96             )
582 +               stmfd   sp!, {r5 - r8}  /* restored after the loop */
583 +       PLD(    blt     21f                     )
584 +
585 +       PLD(    @ cache alignment               )
586 +       PLD(    ands    ip, src, #31            )
587 +       PLD(    pld     [src, #-64]             )
588 +       PLD(    beq     20f                     )
589 +       PLD(    cmp     r2, ip                  )
590 +       PLD(    pld     [src, #-96]             )
591 +       PLD(    blt     20f                     )
592 +       PLD(    cmp     ip, #16                 )
593 +       PLD(    sub     r2, r2, ip              )
594 +       PLD(    ldmgedb src!, {r3 - r6}         )
595 +       PLD(    stmgedb dst!, {r3 - r6}         )
596 +       PLD(    beq     20f                     )
597 +       PLD(    and     ip, ip, #15             )
598 +       PLD(    cmp     ip, #8                  )
599 +       PLD(    ldr     r3, [src, #-4]!         )
600 +       PLD(    ldrge   r4, [src, #-4]!         )
601 +       PLD(    ldrgt   r5, [src, #-4]!         )
602 +       PLD(    str     r3, [dst, #-4]!         )
603 +       PLD(    strge   r4, [dst, #-4]!         )
604 +       PLD(    strgt   r5, [dst, #-4]!         )
605 +
606 +20:    PLD(    pld     [src, #-96]             )
607 +       PLD(    pld     [src, #-128]            )
608 +21:            ldmdb   src!, {r3, r4, ip, lr}  /* 16 bytes per ldm/stm pair, moving downwards */
609 +               subs    r2, r2, #32
610 +               stmdb   dst!, {r3, r4, ip, lr}
611 +               ldmdb   src!, {r3, r4, ip, lr}  /* second 16 bytes; its store is the unconditional stmdb below */
612 +               stmgedb dst!, {r3, r4, ip, lr}  /* GE: at least 32 more bytes, so copy another 32 in this pass */
613 +               ldmgedb src!, {r3, r4, ip, lr}
614 +               stmgedb dst!, {r3, r4, ip, lr}
615 +               ldmgedb src!, {r3, r4, ip, lr}
616 +               subges  r2, r2, #32
617 +               stmdb   dst!, {r3, r4, ip, lr}  /* stores whichever 16 bytes were loaded last */
618 +               bge     20b
619 +       PLD(    cmn     r2, #96                 )
620 +       PLD(    bge     21b                     )
621 +       PLD(    add     r2, r2, #96             )
622 +               tst     r2, #31  /* any bytes left below a full 32-byte block? */
623 +               ldmfd   sp!, {r5 - r8}
624 +               ldmeqfd sp!, {r4, pc}  /* none: return */
625 +
626 +               tst     r2, #16
627 +22:            ldmnedb src!, {r3, r4, ip, lr}  /* 16 bytes; the blt into 22 arrives with NE set (negative result) */
628 +               stmnedb dst!, {r3, r4, ip, lr}
629 +
630 +               tst     r2, #8
631 +23:            ldmnedb src!, {r3, r4}  /* 8 bytes */
632 +               stmnedb dst!, {r3, r4}
633 +
634 +               tst     r2, #4
635 +24:            ldrne   r3, [src, #-4]!  /* 4 bytes */
636 +               strne   r3, [dst, #-4]!
637 +
638 +25:            ands    r2, r2, #3  /* r2 = trailing byte count (0-3) */
639 +               ldmeqfd sp!, {r4, pc}  /* nothing left: return; pop list matches the {r4, lr} push at entry */
640 +
641 +               cmp     r2, #2  /* one byte always, a second if r2 >= 2, a third if r2 == 3 */
642 +               ldrb    r3, [src, #-1]
643 +               ldrgeb  r4, [src, #-2]
644 +               ldrgtb  ip, [src, #-3]
645 +               strb    r3, [dst, #-1]
646 +               strgeb  r4, [dst, #-2]
647 +               strgtb  ip, [dst, #-3]
648 +               ldmfd   sp!, {r4, pc}  /* pop list matches the {r4, lr} push at entry */
649 +
650 +26:            cmp     ip, #2  /* ip = bytes to copy so the end of dst becomes word aligned (1-3) */
651 +               ldrb    r3, [src, #-1]!
652 +               ldrgeb  r4, [src, #-1]!
653 +               ldrgtb  lr, [src, #-1]!
654 +               strb    r3, [dst, #-1]!
655 +               strgeb  r4, [dst, #-1]!
656 +               strgtb  lr, [dst, #-1]!
657 +               subs    r2, r2, ip  /* account for the alignment bytes just copied */
658 +               blt     25b  /* fewer than 4 bytes remain */
659 +               ands    ip, src, #3  /* ip = (end of src) & 3 now that dst is aligned */
660 +               beq     19b  /* src is aligned too: use the aligned path */
661 +
662 +27:            bic     src, src, #3  /* round the src end pointer down to a word boundary */
663 +               cmp     ip, #2
664 +               ldr     r3, [src]  /* r3 = the word at that boundary */
665 +               beq     35f  /* (end of src) & 3 == 2 */
666 +               blt     36f  /* (end of src) & 3 == 1; == 3 falls through */
667 +
668 +
669 +               .macro  backward_copy_shift push pull  /* downward copy, src misaligned; r3 = last source word loaded */
670 +
671 +               cmp     r2, #12
672 +       PLD(    pld     [src, #-4]              )
673 +               blt     33f  /* too short for the block steps: word loop */
674 +               subs    r2, r2, #28
675 +               stmfd   sp!, {r5 - r9}  /* extra scratch registers, restored at 32 */
676 +               blt     31f  /* not enough for a 32-byte step */
677 +
678 +       PLD(    subs    r2, r2, #96             )
679 +       PLD(    pld     [src, #-32]             )
680 +       PLD(    blt     30f                     )
681 +       PLD(    pld     [src, #-64]             )
682 +
683 +       PLD(    @ cache alignment               )
684 +       PLD(    ands    ip, src, #31            )
685 +       PLD(    pld     [src, #-96]             )
686 +       PLD(    beq     29f                     )
687 +       PLD(    cmp     r2, ip                  )
688 +       PLD(    pld     [src, #-128]            )
689 +       PLD(    blt     29f                     )
690 +       PLD(    sub     r2, r2, ip              )
691 +28:    PLD(    mov     r4, r3, push #\push     )
692 +       PLD(    ldr     r3, [src, #-4]!         )
693 +       PLD(    subs    ip, ip, #4              )
694 +       PLD(    orr     r4, r4, r3, pull #\pull )
695 +       PLD(    str     r4, [dst, #-4]!         )
696 +       PLD(    bgt     28b                     )
697 +
698 +29:    PLD(    pld     [src, #-128]            )
699 +30:            mov     lr, r3, push #\push  /* unused part of the previously loaded word */
700 +               ldmdb   src!, {r3 - r9, ip}  /* next 8 source words, going down */
701 +               subs    r2, r2, #32
702 +               orr     lr, lr, ip, pull #\pull  /* each output word merges two neighbouring source words */
703 +               mov     ip, ip, push #\push
704 +               orr     ip, ip, r9, pull #\pull
705 +               mov     r9, r9, push #\push
706 +               orr     r9, r9, r8, pull #\pull
707 +               mov     r8, r8, push #\push
708 +               orr     r8, r8, r7, pull #\pull
709 +               mov     r7, r7, push #\push
710 +               orr     r7, r7, r6, pull #\pull
711 +               mov     r6, r6, push #\push
712 +               orr     r6, r6, r5, pull #\pull
713 +               mov     r5, r5, push #\push
714 +               orr     r5, r5, r4, pull #\pull
715 +               mov     r4, r4, push #\push
716 +               orr     r4, r4, r3, pull #\pull
717 +               stmdb   dst!, {r4 - r9, ip, lr}  /* store 8 merged words; r3 carries over to the next step */
718 +               bge     29b  /* flags still come from the subs above */
719 +       PLD(    cmn     r2, #96                 )
720 +       PLD(    bge     30b                     )
721 +       PLD(    add     r2, r2, #96             )
722 +               cmn     r2, #16
723 +               blt     32f  /* not enough left for a 16-byte step */
724 +31:            mov     r7, r3, push #\push  /* single 16-byte step, same merge scheme */
725 +               ldmdb   src!, {r3 - r6}
726 +               sub     r2, r2, #16
727 +               orr     r7, r7, r6, pull #\pull
728 +               mov     r6, r6, push #\push
729 +               orr     r6, r6, r5, pull #\pull
730 +               mov     r5, r5, push #\push
731 +               orr     r5, r5, r4, pull #\pull
732 +               mov     r4, r4, push #\push
733 +               orr     r4, r4, r3, pull #\pull
734 +               stmdb   dst!, {r4 - r7}
735 +32:            adds    r2, r2, #28  /* undo the earlier bias of 28 */
736 +               ldmfd   sp!, {r5 - r9}
737 +               blt     34f  /* no whole word left */
738 +33:            mov     r4, r3, push #\push  /* one merged word per iteration */
739 +               ldr     r3, [src, #-4]!
740 +               subs    r2, r2, #4
741 +               orr     r4, r4, r3, pull #\pull
742 +               str     r4, [dst, #-4]!
743 +               bge     33b
744 +34:  /* macro exit: the code after each expansion fixes up src */
745 +               .endm
746 +
747 +
748 +               backward_copy_shift     push=8  pull=24  /* fall-through case: (end of src) & 3 == 3 */
749 +               add     src, src, #3  /* src addresses the word in r3; 3 of its bytes are still uncopied */
750 +               b       25b  /* finish with the byte tail */
751 +
752 +35:            backward_copy_shift     push=16 pull=16  /* (end of src) & 3 == 2 */
753 +               add     src, src, #2  /* 2 bytes of r3 are still uncopied */
754 +               b       25b
755 +
756 +36:            backward_copy_shift     push=24 pull=8  /* (end of src) & 3 == 1 */
757 +               add     src, src, #1  /* 1 byte of r3 is still uncopied */
758 +               b       25b
759 +
760 +               .size   bcopy, . - bcopy
761 +END(bcopy)