1 --- /dev/null 2004-02-02 20:32:13.000000000 +0000
2 +++ sysdeps/arm/memmove.S 2004-03-20 18:37:23.000000000 +0000
5 + * Optimized memmove implementation for ARM processors
7 + * Author: Nicolas Pitre
8 + * Created: Dec 23, 2003
9 + * Copyright: (C) MontaVista Software, Inc.
11 + * This file is free software; you can redistribute it and/or
12 + * modify it under the terms of the GNU Lesser General Public
13 + * License as published by the Free Software Foundation; either
14 + * version 2.1 of the License, or (at your option) any later version.
16 + * This file is distributed in the hope that it will be useful,
17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 + * Lesser General Public License for more details.
26 + * Endian independent macros for shifting bytes within registers.
37 + * Enable data preload for architectures that support it (ARMv5 and above)
39 +#if defined(__ARM_ARCH_5__) || \
40 + defined(__ARM_ARCH_5T__) || \
41 + defined(__ARM_ARCH_5TE__) /* only these three ARMv5 predefines are tested; NOTE(review): later architecture macros are not listed although the comment above says "and above" - confirm intent */
42 +#define PLD(code...) code /* PLD(x) expands to x, so the wrapped preload code is assembled */
48 +/* char * memmove (char *dst, const char *src, size_t size) */
54 + stmfd sp!, {r0, r4, lr} /* descending-address copy path: save r0, r4 and lr; every exit below pops r0 back, so the caller receives the original r0 */
60 + PLD( pld [r1, #-4] ) /* preload hint for r1 - 4; PLD( ) lines are assembled only when the PLD macro emits its argument */
72 + PLD( pld [r1, #-32] ) /* preload hint for r1 - 32 */
73 + PLD( subs r2, r2, #96 ) /* r2 -= 96, setting flags; balanced by the add #96 further down */
74 + stmfd sp!, {r5 - r8} /* save r5 - r8; restored by the ldmfd of the same list below */
77 + PLD( @ cache alignment )
78 + PLD( ands ip, r1, #31 ) /* ip = r1 & 31, the byte offset of r1 above a 32-byte boundary; sets flags */
79 + PLD( pld [r1, #-64] ) /* preload hint for r1 - 64 */
82 + PLD( pld [r1, #-96] ) /* preload hint for r1 - 96 */
85 + PLD( sub r2, r2, ip ) /* r2 -= ip (the alignment offset computed above) */
86 + PLD( ldmgedb r1!, {r3 - r6} ) /* if GE (flags come from a line not visible in this excerpt): load 4 words from below r1, r1 -= 16 */
87 + PLD( stmgedb r0!, {r3 - r6} ) /* if GE: store those 4 words below r0, r0 -= 16 */
89 + PLD( and ip, ip, #15 ) /* ip &= 15 (flags untouched) */
91 + PLD( ldr r3, [r1, #-4]! ) /* r1 -= 4, then load one word */
92 + PLD( ldrge r4, [r1, #-4]! ) /* if GE: load a second word the same way */
93 + PLD( ldrgt r5, [r1, #-4]! ) /* if GT: load a third word */
94 + PLD( str r3, [r0, #-4]! ) /* r0 -= 4, then store the first word */
95 + PLD( strge r4, [r0, #-4]! ) /* if GE: store the second word */
96 + PLD( strgt r5, [r0, #-4]! ) /* if GT: store the third word */
98 +20: PLD( pld [r1, #-96] ) /* label 20: preload hint for r1 - 96 */
99 + PLD( pld [r1, #-128] ) /* preload hint for r1 - 128 */
100 +21: ldmdb r1!, {r3, r4, ip, lr} /* label 21: load 4 words (16 bytes) from below r1, r1 -= 16 */
102 + stmdb r0!, {r3, r4, ip, lr} /* store them below r0, r0 -= 16 */
103 + ldmdb r1!, {r3, r4, ip, lr} /* load the next 16 bytes unconditionally */
104 + stmgedb r0!, {r3, r4, ip, lr} /* store them here only if GE; NOTE(review): when not GE the final unconditional stmdb below appears to be the store that writes them - confirm against the full file */
105 + ldmgedb r1!, {r3, r4, ip, lr} /* if GE: load 16 more bytes */
106 + stmgedb r0!, {r3, r4, ip, lr} /* if GE: store them */
107 + ldmgedb r1!, {r3, r4, ip, lr} /* if GE: load 16 more bytes */
109 + stmdb r0!, {r3, r4, ip, lr} /* store whatever the most recent load fetched */
113 + PLD( add r2, r2, #96 ) /* undo the -96 bias applied to r2 above */
115 + ldmfd sp!, {r5 - r8} /* restore r5 - r8 */
116 + ldmeqfd sp!, {r0, r4, pc} /* if EQ: restore r0 and r4 and return by popping the saved lr into pc */
119 +22: ldmnedb r1!, {r3, r4, ip, lr} /* label 22, if NE: load 16 bytes from below r1 */
120 + stmnedb r0!, {r3, r4, ip, lr} /* if NE: store them below r0 */
123 +23: ldmnedb r1!, {r3, r4} /* label 23, if NE: load 8 bytes from below r1 */
124 + stmnedb r0!, {r3, r4} /* if NE: store them below r0 */
127 +24: ldrne r3, [r1, #-4]! /* label 24, if NE: r1 -= 4, then load one word */
128 + strne r3, [r0, #-4]! /* if NE: r0 -= 4, then store it */
131 + ldmeqfd sp!, {r0, r4, pc} /* if EQ: restore r0 and r4 and return */
135 + ldrgeb r4, [r1, #-2] /* if GE: load the byte at r1 - 2 (no writeback) */
136 + ldrgtb ip, [r1, #-3] /* if GT: load the byte at r1 - 3 */
138 + strgeb r4, [r0, #-2] /* if GE: store that byte at r0 - 2 */
139 + strgtb ip, [r0, #-3] /* if GT: store that byte at r0 - 3 */
140 + ldmfd sp!, {r0, r4, pc} /* restore r0 and r4 and return */
143 + ldrb r3, [r1, #-1]! /* r1 -= 1, then load one byte */
144 + ldrgeb r4, [r1, #-1]! /* if GE: load a second byte the same way */
145 + ldrgtb lr, [r1, #-1]! /* if GT: load a third byte into lr (lr was saved on entry) */
146 + strb r3, [r0, #-1]! /* r0 -= 1, then store the first byte */
147 + strgeb r4, [r0, #-1]! /* if GE: store the second byte */
148 + strgtb lr, [r0, #-1]! /* if GT: store the third byte */
161 + .macro backward_copy_shift push pull /* macro taking two parameters that the body uses as immediate shift counts; it copies downwards, building each stored word from two adjacent loaded words */
164 + PLD( pld [r1, #-4] ) /* preload hint for r1 - 4 (assembled only when PLD emits its argument) */
167 + stmfd sp!, {r5 - r9} /* save r5 - r9; restored by the ldmfd just after label 32 */
170 + PLD( subs r2, r2, #96 ) /* r2 -= 96, setting flags; balanced by the add #96 below */
171 + PLD( pld [r1, #-32] ) /* preload hint for r1 - 32 */
173 + PLD( pld [r1, #-64] ) /* preload hint for r1 - 64 */
175 + PLD( @ cache alignment )
176 + PLD( ands ip, r1, #31 ) /* ip = r1 & 31, the byte offset of r1 above a 32-byte boundary; sets flags */
177 + PLD( pld [r1, #-96] ) /* preload hint for r1 - 96 */
180 + PLD( pld [r1, #-128] ) /* preload hint for r1 - 128 */
182 + PLD( sub r2, r2, ip ) /* r2 -= ip (the alignment offset computed above) */
183 +28: PLD( mov r4, r3, push #\push ) /* label 28: r4 = previously loaded word r3, shifted (the shift operations named here are defined outside this excerpt) */
184 + PLD( ldr r3, [r1, #-4]! ) /* r1 -= 4, then load the next lower source word into r3 */
185 + PLD( subs ip, ip, #4 ) /* ip -= 4, setting flags */
186 + PLD( orr r4, r4, r3, pull #\pull ) /* merge in the new word, shifted the other way */
187 + PLD( str r4, [r0, #-4]! ) /* r0 -= 4, then store the merged word */
190 +29: PLD( pld [r1, #-128] ) /* label 29: preload hint for r1 - 128 */
191 +30: mov lr, r3, push #\push /* label 30: lr = shifted part of the previous word r3 */
192 + ldmdb r1!, {r3 - r9, ip} /* load 8 words from below r1 (r3 lowest address, ip highest), r1 -= 32 */
194 + orr lr, lr, ip, pull #\pull /* complete lr with the shifted part of ip */
195 + mov ip, ip, push #\push /* same two-step merge, one word lower each time: ip ... */
196 + orr ip, ip, r9, pull #\pull /* ... with r9 */
197 + mov r9, r9, push #\push /* r9 ... */
198 + orr r9, r9, r8, pull #\pull /* ... with r8 */
199 + mov r8, r8, push #\push /* r8 ... */
200 + orr r8, r8, r7, pull #\pull /* ... with r7 */
201 + mov r7, r7, push #\push /* r7 ... */
202 + orr r7, r7, r6, pull #\pull /* ... with r6 */
203 + mov r6, r6, push #\push /* r6 ... */
204 + orr r6, r6, r5, pull #\pull /* ... with r5 */
205 + mov r5, r5, push #\push /* r5 ... */
206 + orr r5, r5, r4, pull #\pull /* ... with r4 */
207 + mov r4, r4, push #\push /* r4 ... */
208 + orr r4, r4, r3, pull #\pull /* ... with r3; r3 itself is carried into the next merge */
209 + stmdb r0!, {r4 - r9, ip, lr} /* store the 8 merged words below r0, r0 -= 32 */
213 + PLD( add r2, r2, #96 ) /* undo the -96 bias applied to r2 above */
216 +31: mov r7, r3, push #\push /* label 31: 4-word variant of the merge; r7 starts from the previous word r3 */
217 + ldmdb r1!, {r3 - r6} /* load 4 words from below r1, r1 -= 16 */
219 + orr r7, r7, r6, pull #\pull /* complete r7 with the shifted part of r6 */
220 + mov r6, r6, push #\push /* r6 ... */
221 + orr r6, r6, r5, pull #\pull /* ... with r5 */
222 + mov r5, r5, push #\push /* r5 ... */
223 + orr r5, r5, r4, pull #\pull /* ... with r4 */
224 + mov r4, r4, push #\push /* r4 ... */
225 + orr r4, r4, r3, pull #\pull /* ... with r3 */
226 + stmdb r0!, {r4 - r7} /* store the 4 merged words below r0, r0 -= 16 */
227 +32: adds r2, r2, #28 /* label 32: r2 += 28, setting flags */
228 + ldmfd sp!, {r5 - r9} /* restore r5 - r9 saved at the top of the macro */
230 +33: mov r4, r3, push #\push /* label 33: r4 = shifted part of the previous word r3 */
233 + orr r4, r4, r3, pull #\pull /* merge with r3; NOTE(review): the load that refreshes r3 between these two lines is not visible in this excerpt */
240 + backward_copy_shift push=8 pull=24 /* expand the macro with shift counts 8 and 24 */
244 +35: backward_copy_shift push=16 pull=16 /* label 35: expansion with shift counts 16 and 16 */
248 +36: backward_copy_shift push=24 pull=8 /* label 36: expansion with shift counts 24 and 8 */
252 + .size memmove, . - memmove /* symbol size = distance from the memmove label to this point */
254 +libc_hidden_builtin_def (memmove) /* NOTE(review): macro defined outside this excerpt; presumably sets up the hidden internal alias for memmove - confirm */
255 --- /dev/null 2004-02-02 20:32:13.000000000 +0000
256 +++ sysdeps/arm/bcopy.S 2004-03-20 18:37:48.000000000 +0000
259 + * Optimized bcopy implementation for ARM processors
261 + * Author: Nicolas Pitre
262 + * Created: Dec 23, 2003
263 + * Copyright: (C) MontaVista Software, Inc.
265 + * This file is free software; you can redistribute it and/or
266 + * modify it under the terms of the GNU Lesser General Public
267 + * License as published by the Free Software Foundation; either
268 + * version 2.1 of the License, or (at your option) any later version.
270 + * This file is distributed in the hope that it will be useful,
271 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
272 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
273 + * Lesser General Public License for more details.
280 + * Endian independent macros for shifting bytes within registers.
291 + * Enable data preload for architectures that support it (ARMv5 and above)
293 +#if defined(__ARM_ARCH_5__) || \
294 + defined(__ARM_ARCH_5T__) || \
295 + defined(__ARM_ARCH_5TE__) /* only these three ARMv5 predefines are tested; NOTE(review): later architecture macros are not listed although the comment above says "and above" - confirm intent */
296 +#define PLD(code...) code /* PLD(x) expands to x, so the wrapped preload code is assembled */
298 +#define PLD(code...) /* alternative definition: PLD(x) expands to nothing, dropping the wrapped code */
304 +/* void bcopy (const char *src, char *dst, size_t size) */
313 + stmfd sp!, {r4, lr} /* descending-address copy path: the frame is exactly two words, r4 and lr (src and dst are register aliases defined outside this excerpt) */
319 + PLD( pld [src, #-4] ) /* preload hint for src - 4; PLD( ) lines are assembled only when the PLD macro emits its argument */
331 + PLD( pld [src, #-32] ) /* preload hint for src - 32 */
332 + PLD( subs r2, r2, #96 ) /* r2 -= 96, setting flags; balanced by the add #96 further down */
333 + stmfd sp!, {r5 - r8} /* save r5 - r8; restored by the ldmfd of the same list below */
336 + PLD( @ cache alignment )
337 + PLD( ands ip, src, #31 ) /* ip = src & 31, the byte offset of src above a 32-byte boundary; sets flags */
338 + PLD( pld [src, #-64] ) /* preload hint for src - 64 */
341 + PLD( pld [src, #-96] ) /* preload hint for src - 96 */
344 + PLD( sub r2, r2, ip ) /* r2 -= ip (the alignment offset computed above) */
345 + PLD( ldmgedb src!, {r3 - r6} ) /* if GE (flags come from a line not visible in this excerpt): load 4 words from below src, src -= 16 */
346 + PLD( stmgedb dst!, {r3 - r6} ) /* if GE: store those 4 words below dst, dst -= 16 */
348 + PLD( and ip, ip, #15 ) /* ip &= 15 (flags untouched) */
350 + PLD( ldr r3, [src, #-4]! ) /* src -= 4, then load one word */
351 + PLD( ldrge r4, [src, #-4]! ) /* if GE: load a second word the same way */
352 + PLD( ldrgt r5, [src, #-4]! ) /* if GT: load a third word */
353 + PLD( str r3, [dst, #-4]! ) /* dst -= 4, then store the first word */
354 + PLD( strge r4, [dst, #-4]! ) /* if GE: store the second word */
355 + PLD( strgt r5, [dst, #-4]! ) /* if GT: store the third word */
357 +20: PLD( pld [src, #-96] ) /* label 20: preload hint for src - 96 */
358 + PLD( pld [src, #-128] ) /* preload hint for src - 128 */
359 +21: ldmdb src!, {r3, r4, ip, lr} /* label 21: load 4 words (16 bytes) from below src, src -= 16 */
361 + stmdb dst!, {r3, r4, ip, lr} /* store them below dst, dst -= 16 */
362 + ldmdb src!, {r3, r4, ip, lr} /* load the next 16 bytes unconditionally */
363 + stmgedb dst!, {r3, r4, ip, lr} /* store them here only if GE; NOTE(review): when not GE the final unconditional stmdb below appears to be the store that writes them - confirm against the full file */
364 + ldmgedb src!, {r3, r4, ip, lr} /* if GE: load 16 more bytes */
365 + stmgedb dst!, {r3, r4, ip, lr} /* if GE: store them */
366 + ldmgedb src!, {r3, r4, ip, lr} /* if GE: load 16 more bytes */
368 + stmdb dst!, {r3, r4, ip, lr} /* store whatever the most recent load fetched */
372 + PLD( add r2, r2, #96 ) /* undo the -96 bias applied to r2 above */
374 + ldmfd sp!, {r5 - r8} /* restore r5 - r8 */
375 + ldmeqfd sp!, {r4, pc} /* if EQ: restore r4 and return by popping the saved lr into pc */
378 +22: ldmnedb src!, {r3, r4, ip, lr} /* label 22, if NE: load 16 bytes from below src */
379 + stmnedb dst!, {r3, r4, ip, lr} /* if NE: store them below dst */
382 +23: ldmnedb src!, {r3, r4} /* label 23, if NE: load 8 bytes from below src */
383 + stmnedb dst!, {r3, r4} /* if NE: store them below dst */
386 +24: ldrne r3, [src, #-4]! /* label 24, if NE: src -= 4, then load one word */
387 + strne r3, [dst, #-4]! /* if NE: dst -= 4, then store it */
390 + ldmeqfd sp!, {r4, pc} /* if EQ: return; the pop list must mirror the {r4, lr} push on entry - a three-register list here would read pc from beyond this frame */
393 + ldrb r3, [src, #-1] /* load the byte at src - 1 (no writeback) */
394 + ldrgeb r4, [src, #-2] /* if GE: load the byte at src - 2 */
395 + ldrgtb ip, [src, #-3] /* if GT: load the byte at src - 3 */
396 + strb r3, [dst, #-1] /* store the first byte at dst - 1 */
397 + strgeb r4, [dst, #-2] /* if GE: store the second byte at dst - 2 */
398 + strgtb ip, [dst, #-3] /* if GT: store the third byte at dst - 3 */
399 + ldmfd sp!, {r4, pc} /* return; pop list mirrors the {r4, lr} push on entry, same as the exit above */
402 + ldrb r3, [src, #-1]! /* src -= 1, then load one byte */
403 + ldrgeb r4, [src, #-1]! /* if GE: load a second byte the same way */
404 + ldrgtb lr, [src, #-1]! /* if GT: load a third byte into lr (lr was saved on entry) */
405 + strb r3, [dst, #-1]! /* dst -= 1, then store the first byte */
406 + strgeb r4, [dst, #-1]! /* if GE: store the second byte */
407 + strgtb lr, [dst, #-1]! /* if GT: store the third byte */
413 +27: bic src, src, #3 /* label 27: clear the low two bits of src, rounding it down to a 4-byte boundary */
420 + .macro backward_copy_shift push pull /* macro taking two parameters that the body uses as immediate shift counts; it copies downwards, building each stored word from two adjacent loaded words */
423 + PLD( pld [src, #-4] ) /* preload hint for src - 4 (assembled only when PLD emits its argument; src and dst are register aliases defined outside this excerpt) */
426 + stmfd sp!, {r5 - r9} /* save r5 - r9; restored by the ldmfd just after label 32 */
429 + PLD( subs r2, r2, #96 ) /* r2 -= 96, setting flags; balanced by the add #96 below */
430 + PLD( pld [src, #-32] ) /* preload hint for src - 32 */
432 + PLD( pld [src, #-64] ) /* preload hint for src - 64 */
434 + PLD( @ cache alignment )
435 + PLD( ands ip, src, #31 ) /* ip = src & 31, the byte offset of src above a 32-byte boundary; sets flags */
436 + PLD( pld [src, #-96] ) /* preload hint for src - 96 */
439 + PLD( pld [src, #-128] ) /* preload hint for src - 128 */
441 + PLD( sub r2, r2, ip ) /* r2 -= ip (the alignment offset computed above) */
442 +28: PLD( mov r4, r3, push #\push ) /* label 28: r4 = previously loaded word r3, shifted (the shift operations named here are defined outside this excerpt) */
443 + PLD( ldr r3, [src, #-4]! ) /* src -= 4, then load the next lower source word into r3 */
444 + PLD( subs ip, ip, #4 ) /* ip -= 4, setting flags */
445 + PLD( orr r4, r4, r3, pull #\pull ) /* merge in the new word, shifted the other way */
446 + PLD( str r4, [dst, #-4]! ) /* dst -= 4, then store the merged word */
449 +29: PLD( pld [src, #-128] ) /* label 29: preload hint for src - 128 */
450 +30: mov lr, r3, push #\push /* label 30: lr = shifted part of the previous word r3 */
451 + ldmdb src!, {r3 - r9, ip} /* load 8 words from below src (r3 lowest address, ip highest), src -= 32 */
453 + orr lr, lr, ip, pull #\pull /* complete lr with the shifted part of ip */
454 + mov ip, ip, push #\push /* same two-step merge, one word lower each time: ip ... */
455 + orr ip, ip, r9, pull #\pull /* ... with r9 */
456 + mov r9, r9, push #\push /* r9 ... */
457 + orr r9, r9, r8, pull #\pull /* ... with r8 */
458 + mov r8, r8, push #\push /* r8 ... */
459 + orr r8, r8, r7, pull #\pull /* ... with r7 */
460 + mov r7, r7, push #\push /* r7 ... */
461 + orr r7, r7, r6, pull #\pull /* ... with r6 */
462 + mov r6, r6, push #\push /* r6 ... */
463 + orr r6, r6, r5, pull #\pull /* ... with r5 */
464 + mov r5, r5, push #\push /* r5 ... */
465 + orr r5, r5, r4, pull #\pull /* ... with r4 */
466 + mov r4, r4, push #\push /* r4 ... */
467 + orr r4, r4, r3, pull #\pull /* ... with r3; r3 itself is carried into the next merge */
468 + stmdb dst!, {r4 - r9, ip, lr} /* store the 8 merged words below dst, dst -= 32 */
472 + PLD( add r2, r2, #96 ) /* undo the -96 bias applied to r2 above */
475 +31: mov r7, r3, push #\push /* label 31: 4-word variant of the merge; r7 starts from the previous word r3 */
476 + ldmdb src!, {r3 - r6} /* load 4 words from below src, src -= 16 */
478 + orr r7, r7, r6, pull #\pull /* complete r7 with the shifted part of r6 */
479 + mov r6, r6, push #\push /* r6 ... */
480 + orr r6, r6, r5, pull #\pull /* ... with r5 */
481 + mov r5, r5, push #\push /* r5 ... */
482 + orr r5, r5, r4, pull #\pull /* ... with r4 */
483 + mov r4, r4, push #\push /* r4 ... */
484 + orr r4, r4, r3, pull #\pull /* ... with r3 */
485 + stmdb dst!, {r4 - r7} /* store the 4 merged words below dst, dst -= 16 */
486 +32: adds r2, r2, #28 /* label 32: r2 += 28, setting flags */
487 + ldmfd sp!, {r5 - r9} /* restore r5 - r9 saved at the top of the macro */
489 +33: mov r4, r3, push #\push /* label 33: single-word merge; r4 = shifted part of the previous word r3 */
490 + ldr r3, [src, #-4]! /* src -= 4, then load the next lower word into r3 */
492 + orr r4, r4, r3, pull #\pull /* merge in the new word, shifted the other way */
493 + str r4, [dst, #-4]! /* dst -= 4, then store the merged word */
499 + backward_copy_shift push=8 pull=24 /* expand the macro with shift counts 8 and 24 */
503 +35: backward_copy_shift push=16 pull=16 /* label 35: expansion with shift counts 16 and 16 */
507 +36: backward_copy_shift push=24 pull=8 /* label 36: expansion with shift counts 24 and 8 */
511 + .size bcopy, . - bcopy /* symbol size = distance from the bcopy label to this point */
514 --- /dev/null 2004-02-02 20:32:13.000000000 +0000
515 +++ sysdeps/arm/memcpy.S 2004-05-02 14:33:22.000000000 +0100
518 + * Optimized memcpy implementation for ARM processors
520 + * Author: Nicolas Pitre
521 + * Created: Dec 23, 2003
522 + * Copyright: (C) MontaVista Software, Inc.
524 + * This file is free software; you can redistribute it and/or
525 + * modify it under the terms of the GNU Lesser General Public
526 + * License as published by the Free Software Foundation; either
527 + * version 2.1 of the License, or (at your option) any later version.
529 + * This file is distributed in the hope that it will be useful,
530 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
531 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
532 + * Lesser General Public License for more details.
539 + * Endian independent macros for shifting bytes within registers.
550 + * Enable data preload for architectures that support it (ARMv5 and above)
552 +#if defined(__ARM_ARCH_5__) || \
553 + defined(__ARM_ARCH_5T__) || \
554 + defined(__ARM_ARCH_5TE__) /* only these three ARMv5 predefines are tested; NOTE(review): later architecture macros are not listed although the comment above says "and above" - confirm intent */
555 +#define PLD(code...) code /* PLD(x) expands to x, so the wrapped preload code is assembled */
557 +#define PLD(code...) /* alternative definition: PLD(x) expands to nothing, dropping the wrapped code */
561 +/* char * memcpy (char *dst, const char *src, size_t size) */
565 + stmfd sp!, {r0, r4, lr} /* ascending-address copy path: save r0, r4 and lr; every exit below pops r0 back, so the caller receives the original r0 */
568 + PLD( pld [r1, #0] ) /* preload hint for r1; PLD( ) lines are assembled only when the PLD macro emits its argument */
580 + PLD( subs r2, r2, #65 ) /* r2 -= 65, setting flags; balanced by the add #65 further down */
581 + stmfd sp!, {r5 - r8} /* save r5 - r8; restored by the ldmfd of the same list below */
583 + PLD( pld [r1, #32] ) /* preload hint for r1 + 32 */
585 + PLD( @ cache alignment )
586 + PLD( ands ip, r1, #31 ) /* ip = r1 & 31, the byte offset of r1 above a 32-byte boundary; sets flags */
587 + PLD( pld [r1, #64] ) /* preload hint for r1 + 64 */
589 + PLD( rsb ip, ip, #32 ) /* ip = 32 - ip */
591 + PLD( pld [r1, #96] ) /* preload hint for r1 + 96 */
594 + PLD( sub r2, r2, ip ) /* r2 -= ip */
595 + PLD( ldmgeia r1!, {r3 - r6} ) /* if GE (flags come from a line not visible in this excerpt): load 4 words at r1, r1 += 16 */
596 + PLD( stmgeia r0!, {r3 - r6} ) /* if GE: store those 4 words at r0, r0 += 16 */
598 + PLD( and ip, ip, #15 ) /* ip &= 15 (flags untouched) */
600 + PLD( ldr r3, [r1], #4 ) /* load one word at r1, then r1 += 4 */
601 + PLD( ldrge r4, [r1], #4 ) /* if GE: load a second word the same way */
602 + PLD( ldrgt r5, [r1], #4 ) /* if GT: load a third word */
603 + PLD( str r3, [r0], #4 ) /* store the first word at r0, then r0 += 4 */
604 + PLD( strge r4, [r0], #4 ) /* if GE: store the second word */
605 + PLD( strgt r5, [r0], #4 ) /* if GT: store the third word */
607 +2: PLD( pld [r1, #96] ) /* label 2: preload hint for r1 + 96 */
608 +3: ldmia r1!, {r3 - r8, ip, lr} /* label 3: load 8 words (32 bytes) at r1, r1 += 32 */
610 + stmia r0!, {r3 - r8, ip, lr} /* store them at r0, r0 += 32 */
614 + PLD( add r2, r2, #65 ) /* undo the -65 bias applied to r2 above */
616 + ldmfd sp!, {r5 - r8} /* restore r5 - r8 */
617 + ldmeqfd sp!, {r0, r4, pc} /* if EQ: restore r0 and r4 and return by popping the saved lr into pc */
620 +4: ldmneia r1!, {r3, r4, ip, lr} /* label 4, if NE: load 16 bytes at r1, r1 += 16 */
621 + stmneia r0!, {r3, r4, ip, lr} /* if NE: store them at r0, r0 += 16 */
624 +5: ldmneia r1!, {r3, r4} /* label 5, if NE: load 8 bytes at r1, r1 += 8 */
625 + stmneia r0!, {r3, r4} /* if NE: store them at r0, r0 += 8 */
628 +6: ldrne r3, [r1], #4 /* label 6, if NE: load one word at r1, then r1 += 4 (the matching store is not visible in this excerpt) */
632 + ldmeqfd sp!, {r0, r4, pc} /* if EQ: restore r0 and r4 and return */
636 + ldrgeb r4, [r1], #1 /* if GE: load one byte at r1, then r1 += 1 */
639 + strgeb r4, [r0], #1 /* if GE: store that byte at r0, then r0 += 1 */
641 + ldmfd sp!, {r0, r4, pc} /* restore r0 and r4 and return */
646 + ldrgeb r4, [r1], #1 /* if GE: load one byte at r1, then r1 += 1 */
647 + ldrgtb lr, [r1], #1 /* if GT: load another byte into lr (lr was saved on entry) */
649 + strgeb r4, [r0], #1 /* if GE: store the r4 byte at r0, then r0 += 1 */
650 + strgtb lr, [r0], #1 /* if GT: store the lr byte */
663 + .macro forward_copy_shift pull push /* macro taking two parameters that the body uses as immediate shift counts; it copies upwards, building each stored word from two adjacent loaded words */
666 + PLD( pld [r1, #0] ) /* preload hint for r1 (assembled only when PLD emits its argument) */
669 + stmfd sp!, {r5 - r9} /* save r5 - r9; restored by the ldmfd just after label 14 */
672 + PLD( subs r2, r2, #97 ) /* r2 -= 97, setting flags; balanced by the add #97 below */
674 + PLD( pld [r1, #32] ) /* preload hint for r1 + 32 */
676 + PLD( @ cache alignment )
677 + PLD( rsb ip, r1, #36 ) /* ip = 36 - r1 */
678 + PLD( pld [r1, #64] ) /* preload hint for r1 + 64 */
679 + PLD( ands ip, ip, #31 ) /* ip &= 31, setting flags */
680 + PLD( pld [r1, #96] ) /* preload hint for r1 + 96 */
683 + PLD( pld [r1, #128] ) /* preload hint for r1 + 128 */
685 + PLD( sub r2, r2, ip ) /* r2 -= ip */
686 +10: PLD( mov r3, lr, pull #\pull ) /* label 10: r3 = previously loaded word lr, shifted (the shift operations named here are defined outside this excerpt) */
687 + PLD( ldr lr, [r1], #4 ) /* load the next source word into lr, then r1 += 4 */
688 + PLD( subs ip, ip, #4 ) /* ip -= 4, setting flags */
689 + PLD( orr r3, r3, lr, push #\push ) /* merge in the new word, shifted the other way */
690 + PLD( str r3, [r0], #4 ) /* store the merged word at r0, then r0 += 4 */
693 +11: PLD( pld [r1, #128] ) /* label 11: preload hint for r1 + 128 */
694 +12: mov r3, lr, pull #\pull /* label 12: r3 = shifted part of the previous word lr */
695 + ldmia r1!, {r4 - r9, ip, lr} /* load 8 words at r1 (r4 lowest address, lr highest), r1 += 32 */
697 + orr r3, r3, r4, push #\push /* complete r3 with the shifted part of r4 */
698 + mov r4, r4, pull #\pull /* same two-step merge, one word higher each time: r4 ... */
699 + orr r4, r4, r5, push #\push /* ... with r5 */
700 + mov r5, r5, pull #\pull /* r5 ... */
701 + orr r5, r5, r6, push #\push /* ... with r6 */
702 + mov r6, r6, pull #\pull /* r6 ... */
703 + orr r6, r6, r7, push #\push /* ... with r7 */
704 + mov r7, r7, pull #\pull /* r7 ... */
705 + orr r7, r7, r8, push #\push /* ... with r8 */
706 + mov r8, r8, pull #\pull /* r8 ... */
707 + orr r8, r8, r9, push #\push /* ... with r9 */
708 + mov r9, r9, pull #\pull /* r9 ... */
709 + orr r9, r9, ip, push #\push /* ... with ip */
710 + mov ip, ip, pull #\pull /* ip ... */
711 + orr ip, ip, lr, push #\push /* ... with lr; lr itself is carried into the next merge */
712 + stmia r0!, {r3 - r9, ip} /* store the 8 merged words at r0, r0 += 32 */
716 + PLD( add r2, r2, #97 ) /* undo the -97 bias applied to r2 above */
719 +13: mov r3, lr, pull #\pull /* label 13: 4-word variant of the merge; r3 starts from the previous word lr */
720 + ldmia r1!, {r4 - r6, lr} /* load 4 words at r1, r1 += 16 */
722 + orr r3, r3, r4, push #\push /* complete r3 with the shifted part of r4 */
723 + mov r4, r4, pull #\pull /* r4 ... */
724 + orr r4, r4, r5, push #\push /* ... with r5 */
725 + mov r5, r5, pull #\pull /* r5 ... */
726 + orr r5, r5, r6, push #\push /* ... with r6 */
727 + mov r6, r6, pull #\pull /* r6 ... */
728 + orr r6, r6, lr, push #\push /* ... with lr */
729 + stmia r0!, {r3 - r6} /* store the 4 merged words at r0, r0 += 16 */
730 +14: adds r2, r2, #28 /* label 14: r2 += 28, setting flags */
731 + ldmfd sp!, {r5 - r9} /* restore r5 - r9 saved at the top of the macro */
733 +15: mov r3, lr, pull #\pull /* label 15: r3 = shifted part of the previous word lr */
736 + orr r3, r3, lr, push #\push /* merge with lr; NOTE(review): the load that refreshes lr between these two lines is not visible in this excerpt */
743 + forward_copy_shift pull=8 push=24 /* expand the macro with shift counts 8 and 24 */
747 +17: forward_copy_shift pull=16 push=16 /* label 17: expansion with shift counts 16 and 16 */
751 +18: forward_copy_shift pull=24 push=8 /* label 18: expansion with shift counts 24 and 8 */
755 + .size memcpy, . - memcpy /* symbol size = distance from the memcpy label to this point */
757 +libc_hidden_builtin_def (memcpy) /* NOTE(review): macro defined outside this excerpt; presumably sets up the hidden internal alias for memcpy - confirm */