1 --- /dev/null 2004-02-02 20:32:13.000000000 +0000
2 +++ sysdeps/arm/memcpy.S 2004-03-20 13:25:27.000000000 +0000
5 + * Optimized memcpy implementation for ARM processors
7 + * Author: Nicolas Pitre
8 + * Created: Dec 23, 2003
9 + * Copyright: (C) MontaVista Software, Inc.
11 + * This file is free software; you can redistribute it and/or
12 + * modify it under the terms of the GNU Lesser General Public
13 + * License as published by the Free Software Foundation; either
14 + * version 2.1 of the License, or (at your option) any later version.
16 + * This file is distributed in the hope that it will be useful,
17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 + * Lesser General Public License for more details.
26 + * Endian independent macros for shifting bytes within registers.
37 + * Enable data preload for architectures that support it (ARMv5 and above)
/* PLD(code...) wraps instructions that only make sense when the pld
   (data preload) instruction is used.  When one of the three ARMv5
   macros tested below is defined, PLD() expands to its argument
   unchanged.  The definition used otherwise is not visible in this
   excerpt.
   NOTE(review): only __ARM_ARCH_5__, __ARM_ARCH_5T__ and
   __ARM_ARCH_5TE__ are tested, so a build for any other architecture
   macro presumably does not take this definition -- confirm that this
   is intended. */
39 +#if defined(__ARM_ARCH_5__) || \
40 + defined(__ARM_ARCH_5T__) || \
41 + defined(__ARM_ARCH_5TE__)
42 +#define PLD(code...) code
48 +/* void *memcpy (void *dst, const void *src, size_t n) */
/* memcpy body: ascending copy.  Every load reads from [r1] and every
   store writes to [r0], both post-incremented.  r2 is adjusted like a
   remaining-byte counter -- TODO confirm against the full file.
   This excerpt is not contiguous (the embedded patch line numbers
   skip): the branches and most of the instructions that set the flags
   tested by the conditional forms below are not shown. */
/* Save r0, r4 and lr.  Every return below reloads them as {r0, r4, pc},
   so r0 comes back with the value it had here. */
52 + stmfd sp!, {r0, r4, lr}
/* Preload builds only: bias r2 by 65; the matching add follows the
   block copy. */
67 + PLD( subs r2, r2, #65 )
/* r5-r8 are used as additional data registers by the block copy. */
68 + stmfd sp!, {r5 - r8}
70 + PLD( pld [r1, #32] )
/* Preload builds only: ip = r1 & 31, then ip = 32 - ip, i.e. the
   distance from r1 up to the next 32-byte boundary.  That amount is
   taken off r2 and copied by the conditional transfers that follow
   (16 bytes, then up to three words).  The instructions setting the
   ge/gt conditions are not visible here. */
72 + PLD( @ cache alignment )
73 + PLD( ands ip, r1, #31 )
74 + PLD( pld [r1, #64] )
76 + PLD( rsb ip, ip, #32 )
78 + PLD( pld [r1, #96] )
81 + PLD( sub r2, r2, ip )
82 + PLD( ldmgeia r1!, {r3 - r6} )
83 + PLD( stmgeia r0!, {r3 - r6} )
85 + PLD( and ip, ip, #15 )
87 + PLD( ldr r3, [r1], #4 )
88 + PLD( ldrge r4, [r1], #4 )
89 + PLD( ldrgt r5, [r1], #4 )
90 + PLD( str r3, [r0], #4 )
91 + PLD( strge r4, [r0], #4 )
92 + PLD( strgt r5, [r0], #4 )
/* Block copy: eight registers (32 bytes) are loaded and stored per
   pass.  The counter update and loop branch are not visible here. */
94 +2: PLD( pld [r1, #96] )
95 +3: ldmia r1!, {r3 - r8, ip, lr}
97 + stmia r0!, {r3 - r8, ip, lr}
/* Preload builds only: undo the 65 bias applied before the block copy. */
101 + PLD( add r2, r2, #65 )
/* Restore r5-r8, then return if the Z flag is set. */
103 + ldmfd sp!, {r5 - r8}
104 + ldmeqfd sp!, {r0, r4, pc}
/* Remainder: conditional 16-byte, 8-byte and 4-byte transfers.  The
   tests producing the ne condition are not visible here, nor is the
   store matching the word load at label 6. */
107 +4: ldmneia r1!, {r3, r4, ip, lr}
108 + stmneia r0!, {r3, r4, ip, lr}
111 +5: ldmneia r1!, {r3, r4}
112 + stmneia r0!, {r3, r4}
115 +6: ldrne r3, [r1], #4
/* Return if the Z flag is set. */
119 + ldmeqfd sp!, {r0, r4, pc}
/* Trailing bytes, copied one at a time under ge, then return. */
123 + ldrgeb r4, [r1], #1
126 + strgeb r4, [r0], #1
128 + ldmfd sp!, {r0, r4, pc}
/* Conditional single-byte copies; the surrounding unconditional
   instructions and the compare are not visible in this excerpt. */
133 + ldrgeb r4, [r1], #1
134 + ldrgtb lr, [r1], #1
136 + strgeb r4, [r0], #1
137 + strgtb lr, [r0], #1
/* forward_copy_shift pull push
   Ascending copy in which each stored word is assembled from two
   consecutive source words: the remainder of the previous word shifted
   by \pull, OR-ed with the next word shifted by \push.  `pull' and
   `push' in operand position are shift operators; presumably they are
   the endian-independent shift macros mentioned at the top of the
   file -- confirm.  lr always holds the most recently loaded source
   word and carries it into the next step.
   The excerpt is not contiguous: loop branches, some counter updates
   and the end of the macro are not shown. */
150 + .macro forward_copy_shift pull push
153 + PLD( pld [r1, #0] )
/* r5-r9 are used as additional data registers by the block loop. */
156 + stmfd sp!, {r5 - r9}
/* Preload builds only: bias r2 by 97; undone after the block loop. */
159 + PLD( subs r2, r2, #97 )
161 + PLD( pld [r1, #32] )
/* Preload builds only: ip = (36 - r1) & 31 is taken off r2 and copied
   one combined word at a time by the loop body at label 10, which
   counts ip down by 4 per word. */
163 + PLD( @ cache alignment )
164 + PLD( rsb ip, r1, #36 )
165 + PLD( pld [r1, #64] )
166 + PLD( ands ip, ip, #31 )
167 + PLD( pld [r1, #96] )
170 + PLD( pld [r1, #128] )
172 + PLD( sub r2, r2, ip )
173 +10: PLD( mov r3, lr, pull #\pull )
174 + PLD( ldr lr, [r1], #4 )
175 + PLD( subs ip, ip, #4 )
176 + PLD( orr r3, r3, lr, push #\push )
177 + PLD( str r3, [r0], #4 )
/* Block loop body: load eight source words, build eight output words
   in r3-r9 and ip, and store them (32 bytes).  lr keeps the last loaded
   word for the next pass. */
180 +11: PLD( pld [r1, #128] )
181 +12: mov r3, lr, pull #\pull
182 + ldmia r1!, {r4 - r9, ip, lr}
184 + orr r3, r3, r4, push #\push
185 + mov r4, r4, pull #\pull
186 + orr r4, r4, r5, push #\push
187 + mov r5, r5, pull #\pull
188 + orr r5, r5, r6, push #\push
189 + mov r6, r6, pull #\pull
190 + orr r6, r6, r7, push #\push
191 + mov r7, r7, pull #\pull
192 + orr r7, r7, r8, push #\push
193 + mov r8, r8, pull #\pull
194 + orr r8, r8, r9, push #\push
195 + mov r9, r9, pull #\pull
196 + orr r9, r9, ip, push #\push
197 + mov ip, ip, pull #\pull
198 + orr ip, ip, lr, push #\push
199 + stmia r0!, {r3 - r9, ip}
/* Preload builds only: undo the 97 bias. */
203 + PLD( add r2, r2, #97 )
/* Same technique on four words (16 bytes). */
206 +13: mov r3, lr, pull #\pull
207 + ldmia r1!, {r4 - r6, lr}
209 + orr r3, r3, r4, push #\push
210 + mov r4, r4, pull #\pull
211 + orr r4, r4, r5, push #\push
212 + mov r5, r5, pull #\pull
213 + orr r5, r5, r6, push #\push
214 + mov r6, r6, pull #\pull
215 + orr r6, r6, lr, push #\push
216 + stmia r0!, {r3 - r6}
/* Adjust the counter by 28 and restore r5-r9. */
217 +14: adds r2, r2, #28
218 + ldmfd sp!, {r5 - r9}
/* Single-word step; the load and store between these two lines are
   not visible in this excerpt. */
220 +15: mov r3, lr, pull #\pull
223 + orr r3, r3, lr, push #\push
/* Three expansions of forward_copy_shift, one per shift pair; in each
   pair pull + push = 32.  Presumably one expansion per possible
   source misalignment (1, 2 or 3 bytes) -- the dispatch code reaching
   labels 17 and 18 is not visible here. */
230 + forward_copy_shift pull=8 push=24
234 +17: forward_copy_shift pull=16 push=16
238 +18: forward_copy_shift pull=24 push=8
/* Record the symbol size as the distance from the memcpy label to here. */
242 + .size memcpy, . - memcpy
/* libc_hidden_builtin_def is defined outside this file; not described here. */
244 +libc_hidden_builtin_def (memcpy)
245 --- /dev/null 2004-02-02 20:32:13.000000000 +0000
246 +++ sysdeps/arm/memmove.S 2004-03-20 18:37:23.000000000 +0000
249 + * Optimized memmove implementation for ARM processors
251 + * Author: Nicolas Pitre
252 + * Created: Dec 23, 2003
253 + * Copyright: (C) MontaVista Software, Inc.
255 + * This file is free software; you can redistribute it and/or
256 + * modify it under the terms of the GNU Lesser General Public
257 + * License as published by the Free Software Foundation; either
258 + * version 2.1 of the License, or (at your option) any later version.
260 + * This file is distributed in the hope that it will be useful,
261 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
262 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
263 + * Lesser General Public License for more details.
270 + * Endian independent macros for shifting bytes within registers.
281 + * Enable data preload for architectures that support it (ARMv5 and above)
/* PLD(code...) wraps instructions that only make sense when the pld
   (data preload) instruction is used.  Two definitions are visible:
   one that expands to its argument unchanged, selected when one of the
   three ARMv5 macros below is defined, and an empty one (the
   preprocessor line separating them is not visible in this excerpt).
   NOTE(review): only __ARM_ARCH_5__, __ARM_ARCH_5T__ and
   __ARM_ARCH_5TE__ are tested, so a build for any other architecture
   macro presumably gets the empty definition -- confirm that this is
   intended. */
283 +#if defined(__ARM_ARCH_5__) || \
284 + defined(__ARM_ARCH_5T__) || \
285 + defined(__ARM_ARCH_5TE__)
286 +#define PLD(code...) code
288 +#define PLD(code...)
292 +/* void *memmove (void *dst, const void *src, size_t n) */
/* memmove body: descending copy.  Every load reads below r1 and every
   store writes below r0 (decrement-before multiple transfers and
   pre-indexed single transfers).  r2 is adjusted like a remaining-byte
   counter -- TODO confirm against the full file.
   This excerpt is not contiguous (the embedded patch line numbers
   skip): the entry checks, branches and most flag-setting instructions
   are not shown. */
/* Save r0, r4 and lr.  Every return below reloads them as {r0, r4, pc},
   so r0 comes back with the value it had here. */
298 + stmfd sp!, {r0, r4, lr}
304 + PLD( pld [r1, #-4] )
316 + PLD( pld [r1, #-32] )
/* Preload builds only: bias r2 by 96; undone after the block copy. */
317 + PLD( subs r2, r2, #96 )
/* r5-r8 are saved here; of these, the visible code uses r5 and r6 in
   the alignment transfers below. */
318 + stmfd sp!, {r5 - r8}
/* Preload builds only: ip = r1 & 31, the distance from r1 down to the
   previous 32-byte boundary.  That amount is taken off r2 and copied by
   the conditional transfers that follow (16 bytes, then up to three
   words).  The instructions setting ge/gt are not visible here. */
321 + PLD( @ cache alignment )
322 + PLD( ands ip, r1, #31 )
323 + PLD( pld [r1, #-64] )
326 + PLD( pld [r1, #-96] )
329 + PLD( sub r2, r2, ip )
330 + PLD( ldmgedb r1!, {r3 - r6} )
331 + PLD( stmgedb r0!, {r3 - r6} )
333 + PLD( and ip, ip, #15 )
335 + PLD( ldr r3, [r1, #-4]! )
336 + PLD( ldrge r4, [r1, #-4]! )
337 + PLD( ldrgt r5, [r1, #-4]! )
338 + PLD( str r3, [r0, #-4]! )
339 + PLD( strge r4, [r0, #-4]! )
340 + PLD( strgt r5, [r0, #-4]! )
/* Block copy in 16-byte groups through r3, r4, ip and lr.  The first
   load/store pair and the second load are unconditional; the middle
   store/load pairs run only under ge; the final store is unconditional
   and writes whatever was loaded last.  The counter updates and the
   loop branch are not visible in this excerpt. */
342 +20: PLD( pld [r1, #-96] )
343 + PLD( pld [r1, #-128] )
344 +21: ldmdb r1!, {r3, r4, ip, lr}
346 + stmdb r0!, {r3, r4, ip, lr}
347 + ldmdb r1!, {r3, r4, ip, lr}
348 + stmgedb r0!, {r3, r4, ip, lr}
349 + ldmgedb r1!, {r3, r4, ip, lr}
350 + stmgedb r0!, {r3, r4, ip, lr}
351 + ldmgedb r1!, {r3, r4, ip, lr}
353 + stmdb r0!, {r3, r4, ip, lr}
/* Preload builds only: undo the 96 bias. */
357 + PLD( add r2, r2, #96 )
/* Restore r5-r8, then return if the Z flag is set. */
359 + ldmfd sp!, {r5 - r8}
360 + ldmeqfd sp!, {r0, r4, pc}
/* Remainder: conditional 16-byte, 8-byte and 4-byte transfers.  The
   tests producing the ne condition are not visible here. */
363 +22: ldmnedb r1!, {r3, r4, ip, lr}
364 + stmnedb r0!, {r3, r4, ip, lr}
367 +23: ldmnedb r1!, {r3, r4}
368 + stmnedb r0!, {r3, r4}
371 +24: ldrne r3, [r1, #-4]!
372 + strne r3, [r0, #-4]!
/* Return if the Z flag is set. */
375 + ldmeqfd sp!, {r0, r4, pc}
/* Trailing bytes at fixed negative offsets (no writeback), then
   return.  The unconditional byte at offset -1 and the compare are not
   visible in this excerpt. */
379 + ldrgeb r4, [r1, #-2]
380 + ldrgtb ip, [r1, #-3]
382 + strgeb r4, [r0, #-2]
383 + strgtb ip, [r0, #-3]
384 + ldmfd sp!, {r0, r4, pc}
/* Copy one to three bytes with pre-decrement writeback on both
   pointers; the second and third byte are conditional on ge and gt. */
387 + ldrb r3, [r1, #-1]!
388 + ldrgeb r4, [r1, #-1]!
389 + ldrgtb lr, [r1, #-1]!
390 + strb r3, [r0, #-1]!
391 + strgeb r4, [r0, #-1]!
392 + strgtb lr, [r0, #-1]!
/* backward_copy_shift push pull
   Descending copy in which each stored word is assembled from two
   consecutive source words: the remainder of the previously loaded word
   shifted by \push, OR-ed with the next lower word shifted by \pull.
   `push' and `pull' in operand position are shift operators; presumably
   they are the endian-independent shift macros mentioned at the top of
   the file -- confirm.  r3 always holds the lowest source word loaded
   so far and carries it into the next step.
   The excerpt is not contiguous: loop branches, some counter updates
   and the end of the macro are not shown. */
405 + .macro backward_copy_shift push pull
408 + PLD( pld [r1, #-4] )
/* r5-r9 are used as additional data registers by the block loop. */
411 + stmfd sp!, {r5 - r9}
/* Preload builds only: bias r2 by 96; undone after the block loop. */
414 + PLD( subs r2, r2, #96 )
415 + PLD( pld [r1, #-32] )
417 + PLD( pld [r1, #-64] )
/* Preload builds only: ip = r1 & 31 is taken off r2 and copied one
   combined word at a time by the loop body at label 28, which counts
   ip down by 4 per word. */
419 + PLD( @ cache alignment )
420 + PLD( ands ip, r1, #31 )
421 + PLD( pld [r1, #-96] )
424 + PLD( pld [r1, #-128] )
426 + PLD( sub r2, r2, ip )
427 +28: PLD( mov r4, r3, push #\push )
428 + PLD( ldr r3, [r1, #-4]! )
429 + PLD( subs ip, ip, #4 )
430 + PLD( orr r4, r4, r3, pull #\pull )
431 + PLD( str r4, [r0, #-4]! )
/* Block loop body: load eight source words into r3-r9 and ip, build
   eight output words in r4-r9, ip and lr, and store them (32 bytes).
   r3 keeps the lowest loaded word for the next pass. */
434 +29: PLD( pld [r1, #-128] )
435 +30: mov lr, r3, push #\push
436 + ldmdb r1!, {r3 - r9, ip}
438 + orr lr, lr, ip, pull #\pull
439 + mov ip, ip, push #\push
440 + orr ip, ip, r9, pull #\pull
441 + mov r9, r9, push #\push
442 + orr r9, r9, r8, pull #\pull
443 + mov r8, r8, push #\push
444 + orr r8, r8, r7, pull #\pull
445 + mov r7, r7, push #\push
446 + orr r7, r7, r6, pull #\pull
447 + mov r6, r6, push #\push
448 + orr r6, r6, r5, pull #\pull
449 + mov r5, r5, push #\push
450 + orr r5, r5, r4, pull #\pull
451 + mov r4, r4, push #\push
452 + orr r4, r4, r3, pull #\pull
453 + stmdb r0!, {r4 - r9, ip, lr}
/* Preload builds only: undo the 96 bias. */
457 + PLD( add r2, r2, #96 )
/* Same technique on four words (16 bytes). */
460 +31: mov r7, r3, push #\push
461 + ldmdb r1!, {r3 - r6}
463 + orr r7, r7, r6, pull #\pull
464 + mov r6, r6, push #\push
465 + orr r6, r6, r5, pull #\pull
466 + mov r5, r5, push #\push
467 + orr r5, r5, r4, pull #\pull
468 + mov r4, r4, push #\push
469 + orr r4, r4, r3, pull #\pull
470 + stmdb r0!, {r4 - r7}
/* Adjust the counter by 28 and restore r5-r9. */
471 +32: adds r2, r2, #28
472 + ldmfd sp!, {r5 - r9}
/* Single-word step; the load and store around these two lines are not
   visible in this excerpt. */
474 +33: mov r4, r3, push #\push
477 + orr r4, r4, r3, pull #\pull
/* Three expansions of backward_copy_shift, one per shift pair; in each
   pair push + pull = 32.  Presumably one expansion per possible
   source misalignment (1, 2 or 3 bytes) -- the dispatch code reaching
   labels 35 and 36 is not visible here. */
484 + backward_copy_shift push=8 pull=24
488 +35: backward_copy_shift push=16 pull=16
492 +36: backward_copy_shift push=24 pull=8
/* Record the symbol size as the distance from the memmove label to here. */
496 + .size memmove, . - memmove
/* libc_hidden_builtin_def is defined outside this file; not described here. */
498 +libc_hidden_builtin_def (memmove)
499 --- /dev/null 2004-02-02 20:32:13.000000000 +0000
500 +++ sysdeps/arm/bcopy.S 2004-03-20 18:37:48.000000000 +0000
503 + * Optimized bcopy implementation for ARM processors
505 + * Author: Nicolas Pitre
506 + * Created: Dec 23, 2003
507 + * Copyright: (C) MontaVista Software, Inc.
509 + * This file is free software; you can redistribute it and/or
510 + * modify it under the terms of the GNU Lesser General Public
511 + * License as published by the Free Software Foundation; either
512 + * version 2.1 of the License, or (at your option) any later version.
514 + * This file is distributed in the hope that it will be useful,
515 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
516 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
517 + * Lesser General Public License for more details.
524 + * Endian independent macros for shifting bytes within registers.
535 + * Enable data preload for architectures that support it (ARMv5 and above)
/* PLD(code...) wraps instructions that only make sense when the pld
   (data preload) instruction is used.  Two definitions are visible:
   one that expands to its argument unchanged, selected when one of the
   three ARMv5 macros below is defined, and an empty one (the
   preprocessor line separating them is not visible in this excerpt).
   NOTE(review): only __ARM_ARCH_5__, __ARM_ARCH_5T__ and
   __ARM_ARCH_5TE__ are tested, so a build for any other architecture
   macro presumably gets the empty definition -- confirm that this is
   intended. */
537 +#if defined(__ARM_ARCH_5__) || \
538 + defined(__ARM_ARCH_5T__) || \
539 + defined(__ARM_ARCH_5TE__)
540 +#define PLD(code...) code
542 +#define PLD(code...)
548 +/* void bcopy (const void *src, void *dst, size_t size) */
/* bcopy body: descending copy.  Every load reads below `src' and every
   store writes below `dst' (decrement-before multiple transfers and
   pre-indexed single transfers).  `src' and `dst' are register names
   defined outside this excerpt.  r2 is adjusted like a remaining-byte
   counter -- TODO confirm against the full file.
   This excerpt is not contiguous (the embedded patch line numbers
   skip): the entry checks, branches and most flag-setting instructions
   are not shown.

   Stack frame: exactly two words (r4 and lr) are pushed on entry, so
   every return must pop exactly two words, {r4, pc}.  Two of the
   returns used to pop {dst, r4, pc}, i.e. three words, which loaded pc
   from a word that was never pushed; they now pop {r4, pc} like the
   first return. */
557 + stmfd sp!, {r4, lr}
563 + PLD( pld [src, #-4] )
575 + PLD( pld [src, #-32] )
/* Preload builds only: bias r2 by 96; undone after the block copy. */
576 + PLD( subs r2, r2, #96 )
/* r5-r8 are saved here; of these, the visible code uses r5 and r6 in
   the alignment transfers below. */
577 + stmfd sp!, {r5 - r8}
/* Preload builds only: ip = src & 31, the distance from src down to
   the previous 32-byte boundary.  That amount is taken off r2 and
   copied by the conditional transfers that follow (16 bytes, then up to
   three words).  The instructions setting ge/gt are not visible here. */
580 + PLD( @ cache alignment )
581 + PLD( ands ip, src, #31 )
582 + PLD( pld [src, #-64] )
585 + PLD( pld [src, #-96] )
588 + PLD( sub r2, r2, ip )
589 + PLD( ldmgedb src!, {r3 - r6} )
590 + PLD( stmgedb dst!, {r3 - r6} )
592 + PLD( and ip, ip, #15 )
594 + PLD( ldr r3, [src, #-4]! )
595 + PLD( ldrge r4, [src, #-4]! )
596 + PLD( ldrgt r5, [src, #-4]! )
597 + PLD( str r3, [dst, #-4]! )
598 + PLD( strge r4, [dst, #-4]! )
599 + PLD( strgt r5, [dst, #-4]! )
/* Block copy in 16-byte groups through r3, r4, ip and lr.  The first
   load/store pair and the second load are unconditional; the middle
   store/load pairs run only under ge; the final store is unconditional
   and writes whatever was loaded last.  The counter updates and the
   loop branch are not visible in this excerpt. */
601 +20: PLD( pld [src, #-96] )
602 + PLD( pld [src, #-128] )
603 +21: ldmdb src!, {r3, r4, ip, lr}
605 + stmdb dst!, {r3, r4, ip, lr}
606 + ldmdb src!, {r3, r4, ip, lr}
607 + stmgedb dst!, {r3, r4, ip, lr}
608 + ldmgedb src!, {r3, r4, ip, lr}
609 + stmgedb dst!, {r3, r4, ip, lr}
610 + ldmgedb src!, {r3, r4, ip, lr}
612 + stmdb dst!, {r3, r4, ip, lr}
/* Preload builds only: undo the 96 bias. */
616 + PLD( add r2, r2, #96 )
/* Restore r5-r8, then return if the Z flag is set. */
618 + ldmfd sp!, {r5 - r8}
619 + ldmeqfd sp!, {r4, pc}
/* Remainder: conditional 16-byte, 8-byte and 4-byte transfers.  The
   tests producing the ne condition are not visible here. */
622 +22: ldmnedb src!, {r3, r4, ip, lr}
623 + stmnedb dst!, {r3, r4, ip, lr}
626 +23: ldmnedb src!, {r3, r4}
627 + stmnedb dst!, {r3, r4}
630 +24: ldrne r3, [src, #-4]!
631 + strne r3, [dst, #-4]!
/* Return if the Z flag is set.  Pops the two words pushed on entry. */
634 + ldmeqfd sp!, {r4, pc}
/* Trailing one to three bytes at fixed negative offsets (no
   writeback); the second and third are conditional on ge and gt. */
637 + ldrb r3, [src, #-1]
638 + ldrgeb r4, [src, #-2]
639 + ldrgtb ip, [src, #-3]
640 + strb r3, [dst, #-1]
641 + strgeb r4, [dst, #-2]
642 + strgtb ip, [dst, #-3]
/* Return.  Pops the two words pushed on entry. */
643 + ldmfd sp!, {r4, pc}
/* Copy one to three bytes with pre-decrement writeback on both
   pointers; the second and third byte are conditional on ge and gt. */
646 + ldrb r3, [src, #-1]!
647 + ldrgeb r4, [src, #-1]!
648 + ldrgtb lr, [src, #-1]!
649 + strb r3, [dst, #-1]!
650 + strgeb r4, [dst, #-1]!
651 + strgtb lr, [dst, #-1]!
/* Clear the two low bits of src, rounding it down to a word boundary.
   The code that follows label 27 is not visible in this excerpt. */
657 +27: bic src, src, #3
/* backward_copy_shift push pull
   Descending copy in which each stored word is assembled from two
   consecutive source words: the remainder of the previously loaded word
   shifted by \push, OR-ed with the next lower word shifted by \pull.
   `push' and `pull' in operand position are shift operators; presumably
   they are the endian-independent shift macros mentioned at the top of
   the file -- confirm.  `src' and `dst' are register names defined
   outside this excerpt.  r3 always holds the lowest source word loaded
   so far and carries it into the next step.
   The excerpt is not contiguous: loop branches, some counter updates
   and the end of the macro are not shown. */
664 + .macro backward_copy_shift push pull
667 + PLD( pld [src, #-4] )
/* r5-r9 are used as additional data registers by the block loop. */
670 + stmfd sp!, {r5 - r9}
/* Preload builds only: bias r2 by 96; undone after the block loop. */
673 + PLD( subs r2, r2, #96 )
674 + PLD( pld [src, #-32] )
676 + PLD( pld [src, #-64] )
/* Preload builds only: ip = src & 31 is taken off r2 and copied one
   combined word at a time by the loop body at label 28, which counts
   ip down by 4 per word. */
678 + PLD( @ cache alignment )
679 + PLD( ands ip, src, #31 )
680 + PLD( pld [src, #-96] )
683 + PLD( pld [src, #-128] )
685 + PLD( sub r2, r2, ip )
686 +28: PLD( mov r4, r3, push #\push )
687 + PLD( ldr r3, [src, #-4]! )
688 + PLD( subs ip, ip, #4 )
689 + PLD( orr r4, r4, r3, pull #\pull )
690 + PLD( str r4, [dst, #-4]! )
/* Block loop body: load eight source words into r3-r9 and ip, build
   eight output words in r4-r9, ip and lr, and store them (32 bytes).
   r3 keeps the lowest loaded word for the next pass. */
693 +29: PLD( pld [src, #-128] )
694 +30: mov lr, r3, push #\push
695 + ldmdb src!, {r3 - r9, ip}
697 + orr lr, lr, ip, pull #\pull
698 + mov ip, ip, push #\push
699 + orr ip, ip, r9, pull #\pull
700 + mov r9, r9, push #\push
701 + orr r9, r9, r8, pull #\pull
702 + mov r8, r8, push #\push
703 + orr r8, r8, r7, pull #\pull
704 + mov r7, r7, push #\push
705 + orr r7, r7, r6, pull #\pull
706 + mov r6, r6, push #\push
707 + orr r6, r6, r5, pull #\pull
708 + mov r5, r5, push #\push
709 + orr r5, r5, r4, pull #\pull
710 + mov r4, r4, push #\push
711 + orr r4, r4, r3, pull #\pull
712 + stmdb dst!, {r4 - r9, ip, lr}
/* Preload builds only: undo the 96 bias. */
716 + PLD( add r2, r2, #96 )
/* Same technique on four words (16 bytes). */
719 +31: mov r7, r3, push #\push
720 + ldmdb src!, {r3 - r6}
722 + orr r7, r7, r6, pull #\pull
723 + mov r6, r6, push #\push
724 + orr r6, r6, r5, pull #\pull
725 + mov r5, r5, push #\push
726 + orr r5, r5, r4, pull #\pull
727 + mov r4, r4, push #\push
728 + orr r4, r4, r3, pull #\pull
729 + stmdb dst!, {r4 - r7}
/* Adjust the counter by 28 and restore r5-r9. */
730 +32: adds r2, r2, #28
731 + ldmfd sp!, {r5 - r9}
/* Single-word step: combine the carried word with the next lower
   source word and store the result. */
733 +33: mov r4, r3, push #\push
734 + ldr r3, [src, #-4]!
736 + orr r4, r4, r3, pull #\pull
737 + str r4, [dst, #-4]!
/* Three expansions of backward_copy_shift, one per shift pair; in each
   pair push + pull = 32.  Presumably one expansion per possible
   source misalignment (1, 2 or 3 bytes) -- the dispatch code reaching
   labels 35 and 36 is not visible here. */
743 + backward_copy_shift push=8 pull=24
747 +35: backward_copy_shift push=16 pull=16
751 +36: backward_copy_shift push=24 pull=8
/* Record the symbol size as the distance from the bcopy label to here. */
755 + .size bcopy, . - bcopy