/*
 * Optimized memmove implementation for ARM processors
 *
 * Author:      Nicolas Pitre
 * Created:     Dec 23, 2003
 * Copyright:   (C) MontaVista Software, Inc.
 *
 * This file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This file is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 */

#include <sysdep.h>


/*
 * Endian independent names for the two shift directions used when
 * splicing neighbouring words together.  Little-endian builds get
 * pull = lsr and push = lsl; big-endian builds (__ARMEB__ defined)
 * get the opposite pairing.
 */
#ifdef __ARMEB__
#define pull            lsl
#define push            lsr
#else
#define pull            lsr
#define push            lsl
#endif

/*
 * Enable data preload for architectures that support it (ARMv5TE and
 * above).
 *
 * PLD(code) expands to its argument when the pld instruction may be
 * used and to nothing otherwise, so preload hints and the bookkeeping
 * that only exists to serve them can be written inline.
 *
 * The test is phrased as "not one of the architectures that lack pld"
 * so that newer architectures are covered without having to list them.
 * Plain ARMv5 and ARMv5T are excluded because pld arrived with the
 * ARMv5TE instruction set extension.
 */
#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
     && !defined (__ARM_ARCH_5T__))
#define PLD(code...)    code
#else
#define PLD(code...)
#endif


/*
 * void *memmove (void *dst, const void *src, size_t n)
 *
 * In:   r0 = dst, r1 = src, r2 = n
 * Out:  r0 = dst (pushed on entry to the backward path and reloaded by
 *       every return sequence)
 *
 * If dst is not above src, or dst - src >= n, the call is handed to
 * memcpy with a plain branch.  Otherwise both pointers are moved to the
 * end of their buffers and the copy runs from high addresses to low.
 *
 * Scratch registers: r3, r4, ip, lr (r4 and lr are saved on entry).
 * r5-r8 (word-aligned bulk loop) or r5-r9 (shifting bulk loop) are
 * saved around the loops that use them.
 */
ENTRY(memmove)
        /* ip = dst - src; cmphi is only executed when dst > src.  */
        subs    ip, r0, r1
        cmphi   r2, ip
        bls     memcpy(PLT)

        stmfd   sp!, {r0, r4, lr}
        add     r1, r1, r2              /* r1 = one past the end of src */
        add     r0, r0, r2              /* r0 = one past the end of dst */
        subs    r2, r2, #4              /* r2 = bytes left - 4 */
        blt     25f                     /* fewer than 4 bytes in total */
        ands    ip, r0, #3
    PLD(    pld     [r1, #-4]               )
        bne     26f                     /* dst is not word aligned */
        ands    ip, r1, #3
        bne     27f                     /* dst aligned, src is not */

        /*
         * Both pointers are word aligned.  Take 4, 8 and 16 off r2 in
         * turn; the first subtraction that goes negative selects the
         * matching tail entry.  Each of those branches is taken with
         * the flags at NE, so the conditional copy at the target runs.
         */
19:     subs    r2, r2, #4
        blt     24f
        subs    r2, r2, #8
        blt     23f
        subs    r2, r2, #16
        blt     22f

        /*
         * At least 32 bytes left and r2 = bytes left - 32.  With PLD
         * enabled r2 is biased down by a further 96.
         */
    PLD(    pld     [r1, #-32]              )
    PLD(    subs    r2, r2, #96             )
        stmfd   sp!, {r5 - r8}
    PLD(    blt     21f                     )

        /*
         * PLD only: copy r1 & 31 bytes (16 at once, then up to three
         * single words) so r1 reaches a 32-byte boundary.  Skipped when
         * r1 is already there or r2 is smaller than that byte count.
         */
    PLD(    @ cache alignment               )
    PLD(    ands    ip, r1, #31             )
    PLD(    pld     [r1, #-64]              )
    PLD(    beq     20f                     )
    PLD(    cmp     r2, ip                  )
    PLD(    pld     [r1, #-96]              )
    PLD(    blt     20f                     )
    PLD(    cmp     ip, #16                 )
    PLD(    sub     r2, r2, ip              )
    PLD(    ldmgedb r1!, {r3 - r6}          )
    PLD(    stmgedb r0!, {r3 - r6}          )
    PLD(    beq     20f                     )
    PLD(    and     ip, ip, #15             )
    PLD(    cmp     ip, #8                  )
    PLD(    ldr     r3, [r1, #-4]!          )
    PLD(    ldrge   r4, [r1, #-4]!          )
    PLD(    ldrgt   r5, [r1, #-4]!          )
    PLD(    str     r3, [r0, #-4]!          )
    PLD(    strge   r4, [r0, #-4]!          )
    PLD(    strgt   r5, [r0, #-4]!          )

        /*
         * Bulk loop.  One pass always moves 32 bytes; when the first
         * subs leaves r2 non-negative the GE-conditional instructions
         * move another 32 and take 32 more off r2.  The final stmdb
         * stores whichever 16-byte group was loaded last.
         */
20: PLD(    pld     [r1, #-96]              )
    PLD(    pld     [r1, #-128]             )
21:     ldmdb   r1!, {r3, r4, ip, lr}
        subs    r2, r2, #32
        stmdb   r0!, {r3, r4, ip, lr}
        ldmdb   r1!, {r3, r4, ip, lr}
        stmgedb r0!, {r3, r4, ip, lr}
        ldmgedb r1!, {r3, r4, ip, lr}
        stmgedb r0!, {r3, r4, ip, lr}
        ldmgedb r1!, {r3, r4, ip, lr}
        subges  r2, r2, #32
        stmdb   r0!, {r3, r4, ip, lr}
        bge     20b
        /* PLD only: keep looping without preloads, then drop the bias.  */
    PLD(    cmn     r2, #96                 )
    PLD(    bge     21b                     )
    PLD(    add     r2, r2, #96             )
        tst     r2, #31                 /* anything below 32 left? */
        ldmfd   sp!, {r5 - r8}
        ldmeqfd sp!, {r0, r4, pc}       /* no: return dst */

        /*
         * Tail: bits 4, 3 and 2 of r2 say whether 16, 8 and 4 more
         * bytes are due.  Labels 22-24 are also entered directly from
         * the classification at label 19.
         */
        tst     r2, #16
22:     ldmnedb r1!, {r3, r4, ip, lr}
        stmnedb r0!, {r3, r4, ip, lr}

        tst     r2, #8
23:     ldmnedb r1!, {r3, r4}
        stmnedb r0!, {r3, r4}

        tst     r2, #4
24:     ldrne   r3, [r1, #-4]!
        strne   r3, [r0, #-4]!

        /* Final 0-3 bytes: the count is the low two bits of r2.  */
25:     ands    r2, r2, #3
        ldmeqfd sp!, {r0, r4, pc}

        cmp     r2, #2
        ldrb    r3, [r1, #-1]
        ldrgeb  r4, [r1, #-2]
        ldrgtb  ip, [r1, #-3]
        strb    r3, [r0, #-1]
        strgeb  r4, [r0, #-2]
        strgtb  ip, [r0, #-3]
        ldmfd   sp!, {r0, r4, pc}

        /*
         * dst is not word aligned: ip = dst & 3 (1..3).  Copy that many
         * single bytes so dst becomes aligned, then look at src again.
         */
26:     cmp     ip, #2
        ldrb    r3, [r1, #-1]!
        ldrgeb  r4, [r1, #-1]!
        ldrgtb  lr, [r1, #-1]!
        strb    r3, [r0, #-1]!
        strgeb  r4, [r0, #-1]!
        strgtb  lr, [r0, #-1]!
        subs    r2, r2, ip
        blt     25b                     /* fewer than 4 bytes left */
        ands    ip, r1, #3
        beq     19b                     /* src is aligned as well */

        /*
         * dst is aligned, src is not: ip = src & 3 (1..3).  Round r1
         * down to a word boundary and preload that word into r3, then
         * pick the expansion whose shift amounts match ip.  ip == 3
         * falls through into the first expansion below.
         */
27:     bic     r1, r1, #3
        cmp     ip, #2
        ldr     r3, [r1]
        beq     35f
        blt     36f


        /*
         * backward_copy_shift push pull
         *
         * Copies whole words backward from a word-aligned r1 to a
         * word-aligned r0, building every destination word out of two
         * neighbouring source words: the higher one shifted by \push
         * bits in the push direction, OR-ed with the lower one shifted
         * by \pull bits in the pull direction.
         *
         * On entry r3 holds the most recently loaded source word and
         * r2 = bytes left - 4 (not negative).  At label 34 r2 is
         * negative and its low two bits give the bytes still to copy.
         *
         * NOTE(review): push and pull are also preprocessor macros, so
         * the preprocessor is expected to rewrite the parameter names,
         * their uses and the keyword arguments at the call sites to the
         * same shift mnemonics.  Keep those spellings in step.
         */
        .macro  backward_copy_shift push pull

        cmp     r2, #12
    PLD(    pld     [r1, #-4]               )
        blt     33f                     /* under 16 bytes: word loop */
        subs    r2, r2, #28             /* r2 = bytes left - 32 */
        stmfd   sp!, {r5 - r9}
        blt     31f                     /* under 32 bytes: 16-byte step */

    PLD(    subs    r2, r2, #96             )
    PLD(    pld     [r1, #-32]              )
    PLD(    blt     30f                     )
    PLD(    pld     [r1, #-64]              )

        /* PLD only: word loop until r1 is on a 32-byte boundary.  */
    PLD(    @ cache alignment               )
    PLD(    ands    ip, r1, #31             )
    PLD(    pld     [r1, #-96]              )
    PLD(    beq     29f                     )
    PLD(    cmp     r2, ip                  )
    PLD(    pld     [r1, #-128]             )
    PLD(    blt     29f                     )
    PLD(    sub     r2, r2, ip              )
28: PLD(    mov     r4, r3, push #\push     )
    PLD(    ldr     r3, [r1, #-4]!          )
    PLD(    subs    ip, ip, #4              )
    PLD(    orr     r4, r4, r3, pull #\pull )
    PLD(    str     r4, [r0, #-4]!          )
    PLD(    bgt     28b                     )

        /*
         * 32 bytes per pass: eight words are loaded into r3-r9 and ip,
         * eight spliced words are stored from r4-r9, ip and lr.  The
         * lowest word loaded stays in r3 for the next pass.
         */
29: PLD(    pld     [r1, #-128]             )
30:     mov     lr, r3, push #\push
        ldmdb   r1!, {r3 - r9, ip}
        subs    r2, r2, #32
        orr     lr, lr, ip, pull #\pull
        mov     ip, ip, push #\push
        orr     ip, ip, r9, pull #\pull
        mov     r9, r9, push #\push
        orr     r9, r9, r8, pull #\pull
        mov     r8, r8, push #\push
        orr     r8, r8, r7, pull #\pull
        mov     r7, r7, push #\push
        orr     r7, r7, r6, pull #\pull
        mov     r6, r6, push #\push
        orr     r6, r6, r5, pull #\pull
        mov     r5, r5, push #\push
        orr     r5, r5, r4, pull #\pull
        mov     r4, r4, push #\push
        orr     r4, r4, r3, pull #\pull
        stmdb   r0!, {r4 - r9, ip, lr}
        bge     29b
    PLD(    cmn     r2, #96                 )
    PLD(    bge     30b                     )
    PLD(    add     r2, r2, #96             )
        cmn     r2, #16                 /* at least 16 bytes left? */
        blt     32f
        /* One 16-byte step.  */
31:     mov     r7, r3, push #\push
        ldmdb   r1!, {r3 - r6}
        sub     r2, r2, #16
        orr     r7, r7, r6, pull #\pull
        mov     r6, r6, push #\push
        orr     r6, r6, r5, pull #\pull
        mov     r5, r5, push #\push
        orr     r5, r5, r4, pull #\pull
        mov     r4, r4, push #\push
        orr     r4, r4, r3, pull #\pull
        stmdb   r0!, {r4 - r7}
32:     adds    r2, r2, #28             /* back to bytes left - 4 */
        ldmfd   sp!, {r5 - r9}
        blt     34f
        /* One word per pass while at least 4 bytes are left.  */
33:     mov     r4, r3, push #\push
        ldr     r3, [r1, #-4]!
        subs    r2, r2, #4
        orr     r4, r4, r3, pull #\pull
        str     r4, [r0, #-4]!
        bge     33b
34:
        .endm


        /*
         * One expansion per source misalignment.  After each, the add
         * puts back the low address bits cleared by the bic at label
         * 27 and the byte tail at label 25 finishes the copy.
         */
        backward_copy_shift     push=8  pull=24
        add     r1, r1, #3
        b       25b

35:     backward_copy_shift     push=16 pull=16
        add     r1, r1, #2
        b       25b

36:     backward_copy_shift     push=24 pull=8
        add     r1, r1, #1
        b       25b

        .size   memmove, . - memmove
END(memmove)
libc_hidden_builtin_def (memmove)