Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 203813 | Differences between
and this patch

Collapse All | Expand All

(-) (+251 lines)
Added Link Here
1
/*
2
 *   Optimized memmove implementation for ARM processors
3
 *
4
 *	Author: 	Nicolas Pitre
5
 *	Created:	Dec 23, 2003
6
 *	Copyright:	(C) MontaVista Software, Inc.
7
 *
8
 *   This file is free software; you can redistribute it and/or
9
 *   modify it under the terms of the GNU Lesser General Public
10
 *   License as published by the Free Software Foundation; either
11
 *   version 2.1 of the License, or (at your option) any later version.
12
 *
13
 *   This file is distributed in the hope that it will be useful,
14
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 *   Lesser General Public License for more details.
17
 */
18
19
#include <sysdep.h>
20
21
22
/*
23
 * Endian independent macros for shifting bytes within registers.
24
 */
25
#ifndef __ARMEB__
26
#define pull            lsr
27
#define push            lsl
28
#else
29
#define pull            lsl
30
#define push            lsr
31
#endif
32
33
/*
34
 * Enable data preload (pld) when the compiler targets one of the ARMv5 variants tested below
35
 */
36
#if defined(__ARM_ARCH_5__) || \
37
    defined(__ARM_ARCH_5T__) || \
38
    defined(__ARM_ARCH_5TE__)
39
#define PLD(code...)	code
40
#else
41
#define PLD(code...)
42
#endif
43
44
45
/* void *memmove (void *dst, const void *src, size_t n) */
46
ENTRY(memmove)
47
		subs	ip, r0, r1
48
		cmphi	r2, ip
49
		bls	memcpy(PLT)
50
51
		stmfd	sp!, {r0, r4, lr}
52
		add	r1, r1, r2
53
		add	r0, r0, r2
54
		subs	r2, r2, #4
55
		blt	25f
56
		ands	ip, r0, #3
57
	PLD(	pld	[r1, #-4]		)
58
		bne	26f
59
		ands	ip, r1, #3
60
		bne	27f
61
62
19:		subs	r2, r2, #4
63
		blt	24f
64
		subs	r2, r2, #8
65
		blt	23f
66
		subs	r2, r2, #16
67
		blt	22f
68
69
	PLD(	pld	[r1, #-32]		)
70
	PLD(	subs	r2, r2, #96		)
71
		stmfd	sp!, {r5 - r8}
72
	PLD(	blt	21f			)
73
74
	PLD(	@ cache alignment		)
75
	PLD(	ands	ip, r1, #31		)
76
	PLD(	pld	[r1, #-64]		)
77
	PLD(	beq	20f			)
78
	PLD(	cmp	r2, ip			)
79
	PLD(	pld	[r1, #-96]		)
80
	PLD(	blt	20f			)
81
	PLD(	cmp	ip, #16			)
82
	PLD(	sub	r2, r2, ip		)
83
	PLD(	ldmgedb	r1!, {r3 - r6}		)
84
	PLD(	stmgedb	r0!, {r3 - r6}		)
85
	PLD(	beq	20f			)
86
	PLD(	and	ip, ip, #15		)
87
	PLD(	cmp	ip, #8			)
88
	PLD(	ldr	r3, [r1, #-4]!		)
89
	PLD(	ldrge	r4, [r1, #-4]!		)
90
	PLD(	ldrgt	r5, [r1, #-4]!		)
91
	PLD(	str	r3, [r0, #-4]!		)
92
	PLD(	strge	r4, [r0, #-4]!		)
93
	PLD(	strgt	r5, [r0, #-4]!		)
94
95
20:	PLD(	pld	[r1, #-96]		)
96
	PLD(	pld	[r1, #-128]		)
97
21:		ldmdb	r1!, {r3, r4, ip, lr}
98
		subs	r2, r2, #32
99
		stmdb	r0!, {r3, r4, ip, lr}
100
		ldmdb	r1!, {r3, r4, ip, lr}
101
		stmgedb	r0!, {r3, r4, ip, lr}
102
		ldmgedb	r1!, {r3, r4, ip, lr}
103
		stmgedb	r0!, {r3, r4, ip, lr}
104
		ldmgedb	r1!, {r3, r4, ip, lr}
105
		subges	r2, r2, #32
106
		stmdb	r0!, {r3, r4, ip, lr}
107
		bge	20b
108
	PLD(	cmn	r2, #96			)
109
	PLD(	bge	21b			)
110
	PLD(	add	r2, r2, #96		)
111
		tst	r2, #31
112
		ldmfd	sp!, {r5 - r8}
113
		ldmeqfd	sp!, {r0, r4, pc}
114
115
		tst	r2, #16
116
22:		ldmnedb	r1!, {r3, r4, ip, lr}
117
		stmnedb	r0!, {r3, r4, ip, lr}
118
119
		tst	r2, #8
120
23:		ldmnedb	r1!, {r3, r4}
121
		stmnedb	r0!, {r3, r4}
122
123
		tst	r2, #4
124
24:		ldrne	r3, [r1, #-4]!
125
		strne	r3, [r0, #-4]!
126
127
/*
 * Tail of memmove: copy the final 0-3 bytes, then return.
 * Bytes are read at negative offsets from r1 and written at the same
 * negative offsets from r0.  All loads are issued before any store.
 * Every return pops {r0, r4, pc}, matching the {r0, r4, lr} pushed on
 * entry, so r0 comes back as the caller's original dst.
 */
25:		ands	r2, r2, #3		/* r2 = low two bits = bytes left (0-3) */
128
		ldmeqfd	sp!, {r0, r4, pc}	/* nothing left: restore and return */
129
130
		cmp	r2, #2			/* lt: 1 byte, eq: 2 bytes, gt: 3 bytes */
131
		ldrb	r3, [r1, #-1]		/* always at least one byte here */
132
		ldrgeb	r4, [r1, #-2]		/* second byte when r2 >= 2 */
133
		ldrgtb	ip, [r1, #-3]		/* third byte when r2 == 3 */
134
		strb	r3, [r0, #-1]
135
		strgeb	r4, [r0, #-2]
136
		strgtb	ip, [r0, #-3]
137
		ldmfd	sp!, {r0, r4, pc}	/* restore saved r0/r4 and return */
138
139
26:		cmp	ip, #2
140
		ldrb	r3, [r1, #-1]!
141
		ldrgeb	r4, [r1, #-1]!
142
		ldrgtb	lr, [r1, #-1]!
143
		strb	r3, [r0, #-1]!
144
		strgeb	r4, [r0, #-1]!
145
		strgtb	lr, [r0, #-1]!
146
		subs	r2, r2, ip
147
		blt	25b
148
		ands	ip, r1, #3
149
		beq	19b
150
151
27:		bic	r1, r1, #3
152
		cmp	ip, #2
153
		ldr	r3, [r1]
154
		beq	35f
155
		blt	36f
156
157
158
		.macro	backward_copy_shift push pull
159
160
		cmp	r2, #12
161
	PLD(	pld	[r1, #-4]		)
162
		blt	33f
163
		subs	r2, r2, #28
164
		stmfd	sp!, {r5 - r9}
165
		blt	31f
166
167
	PLD(	subs	r2, r2, #96		)
168
	PLD(	pld	[r1, #-32]		)
169
	PLD(	blt	30f			)
170
	PLD(	pld	[r1, #-64]		)
171
172
	PLD(	@ cache alignment		)
173
	PLD(	ands	ip, r1, #31		)
174
	PLD(	pld	[r1, #-96]		)
175
	PLD(	beq	29f			)
176
	PLD(	cmp	r2, ip			)
177
	PLD(	pld	[r1, #-128]		)
178
	PLD(	blt	29f			)
179
	PLD(	sub	r2, r2, ip		)
180
28:	PLD(	mov	r4, r3, push #\push	)
181
	PLD(	ldr	r3, [r1, #-4]!		)
182
	PLD(	subs	ip, ip, #4		)
183
	PLD(	orr	r4, r4, r3, pull #\pull	)
184
	PLD(	str	r4, [r0, #-4]!		)
185
	PLD(	bgt	28b			)
186
187
29:	PLD(	pld	[r1, #-128]		)
188
30:		mov	lr, r3, push #\push
189
		ldmdb	r1!, {r3 - r9, ip}
190
		subs	r2, r2, #32
191
		orr	lr, lr, ip, pull #\pull
192
		mov	ip, ip, push #\push
193
		orr	ip, ip, r9, pull #\pull
194
		mov	r9, r9, push #\push
195
		orr	r9, r9, r8, pull #\pull
196
		mov	r8, r8, push #\push
197
		orr	r8, r8, r7, pull #\pull
198
		mov	r7, r7, push #\push
199
		orr	r7, r7, r6, pull #\pull
200
		mov	r6, r6, push #\push
201
		orr	r6, r6, r5, pull #\pull
202
		mov	r5, r5, push #\push
203
		orr	r5, r5, r4, pull #\pull
204
		mov	r4, r4, push #\push
205
		orr	r4, r4, r3, pull #\pull
206
		stmdb	r0!, {r4 - r9, ip, lr}
207
		bge	29b
208
	PLD(	cmn	r2, #96			)
209
	PLD(	bge	30b			)
210
	PLD(	add	r2, r2, #96		)
211
		cmn	r2, #16
212
		blt	32f
213
31:		mov	r7, r3, push #\push
214
		ldmdb	r1!, {r3 - r6}
215
		sub	r2, r2, #16
216
		orr	r7, r7, r6, pull #\pull
217
		mov	r6, r6, push #\push
218
		orr	r6, r6, r5, pull #\pull
219
		mov	r5, r5, push #\push
220
		orr	r5, r5, r4, pull #\pull
221
		mov	r4, r4, push #\push
222
		orr	r4, r4, r3, pull #\pull
223
		stmdb	r0!, {r4 - r7}
224
32:		adds	r2, r2, #28
225
		ldmfd	sp!, {r5 - r9}
226
		blt	34f
227
33:		mov	r4, r3, push #\push
228
		ldr	r3, [r1, #-4]!
229
		subs	r2, r2, #4
230
		orr	r4, r4, r3, pull #\pull
231
		str	r4, [r0, #-4]!
232
		bge	33b
233
34:
234
		.endm
235
236
237
		backward_copy_shift	push=8	pull=24
238
		add	r1, r1, #3
239
		b	25b
240
241
35:		backward_copy_shift	push=16	pull=16
242
		add	r1, r1, #2
243
		b	25b
244
245
36:		backward_copy_shift	push=24	pull=8
246
		add	r1, r1, #1
247
		b	25b
248
249
		.size	memmove, . - memmove
250
END(memmove)
251
libc_hidden_builtin_def (memmove)
(-) (+255 lines)
Added Link Here
1
/*
2
 *   Optimized memmove implementation for ARM processors
3
 *
4
 *	Author: 	Nicolas Pitre
5
 *	Created:	Dec 23, 2003
6
 *	Copyright:	(C) MontaVista Software, Inc.
7
 *
8
 *   This file is free software; you can redistribute it and/or
9
 *   modify it under the terms of the GNU Lesser General Public
10
 *   License as published by the Free Software Foundation; either
11
 *   version 2.1 of the License, or (at your option) any later version.
12
 *
13
 *   This file is distributed in the hope that it will be useful,
14
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 *   Lesser General Public License for more details.
17
 */
18
19
#include <sysdep.h>
20
21
22
/*
23
 * Endian independent macros for shifting bytes within registers.
24
 */
25
#ifndef __ARMEB__
26
#define pull            lsr
27
#define push            lsl
28
#else
29
#define pull            lsl
30
#define push            lsr
31
#endif
32
33
/*
34
 * Enable data preload (pld) when the compiler targets one of the ARMv5 variants tested below
35
 */
36
#if defined(__ARM_ARCH_5__) || \
37
    defined(__ARM_ARCH_5T__) || \
38
    defined(__ARM_ARCH_5TE__)
39
#define PLD(code...)	code
40
#else
41
#define PLD(code...)
42
#endif
43
44
dst		.req	r1
45
src		.req	r0
46
47
/* void bcopy (const void *src, void *dst, size_t size) */
48
ENTRY(bcopy)
49
		subs	ip, dst, src
50
		cmphi	r2, ip
51
		movls	r3, r0
52
		movls	r0, r1
53
		movls	r1, r3
54
		bls	memcpy(PLT)
55
56
		stmfd	sp!, {r4, lr}
57
		add	src, src, r2
58
		add	dst, dst, r2
59
		subs	r2, r2, #4
60
		blt	25f
61
		ands	ip, dst, #3
62
	PLD(	pld	[src, #-4]		)
63
		bne	26f
64
		ands	ip, src, #3
65
		bne	27f
66
67
19:		subs	r2, r2, #4
68
		blt	24f
69
		subs	r2, r2, #8
70
		blt	23f
71
		subs	r2, r2, #16
72
		blt	22f
73
74
	PLD(	pld	[src, #-32]		)
75
	PLD(	subs	r2, r2, #96		)
76
		stmfd	sp!, {r5 - r8}
77
	PLD(	blt	21f			)
78
79
	PLD(	@ cache alignment		)
80
	PLD(	ands	ip, src, #31		)
81
	PLD(	pld	[src, #-64]		)
82
	PLD(	beq	20f			)
83
	PLD(	cmp	r2, ip			)
84
	PLD(	pld	[src, #-96]		)
85
	PLD(	blt	20f			)
86
	PLD(	cmp	ip, #16			)
87
	PLD(	sub	r2, r2, ip		)
88
	PLD(	ldmgedb	src!, {r3 - r6}		)
89
	PLD(	stmgedb	dst!, {r3 - r6}		)
90
	PLD(	beq	20f			)
91
	PLD(	and	ip, ip, #15		)
92
	PLD(	cmp	ip, #8			)
93
	PLD(	ldr	r3, [src, #-4]!		)
94
	PLD(	ldrge	r4, [src, #-4]!		)
95
	PLD(	ldrgt	r5, [src, #-4]!		)
96
	PLD(	str	r3, [dst, #-4]!		)
97
	PLD(	strge	r4, [dst, #-4]!		)
98
	PLD(	strgt	r5, [dst, #-4]!		)
99
100
20:	PLD(	pld	[src, #-96]		)
101
	PLD(	pld	[src, #-128]		)
102
21:		ldmdb	src!, {r3, r4, ip, lr}
103
		subs	r2, r2, #32
104
		stmdb	dst!, {r3, r4, ip, lr}
105
		ldmdb	src!, {r3, r4, ip, lr}
106
		stmgedb	dst!, {r3, r4, ip, lr}
107
		ldmgedb	src!, {r3, r4, ip, lr}
108
		stmgedb	dst!, {r3, r4, ip, lr}
109
		ldmgedb	src!, {r3, r4, ip, lr}
110
		subges	r2, r2, #32
111
		stmdb	dst!, {r3, r4, ip, lr}
112
		bge	20b
113
	PLD(	cmn	r2, #96			)
114
	PLD(	bge	21b			)
115
	PLD(	add	r2, r2, #96		)
116
		tst	r2, #31
117
		ldmfd	sp!, {r5 - r8}
118
		ldmeqfd	sp!, {r4, pc}
119
120
		tst	r2, #16
121
22:		ldmnedb	src!, {r3, r4, ip, lr}
122
		stmnedb	dst!, {r3, r4, ip, lr}
123
124
		tst	r2, #8
125
23:		ldmnedb	src!, {r3, r4}
126
		stmnedb	dst!, {r3, r4}
127
128
		tst	r2, #4
129
24:		ldrne	r3, [src, #-4]!
130
		strne	r3, [dst, #-4]!
131
132
/*
 * Tail of bcopy: copy the final 0-3 bytes, then return.
 * Bytes are read at negative offsets from src and written at the same
 * negative offsets from dst.  All loads are issued before any store.
 *
 * The prologue pushes exactly two words, {r4, lr}, and every path that
 * reaches this label has already popped any extra registers it saved.
 * The returns below must therefore pop exactly {r4, pc}.  Popping a
 * third register here would load the saved lr into r4 and return to
 * whatever word lies above the frame.
 */
25:		ands	r2, r2, #3		/* r2 = low two bits = bytes left (0-3) */
133
		ldmeqfd	sp!, {r4, pc}		/* nothing left: restore r4 and return */
134
135
		cmp	r2, #2			/* lt: 1 byte, eq: 2 bytes, gt: 3 bytes */
136
		ldrb	r3, [src, #-1]		/* always at least one byte here */
137
		ldrgeb	r4, [src, #-2]		/* second byte when r2 >= 2 */
138
		ldrgtb	ip, [src, #-3]		/* third byte when r2 == 3 */
139
		strb	r3, [dst, #-1]
140
		strgeb	r4, [dst, #-2]
141
		strgtb	ip, [dst, #-3]
142
		ldmfd	sp!, {r4, pc}		/* restore r4 and return */
143
144
26:		cmp	ip, #2
145
		ldrb	r3, [src, #-1]!
146
		ldrgeb	r4, [src, #-1]!
147
		ldrgtb	lr, [src, #-1]!
148
		strb	r3, [dst, #-1]!
149
		strgeb	r4, [dst, #-1]!
150
		strgtb	lr, [dst, #-1]!
151
		subs	r2, r2, ip
152
		blt	25b
153
		ands	ip, src, #3
154
		beq	19b
155
156
27:		bic	src, src, #3
157
		cmp	ip, #2
158
		ldr	r3, [src]
159
		beq	35f
160
		blt	36f
161
162
163
		.macro	backward_copy_shift push pull
164
165
		cmp	r2, #12
166
	PLD(	pld	[src, #-4]		)
167
		blt	33f
168
		subs	r2, r2, #28
169
		stmfd	sp!, {r5 - r9}
170
		blt	31f
171
172
	PLD(	subs	r2, r2, #96		)
173
	PLD(	pld	[src, #-32]		)
174
	PLD(	blt	30f			)
175
	PLD(	pld	[src, #-64]		)
176
177
	PLD(	@ cache alignment		)
178
	PLD(	ands	ip, src, #31		)
179
	PLD(	pld	[src, #-96]		)
180
	PLD(	beq	29f			)
181
	PLD(	cmp	r2, ip			)
182
	PLD(	pld	[src, #-128]		)
183
	PLD(	blt	29f			)
184
	PLD(	sub	r2, r2, ip		)
185
28:	PLD(	mov	r4, r3, push #\push	)
186
	PLD(	ldr	r3, [src, #-4]!		)
187
	PLD(	subs	ip, ip, #4		)
188
	PLD(	orr	r4, r4, r3, pull #\pull	)
189
	PLD(	str	r4, [dst, #-4]!		)
190
	PLD(	bgt	28b			)
191
192
29:	PLD(	pld	[src, #-128]		)
193
30:		mov	lr, r3, push #\push
194
		ldmdb	src!, {r3 - r9, ip}
195
		subs	r2, r2, #32
196
		orr	lr, lr, ip, pull #\pull
197
		mov	ip, ip, push #\push
198
		orr	ip, ip, r9, pull #\pull
199
		mov	r9, r9, push #\push
200
		orr	r9, r9, r8, pull #\pull
201
		mov	r8, r8, push #\push
202
		orr	r8, r8, r7, pull #\pull
203
		mov	r7, r7, push #\push
204
		orr	r7, r7, r6, pull #\pull
205
		mov	r6, r6, push #\push
206
		orr	r6, r6, r5, pull #\pull
207
		mov	r5, r5, push #\push
208
		orr	r5, r5, r4, pull #\pull
209
		mov	r4, r4, push #\push
210
		orr	r4, r4, r3, pull #\pull
211
		stmdb	dst!, {r4 - r9, ip, lr}
212
		bge	29b
213
	PLD(	cmn	r2, #96			)
214
	PLD(	bge	30b			)
215
	PLD(	add	r2, r2, #96		)
216
		cmn	r2, #16
217
		blt	32f
218
31:		mov	r7, r3, push #\push
219
		ldmdb	src!, {r3 - r6}
220
		sub	r2, r2, #16
221
		orr	r7, r7, r6, pull #\pull
222
		mov	r6, r6, push #\push
223
		orr	r6, r6, r5, pull #\pull
224
		mov	r5, r5, push #\push
225
		orr	r5, r5, r4, pull #\pull
226
		mov	r4, r4, push #\push
227
		orr	r4, r4, r3, pull #\pull
228
		stmdb	dst!, {r4 - r7}
229
32:		adds	r2, r2, #28
230
		ldmfd	sp!, {r5 - r9}
231
		blt	34f
232
33:		mov	r4, r3, push #\push
233
		ldr	r3, [src, #-4]!
234
		subs	r2, r2, #4
235
		orr	r4, r4, r3, pull #\pull
236
		str	r4, [dst, #-4]!
237
		bge	33b
238
34:
239
		.endm
240
241
242
		backward_copy_shift	push=8	pull=24
243
		add	src, src, #3
244
		b	25b
245
246
35:		backward_copy_shift	push=16	pull=16
247
		add	src, src, #2
248
		b	25b
249
250
36:		backward_copy_shift	push=24	pull=8
251
		add	src, src, #1
252
		b	25b
253
254
		.size	bcopy, . - bcopy
255
END(bcopy)
(-) (+242 lines)
Added Link Here
1
/*
2
 *   Optimized memcpy implementation for ARM processors
3
 *
4
 *	Author: 	Nicolas Pitre
5
 *	Created:	Dec 23, 2003
6
 *	Copyright:	(C) MontaVista Software, Inc.
7
 *
8
 *   This file is free software; you can redistribute it and/or
9
 *   modify it under the terms of the GNU Lesser General Public
10
 *   License as published by the Free Software Foundation; either
11
 *   version 2.1 of the License, or (at your option) any later version.
12
 *
13
 *   This file is distributed in the hope that it will be useful,
14
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 *   Lesser General Public License for more details.
17
 */
18
19
#include <sysdep.h>
20
21
22
/*
23
 * Endian independent macros for shifting bytes within registers.
24
 */
25
#ifndef __ARMEB__
26
#define pull            lsr
27
#define push            lsl
28
#else
29
#define pull            lsl
30
#define push            lsr
31
#endif
32
33
/*
34
 * Enable data preload (pld) when the compiler targets one of the ARMv5 variants tested below
35
 */
36
#if defined(__ARM_ARCH_5__) || \
37
    defined(__ARM_ARCH_5T__) || \
38
    defined(__ARM_ARCH_5TE__)
39
#define PLD(code...)	code
40
#else
41
#define PLD(code...)
42
#endif
43
44
45
/* void *memcpy (void *dst, const void *src, size_t n) */
46
47
ENTRY(memcpy)
48
		subs	r2, r2, #4
49
		stmfd	sp!, {r0, r4, lr}
50
		blt	7f
51
		ands	ip, r0, #3
52
	PLD(	pld	[r1, #0]		)
53
		bne	8f
54
		ands	ip, r1, #3
55
		bne	9f
56
57
1:		subs	r2, r2, #4
58
		blt	6f
59
		subs	r2, r2, #8
60
		blt	5f
61
		subs	r2, r2, #16
62
		blt	4f
63
64
	PLD(	subs	r2, r2, #65		)
65
		stmfd	sp!, {r5 - r8}
66
	PLD(	blt	3f			)
67
	PLD(	pld	[r1, #32]		)
68
69
	PLD(	@ cache alignment		)
70
	PLD(	ands	ip, r1, #31		)
71
	PLD(	pld	[r1, #64]		)
72
	PLD(	beq	2f			)
73
	PLD(	rsb	ip, ip, #32		)
74
	PLD(	cmp	r2, ip			)
75
	PLD(	pld	[r1, #96]		)
76
	PLD(	blt	2f			)
77
	PLD(	cmp	ip, #16			)
78
	PLD(	sub	r2, r2, ip		)
79
	PLD(	ldmgeia	r1!, {r3 - r6}		)
80
	PLD(	stmgeia	r0!, {r3 - r6}		)
81
	PLD(	beq	2f			)
82
	PLD(	and	ip, ip, #15		)
83
	PLD(	cmp	ip, #8			)
84
	PLD(	ldr	r3, [r1], #4		)
85
	PLD(	ldrge	r4, [r1], #4		)
86
	PLD(	ldrgt	r5, [r1], #4		)
87
	PLD(	str	r3, [r0], #4		)
88
	PLD(	strge	r4, [r0], #4		)
89
	PLD(	strgt	r5, [r0], #4		)
90
91
2:	PLD(	pld	[r1, #96]		)
92
3:		ldmia	r1!, {r3 - r8, ip, lr}
93
		subs	r2, r2, #32
94
		stmia	r0!, {r3 - r8, ip, lr}
95
		bge	2b
96
	PLD(	cmn	r2, #65			)
97
	PLD(	bge	3b			)
98
	PLD(	add	r2, r2, #65		)
99
		tst	r2, #31
100
		ldmfd	sp!, {r5 - r8}
101
		ldmeqfd	sp!, {r0, r4, pc}
102
103
		tst	r2, #16
104
4:		ldmneia	r1!, {r3, r4, ip, lr}
105
		stmneia	r0!, {r3, r4, ip, lr}
106
107
		tst	r2, #8
108
5:		ldmneia	r1!, {r3, r4}
109
		stmneia	r0!, {r3, r4}
110
111
		tst	r2, #4
112
6:		ldrne	r3, [r1], #4
113
		strne	r3, [r0], #4
114
115
/*
 * Tail of memcpy: copy the final 0-3 bytes, then return.
 * Bytes are read forward from r1 and written forward to r0 using
 * post-increment addressing.  All loads are issued before any store.
 * Every return pops {r0, r4, pc}, matching the {r0, r4, lr} pushed on
 * entry, so r0 comes back as the caller's original dst.
 */
7:		ands	r2, r2, #3		/* r2 = low two bits = bytes left (0-3) */
116
		ldmeqfd	sp!, {r0, r4, pc}	/* nothing left: restore and return */
117
118
		cmp	r2, #2			/* lt: 1 byte, eq: 2 bytes, gt: 3 bytes */
119
		ldrb	r3, [r1], #1		/* always at least one byte here */
120
		ldrgeb	r4, [r1], #1		/* second byte when r2 >= 2 */
121
		ldrgtb	ip, [r1]		/* third byte when r2 == 3; no writeback */
122
		strb	r3, [r0], #1
123
		strgeb	r4, [r0], #1
124
		strgtb	ip, [r0]
125
		ldmfd	sp!, {r0, r4, pc}	/* restore saved r0/r4 and return */
126
127
8:		rsb	ip, ip, #4
128
		cmp	ip, #2
129
		ldrb	r3, [r1], #1
130
		ldrgeb	r4, [r1], #1
131
		ldrgtb	lr, [r1], #1
132
		strb	r3, [r0], #1
133
		strgeb	r4, [r0], #1
134
		strgtb	lr, [r0], #1
135
		subs	r2, r2, ip
136
		blt	7b
137
		ands	ip, r1, #3
138
		beq	1b
139
140
9:		bic	r1, r1, #3
141
		cmp	ip, #2
142
		ldr	lr, [r1], #4
143
		beq	17f
144
		bgt	18f
145
146
147
		.macro	forward_copy_shift pull push
148
149
		cmp	r2, #12
150
	PLD(	pld	[r1, #0]		)
151
		blt	15f
152
		subs	r2, r2, #28
153
		stmfd	sp!, {r5 - r9}
154
		blt	13f
155
156
	PLD(	subs	r2, r2, #97		)
157
	PLD(	blt	12f			)
158
	PLD(	pld	[r1, #32]		)
159
160
	PLD(	@ cache alignment		)
161
	PLD(	rsb	ip, r1, #36		)
162
	PLD(	pld	[r1, #64]		)
163
	PLD(	ands	ip, ip, #31		)
164
	PLD(	pld	[r1, #96]		)
165
	PLD(	beq	11f			)
166
	PLD(	cmp	r2, ip			)
167
	PLD(	pld	[r1, #128]		)
168
	PLD(	blt	11f			)
169
	PLD(	sub	r2, r2, ip		)
170
10:	PLD(	mov	r3, lr, pull #\pull	)
171
	PLD(	ldr	lr, [r1], #4		)
172
	PLD(	subs	ip, ip, #4		)
173
	PLD(	orr	r3, r3, lr, push #\push	)
174
	PLD(	str	r3, [r0], #4		)
175
	PLD(	bgt	10b			)
176
177
11:	PLD(	pld	[r1, #128]		)
178
12:		mov	r3, lr, pull #\pull
179
		ldmia	r1!, {r4 - r9, ip, lr}
180
		subs	r2, r2, #32
181
		orr	r3, r3, r4, push #\push
182
		mov	r4, r4, pull #\pull
183
		orr	r4, r4, r5, push #\push
184
		mov	r5, r5, pull #\pull
185
		orr	r5, r5, r6, push #\push
186
		mov	r6, r6, pull #\pull
187
		orr	r6, r6, r7, push #\push
188
		mov	r7, r7, pull #\pull
189
		orr	r7, r7, r8, push #\push
190
		mov	r8, r8, pull #\pull
191
		orr	r8, r8, r9, push #\push
192
		mov	r9, r9, pull #\pull
193
		orr	r9, r9, ip, push #\push
194
		mov	ip, ip, pull #\pull
195
		orr	ip, ip, lr, push #\push
196
		stmia	r0!, {r3 - r9, ip}
197
		bge	11b
198
	PLD(	cmn	r2, #97			)
199
	PLD(	bge	12b			)
200
	PLD(	add	r2, r2, #97		)
201
		cmn	r2, #16
202
		blt	14f
203
13:		mov	r3, lr, pull #\pull
204
		ldmia	r1!, {r4 - r6, lr}
205
		sub	r2, r2, #16
206
		orr	r3, r3, r4, push #\push
207
		mov	r4, r4, pull #\pull
208
		orr	r4, r4, r5, push #\push
209
		mov	r5, r5, pull #\pull
210
		orr	r5, r5, r6, push #\push
211
		mov	r6, r6, pull #\pull
212
		orr	r6, r6, lr, push #\push
213
		stmia	r0!, {r3 - r6}
214
14:		adds	r2, r2, #28
215
		ldmfd	sp!, {r5 - r9}
216
		blt	16f
217
15:		mov	r3, lr, pull #\pull
218
		ldr	lr, [r1], #4
219
		subs	r2, r2, #4
220
		orr	r3, r3, lr, push #\push
221
		str	r3, [r0], #4
222
		bge	15b
223
16:
224
		.endm
225
226
227
		forward_copy_shift	pull=8	push=24
228
		sub	r1, r1, #3
229
		b	7b
230
231
17:		forward_copy_shift	pull=16	push=16
232
		sub	r1, r1, #2
233
		b	7b
234
235
18:		forward_copy_shift	pull=24	push=8
236
		sub	r1, r1, #1
237
		b	7b
238
239
		.size	memcpy, . - memcpy
240
END(memcpy)
241
libc_hidden_builtin_def (memcpy)
242

Return to bug 203813