Lines 1-159
Link Here
|
1 |
/* strcpy/stpcpy implementation for x86-64. |
1 |
# $Header: /K8_Projects/Glibc/amd64strcpy.S 7 2/12/04 19:06 Emenezes $ |
2 |
Copyright (C) 2002 Free Software Foundation, Inc. |
|
|
3 |
This file is part of the GNU C Library. |
4 |
Contributed by Andreas Jaeger <aj@suse.de>, 2002. |
5 |
|
6 |
The GNU C Library is free software; you can redistribute it and/or |
7 |
modify it under the terms of the GNU Lesser General Public |
8 |
License as published by the Free Software Foundation; either |
9 |
version 2.1 of the License, or (at your option) any later version. |
10 |
|
11 |
The GNU C Library is distributed in the hope that it will be useful, |
12 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
Lesser General Public License for more details. |
15 |
|
16 |
You should have received a copy of the GNU Lesser General Public |
17 |
License along with the GNU C Library; if not, write to the Free |
18 |
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
19 |
02111-1307 USA. */ |
20 |
|
21 |
#include <sysdep.h> |
22 |
#include "asm-syntax.h" |
23 |
#include "bp-sym.h" |
24 |
#include "bp-asm.h" |
25 |
|
2 |
|
26 |
#ifndef USE_AS_STPCPY |
3 |
# (c) 2002 Advanced Micro Devices, Inc. |
|
|
4 |
# YOUR USE OF THIS CODE IS SUBJECT TO THE TERMS |
5 |
# AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC |
6 |
# LICENSE FOUND IN THE "README" FILE THAT IS |
7 |
# INCLUDED WITH THIS FILE |
8 |
|
9 |
#include "sysdep.h" |
10 |
#include <rtld-global-offsets.h> |
11 |
|
12 |
/* XXX: strncpy is broken, just use this for strcpy for now. */ |
13 |
#ifdef PIC |
14 |
.globl _rtld_local_ro |
15 |
.hidden _rtld_local_ro |
16 |
.set _rtld_local_ro,_rtld_global_ro |
17 |
#endif |
18 |
#ifndef STRCPY |
27 |
# define STRCPY strcpy |
19 |
# define STRCPY strcpy |
28 |
#endif |
20 |
#endif |
|
|
21 |
#define LABEL(s) L(strcpy##s) |
22 |
|
23 |
.text |
24 |
|
25 |
ENTRY (STRCPY) # (char *, const char *) |
26 |
|
27 |
#ifdef USE_AS_STRNCPY // (char *, const char *, size_t) |
28 |
test %rdx, %rdx # (char *, const char *, size_t) |
29 |
mov %rdx, %r11 |
30 |
jz LABEL(exitn) # early exit |
31 |
#endif |
32 |
|
33 |
xor %edx, %edx |
34 |
|
35 |
LABEL(aligntry): |
36 |
mov %rsi, %r8 # align by source |
37 |
and $7, %r8 |
38 |
jz LABEL(alignafter) |
39 |
|
40 |
LABEL(align): # 8-byte align |
41 |
sub $8, %r8 |
29 |
|
42 |
|
30 |
.text |
|
|
31 |
ENTRY (BP_SYM (STRCPY)) |
32 |
movq %rsi, %rcx /* Source register. */ |
33 |
andl $7, %ecx /* mask alignment bits */ |
34 |
movq %rdi, %rdx /* Duplicate destination pointer. */ |
35 |
|
36 |
jz 5f /* aligned => start loop */ |
37 |
|
38 |
neg %ecx /* We need to align to 8 bytes. */ |
39 |
addl $8,%ecx |
40 |
/* Search the first bytes directly. */ |
41 |
0: |
42 |
movb (%rsi), %al /* Fetch a byte */ |
43 |
testb %al, %al /* Is it NUL? */ |
44 |
movb %al, (%rdx) /* Store it */ |
45 |
jz 4f /* If it was NUL, done! */ |
46 |
incq %rsi |
47 |
incq %rdx |
48 |
decl %ecx |
49 |
jnz 0b |
50 |
|
51 |
5: |
52 |
movq $0xfefefefefefefeff,%r8 |
53 |
|
54 |
/* Now the source is aligned. Unfortunately we cannot force
55 |
to have both source and destination aligned, so ignore the |
56 |
alignment of the destination. */ |
57 |
.p2align 4 |
43 |
.p2align 4 |
58 |
1: |
|
|
59 |
/* 1st unroll. */ |
60 |
movq (%rsi), %rax /* Read double word (8 bytes). */ |
61 |
addq $8, %rsi /* Adjust pointer for next word. */ |
62 |
movq %rax, %r9 /* Save a copy for NUL finding. */ |
63 |
addq %r8, %r9 /* add the magic value to the word. We get |
64 |
carry bits reported for each byte which |
65 |
is *not* 0 */ |
66 |
jnc 3f /* highest byte is NUL => return pointer */ |
67 |
xorq %rax, %r9 /* (word+magic)^word */ |
68 |
orq %r8, %r9 /* set all non-carry bits */ |
69 |
incq %r9 /* add 1: if one carry bit was *not* set |
70 |
the addition will not result in 0. */ |
71 |
|
72 |
jnz 3f /* found NUL => return pointer */ |
73 |
|
74 |
movq %rax, (%rdx) /* Write value to destination. */ |
75 |
addq $8, %rdx /* Adjust pointer. */ |
76 |
|
77 |
/* 2nd unroll. */ |
78 |
movq (%rsi), %rax /* Read double word (8 bytes). */ |
79 |
addq $8, %rsi /* Adjust pointer for next word. */ |
80 |
movq %rax, %r9 /* Save a copy for NUL finding. */ |
81 |
addq %r8, %r9 /* add the magic value to the word. We get |
82 |
carry bits reported for each byte which |
83 |
is *not* 0 */ |
84 |
jnc 3f /* highest byte is NUL => return pointer */ |
85 |
xorq %rax, %r9 /* (word+magic)^word */ |
86 |
orq %r8, %r9 /* set all non-carry bits */ |
87 |
incq %r9 /* add 1: if one carry bit was *not* set |
88 |
the addition will not result in 0. */ |
89 |
|
90 |
jnz 3f /* found NUL => return pointer */ |
91 |
|
92 |
movq %rax, (%rdx) /* Write value to destination. */ |
93 |
addq $8, %rdx /* Adjust pointer. */ |
94 |
|
95 |
/* 3rd unroll. */ |
96 |
movq (%rsi), %rax /* Read double word (8 bytes). */ |
97 |
addq $8, %rsi /* Adjust pointer for next word. */ |
98 |
movq %rax, %r9 /* Save a copy for NUL finding. */ |
99 |
addq %r8, %r9 /* add the magic value to the word. We get |
100 |
carry bits reported for each byte which |
101 |
is *not* 0 */ |
102 |
jnc 3f /* highest byte is NUL => return pointer */ |
103 |
xorq %rax, %r9 /* (word+magic)^word */ |
104 |
orq %r8, %r9 /* set all non-carry bits */ |
105 |
incq %r9 /* add 1: if one carry bit was *not* set |
106 |
the addition will not result in 0. */ |
107 |
|
108 |
jnz 3f /* found NUL => return pointer */ |
109 |
|
110 |
movq %rax, (%rdx) /* Write value to destination. */ |
111 |
addq $8, %rdx /* Adjust pointer. */ |
112 |
|
113 |
/* 4th unroll. */ |
114 |
movq (%rsi), %rax /* Read double word (8 bytes). */ |
115 |
addq $8, %rsi /* Adjust pointer for next word. */ |
116 |
movq %rax, %r9 /* Save a copy for NUL finding. */ |
117 |
addq %r8, %r9 /* add the magic value to the word. We get |
118 |
carry bits reported for each byte which |
119 |
is *not* 0 */ |
120 |
jnc 3f /* highest byte is NUL => return pointer */ |
121 |
xorq %rax, %r9 /* (word+magic)^word */ |
122 |
orq %r8, %r9 /* set all non-carry bits */ |
123 |
incq %r9 /* add 1: if one carry bit was *not* set |
124 |
the addition will not result in 0. */ |
125 |
|
126 |
jnz 3f /* found NUL => return pointer */ |
127 |
|
128 |
movq %rax, (%rdx) /* Write value to destination. */ |
129 |
addq $8, %rdx /* Adjust pointer. */ |
130 |
jmp 1b /* Next iteration. */ |
131 |
|
44 |
|
132 |
/* Do the last few bytes. %rax contains the value to write. |
45 |
LABEL(alignloop): |
133 |
The loop is unrolled twice. */ |
46 |
#ifdef USE_AS_STRNCPY |
|
|
47 |
dec %r11 |
48 |
jl LABEL(exitn) |
49 |
#endif |
50 |
|
51 |
mov (%rsi, %rdx), %al # fetch the next source byte
52 |
test %al, %al # check if character a NUL |
53 |
mov %al, (%rdi, %rdx) |
54 |
jz LABEL(exit) |
55 |
|
56 |
inc %edx |
57 |
inc %r8 |
58 |
jnz LABEL(alignloop) |
59 |
|
134 |
.p2align 4 |
60 |
.p2align 4 |
|
|
61 |
|
62 |
LABEL(alignafter): |
63 |
|
64 |
LABEL(8try): |
65 |
mov $0xfefefefefefefeff, %rcx |
66 |
|
67 |
LABEL(8): # 8-byte |
68 |
mov (%rsi, %rdx), %rax |
69 |
|
70 |
LABEL(8loop): |
71 |
#ifdef USE_AS_STRNCPY |
72 |
sub $8, %r11 |
73 |
jl LABEL(tail) |
74 |
#endif |
75 |
|
76 |
mov %rcx, %r8 |
77 |
add %rax, %r8 |
78 |
sbb %r10, %r10 |
79 |
|
80 |
xor %rax, %r8 |
81 |
or %rcx, %r8 |
82 |
sub %r10, %r8 |
83 |
jnz LABEL(tail) |
84 |
|
85 |
mov %rax, (%rdi, %rdx) |
86 |
mov 8 (%rsi, %rdx), %rax |
87 |
add $8, %edx |
88 |
|
89 |
#ifdef USE_AS_STRNCPY |
90 |
sub $8, %r11 |
91 |
jl LABEL(tail) |
92 |
#endif |
93 |
|
94 |
mov %rcx, %r8 |
95 |
add %rax, %r8 |
96 |
sbb %r10, %r10 |
97 |
|
98 |
xor %rax, %r8 |
99 |
or %rcx, %r8 |
100 |
sub %r10, %r8 |
101 |
jnz LABEL(tail) |
102 |
|
103 |
mov %rax, (%rdi, %rdx) |
104 |
mov 8 (%rsi, %rdx), %rax |
105 |
add $8, %edx |
106 |
|
107 |
#ifdef USE_AS_STRNCPY |
108 |
sub $8, %r11 |
109 |
jl LABEL(tail) |
110 |
#endif |
111 |
|
112 |
mov %rcx, %r8 |
113 |
add %rax, %r8 |
114 |
sbb %r10, %r10 |
115 |
|
116 |
xor %rax, %r8 |
117 |
or %rcx, %r8 |
118 |
sub %r10, %r8 |
119 |
jnz LABEL(tail) |
120 |
|
121 |
mov %rax, (%rdi, %rdx) |
122 |
mov 8 (%rsi, %rdx), %rax |
123 |
add $8, %edx |
124 |
|
125 |
#ifdef USE_AS_STRNCPY |
126 |
sub $8, %r11 |
127 |
jl LABEL(tail) |
128 |
#endif |
129 |
|
130 |
mov %rcx, %r8 |
131 |
add %rax, %r8 |
132 |
sbb %r10, %r10 |
133 |
|
134 |
xor %rax, %r8 |
135 |
or %rcx, %r8 |
136 |
sub %r10, %r8 |
137 |
jnz LABEL(tail) |
138 |
|
139 |
mov %rax, (%rdi, %rdx) |
140 |
mov 8 (%rsi, %rdx), %rax |
141 |
add $8, %edx |
142 |
|
143 |
#ifdef USE_AS_STRNCPY |
144 |
sub $8, %r11 |
145 |
jl LABEL(tail) |
146 |
#endif |
147 |
|
148 |
mov %rcx, %r8 |
149 |
add %rax, %r8 |
150 |
sbb %r10, %r10 |
151 |
|
152 |
xor %rax, %r8 |
153 |
or %rcx, %r8 |
154 |
sub %r10, %r8 |
155 |
jnz LABEL(tail) |
156 |
|
157 |
mov %rax, (%rdi, %rdx) |
158 |
mov 8 (%rsi, %rdx), %rax |
159 |
add $8, %edx |
160 |
|
161 |
#ifdef USE_AS_STRNCPY |
162 |
sub $8, %r11 |
163 |
jl LABEL(tail) |
164 |
#endif |
165 |
|
166 |
mov %rcx, %r8 |
167 |
add %rax, %r8 |
168 |
sbb %r10, %r10 |
169 |
|
170 |
xor %rax, %r8 |
171 |
or %rcx, %r8 |
172 |
sub %r10, %r8 |
173 |
jnz LABEL(tail) |
174 |
|
175 |
mov %rax, (%rdi, %rdx) |
176 |
mov 8 (%rsi, %rdx), %rax |
177 |
add $8, %edx |
178 |
|
179 |
#ifdef USE_AS_STRNCPY |
180 |
sub $8, %r11 |
181 |
jl LABEL(tail) |
182 |
#endif |
183 |
|
184 |
mov %rcx, %r8 |
185 |
add %rax, %r8 |
186 |
sbb %r10, %r10 |
187 |
|
188 |
xor %rax, %r8 |
189 |
or %rcx, %r8 |
190 |
sub %r10, %r8 |
191 |
jnz LABEL(tail) |
192 |
|
193 |
mov %rax, (%rdi, %rdx) |
194 |
mov 8 (%rsi, %rdx), %rax |
195 |
add $8, %edx |
196 |
|
197 |
#ifdef USE_AS_STRNCPY |
198 |
sub $8, %r11 |
199 |
jl LABEL(tail) |
200 |
#endif |
201 |
|
202 |
mov %rcx, %r8 |
203 |
add %rax, %r8 |
204 |
sbb %r10, %r10 |
205 |
|
206 |
xor %rax, %r8 |
207 |
or %rcx, %r8 |
208 |
sub %r10, %r8 |
209 |
jnz LABEL(tail) |
210 |
|
211 |
mov %rax, (%rdi, %rdx) |
212 |
mov 8 (%rsi, %rdx), %rax |
213 |
add $8, %edx |
214 |
|
215 |
LABEL(8after): |
216 |
|
217 |
LABEL(64try): |
218 |
#ifdef PIC |
219 |
mov _rtld_local_ro@GOTPCREL(%rip), %r8 |
220 |
mov RTLD_GLOBAL_DL_CACHE1SIZEHALF(%r8), %r9 |
221 |
#else |
222 |
mov _dl_cache1sizehalf, %r9 |
223 |
#endif |
224 |
|
225 |
|
226 |
LABEL(64): # 64-byte |
227 |
|
228 |
.p2align 4 |
229 |
|
230 |
LABEL(64loop): |
231 |
#ifdef USE_AS_STRNCPY |
232 |
sub $8, %r11 |
233 |
jl LABEL(tail) |
234 |
#endif |
235 |
|
236 |
mov %rcx, %r8 |
237 |
add %rax, %r8 |
238 |
sbb %r10, %r10 |
239 |
|
240 |
xor %rax, %r8 |
241 |
or %rcx, %r8 |
242 |
sub %r10, %r8 |
243 |
jnz LABEL(tail) |
244 |
|
245 |
mov %rax, (%rdi, %rdx) |
246 |
mov 8 (%rsi, %rdx), %rax |
247 |
add $8, %edx |
248 |
|
249 |
#ifdef USE_AS_STRNCPY |
250 |
sub $8, %r11 |
251 |
jl LABEL(tail) |
252 |
#endif |
253 |
|
254 |
mov %rcx, %r8 |
255 |
add %rax, %r8 |
256 |
sbb %r10, %r10 |
257 |
|
258 |
xor %rax, %r8 |
259 |
or %rcx, %r8 |
260 |
sub %r10, %r8 |
261 |
jnz LABEL(tail) |
262 |
|
263 |
mov %rax, (%rdi, %rdx) |
264 |
mov 8 (%rsi, %rdx), %rax |
265 |
add $8, %edx |
266 |
|
267 |
#ifdef USE_AS_STRNCPY |
268 |
sub $8, %r11 |
269 |
jl LABEL(tail) |
270 |
#endif |
271 |
|
272 |
mov %rcx, %r8 |
273 |
add %rax, %r8 |
274 |
sbb %r10, %r10 |
275 |
|
276 |
xor %rax, %r8 |
277 |
or %rcx, %r8 |
278 |
sub %r10, %r8 |
279 |
jnz LABEL(tail) |
280 |
|
281 |
mov %rax, (%rdi, %rdx) |
282 |
mov 8 (%rsi, %rdx), %rax |
283 |
add $8, %edx |
284 |
|
285 |
#ifdef USE_AS_STRNCPY |
286 |
sub $8, %r11 |
287 |
jl LABEL(tail) |
288 |
#endif |
289 |
|
290 |
mov %rcx, %r8 |
291 |
add %rax, %r8 |
292 |
sbb %r10, %r10 |
293 |
|
294 |
xor %rax, %r8 |
295 |
or %rcx, %r8 |
296 |
sub %r10, %r8 |
297 |
jnz LABEL(tail) |
298 |
|
299 |
mov %rax, (%rdi, %rdx) |
300 |
mov 8 (%rsi, %rdx), %rax |
301 |
add $8, %edx |
302 |
|
303 |
#ifdef USE_AS_STRNCPY |
304 |
sub $8, %r11 |
305 |
jl LABEL(tail) |
306 |
#endif |
307 |
|
308 |
mov %rcx, %r8 |
309 |
add %rax, %r8 |
310 |
sbb %r10, %r10 |
311 |
|
312 |
xor %rax, %r8 |
313 |
or %rcx, %r8 |
314 |
sub %r10, %r8 |
315 |
jnz LABEL(tail) |
316 |
|
317 |
mov %rax, (%rdi, %rdx) |
318 |
mov 8 (%rsi, %rdx), %rax |
319 |
add $8, %edx |
320 |
|
321 |
#ifdef USE_AS_STRNCPY |
322 |
sub $8, %r11 |
323 |
jl LABEL(tail) |
324 |
#endif |
325 |
|
326 |
mov %rcx, %r8 |
327 |
add %rax, %r8 |
328 |
sbb %r10, %r10 |
329 |
|
330 |
xor %rax, %r8 |
331 |
or %rcx, %r8 |
332 |
sub %r10, %r8 |
333 |
jnz LABEL(tail) |
334 |
|
335 |
mov %rax, (%rdi, %rdx) |
336 |
mov 8 (%rsi, %rdx), %rax |
337 |
add $8, %edx |
338 |
|
339 |
#ifdef USE_AS_STRNCPY |
340 |
sub $8, %r11 |
341 |
jl LABEL(tail) |
342 |
#endif |
343 |
|
344 |
mov %rcx, %r8 |
345 |
add %rax, %r8 |
346 |
sbb %r10, %r10 |
347 |
|
348 |
xor %rax, %r8 |
349 |
or %rcx, %r8 |
350 |
sub %r10, %r8 |
351 |
jnz LABEL(tail) |
352 |
|
353 |
mov %rax, (%rdi, %rdx) |
354 |
mov 8 (%rsi, %rdx), %rax |
355 |
add $8, %edx |
356 |
|
357 |
#ifdef USE_AS_STRNCPY |
358 |
sub $8, %r11 |
359 |
jl LABEL(tail) |
360 |
#endif |
361 |
|
362 |
mov %rcx, %r8 |
363 |
add %rax, %r8 |
364 |
sbb %r10, %r10 |
365 |
|
366 |
xor %rax, %r8 |
367 |
or %rcx, %r8 |
368 |
sub %r10, %r8 |
369 |
jnz LABEL(tail) |
370 |
|
371 |
cmp %r9, %rdx |
372 |
|
373 |
mov %rax, (%rdi, %rdx) |
374 |
mov 8 (%rsi, %rdx), %rax |
375 |
lea 8 (%rdx), %rdx |
376 |
|
377 |
jbe LABEL(64loop) |
378 |
|
379 |
LABEL(64after): |
380 |
|
381 |
LABEL(pretry): |
382 |
#ifdef PIC |
383 |
mov _rtld_local_ro@GOTPCREL(%rip), %r8 |
384 |
mov RTLD_GLOBAL_DL_CACHE2SIZEHALF(%r8), %r9 |
385 |
#else |
386 |
mov _dl_cache2sizehalf, %r9 |
387 |
#endif |
388 |
|
389 |
LABEL(pre): # 64-byte prefetch |
390 |
|
391 |
.p2align 4 |
392 |
|
393 |
LABEL(preloop): |
394 |
#ifdef USE_AS_STRNCPY |
395 |
sub $8, %r11 |
396 |
jl LABEL(tail) |
397 |
#endif |
398 |
|
399 |
mov %rcx, %r8 |
400 |
add %rax, %r8 |
401 |
sbb %r10, %r10 |
402 |
|
403 |
xor %rax, %r8 |
404 |
or %rcx, %r8 |
405 |
sub %r10, %r8 |
406 |
jnz LABEL(tail) |
407 |
|
408 |
mov %rax, (%rdi, %rdx) |
409 |
mov 8 (%rsi, %rdx), %rax |
410 |
add $8, %edx |
411 |
|
412 |
#ifdef USE_AS_STRNCPY |
413 |
sub $8, %r11 |
414 |
jl LABEL(tail) |
415 |
#endif |
416 |
|
417 |
mov %rcx, %r8 |
418 |
add %rax, %r8 |
419 |
sbb %r10, %r10 |
420 |
|
421 |
xor %rax, %r8 |
422 |
or %rcx, %r8 |
423 |
sub %r10, %r8 |
424 |
jnz LABEL(tail) |
425 |
|
426 |
mov %rax, (%rdi, %rdx) |
427 |
mov 8 (%rsi, %rdx), %rax |
428 |
add $8, %edx |
429 |
|
430 |
#ifdef USE_AS_STRNCPY |
431 |
sub $8, %r11 |
432 |
jl LABEL(tail) |
433 |
#endif |
434 |
|
435 |
mov %rcx, %r8 |
436 |
add %rax, %r8 |
437 |
sbb %r10, %r10 |
438 |
|
439 |
xor %rax, %r8 |
440 |
or %rcx, %r8 |
441 |
sub %r10, %r8 |
442 |
jnz LABEL(tail) |
443 |
|
444 |
mov %rax, (%rdi, %rdx) |
445 |
mov 8 (%rsi, %rdx), %rax |
446 |
add $8, %edx |
447 |
|
448 |
#ifdef USE_AS_STRNCPY |
449 |
sub $8, %r11 |
450 |
jl LABEL(tail) |
451 |
#endif |
452 |
|
453 |
mov %rcx, %r8 |
454 |
add %rax, %r8 |
455 |
sbb %r10, %r10 |
456 |
|
457 |
xor %rax, %r8 |
458 |
or %rcx, %r8 |
459 |
sub %r10, %r8 |
460 |
jnz LABEL(tail) |
461 |
|
462 |
mov %rax, (%rdi, %rdx) |
463 |
mov 8 (%rsi, %rdx), %rax |
464 |
add $8, %edx |
465 |
|
466 |
#ifdef USE_AS_STRNCPY |
467 |
sub $8, %r11 |
468 |
jl LABEL(tail) |
469 |
#endif |
470 |
|
471 |
mov %rcx, %r8 |
472 |
add %rax, %r8 |
473 |
sbb %r10, %r10 |
474 |
|
475 |
xor %rax, %r8 |
476 |
or %rcx, %r8 |
477 |
sub %r10, %r8 |
478 |
jnz LABEL(tail) |
479 |
|
480 |
mov %rax, (%rdi, %rdx) |
481 |
mov 8 (%rsi, %rdx), %rax |
482 |
add $8, %edx |
483 |
|
484 |
#ifdef USE_AS_STRNCPY |
485 |
sub $8, %r11 |
486 |
jl LABEL(tail) |
487 |
#endif |
488 |
|
489 |
mov %rcx, %r8 |
490 |
add %rax, %r8 |
491 |
sbb %r10, %r10 |
492 |
|
493 |
xor %rax, %r8 |
494 |
or %rcx, %r8 |
495 |
sub %r10, %r8 |
496 |
jnz LABEL(tail) |
497 |
|
498 |
mov %rax, (%rdi, %rdx) |
499 |
mov 8 (%rsi, %rdx), %rax |
500 |
add $8, %edx |
501 |
|
502 |
#ifdef USE_AS_STRNCPY |
503 |
sub $8, %r11 |
504 |
jl LABEL(tail) |
505 |
#endif |
506 |
|
507 |
mov %rcx, %r8 |
508 |
add %rax, %r8 |
509 |
sbb %r10, %r10 |
510 |
|
511 |
xor %rax, %r8 |
512 |
or %rcx, %r8 |
513 |
sub %r10, %r8 |
514 |
jnz LABEL(tail) |
515 |
|
516 |
mov %rax, (%rdi, %rdx) |
517 |
mov 8 (%rsi, %rdx), %rax |
518 |
add $8, %edx |
519 |
|
520 |
#ifdef USE_AS_STRNCPY |
521 |
sub $8, %r11 |
522 |
jl LABEL(tail) |
523 |
#endif |
524 |
|
525 |
mov %rcx, %r8 |
526 |
add %rax, %r8 |
527 |
sbb %r10, %r10 |
528 |
|
529 |
xor %rax, %r8 |
530 |
or %rcx, %r8 |
531 |
sub %r10, %r8 |
532 |
jnz LABEL(tail) |
533 |
|
534 |
cmp %r9, %rdx |
535 |
|
536 |
mov %rax, (%rdi, %rdx) |
537 |
prefetcht0 512 + 8 (%rdi, %rdx) |
538 |
mov 8 (%rsi, %rdx), %rax |
539 |
prefetcht0 512 + 8 (%rsi, %rdx) |
540 |
lea 8 (%rdx), %rdx |
541 |
|
542 |
jb LABEL(preloop) |
543 |
|
544 |
.p2align 4 |
545 |
|
546 |
LABEL(preafter): |
547 |
|
548 |
LABEL(NTtry): |
549 |
sfence |
550 |
|
551 |
LABEL(NT): # 64-byte NT |
552 |
|
553 |
.p2align 4 |
554 |
|
555 |
LABEL(NTloop): |
556 |
#ifdef USE_AS_STRNCPY |
557 |
sub $8, %r11 |
558 |
jl LABEL(tail) |
559 |
#endif |
560 |
|
561 |
mov %rcx, %r8 |
562 |
add %rax, %r8 |
563 |
sbb %r10, %r10 |
564 |
|
565 |
xor %rax, %r8 |
566 |
or %rcx, %r8 |
567 |
sub %r10, %r8 |
568 |
jnz LABEL(NTtail) |
569 |
|
570 |
movnti %rax, (%rdi, %rdx) |
571 |
mov 8 (%rsi, %rdx), %rax |
572 |
add $8, %rdx |
573 |
|
574 |
#ifdef USE_AS_STRNCPY |
575 |
sub $8, %r11 |
576 |
jl LABEL(tail) |
577 |
#endif |
578 |
|
579 |
mov %rcx, %r8 |
580 |
add %rax, %r8 |
581 |
sbb %r10, %r10 |
582 |
|
583 |
xor %rax, %r8 |
584 |
or %rcx, %r8 |
585 |
sub %r10, %r8 |
586 |
jnz LABEL(NTtail) |
587 |
|
588 |
movnti %rax, (%rdi, %rdx) |
589 |
mov 8 (%rsi, %rdx), %rax |
590 |
add $8, %rdx |
591 |
|
592 |
#ifdef USE_AS_STRNCPY |
593 |
sub $8, %r11 |
594 |
jl LABEL(tail) |
595 |
#endif |
596 |
|
597 |
mov %rcx, %r8 |
598 |
add %rax, %r8 |
599 |
sbb %r10, %r10 |
600 |
|
601 |
xor %rax, %r8 |
602 |
or %rcx, %r8 |
603 |
sub %r10, %r8 |
604 |
jnz LABEL(NTtail) |
605 |
|
606 |
movnti %rax, (%rdi, %rdx) |
607 |
mov 8 (%rsi, %rdx), %rax |
608 |
add $8, %rdx |
609 |
|
610 |
#ifdef USE_AS_STRNCPY |
611 |
sub $8, %r11 |
612 |
jl LABEL(tail) |
613 |
#endif |
614 |
|
615 |
mov %rcx, %r8 |
616 |
add %rax, %r8 |
617 |
sbb %r10, %r10 |
618 |
|
619 |
xor %rax, %r8 |
620 |
or %rcx, %r8 |
621 |
sub %r10, %r8 |
622 |
jnz LABEL(NTtail) |
623 |
|
624 |
movnti %rax, (%rdi, %rdx) |
625 |
mov 8 (%rsi, %rdx), %rax |
626 |
add $8, %rdx |
627 |
|
628 |
#ifdef USE_AS_STRNCPY |
629 |
sub $8, %r11 |
630 |
jl LABEL(tail) |
631 |
#endif |
632 |
|
633 |
mov %rcx, %r8 |
634 |
add %rax, %r8 |
635 |
sbb %r10, %r10 |
636 |
|
637 |
xor %rax, %r8 |
638 |
or %rcx, %r8 |
639 |
sub %r10, %r8 |
640 |
jnz LABEL(NTtail) |
641 |
|
642 |
movnti %rax, (%rdi, %rdx) |
643 |
mov 8 (%rsi, %rdx), %rax |
644 |
add $8, %rdx |
645 |
|
646 |
#ifdef USE_AS_STRNCPY |
647 |
sub $8, %r11 |
648 |
jl LABEL(tail) |
649 |
#endif |
650 |
|
651 |
mov %rcx, %r8 |
652 |
add %rax, %r8 |
653 |
sbb %r10, %r10 |
654 |
|
655 |
xor %rax, %r8 |
656 |
or %rcx, %r8 |
657 |
sub %r10, %r8 |
658 |
jnz LABEL(NTtail) |
659 |
|
660 |
movnti %rax, (%rdi, %rdx) |
661 |
mov 8 (%rsi, %rdx), %rax |
662 |
add $8, %rdx |
663 |
|
664 |
#ifdef USE_AS_STRNCPY |
665 |
sub $8, %r11 |
666 |
jl LABEL(tail) |
667 |
#endif |
668 |
|
669 |
mov %rcx, %r8 |
670 |
add %rax, %r8 |
671 |
sbb %r10, %r10 |
672 |
|
673 |
xor %rax, %r8 |
674 |
or %rcx, %r8 |
675 |
sub %r10, %r8 |
676 |
jnz LABEL(NTtail) |
677 |
|
678 |
movnti %rax, (%rdi, %rdx) |
679 |
mov 8 (%rsi, %rdx), %rax |
680 |
add $8, %rdx |
681 |
|
682 |
#ifdef USE_AS_STRNCPY |
683 |
sub $8, %r11 |
684 |
jl LABEL(tail) |
685 |
#endif |
686 |
|
687 |
mov %rcx, %r8 |
688 |
add %rax, %r8 |
689 |
sbb %r10, %r10 |
690 |
|
691 |
xor %rax, %r8 |
692 |
or %rcx, %r8 |
693 |
sub %r10, %r8 |
694 |
jnz LABEL(NTtail) |
695 |
|
696 |
movnti %rax, (%rdi, %rdx) |
697 |
mov 8 (%rsi, %rdx), %rax |
698 |
prefetchnta 768 + 8 (%rsi, %rdx) |
699 |
add $8, %rdx |
700 |
|
701 |
jmp LABEL(NTloop) |
702 |
|
703 |
.p2align 4 |
704 |
|
705 |
LABEL(NTtail): |
706 |
sfence |
707 |
|
708 |
.p2align 4 |
709 |
|
710 |
LABEL(NTafter): |
711 |
|
712 |
LABEL(tailtry): |
713 |
|
714 |
LABEL(tail): # 1-byte tail |
715 |
#ifdef USE_AS_STRNCPY |
716 |
add $8, %r11 |
717 |
#endif |
718 |
|
719 |
.p2align 4 |
720 |
|
721 |
LABEL(tailloop): |
722 |
#ifdef USE_AS_STRNCPY |
723 |
dec %r11 |
724 |
jl LABEL(exitn) |
725 |
#endif |
726 |
|
727 |
test %al, %al |
728 |
mov %al, (%rdi, %rdx) |
729 |
jz LABEL(exit) |
730 |
|
731 |
inc %rdx |
732 |
|
733 |
#ifdef USE_AS_STRNCPY |
734 |
dec %r11 |
735 |
jl LABEL(exitn) |
736 |
|
737 |
mov %ah, %al |
738 |
#endif |
739 |
|
740 |
test %ah, %ah |
741 |
mov %ah, (%rdi, %rdx) |
742 |
jz LABEL(exit) |
743 |
|
744 |
inc %rdx |
745 |
|
746 |
#ifdef USE_AS_STRNCPY |
747 |
dec %r11 |
748 |
jl LABEL(exitn) |
749 |
#endif |
750 |
|
751 |
shr $16, %rax |
752 |
|
753 |
test %al, %al |
754 |
mov %al, (%rdi, %rdx) |
755 |
jz LABEL(exit) |
756 |
|
757 |
inc %rdx |
758 |
|
759 |
#ifdef USE_AS_STRNCPY |
760 |
dec %r11 |
761 |
jl LABEL(exitn) |
762 |
|
763 |
mov %ah, %al |
764 |
#endif |
765 |
|
766 |
test %ah, %ah |
767 |
mov %ah, (%rdi, %rdx) |
768 |
jz LABEL(exit) |
769 |
|
770 |
shr $16, %rax |
771 |
inc %rdx |
772 |
|
773 |
jmp LABEL(tailloop) |
774 |
|
775 |
.p2align 4 |
776 |
|
777 |
LABEL(tailafter): |
778 |
|
779 |
LABEL(exit): |
780 |
#ifdef USE_AS_STRNCPY |
781 |
test %r11, %r11 |
782 |
mov %r11, %rcx |
783 |
|
784 |
#ifdef USE_AS_STPCPY |
785 |
lea (%rdi, %rdx), %r8 |
786 |
#else |
787 |
mov %rdi, %r8 |
788 |
#endif |
789 |
|
790 |
jz 2f |
791 |
|
792 |
xor %eax, %eax # bzero () would do too, but usually there are only a handful of bytes left
793 |
shr $3, %rcx |
794 |
lea 1 (%rdi, %rdx), %rdi |
795 |
jz 1f |
796 |
|
797 |
rep stosq |
798 |
|
799 |
1: |
800 |
mov %r11d, %ecx |
801 |
and $7, %ecx |
802 |
jz 2f |
803 |
|
804 |
.p2align 4,, 3 |
805 |
|
135 |
3: |
806 |
3: |
136 |
/* Note that stpcpy needs to return with the address of the NUL
807 |
dec %ecx |
137 |
byte. */ |
808 |
mov %al, (%rdi, %rcx) |
138 |
movb %al, (%rdx) /* 1st byte. */ |
809 |
jnz 3b |
139 |
testb %al, %al /* Is it NUL. */ |
810 |
|
140 |
jz 4f /* yes, finish. */ |
811 |
.p2align 4,, 3 |
141 |
incq %rdx /* Increment destination. */ |
812 |
|
142 |
movb %ah, (%rdx) /* 2nd byte. */ |
813 |
2: |
143 |
testb %ah, %ah /* Is it NUL?. */ |
814 |
mov %r8, %rax |
144 |
jz 4f /* yes, finish. */ |
815 |
ret |
145 |
incq %rdx /* Increment destination. */ |
816 |
|
146 |
shrq $16, %rax /* Shift... */ |
817 |
#endif |
147 |
jmp 3b /* and look at next two bytes in %rax. */ |
818 |
|
|
|
819 |
.p2align 4 |
148 |
|
820 |
|
149 |
4: |
821 |
LABEL(exitn): |
150 |
#ifdef USE_AS_STPCPY |
822 |
#ifdef USE_AS_STPCPY |
151 |
movq %rdx, %rax /* Destination is return value. */ |
823 |
lea (%rdi, %rdx), %rax |
152 |
#else |
824 |
#else |
153 |
movq %rdi, %rax /* Original destination is return value. */
825 |
mov %rdi, %rax |
154 |
#endif |
826 |
#endif |
155 |
retq |
827 |
|
156 |
END (BP_SYM (STRCPY)) |
828 |
ret |
157 |
#ifndef USE_AS_STPCPY |
829 |
|
158 |
libc_hidden_builtin_def (strcpy) |
830 |
END (STRCPY) |
|
|
831 |
#if !defined USE_AS_STPCPY && !defined USE_AS_STRNCPY |
832 |
libc_hidden_builtin_def (STRCPY) |
159 |
#endif |
833 |
#endif |