Line
Link Here
|
|
* sysdeps/alpha/memchr.S: Use prefetch load. |
|
* sysdeps/alpha/memchr.S: Use prefetch load. |
1 |
* sysdeps/alpha/alphaev6/memchr.S: Likewise. |
1 |
* sysdeps/alpha/alphaev6/memchr.S: Likewise. |
2 |
-- a/ports/sysdeps/alpha/alphaev6/memchr.S |
2 |
++ b/ports/sysdeps/alpha/alphaev6/memchr.S |
Lines 127-133
$first_quad:
Link Here
|
127 |
cmpbge $31, $1, $2 # E : |
127 |
cmpbge $31, $1, $2 # E : |
128 |
bne $2, $found_it # U : |
128 |
bne $2, $found_it # U : |
129 |
# At least one byte left to process. |
129 |
# At least one byte left to process. |
130 |
ldq $1, 8($0) # L : |
130 |
ldq $31, 8($0) # L : |
131 |
subq $5, 1, $18 # E : U L U L |
131 |
subq $5, 1, $18 # E : U L U L |
132 |
|
132 |
|
133 |
addq $0, 8, $0 # E : |
133 |
addq $0, 8, $0 # E : |
Lines 143-180
$first_quad:
Link Here
|
143 |
and $4, 8, $4 # E : odd number of quads? |
143 |
and $4, 8, $4 # E : odd number of quads? |
144 |
bne $4, $odd_quad_count # U : |
144 |
bne $4, $odd_quad_count # U : |
145 |
# At least three quads remain to be accessed |
145 |
# At least three quads remain to be accessed |
146 |
mov $1, $4 # E : L U L U : move prefetched value to correct reg |
146 |
nop # E : L U L U : padding only (no prefetched value to move any more) |
147 |
|
147 |
|
148 |
.align 4 |
148 |
.align 4 |
149 |
$unrolled_loop: |
149 |
$unrolled_loop: |
150 |
ldq $1, 8($0) # L : prefetch $1 |
150 |
ldq $1, 0($0) # L : load quad |
151 |
xor $17, $4, $2 # E : |
151 |
xor $17, $1, $2 # E : |
152 |
cmpbge $31, $2, $2 # E : |
152 |
ldq $31, 8($0) # L : prefetch next quad |
153 |
bne $2, $found_it # U : U L U L |
153 |
cmpbge $31, $2, $2 # E : U L U L |
154 |
|
154 |
|
|
|
155 |
bne $2, $found_it # U : |
155 |
addq $0, 8, $0 # E : |
156 |
addq $0, 8, $0 # E : |
156 |
nop # E : |
157 |
nop # E : |
157 |
nop # E : |
158 |
nop # E : |
158 |
nop # E : |
|
|
159 |
|
159 |
|
160 |
$odd_quad_count: |
160 |
$odd_quad_count: |
|
|
161 |
ldq $1, 0($0) # L : load quad |
161 |
xor $17, $1, $2 # E : |
162 |
xor $17, $1, $2 # E : |
162 |
ldq $4, 8($0) # L : prefetch $4 |
163 |
ldq $31, 8($0) # L : prefetch next quad |
163 |
cmpbge $31, $2, $2 # E : |
164 |
cmpbge $31, $2, $2 # E : |
164 |
addq $0, 8, $6 # E : |
|
|
165 |
|
165 |
|
|
|
166 |
addq $0, 8, $6 # E : |
166 |
bne $2, $found_it # U : |
167 |
bne $2, $found_it # U : |
167 |
cmpult $6, $18, $6 # E : |
168 |
cmpult $6, $18, $6 # E : |
168 |
addq $0, 8, $0 # E : |
169 |
addq $0, 8, $0 # E : |
169 |
nop # E : |
|
|
170 |
|
170 |
|
171 |
bne $6, $unrolled_loop # U : |
171 |
bne $6, $unrolled_loop # U : |
172 |
mov $4, $1 # E : move prefetched value into $1 |
|
|
173 |
nop # E : |
172 |
nop # E : |
174 |
nop # E : |
173 |
nop # E : |
175 |
|
|
|
176 |
$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do |
177 |
nop # E : |
174 |
nop # E : |
|
|
175 |
|
176 |
$final: ldq $1, 0($0) # L : load last quad |
177 |
subq $5, $0, $18 # E : $18 <- number of bytes left to do |
178 |
nop # E : |
178 |
nop # E : |
179 |
bne $18, $last_quad # U : |
179 |
bne $18, $last_quad # U : |
180 |
|
180 |
|
181 |
-- a/ports/sysdeps/alpha/memchr.S |
181 |
++ b/ports/sysdeps/alpha/memchr.S |
Lines 119-125
$first_quad:
Link Here
|
119 |
|
119 |
|
120 |
# At least one byte left to process. |
120 |
# At least one byte left to process. |
121 |
|
121 |
|
122 |
ldq t0, 8(v0) # e0 : |
122 |
ldq zero, 8(v0) # e0 : prefetch next quad |
123 |
subq t4, 1, a2 # .. e1 : |
123 |
subq t4, 1, a2 # .. e1 : |
124 |
addq v0, 8, v0 #-e0 : |
124 |
addq v0, 8, v0 #-e0 : |
125 |
|
125 |
|
Lines 138-156
$first_quad:
Link Here
|
138 |
|
138 |
|
139 |
# At least three quads remain to be accessed |
139 |
# At least three quads remain to be accessed |
140 |
|
140 |
|
141 |
mov t0, t3 # e0 : move prefetched value to correct reg |
|
|
142 |
|
143 |
.align 4 |
141 |
.align 4 |
144 |
$unrolled_loop: |
142 |
$unrolled_loop: |
145 |
ldq t0, 8(v0) #-e0 : prefetch t0 |
143 |
ldq t0, 0(v0) # e0 : load quad |
146 |
xor a1, t3, t1 # .. e1 : |
144 |
xor a1, t0, t1 # .. e1 : |
147 |
cmpbge zero, t1, t1 # e0 : |
145 |
ldq zero, 8(v0) # e0 : prefetch next quad |
148 |
bne t1, $found_it # .. e1 : |
146 |
cmpbge zero, t1, t1 # .. e1: |
|
|
147 |
bne t1, $found_it # e0 : |
149 |
|
148 |
|
150 |
addq v0, 8, v0 #-e0 : |
149 |
addq v0, 8, v0 # e1 : |
151 |
$odd_quad_count: |
150 |
$odd_quad_count: |
|
|
151 |
ldq t0, 0(v0) # e0 : load quad |
152 |
xor a1, t0, t1 # .. e1 : |
152 |
xor a1, t0, t1 # .. e1 : |
153 |
ldq t3, 8(v0) # e0 : prefetch t3 |
153 |
ldq zero, 8(v0) # e0 : prefetch next quad |
154 |
cmpbge zero, t1, t1 # .. e1 : |
154 |
cmpbge zero, t1, t1 # .. e1 : |
155 |
addq v0, 8, t5 #-e0 : |
155 |
addq v0, 8, t5 #-e0 : |
156 |
bne t1, $found_it # .. e1 : |
156 |
bne t1, $found_it # .. e1 : |
Lines 159-166
$odd_quad_count:
Link Here
|
159 |
addq v0, 8, v0 # .. e1 : |
159 |
addq v0, 8, v0 # .. e1 : |
160 |
bne t5, $unrolled_loop #-e1 : |
160 |
bne t5, $unrolled_loop #-e1 : |
161 |
|
161 |
|
162 |
mov t3, t0 # e0 : move prefetched value into t0 |
162 |
$final: ldq t0, 0(v0) # e0 : load last quad |
163 |
$final: subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do |
163 |
subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do |
164 |
bne a2, $last_quad # e1 : |
164 |
bne a2, $last_quad # e1 : |
165 |
|
165 |
|
166 |
$not_found: |
166 |
$not_found: |