Line 0
Link Here
|
|
|
1 |
/* ieee754-df.S double-precision floating point support for ARM |
2 |
|
3 |
Copyright (C) 2003 Free Software Foundation, Inc. |
4 |
Contributed by Nicolas Pitre (nico@cam.org) |
5 |
|
6 |
This file is free software; you can redistribute it and/or modify it |
7 |
under the terms of the GNU General Public License as published by the |
8 |
Free Software Foundation; either version 2, or (at your option) any |
9 |
later version. |
10 |
|
11 |
In addition to the permissions in the GNU General Public License, the |
12 |
Free Software Foundation gives you unlimited permission to link the |
13 |
compiled version of this file into combinations with other programs, |
14 |
and to distribute those combinations without any restriction coming |
15 |
from the use of this file. (The General Public License restrictions |
16 |
do apply in other respects; for example, they cover modification of |
17 |
the file, and distribution when not linked into a combine |
18 |
executable.) |
19 |
|
20 |
This file is distributed in the hope that it will be useful, but |
21 |
WITHOUT ANY WARRANTY; without even the implied warranty of |
22 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
23 |
General Public License for more details. |
24 |
|
25 |
You should have received a copy of the GNU General Public License |
26 |
along with this program; see the file COPYING. If not, write to |
27 |
the Free Software Foundation, 59 Temple Place - Suite 330, |
28 |
Boston, MA 02111-1307, USA. */ |
29 |
|
30 |
/* |
31 |
* Notes: |
32 |
* |
33 |
* The goal of this code is to be as fast as possible. This is |
34 |
* not meant to be easy to understand for the casual reader. |
35 |
* For slightly simpler code please see the single precision version |
36 |
* of this file. |
37 |
* |
38 |
 * Only the default rounding mode is intended for best performance.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performance.
41 |
*/ |
42 |
|
43 |
|
44 |
@ For FPA, float words are always big-endian. |
45 |
@ For VFP, floats words follow the memory system mode. |
46 |
#if defined(__VFP_FP__) && !defined(__ARMEB__) |
47 |
#define xl r0 |
48 |
#define xh r1 |
49 |
#define yl r2 |
50 |
#define yh r3 |
51 |
#else |
52 |
#define xh r0 |
53 |
#define xl r1 |
54 |
#define yh r2 |
55 |
#define yl r3 |
56 |
#endif |
57 |
|
58 |
|
59 |
#ifdef L_negdf2 |
60 |
|
61 |
ARM_FUNC_START negdf2 |
62 |
@ flip sign bit |
63 |
eor xh, xh, #0x80000000 |
64 |
RET |
65 |
|
66 |
FUNC_END negdf2 |
67 |
|
68 |
#endif |
69 |
|
70 |
#ifdef L_addsubdf3 |
71 |
|
72 |
ARM_FUNC_START subdf3 |
73 |
@ flip sign bit of second arg |
74 |
eor yh, yh, #0x80000000 |
75 |
#if defined(__thumb__) && !defined(__THUMB_INTERWORK__) |
76 |
b 1f @ Skip Thumb-code prologue |
77 |
#endif |
78 |
|
79 |
ARM_FUNC_START adddf3 |
80 |
|
81 |
1: @ Compare both args, return zero if equal but the sign. |
82 |
teq xl, yl |
83 |
eoreq ip, xh, yh |
84 |
teqeq ip, #0x80000000 |
85 |
beq LSYM(Lad_z) |
86 |
|
87 |
@ If first arg is 0 or -0, return second arg. |
88 |
@ If second arg is 0 or -0, return first arg. |
89 |
orrs ip, xl, xh, lsl #1 |
90 |
moveq xl, yl |
91 |
moveq xh, yh |
92 |
orrnes ip, yl, yh, lsl #1 |
93 |
RETc(eq) |
94 |
|
95 |
stmfd sp!, {r4, r5, lr} |
96 |
|
97 |
@ Mask out exponents. |
98 |
mov ip, #0x7f000000 |
99 |
orr ip, ip, #0x00f00000 |
100 |
and r4, xh, ip |
101 |
and r5, yh, ip |
102 |
|
103 |
@ If either of them is 0x7ff, result will be INF or NAN |
104 |
teq r4, ip |
105 |
teqne r5, ip |
106 |
beq LSYM(Lad_i) |
107 |
|
108 |
@ Compute exponent difference. Make largest exponent in r4, |
109 |
@ corresponding arg in xh-xl, and positive exponent difference in r5. |
110 |
subs r5, r5, r4 |
111 |
rsblt r5, r5, #0 |
112 |
ble 1f |
113 |
add r4, r4, r5 |
114 |
eor yl, xl, yl |
115 |
eor yh, xh, yh |
116 |
eor xl, yl, xl |
117 |
eor xh, yh, xh |
118 |
eor yl, xl, yl |
119 |
eor yh, xh, yh |
120 |
1: |
121 |
|
122 |
@ If exponent difference is too large, return largest argument |
123 |
@ already in xh-xl. We need up to 54 bit to handle proper rounding |
124 |
@ of 0x1p54 - 1.1. |
125 |
cmp r5, #(54 << 20) |
126 |
RETLDM "r4, r5" hi |
127 |
|
128 |
@ Convert mantissa to signed integer. |
129 |
tst xh, #0x80000000 |
130 |
bic xh, xh, ip, lsl #1 |
131 |
orr xh, xh, #0x00100000 |
132 |
beq 1f |
133 |
rsbs xl, xl, #0 |
134 |
rsc xh, xh, #0 |
135 |
1: |
136 |
tst yh, #0x80000000 |
137 |
bic yh, yh, ip, lsl #1 |
138 |
orr yh, yh, #0x00100000 |
139 |
beq 1f |
140 |
rsbs yl, yl, #0 |
141 |
rsc yh, yh, #0 |
142 |
1: |
143 |
@ If exponent == difference, one or both args were denormalized. |
144 |
@ Since this is not common case, rescale them off line. |
145 |
teq r4, r5 |
146 |
beq LSYM(Lad_d) |
147 |
LSYM(Lad_x): |
148 |
@ Scale down second arg with exponent difference. |
149 |
@ Apply shift one bit left to first arg and the rest to second arg |
150 |
@ to simplify things later, but only if exponent does not become 0. |
151 |
mov ip, #0 |
152 |
movs r5, r5, lsr #20 |
153 |
beq 3f |
154 |
teq r4, #(1 << 20) |
155 |
beq 1f |
156 |
movs xl, xl, lsl #1 |
157 |
adc xh, ip, xh, lsl #1 |
158 |
sub r4, r4, #(1 << 20) |
159 |
subs r5, r5, #1 |
160 |
beq 3f |
161 |
|
162 |
@ Shift yh-yl right per r5, keep leftover bits into ip. |
163 |
1: rsbs lr, r5, #32 |
164 |
blt 2f |
165 |
mov ip, yl, lsl lr |
166 |
mov yl, yl, lsr r5 |
167 |
orr yl, yl, yh, lsl lr |
168 |
mov yh, yh, asr r5 |
169 |
b 3f |
170 |
2: sub r5, r5, #32 |
171 |
add lr, lr, #32 |
172 |
cmp yl, #1 |
173 |
adc ip, ip, yh, lsl lr |
174 |
mov yl, yh, asr r5 |
175 |
mov yh, yh, asr #32 |
176 |
3: |
177 |
@ the actual addition |
178 |
adds xl, xl, yl |
179 |
adc xh, xh, yh |
180 |
|
181 |
@ We now have a result in xh-xl-ip. |
182 |
@ Keep absolute value in xh-xl-ip, sign in r5. |
183 |
ands r5, xh, #0x80000000 |
184 |
bpl LSYM(Lad_p) |
185 |
rsbs ip, ip, #0 |
186 |
rscs xl, xl, #0 |
187 |
rsc xh, xh, #0 |
188 |
|
189 |
@ Determine how to normalize the result. |
190 |
LSYM(Lad_p): |
191 |
cmp xh, #0x00100000 |
192 |
bcc LSYM(Lad_l) |
193 |
cmp xh, #0x00200000 |
194 |
bcc LSYM(Lad_r0) |
195 |
cmp xh, #0x00400000 |
196 |
bcc LSYM(Lad_r1) |
197 |
|
198 |
@ Result needs to be shifted right. |
199 |
movs xh, xh, lsr #1 |
200 |
movs xl, xl, rrx |
201 |
movs ip, ip, rrx |
202 |
orrcs ip, ip, #1 |
203 |
add r4, r4, #(1 << 20) |
204 |
LSYM(Lad_r1): |
205 |
movs xh, xh, lsr #1 |
206 |
movs xl, xl, rrx |
207 |
movs ip, ip, rrx |
208 |
orrcs ip, ip, #1 |
209 |
add r4, r4, #(1 << 20) |
210 |
|
211 |
@ Our result is now properly aligned into xh-xl, remaining bits in ip. |
212 |
@ Round with MSB of ip. If halfway between two numbers, round towards |
213 |
@ LSB of xl = 0. |
214 |
LSYM(Lad_r0): |
215 |
adds xl, xl, ip, lsr #31 |
216 |
adc xh, xh, #0 |
217 |
teq ip, #0x80000000 |
218 |
biceq xl, xl, #1 |
219 |
|
220 |
@ One extreme rounding case may add a new MSB. Adjust exponent. |
221 |
@ That MSB will be cleared when exponent is merged below. |
222 |
tst xh, #0x00200000 |
223 |
addne r4, r4, #(1 << 20) |
224 |
|
225 |
@ Make sure we did not bust our exponent. |
226 |
adds ip, r4, #(1 << 20) |
227 |
bmi LSYM(Lad_o) |
228 |
|
229 |
@ Pack final result together. |
230 |
LSYM(Lad_e): |
231 |
bic xh, xh, #0x00300000 |
232 |
orr xh, xh, r4 |
233 |
orr xh, xh, r5 |
234 |
RETLDM "r4, r5" |
235 |
|
236 |
LSYM(Lad_l): |
237 |
@ Result must be shifted left and exponent adjusted. |
238 |
@ No rounding necessary since ip will always be 0. |
239 |
#if __ARM_ARCH__ < 5 |
240 |
|
241 |
teq xh, #0 |
242 |
movne r3, #-11 |
243 |
moveq r3, #21 |
244 |
moveq xh, xl |
245 |
moveq xl, #0 |
246 |
mov r2, xh |
247 |
movs ip, xh, lsr #16 |
248 |
moveq r2, r2, lsl #16 |
249 |
addeq r3, r3, #16 |
250 |
tst r2, #0xff000000 |
251 |
moveq r2, r2, lsl #8 |
252 |
addeq r3, r3, #8 |
253 |
tst r2, #0xf0000000 |
254 |
moveq r2, r2, lsl #4 |
255 |
addeq r3, r3, #4 |
256 |
tst r2, #0xc0000000 |
257 |
moveq r2, r2, lsl #2 |
258 |
addeq r3, r3, #2 |
259 |
tst r2, #0x80000000 |
260 |
addeq r3, r3, #1 |
261 |
|
262 |
#else |
263 |
|
264 |
teq xh, #0 |
265 |
moveq xh, xl |
266 |
moveq xl, #0 |
267 |
clz r3, xh |
268 |
addeq r3, r3, #32 |
269 |
sub r3, r3, #11 |
270 |
|
271 |
#endif |
272 |
|
273 |
@ determine how to shift the value. |
274 |
subs r2, r3, #32 |
275 |
bge 2f |
276 |
adds r2, r2, #12 |
277 |
ble 1f |
278 |
|
279 |
@ shift value left 21 to 31 bits, or actually right 11 to 1 bits |
280 |
@ since a register switch happened above. |
281 |
add ip, r2, #20 |
282 |
rsb r2, r2, #12 |
283 |
mov xl, xh, lsl ip |
284 |
mov xh, xh, lsr r2 |
285 |
b 3f |
286 |
|
287 |
@ actually shift value left 1 to 20 bits, which might also represent |
288 |
@ 32 to 52 bits if counting the register switch that happened earlier. |
289 |
1: add r2, r2, #20 |
290 |
2: rsble ip, r2, #32 |
291 |
mov xh, xh, lsl r2 |
292 |
orrle xh, xh, xl, lsr ip |
293 |
movle xl, xl, lsl r2 |
294 |
|
295 |
@ adjust exponent accordingly. |
296 |
3: subs r4, r4, r3, lsl #20 |
297 |
bgt LSYM(Lad_e) |
298 |
|
299 |
@ Exponent too small, denormalize result. |
300 |
@ Find out proper shift value. |
301 |
mvn r4, r4, asr #20 |
302 |
subs r4, r4, #30 |
303 |
bge 2f |
304 |
adds r4, r4, #12 |
305 |
bgt 1f |
306 |
|
307 |
@ shift result right of 1 to 20 bits, sign is in r5. |
308 |
add r4, r4, #20 |
309 |
rsb r2, r4, #32 |
310 |
mov xl, xl, lsr r4 |
311 |
orr xl, xl, xh, lsl r2 |
312 |
orr xh, r5, xh, lsr r4 |
313 |
RETLDM "r4, r5" |
314 |
|
315 |
@ shift result right of 21 to 31 bits, or left 11 to 1 bits after |
316 |
@ a register switch from xh to xl. |
317 |
1: rsb r4, r4, #12 |
318 |
rsb r2, r4, #32 |
319 |
mov xl, xl, lsr r2 |
320 |
orr xl, xl, xh, lsl r4 |
321 |
mov xh, r5 |
322 |
RETLDM "r4, r5" |
323 |
|
324 |
@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch |
325 |
@ from xh to xl. |
326 |
2: mov xl, xh, lsr r4 |
327 |
mov xh, r5 |
328 |
RETLDM "r4, r5" |
329 |
|
330 |
@ Adjust exponents for denormalized arguments. |
331 |
LSYM(Lad_d): |
332 |
teq r4, #0 |
333 |
eoreq xh, xh, #0x00100000 |
334 |
addeq r4, r4, #(1 << 20) |
335 |
eor yh, yh, #0x00100000 |
336 |
subne r5, r5, #(1 << 20) |
337 |
b LSYM(Lad_x) |
338 |
|
339 |
@ Result is x - x = 0, unless x = INF or NAN. |
340 |
LSYM(Lad_z): |
341 |
sub ip, ip, #0x00100000 @ ip becomes 0x7ff00000 |
342 |
and r2, xh, ip |
343 |
teq r2, ip |
344 |
orreq xh, ip, #0x00080000 |
345 |
movne xh, #0 |
346 |
mov xl, #0 |
347 |
RET |
348 |
|
349 |
@ Overflow: return INF. |
350 |
LSYM(Lad_o): |
351 |
orr xh, r5, #0x7f000000 |
352 |
orr xh, xh, #0x00f00000 |
353 |
mov xl, #0 |
354 |
RETLDM "r4, r5" |
355 |
|
356 |
@ At least one of x or y is INF/NAN. |
357 |
@ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) |
358 |
@ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) |
359 |
@ if either is NAN: return NAN |
360 |
@ if opposite sign: return NAN |
361 |
@ return xh-xl (which is INF or -INF) |
362 |
LSYM(Lad_i): |
363 |
teq r4, ip |
364 |
movne xh, yh |
365 |
movne xl, yl |
366 |
teqeq r5, ip |
367 |
RETLDM "r4, r5" ne |
368 |
|
369 |
orrs r4, xl, xh, lsl #12 |
370 |
orreqs r4, yl, yh, lsl #12 |
371 |
teqeq xh, yh |
372 |
orrne xh, r5, #0x00080000 |
373 |
movne xl, #0 |
374 |
RETLDM "r4, r5" |
375 |
|
376 |
FUNC_END subdf3 |
377 |
FUNC_END adddf3 |
378 |
|
379 |
ARM_FUNC_START floatunsidf |
380 |
teq r0, #0 |
381 |
moveq r1, #0 |
382 |
RETc(eq) |
383 |
stmfd sp!, {r4, r5, lr} |
384 |
mov r4, #(0x400 << 20) @ initial exponent |
385 |
add r4, r4, #((52-1) << 20) |
386 |
mov r5, #0 @ sign bit is 0 |
387 |
mov xl, r0 |
388 |
mov xh, #0 |
389 |
b LSYM(Lad_l) |
390 |
|
391 |
FUNC_END floatunsidf |
392 |
|
393 |
ARM_FUNC_START floatsidf |
394 |
teq r0, #0 |
395 |
moveq r1, #0 |
396 |
RETc(eq) |
397 |
stmfd sp!, {r4, r5, lr} |
398 |
mov r4, #(0x400 << 20) @ initial exponent |
399 |
add r4, r4, #((52-1) << 20) |
400 |
ands r5, r0, #0x80000000 @ sign bit in r5 |
401 |
rsbmi r0, r0, #0 @ absolute value |
402 |
mov xl, r0 |
403 |
mov xh, #0 |
404 |
b LSYM(Lad_l) |
405 |
|
406 |
FUNC_END floatsidf |
407 |
|
408 |
ARM_FUNC_START extendsfdf2 |
409 |
movs r2, r0, lsl #1 |
410 |
beq 1f @ value is 0.0 or -0.0 |
411 |
mov xh, r2, asr #3 @ stretch exponent |
412 |
mov xh, xh, rrx @ retrieve sign bit |
413 |
mov xl, r2, lsl #28 @ retrieve remaining bits |
414 |
ands r2, r2, #0xff000000 @ isolate exponent |
415 |
beq 2f @ exponent was 0 but not mantissa |
416 |
teq r2, #0xff000000 @ check if INF or NAN |
417 |
eorne xh, xh, #0x38000000 @ fixup exponent otherwise. |
418 |
RET |
419 |
|
420 |
1: mov xh, r0 |
421 |
mov xl, #0 |
422 |
RET |
423 |
|
424 |
2: @ value was denormalized. We can normalize it now. |
425 |
stmfd sp!, {r4, r5, lr} |
426 |
mov r4, #(0x380 << 20) @ setup corresponding exponent |
427 |
add r4, r4, #(1 << 20) |
428 |
and r5, xh, #0x80000000 @ move sign bit in r5 |
429 |
bic xh, xh, #0x80000000 |
430 |
b LSYM(Lad_l) |
431 |
|
432 |
FUNC_END extendsfdf2 |
433 |
|
434 |
#endif /* L_addsubdf3 */ |
435 |
|
436 |
#ifdef L_muldivdf3 |
437 |
|
438 |
ARM_FUNC_START muldf3 |
439 |
|
440 |
stmfd sp!, {r4, r5, r6, lr} |
441 |
|
442 |
@ Mask out exponents. |
443 |
mov ip, #0x7f000000 |
444 |
orr ip, ip, #0x00f00000 |
445 |
and r4, xh, ip |
446 |
and r5, yh, ip |
447 |
|
448 |
@ Trap any INF/NAN. |
449 |
teq r4, ip |
450 |
teqne r5, ip |
451 |
beq LSYM(Lml_s) |
452 |
|
453 |
@ Trap any multiplication by 0. |
454 |
orrs r6, xl, xh, lsl #1 |
455 |
orrnes r6, yl, yh, lsl #1 |
456 |
beq LSYM(Lml_z) |
457 |
|
458 |
@ Shift exponents right one bit to make room for overflow bit. |
459 |
@ If either of them is 0, scale denormalized arguments off line. |
460 |
@ Then add both exponents together. |
461 |
movs r4, r4, lsr #1 |
462 |
teqne r5, #0 |
463 |
beq LSYM(Lml_d) |
464 |
LSYM(Lml_x): |
465 |
add r4, r4, r5, asr #1 |
466 |
|
467 |
@ Preserve final sign in r4 along with exponent for now. |
468 |
teq xh, yh |
469 |
orrmi r4, r4, #0x8000 |
470 |
|
471 |
@ Convert mantissa to unsigned integer. |
472 |
bic xh, xh, ip, lsl #1 |
473 |
bic yh, yh, ip, lsl #1 |
474 |
orr xh, xh, #0x00100000 |
475 |
orr yh, yh, #0x00100000 |
476 |
|
477 |
#if __ARM_ARCH__ < 4 |
478 |
|
479 |
@ Well, no way to make it shorter without the umull instruction. |
480 |
@ We must perform that 53 x 53 bit multiplication by hand. |
481 |
stmfd sp!, {r7, r8, r9, sl, fp} |
482 |
mov r7, xl, lsr #16 |
483 |
mov r8, yl, lsr #16 |
484 |
mov r9, xh, lsr #16 |
485 |
mov sl, yh, lsr #16 |
486 |
bic xl, xl, r7, lsl #16 |
487 |
bic yl, yl, r8, lsl #16 |
488 |
bic xh, xh, r9, lsl #16 |
489 |
bic yh, yh, sl, lsl #16 |
490 |
mul ip, xl, yl |
491 |
mul fp, xl, r8 |
492 |
mov lr, #0 |
493 |
adds ip, ip, fp, lsl #16 |
494 |
adc lr, lr, fp, lsr #16 |
495 |
mul fp, r7, yl |
496 |
adds ip, ip, fp, lsl #16 |
497 |
adc lr, lr, fp, lsr #16 |
498 |
mul fp, xl, sl |
499 |
mov r5, #0 |
500 |
adds lr, lr, fp, lsl #16 |
501 |
adc r5, r5, fp, lsr #16 |
502 |
mul fp, r7, yh |
503 |
adds lr, lr, fp, lsl #16 |
504 |
adc r5, r5, fp, lsr #16 |
505 |
mul fp, xh, r8 |
506 |
adds lr, lr, fp, lsl #16 |
507 |
adc r5, r5, fp, lsr #16 |
508 |
mul fp, r9, yl |
509 |
adds lr, lr, fp, lsl #16 |
510 |
adc r5, r5, fp, lsr #16 |
511 |
mul fp, xh, sl |
512 |
mul r6, r9, sl |
513 |
adds r5, r5, fp, lsl #16 |
514 |
adc r6, r6, fp, lsr #16 |
515 |
mul fp, r9, yh |
516 |
adds r5, r5, fp, lsl #16 |
517 |
adc r6, r6, fp, lsr #16 |
518 |
mul fp, xl, yh |
519 |
adds lr, lr, fp |
520 |
mul fp, r7, sl |
521 |
adcs r5, r5, fp |
522 |
mul fp, xh, yl |
523 |
adc r6, r6, #0 |
524 |
adds lr, lr, fp |
525 |
mul fp, r9, r8 |
526 |
adcs r5, r5, fp |
527 |
mul fp, r7, r8 |
528 |
adc r6, r6, #0 |
529 |
adds lr, lr, fp |
530 |
mul fp, xh, yh |
531 |
adcs r5, r5, fp |
532 |
adc r6, r6, #0 |
533 |
ldmfd sp!, {r7, r8, r9, sl, fp} |
534 |
|
535 |
#else |
536 |
|
537 |
@ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits. |
538 |
umull ip, lr, xl, yl |
539 |
mov r5, #0 |
540 |
umlal lr, r5, xl, yh |
541 |
umlal lr, r5, xh, yl |
542 |
mov r6, #0 |
543 |
umlal r5, r6, xh, yh |
544 |
|
545 |
#endif |
546 |
|
547 |
@ The LSBs in ip are only significant for the final rounding. |
548 |
@ Fold them into one bit of lr. |
549 |
teq ip, #0 |
550 |
orrne lr, lr, #1 |
551 |
|
552 |
@ Put final sign in xh. |
553 |
mov xh, r4, lsl #16 |
554 |
bic r4, r4, #0x8000 |
555 |
|
556 |
@ Adjust result if one extra MSB appeared (one of four times). |
557 |
tst r6, #(1 << 9) |
558 |
beq 1f |
559 |
add r4, r4, #(1 << 19) |
560 |
movs r6, r6, lsr #1 |
561 |
movs r5, r5, rrx |
562 |
movs lr, lr, rrx |
563 |
orrcs lr, lr, #1 |
564 |
1: |
565 |
@ Scale back to 53 bits. |
566 |
@ xh contains sign bit already. |
567 |
orr xh, xh, r6, lsl #12 |
568 |
orr xh, xh, r5, lsr #20 |
569 |
mov xl, r5, lsl #12 |
570 |
orr xl, xl, lr, lsr #20 |
571 |
|
572 |
@ Apply exponent bias, check range for underflow. |
573 |
sub r4, r4, #0x00f80000 |
574 |
subs r4, r4, #0x1f000000 |
575 |
ble LSYM(Lml_u) |
576 |
|
577 |
@ Round the result. |
578 |
movs lr, lr, lsl #12 |
579 |
bpl 1f |
580 |
adds xl, xl, #1 |
581 |
adc xh, xh, #0 |
582 |
teq lr, #0x80000000 |
583 |
biceq xl, xl, #1 |
584 |
|
585 |
@ Rounding may have produced an extra MSB here. |
586 |
@ The extra bit is cleared before merging the exponent below. |
587 |
tst xh, #0x00200000 |
588 |
addne r4, r4, #(1 << 19) |
589 |
1: |
590 |
@ Check exponent for overflow. |
591 |
adds ip, r4, #(1 << 19) |
592 |
tst ip, #(1 << 30) |
593 |
bne LSYM(Lml_o) |
594 |
|
595 |
@ Add final exponent. |
596 |
bic xh, xh, #0x00300000 |
597 |
orr xh, xh, r4, lsl #1 |
598 |
RETLDM "r4, r5, r6" |
599 |
|
600 |
@ Result is 0, but determine sign anyway. |
601 |
LSYM(Lml_z): |
602 |
eor xh, xh, yh |
603 |
LSYM(Ldv_z): |
604 |
bic xh, xh, #0x7fffffff |
605 |
mov xl, #0 |
606 |
RETLDM "r4, r5, r6" |
607 |
|
608 |
@ Check if denormalized result is possible, otherwise return signed 0. |
609 |
LSYM(Lml_u): |
610 |
cmn r4, #(53 << 19) |
611 |
movle xl, #0 |
612 |
bicle xh, xh, #0x7fffffff |
613 |
RETLDM "r4, r5, r6" le |
614 |
|
615 |
@ Find out proper shift value. |
616 |
LSYM(Lml_r): |
617 |
mvn r4, r4, asr #19 |
618 |
subs r4, r4, #30 |
619 |
bge 2f |
620 |
adds r4, r4, #12 |
621 |
bgt 1f |
622 |
|
623 |
@ shift result right of 1 to 20 bits, preserve sign bit, round, etc. |
624 |
add r4, r4, #20 |
625 |
rsb r5, r4, #32 |
626 |
mov r3, xl, lsl r5 |
627 |
mov xl, xl, lsr r4 |
628 |
orr xl, xl, xh, lsl r5 |
629 |
movs xh, xh, lsl #1 |
630 |
mov xh, xh, lsr r4 |
631 |
mov xh, xh, rrx |
632 |
adds xl, xl, r3, lsr #31 |
633 |
adc xh, xh, #0 |
634 |
teq lr, #0 |
635 |
teqeq r3, #0x80000000 |
636 |
biceq xl, xl, #1 |
637 |
RETLDM "r4, r5, r6" |
638 |
|
639 |
@ shift result right of 21 to 31 bits, or left 11 to 1 bits after |
640 |
@ a register switch from xh to xl. Then round. |
641 |
1: rsb r4, r4, #12 |
642 |
rsb r5, r4, #32 |
643 |
mov r3, xl, lsl r4 |
644 |
mov xl, xl, lsr r5 |
645 |
orr xl, xl, xh, lsl r4 |
646 |
bic xh, xh, #0x7fffffff |
647 |
adds xl, xl, r3, lsr #31 |
648 |
adc xh, xh, #0 |
649 |
teq lr, #0 |
650 |
teqeq r3, #0x80000000 |
651 |
biceq xl, xl, #1 |
652 |
RETLDM "r4, r5, r6" |
653 |
|
654 |
@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch |
655 |
@ from xh to xl. Leftover bits are in r3-r6-lr for rounding. |
656 |
2: rsb r5, r4, #32 |
657 |
mov r6, xl, lsl r5 |
658 |
mov r3, xl, lsr r4 |
659 |
orr r3, r3, xh, lsl r5 |
660 |
mov xl, xh, lsr r4 |
661 |
bic xh, xh, #0x7fffffff |
662 |
adds xl, xl, r3, lsr #31 |
663 |
adc xh, xh, #0 |
664 |
orrs r6, r6, lr |
665 |
teqeq r3, #0x80000000 |
666 |
biceq xl, xl, #1 |
667 |
RETLDM "r4, r5, r6" |
668 |
|
669 |
@ One or both arguments are denormalized. |
670 |
@ Scale them leftwards and preserve sign bit. |
671 |
LSYM(Lml_d): |
672 |
mov lr, #0 |
673 |
teq r4, #0 |
674 |
bne 2f |
675 |
and r6, xh, #0x80000000 |
676 |
1: movs xl, xl, lsl #1 |
677 |
adc xh, lr, xh, lsl #1 |
678 |
tst xh, #0x00100000 |
679 |
subeq r4, r4, #(1 << 19) |
680 |
beq 1b |
681 |
orr xh, xh, r6 |
682 |
teq r5, #0 |
683 |
bne LSYM(Lml_x) |
684 |
2: and r6, yh, #0x80000000 |
685 |
3: movs yl, yl, lsl #1 |
686 |
adc yh, lr, yh, lsl #1 |
687 |
tst yh, #0x00100000 |
688 |
subeq r5, r5, #(1 << 20) |
689 |
beq 3b |
690 |
orr yh, yh, r6 |
691 |
b LSYM(Lml_x) |
692 |
|
693 |
@ One or both args are INF or NAN. |
694 |
LSYM(Lml_s): |
695 |
orrs r6, xl, xh, lsl #1 |
696 |
orrnes r6, yl, yh, lsl #1 |
697 |
beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN |
698 |
teq r4, ip |
699 |
bne 1f |
700 |
orrs r6, xl, xh, lsl #12 |
701 |
bne LSYM(Lml_n) @ NAN * <anything> -> NAN |
702 |
1: teq r5, ip |
703 |
bne LSYM(Lml_i) |
704 |
orrs r6, yl, yh, lsl #12 |
705 |
bne LSYM(Lml_n) @ <anything> * NAN -> NAN |
706 |
|
707 |
@ Result is INF, but we need to determine its sign. |
708 |
LSYM(Lml_i): |
709 |
eor xh, xh, yh |
710 |
|
711 |
@ Overflow: return INF (sign already in xh). |
712 |
LSYM(Lml_o): |
713 |
and xh, xh, #0x80000000 |
714 |
orr xh, xh, #0x7f000000 |
715 |
orr xh, xh, #0x00f00000 |
716 |
mov xl, #0 |
717 |
RETLDM "r4, r5, r6" |
718 |
|
719 |
@ Return NAN. |
720 |
LSYM(Lml_n): |
721 |
mov xh, #0x7f000000 |
722 |
orr xh, xh, #0x00f80000 |
723 |
RETLDM "r4, r5, r6" |
724 |
|
725 |
FUNC_END muldf3 |
726 |
|
727 |
ARM_FUNC_START divdf3 |
728 |
|
729 |
stmfd sp!, {r4, r5, r6, lr} |
730 |
|
731 |
@ Mask out exponents. |
732 |
mov ip, #0x7f000000 |
733 |
orr ip, ip, #0x00f00000 |
734 |
and r4, xh, ip |
735 |
and r5, yh, ip |
736 |
|
737 |
@ Trap any INF/NAN or zeroes. |
738 |
teq r4, ip |
739 |
teqne r5, ip |
740 |
orrnes r6, xl, xh, lsl #1 |
741 |
orrnes r6, yl, yh, lsl #1 |
742 |
beq LSYM(Ldv_s) |
743 |
|
744 |
@ Shift exponents right one bit to make room for overflow bit. |
745 |
@ If either of them is 0, scale denormalized arguments off line. |
746 |
@ Then substract divisor exponent from dividend''s. |
747 |
movs r4, r4, lsr #1 |
748 |
teqne r5, #0 |
749 |
beq LSYM(Ldv_d) |
750 |
LSYM(Ldv_x): |
751 |
sub r4, r4, r5, asr #1 |
752 |
|
753 |
@ Preserve final sign into lr. |
754 |
eor lr, xh, yh |
755 |
|
756 |
@ Convert mantissa to unsigned integer. |
757 |
@ Dividend -> r5-r6, divisor -> yh-yl. |
758 |
mov r5, #0x10000000 |
759 |
mov yh, yh, lsl #12 |
760 |
orr yh, r5, yh, lsr #4 |
761 |
orr yh, yh, yl, lsr #24 |
762 |
movs yl, yl, lsl #8 |
763 |
mov xh, xh, lsl #12 |
764 |
teqeq yh, r5 |
765 |
beq LSYM(Ldv_1) |
766 |
orr r5, r5, xh, lsr #4 |
767 |
orr r5, r5, xl, lsr #24 |
768 |
mov r6, xl, lsl #8 |
769 |
|
770 |
@ Initialize xh with final sign bit. |
771 |
and xh, lr, #0x80000000 |
772 |
|
773 |
@ Ensure result will land to known bit position. |
774 |
cmp r5, yh |
775 |
cmpeq r6, yl |
776 |
bcs 1f |
777 |
sub r4, r4, #(1 << 19) |
778 |
movs yh, yh, lsr #1 |
779 |
mov yl, yl, rrx |
780 |
1: |
781 |
@ Apply exponent bias, check range for over/underflow. |
782 |
add r4, r4, #0x1f000000 |
783 |
add r4, r4, #0x00f80000 |
784 |
cmn r4, #(53 << 19) |
785 |
ble LSYM(Ldv_z) |
786 |
cmp r4, ip, lsr #1 |
787 |
bge LSYM(Lml_o) |
788 |
|
789 |
@ Perform first substraction to align result to a nibble. |
790 |
subs r6, r6, yl |
791 |
sbc r5, r5, yh |
792 |
movs yh, yh, lsr #1 |
793 |
mov yl, yl, rrx |
794 |
mov xl, #0x00100000 |
795 |
mov ip, #0x00080000 |
796 |
|
797 |
@ The actual division loop. |
798 |
1: subs lr, r6, yl |
799 |
sbcs lr, r5, yh |
800 |
subcs r6, r6, yl |
801 |
movcs r5, lr |
802 |
orrcs xl, xl, ip |
803 |
movs yh, yh, lsr #1 |
804 |
mov yl, yl, rrx |
805 |
subs lr, r6, yl |
806 |
sbcs lr, r5, yh |
807 |
subcs r6, r6, yl |
808 |
movcs r5, lr |
809 |
orrcs xl, xl, ip, lsr #1 |
810 |
movs yh, yh, lsr #1 |
811 |
mov yl, yl, rrx |
812 |
subs lr, r6, yl |
813 |
sbcs lr, r5, yh |
814 |
subcs r6, r6, yl |
815 |
movcs r5, lr |
816 |
orrcs xl, xl, ip, lsr #2 |
817 |
movs yh, yh, lsr #1 |
818 |
mov yl, yl, rrx |
819 |
subs lr, r6, yl |
820 |
sbcs lr, r5, yh |
821 |
subcs r6, r6, yl |
822 |
movcs r5, lr |
823 |
orrcs xl, xl, ip, lsr #3 |
824 |
|
825 |
orrs lr, r5, r6 |
826 |
beq 2f |
827 |
mov r5, r5, lsl #4 |
828 |
orr r5, r5, r6, lsr #28 |
829 |
mov r6, r6, lsl #4 |
830 |
mov yh, yh, lsl #3 |
831 |
orr yh, yh, yl, lsr #29 |
832 |
mov yl, yl, lsl #3 |
833 |
movs ip, ip, lsr #4 |
834 |
bne 1b |
835 |
|
836 |
@ We are done with a word of the result. |
837 |
@ Loop again for the low word if this pass was for the high word. |
838 |
tst xh, #0x00100000 |
839 |
bne 3f |
840 |
orr xh, xh, xl |
841 |
mov xl, #0 |
842 |
mov ip, #0x80000000 |
843 |
b 1b |
844 |
2: |
845 |
@ Be sure result starts in the high word. |
846 |
tst xh, #0x00100000 |
847 |
orreq xh, xh, xl |
848 |
moveq xl, #0 |
849 |
3: |
850 |
@ Check if denormalized result is needed. |
851 |
cmp r4, #0 |
852 |
ble LSYM(Ldv_u) |
853 |
|
854 |
@ Apply proper rounding. |
855 |
subs ip, r5, yh |
856 |
subeqs ip, r6, yl |
857 |
adcs xl, xl, #0 |
858 |
adc xh, xh, #0 |
859 |
teq ip, #0 |
860 |
biceq xl, xl, #1 |
861 |
|
862 |
@ Add exponent to result. |
863 |
bic xh, xh, #0x00100000 |
864 |
orr xh, xh, r4, lsl #1 |
865 |
RETLDM "r4, r5, r6" |
866 |
|
867 |
@ Division by 0x1p*: shortcut a lot of code. |
868 |
LSYM(Ldv_1): |
869 |
and lr, lr, #0x80000000 |
870 |
orr xh, lr, xh, lsr #12 |
871 |
add r4, r4, #0x1f000000 |
872 |
add r4, r4, #0x00f80000 |
873 |
cmp r4, ip, lsr #1 |
874 |
bge LSYM(Lml_o) |
875 |
cmp r4, #0 |
876 |
orrgt xh, xh, r4, lsl #1 |
877 |
RETLDM "r4, r5, r6" gt |
878 |
|
879 |
cmn r4, #(53 << 19) |
880 |
ble LSYM(Ldv_z) |
881 |
orr xh, xh, #0x00100000 |
882 |
mov lr, #0 |
883 |
b LSYM(Lml_r) |
884 |
|
885 |
@ Result must be denormalized: put remainder in lr for |
886 |
@ rounding considerations. |
887 |
LSYM(Ldv_u): |
888 |
orr lr, r5, r6 |
889 |
b LSYM(Lml_r) |
890 |
|
891 |
@ One or both arguments are denormalized. |
892 |
@ Scale them leftwards and preserve sign bit. |
893 |
LSYM(Ldv_d): |
894 |
mov lr, #0 |
895 |
teq r4, #0 |
896 |
bne 2f |
897 |
and r6, xh, #0x80000000 |
898 |
1: movs xl, xl, lsl #1 |
899 |
adc xh, lr, xh, lsl #1 |
900 |
tst xh, #0x00100000 |
901 |
subeq r4, r4, #(1 << 19) |
902 |
beq 1b |
903 |
orr xh, xh, r6 |
904 |
teq r5, #0 |
905 |
bne LSYM(Ldv_x) |
906 |
2: and r6, yh, #0x80000000 |
907 |
3: movs yl, yl, lsl #1 |
908 |
adc yh, lr, yh, lsl #1 |
909 |
tst yh, #0x00100000 |
910 |
subeq r5, r5, #(1 << 20) |
911 |
beq 3b |
912 |
orr yh, yh, r6 |
913 |
b LSYM(Ldv_x) |
914 |
|
915 |
@ One or both arguments is either INF, NAN or zero. |
916 |
LSYM(Ldv_s): |
917 |
teq r4, ip |
918 |
teqeq r5, ip |
919 |
beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN |
920 |
teq r4, ip |
921 |
bne 1f |
922 |
orrs r4, xl, xh, lsl #12 |
923 |
bne LSYM(Lml_n) @ NAN / <anything> -> NAN |
924 |
b LSYM(Lml_i) @ INF / <anything> -> INF |
925 |
1: teq r5, ip |
926 |
bne 2f |
927 |
orrs r5, yl, yh, lsl #12 |
928 |
bne LSYM(Lml_n) @ <anything> / NAN -> NAN |
929 |
b LSYM(Lml_z) @ <anything> / INF -> 0 |
930 |
2: @ One or both arguments are 0. |
931 |
orrs r4, xl, xh, lsl #1 |
932 |
bne LSYM(Lml_i) @ <non_zero> / 0 -> INF |
933 |
orrs r5, yl, yh, lsl #1 |
934 |
bne LSYM(Lml_z) @ 0 / <non_zero> -> 0 |
935 |
b LSYM(Lml_n) @ 0 / 0 -> NAN |
936 |
|
937 |
FUNC_END divdf3 |
938 |
|
939 |
#endif /* L_muldivdf3 */ |
940 |
|
941 |
#ifdef L_cmpdf2 |
942 |
|
943 |
FUNC_START gedf2 |
944 |
ARM_FUNC_START gtdf2 |
945 |
mov ip, #-1 |
946 |
b 1f |
947 |
|
948 |
FUNC_START ledf2 |
949 |
ARM_FUNC_START ltdf2 |
950 |
mov ip, #1 |
951 |
b 1f |
952 |
|
953 |
FUNC_START nedf2 |
954 |
FUNC_START eqdf2 |
955 |
ARM_FUNC_START cmpdf2 |
956 |
mov ip, #1 @ how should we specify unordered here? |
957 |
|
958 |
1: stmfd sp!, {r4, r5, lr} |
959 |
|
960 |
@ Trap any INF/NAN first. |
961 |
mov lr, #0x7f000000 |
962 |
orr lr, lr, #0x00f00000 |
963 |
and r4, xh, lr |
964 |
and r5, yh, lr |
965 |
teq r4, lr |
966 |
teqne r5, lr |
967 |
beq 3f |
968 |
|
969 |
@ Test for equality. |
970 |
@ Note that 0.0 is equal to -0.0. |
971 |
2: orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 |
972 |
orreqs ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 |
973 |
teqne xh, yh @ or xh == yh |
974 |
teqeq xl, yl @ and xl == yl |
975 |
moveq r0, #0 @ then equal. |
976 |
RETLDM "r4, r5" eq |
977 |
|
978 |
@ Check for sign difference. |
979 |
teq xh, yh |
980 |
movmi r0, xh, asr #31 |
981 |
orrmi r0, r0, #1 |
982 |
RETLDM "r4, r5" mi |
983 |
|
984 |
@ Compare exponents. |
985 |
cmp r4, r5 |
986 |
|
987 |
@ Compare mantissa if exponents are equal. |
988 |
moveq xh, xh, lsl #12 |
989 |
cmpeq xh, yh, lsl #12 |
990 |
cmpeq xl, yl |
991 |
movcs r0, yh, asr #31 |
992 |
mvncc r0, yh, asr #31 |
993 |
orr r0, r0, #1 |
994 |
RETLDM "r4, r5" |
995 |
|
996 |
@ Look for a NAN. |
997 |
3: teq r4, lr |
998 |
bne 4f |
999 |
orrs xl, xl, xh, lsl #12 |
1000 |
bne 5f @ x is NAN |
1001 |
4: teq r5, lr |
1002 |
bne 2b |
1003 |
orrs yl, yl, yh, lsl #12 |
1004 |
beq 2b @ y is not NAN |
1005 |
5: mov r0, ip @ return unordered code from ip |
1006 |
RETLDM "r4, r5" |
1007 |
|
1008 |
FUNC_END gedf2 |
1009 |
FUNC_END gtdf2 |
1010 |
FUNC_END ledf2 |
1011 |
FUNC_END ltdf2 |
1012 |
FUNC_END nedf2 |
1013 |
FUNC_END eqdf2 |
1014 |
FUNC_END cmpdf2 |
1015 |
|
1016 |
#endif /* L_cmpdf2 */ |
1017 |
|
1018 |
#ifdef L_unorddf2 |
1019 |
|
1020 |
ARM_FUNC_START unorddf2 |
1021 |
str lr, [sp, #-4]! |
1022 |
mov ip, #0x7f000000 |
1023 |
orr ip, ip, #0x00f00000 |
1024 |
and lr, xh, ip |
1025 |
teq lr, ip |
1026 |
bne 1f |
1027 |
orrs xl, xl, xh, lsl #12 |
1028 |
bne 3f @ x is NAN |
1029 |
1: and lr, yh, ip |
1030 |
teq lr, ip |
1031 |
bne 2f |
1032 |
orrs yl, yl, yh, lsl #12 |
1033 |
bne 3f @ y is NAN |
1034 |
2: mov r0, #0 @ arguments are ordered. |
1035 |
RETLDM |
1036 |
|
1037 |
3: mov r0, #1 @ arguments are unordered. |
1038 |
RETLDM |
1039 |
|
1040 |
FUNC_END unorddf2 |
1041 |
|
1042 |
#endif /* L_unorddf2 */ |
1043 |
|
1044 |
#ifdef L_fixdfsi |
1045 |
|
1046 |
ARM_FUNC_START fixdfsi |
1047 |
orrs ip, xl, xh, lsl #1 |
1048 |
beq 1f @ value is 0. |
1049 |
|
1050 |
mov r3, r3, rrx @ preserve C flag (the actual sign) |
1051 |
|
1052 |
@ check exponent range. |
1053 |
mov ip, #0x7f000000 |
1054 |
orr ip, ip, #0x00f00000 |
1055 |
and r2, xh, ip |
1056 |
teq r2, ip |
1057 |
beq 2f @ value is INF or NAN |
1058 |
bic ip, ip, #0x40000000 |
1059 |
cmp r2, ip |
1060 |
bcc 1f @ value is too small |
1061 |
add ip, ip, #(31 << 20) |
1062 |
cmp r2, ip |
1063 |
bcs 3f @ value is too large |
1064 |
|
1065 |
rsb r2, r2, ip |
1066 |
mov ip, xh, lsl #11 |
1067 |
orr ip, ip, #0x80000000 |
1068 |
orr ip, ip, xl, lsr #21 |
1069 |
mov r2, r2, lsr #20 |
1070 |
tst r3, #0x80000000 @ the sign bit |
1071 |
mov r0, ip, lsr r2 |
1072 |
rsbne r0, r0, #0 |
1073 |
RET |
1074 |
|
1075 |
1: mov r0, #0 |
1076 |
RET |
1077 |
|
1078 |
2: orrs xl, xl, xh, lsl #12 |
1079 |
bne 4f @ r0 is NAN. |
1080 |
3: ands r0, r3, #0x80000000 @ the sign bit |
1081 |
moveq r0, #0x7fffffff @ maximum signed positive si |
1082 |
RET |
1083 |
|
1084 |
4: mov r0, #0 @ How should we convert NAN? |
1085 |
RET |
1086 |
|
1087 |
FUNC_END fixdfsi |
1088 |
|
1089 |
#endif /* L_fixdfsi */ |
1090 |
|
1091 |
#ifdef L_fixunsdfsi |
1092 |
|
1093 |
ARM_FUNC_START fixunsdfsi |
1094 |
orrs ip, xl, xh, lsl #1 |
1095 |
movcss r0, #0 @ value is negative |
1096 |
RETc(eq) @ or 0 (xl, xh overlap r0) |
1097 |
|
1098 |
@ check exponent range. |
1099 |
mov ip, #0x7f000000 |
1100 |
orr ip, ip, #0x00f00000 |
1101 |
and r2, xh, ip |
1102 |
teq r2, ip |
1103 |
beq 2f @ value is INF or NAN |
1104 |
bic ip, ip, #0x40000000 |
1105 |
cmp r2, ip |
1106 |
bcc 1f @ value is too small |
1107 |
add ip, ip, #(31 << 20) |
1108 |
cmp r2, ip |
1109 |
bhi 3f @ value is too large |
1110 |
|
1111 |
rsb r2, r2, ip |
1112 |
mov ip, xh, lsl #11 |
1113 |
orr ip, ip, #0x80000000 |
1114 |
orr ip, ip, xl, lsr #21 |
1115 |
mov r2, r2, lsr #20 |
1116 |
mov r0, ip, lsr r2 |
1117 |
RET |
1118 |
|
1119 |
1: mov r0, #0 |
1120 |
RET |
1121 |
|
1122 |
2: orrs xl, xl, xh, lsl #12 |
1123 |
bne 4f @ value is NAN. |
1124 |
3: mov r0, #0xffffffff @ maximum unsigned si |
1125 |
RET |
1126 |
|
1127 |
4: mov r0, #0 @ How should we convert NAN? |
1128 |
RET |
1129 |
|
1130 |
FUNC_END fixunsdfsi |
1131 |
|
1132 |
#endif /* L_fixunsdfsi */ |
1133 |
|
1134 |
#ifdef L_truncdfsf2 |
1135 |
|
1136 |
ARM_FUNC_START truncdfsf2 |
1137 |
orrs r2, xl, xh, lsl #1 |
1138 |
moveq r0, r2, rrx |
1139 |
RETc(eq) @ value is 0.0 or -0.0 |
1140 |
|
1141 |
@ check exponent range. |
1142 |
mov ip, #0x7f000000 |
1143 |
orr ip, ip, #0x00f00000 |
1144 |
and r2, ip, xh |
1145 |
teq r2, ip |
1146 |
beq 2f @ value is INF or NAN |
1147 |
bic xh, xh, ip |
1148 |
cmp r2, #(0x380 << 20) |
1149 |
bls 4f @ value is too small |
1150 |
|
1151 |
@ shift and round mantissa |
1152 |
1: movs r3, xl, lsr #29 |
1153 |
adc r3, r3, xh, lsl #3 |
1154 |
|
1155 |
@ if halfway between two numbers, round towards LSB = 0. |
1156 |
mov xl, xl, lsl #3 |
1157 |
teq xl, #0x80000000 |
1158 |
biceq r3, r3, #1 |
1159 |
|
1160 |
@ rounding might have created an extra MSB. If so adjust exponent. |
1161 |
tst r3, #0x00800000 |
1162 |
addne r2, r2, #(1 << 20) |
1163 |
bicne r3, r3, #0x00800000 |
1164 |
|
1165 |
@ check exponent for overflow |
1166 |
mov ip, #(0x400 << 20) |
1167 |
orr ip, ip, #(0x07f << 20) |
1168 |
cmp r2, ip |
1169 |
bcs 3f @ overflow |
1170 |
|
1171 |
@ adjust exponent, merge with sign bit and mantissa. |
1172 |
movs xh, xh, lsl #1 |
1173 |
mov r2, r2, lsl #4 |
1174 |
orr r0, r3, r2, rrx |
1175 |
eor r0, r0, #0x40000000 |
1176 |
RET |
1177 |
|
1178 |
2: @ chech for NAN |
1179 |
orrs xl, xl, xh, lsl #12 |
1180 |
movne r0, #0x7f000000 |
1181 |
orrne r0, r0, #0x00c00000 |
1182 |
RETc(ne) @ return NAN |
1183 |
|
1184 |
3: @ return INF with sign |
1185 |
and r0, xh, #0x80000000 |
1186 |
orr r0, r0, #0x7f000000 |
1187 |
orr r0, r0, #0x00800000 |
1188 |
RET |
1189 |
|
1190 |
4: @ check if denormalized value is possible |
1191 |
subs r2, r2, #((0x380 - 24) << 20) |
1192 |
andle r0, xh, #0x80000000 @ too small, return signed 0. |
1193 |
RETc(le) |
1194 |
|
1195 |
@ denormalize value so we can resume with the code above afterwards. |
1196 |
orr xh, xh, #0x00100000 |
1197 |
mov r2, r2, lsr #20 |
1198 |
rsb r2, r2, #25 |
1199 |
cmp r2, #20 |
1200 |
bgt 6f |
1201 |
|
1202 |
rsb ip, r2, #32 |
1203 |
mov r3, xl, lsl ip |
1204 |
mov xl, xl, lsr r2 |
1205 |
orr xl, xl, xh, lsl ip |
1206 |
movs xh, xh, lsl #1 |
1207 |
mov xh, xh, lsr r2 |
1208 |
mov xh, xh, rrx |
1209 |
5: teq r3, #0 @ fold r3 bits into the LSB |
1210 |
orrne xl, xl, #1 @ for rounding considerations. |
1211 |
mov r2, #(0x380 << 20) @ equivalent to the 0 float exponent |
1212 |
b 1b |
1213 |
|
1214 |
6: rsb r2, r2, #(12 + 20) |
1215 |
rsb ip, r2, #32 |
1216 |
mov r3, xl, lsl r2 |
1217 |
mov xl, xl, lsr ip |
1218 |
orr xl, xl, xh, lsl r2 |
1219 |
and xh, xh, #0x80000000 |
1220 |
b 5b |
1221 |
|
1222 |
FUNC_END truncdfsf2 |
1223 |
|
1224 |
#endif /* L_truncdfsf2 */ |