Line 0
Link Here
|
|
|
1 |
/* ieee754-df.S double-precision floating point support for ARM |
2 |
|
3 |
Copyright (C) 2003 Free Software Foundation, Inc. |
4 |
Contributed by Nicolas Pitre (nico@cam.org) |
5 |
|
6 |
This file is free software; you can redistribute it and/or modify it |
7 |
under the terms of the GNU General Public License as published by the |
8 |
Free Software Foundation; either version 2, or (at your option) any |
9 |
later version. |
10 |
|
11 |
In addition to the permissions in the GNU General Public License, the |
12 |
Free Software Foundation gives you unlimited permission to link the |
13 |
compiled version of this file into combinations with other programs, |
14 |
and to distribute those combinations without any restriction coming |
15 |
from the use of this file. (The General Public License restrictions |
16 |
do apply in other respects; for example, they cover modification of |
17 |
the file, and distribution when not linked into a combine |
18 |
executable.) |
19 |
|
20 |
This file is distributed in the hope that it will be useful, but |
21 |
WITHOUT ANY WARRANTY; without even the implied warranty of |
22 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
23 |
General Public License for more details. |
24 |
|
25 |
You should have received a copy of the GNU General Public License |
26 |
along with this program; see the file COPYING. If not, write to |
27 |
the Free Software Foundation, 59 Temple Place - Suite 330, |
28 |
Boston, MA 02111-1307, USA. */ |
29 |
|
30 |
/* |
31 |
* Notes: |
32 |
* |
33 |
* The goal of this code is to be as fast as possible. This is |
34 |
* not meant to be easy to understand for the casual reader. |
35 |
* For slightly simpler code please see the single precision version |
36 |
* of this file. |
37 |
* |
38 |
 * Only the default rounding mode is intended for best performance.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performance.
41 |
*/ |
42 |
|
43 |
|
44 |
@ For FPA, float words are always big-endian. |
45 |
@ For VFP, floats words follow the memory system mode. |
46 |
#if defined(__VFP_FP__) && !defined(__ARMEB__) |
47 |
#define xl r0 |
48 |
#define xh r1 |
49 |
#define yl r2 |
50 |
#define yh r3 |
51 |
#else |
52 |
#define xh r0 |
53 |
#define xl r1 |
54 |
#define yh r2 |
55 |
#define yl r3 |
56 |
#endif |
57 |
|
58 |
|
59 |
#ifdef L_negdf2 |
60 |
|
61 |
ARM_FUNC_START negdf2 |
62 |
@ flip sign bit |
63 |
eor xh, xh, #0x80000000 |
64 |
RET |
65 |
|
66 |
FUNC_END negdf2 |
67 |
|
68 |
#endif |
69 |
|
70 |
#ifdef L_addsubdf3 |
71 |
|
72 |
ARM_FUNC_START subdf3 |
73 |
@ flip sign bit of second arg |
74 |
eor yh, yh, #0x80000000 |
75 |
#if defined(__thumb__) && !defined(__THUMB_INTERWORK__) |
76 |
b 1f @ Skip Thumb-code prologue |
77 |
#endif |
78 |
|
79 |
ARM_FUNC_START adddf3 |
80 |
|
81 |
1: @ Compare both args, return zero if equal but the sign. |
82 |
teq xl, yl |
83 |
eoreq ip, xh, yh |
84 |
teqeq ip, #0x80000000 |
85 |
beq LSYM(Lad_z) |
86 |
|
87 |
@ If first arg is 0 or -0, return second arg. |
88 |
@ If second arg is 0 or -0, return first arg. |
89 |
orrs ip, xl, xh, lsl #1 |
90 |
moveq xl, yl |
91 |
moveq xh, yh |
92 |
orrnes ip, yl, yh, lsl #1 |
93 |
RETc(eq) |
94 |
|
95 |
stmfd sp!, {r4, r5, lr} |
96 |
|
97 |
@ Mask out exponents. |
98 |
mov ip, #0x7f000000 |
99 |
orr ip, ip, #0x00f00000 |
100 |
and r4, xh, ip |
101 |
and r5, yh, ip |
102 |
|
103 |
@ If either of them is 0x7ff, result will be INF or NAN |
104 |
teq r4, ip |
105 |
teqne r5, ip |
106 |
beq LSYM(Lad_i) |
107 |
|
108 |
@ Compute exponent difference. Make largest exponent in r4, |
109 |
@ corresponding arg in xh-xl, and positive exponent difference in r5. |
110 |
subs r5, r5, r4 |
111 |
rsblt r5, r5, #0 |
112 |
ble 1f |
113 |
add r4, r4, r5 |
114 |
eor yl, xl, yl |
115 |
eor yh, xh, yh |
116 |
eor xl, yl, xl |
117 |
eor xh, yh, xh |
118 |
eor yl, xl, yl |
119 |
eor yh, xh, yh |
120 |
1: |
121 |
|
122 |
@ If exponent difference is too large, return largest argument |
123 |
@ already in xh-xl. We need up to 54 bit to handle proper rounding |
124 |
@ of 0x1p54 - 1.1. |
125 |
cmp r5, #(54 << 20) |
126 |
RETLDM "r4, r5" hi |
127 |
|
128 |
@ Convert mantissa to signed integer. |
129 |
tst xh, #0x80000000 |
130 |
bic xh, xh, ip, lsl #1 |
131 |
orr xh, xh, #0x00100000 |
132 |
beq 1f |
133 |
rsbs xl, xl, #0 |
134 |
rsc xh, xh, #0 |
135 |
1: |
136 |
tst yh, #0x80000000 |
137 |
bic yh, yh, ip, lsl #1 |
138 |
orr yh, yh, #0x00100000 |
139 |
beq 1f |
140 |
rsbs yl, yl, #0 |
141 |
rsc yh, yh, #0 |
142 |
1: |
143 |
@ If exponent == difference, one or both args were denormalized. |
144 |
@ Since this is not common case, rescale them off line. |
145 |
teq r4, r5 |
146 |
beq LSYM(Lad_d) |
147 |
LSYM(Lad_x): |
148 |
@ Scale down second arg with exponent difference. |
149 |
@ Apply shift one bit left to first arg and the rest to second arg |
150 |
@ to simplify things later, but only if exponent does not become 0. |
151 |
mov ip, #0 |
152 |
movs r5, r5, lsr #20 |
153 |
beq 3f |
154 |
teq r4, #(1 << 20) |
155 |
beq 1f |
156 |
movs xl, xl, lsl #1 |
157 |
adc xh, ip, xh, lsl #1 |
158 |
sub r4, r4, #(1 << 20) |
159 |
subs r5, r5, #1 |
160 |
beq 3f |
161 |
|
162 |
@ Shift yh-yl right per r5, keep leftover bits into ip. |
163 |
1: rsbs lr, r5, #32 |
164 |
blt 2f |
165 |
mov ip, yl, lsl lr |
166 |
mov yl, yl, lsr r5 |
167 |
orr yl, yl, yh, lsl lr |
168 |
mov yh, yh, asr r5 |
169 |
b 3f |
170 |
2: sub r5, r5, #32 |
171 |
add lr, lr, #32 |
172 |
cmp yl, #1 |
173 |
adc ip, ip, yh, lsl lr |
174 |
mov yl, yh, asr r5 |
175 |
mov yh, yh, asr #32 |
176 |
3: |
177 |
@ the actual addition |
178 |
adds xl, xl, yl |
179 |
adc xh, xh, yh |
180 |
|
181 |
@ We now have a result in xh-xl-ip. |
182 |
@ Keep absolute value in xh-xl-ip, sign in r5. |
183 |
ands r5, xh, #0x80000000 |
184 |
bpl LSYM(Lad_p) |
185 |
rsbs ip, ip, #0 |
186 |
rscs xl, xl, #0 |
187 |
rsc xh, xh, #0 |
188 |
|
189 |
@ Determine how to normalize the result. |
190 |
LSYM(Lad_p): |
191 |
cmp xh, #0x00100000 |
192 |
bcc LSYM(Lad_l) |
193 |
cmp xh, #0x00200000 |
194 |
bcc LSYM(Lad_r0) |
195 |
cmp xh, #0x00400000 |
196 |
bcc LSYM(Lad_r1) |
197 |
|
198 |
@ Result needs to be shifted right. |
199 |
movs xh, xh, lsr #1 |
200 |
movs xl, xl, rrx |
201 |
movs ip, ip, rrx |
202 |
orrcs ip, ip, #1 |
203 |
add r4, r4, #(1 << 20) |
204 |
LSYM(Lad_r1): |
205 |
movs xh, xh, lsr #1 |
206 |
movs xl, xl, rrx |
207 |
movs ip, ip, rrx |
208 |
orrcs ip, ip, #1 |
209 |
add r4, r4, #(1 << 20) |
210 |
|
211 |
@ Our result is now properly aligned into xh-xl, remaining bits in ip. |
212 |
@ Round with MSB of ip. If halfway between two numbers, round towards |
213 |
@ LSB of xl = 0. |
214 |
LSYM(Lad_r0): |
215 |
adds xl, xl, ip, lsr #31 |
216 |
adc xh, xh, #0 |
217 |
teq ip, #0x80000000 |
218 |
biceq xl, xl, #1 |
219 |
|
220 |
@ One extreme rounding case may add a new MSB. Adjust exponent. |
221 |
@ That MSB will be cleared when exponent is merged below. |
222 |
tst xh, #0x00200000 |
223 |
addne r4, r4, #(1 << 20) |
224 |
|
225 |
@ Make sure we did not bust our exponent. |
226 |
adds ip, r4, #(1 << 20) |
227 |
bmi LSYM(Lad_o) |
228 |
|
229 |
@ Pack final result together. |
230 |
LSYM(Lad_e): |
231 |
bic xh, xh, #0x00300000 |
232 |
orr xh, xh, r4 |
233 |
orr xh, xh, r5 |
234 |
RETLDM "r4, r5" |
235 |
|
236 |
LSYM(Lad_l): |
237 |
@ Result must be shifted left and exponent adjusted. |
238 |
@ No rounding necessary since ip will always be 0. |
239 |
#if __ARM_ARCH__ < 5 |
240 |
|
241 |
teq xh, #0 |
242 |
movne r3, #-11 |
243 |
moveq r3, #21 |
244 |
moveq xh, xl |
245 |
moveq xl, #0 |
246 |
mov r2, xh |
247 |
movs ip, xh, lsr #16 |
248 |
moveq r2, r2, lsl #16 |
249 |
addeq r3, r3, #16 |
250 |
tst r2, #0xff000000 |
251 |
moveq r2, r2, lsl #8 |
252 |
addeq r3, r3, #8 |
253 |
tst r2, #0xf0000000 |
254 |
moveq r2, r2, lsl #4 |
255 |
addeq r3, r3, #4 |
256 |
tst r2, #0xc0000000 |
257 |
moveq r2, r2, lsl #2 |
258 |
addeq r3, r3, #2 |
259 |
tst r2, #0x80000000 |
260 |
addeq r3, r3, #1 |
261 |
|
262 |
#else |
263 |
|
264 |
teq xh, #0 |
265 |
moveq xh, xl |
266 |
moveq xl, #0 |
267 |
clz r3, xh |
268 |
addeq r3, r3, #32 |
269 |
sub r3, r3, #11 |
270 |
|
271 |
#endif |
272 |
|
273 |
@ determine how to shift the value. |
274 |
subs r2, r3, #32 |
275 |
bge 2f |
276 |
adds r2, r2, #12 |
277 |
ble 1f |
278 |
|
279 |
@ shift value left 21 to 31 bits, or actually right 11 to 1 bits |
280 |
@ since a register switch happened above. |
281 |
add ip, r2, #20 |
282 |
rsb r2, r2, #12 |
283 |
mov xl, xh, lsl ip |
284 |
mov xh, xh, lsr r2 |
285 |
b 3f |
286 |
|
287 |
@ actually shift value left 1 to 20 bits, which might also represent |
288 |
@ 32 to 52 bits if counting the register switch that happened earlier. |
289 |
1: add r2, r2, #20 |
290 |
2: rsble ip, r2, #32 |
291 |
mov xh, xh, lsl r2 |
292 |
orrle xh, xh, xl, lsr ip |
293 |
movle xl, xl, lsl r2 |
294 |
|
295 |
@ adjust exponent accordingly. |
296 |
3: subs r4, r4, r3, lsl #20 |
297 |
bgt LSYM(Lad_e) |
298 |
|
299 |
@ Exponent too small, denormalize result. |
300 |
@ Find out proper shift value. |
301 |
mvn r4, r4, asr #20 |
302 |
subs r4, r4, #30 |
303 |
bge 2f |
304 |
adds r4, r4, #12 |
305 |
bgt 1f |
306 |
|
307 |
@ shift result right of 1 to 20 bits, sign is in r5. |
308 |
add r4, r4, #20 |
309 |
rsb r2, r4, #32 |
310 |
mov xl, xl, lsr r4 |
311 |
orr xl, xl, xh, lsl r2 |
312 |
orr xh, r5, xh, lsr r4 |
313 |
RETLDM "r4, r5" |
314 |
|
315 |
@ shift result right of 21 to 31 bits, or left 11 to 1 bits after |
316 |
@ a register switch from xh to xl. |
317 |
1: rsb r4, r4, #12 |
318 |
rsb r2, r4, #32 |
319 |
mov xl, xl, lsr r2 |
320 |
orr xl, xl, xh, lsl r4 |
321 |
mov xh, r5 |
322 |
RETLDM "r4, r5" |
323 |
|
324 |
@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch |
325 |
@ from xh to xl. |
326 |
2: mov xl, xh, lsr r4 |
327 |
mov xh, r5 |
328 |
RETLDM "r4, r5" |
329 |
|
330 |
@ Adjust exponents for denormalized arguments. |
331 |
LSYM(Lad_d): |
332 |
teq r4, #0 |
333 |
eoreq xh, xh, #0x00100000 |
334 |
addeq r4, r4, #(1 << 20) |
335 |
eor yh, yh, #0x00100000 |
336 |
subne r5, r5, #(1 << 20) |
337 |
b LSYM(Lad_x) |
338 |
|
339 |
@ Result is x - x = 0, unless x = INF or NAN. |
340 |
LSYM(Lad_z): |
341 |
sub ip, ip, #0x00100000 @ ip becomes 0x7ff00000 |
342 |
and r2, xh, ip |
343 |
teq r2, ip |
344 |
orreq xh, ip, #0x00080000 |
345 |
movne xh, #0 |
346 |
mov xl, #0 |
347 |
RET |
348 |
|
349 |
@ Overflow: return INF. |
350 |
LSYM(Lad_o): |
351 |
orr xh, r5, #0x7f000000 |
352 |
orr xh, xh, #0x00f00000 |
353 |
mov xl, #0 |
354 |
RETLDM "r4, r5" |
355 |
|
356 |
@ At least one of x or y is INF/NAN. |
357 |
@ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) |
358 |
@ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) |
359 |
@ if either is NAN: return NAN |
360 |
@ if opposite sign: return NAN |
361 |
@ return xh-xl (which is INF or -INF) |
362 |
LSYM(Lad_i): |
363 |
teq r4, ip |
364 |
movne xh, yh |
365 |
movne xl, yl |
366 |
teqeq r5, ip |
367 |
RETLDM "r4, r5" ne |
368 |
|
369 |
orrs r4, xl, xh, lsl #12 |
370 |
orreqs r4, yl, yh, lsl #12 |
371 |
teqeq xh, yh |
372 |
orrne xh, r5, #0x00080000 |
373 |
movne xl, #0 |
374 |
RETLDM "r4, r5" |
375 |
|
376 |
FUNC_END subdf3 |
377 |
FUNC_END adddf3 |
378 |
|
379 |
ARM_FUNC_START floatunsidf |
380 |
teq r0, #0 |
381 |
moveq r1, #0 |
382 |
RETc(eq) |
383 |
stmfd sp!, {r4, r5, lr} |
384 |
mov r4, #(0x400 << 20) @ initial exponent |
385 |
add r4, r4, #((52-1) << 20) |
386 |
mov r5, #0 @ sign bit is 0 |
387 |
mov xl, r0 |
388 |
mov xh, #0 |
389 |
b LSYM(Lad_l) |
390 |
|
391 |
FUNC_END floatunsidf |
392 |
|
393 |
ARM_FUNC_START floatsidf |
394 |
teq r0, #0 |
395 |
moveq r1, #0 |
396 |
RETc(eq) |
397 |
stmfd sp!, {r4, r5, lr} |
398 |
mov r4, #(0x400 << 20) @ initial exponent |
399 |
add r4, r4, #((52-1) << 20) |
400 |
ands r5, r0, #0x80000000 @ sign bit in r5 |
401 |
rsbmi r0, r0, #0 @ absolute value |
402 |
mov xl, r0 |
403 |
mov xh, #0 |
404 |
b LSYM(Lad_l) |
405 |
|
406 |
FUNC_END floatsidf |
407 |
|
408 |
ARM_FUNC_START extendsfdf2 |
409 |
movs r2, r0, lsl #1 |
410 |
beq 1f @ value is 0.0 or -0.0 |
411 |
mov xh, r2, asr #3 @ stretch exponent |
412 |
mov xh, xh, rrx @ retrieve sign bit |
413 |
mov xl, r2, lsl #28 @ retrieve remaining bits |
414 |
ands r2, r2, #0xff000000 @ isolate exponent |
415 |
beq 2f @ exponent was 0 but not mantissa |
416 |
teq r2, #0xff000000 @ check if INF or NAN |
417 |
eorne xh, xh, #0x38000000 @ fixup exponent otherwise. |
418 |
RET |
419 |
|
420 |
1: mov xh, r0 |
421 |
mov xl, #0 |
422 |
RET |
423 |
|
424 |
2: @ value was denormalized. We can normalize it now. |
425 |
stmfd sp!, {r4, r5, lr} |
426 |
mov r4, #(0x380 << 20) @ setup corresponding exponent |
427 |
add r4, r4, #(1 << 20) |
428 |
and r5, xh, #0x80000000 @ move sign bit in r5 |
429 |
bic xh, xh, #0x80000000 |
430 |
b LSYM(Lad_l) |
431 |
|
432 |
FUNC_END extendsfdf2 |
433 |
|
434 |
#endif /* L_addsubdf3 */ |
435 |
|
436 |
#ifdef L_muldivdf3 |
437 |
|
438 |
ARM_FUNC_START muldf3 |
439 |
|
440 |
stmfd sp!, {r4, r5, r6, lr} |
441 |
|
442 |
@ Mask out exponents. |
443 |
mov ip, #0x7f000000 |
444 |
orr ip, ip, #0x00f00000 |
445 |
and r4, xh, ip |
446 |
and r5, yh, ip |
447 |
|
448 |
@ Trap any INF/NAN. |
449 |
teq r4, ip |
450 |
teqne r5, ip |
451 |
beq LSYM(Lml_s) |
452 |
|
453 |
@ Trap any multiplication by 0. |
454 |
orrs r6, xl, xh, lsl #1 |
455 |
orrnes r6, yl, yh, lsl #1 |
456 |
beq LSYM(Lml_z) |
457 |
|
458 |
@ Shift exponents right one bit to make room for overflow bit. |
459 |
@ If either of them is 0, scale denormalized arguments off line. |
460 |
@ Then add both exponents together. |
461 |
movs r4, r4, lsr #1 |
462 |
teqne r5, #0 |
463 |
beq LSYM(Lml_d) |
464 |
LSYM(Lml_x): |
465 |
add r4, r4, r5, asr #1 |
466 |
|
467 |
@ Preserve final sign in r4 along with exponent for now. |
468 |
teq xh, yh |
469 |
orrmi r4, r4, #0x8000 |
470 |
|
471 |
@ Convert mantissa to unsigned integer. |
472 |
bic xh, xh, ip, lsl #1 |
473 |
bic yh, yh, ip, lsl #1 |
474 |
orr xh, xh, #0x00100000 |
475 |
orr yh, yh, #0x00100000 |
476 |
|
477 |
#if __ARM_ARCH__ < 4 |
478 |
|
479 |
@ Well, no way to make it shorter without the umull instruction. |
480 |
@ We must perform that 53 x 53 bit multiplication by hand. |
481 |
stmfd sp!, {r7, r8, r9, sl, fp} |
482 |
mov r7, xl, lsr #16 |
483 |
mov r8, yl, lsr #16 |
484 |
mov r9, xh, lsr #16 |
485 |
mov sl, yh, lsr #16 |
486 |
bic xl, xl, r7, lsl #16 |
487 |
bic yl, yl, r8, lsl #16 |
488 |
bic xh, xh, r9, lsl #16 |
489 |
bic yh, yh, sl, lsl #16 |
490 |
mul ip, xl, yl |
491 |
mul fp, xl, r8 |
492 |
mov lr, #0 |
493 |
adds ip, ip, fp, lsl #16 |
494 |
adc lr, lr, fp, lsr #16 |
495 |
mul fp, r7, yl |
496 |
adds ip, ip, fp, lsl #16 |
497 |
adc lr, lr, fp, lsr #16 |
498 |
mul fp, xl, sl |
499 |
mov r5, #0 |
500 |
adds lr, lr, fp, lsl #16 |
501 |
adc r5, r5, fp, lsr #16 |
502 |
mul fp, r7, yh |
503 |
adds lr, lr, fp, lsl #16 |
504 |
adc r5, r5, fp, lsr #16 |
505 |
mul fp, xh, r8 |
506 |
adds lr, lr, fp, lsl #16 |
507 |
adc r5, r5, fp, lsr #16 |
508 |
mul fp, r9, yl |
509 |
adds lr, lr, fp, lsl #16 |
510 |
adc r5, r5, fp, lsr #16 |
511 |
mul fp, xh, sl |
512 |
mul r6, r9, sl |
513 |
adds r5, r5, fp, lsl #16 |
514 |
adc r6, r6, fp, lsr #16 |
515 |
mul fp, r9, yh |
516 |
adds r5, r5, fp, lsl #16 |
517 |
adc r6, r6, fp, lsr #16 |
518 |
mul fp, xl, yh |
519 |
adds lr, lr, fp |
520 |
mul fp, r7, sl |
521 |
adcs r5, r5, fp |
522 |
mul fp, xh, yl |
523 |
adc r6, r6, #0 |
524 |
adds lr, lr, fp |
525 |
mul fp, r9, r8 |
526 |
adcs r5, r5, fp |
527 |
mul fp, r7, r8 |
528 |
adc r6, r6, #0 |
529 |
adds lr, lr, fp |
530 |
mul fp, xh, yh |
531 |
adcs r5, r5, fp |
532 |
adc r6, r6, #0 |
533 |
ldmfd sp!, {r7, r8, r9, sl, fp} |
534 |
|
535 |
#else |
536 |
|
537 |
@ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits. |
538 |
umull ip, lr, xl, yl |
539 |
mov r5, #0 |
540 |
umlal lr, r5, xl, yh |
541 |
umlal lr, r5, xh, yl |
542 |
mov r6, #0 |
543 |
umlal r5, r6, xh, yh |
544 |
|
545 |
#endif |
546 |
|
547 |
@ The LSBs in ip are only significant for the final rounding. |
548 |
@ Fold them into one bit of lr. |
549 |
teq ip, #0 |
550 |
orrne lr, lr, #1 |
551 |
|
552 |
@ Put final sign in xh. |
553 |
mov xh, r4, lsl #16 |
554 |
bic r4, r4, #0x8000 |
555 |
|
556 |
@ Adjust result if one extra MSB appeared (one of four times). |
557 |
tst r6, #(1 << 9) |
558 |
beq 1f |
559 |
add r4, r4, #(1 << 19) |
560 |
movs r6, r6, lsr #1 |
561 |
movs r5, r5, rrx |
562 |
movs lr, lr, rrx |
563 |
orrcs lr, lr, #1 |
564 |
1: |
565 |
@ Scale back to 53 bits. |
566 |
@ xh contains sign bit already. |
567 |
orr xh, xh, r6, lsl #12 |
568 |
orr xh, xh, r5, lsr #20 |
569 |
mov xl, r5, lsl #12 |
570 |
orr xl, xl, lr, lsr #20 |
571 |
|
572 |
@ Apply exponent bias, check range for underflow. |
573 |
sub r4, r4, #0x00f80000 |
574 |
subs r4, r4, #0x1f000000 |
575 |
ble LSYM(Lml_u) |
576 |
|
577 |
@ Round the result. |
578 |
movs lr, lr, lsl #12 |
579 |
bpl 1f |
580 |
adds xl, xl, #1 |
581 |
adc xh, xh, #0 |
582 |
teq lr, #0x80000000 |
583 |
biceq xl, xl, #1 |
584 |
|
585 |
@ Rounding may have produced an extra MSB here. |
586 |
@ The extra bit is cleared before merging the exponent below. |
587 |
tst xh, #0x00200000 |
588 |
addne r4, r4, #(1 << 19) |
589 |
1: |
590 |
@ Check exponent for overflow. |
591 |
adds ip, r4, #(1 << 19) |
592 |
tst ip, #(1 << 30) |
593 |
bne LSYM(Lml_o) |
594 |
|
595 |
@ Add final exponent. |
596 |
bic xh, xh, #0x00300000 |
597 |
orr xh, xh, r4, lsl #1 |
598 |
RETLDM "r4, r5, r6" |
599 |
|
600 |
@ Result is 0, but determine sign anyway. |
601 |
LSYM(Lml_z): |
602 |
eor xh, xh, yh |
603 |
LSYM(Ldv_z): |
604 |
bic xh, xh, #0x7fffffff |
605 |
mov xl, #0 |
606 |
RETLDM "r4, r5, r6" |
607 |
|
608 |
@ Check if denormalized result is possible, otherwise return signed 0. |
609 |
LSYM(Lml_u): |
610 |
cmn r4, #(53 << 19) |
611 |
movle xl, #0 |
612 |
bicle xh, xh, #0x7fffffff |
613 |
RETLDM "r4, r5, r6" le |
614 |
|
615 |
@ Find out proper shift value. |
616 |
LSYM(Lml_r): |
617 |
mvn r4, r4, asr #19 |
618 |
subs r4, r4, #30 |
619 |
bge 2f |
620 |
adds r4, r4, #12 |
621 |
bgt 1f |
622 |
|
623 |
@ shift result right of 1 to 20 bits, preserve sign bit, round, etc. |
624 |
add r4, r4, #20 |
625 |
rsb r5, r4, #32 |
626 |
mov r3, xl, lsl r5 |
627 |
mov xl, xl, lsr r4 |
628 |
orr xl, xl, xh, lsl r5 |
629 |
movs xh, xh, lsl #1 |
630 |
mov xh, xh, lsr r4 |
631 |
mov xh, xh, rrx |
632 |
adds xl, xl, r3, lsr #31 |
633 |
adc xh, xh, #0 |
634 |
teq lr, #0 |
635 |
teqeq r3, #0x80000000 |
636 |
biceq xl, xl, #1 |
637 |
RETLDM "r4, r5, r6" |
638 |
|
639 |
@ shift result right of 21 to 31 bits, or left 11 to 1 bits after |
640 |
@ a register switch from xh to xl. Then round. |
641 |
1: rsb r4, r4, #12 |
642 |
rsb r5, r4, #32 |
643 |
mov r3, xl, lsl r4 |
644 |
mov xl, xl, lsr r5 |
645 |
orr xl, xl, xh, lsl r4 |
646 |
bic xh, xh, #0x7fffffff |
647 |
adds xl, xl, r3, lsr #31 |
648 |
adc xh, xh, #0 |
649 |
teq lr, #0 |
650 |
teqeq r3, #0x80000000 |
651 |
biceq xl, xl, #1 |
652 |
RETLDM "r4, r5, r6" |
653 |
|
654 |
@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch |
655 |
@ from xh to xl. Leftover bits are in r3-r6-lr for rounding. |
656 |
2: rsb r5, r4, #32 |
657 |
mov r6, xl, lsl r5 |
658 |
mov r3, xl, lsr r4 |
659 |
orr r3, r3, xh, lsl r5 |
660 |
mov xl, xh, lsr r4 |
661 |
bic xh, xh, #0x7fffffff |
662 |
adds xl, xl, r3, lsr #31 |
663 |
adc xh, xh, #0 |
664 |
orrs r6, r6, lr |
665 |
teqeq r3, #0x80000000 |
666 |
biceq xl, xl, #1 |
667 |
RETLDM "r4, r5, r6" |
668 |
|
669 |
@ One or both arguments are denormalized. |
670 |
@ Scale them leftwards and preserve sign bit. |
671 |
LSYM(Lml_d): |
672 |
mov lr, #0 |
673 |
teq r4, #0 |
674 |
bne 2f |
675 |
and r6, xh, #0x80000000 |
676 |
1: movs xl, xl, lsl #1 |
677 |
adc xh, lr, xh, lsl #1 |
678 |
tst xh, #0x00100000 |
679 |
subeq r4, r4, #(1 << 19) |
680 |
beq 1b |
681 |
orr xh, xh, r6 |
682 |
teq r5, #0 |
683 |
bne LSYM(Lml_x) |
684 |
2: and r6, yh, #0x80000000 |
685 |
3: movs yl, yl, lsl #1 |
686 |
adc yh, lr, yh, lsl #1 |
687 |
tst yh, #0x00100000 |
688 |
subeq r5, r5, #(1 << 20) |
689 |
beq 3b |
690 |
orr yh, yh, r6 |
691 |
b LSYM(Lml_x) |
692 |
|
693 |
@ One or both args are INF or NAN. |
694 |
LSYM(Lml_s): |
695 |
orrs r6, xl, xh, lsl #1 |
696 |
orrnes r6, yl, yh, lsl #1 |
697 |
beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN |
698 |
teq r4, ip |
699 |
bne 1f |
700 |
orrs r6, xl, xh, lsl #12 |
701 |
bne LSYM(Lml_n) @ NAN * <anything> -> NAN |
702 |
1: teq r5, ip |
703 |
bne LSYM(Lml_i) |
704 |
orrs r6, yl, yh, lsl #12 |
705 |
bne LSYM(Lml_n) @ <anything> * NAN -> NAN |
706 |
|
707 |
@ Result is INF, but we need to determine its sign. |
708 |
LSYM(Lml_i): |
709 |
eor xh, xh, yh |
710 |
|
711 |
@ Overflow: return INF (sign already in xh). |
712 |
LSYM(Lml_o): |
713 |
and xh, xh, #0x80000000 |
714 |
orr xh, xh, #0x7f000000 |
715 |
orr xh, xh, #0x00f00000 |
716 |
mov xl, #0 |
717 |
RETLDM "r4, r5, r6" |
718 |
|
719 |
@ Return NAN. |
720 |
LSYM(Lml_n): |
721 |
mov xh, #0x7f000000 |
722 |
orr xh, xh, #0x00f80000 |
723 |
RETLDM "r4, r5, r6" |
724 |
|
725 |
FUNC_END muldf3 |
726 |
|
727 |
ARM_FUNC_START divdf3 |
728 |
|
729 |
stmfd sp!, {r4, r5, r6, lr} |
730 |
|
731 |
@ Mask out exponents. |
732 |
mov ip, #0x7f000000 |
733 |
orr ip, ip, #0x00f00000 |
734 |
and r4, xh, ip |
735 |
and r5, yh, ip |
736 |
|
737 |
@ Trap any INF/NAN or zeroes. |
738 |
teq r4, ip |
739 |
teqne r5, ip |
740 |
orrnes r6, xl, xh, lsl #1 |
741 |
orrnes r6, yl, yh, lsl #1 |
742 |
beq LSYM(Ldv_s) |
743 |
|
744 |
@ Shift exponents right one bit to make room for overflow bit. |
745 |
@ If either of them is 0, scale denormalized arguments off line. |
746 |
@ Then substract divisor exponent from dividend''s. |
747 |
movs r4, r4, lsr #1 |
748 |
teqne r5, #0 |
749 |
beq LSYM(Ldv_d) |
750 |
LSYM(Ldv_x): |
751 |
sub r4, r4, r5, asr #1 |
752 |
|
753 |
@ Preserve final sign into lr. |
754 |
eor lr, xh, yh |
755 |
|
756 |
@ Convert mantissa to unsigned integer. |
757 |
@ Dividend -> r5-r6, divisor -> yh-yl. |
758 |
mov r5, #0x10000000 |
759 |
mov yh, yh, lsl #12 |
760 |
orr yh, r5, yh, lsr #4 |
761 |
orr yh, yh, yl, lsr #24 |
762 |
movs yl, yl, lsl #8 |
763 |
mov xh, xh, lsl #12 |
764 |
teqeq yh, r5 |
765 |
beq LSYM(Ldv_1) |
766 |
orr r5, r5, xh, lsr #4 |
767 |
orr r5, r5, xl, lsr #24 |
768 |
mov r6, xl, lsl #8 |
769 |
|
770 |
@ Initialize xh with final sign bit. |
771 |
and xh, lr, #0x80000000 |
772 |
|
773 |
@ Ensure result will land to known bit position. |
774 |
cmp r5, yh |
775 |
cmpeq r6, yl |
776 |
bcs 1f |
777 |
sub r4, r4, #(1 << 19) |
778 |
movs yh, yh, lsr #1 |
779 |
mov yl, yl, rrx |
780 |
1: |
781 |
@ Apply exponent bias, check range for over/underflow. |
782 |
add r4, r4, #0x1f000000 |
783 |
add r4, r4, #0x00f80000 |
784 |
cmn r4, #(53 << 19) |
785 |
ble LSYM(Ldv_z) |
786 |
cmp r4, ip, lsr #1 |
787 |
bge LSYM(Lml_o) |
788 |
|
789 |
@ Perform first substraction to align result to a nibble. |
790 |
subs r6, r6, yl |
791 |
sbc r5, r5, yh |
792 |
movs yh, yh, lsr #1 |
793 |
mov yl, yl, rrx |
794 |
mov xl, #0x00100000 |
795 |
mov ip, #0x00080000 |
796 |
|
797 |
@ The actual division loop. |
798 |
1: subs lr, r6, yl |
799 |
sbcs lr, r5, yh |
800 |
subcs r6, r6, yl |
801 |
movcs r5, lr |
802 |
orrcs xl, xl, ip |
803 |
movs yh, yh, lsr #1 |
804 |
mov yl, yl, rrx |
805 |
subs lr, r6, yl |
806 |
sbcs lr, r5, yh |
807 |
subcs r6, r6, yl |
808 |
movcs r5, lr |
809 |
orrcs xl, xl, ip, lsr #1 |
810 |
movs yh, yh, lsr #1 |
811 |
mov yl, yl, rrx |
812 |
subs lr, r6, yl |
813 |
sbcs lr, r5, yh |
814 |
subcs r6, r6, yl |
815 |
movcs r5, lr |
816 |
orrcs xl, xl, ip, lsr #2 |
817 |
movs yh, yh, lsr #1 |
818 |
mov yl, yl, rrx |
819 |
subs lr, r6, yl |
820 |
sbcs lr, r5, yh |
821 |
subcs r6, r6, yl |
822 |
movcs r5, lr |
823 |
orrcs xl, xl, ip, lsr #3 |
824 |
|
825 |
orrs lr, r5, r6 |
826 |
beq 2f |
827 |
mov r5, r5, lsl #4 |
828 |
orr r5, r5, r6, lsr #28 |
829 |
mov r6, r6, lsl #4 |
830 |
mov yh, yh, lsl #3 |
831 |
orr yh, yh, yl, lsr #29 |
832 |
mov yl, yl, lsl #3 |
833 |
movs ip, ip, lsr #4 |
834 |
bne 1b |
835 |
|
836 |
@ We are done with a word of the result. |
837 |
@ Loop again for the low word if this pass was for the high word. |
838 |
tst xh, #0x00100000 |
839 |
bne 3f |
840 |
orr xh, xh, xl |
841 |
mov xl, #0 |
842 |
mov ip, #0x80000000 |
843 |
b 1b |
844 |
2: |
845 |
@ Be sure result starts in the high word. |
846 |
tst xh, #0x00100000 |
847 |
orreq xh, xh, xl |
848 |
moveq xl, #0 |
849 |
3: |
850 |
@ Check if denormalized result is needed. |
851 |
cmp r4, #0 |
852 |
ble LSYM(Ldv_u) |
853 |
|
854 |
@ Apply proper rounding. |
855 |
subs ip, r5, yh |
856 |
subeqs ip, r6, yl |
857 |
adcs xl, xl, #0 |
858 |
adc xh, xh, #0 |
859 |
teq ip, #0 |
860 |
biceq xl, xl, #1 |
861 |
|
862 |
@ Add exponent to result. |
863 |
bic xh, xh, #0x00100000 |
864 |
orr xh, xh, r4, lsl #1 |
865 |
RETLDM "r4, r5, r6" |
866 |
|
867 |
@ Division by 0x1p*: shortcut a lot of code. |
868 |
LSYM(Ldv_1): |
869 |
and lr, lr, #0x80000000 |
870 |
orr xh, lr, xh, lsr #12 |
871 |
add r4, r4, #0x1f000000 |
872 |
add r4, r4, #0x00f80000 |
873 |
cmp r4, ip, lsr #1 |
874 |
bge LSYM(Lml_o) |
875 |
cmp r4, #0 |
876 |
orrgt xh, xh, r4, lsl #1 |
877 |
RETLDM "r4, r5, r6" gt |
878 |
|
879 |
cmn r4, #(53 << 19) |
880 |
ble LSYM(Ldv_z) |
881 |
orr xh, xh, #0x00100000 |
882 |
mov lr, #0 |
883 |
b LSYM(Lml_r) |
884 |
|
885 |
@ Result must be denormalized: put remainder in lr for |
886 |
@ rounding considerations. |
887 |
LSYM(Ldv_u): |
888 |
orr lr, r5, r6 |
889 |
b LSYM(Lml_r) |
890 |
|
891 |
@ One or both arguments are denormalized. |
892 |
@ Scale them leftwards and preserve sign bit. |
893 |
LSYM(Ldv_d): |
894 |
mov lr, #0 |
895 |
teq r4, #0 |
896 |
bne 2f |
897 |
and r6, xh, #0x80000000 |
898 |
1: movs xl, xl, lsl #1 |
899 |
adc xh, lr, xh, lsl #1 |
900 |
tst xh, #0x00100000 |
901 |
subeq r4, r4, #(1 << 19) |
902 |
beq 1b |
903 |
orr xh, xh, r6 |
904 |
teq r5, #0 |
905 |
bne LSYM(Ldv_x) |
906 |
2: and r6, yh, #0x80000000 |
907 |
3: movs yl, yl, lsl #1 |
908 |
adc yh, lr, yh, lsl #1 |
909 |
tst yh, #0x00100000 |
910 |
subeq r5, r5, #(1 << 20) |
911 |
beq 3b |
912 |
orr yh, yh, r6 |
913 |
b LSYM(Ldv_x) |
914 |
|
915 |
@ One or both arguments is either INF, NAN or zero. |
916 |
LSYM(Ldv_s): |
917 |
teq r4, ip |
918 |
teqeq r5, ip |
919 |
beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN |
920 |
teq r4, ip |
921 |
bne 1f |
922 |
orrs r4, xl, xh, lsl #12 |
923 |
bne LSYM(Lml_n) @ NAN / <anything> -> NAN |
924 |
b LSYM(Lml_i) @ INF / <anything> -> INF |
925 |
1: teq r5, ip |
926 |
bne 2f |
927 |
orrs r5, yl, yh, lsl #12 |
928 |
bne LSYM(Lml_n) @ <anything> / NAN -> NAN |
929 |
b LSYM(Lml_z) @ <anything> / INF -> 0 |
930 |
2: @ One or both arguments are 0. |
931 |
orrs r4, xl, xh, lsl #1 |
932 |
bne LSYM(Lml_i) @ <non_zero> / 0 -> INF |
933 |
orrs r5, yl, yh, lsl #1 |
934 |
bne LSYM(Lml_z) @ 0 / <non_zero> -> 0 |
935 |
b LSYM(Lml_n) @ 0 / 0 -> NAN |
936 |
|
937 |
FUNC_END divdf3 |
938 |
|
939 |
#endif /* L_muldivdf3 */ |
940 |
|
941 |
#ifdef L_cmpdf2 |
942 |
|
943 |
FUNC_START gedf2 |
944 |
ARM_FUNC_START gtdf2 |
945 |
mov ip, #-1 |
946 |
b 1f |
947 |
|
948 |
FUNC_START ledf2 |
949 |
ARM_FUNC_START ltdf2 |
950 |
mov ip, #1 |
951 |
b 1f |
952 |
|
953 |
FUNC_START nedf2 |
954 |
FUNC_START eqdf2 |
955 |
ARM_FUNC_START cmpdf2 |
956 |
mov ip, #1 @ how should we specify unordered here? |
957 |
|
958 |
1: stmfd sp!, {r4, r5, lr} |
959 |
|
960 |
@ Trap any INF/NAN first. |
961 |
mov lr, #0x7f000000 |
962 |
orr lr, lr, #0x00f00000 |
963 |
and r4, xh, lr |
964 |
and r5, yh, lr |
965 |
teq r4, lr |
966 |
teqne r5, lr |
967 |
beq 3f |
968 |
|
969 |
@ Test for equality. |
970 |
@ Note that 0.0 is equal to -0.0. |
971 |
2: orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 |
972 |
orreqs ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 |
973 |
teqne xh, yh @ or xh == yh |
974 |
teqeq xl, yl @ and xl == yl |
975 |
moveq r0, #0 @ then equal. |
976 |
RETLDM "r4, r5" eq |
977 |
|
978 |
@ Check for sign difference. |
979 |
teq xh, yh |
980 |
movmi r0, xh, asr #31 |
981 |
orrmi r0, r0, #1 |
982 |
RETLDM "r4, r5" mi |
983 |
|
984 |
@ Compare exponents. |
985 |
cmp r4, r5 |
986 |
|
987 |
@ Compare mantissa if exponents are equal. |
988 |
moveq xh, xh, lsl #12 |
989 |
cmpeq xh, yh, lsl #12 |
990 |
cmpeq xl, yl |
991 |
movcs r0, yh, asr #31 |
992 |
mvncc r0, yh, asr #31 |
993 |
orr r0, r0, #1 |
994 |
RETLDM "r4, r5" |
995 |
|
996 |
@ Look for a NAN. |
997 |
3: teq r4, lr |
998 |
bne 4f |
999 |
orrs xl, xl, xh, lsl #12 |
1000 |
bne 5f @ x is NAN |
1001 |
4: teq r5, lr |
1002 |
bne 2b |
1003 |
orrs yl, yl, yh, lsl #12 |
1004 |
beq 2b @ y is not NAN |
1005 |
5: mov r0, ip @ return unordered code from ip |
1006 |
RETLDM "r4, r5" |
1007 |
|
1008 |
FUNC_END gedf2 |
1009 |
FUNC_END gtdf2 |
1010 |
FUNC_END ledf2 |
1011 |
FUNC_END ltdf2 |
1012 |
FUNC_END nedf2 |
1013 |
FUNC_END eqdf2 |
1014 |
FUNC_END cmpdf2 |
1015 |
|
1016 |
#endif /* L_cmpdf2 */ |
1017 |
|
1018 |
#ifdef L_unorddf2 |
1019 |
|
1020 |
ARM_FUNC_START unorddf2 |
1021 |
str lr, [sp, #-4]! |
1022 |
mov ip, #0x7f000000 |
1023 |
orr ip, ip, #0x00f00000 |
1024 |
and lr, xh, ip |
1025 |
teq lr, ip |
1026 |
bne 1f |
1027 |
orrs xl, xl, xh, lsl #12 |
1028 |
bne 3f @ x is NAN |
1029 |
1: and lr, yh, ip |
1030 |
teq lr, ip |
1031 |
bne 2f |
1032 |
orrs yl, yl, yh, lsl #12 |
1033 |
bne 3f @ y is NAN |
1034 |
2: mov r0, #0 @ arguments are ordered. |
1035 |
RETLDM |
1036 |
|
1037 |
3: mov r0, #1 @ arguments are unordered. |
1038 |
RETLDM |
1039 |
|
1040 |
FUNC_END unorddf2 |
1041 |
|
1042 |
#endif /* L_unorddf2 */ |
1043 |
|
1044 |
#ifdef L_fixdfsi |
1045 |
|
1046 |
ARM_FUNC_START fixdfsi |
1047 |
orrs ip, xl, xh, lsl #1 |
1048 |
beq 1f @ value is 0. |
1049 |
|
1050 |
mov r3, r3, rrx @ preserve C flag (the actual sign) |
1051 |
|
1052 |
@ check exponent range. |
1053 |
mov ip, #0x7f000000 |
1054 |
orr ip, ip, #0x00f00000 |
1055 |
and r2, xh, ip |
1056 |
teq r2, ip |
1057 |
beq 2f @ value is INF or NAN |
1058 |
bic ip, ip, #0x40000000 |
1059 |
cmp r2, ip |
1060 |
bcc 1f @ value is too small |
1061 |
add ip, ip, #(31 << 20) |
1062 |
cmp r2, ip |
1063 |
bcs 3f @ value is too large |
1064 |
|
1065 |
rsb r2, r2, ip |
1066 |
mov ip, xh, lsl #11 |
1067 |
orr ip, ip, #0x80000000 |
1068 |
orr ip, ip, xl, lsr #21 |
1069 |
mov r2, r2, lsr #20 |
1070 |
tst r3, #0x80000000 @ the sign bit |
1071 |
mov r0, ip, lsr r2 |
1072 |
rsbne r0, r0, #0 |
1073 |
RET |
1074 |
|
1075 |
1: mov r0, #0 |
1076 |
RET |
1077 |
|
1078 |
2: orrs xl, xl, xh, lsl #12 |
1079 |
bne 4f @ r0 is NAN. |
1080 |
3: ands r0, r3, #0x80000000 @ the sign bit |
1081 |
moveq r0, #0x7fffffff @ maximum signed positive si |
1082 |
RET |
1083 |
|
1084 |
4: mov r0, #0 @ How should we convert NAN? |
1085 |
RET |
1086 |
|
1087 |
FUNC_END fixdfsi |
1088 |
|
1089 |
#endif /* L_fixdfsi */ |
1090 |
|
1091 |
#ifdef L_fixunsdfsi |
1092 |
|
1093 |
ARM_FUNC_START fixunsdfsi |
1094 |
orrs ip, xl, xh, lsl #1 |
1095 |
movcss r0, #0 @ value is negative |
1096 |
RETc(eq) @ or 0 (xl, xh overlap r0) |
1097 |
|
1098 |
@ check exponent range. |
1099 |
mov ip, #0x7f000000 |
1100 |
orr ip, ip, #0x00f00000 |
1101 |
and r2, xh, ip |
1102 |
teq r2, ip |
1103 |
beq 2f @ value is INF or NAN |
1104 |
bic ip, ip, #0x40000000 |
1105 |
cmp r2, ip |
1106 |
bcc 1f @ value is too small |
1107 |
add ip, ip, #(31 << 20) |
1108 |
cmp r2, ip |
1109 |
bhi 3f @ value is too large |
1110 |
|
1111 |
rsb r2, r2, ip |
1112 |
mov ip, xh, lsl #11 |
1113 |
orr ip, ip, #0x80000000 |
1114 |
orr ip, ip, xl, lsr #21 |
1115 |
mov r2, r2, lsr #20 |
1116 |
mov r0, ip, lsr r2 |
1117 |
RET |
1118 |
|
1119 |
1: mov r0, #0 |
1120 |
RET |
1121 |
|
1122 |
2: orrs xl, xl, xh, lsl #12 |
1123 |
bne 4f @ value is NAN. |
1124 |
3: mov r0, #0xffffffff @ maximum unsigned si |
1125 |
RET |
1126 |
|
1127 |
4: mov r0, #0 @ How should we convert NAN? |
1128 |
RET |
1129 |
|
1130 |
FUNC_END fixunsdfsi |
1131 |
|
1132 |
#endif /* L_fixunsdfsi */ |
1133 |
|
1134 |
#ifdef L_truncdfsf2 |
1135 |
|
1136 |
ARM_FUNC_START truncdfsf2 |
1137 |
orrs r2, xl, xh, lsl #1 |
1138 |
moveq r0, r2, rrx |
1139 |
RETc(eq) @ value is 0.0 or -0.0 |
1140 |
|
1141 |
@ check exponent range. |
1142 |
mov ip, #0x7f000000 |
1143 |
orr ip, ip, #0x00f00000 |
1144 |
and r2, ip, xh |
1145 |
teq r2, ip |
1146 |
beq 2f @ value is INF or NAN |
1147 |
bic xh, xh, ip |
1148 |
cmp r2, #(0x380 << 20) |
1149 |
bls 4f @ value is too small |
1150 |
|
1151 |
@ shift and round mantissa |
1152 |
1: movs r3, xl, lsr #29 |
1153 |
adc r3, r3, xh, lsl #3 |
1154 |
|
1155 |
@ if halfway between two numbers, round towards LSB = 0. |
1156 |
mov xl, xl, lsl #3 |
1157 |
teq xl, #0x80000000 |
1158 |
biceq r3, r3, #1 |
1159 |
|
1160 |
@ rounding might have created an extra MSB. If so adjust exponent. |
1161 |
tst r3, #0x00800000 |
1162 |
addne r2, r2, #(1 << 20) |
1163 |
bicne r3, r3, #0x00800000 |
1164 |
|
1165 |
@ check exponent for overflow |
1166 |
mov ip, #(0x400 << 20) |
1167 |
orr ip, ip, #(0x07f << 20) |
1168 |
cmp r2, ip |
1169 |
bcs 3f @ overflow |
1170 |
|
1171 |
@ adjust exponent, merge with sign bit and mantissa. |
1172 |
movs xh, xh, lsl #1 |
1173 |
mov r2, r2, lsl #4 |
1174 |
orr r0, r3, r2, rrx |
1175 |
eor r0, r0, #0x40000000 |
1176 |
RET |
1177 |
|
1178 |
2: @ chech for NAN |
1179 |
orrs xl, xl, xh, lsl #12 |
1180 |
movne r0, #0x7f000000 |
1181 |
orrne r0, r0, #0x00c00000 |
1182 |
RETc(ne) @ return NAN |
1183 |
|
1184 |
3: @ return INF with sign |
1185 |
and r0, xh, #0x80000000 |
1186 |
orr r0, r0, #0x7f000000 |
1187 |
orr r0, r0, #0x00800000 |
1188 |
RET |
1189 |
|
1190 |
4: @ check if denormalized value is possible |
1191 |
subs r2, r2, #((0x380 - 24) << 20) |
1192 |
andle r0, xh, #0x80000000 @ too small, return signed 0. |
1193 |
RETc(le) |
1194 |
|
1195 |
@ denormalize value so we can resume with the code above afterwards. |
1196 |
orr xh, xh, #0x00100000 |
1197 |
mov r2, r2, lsr #20 |
1198 |
rsb r2, r2, #25 |
1199 |
cmp r2, #20 |
1200 |
bgt 6f |
1201 |
|
1202 |
rsb ip, r2, #32 |
1203 |
mov r3, xl, lsl ip |
1204 |
mov xl, xl, lsr r2 |
1205 |
orr xl, xl, xh, lsl ip |
1206 |
movs xh, xh, lsl #1 |
1207 |
mov xh, xh, lsr r2 |
1208 |
mov xh, xh, rrx |
1209 |
5: teq r3, #0 @ fold r3 bits into the LSB |
1210 |
orrne xl, xl, #1 @ for rounding considerations. |
1211 |
mov r2, #(0x380 << 20) @ equivalent to the 0 float exponent |
1212 |
b 1b |
1213 |
|
1214 |
6: rsb r2, r2, #(12 + 20) |
1215 |
rsb ip, r2, #32 |
1216 |
mov r3, xl, lsl r2 |
1217 |
mov xl, xl, lsr ip |
1218 |
orr xl, xl, xh, lsl r2 |
1219 |
and xh, xh, #0x80000000 |
1220 |
b 5b |
1221 |
|
1222 |
FUNC_END truncdfsf2 |
1223 |
|
1224 |
#endif /* L_truncdfsf2 */ |