summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorrearnsha <rearnsha@138bc75d-0d04-0410-961f-82ee72b054a4>2003-08-27 12:52:58 +0000
committerrearnsha <rearnsha@138bc75d-0d04-0410-961f-82ee72b054a4>2003-08-27 12:52:58 +0000
commit60f30e1335bdbc4e3409b6bd0481aa21cdd55c49 (patch)
tree8dd03076871f1247d318053ae956e716bc6733bb /gcc
parent0d21cbc776f7eff52137e72df2aa9f704fd1355a (diff)
downloadgcc-60f30e1335bdbc4e3409b6bd0481aa21cdd55c49.tar.gz
2003-08-27 Richard Earnshaw <rearnsha@arm.com>
* lib1funcs.asm (L_ieee754_sp): New. Include ieee754-sf.S. (L_ieee754_dp): New. Include ieee754-df.S. * arm/ieee754-sf.S: Rework to allow interworking, calling from Thumb, and compilation in apcs-26 mode. * arm/ieee754-df.S: Likewise. * t-arm-elf (DPBIT, FPBIT, fp-bit.c dp-bit.c): Delete rules (LIB1ASMFUNCS): Add _ieee754_sp and _ieee754_dp targets. 2003-08-27 Nicolas Pitre <nico@cam.org> * arm/ieee754-sf.S: New. * arm/ieee754-df.S: New. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@70845 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog15
-rw-r--r--gcc/config/arm/ieee754-df.S1331
-rw-r--r--gcc/config/arm/ieee754-sf.S813
-rw-r--r--gcc/config/arm/lib1funcs.asm14
-rw-r--r--gcc/config/arm/t-arm-elf27
5 files changed, 2177 insertions, 23 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0300d78069f..4d398dcc12b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2003-08-27 Richard Earnshaw <rearnsha@arm.com>
+
+ * lib1funcs.asm (L_ieee754_sp): New. Include ieee754-sf.S.
+ (L_ieee754_dp): New. Include ieee754-df.S.
+ * arm/ieee754-sf.S: Rework to allow interworking, calling from Thumb,
+ and compilation in apcs-26 mode.
+ * arm/ieee754-df.S: Likewise.
+ * t-arm-elf (DPBIT, FPBIT, fp-bit.c dp-bit.c): Delete rules
+ (LIB1ASMFUNCS): Add _ieee754_sp and _ieee754_dp targets.
+
+2003-08-27 Nicolas Pitre <nico@cam.org>
+
+ * arm/ieee754-sf.S: New.
+ * arm/ieee754-df.S: New.
+
2003-08-27 Jakub Jelinek <jakub@redhat.com>
* builtins.c (expand_builtin_expect_jump): Save pending_stack_adjust
diff --git a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S
new file mode 100644
index 00000000000..9a00dcea25e
--- /dev/null
+++ b/gcc/config/arm/ieee754-df.S
@@ -0,0 +1,1331 @@
+/* ieee754-df.S double-precision floating point support for ARM
+
+ Copyright (C) 2003 Free Software Foundation, Inc.
+ Contributed by Nicolas Pitre (nico@cam.org)
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ In addition to the permissions in the GNU General Public License, the
+ Free Software Foundation gives you unlimited permission to link the
+ compiled version of this file into combinations with other programs,
+ and to distribute those combinations without any restriction coming
+ from the use of this file. (The General Public License restrictions
+ do apply in other respects; for example, they cover modification of
+ the file, and distribution when not linked into a combine
+ executable.)
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to
+ the Free Software Foundation, 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/*
+ * Notes:
+ *
+ * The goal of this code is to be as fast as possible. This is
+ * not meant to be easy to understand for the casual reader.
+ * For slightly simpler code please see the single precision version
+ * of this file.
+ *
+ * Only the default rounding mode is intended for best performances.
+ * Exceptions aren't supported yet, but that can be added quite easily
+ * if necessary without impacting performances.
+ */
+
+@ This selects the minimum architecture level required.
+#undef __ARM_ARCH__
+#define __ARM_ARCH__ 3
+
+#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
+ || defined(__ARM_ARCH_4T__)
+#undef __ARM_ARCH__
+/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
+ long multiply instructions. That includes v3M. */
+#define __ARM_ARCH__ 4
+#endif
+
+#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+ || defined(__ARM_ARCH_5TE__)
+#undef __ARM_ARCH__
+#define __ARM_ARCH__ 5
+#endif
+
+#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
+#undef RET
+#undef RETc
+#define RET bx lr
+#define RETc(x) bx##x lr
+#if (__ARM_ARCH__ == 4) && (defined(__thumb__) || defined(__THUMB_INTERWORK__))
+#define __FP_INTERWORKING__
+#endif
+#endif
+
+@ For FPA, float words are always big-endian.
+@ For VFP, floats words follow the memory system mode.
+#if defined(__VFP_FP__) && !defined(__ARMEB__)
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#else
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#endif
+
+
+#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+.macro ARM_FUNC_START name
+ FUNC_START \name
+ bx pc
+ nop
+ .arm
+.endm
+#else
+.macro ARM_FUNC_START name
+ FUNC_START \name
+.endm
+#endif
+
+ARM_FUNC_START negdf2
+ @ flip sign bit
+ eor xh, xh, #0x80000000
+ RET
+
+ARM_FUNC_START subdf3
+ @ flip sign bit of second arg
+ eor yh, yh, #0x80000000
+#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+ b 1f @ Skip Thumb-code prologue
+#endif
+
+ARM_FUNC_START adddf3
+
+1: @ Compare both args, return zero if equal but the sign.
+ teq xl, yl
+ eoreq ip, xh, yh
+ teqeq ip, #0x80000000
+ beq LSYM(Lad_z)
+
+ @ If first arg is 0 or -0, return second arg.
+ @ If second arg is 0 or -0, return first arg.
+ orrs ip, xl, xh, lsl #1
+ moveq xl, yl
+ moveq xh, yh
+ orrnes ip, yl, yh, lsl #1
+ RETc(eq)
+
+ stmfd sp!, {r4, r5, lr}
+
+ @ Mask out exponents.
+ mov ip, #0x7f000000
+ orr ip, ip, #0x00f00000
+ and r4, xh, ip
+ and r5, yh, ip
+
+ @ If either of them is 0x7ff, result will be INF or NAN
+ teq r4, ip
+ teqne r5, ip
+ beq LSYM(Lad_i)
+
+ @ Compute exponent difference. Make largest exponent in r4,
+ @ corresponding arg in xh-xl, and positive exponent difference in r5.
+ subs r5, r5, r4
+ rsblt r5, r5, #0
+ ble 1f
+ add r4, r4, r5
+ eor yl, xl, yl
+ eor yh, xh, yh
+ eor xl, yl, xl
+ eor xh, yh, xh
+ eor yl, xl, yl
+ eor yh, xh, yh
+1:
+
+ @ If exponent difference is too large, return largest argument
+ @ already in xh-xl. We need up to 54 bit to handle proper rounding
+ @ of 0x1p54 - 1.1.
+ cmp r5, #(54 << 20)
+#ifdef __FP_INTERWORKING__
+ ldmhifd sp!, {r4, r5, lr}
+ bxhi lr
+#else
+ ldmhifd sp!, {r4, r5, pc}RETCOND
+#endif
+
+ @ Convert mantissa to signed integer.
+ tst xh, #0x80000000
+ bic xh, xh, ip, lsl #1
+ orr xh, xh, #0x00100000
+ beq 1f
+ rsbs xl, xl, #0
+ rsc xh, xh, #0
+1:
+ tst yh, #0x80000000
+ bic yh, yh, ip, lsl #1
+ orr yh, yh, #0x00100000
+ beq 1f
+ rsbs yl, yl, #0
+ rsc yh, yh, #0
+1:
+ @ If exponent == difference, one or both args were denormalized.
+ @ Since this is not common case, rescale them off line.
+ teq r4, r5
+ beq LSYM(Lad_d)
+LSYM(Lad_x):
+ @ Scale down second arg with exponent difference.
+ @ Apply shift one bit left to first arg and the rest to second arg
+ @ to simplify things later, but only if exponent does not become 0.
+ mov ip, #0
+ movs r5, r5, lsr #20
+ beq 3f
+ teq r4, #(1 << 20)
+ beq 1f
+ movs xl, xl, lsl #1
+ adc xh, ip, xh, lsl #1
+ sub r4, r4, #(1 << 20)
+ subs r5, r5, #1
+ beq 3f
+
+ @ Shift yh-yl right per r5, keep leftover bits into ip.
+1: rsbs lr, r5, #32
+ blt 2f
+ mov ip, yl, lsl lr
+ mov yl, yl, lsr r5
+ orr yl, yl, yh, lsl lr
+ mov yh, yh, asr r5
+ b 3f
+2: sub r5, r5, #32
+ add lr, lr, #32
+ cmp yl, #1
+ adc ip, ip, yh, lsl lr
+ mov yl, yh, asr r5
+ mov yh, yh, asr #32
+3:
+ @ the actual addition
+ adds xl, xl, yl
+ adc xh, xh, yh
+
+ @ We now have a result in xh-xl-ip.
+ @ Keep absolute value in xh-xl-ip, sign in r5.
+ ands r5, xh, #0x80000000
+ bpl LSYM(Lad_p)
+ rsbs ip, ip, #0
+ rscs xl, xl, #0
+ rsc xh, xh, #0
+
+ @ Determine how to normalize the result.
+LSYM(Lad_p):
+ cmp xh, #0x00100000
+ bcc LSYM(Lad_l)
+ cmp r0, #0x00200000
+ bcc LSYM(Lad_r0)
+ cmp r0, #0x00400000
+ bcc LSYM(Lad_r1)
+
+ @ Result needs to be shifted right.
+ movs xh, xh, lsr #1
+ movs xl, xl, rrx
+ movs ip, ip, rrx
+ orrcs ip, ip, #1
+ add r4, r4, #(1 << 20)
+LSYM(Lad_r1):
+ movs xh, xh, lsr #1
+ movs xl, xl, rrx
+ movs ip, ip, rrx
+ orrcs ip, ip, #1
+ add r4, r4, #(1 << 20)
+
+ @ Our result is now properly aligned into xh-xl, remaining bits in ip.
+ @ Round with MSB of ip. If halfway between two numbers, round towards
+ @ LSB of xl = 0.
+LSYM(Lad_r0):
+ adds xl, xl, ip, lsr #31
+ adc xh, xh, #0
+ teq ip, #0x80000000
+ biceq xl, xl, #1
+
+ @ One extreme rounding case may add a new MSB. Adjust exponent.
+ @ That MSB will be cleared when exponent is merged below.
+ tst xh, #0x00200000
+ addne r4, r4, #(1 << 20)
+
+ @ Make sure we did not bust our exponent.
+ adds ip, r4, #(1 << 20)
+ bmi LSYM(Lad_o)
+
+ @ Pack final result together.
+LSYM(Lad_e):
+ bic xh, xh, #0x00300000
+ orr xh, xh, r4
+ orr xh, xh, r5
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, pc}RETCOND
+#endif
+
+LSYM(Lad_l): @ Result must be shifted left and exponent adjusted.
+ @ No rounding necessary since ip will always be 0.
+#if __ARM_ARCH__ < 5
+
+ teq xh, #0
+ movne r3, #-11
+ moveq r3, #21
+ moveq xh, xl
+ moveq xl, #0
+ mov r2, xh
+ movs ip, xh, lsr #16
+ moveq r2, r2, lsl #16
+ addeq r3, r3, #16
+ tst r2, #0xff000000
+ moveq r2, r2, lsl #8
+ addeq r3, r3, #8
+ tst r2, #0xf0000000
+ moveq r2, r2, lsl #4
+ addeq r3, r3, #4
+ tst r2, #0xc0000000
+ moveq r2, r2, lsl #2
+ addeq r3, r3, #2
+ tst r2, #0x80000000
+ addeq r3, r3, #1
+
+#else
+
+ teq xh, #0
+ moveq xh, xl
+ moveq xl, #0
+ clz r3, xh
+ addeq r3, r3, #32
+ sub r3, r3, #11
+
+#endif
+
+ @ determine how to shift the value.
+ subs r2, r3, #32
+ bge 2f
+ adds r2, r2, #12
+ ble 1f
+
+ @ shift value left 21 to 31 bits, or actually right 11 to 1 bits
+ @ since a register switch happened above.
+ add ip, r2, #20
+ rsb r2, r2, #12
+ mov xl, xh, lsl ip
+ mov xh, xh, lsr r2
+ b 3f
+
+ @ actually shift value left 1 to 20 bits, which might also represent
+ @ 32 to 52 bits if counting the register switch that happened earlier.
+1: add r2, r2, #20
+2: rsble ip, r2, #32
+ mov xh, xh, lsl r2
+ orrle xh, xh, xl, lsr ip
+ movle xl, xl, lsl r2
+
+ @ adjust exponent accordingly.
+3: subs r4, r4, r3, lsl #20
+ bgt LSYM(Lad_e)
+
+ @ Exponent too small, denormalize result.
+ @ Find out proper shift value.
+ mvn r4, r4, asr #20
+ subs r4, r4, #30
+ bge 2f
+ adds r4, r4, #12
+ bgt 1f
+
+ @ shift result right of 1 to 20 bits, sign is in r5.
+ add r4, r4, #20
+ rsb r2, r4, #32
+ mov xl, xl, lsr r4
+ orr xl, xl, xh, lsl r2
+ orr xh, r5, xh, lsr r4
+#ifdef __FP_INTERWORKING
+ ldmfd sp!, {r4, r5, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, pc}RETCOND
+#endif
+
+ @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
+ @ a register switch from xh to xl.
+1: rsb r4, r4, #12
+ rsb r2, r4, #32
+ mov xl, xl, lsr r2
+ orr xl, xl, xh, lsl r4
+ mov xh, r5
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, pc}RETCOND
+#endif
+
+ @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
+ @ from xh to xl.
+2: mov xl, xh, lsr r4
+ mov xh, r5
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, pc}RETCOND
+#endif
+
+ @ Adjust exponents for denormalized arguments.
+LSYM(Lad_d):
+ teq r4, #0
+ eoreq xh, xh, #0x00100000
+ addeq r4, r4, #(1 << 20)
+ eor yh, yh, #0x00100000
+ subne r5, r5, #(1 << 20)
+ b LSYM(Lad_x)
+
+ @ Result is x - x = 0, unless x = INF or NAN.
+LSYM(Lad_z):
+ sub ip, ip, #0x00100000 @ ip becomes 0x7ff00000
+ and r2, xh, ip
+ teq r2, ip
+ orreq xh, ip, #0x00080000
+ movne xh, #0
+ mov xl, #0
+ RET
+
+ @ Overflow: return INF.
+LSYM(Lad_o):
+ orr xh, r5, #0x7f000000
+ orr xh, xh, #0x00f00000
+ mov xl, #0
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, pc}RETCOND
+#endif
+
+ @ At least one of x or y is INF/NAN.
+ @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
+ @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
+ @ if either is NAN: return NAN
+ @ if opposite sign: return NAN
+ @ return xh-xl (which is INF or -INF)
+LSYM(Lad_i):
+ teq r4, ip
+ movne xh, yh
+ movne xl, yl
+ teqeq r5, ip
+#ifdef __FP_INTERWORKING__
+ ldmnefd sp!, {r4, r5, lr}
+ bxne lr
+#else
+ ldmnefd sp!, {r4, r5, pc}RETCOND
+#endif
+ orrs r4, xl, xh, lsl #12
+ orreqs r4, yl, yh, lsl #12
+ teqeq xh, yh
+ orrne xh, r5, #0x00080000
+ movne xl, #0
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, pc}RETCOND
+#endif
+
+
+ARM_FUNC_START floatunsidf
+ teq r0, #0
+ moveq r1, #0
+ RETc(eq)
+ stmfd sp!, {r4, r5, lr}
+ mov r4, #(0x400 << 20) @ initial exponent
+ add r4, r4, #((52-1) << 20)
+ mov r5, #0 @ sign bit is 0
+ mov xl, r0
+ mov xh, #0
+ b LSYM(Lad_l)
+
+
+ARM_FUNC_START floatsidf
+ teq r0, #0
+ moveq r1, #0
+ RETc(eq)
+ stmfd sp!, {r4, r5, lr}
+ mov r4, #(0x400 << 20) @ initial exponent
+ add r4, r4, #((52-1) << 20)
+ ands r5, r0, #0x80000000 @ sign bit in r5
+ rsbmi r0, r0, #0 @ absolute value
+ mov xl, r0
+ mov xh, #0
+ b LSYM(Lad_l)
+
+
+ARM_FUNC_START extendsfdf2
+ movs r2, r0, lsl #1
+ beq 1f @ value is 0.0 or -0.0
+ mov xh, r2, asr #3 @ stretch exponent
+ mov xh, xh, rrx @ retrieve sign bit
+ mov xl, r2, lsl #28 @ retrieve remaining bits
+ ands r2, r2, #0xff000000 @ isolate exponent
+ beq 2f @ exponent was 0 but not mantissa
+ teq r2, #0xff000000 @ check if INF or NAN
+ eorne xh, xh, #0x38000000 @ fixup exponent otherwise.
+ RET
+
+1: mov xh, r0
+ mov xl, #0
+ RET
+
+2: @ value was denormalized. We can normalize it now.
+ stmfd sp!, {r4, r5, lr}
+ mov r4, #(0x380 << 20) @ setup corresponding exponent
+ add r4, r4, #(1 << 20)
+ and r5, xh, #0x80000000 @ move sign bit in r5
+ bic xh, xh, #0x80000000
+ b LSYM(Lad_l)
+
+
+ARM_FUNC_START muldf3
+
+ stmfd sp!, {r4, r5, r6, lr}
+
+ @ Mask out exponents.
+ mov ip, #0x7f000000
+ orr ip, ip, #0x00f00000
+ and r4, xh, ip
+ and r5, yh, ip
+
+ @ Trap any INF/NAN.
+ teq r4, ip
+ teqne r5, ip
+ beq LSYM(Lml_s)
+
+ @ Trap any multiplication by 0.
+ orrs r6, xl, xh, lsl #1
+ orrnes r6, yl, yh, lsl #1
+ beq LSYM(Lml_z)
+
+ @ Shift exponents right one bit to make room for overflow bit.
+ @ If either of them is 0, scale denormalized arguments off line.
+ @ Then add both exponents together.
+ movs r4, r4, lsr #1
+ teqne r5, #0
+ beq LSYM(Lml_d)
+LSYM(Lml_x):
+ add r4, r4, r5, asr #1
+
+ @ Preserve final sign in r4 along with exponent for now.
+ teq xh, yh
+ orrmi r4, r4, #0x8000
+
+ @ Convert mantissa to unsigned integer.
+ bic xh, xh, ip, lsl #1
+ bic yh, yh, ip, lsl #1
+ orr xh, xh, #0x00100000
+ orr yh, yh, #0x00100000
+
+#if __ARM_ARCH__ < 4
+
+ @ Well, no way to make it shorter without the umull instruction.
+ @ We must perform that 53 x 53 bit multiplication by hand.
+ stmfd sp!, {r7, r8, r9, sl, fp}
+ mov r7, xl, lsr #16
+ mov r8, yl, lsr #16
+ mov r9, xh, lsr #16
+ mov sl, yh, lsr #16
+ bic xl, xl, r7, lsl #16
+ bic yl, yl, r8, lsl #16
+ bic xh, xh, r9, lsl #16
+ bic yh, yh, sl, lsl #16
+ mul ip, xl, yl
+ mul fp, xl, r8
+ mov lr, #0
+ adds ip, ip, fp, lsl #16
+ adc lr, lr, fp, lsr #16
+ mul fp, r7, yl
+ adds ip, ip, fp, lsl #16
+ adc lr, lr, fp, lsr #16
+ mul fp, xl, sl
+ mov r5, #0
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, r7, yh
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, xh, r8
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, r9, yl
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, xh, sl
+ mul r6, r9, sl
+ adds r5, r5, fp, lsl #16
+ adc r6, r6, fp, lsr #16
+ mul fp, r9, yh
+ adds r5, r5, fp, lsl #16
+ adc r6, r6, fp, lsr #16
+ mul fp, xl, yh
+ adds lr, lr, fp
+ mul fp, r7, sl
+ adcs r5, r5, fp
+ mul fp, xh, yl
+ adc r6, r6, #0
+ adds lr, lr, fp
+ mul fp, r9, r8
+ adcs r5, r5, fp
+ mul fp, r7, r8
+ adc r6, r6, #0
+ adds lr, lr, fp
+ mul fp, xh, yh
+ adcs r5, r5, fp
+ adc r6, r6, #0
+ ldmfd sp!, {r7, r8, r9, sl, fp}
+
+#else
+
+ @ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits.
+ umull ip, lr, xl, yl
+ mov r5, #0
+ umlal lr, r5, xl, yh
+ umlal lr, r5, xh, yl
+ mov r6, #0
+ umlal r5, r6, xh, yh
+
+#endif
+
+ @ The LSBs in ip are only significant for the final rounding.
+ @ Fold them into one bit of lr.
+ teq ip, #0
+ orrne lr, lr, #1
+
+ @ Put final sign in xh.
+ mov xh, r4, lsl #16
+ bic r4, r4, #0x8000
+
+ @ Adjust result if one extra MSB appeared (one of four times).
+ tst r6, #(1 << 9)
+ beq 1f
+ add r4, r4, #(1 << 19)
+ movs r6, r6, lsr #1
+ movs r5, r5, rrx
+ movs lr, lr, rrx
+ orrcs lr, lr, #1
+1:
+ @ Scale back to 53 bits.
+ @ xh contains sign bit already.
+ orr xh, xh, r6, lsl #12
+ orr xh, xh, r5, lsr #20
+ mov xl, r5, lsl #12
+ orr xl, xl, lr, lsr #20
+
+ @ Apply exponent bias, check range for underflow.
+ sub r4, r4, #0x00f80000
+ subs r4, r4, #0x1f000000
+ ble LSYM(Lml_u)
+
+ @ Round the result.
+ movs lr, lr, lsl #12
+ bpl 1f
+ adds xl, xl, #1
+ adc xh, xh, #0
+ teq lr, #0x80000000
+ biceq xl, xl, #1
+
+ @ Rounding may have produced an extra MSB here.
+ @ The extra bit is cleared before merging the exponent below.
+ tst xh, #0x00200000
+ addne r4, r4, #(1 << 19)
+1:
+ @ Check exponent for overflow.
+ adds ip, r4, #(1 << 19)
+ tst ip, #(1 << 30)
+ bne LSYM(Lml_o)
+
+ @ Add final exponent.
+ bic xh, xh, #0x00300000
+ orr xh, xh, r4, lsl #1
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, r6, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, r6, pc}RETCOND
+#endif
+
+ @ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+ eor xh, xh, yh
+LSYM(Ldv_z):
+ bic xh, xh, #0x7fffffff
+ mov xl, #0
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, r6, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, r6, pc}RETCOND
+#endif
+
+ @ Check if denormalized result is possible, otherwise return signed 0.
+LSYM(Lml_u):
+ cmn r4, #(53 << 19)
+ movle xl, #0
+#ifdef __FP_INTERWORKING__
+ ldmlefd sp!, {r4, r5, r6, lr}
+ bxle lr
+#else
+ ldmlefd sp!, {r4, r5, r6, pc}RETCOND
+#endif
+
+ @ Find out proper shift value.
+LSYM(Lml_r):
+ mvn r4, r4, asr #19
+ subs r4, r4, #30
+ bge 2f
+ adds r4, r4, #12
+ bgt 1f
+
+ @ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
+ add r4, r4, #20
+ rsb r5, r4, #32
+ mov r3, xl, lsl r5
+ mov xl, xl, lsr r4
+ orr xl, xl, xh, lsl r5
+ movs xh, xh, lsl #1
+ mov xh, xh, lsr r4
+ mov xh, xh, rrx
+ adds xl, xl, r3, lsr #31
+ adc xh, xh, #0
+ teq lr, #0
+ teqeq r3, #0x80000000
+ biceq xl, xl, #1
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, r6, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, r6, pc}RETCOND
+#endif
+
+ @ shift result right of 21 to 31 bits, or left 11 to 1 bits after
+ @ a register switch from xh to xl. Then round.
+1: rsb r4, r4, #12
+ rsb r5, r4, #32
+ mov r3, xl, lsl r4
+ mov xl, xl, lsr r5
+ orr xl, xl, xh, lsl r4
+ bic xh, xh, #0x7fffffff
+ adds xl, xl, r3, lsr #31
+ adc xh, xh, #0
+ teq lr, #0
+ teqeq r3, #0x80000000
+ biceq xl, xl, #1
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, r6, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, r6, pc}RETCOND
+#endif
+
+ @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
+ @ from xh to xl. Leftover bits are in r3-r6-lr for rounding.
+2: rsb r5, r4, #32
+ mov r6, xl, lsl r5
+ mov r3, xl, lsr r4
+ orr r3, r3, xh, lsl r5
+ mov xl, xh, lsr r4
+ bic xh, xh, #0x7fffffff
+ adds xl, xl, r3, lsr #31
+ adc xh, xh, #0
+ orrs r6, r6, lr
+ teqeq r3, #0x80000000
+ biceq xl, xl, #1
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, r6, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, r6, pc}RETCOND
+#endif
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+LSYM(Lml_d):
+ mov lr, #0
+ teq r4, #0
+ bne 2f
+ and r6, xh, #0x80000000
+1: movs xl, xl, lsl #1
+ adc xh, lr, xh, lsl #1
+ tst xh, #0x00100000
+ subeq r4, r4, #(1 << 19)
+ beq 1b
+ orr xh, xh, r6
+ teq r5, #0
+ bne LSYM(Lml_x)
+2: and r6, yh, #0x80000000
+3: movs yl, yl, lsl #1
+ adc yh, lr, yh, lsl #1
+ tst yh, #0x00100000
+ subeq r5, r5, #(1 << 20)
+ beq 3b
+ orr yh, yh, r6
+ b LSYM(Lml_x)
+
+ @ One or both args are INF or NAN.
+LSYM(Lml_s):
+ orrs r6, xl, xh, lsl #1
+ orrnes r6, yl, yh, lsl #1
+ beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ teq r4, ip
+ bne 1f
+ orrs r6, xl, xh, lsl #12
+ bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+1: teq r5, ip
+ bne LSYM(Lml_i)
+ orrs r6, yl, yh, lsl #12
+ bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+
+ @ Result is INF, but we need to determine its sign.
+LSYM(Lml_i):
+ eor xh, xh, yh
+
+ @ Overflow: return INF (sign already in xh).
+LSYM(Lml_o):
+ and xh, xh, #0x80000000
+ orr xh, xh, #0x7f000000
+ orr xh, xh, #0x00f00000
+ mov xl, #0
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, r6, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, r6, pc}RETCOND
+#endif
+
+ @ Return NAN.
+LSYM(Lml_n):
+ mov xh, #0x7f000000
+ orr xh, xh, #0x00f80000
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, r6, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, r6, pc}RETCOND
+#endif
+
+
+ARM_FUNC_START divdf3
+
+ stmfd sp!, {r4, r5, r6, lr}
+
+ @ Mask out exponents.
+ mov ip, #0x7f000000
+ orr ip, ip, #0x00f00000
+ and r4, xh, ip
+ and r5, yh, ip
+
+ @ Trap any INF/NAN or zeroes.
+ teq r4, ip
+ teqne r5, ip
+ orrnes r6, xl, xh, lsl #1
+ orrnes r6, yl, yh, lsl #1
+ beq LSYM(Ldv_s)
+
+ @ Shift exponents right one bit to make room for overflow bit.
+ @ If either of them is 0, scale denormalized arguments off line.
+ @ Then substract divisor exponent from dividend''s.
+ movs r4, r4, lsr #1
+ teqne r5, #0
+ beq LSYM(Ldv_d)
+LSYM(Ldv_x):
+ sub r4, r4, r5, asr #1
+
+ @ Preserve final sign into lr.
+ eor lr, xh, yh
+
+ @ Convert mantissa to unsigned integer.
+ @ Dividend -> r5-r6, divisor -> yh-yl.
+ mov r5, #0x10000000
+ mov yh, yh, lsl #12
+ orr yh, r5, yh, lsr #4
+ orr yh, yh, yl, lsr #24
+ movs yl, yl, lsl #8
+ mov xh, xh, lsl #12
+ teqeq yh, r5
+ beq LSYM(Ldv_1)
+ orr r5, r5, xh, lsr #4
+ orr r5, r5, xl, lsr #24
+ mov r6, xl, lsl #8
+
+ @ Initialize xh with final sign bit.
+ and xh, lr, #0x80000000
+
+ @ Ensure result will land to known bit position.
+ cmp r5, yh
+ cmpeq r6, yl
+ bcs 1f
+ sub r4, r4, #(1 << 19)
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+1:
+ @ Apply exponent bias, check range for over/underflow.
+ add r4, r4, #0x1f000000
+ add r4, r4, #0x00f80000
+ cmn r4, #(53 << 19)
+ ble LSYM(Ldv_z)
+ cmp r4, ip, lsr #1
+ bge LSYM(Lml_o)
+
+ @ Perform first substraction to align result to a nibble.
+ subs r6, r6, yl
+ sbc r5, r5, yh
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ mov xl, #0x00100000
+ mov ip, #0x00080000
+
+ @ The actual division loop.
+1: subs lr, r6, yl
+ sbcs lr, r5, yh
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #1
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #2
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #3
+
+ orrs lr, r5, r6
+ beq 2f
+ mov r5, r5, lsl #4
+ orr r5, r5, r6, lsr #28
+ mov r6, r6, lsl #4
+ mov yh, yh, lsl #3
+ orr yh, yh, yl, lsr #29
+ mov yl, yl, lsl #3
+ movs ip, ip, lsr #4
+ bne 1b
+
+ @ We are done with a word of the result.
+ @ Loop again for the low word if this pass was for the high word.
+ tst xh, #0x00100000
+ bne 3f
+ orr xh, xh, xl
+ mov xl, #0
+ mov ip, #0x80000000
+ b 1b
+2:
+ @ Be sure result starts in the high word.
+ tst xh, #0x00100000
+ orreq xh, xh, xl
+ moveq xl, #0
+3:
+ @ Check if denormalized result is needed.
+ cmp r4, #0
+ ble LSYM(Ldv_u)
+
+ @ Apply proper rounding.
+ subs ip, r5, yh
+ subeqs ip, r6, yl
+ adcs xl, xl, #0
+ adc xh, xh, #0
+ teq ip, #0
+ biceq xl, xl, #1
+
+ @ Add exponent to result.
+ bic xh, xh, #0x00100000
+ orr xh, xh, r4, lsl #1
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, r6, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, r6, pc}RETCOND
+#endif
+
+ @ Division by 0x1p*: shortcut a lot of code.
+LSYM(Ldv_1):
+ and lr, lr, #0x80000000
+ orr xh, lr, xh, lsr #12
+ add r4, r4, #0x1f000000
+ add r4, r4, #0x00f80000
+ cmp r4, ip, lsr #1
+ bge LSYM(Lml_o)
+ cmp r4, #0
+ orrgt xh, xh, r4, lsl #1
+#ifdef __FP_INTERWORKING__
+ ldmgtfd sp!, {r4, r5, r6, lr}
+ bxgt lr
+#else
+ ldmgtfd sp!, {r4, r5, r6, pc}RETCOND
+#endif
+ cmn r4, #(53 << 19)
+ ble LSYM(Ldv_z)
+ orr xh, xh, #0x00100000
+ mov lr, #0
+ b LSYM(Lml_r)
+
+ @ Result must be denormalized: put remainder in lr for
+ @ rounding considerations.
+LSYM(Ldv_u):
+ orr lr, r5, r6
+ b LSYM(Lml_r)
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+LSYM(Ldv_d):
+ mov lr, #0
+ teq r4, #0
+ bne 2f
+ and r6, xh, #0x80000000
+1: movs xl, xl, lsl #1
+ adc xh, lr, xh, lsl #1
+ tst xh, #0x00100000
+ subeq r4, r4, #(1 << 19)
+ beq 1b
+ orr xh, xh, r6
+ teq r5, #0
+ bne LSYM(Ldv_x)
+2: and r6, yh, #0x80000000
+3: movs yl, yl, lsl #1
+ adc yh, lr, yh, lsl #1
+ tst yh, #0x00100000
+ subeq r5, r5, #(1 << 20)
+ beq 3b
+ orr yh, yh, r6
+ b LSYM(Ldv_x)
+
+ @ One or both arguments is either INF, NAN or zero.
+LSYM(Ldv_s):
+ teq r4, ip
+ teqeq r5, ip
+ beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
+ teq r4, ip
+ bne 1f
+ orrs r4, xl, xh, lsl #12
+ bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ b LSYM(Lml_i) @ INF / <anything> -> INF
+1: teq r5, ip
+ bne 2f
+ orrs r5, yl, yh, lsl #12
+ bne LSYM(Lml_n) @ <anything> / NAN -> NAN
+ b LSYM(Lml_z) @ <anything> / INF -> 0
+2: @ One or both arguments are 0.
+ orrs r4, xl, xh, lsl #1
+ bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ orrs r5, yl, yh, lsl #1
+ bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
+ b LSYM(Lml_n) @ 0 / 0 -> NAN
+
+
+FUNC_START gedf2
+ARM_FUNC_START gtdf2
+ mov ip, #-1
+ b 1f
+
+FUNC_START ledf2
+ARM_FUNC_START ltdf2
+ mov ip, #1
+ b 1f
+
+FUNC_START nedf2
+FUNC_START eqdf2
+ARM_FUNC_START cmpdf2
+ mov ip, #1 @ how should we specify unordered here?
+
+1: stmfd sp!, {r4, r5, lr}
+
+ @ Trap any INF/NAN first.
+ mov lr, #0x7f000000
+ orr lr, lr, #0x00f00000
+ and r4, xh, lr
+ and r5, yh, lr
+ teq r4, lr
+ teqne r5, lr
+ beq 3f
+
+ @ Test for equality.
+ @ Note that 0.0 is equal to -0.0.
+2: orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0
+ orreqs ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0
+ teqne xh, yh @ or xh == yh
+ teqeq xl, yl @ and xl == yl
+ moveq r0, #0 @ then equal.
+#ifdef __FP_INTERWORKING__
+ ldmeqfd sp!, {r4, r5, lr}
+ bxeq lr
+#else
+ ldmeqfd sp!, {r4, r5, pc}RETCOND
+#endif
+
+ @ Check for sign difference.
+ teq xh, yh
+ movmi r0, xh, asr #31
+ orrmi r0, r0, #1
+#ifdef __FP_INTERWORKING__
+ ldmmifd sp!, {r4, r5, lr}
+ bxmi lr
+#else
+ ldmmifd sp!, {r4, r5, pc}RETCOND
+#endif
+
+ @ Compare exponents.
+ cmp r4, r5
+
+ @ Compare mantissa if exponents are equal.
+ moveq xh, xh, lsl #12
+ cmpeq xh, yh, lsl #12
+ cmpeq xl, yl
+ movcs r0, yh, asr #31
+ mvncc r0, yh, asr #31
+ orr r0, r0, #1
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, pc}RETCOND
+#endif
+
+ @ Look for a NAN.
+3: teq r4, lr
+ bne 4f
+ orrs xl, xl, xh, lsl #12
+ bne 5f @ x is NAN
+4: teq r5, lr
+ bne 2b
+ orrs yl, yl, yh, lsl #12
+ beq 2b @ y is not NAN
+5: mov r0, ip @ return unordered code from ip
+#ifdef __FP_INTERWORKING__
+ ldmfd sp!, {r4, r5, lr}
+ bx lr
+#else
+ ldmfd sp!, {r4, r5, pc}RETCOND
+#endif
+
+
+ARM_FUNC_START unorddf2
+ str lr, [sp, #-4]!
+ mov ip, #0x7f000000
+ orr ip, ip, #0x00f00000
+ and lr, xh, ip
+ teq lr, ip
+ bne 1f
+ orrs xl, xl, xh, lsl #12
+ bne 3f @ x is NAN
+1: and lr, yh, ip
+ teq lr, ip
+ bne 2f
+ orrs yl, yl, yh, lsl #12
+ bne 3f @ y is NAN
+2: mov r0, #0 @ arguments are ordered.
+#ifdef __FP_INTERWORKING__
+ ldr lr, [sp], #4
+ bx lr
+#elif defined (__APCS_26__)
+ ldmia sp!, {pc}^
+#else
+ ldr pc, [sp], #4
+#endif
+3: mov r0, #1 @ arguments are unordered.
+#ifdef __FP_INTERWORKING__
+ ldr lr, [sp], #4
+ bx lr
+#elif defined (__APCS_26__)
+ ldmia sp!, {pc}^
+#else
+ ldr pc, [sp], #4
+#endif
+
+
+ARM_FUNC_START fixdfsi
+ orrs ip, xl, xh, lsl #1
+ beq 1f @ value is 0.
+
+ @ preserve C flag (the actual sign)
+#ifdef __APCS_26__
+ mov r3, pc
+#else
+ mrs r3, cpsr
+#endif
+
+ @ check exponent range.
+ mov ip, #0x7f000000
+ orr ip, ip, #0x00f00000
+ and r2, xh, ip
+ teq r2, ip
+ beq 2f @ value is INF or NAN
+ bic ip, ip, #0x40000000
+ cmp r2, ip
+ bcc 1f @ value is too small
+ add ip, ip, #(31 << 20)
+ cmp r2, ip
+ bcs 3f @ value is too large
+
+ rsb r2, r2, ip
+ mov ip, xh, lsl #11
+ orr ip, ip, #0x80000000
+ orr ip, ip, xl, lsr #21
+ mov r2, r2, lsr #20
+ mov r0, ip, lsr r2
+ tst r3, #0x20000000 @ the sign bit
+ rsbne r0, r0, #0
+ RET
+
+1: mov r0, #0
+ RET
+
+2: orrs xl, xl, xh, lsl #12
+ bne 4f @ r0 is NAN.
+3: tst r3, #0x20000000 @ the sign bit
+ moveq r0, #0x7fffffff @ maximum signed positive si
+ movne r0, #0x80000000 @ maximum signed negative si
+ RET
+
+4: mov r0, #0 @ How should we convert NAN?
+ RET
+
+ARM_FUNC_START fixunsdfsi
+ orrs ip, xl, xh, lsl #1
+ beq 1b @ value is 0
+ bcs 1b @ value is negative
+
+ @ check exponent range.
+ mov ip, #0x7f000000
+ orr ip, ip, #0x00f00000
+ and r2, xh, ip
+ teq r2, ip
+ beq 1f @ value is INF or NAN
+ bic ip, ip, #0x40000000
+ cmp r2, ip
+ bcc 1b @ value is too small
+ add ip, ip, #(31 << 20)
+ cmp r2, ip
+ bhi 2f @ value is too large
+
+ rsb r2, r2, ip
+ mov ip, xh, lsl #11
+ orr ip, ip, #0x80000000
+ orr ip, ip, xl, lsr #21
+ mov r2, r2, lsr #20
+ mov r0, ip, lsr r2
+ RET
+
+1: orrs xl, xl, xh, lsl #12
+ bne 4b @ value is NAN.
+2: mov r0, #0xffffffff @ maximum unsigned si
+ RET
+
+
+ARM_FUNC_START truncdfsf2
+ orrs r2, xl, xh, lsl #1
+ moveq r0, r2, rrx
+ RETc(eq) @ value is 0.0 or -0.0
+
+ @ check exponent range.
+ mov ip, #0x7f000000
+ orr ip, ip, #0x00f00000
+ and r2, ip, xh
+ teq r2, ip
+ beq 2f @ value is INF or NAN
+ bic xh, xh, ip
+ cmp r2, #(0x380 << 20)
+ bls 4f @ value is too small
+
+ @ shift and round mantissa
+1: movs r3, xl, lsr #29
+ adc r3, r3, xh, lsl #3
+
+ @ if halfway between two numbers, round towards LSB = 0.
+ mov xl, xl, lsl #3
+ teq xl, #0x80000000
+ biceq r3, r3, #1
+
+ @ rounding might have created an extra MSB. If so adjust exponent.
+ tst r3, #0x00800000
+ addne r2, r2, #(1 << 20)
+ bicne r3, r3, #0x00800000
+
+ @ check exponent for overflow
+ mov ip, #(0x400 << 20)
+ orr ip, ip, #(0x07f << 20)
+ cmp r2, ip
+ bcs 3f @ overflow
+
+ @ adjust exponent, merge with sign bit and mantissa.
+ movs xh, xh, lsl #1
+ mov r2, r2, lsl #4
+ orr r0, r3, r2, rrx
+ eor r0, r0, #0x40000000
+ RET
+
+2: @ chech for NAN
+ orrs xl, xl, xh, lsl #12
+ movne r0, #0x7f000000
+ orrne r0, r0, #0x00c00000
+ RETc(ne) @ return NAN
+
+3: @ return INF with sign
+ and r0, xh, #0x80000000
+ orr r0, r0, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
+
+4: @ check if denormalized value is possible
+ subs r2, r2, #((0x380 - 24) << 20)
+ andle r0, xh, #0x80000000 @ too small, return signed 0.
+ RETc(le)
+
+ @ denormalize value so we can resume with the code above afterwards.
+ orr xh, xh, #0x00100000
+ mov r2, r2, lsr #20
+ rsb r2, r2, #25
+ cmp r2, #20
+ bgt 6f
+
+ rsb ip, r2, #32
+ mov r3, xl, lsl ip
+ mov xl, xl, lsr r2
+ orr xl, xl, xh, lsl ip
+ movs xh, xh, lsl #1
+ mov xh, xh, lsr r2
+ mov xh, xh, rrx
+5: teq r3, #0 @ fold r3 bits into the LSB
+ orrne xl, xl, #1 @ for rounding considerations.
+ mov r2, #(0x380 << 20) @ equivalent to the 0 float exponent
+ b 1b
+
+6: rsb r2, r2, #(12 + 20)
+ rsb ip, r2, #32
+ mov r3, xl, lsl r2
+ mov xl, xl, lsr ip
+ orr xl, xl, xh, lsl r2
+ and xh, xh, #0x80000000
+ b 5b
+
+
diff --git a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S
new file mode 100644
index 00000000000..88ded2931bf
--- /dev/null
+++ b/gcc/config/arm/ieee754-sf.S
@@ -0,0 +1,813 @@
+/* ieee754-sf.S single-precision floating point support for ARM
+
+ Copyright (C) 2003 Free Software Foundation, Inc.
+ Contributed by Nicolas Pitre (nico@cam.org)
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ In addition to the permissions in the GNU General Public License, the
+ Free Software Foundation gives you unlimited permission to link the
+ compiled version of this file into combinations with other programs,
+ and to distribute those combinations without any restriction coming
+ from the use of this file. (The General Public License restrictions
+ do apply in other respects; for example, they cover modification of
+ the file, and distribution when not linked into a combine
+ executable.)
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to
+ the Free Software Foundation, 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/*
+ * Notes:
+ *
+ * The goal of this code is to be as fast as possible. This is
+ * not meant to be easy to understand for the casual reader.
+ *
+ * Only the default rounding mode is intended for best performances.
+ * Exceptions aren't supported yet, but that can be added quite easily
+ * if necessary without impacting performances.
+ */
+
+@ This selects the minimum architecture level required.
+#undef __ARM_ARCH__
+#define __ARM_ARCH__ 3
+
+#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
+ || defined(__ARM_ARCH_4T__)
+#undef __ARM_ARCH__
+/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
+ long multiply instructions. That includes v3M. */
+#define __ARM_ARCH__ 4
+#endif
+
+#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+ || defined(__ARM_ARCH_5TE__)
+#undef __ARM_ARCH__
+#define __ARM_ARCH__ 5
+#endif
+
+#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
+#undef RET
+#undef RETc
+#define RET bx lr
+#define RETc(x) bx##x lr
+#if (__ARM_ARCH__ == 4) && (defined(__thumb__) || defined(__THUMB_INTERWORK__))
+#define __FP_INTERWORKING__
+#endif
+#endif
+
+#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+.macro ARM_FUNC_START name
+ FUNC_START \name
+ bx pc
+ nop
+ .arm
+.endm
+#else
+.macro ARM_FUNC_START name
+ FUNC_START \name
+.endm
+#endif
+
+ARM_FUNC_START negsf2
+ eor r0, r0, #0x80000000 @ flip sign bit
+ RET
+
+ARM_FUNC_START subsf3
+ eor r1, r1, #0x80000000 @ flip sign bit of second arg
+#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+ b 1f @ Skip Thumb-code prologue
+#endif
+
+ARM_FUNC_START addsf3
+
+1: @ Compare both args, return zero if equal but the sign.
+ eor r2, r0, r1
+ teq r2, #0x80000000
+ beq LSYM(Lad_z)
+
+ @ If first arg is 0 or -0, return second arg.
+ @ If second arg is 0 or -0, return first arg.
+ bics r2, r0, #0x80000000
+ moveq r0, r1
+ bicnes r2, r1, #0x80000000
+ RETc(eq)
+
+ @ Mask out exponents.
+ mov ip, #0xff000000
+ and r2, r0, ip, lsr #1
+ and r3, r1, ip, lsr #1
+
+ @ If either of them is 255, result will be INF or NAN
+ teq r2, ip, lsr #1
+ teqne r3, ip, lsr #1
+ beq LSYM(Lad_i)
+
+ @ Compute exponent difference. Make largest exponent in r2,
+ @ corresponding arg in r0, and positive exponent difference in r3.
+ subs r3, r3, r2
+ addgt r2, r2, r3
+ eorgt r1, r0, r1
+ eorgt r0, r1, r0
+ eorgt r1, r0, r1
+ rsblt r3, r3, #0
+
+ @ If exponent difference is too large, return largest argument
+ @ already in r0. We need up to 25 bit to handle proper rounding
+ @ of 0x1p25 - 1.1.
+ cmp r3, #(25 << 23)
+ RETc(hi)
+
+ @ Convert mantissa to signed integer.
+ tst r0, #0x80000000
+ orr r0, r0, #0x00800000
+ bic r0, r0, #0xff000000
+ rsbne r0, r0, #0
+ tst r1, #0x80000000
+ orr r1, r1, #0x00800000
+ bic r1, r1, #0xff000000
+ rsbne r1, r1, #0
+
+ @ If exponent == difference, one or both args were denormalized.
+ @ Since this is not common case, rescale them off line.
+ teq r2, r3
+ beq LSYM(Lad_d)
+LSYM(Lad_x):
+
+ @ Scale down second arg with exponent difference.
+ @ Apply shift one bit left to first arg and the rest to second arg
+ @ to simplify things later, but only if exponent does not become 0.
+ movs r3, r3, lsr #23
+ teqne r2, #(1 << 23)
+ movne r0, r0, lsl #1
+ subne r2, r2, #(1 << 23)
+ subne r3, r3, #1
+
+ @ Shift second arg into ip, keep leftover bits into r1.
+ mov ip, r1, asr r3
+ rsb r3, r3, #32
+ mov r1, r1, lsl r3
+
+ add r0, r0, ip @ the actual addition
+
+ @ We now have a 64 bit result in r0-r1.
+ @ Keep absolute value in r0-r1, sign in r3.
+ ands r3, r0, #0x80000000
+ bpl LSYM(Lad_p)
+ rsbs r1, r1, #0
+ rsc r0, r0, #0
+
+ @ Determine how to normalize the result.
+LSYM(Lad_p):
+ cmp r0, #0x00800000
+ bcc LSYM(Lad_l)
+ cmp r0, #0x01000000
+ bcc LSYM(Lad_r0)
+ cmp r0, #0x02000000
+ bcc LSYM(Lad_r1)
+
+ @ Result needs to be shifted right.
+ movs r0, r0, lsr #1
+ mov r1, r1, rrx
+ add r2, r2, #(1 << 23)
+LSYM(Lad_r1):
+ movs r0, r0, lsr #1
+ mov r1, r1, rrx
+ add r2, r2, #(1 << 23)
+
+ @ Our result is now properly aligned into r0, remaining bits in r1.
+ @ Round with MSB of r1. If halfway between two numbers, round towards
+ @ LSB of r0 = 0.
+LSYM(Lad_r0):
+ add r0, r0, r1, lsr #31
+ teq r1, #0x80000000
+ biceq r0, r0, #1
+
+ @ Rounding may have added a new MSB. Adjust exponent.
+ @ That MSB will be cleared when exponent is merged below.
+ tst r0, #0x01000000
+ addne r2, r2, #(1 << 23)
+
+ @ Make sure we did not bust our exponent.
+ cmp r2, #(254 << 23)
+ bhi LSYM(Lad_o)
+
+ @ Pack final result together.
+LSYM(Lad_e):
+ bic r0, r0, #0x01800000
+ orr r0, r0, r2
+ orr r0, r0, r3
+ RET
+
+ @ Result must be shifted left.
+ @ No rounding necessary since r1 will always be 0.
+LSYM(Lad_l):
+
+#if __ARM_ARCH__ < 5
+
+ movs ip, r0, lsr #12
+ moveq r0, r0, lsl #12
+ subeq r2, r2, #(12 << 23)
+ tst r0, #0x00ff0000
+ moveq r0, r0, lsl #8
+ subeq r2, r2, #(8 << 23)
+ tst r0, #0x00f00000
+ moveq r0, r0, lsl #4
+ subeq r2, r2, #(4 << 23)
+ tst r0, #0x00c00000
+ moveq r0, r0, lsl #2
+ subeq r2, r2, #(2 << 23)
+ tst r0, #0x00800000
+ moveq r0, r0, lsl #1
+ subeq r2, r2, #(1 << 23)
+ cmp r2, #0
+ bgt LSYM(Lad_e)
+
+#else
+
+ clz ip, r0
+ sub ip, ip, #8
+ mov r0, r0, lsl ip
+ subs r2, r2, ip, lsl #23
+ bgt LSYM(Lad_e)
+
+#endif
+
+ @ Exponent too small, denormalize result.
+ mvn r2, r2, asr #23
+ add r2, r2, #2
+ orr r0, r3, r0, lsr r2
+ RET
+
+ @ Fixup and adjust bit position for denormalized arguments.
+ @ Note that r2 must not remain equal to 0.
+LSYM(Lad_d):
+ teq r2, #0
+ eoreq r0, r0, #0x00800000
+ addeq r2, r2, #(1 << 23)
+ eor r1, r1, #0x00800000
+ subne r3, r3, #(1 << 23)
+ b LSYM(Lad_x)
+
+ @ Result is x - x = 0, unless x is INF or NAN.
+LSYM(Lad_z):
+ mov ip, #0xff000000
+ and r2, r0, ip, lsr #1
+ teq r2, ip, lsr #1
+ moveq r0, ip, asr #2
+ movne r0, #0
+ RET
+
+ @ Overflow: return INF.
+LSYM(Lad_o):
+ orr r0, r3, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
+
+ @ At least one of r0/r1 is INF/NAN.
+ @ if r0 != INF/NAN: return r1 (which is INF/NAN)
+ @ if r1 != INF/NAN: return r0 (which is INF/NAN)
+ @ if r0 or r1 is NAN: return NAN
+ @ if opposite sign: return NAN
+ @ return r0 (which is INF or -INF)
+LSYM(Lad_i):
+ teq r2, ip, lsr #1
+ movne r0, r1
+ teqeq r3, ip, lsr #1
+ RETc(ne)
+ movs r2, r0, lsl #9
+ moveqs r2, r1, lsl #9
+ teqeq r0, r1
+ orrne r0, r3, #0x00400000 @ NAN
+ RET
+
+
+ARM_FUNC_START floatunsisf
+ mov r3, #0
+ b 1f
+
+ARM_FUNC_START floatsisf
+ ands r3, r0, #0x80000000
+ rsbmi r0, r0, #0
+
+1: teq r0, #0
+ RETc(eq)
+
+ mov r1, #0
+ mov r2, #((127 + 23) << 23)
+ tst r0, #0xfc000000
+ beq LSYM(Lad_p)
+
+ @ We need to scale the value a little before branching to code above.
+ tst r0, #0xf0000000
+ movne r1, r0, lsl #28
+ movne r0, r0, lsr #4
+ addne r2, r2, #(4 << 23)
+ tst r0, #0x0c000000
+ beq LSYM(Lad_p)
+ mov r1, r1, lsr #2
+ orr r1, r1, r0, lsl #30
+ mov r0, r0, lsr #2
+ add r2, r2, #(2 << 23)
+ b LSYM(Lad_p)
+
+
+ARM_FUNC_START mulsf3
+
+ @ Mask out exponents.
+ mov ip, #0xff000000
+ and r2, r0, ip, lsr #1
+ and r3, r1, ip, lsr #1
+
+ @ Trap any INF/NAN.
+ teq r2, ip, lsr #1
+ teqne r3, ip, lsr #1
+ beq LSYM(Lml_s)
+
+ @ Trap any multiplication by 0.
+ bics ip, r0, #0x80000000
+ bicnes ip, r1, #0x80000000
+ beq LSYM(Lml_z)
+
+ @ Shift exponents right one bit to make room for overflow bit.
+ @ If either of them is 0, scale denormalized arguments off line.
+ @ Then add both exponents together.
+ movs r2, r2, lsr #1
+ teqne r3, #0
+ beq LSYM(Lml_d)
+LSYM(Lml_x):
+ add r2, r2, r3, asr #1
+
+ @ Preserve final sign in r2 along with exponent for now.
+ teq r0, r1
+ orrmi r2, r2, #0x8000
+
+ @ Convert mantissa to unsigned integer.
+ bic r0, r0, #0xff000000
+ bic r1, r1, #0xff000000
+ orr r0, r0, #0x00800000
+ orr r1, r1, #0x00800000
+
+#if __ARM_ARCH__ < 4
+
+ @ Well, no way to make it shorter without the umull instruction.
+ @ We must perform that 24 x 24 -> 48 bit multiplication by hand.
+ stmfd sp!, {r4, r5}
+ mov r4, r0, lsr #16
+ mov r5, r1, lsr #16
+ bic r0, r0, #0x00ff0000
+ bic r1, r1, #0x00ff0000
+ mul ip, r4, r5
+ mul r3, r0, r1
+ mul r0, r5, r0
+ mla r0, r4, r1, r0
+ adds r3, r3, r0, lsl #16
+ adc ip, ip, r0, lsr #16
+ ldmfd sp!, {r4, r5}
+
+#else
+
+ umull r3, ip, r0, r1 @ The actual multiplication.
+
+#endif
+
+ @ Put final sign in r0.
+ mov r0, r2, lsl #16
+ bic r2, r2, #0x8000
+
+ @ Adjust result if one extra MSB appeared.
+ @ The LSB may be lost but this never changes the result in this case.
+ tst ip, #(1 << 15)
+ addne r2, r2, #(1 << 22)
+ movnes ip, ip, lsr #1
+ movne r3, r3, rrx
+
+ @ Apply exponent bias, check range for underflow.
+ subs r2, r2, #(127 << 22)
+ ble LSYM(Lml_u)
+
+ @ Scale back to 24 bits with rounding.
+ @ r0 contains sign bit already.
+ orrs r0, r0, r3, lsr #23
+ adc r0, r0, ip, lsl #9
+
+ @ If halfway between two numbers, rounding should be towards LSB = 0.
+ mov r3, r3, lsl #9
+ teq r3, #0x80000000
+ biceq r0, r0, #1
+
+ @ Note: rounding may have produced an extra MSB here.
+ @ The extra bit is cleared before merging the exponent below.
+ tst r0, #0x01000000
+ addne r2, r2, #(1 << 22)
+
+ @ Check for exponent overflow
+ cmp r2, #(255 << 22)
+ bge LSYM(Lml_o)
+
+ @ Add final exponent.
+ bic r0, r0, #0x01800000
+ orr r0, r0, r2, lsl #1
+ RET
+
+ @ Result is 0, but determine sign anyway.
+LSYM(Lml_z): eor r0, r0, r1
+ bic r0, r0, #0x7fffffff
+ RET
+
+ @ Check if denormalized result is possible, otherwise return signed 0.
+LSYM(Lml_u):
+ cmn r2, #(24 << 22)
+ RETc(le)
+
+ @ Find out proper shift value.
+ mvn r1, r2, asr #22
+ subs r1, r1, #7
+ bgt LSYM(Lml_ur)
+
+ @ Shift value left, round, etc.
+ add r1, r1, #32
+ orrs r0, r0, r3, lsr r1
+ rsb r1, r1, #32
+ adc r0, r0, ip, lsl r1
+ mov ip, r3, lsl r1
+ teq ip, #0x80000000
+ biceq r0, r0, #1
+ RET
+
+ @ Shift value right, round, etc.
+ @ Note: r1 must not be 0 otherwise carry does not get set.
+LSYM(Lml_ur):
+ orrs r0, r0, ip, lsr r1
+ adc r0, r0, #0
+ rsb r1, r1, #32
+ mov ip, ip, lsl r1
+ teq r3, #0
+ teqeq ip, #0x80000000
+ biceq r0, r0, #1
+ RET
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+LSYM(Lml_d):
+ teq r2, #0
+ and ip, r0, #0x80000000
+1: moveq r0, r0, lsl #1
+ tsteq r0, #0x00800000
+ subeq r2, r2, #(1 << 22)
+ beq 1b
+ orr r0, r0, ip
+ teq r3, #0
+ and ip, r1, #0x80000000
+2: moveq r1, r1, lsl #1
+ tsteq r1, #0x00800000
+ subeq r3, r3, #(1 << 23)
+ beq 2b
+ orr r1, r1, ip
+ b LSYM(Lml_x)
+
+ @ One or both args are INF or NAN.
+LSYM(Lml_s):
+ teq r0, #0x0
+ teqne r1, #0x0
+ teqne r0, #0x80000000
+ teqne r1, #0x80000000
+ beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ teq r2, ip, lsr #1
+ bne 1f
+ movs r2, r0, lsl #9
+ bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+1: teq r3, ip, lsr #1
+ bne LSYM(Lml_i)
+ movs r3, r1, lsl #9
+ bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+
+ @ Result is INF, but we need to determine its sign.
+LSYM(Lml_i):
+ eor r0, r0, r1
+
+ @ Overflow: return INF (sign already in r0).
+LSYM(Lml_o):
+ and r0, r0, #0x80000000
+ orr r0, r0, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
+
+ @ Return NAN.
+LSYM(Lml_n):
+ mov r0, #0x7f000000
+ orr r0, r0, #0x00c00000
+ RET
+
+
+ARM_FUNC_START divsf3
+
+ @ Mask out exponents.
+ mov ip, #0xff000000
+ and r2, r0, ip, lsr #1
+ and r3, r1, ip, lsr #1
+
+ @ Trap any INF/NAN or zeroes.
+ teq r2, ip, lsr #1
+ teqne r3, ip, lsr #1
+ bicnes ip, r0, #0x80000000
+ bicnes ip, r1, #0x80000000
+ beq LSYM(Ldv_s)
+
+ @ Shift exponents right one bit to make room for overflow bit.
+ @ If either of them is 0, scale denormalized arguments off line.
+ @ Then substract divisor exponent from dividend''s.
+ movs r2, r2, lsr #1
+ teqne r3, #0
+ beq LSYM(Ldv_d)
+LSYM(Ldv_x):
+ sub r2, r2, r3, asr #1
+
+ @ Preserve final sign into ip.
+ eor ip, r0, r1
+
+ @ Convert mantissa to unsigned integer.
+ @ Dividend -> r3, divisor -> r1.
+ mov r3, #0x10000000
+ movs r1, r1, lsl #9
+ mov r0, r0, lsl #9
+ beq LSYM(Ldv_1)
+ orr r1, r3, r1, lsr #4
+ orr r3, r3, r0, lsr #4
+
+ @ Initialize r0 (result) with final sign bit.
+ and r0, ip, #0x80000000
+
+ @ Ensure result will land to known bit position.
+ cmp r3, r1
+ subcc r2, r2, #(1 << 22)
+ movcc r3, r3, lsl #1
+
+ @ Apply exponent bias, check range for over/underflow.
+ add r2, r2, #(127 << 22)
+ cmn r2, #(24 << 22)
+ RETc(le)
+ cmp r2, #(255 << 22)
+ bge LSYM(Lml_o)
+
+ @ The actual division loop.
+ mov ip, #0x00800000
+1: cmp r3, r1
+ subcs r3, r3, r1
+ orrcs r0, r0, ip
+ cmp r3, r1, lsr #1
+ subcs r3, r3, r1, lsr #1
+ orrcs r0, r0, ip, lsr #1
+ cmp r3, r1, lsr #2
+ subcs r3, r3, r1, lsr #2
+ orrcs r0, r0, ip, lsr #2
+ cmp r3, r1, lsr #3
+ subcs r3, r3, r1, lsr #3
+ orrcs r0, r0, ip, lsr #3
+ movs r3, r3, lsl #4
+ movnes ip, ip, lsr #4
+ bne 1b
+
+ @ Check if denormalized result is needed.
+ cmp r2, #0
+ ble LSYM(Ldv_u)
+
+ @ Apply proper rounding.
+ cmp r3, r1
+ addcs r0, r0, #1
+ biceq r0, r0, #1
+
+ @ Add exponent to result.
+ bic r0, r0, #0x00800000
+ orr r0, r0, r2, lsl #1
+ RET
+
+ @ Division by 0x1p*: let''s shortcut a lot of code.
+LSYM(Ldv_1):
+ and ip, ip, #0x80000000
+ orr r0, ip, r0, lsr #9
+ add r2, r2, #(127 << 22)
+ cmp r2, #(255 << 22)
+ bge LSYM(Lml_o)
+ cmp r2, #0
+ orrgt r0, r0, r2, lsl #1
+ RETc(gt)
+ cmn r2, #(24 << 22)
+ movle r0, ip
+ RETc(le)
+ orr r0, r0, #0x00800000
+ mov r3, #0
+
+ @ Result must be denormalized: prepare parameters to use code above.
+ @ r3 already contains remainder for rounding considerations.
+LSYM(Ldv_u):
+ bic ip, r0, #0x80000000
+ and r0, r0, #0x80000000
+ mvn r1, r2, asr #22
+ add r1, r1, #2
+ b LSYM(Lml_ur)
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+LSYM(Ldv_d):
+ teq r2, #0
+ and ip, r0, #0x80000000
+1: moveq r0, r0, lsl #1
+ tsteq r0, #0x00800000
+ subeq r2, r2, #(1 << 22)
+ beq 1b
+ orr r0, r0, ip
+ teq r3, #0
+ and ip, r1, #0x80000000
+2: moveq r1, r1, lsl #1
+ tsteq r1, #0x00800000
+ subeq r3, r3, #(1 << 23)
+ beq 2b
+ orr r1, r1, ip
+ b LSYM(Ldv_x)
+
+ @ One or both arguments is either INF, NAN or zero.
+LSYM(Ldv_s):
+ mov ip, #0xff000000
+ teq r2, ip, lsr #1
+ teqeq r3, ip, lsr #1
+ beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
+ teq r2, ip, lsr #1
+ bne 1f
+ movs r2, r0, lsl #9
+ bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ b LSYM(Lml_i) @ INF / <anything> -> INF
+1: teq r3, ip, lsr #1
+ bne 2f
+ movs r3, r1, lsl #9
+ bne LSYM(Lml_n) @ <anything> / NAN -> NAN
+ b LSYM(Lml_z) @ <anything> / INF -> 0
+2: @ One or both arguments are 0.
+ bics r2, r0, #0x80000000
+ bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ bics r3, r1, #0x80000000
+ bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
+ b LSYM(Lml_n) @ 0 / 0 -> NAN
+
+
+FUNC_START gesf2
+ARM_FUNC_START gtsf2
+ mov r3, #-1
+ b 1f
+
+FUNC_START lesf2
+ARM_FUNC_START ltsf2
+ mov r3, #1
+ b 1f
+
+FUNC_START nesf2
+FUNC_START eqsf2
+ARM_FUNC_START cmpsf2
+ mov r3, #1 @ how should we specify unordered here?
+
+1: @ Trap any INF/NAN first.
+ mov ip, #0xff000000
+ and r2, r1, ip, lsr #1
+ teq r2, ip, lsr #1
+ and r2, r0, ip, lsr #1
+ teqne r2, ip, lsr #1
+ beq 3f
+
+ @ Test for equality.
+ @ Note that 0.0 is equal to -0.0.
+2: orr r3, r0, r1
+ bics r3, r3, #0x80000000 @ either 0.0 or -0.0
+ teqne r0, r1 @ or both the same
+ moveq r0, #0
+ RETc(eq)
+
+ @ Check for sign difference. The N flag is set if it is the case.
+ @ If so, return sign of r0.
+ movmi r0, r0, asr #31
+ orrmi r0, r0, #1
+ RETc(mi)
+
+ @ Compare exponents.
+ and r3, r1, ip, lsr #1
+ cmp r2, r3
+
+ @ Compare mantissa if exponents are equal
+ moveq r0, r0, lsl #9
+ cmpeq r0, r1, lsl #9
+ movcs r0, r1, asr #31
+ mvncc r0, r1, asr #31
+ orr r0, r0, #1
+ RET
+
+ @ Look for a NAN.
+3: and r2, r1, ip, lsr #1
+ teq r2, ip, lsr #1
+ bne 4f
+ movs r2, r1, lsl #9
+ bne 5f @ r1 is NAN
+4: and r2, r0, ip, lsr #1
+ teq r2, ip, lsr #1
+ bne 2b
+ movs ip, r0, lsl #9
+ beq 2b @ r0 is not NAN
+5: mov r0, r3 @ return unordered code from r3.
+ RET
+
+
+ARM_FUNC_START unordsf2
+ mov ip, #0xff000000
+ and r2, r1, ip, lsr #1
+ teq r2, ip, lsr #1
+ bne 1f
+ movs r2, r1, lsl #9
+ bne 3f @ r1 is NAN
+1: and r2, r0, ip, lsr #1
+ teq r2, ip, lsr #1
+ bne 2f
+ movs r2, r0, lsl #9
+ bne 3f @ r0 is NAN
+2: mov r0, #0 @ arguments are ordered.
+ RET
+3: mov r0, #1 @ arguments are unordered.
+ RET
+
+
+ARM_FUNC_START fixsfsi
+ movs r0, r0, lsl #1
+ RETc(eq) @ value is 0.
+ @ preserve C flag (the actual sign)
+#ifdef __APCS_26__
+ mov r1, pc
+#else
+ mrs r1, cpsr
+#endif
+
+ @ check exponent range.
+ and r2, r0, #0xff000000
+ cmp r2, #(127 << 24)
+ movcc r0, #0 @ value is too small
+ RETc(cc)
+ cmp r2, #((127 + 31) << 24)
+ bcs 1f @ value is too large
+
+ mov r0, r0, lsl #7
+ orr r0, r0, #0x80000000
+ mov r2, r2, lsr #24
+ rsb r2, r2, #(127 + 31)
+ mov r0, r0, lsr r2
+ tst r1, #0x20000000 @ the sign bit
+ rsbne r0, r0, #0
+ RET
+
+1: teq r2, #0xff000000
+ bne 2f
+ movs r0, r0, lsl #8
+ bne 3f @ r0 is NAN.
+2: tst r1, #0x20000000 @ the sign bit
+ moveq r0, #0x7fffffff @ the maximum signed positive si
+ movne r0, #0x80000000 @ the maximum signed negative si
+ RET
+
+3: mov r0, #0 @ What should we convert NAN to?
+ RET
+
+
+ARM_FUNC_START fixunssfsi
+ movs r0, r0, lsl #1
+ RETc(eq) @ value is 0.
+ movcs r0, #0
+ RETc(cs) @ value is negative.
+
+ @ check exponent range.
+ and r2, r0, #0xff000000
+ cmp r2, #(127 << 24)
+ movcc r0, #0 @ value is too small
+ RETc(cc)
+ cmp r2, #((127 + 32) << 24)
+ bcs 1f @ value is too large
+
+ mov r0, r0, lsl #7
+ orr r0, r0, #0x80000000
+ mov r2, r2, lsr #24
+ rsb r2, r2, #(127 + 31)
+ mov r0, r0, lsr r2
+ RET
+
+1: teq r2, #0xff000000
+ bne 2f
+ movs r0, r0, lsl #8
+ bne 3b @ r0 is NAN.
+2: mov r0, #0xffffffff @ maximum unsigned si
+ RET
+
+
diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm
index 0f35d8145c1..f587bc2969e 100644
--- a/gcc/config/arm/lib1funcs.asm
+++ b/gcc/config/arm/lib1funcs.asm
@@ -782,3 +782,17 @@ _arm_return:
SIZE (_interwork_call_via_lr)
#endif /* L_interwork_call_via_rX */
+
+#ifdef L_ieee754_dp
+ /* These functions are coded in ARM state, even when called from
+ Thumb. */
+ .arm
+#include "ieee754-df.S"
+#endif
+
+#ifdef L_ieee754_sp
+ /* These functions are coded in ARM state, even when called from
+ Thumb. */
+ .arm
+#include "ieee754-sf.S"
+#endif
diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf
index 79506fb5a62..7b0b86754d4 100644
--- a/gcc/config/arm/t-arm-elf
+++ b/gcc/config/arm/t-arm-elf
@@ -1,29 +1,10 @@
LIB1ASMSRC = arm/lib1funcs.asm
-LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX
+LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _ieee754_dp _ieee754_sp
-# We want fine grained libraries, so use the new code to build the
-# floating point emulation libraries.
-FPBIT = fp-bit.c
-DPBIT = dp-bit.c
-
-fp-bit.c: $(srcdir)/config/fp-bit.c
- echo '#define FLOAT' > fp-bit.c
- echo '#ifndef __ARMEB__' >> fp-bit.c
- echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
- echo '#endif' >> fp-bit.c
- cat $(srcdir)/config/fp-bit.c >> fp-bit.c
-
-dp-bit.c: $(srcdir)/config/fp-bit.c
- echo '#ifndef __ARMEB__' > dp-bit.c
- echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
- echo '#define FLOAT_WORD_ORDER_MISMATCH' >> dp-bit.c
- echo '#endif' >> dp-bit.c
- cat $(srcdir)/config/fp-bit.c >> dp-bit.c
-
-
MULTILIB_OPTIONS = marm/mthumb
MULTILIB_DIRNAMES = arm thumb
MULTILIB_EXCEPTIONS =
+MULTILIB_MATCHES =
# MULTILIB_OPTIONS += mcpu=ep9312
# MULTILIB_DIRNAMES += ep9312
@@ -31,8 +12,7 @@ MULTILIB_EXCEPTIONS =
# MULTILIB_OPTIONS += mlittle-endian/mbig-endian
# MULTILIB_DIRNAMES += le be
-# MULTILIB_EXCEPTIONS =
-# MULTILIB_MATCHES = mbig-endian=mbe mlittle-endian=mle
+# MULTILIB_MATCHES += mbig-endian=mbe mlittle-endian=mle
#
# MULTILIB_OPTIONS += mhard-float/msoft-float
# MULTILIB_DIRNAMES += fpu soft
@@ -97,3 +77,4 @@ $(T)crti.o: $(srcdir)/config/arm/crti.asm $(GCC_PASSES)
$(T)crtn.o: $(srcdir)/config/arm/crtn.asm $(GCC_PASSES)
$(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
-c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/arm/crtn.asm
+