author    | rearnsha <rearnsha@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-08-27 12:52:58 +0000
committer | rearnsha <rearnsha@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-08-27 12:52:58 +0000
commit    | 60f30e1335bdbc4e3409b6bd0481aa21cdd55c49 (patch)
tree      | 8dd03076871f1247d318053ae956e716bc6733bb /gcc/config/arm/ieee754-sf.S
parent    | 0d21cbc776f7eff52137e72df2aa9f704fd1355a (diff)
download  | gcc-60f30e1335bdbc4e3409b6bd0481aa21cdd55c49.tar.gz
2003-08-27 Richard Earnshaw <rearnsha@arm.com>
* lib1funcs.asm (L_ieee754_sp): New. Include ieee754-sf.S.
(L_ieee754_dp): New. Include ieee754-df.S.
* arm/ieee754-sf.S: Rework to allow interworking, calling from Thumb,
and compilation in apcs-26 mode.
* arm/ieee754-df.S: Likewise.
* t-arm-elf (DPBIT, FPBIT, fp-bit.c, dp-bit.c): Delete rules.
(LIB1ASMFUNCS): Add _ieee754_sp and _ieee754_dp targets.

2003-08-27 Nicolas Pitre <nico@cam.org>
* arm/ieee754-sf.S: New.
* arm/ieee754-df.S: New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@70845 138bc75d-0d04-0410-961f-82ee72b054a4
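The lib1funcs.asm entries listed in the ChangeLog hook the new file into libgcc's usual one-object-per-function scheme: each member of LIB1ASMFUNCS is assembled separately with the matching L_* macro defined, so only the guarded block for that member ends up in the object. A minimal sketch of the guards implied by the ChangeLog (the exact surrounding context inside lib1funcs.asm is an assumption here):

```
#ifdef L_ieee754_sp
#include "ieee754-sf.S"
#endif

#ifdef L_ieee754_dp
#include "ieee754-df.S"
#endif
```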
Diffstat (limited to 'gcc/config/arm/ieee754-sf.S')
-rw-r--r-- | gcc/config/arm/ieee754-sf.S | 813
1 files changed, 813 insertions, 0 deletions
diff --git a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S new file mode 100644 index 00000000000..88ded2931bf --- /dev/null +++ b/gcc/config/arm/ieee754-sf.S @@ -0,0 +1,813 @@ +/* ieee754-sf.S single-precision floating point support for ARM + + Copyright (C) 2003 Free Software Foundation, Inc. + Contributed by Nicolas Pitre (nico@cam.org) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + In addition to the permissions in the GNU General Public License, the + Free Software Foundation gives you unlimited permission to link the + compiled version of this file into combinations with other programs, + and to distribute those combinations without any restriction coming + from the use of this file. (The General Public License restrictions + do apply in other respects; for example, they cover modification of + the file, and distribution when not linked into a combine + executable.) + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* + * Notes: + * + * The goal of this code is to be as fast as possible. This is + * not meant to be easy to understand for the casual reader. + * + * Only the default rounding mode is intended for best performances. + * Exceptions aren't supported yet, but that can be added quite easily + * if necessary without impacting performances. + */ + +@ This selects the minimum architecture level required. +#undef __ARM_ARCH__ +#define __ARM_ARCH__ 3 + +#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \ + || defined(__ARM_ARCH_4T__) +#undef __ARM_ARCH__ +/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with + long multiply instructions. That includes v3M. */ +#define __ARM_ARCH__ 4 +#endif + +#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5TE__) +#undef __ARM_ARCH__ +#define __ARM_ARCH__ 5 +#endif + +#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__) +#undef RET +#undef RETc +#define RET bx lr +#define RETc(x) bx##x lr +#if (__ARM_ARCH__ == 4) && (defined(__thumb__) || defined(__THUMB_INTERWORK__)) +#define __FP_INTERWORKING__ +#endif +#endif + +#if defined(__thumb__) && !defined(__THUMB_INTERWORK__) +.macro ARM_FUNC_START name + FUNC_START \name + bx pc + nop + .arm +.endm +#else +.macro ARM_FUNC_START name + FUNC_START \name +.endm +#endif + +ARM_FUNC_START negsf2 + eor r0, r0, #0x80000000 @ flip sign bit + RET + +ARM_FUNC_START subsf3 + eor r1, r1, #0x80000000 @ flip sign bit of second arg +#if defined(__thumb__) && !defined(__THUMB_INTERWORK__) + b 1f @ Skip Thumb-code prologue +#endif + +ARM_FUNC_START addsf3 + +1: @ Compare both args, return zero if equal but the sign. + eor r2, r0, r1 + teq r2, #0x80000000 + beq LSYM(Lad_z) + + @ If first arg is 0 or -0, return second arg. + @ If second arg is 0 or -0, return first arg. + bics r2, r0, #0x80000000 + moveq r0, r1 + bicnes r2, r1, #0x80000000 + RETc(eq) + + @ Mask out exponents. 
+ mov ip, #0xff000000 + and r2, r0, ip, lsr #1 + and r3, r1, ip, lsr #1 + + @ If either of them is 255, result will be INF or NAN + teq r2, ip, lsr #1 + teqne r3, ip, lsr #1 + beq LSYM(Lad_i) + + @ Compute exponent difference. Make largest exponent in r2, + @ corresponding arg in r0, and positive exponent difference in r3. + subs r3, r3, r2 + addgt r2, r2, r3 + eorgt r1, r0, r1 + eorgt r0, r1, r0 + eorgt r1, r0, r1 + rsblt r3, r3, #0 + + @ If exponent difference is too large, return largest argument + @ already in r0. We need up to 25 bit to handle proper rounding + @ of 0x1p25 - 1.1. + cmp r3, #(25 << 23) + RETc(hi) + + @ Convert mantissa to signed integer. + tst r0, #0x80000000 + orr r0, r0, #0x00800000 + bic r0, r0, #0xff000000 + rsbne r0, r0, #0 + tst r1, #0x80000000 + orr r1, r1, #0x00800000 + bic r1, r1, #0xff000000 + rsbne r1, r1, #0 + + @ If exponent == difference, one or both args were denormalized. + @ Since this is not common case, rescale them off line. + teq r2, r3 + beq LSYM(Lad_d) +LSYM(Lad_x): + + @ Scale down second arg with exponent difference. + @ Apply shift one bit left to first arg and the rest to second arg + @ to simplify things later, but only if exponent does not become 0. + movs r3, r3, lsr #23 + teqne r2, #(1 << 23) + movne r0, r0, lsl #1 + subne r2, r2, #(1 << 23) + subne r3, r3, #1 + + @ Shift second arg into ip, keep leftover bits into r1. + mov ip, r1, asr r3 + rsb r3, r3, #32 + mov r1, r1, lsl r3 + + add r0, r0, ip @ the actual addition + + @ We now have a 64 bit result in r0-r1. + @ Keep absolute value in r0-r1, sign in r3. + ands r3, r0, #0x80000000 + bpl LSYM(Lad_p) + rsbs r1, r1, #0 + rsc r0, r0, #0 + + @ Determine how to normalize the result. +LSYM(Lad_p): + cmp r0, #0x00800000 + bcc LSYM(Lad_l) + cmp r0, #0x01000000 + bcc LSYM(Lad_r0) + cmp r0, #0x02000000 + bcc LSYM(Lad_r1) + + @ Result needs to be shifted right. + movs r0, r0, lsr #1 + mov r1, r1, rrx + add r2, r2, #(1 << 23) +LSYM(Lad_r1): + movs r0, r0, lsr #1 + mov r1, r1, rrx + add r2, r2, #(1 << 23) + + @ Our result is now properly aligned into r0, remaining bits in r1. + @ Round with MSB of r1. If halfway between two numbers, round towards + @ LSB of r0 = 0. +LSYM(Lad_r0): + add r0, r0, r1, lsr #31 + teq r1, #0x80000000 + biceq r0, r0, #1 + + @ Rounding may have added a new MSB. Adjust exponent. + @ That MSB will be cleared when exponent is merged below. + tst r0, #0x01000000 + addne r2, r2, #(1 << 23) + + @ Make sure we did not bust our exponent. + cmp r2, #(254 << 23) + bhi LSYM(Lad_o) + + @ Pack final result together. +LSYM(Lad_e): + bic r0, r0, #0x01800000 + orr r0, r0, r2 + orr r0, r0, r3 + RET + + @ Result must be shifted left. + @ No rounding necessary since r1 will always be 0. +LSYM(Lad_l): + +#if __ARM_ARCH__ < 5 + + movs ip, r0, lsr #12 + moveq r0, r0, lsl #12 + subeq r2, r2, #(12 << 23) + tst r0, #0x00ff0000 + moveq r0, r0, lsl #8 + subeq r2, r2, #(8 << 23) + tst r0, #0x00f00000 + moveq r0, r0, lsl #4 + subeq r2, r2, #(4 << 23) + tst r0, #0x00c00000 + moveq r0, r0, lsl #2 + subeq r2, r2, #(2 << 23) + tst r0, #0x00800000 + moveq r0, r0, lsl #1 + subeq r2, r2, #(1 << 23) + cmp r2, #0 + bgt LSYM(Lad_e) + +#else + + clz ip, r0 + sub ip, ip, #8 + mov r0, r0, lsl ip + subs r2, r2, ip, lsl #23 + bgt LSYM(Lad_e) + +#endif + + @ Exponent too small, denormalize result. + mvn r2, r2, asr #23 + add r2, r2, #2 + orr r0, r3, r0, lsr r2 + RET + + @ Fixup and adjust bit position for denormalized arguments. + @ Note that r2 must not remain equal to 0. 
+LSYM(Lad_d): + teq r2, #0 + eoreq r0, r0, #0x00800000 + addeq r2, r2, #(1 << 23) + eor r1, r1, #0x00800000 + subne r3, r3, #(1 << 23) + b LSYM(Lad_x) + + @ Result is x - x = 0, unless x is INF or NAN. +LSYM(Lad_z): + mov ip, #0xff000000 + and r2, r0, ip, lsr #1 + teq r2, ip, lsr #1 + moveq r0, ip, asr #2 + movne r0, #0 + RET + + @ Overflow: return INF. +LSYM(Lad_o): + orr r0, r3, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ At least one of r0/r1 is INF/NAN. + @ if r0 != INF/NAN: return r1 (which is INF/NAN) + @ if r1 != INF/NAN: return r0 (which is INF/NAN) + @ if r0 or r1 is NAN: return NAN + @ if opposite sign: return NAN + @ return r0 (which is INF or -INF) +LSYM(Lad_i): + teq r2, ip, lsr #1 + movne r0, r1 + teqeq r3, ip, lsr #1 + RETc(ne) + movs r2, r0, lsl #9 + moveqs r2, r1, lsl #9 + teqeq r0, r1 + orrne r0, r3, #0x00400000 @ NAN + RET + + +ARM_FUNC_START floatunsisf + mov r3, #0 + b 1f + +ARM_FUNC_START floatsisf + ands r3, r0, #0x80000000 + rsbmi r0, r0, #0 + +1: teq r0, #0 + RETc(eq) + + mov r1, #0 + mov r2, #((127 + 23) << 23) + tst r0, #0xfc000000 + beq LSYM(Lad_p) + + @ We need to scale the value a little before branching to code above. + tst r0, #0xf0000000 + movne r1, r0, lsl #28 + movne r0, r0, lsr #4 + addne r2, r2, #(4 << 23) + tst r0, #0x0c000000 + beq LSYM(Lad_p) + mov r1, r1, lsr #2 + orr r1, r1, r0, lsl #30 + mov r0, r0, lsr #2 + add r2, r2, #(2 << 23) + b LSYM(Lad_p) + + +ARM_FUNC_START mulsf3 + + @ Mask out exponents. + mov ip, #0xff000000 + and r2, r0, ip, lsr #1 + and r3, r1, ip, lsr #1 + + @ Trap any INF/NAN. + teq r2, ip, lsr #1 + teqne r3, ip, lsr #1 + beq LSYM(Lml_s) + + @ Trap any multiplication by 0. + bics ip, r0, #0x80000000 + bicnes ip, r1, #0x80000000 + beq LSYM(Lml_z) + + @ Shift exponents right one bit to make room for overflow bit. + @ If either of them is 0, scale denormalized arguments off line. + @ Then add both exponents together. + movs r2, r2, lsr #1 + teqne r3, #0 + beq LSYM(Lml_d) +LSYM(Lml_x): + add r2, r2, r3, asr #1 + + @ Preserve final sign in r2 along with exponent for now. + teq r0, r1 + orrmi r2, r2, #0x8000 + + @ Convert mantissa to unsigned integer. + bic r0, r0, #0xff000000 + bic r1, r1, #0xff000000 + orr r0, r0, #0x00800000 + orr r1, r1, #0x00800000 + +#if __ARM_ARCH__ < 4 + + @ Well, no way to make it shorter without the umull instruction. + @ We must perform that 24 x 24 -> 48 bit multiplication by hand. + stmfd sp!, {r4, r5} + mov r4, r0, lsr #16 + mov r5, r1, lsr #16 + bic r0, r0, #0x00ff0000 + bic r1, r1, #0x00ff0000 + mul ip, r4, r5 + mul r3, r0, r1 + mul r0, r5, r0 + mla r0, r4, r1, r0 + adds r3, r3, r0, lsl #16 + adc ip, ip, r0, lsr #16 + ldmfd sp!, {r4, r5} + +#else + + umull r3, ip, r0, r1 @ The actual multiplication. + +#endif + + @ Put final sign in r0. + mov r0, r2, lsl #16 + bic r2, r2, #0x8000 + + @ Adjust result if one extra MSB appeared. + @ The LSB may be lost but this never changes the result in this case. + tst ip, #(1 << 15) + addne r2, r2, #(1 << 22) + movnes ip, ip, lsr #1 + movne r3, r3, rrx + + @ Apply exponent bias, check range for underflow. + subs r2, r2, #(127 << 22) + ble LSYM(Lml_u) + + @ Scale back to 24 bits with rounding. + @ r0 contains sign bit already. + orrs r0, r0, r3, lsr #23 + adc r0, r0, ip, lsl #9 + + @ If halfway between two numbers, rounding should be towards LSB = 0. + mov r3, r3, lsl #9 + teq r3, #0x80000000 + biceq r0, r0, #1 + + @ Note: rounding may have produced an extra MSB here. + @ The extra bit is cleared before merging the exponent below. 
+ tst r0, #0x01000000 + addne r2, r2, #(1 << 22) + + @ Check for exponent overflow + cmp r2, #(255 << 22) + bge LSYM(Lml_o) + + @ Add final exponent. + bic r0, r0, #0x01800000 + orr r0, r0, r2, lsl #1 + RET + + @ Result is 0, but determine sign anyway. +LSYM(Lml_z): eor r0, r0, r1 + bic r0, r0, #0x7fffffff + RET + + @ Check if denormalized result is possible, otherwise return signed 0. +LSYM(Lml_u): + cmn r2, #(24 << 22) + RETc(le) + + @ Find out proper shift value. + mvn r1, r2, asr #22 + subs r1, r1, #7 + bgt LSYM(Lml_ur) + + @ Shift value left, round, etc. + add r1, r1, #32 + orrs r0, r0, r3, lsr r1 + rsb r1, r1, #32 + adc r0, r0, ip, lsl r1 + mov ip, r3, lsl r1 + teq ip, #0x80000000 + biceq r0, r0, #1 + RET + + @ Shift value right, round, etc. + @ Note: r1 must not be 0 otherwise carry does not get set. +LSYM(Lml_ur): + orrs r0, r0, ip, lsr r1 + adc r0, r0, #0 + rsb r1, r1, #32 + mov ip, ip, lsl r1 + teq r3, #0 + teqeq ip, #0x80000000 + biceq r0, r0, #1 + RET + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Lml_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #(1 << 22) + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #(1 << 23) + beq 2b + orr r1, r1, ip + b LSYM(Lml_x) + + @ One or both args are INF or NAN. +LSYM(Lml_s): + teq r0, #0x0 + teqne r1, #0x0 + teqne r0, #0x80000000 + teqne r1, #0x80000000 + beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN + teq r2, ip, lsr #1 + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN * <anything> -> NAN +1: teq r3, ip, lsr #1 + bne LSYM(Lml_i) + movs r3, r1, lsl #9 + bne LSYM(Lml_n) @ <anything> * NAN -> NAN + + @ Result is INF, but we need to determine its sign. +LSYM(Lml_i): + eor r0, r0, r1 + + @ Overflow: return INF (sign already in r0). +LSYM(Lml_o): + and r0, r0, #0x80000000 + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ Return NAN. +LSYM(Lml_n): + mov r0, #0x7f000000 + orr r0, r0, #0x00c00000 + RET + + +ARM_FUNC_START divsf3 + + @ Mask out exponents. + mov ip, #0xff000000 + and r2, r0, ip, lsr #1 + and r3, r1, ip, lsr #1 + + @ Trap any INF/NAN or zeroes. + teq r2, ip, lsr #1 + teqne r3, ip, lsr #1 + bicnes ip, r0, #0x80000000 + bicnes ip, r1, #0x80000000 + beq LSYM(Ldv_s) + + @ Shift exponents right one bit to make room for overflow bit. + @ If either of them is 0, scale denormalized arguments off line. + @ Then substract divisor exponent from dividend''s. + movs r2, r2, lsr #1 + teqne r3, #0 + beq LSYM(Ldv_d) +LSYM(Ldv_x): + sub r2, r2, r3, asr #1 + + @ Preserve final sign into ip. + eor ip, r0, r1 + + @ Convert mantissa to unsigned integer. + @ Dividend -> r3, divisor -> r1. + mov r3, #0x10000000 + movs r1, r1, lsl #9 + mov r0, r0, lsl #9 + beq LSYM(Ldv_1) + orr r1, r3, r1, lsr #4 + orr r3, r3, r0, lsr #4 + + @ Initialize r0 (result) with final sign bit. + and r0, ip, #0x80000000 + + @ Ensure result will land to known bit position. + cmp r3, r1 + subcc r2, r2, #(1 << 22) + movcc r3, r3, lsl #1 + + @ Apply exponent bias, check range for over/underflow. + add r2, r2, #(127 << 22) + cmn r2, #(24 << 22) + RETc(le) + cmp r2, #(255 << 22) + bge LSYM(Lml_o) + + @ The actual division loop. 
+ mov ip, #0x00800000 +1: cmp r3, r1 + subcs r3, r3, r1 + orrcs r0, r0, ip + cmp r3, r1, lsr #1 + subcs r3, r3, r1, lsr #1 + orrcs r0, r0, ip, lsr #1 + cmp r3, r1, lsr #2 + subcs r3, r3, r1, lsr #2 + orrcs r0, r0, ip, lsr #2 + cmp r3, r1, lsr #3 + subcs r3, r3, r1, lsr #3 + orrcs r0, r0, ip, lsr #3 + movs r3, r3, lsl #4 + movnes ip, ip, lsr #4 + bne 1b + + @ Check if denormalized result is needed. + cmp r2, #0 + ble LSYM(Ldv_u) + + @ Apply proper rounding. + cmp r3, r1 + addcs r0, r0, #1 + biceq r0, r0, #1 + + @ Add exponent to result. + bic r0, r0, #0x00800000 + orr r0, r0, r2, lsl #1 + RET + + @ Division by 0x1p*: let''s shortcut a lot of code. +LSYM(Ldv_1): + and ip, ip, #0x80000000 + orr r0, ip, r0, lsr #9 + add r2, r2, #(127 << 22) + cmp r2, #(255 << 22) + bge LSYM(Lml_o) + cmp r2, #0 + orrgt r0, r0, r2, lsl #1 + RETc(gt) + cmn r2, #(24 << 22) + movle r0, ip + RETc(le) + orr r0, r0, #0x00800000 + mov r3, #0 + + @ Result must be denormalized: prepare parameters to use code above. + @ r3 already contains remainder for rounding considerations. +LSYM(Ldv_u): + bic ip, r0, #0x80000000 + and r0, r0, #0x80000000 + mvn r1, r2, asr #22 + add r1, r1, #2 + b LSYM(Lml_ur) + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Ldv_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #(1 << 22) + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #(1 << 23) + beq 2b + orr r1, r1, ip + b LSYM(Ldv_x) + + @ One or both arguments is either INF, NAN or zero. +LSYM(Ldv_s): + mov ip, #0xff000000 + teq r2, ip, lsr #1 + teqeq r3, ip, lsr #1 + beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN + teq r2, ip, lsr #1 + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN / <anything> -> NAN + b LSYM(Lml_i) @ INF / <anything> -> INF +1: teq r3, ip, lsr #1 + bne 2f + movs r3, r1, lsl #9 + bne LSYM(Lml_n) @ <anything> / NAN -> NAN + b LSYM(Lml_z) @ <anything> / INF -> 0 +2: @ One or both arguments are 0. + bics r2, r0, #0x80000000 + bne LSYM(Lml_i) @ <non_zero> / 0 -> INF + bics r3, r1, #0x80000000 + bne LSYM(Lml_z) @ 0 / <non_zero> -> 0 + b LSYM(Lml_n) @ 0 / 0 -> NAN + + +FUNC_START gesf2 +ARM_FUNC_START gtsf2 + mov r3, #-1 + b 1f + +FUNC_START lesf2 +ARM_FUNC_START ltsf2 + mov r3, #1 + b 1f + +FUNC_START nesf2 +FUNC_START eqsf2 +ARM_FUNC_START cmpsf2 + mov r3, #1 @ how should we specify unordered here? + +1: @ Trap any INF/NAN first. + mov ip, #0xff000000 + and r2, r1, ip, lsr #1 + teq r2, ip, lsr #1 + and r2, r0, ip, lsr #1 + teqne r2, ip, lsr #1 + beq 3f + + @ Test for equality. + @ Note that 0.0 is equal to -0.0. +2: orr r3, r0, r1 + bics r3, r3, #0x80000000 @ either 0.0 or -0.0 + teqne r0, r1 @ or both the same + moveq r0, #0 + RETc(eq) + + @ Check for sign difference. The N flag is set if it is the case. + @ If so, return sign of r0. + movmi r0, r0, asr #31 + orrmi r0, r0, #1 + RETc(mi) + + @ Compare exponents. + and r3, r1, ip, lsr #1 + cmp r2, r3 + + @ Compare mantissa if exponents are equal + moveq r0, r0, lsl #9 + cmpeq r0, r1, lsl #9 + movcs r0, r1, asr #31 + mvncc r0, r1, asr #31 + orr r0, r0, #1 + RET + + @ Look for a NAN. +3: and r2, r1, ip, lsr #1 + teq r2, ip, lsr #1 + bne 4f + movs r2, r1, lsl #9 + bne 5f @ r1 is NAN +4: and r2, r0, ip, lsr #1 + teq r2, ip, lsr #1 + bne 2b + movs ip, r0, lsl #9 + beq 2b @ r0 is not NAN +5: mov r0, r3 @ return unordered code from r3. 
+ RET + + +ARM_FUNC_START unordsf2 + mov ip, #0xff000000 + and r2, r1, ip, lsr #1 + teq r2, ip, lsr #1 + bne 1f + movs r2, r1, lsl #9 + bne 3f @ r1 is NAN +1: and r2, r0, ip, lsr #1 + teq r2, ip, lsr #1 + bne 2f + movs r2, r0, lsl #9 + bne 3f @ r0 is NAN +2: mov r0, #0 @ arguments are ordered. + RET +3: mov r0, #1 @ arguments are unordered. + RET + + +ARM_FUNC_START fixsfsi + movs r0, r0, lsl #1 + RETc(eq) @ value is 0. + @ preserve C flag (the actual sign) +#ifdef __APCS_26__ + mov r1, pc +#else + mrs r1, cpsr +#endif + + @ check exponent range. + and r2, r0, #0xff000000 + cmp r2, #(127 << 24) + movcc r0, #0 @ value is too small + RETc(cc) + cmp r2, #((127 + 31) << 24) + bcs 1f @ value is too large + + mov r0, r0, lsl #7 + orr r0, r0, #0x80000000 + mov r2, r2, lsr #24 + rsb r2, r2, #(127 + 31) + mov r0, r0, lsr r2 + tst r1, #0x20000000 @ the sign bit + rsbne r0, r0, #0 + RET + +1: teq r2, #0xff000000 + bne 2f + movs r0, r0, lsl #8 + bne 3f @ r0 is NAN. +2: tst r1, #0x20000000 @ the sign bit + moveq r0, #0x7fffffff @ the maximum signed positive si + movne r0, #0x80000000 @ the maximum signed negative si + RET + +3: mov r0, #0 @ What should we convert NAN to? + RET + + +ARM_FUNC_START fixunssfsi + movs r0, r0, lsl #1 + RETc(eq) @ value is 0. + movcs r0, #0 + RETc(cs) @ value is negative. + + @ check exponent range. + and r2, r0, #0xff000000 + cmp r2, #(127 << 24) + movcc r0, #0 @ value is too small + RETc(cc) + cmp r2, #((127 + 32) << 24) + bcs 1f @ value is too large + + mov r0, r0, lsl #7 + orr r0, r0, #0x80000000 + mov r2, r2, lsr #24 + rsb r2, r2, #(127 + 31) + mov r0, r0, lsr r2 + RET + +1: teq r2, #0xff000000 + bne 2f + movs r0, r0, lsl #8 + bne 3b @ r0 is NAN. +2: mov r0, #0xffffffff @ maximum unsigned si + RET + + |
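None of the routines in this file are called by name from user code: on a soft-float ARM build the compiler lowers ordinary float arithmetic into calls to the libgcc entry points defined above (__addsf3, __subsf3, __negsf2, __mulsf3, __divsf3, the __*sf2 comparisons, __fixsfsi, __fixunssfsi, __floatsisf, __floatunsisf). A rough illustration of that mapping; the exact call sequence depends on target options:

```c
/* Plain C; on a soft-float ARM target the compiler expands the float
   operations below into calls to the routines in ieee754-sf.S.
   The commented call sequences are approximate.  */
float scale_and_offset(float a, float b, float c)
{
    return a * b + c;            /* ~ __addsf3(__mulsf3(a, b), c) */
}

int less_than(float a, float b)
{
    return a < b;                /* ~ __ltsf2(a, b) < 0 */
}

int to_int(float x)
{
    return (int)x;               /* ~ __fixsfsi(x) */
}
```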
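The addition and multiplication paths both round the same way: add the most significant discarded bit, and on an exact tie clear the low bit of the result so that ties go to even. A small C model of that step, illustrative only; the possible carry out of the mantissa and the exponent update are handled separately in the assembly:

```c
#include <stdint.h>

/* "mant" holds the aligned result mantissa, "rest" the 32 bits that were
   shifted out below it.  Round to nearest, ties to even.  */
static uint32_t round_nearest_even(uint32_t mant, uint32_t rest)
{
    mant += rest >> 31;             /* add the MSB of the discarded bits */
    if (rest == 0x80000000u)        /* exactly halfway between two values */
        mant &= ~1u;                /* force the LSB to 0: round to even */
    return mant;
}
```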
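The left-normalization step in __addsf3 has two variants: ARMv5 and later use clz directly, while the pre-v5 path finds the leading bit with a shift-by-12/8/4/2/1 search. A C rendering of that older path; here the exponent is a plain integer, whereas the assembly keeps it pre-shifted into the exponent field:

```c
#include <stdint.h>

/* Shift a non-zero value with bit 23 clear up until bit 23 is set,
   decrementing the exponent by the same amount.  */
static uint32_t normalize(uint32_t mant, int32_t *exp)
{
    if ((mant >> 12) == 0)      { mant <<= 12; *exp -= 12; }
    if (!(mant & 0x00ff0000u))  { mant <<= 8;  *exp -= 8;  }
    if (!(mant & 0x00f00000u))  { mant <<= 4;  *exp -= 4;  }
    if (!(mant & 0x00c00000u))  { mant <<= 2;  *exp -= 2;  }
    if (!(mant & 0x00800000u))  { mant <<= 1;  *exp -= 1;  }
    return mant;
}
```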
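The quotient loop in __divsf3 is a restoring division over the mantissas, unrolled to produce four quotient bits per pass. The same computation, simplified here to one bit per iteration; the caller has already arranged den <= num < 2*den, so the quotient always lands in [2^23, 2^24):

```c
#include <stdint.h>

/* Restoring division of the aligned mantissas; the leftover remainder
   feeds the rounding decision afterwards.  */
static uint32_t divide_mantissas(uint32_t num, uint32_t den, uint32_t *rem)
{
    uint32_t quotient = 0;
    for (uint32_t bit = 1u << 23; bit != 0; bit >>= 1) {
        if (num >= den) {        /* the divisor fits: subtract, set this bit */
            num -= den;
            quotient |= bit;
        }
        num <<= 1;               /* bring down the next (zero) dividend bit */
    }
    *rem = num;
    return quotient;
}
```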
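Finally, the conversions at the end of the file pick specific results for the cases C leaves undefined, as their comments spell out: magnitudes below 1.0 convert to 0, out-of-range values saturate, and NaN converts to 0. A behavioural C sketch of __fixsfsi under those rules, not a transcription of the assembly:

```c
#include <stdint.h>
#include <string.h>

static int32_t fixsfsi_model(float f)
{
    uint32_t bits;
    memcpy(&bits, &f, sizeof bits);               /* work on the raw encoding */

    uint32_t sign = bits >> 31;
    int32_t  exp  = (int32_t)((bits >> 23) & 0xff) - 127;
    uint32_t mant = (bits & 0x007fffffu) | 0x00800000u;

    if (exp == 128 && (bits << 9) != 0)           /* NaN: converted to 0 */
        return 0;
    if (exp < 0)                                  /* |f| < 1.0: too small */
        return 0;
    if (exp > 30)                                 /* saturate out-of-range values */
        return sign ? INT32_MIN : INT32_MAX;

    uint32_t val = exp >= 23 ? mant << (exp - 23) : mant >> (23 - exp);
    return sign ? -(int32_t)val : (int32_t)val;
}
```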