Diffstat (limited to 'libgcc/config/rl78/fpmath-sf.S')
 -rw-r--r--  libgcc/config/rl78/fpmath-sf.S | 1036
 1 file changed, 1036 insertions, 0 deletions
diff --git a/libgcc/config/rl78/fpmath-sf.S b/libgcc/config/rl78/fpmath-sf.S
new file mode 100644
index 0000000000..6d4d4bd897
--- /dev/null
+++ b/libgcc/config/rl78/fpmath-sf.S
@@ -0,0 +1,1036 @@
+; SF format is:
+;
+; [sign] 1.[23bits] E[8bits(n-127)]
+;
+; SEEEEEEE Emmmmmmm mmmmmmmm mmmmmmmm
+;
+; [A+0] mmmmmmmm
+; [A+1] mmmmmmmm
+; [A+2] Emmmmmmm
+; [A+3] SEEEEEEE
+;
+; Special values (xxx != 0):
+;
+; r11 r10 r9 r8
+; [HL+3] [HL+2] [HL+1] [HL+0]
+; s1111111 10000000 00000000 00000000 infinity
+; s1111111 1xxxxxxx xxxxxxxx xxxxxxxx NaN
+; s0000000 00000000 00000000 00000000 zero
+; s0000000 0xxxxxxx xxxxxxxx xxxxxxxx denormals
+;
+; Note that CMPtype is "signed char" for rl78
+;
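+; For reference, a hedged C sketch of the field extraction this layout
+; implies (illustrative only; "u" stands for the raw 32-bit pattern):
+;
+;   unsigned long sign = (u >> 31) & 1;
+;   unsigned long exp  = (u >> 23) & 0xff;   /* biased by 127 */
+;   unsigned long frac = u & 0x007fffffUL;   /* 23 stored mantissa bits */
+;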
+
+#include "vregs.h"
+
+#define Z PSW.6
+
+; External Functions:
+;
+; __int_isnan [HL] -> Z if NaN
+; __int_iszero [HL] -> Z if zero
+
+START_FUNC __int_isinf
+ ;; [HL] points to value, returns Z if it's #Inf
+
+ mov a, [hl+2]
+ and a, #0x80
+ mov x, a
+ mov a, [hl+3]
+ and a, #0x7f
+ cmpw ax, #0x7f80
+ skz
+	ret	; return NZ - exponent is not all ones, so not Inf (or NaN)
+ mov a, [hl+2]
+ and a, #0x7f
+ or a, [hl+1]
+ or a, [hl]
+ ret
+
+END_FUNC __int_isinf
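+
+; For reference, a hedged C equivalent of the test above (sketch only -
+; the asm reports its result through the Z flag rather than a value):
+;
+;   int int_isinf (unsigned long u)
+;   {
+;     return (u & 0x7fffffffUL) == 0x7f800000UL;
+;   }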
+
+#define A_SIGN [hl+0] /* byte */
+#define A_EXP [hl+2] /* word */
+#define A_FRAC_L [hl+4] /* word */
+#define A_FRAC_LH [hl+5] /* byte */
+#define A_FRAC_H [hl+6] /* word or byte */
+#define A_FRAC_HH [hl+7] /* byte */
+
+#define B_SIGN [hl+8]
+#define B_EXP [hl+10]
+#define B_FRAC_L [hl+12]
+#define B_FRAC_LH [hl+13]
+#define B_FRAC_H [hl+14]
+#define B_FRAC_HH [hl+15]
+
+START_FUNC _int_unpack_sf
+	;; convert 32-bit SFmode [DE] to the 8-byte unpacked struct at [HL] ("A")
+
+ mov a, [de+3]
+ sar a, 7
+ mov A_SIGN, a
+
+ movw ax, [de+2]
+ and a, #0x7f
+ shrw ax, 7
+ movw bc, ax ; remember if the exponent is all zeros
+ subw ax, #127 ; exponent is now non-biased
+ movw A_EXP, ax
+
+ movw ax, [de]
+ movw A_FRAC_L, ax
+
+ mov a, [de+2]
+ and a, #0x7f
+ cmp0 c ; if the exp is all zeros, it's denormal
+ skz
+ or a, #0x80
+ mov A_FRAC_H, a
+
+ mov a, #0
+ mov A_FRAC_HH, a
+
+ ;; rounding-bit-shift
+ movw ax, A_FRAC_L
+ shlw ax, 1
+ movw A_FRAC_L, ax
+ mov a, A_FRAC_H
+ rolc a, 1
+ mov A_FRAC_H, a
+ mov a, A_FRAC_HH
+ rolc a, 1
+ mov A_FRAC_HH, a
+
+ ret
+
+END_FUNC _int_unpack_sf
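+
+; For reference, a hedged C sketch of the unpack step (illustrative
+; struct and names; note the final left shift, which keeps one rounding
+; bit below the fraction for the pack step to consume):
+;
+;   struct unpacked { unsigned char sign; int exp; unsigned long frac; };
+;
+;   void int_unpack_sf (struct unpacked *a, unsigned long u)
+;   {
+;     unsigned int biased = (unsigned int) (u >> 23) & 0xff;
+;     a->sign = (u >> 31) ? 0xff : 0x00;
+;     a->exp  = (int) biased - 127;
+;     a->frac = u & 0x007fffffUL;
+;     if (biased != 0)            /* implicit leading 1, unless denormal */
+;       a->frac |= 0x00800000UL;
+;     a->frac <<= 1;              /* the "rounding-bit-shift" */
+;   }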
+
+; func(SF a,SF b)
+; [SP+4..7] a
+; [SP+8..11] b
+
+START_FUNC ___subsf3
+
+ ;; a - b => a + (-b)
+
+ ;; Note - we cannot just change the sign of B on the stack and
+	;; then fall through into __addsf3.  The stacked value may be
+ ;; used again (it was created by our caller after all). Instead
+ ;; we have to allocate some stack space of our own, copy A and B,
+ ;; change the sign of B, call __addsf3, release the allocated stack
+ ;; and then return.
+
+ subw sp, #8
+ movw ax, [sp+4+8]
+ movw [sp], ax
+ movw ax, [sp+4+2+8]
+ movw [sp+2], ax
+ movw ax, [sp+4+4+8]
+ movw [sp+4], ax
+ mov a, [sp+4+6+8]
+ mov [sp+6], a
+ mov a, [sp+4+7+8]
+ xor a, #0x80
+ mov [sp+7], a
+ call $!___addsf3
+ addw sp, #8
+ ret
+END_FUNC ___subsf3
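+
+; In C terms the wrapper above is simply the following (sketch; the copy
+; to freshly allocated stack exists only so the caller's stacked B
+; survives the sign flip):
+;
+;   float subsf3 (float a, float b)
+;   {
+;     union { float f; unsigned long u; } t;
+;     t.f = b;
+;     t.u ^= 0x80000000UL;        /* a - b == a + (-b) */
+;     return addsf3 (a, t.f);
+;   }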
+
+START_FUNC ___addsf3
+
+ ;; if (isnan(a)) return a
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_isnan
+ bnz $1f
+ret_a:
+ movw ax, [sp+4]
+ movw r8, ax
+ movw ax, [sp+6]
+ movw r10, ax
+ ret
+
+1: ;; if (isnan (b)) return b;
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_isnan
+ bnz $2f
+ret_b:
+ movw ax, [sp+8]
+ movw r8, ax
+ movw ax, [sp+10]
+ movw r10, ax
+ ret
+
+2: ;; if (isinf (a))
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call $!__int_isinf
+ bnz $3f
+
+ ;; if (isinf (b) && a->sign != b->sign) return NaN
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_isinf
+ bnz $ret_a
+
+ mov a, [sp+7]
+ mov h, a
+ mov a, [sp+11]
+ xor a, h
+ bf a.7, $ret_a
+
+ movw r8, #0x0001
+ movw r10, #0x7f80
+ ret
+
+3: ;; if (isinf (b)) return b;
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_isinf
+ bz $ret_b
+
+ ;; if (iszero (b))
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_iszero
+ bnz $4f
+
+ ;; if (iszero (a))
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_iszero
+ bnz $ret_a
+
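+	;; both operands are zero - return +0 unless both are -0,
+	;; so the result sign is a.sign & b.sign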
+ movw ax, [sp+4]
+ movw r8, ax
+ mov a, [sp+7]
+ mov h, a
+ movw ax, [sp+10]
+ and a, h
+ movw r10, ax
+ ret
+
+4: ;; if (iszero (a)) return b;
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_iszero
+ bz $ret_b
+
+; Normalize the two numbers relative to each other. At this point,
+; we need the numbers converted to their "unpacked" format.
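+;
+; A hedged C sketch of the alignment performed below (illustrative):
+;
+;   int diff = a.exp - b.exp;
+;   if (diff >= 24 || diff <= -24) {
+;     /* the smaller operand cannot affect the sum: give it the larger
+;        exponent and a zero fraction */
+;   } else {
+;     while (b.exp < a.exp) { b.frac >>= 1; b.exp++; }
+;     while (a.exp < b.exp) { a.frac >>= 1; a.exp++; }
+;   }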
+
+ subw sp, #16 ; Save room for two unpacked values.
+
+ movw ax, sp
+ movw hl, ax
+ addw ax, #16+4
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ addw ax, #16+8-8
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ movw hl, ax
+
+ ;; diff = a.exponent - b.exponent
+	movw ax, B_EXP		; B's signed exponent word
+	movw bc, ax
+	movw ax, A_EXP		; A's signed exponent word
+
+	subw ax, bc		; ax = a.exp - b.exp
+	movw de, ax		; de = sdiff
+
+ ;; if (diff < 0) diff = -diff
+ bf a.7, $1f
+ xor a, #0xff
+ xor r_0, #0xff ; x
+ incw ax ; a = diff
+1:
+	;; if (diff >= 24) zero the smaller one
+	cmpw ax, #24
+	bc $.L661		; if diff < 24 goto .L661
+
+ ;; zero out the smaller one
+
+ movw ax, de
+ bt a.7, $1f ; if sdiff < 0 (a_exp < b_exp) goto 1f
+ ;; "zero out" b
+ movw ax, A_EXP
+ movw B_EXP, ax
+ movw ax, #0
+ movw B_FRAC_L, ax
+ movw B_FRAC_H, ax
+ br $5f
+1:
+ ;; "zero out" a
+ movw ax, B_EXP
+ movw A_EXP, ax
+ movw ax, #0
+ movw A_FRAC_L, ax
+ movw A_FRAC_H, ax
+
+ br $5f
+.L661:
+ ;; shift the smaller one so they have the same exponents
+1:
+ movw ax, de
+ bt a.7, $1f
+ cmpw ax, #0 ; sdiff > 0
+ bnh $1f ; if (sdiff <= 0) goto 1f
+
+ decw de
+ incw B_EXP ; because it's [HL+byte]
+
+ movw ax, B_FRAC_H
+ shrw ax, 1
+ movw B_FRAC_H, ax
+ mov a, B_FRAC_LH
+ rorc a, 1
+ mov B_FRAC_LH, a
+ mov a, B_FRAC_L
+ rorc a, 1
+ mov B_FRAC_L, a
+
+ br $1b
+1:
+ movw ax, de
+ bf a.7, $1f
+
+ incw de
+ incw A_EXP ; because it's [HL+byte]
+
+ movw ax, A_FRAC_H
+ shrw ax, 1
+ movw A_FRAC_H, ax
+ mov a, A_FRAC_LH
+ rorc a, 1
+ mov A_FRAC_LH, a
+ mov a, A_FRAC_L
+ rorc a, 1
+ mov A_FRAC_L, a
+
+ br $1b
+1:
+
+5: ;; At this point, A and B have the same exponent.
+
+ mov a, A_SIGN
+ cmp a, B_SIGN
+ bnz $1f
+
+ ;; Same sign, just add.
+ movw ax, A_FRAC_L
+ addw ax, B_FRAC_L
+ movw A_FRAC_L, ax
+ mov a, A_FRAC_H
+ addc a, B_FRAC_H
+ mov A_FRAC_H, a
+ mov a, A_FRAC_HH
+ addc a, B_FRAC_HH
+ mov A_FRAC_HH, a
+
+ br $.L728
+
+1:	;; Signs differ - the A register still holds A_SIGN.
+ bf a.7, $.L696
+
+ ;; A is negative, do B-A
+ movw ax, B_FRAC_L
+ subw ax, A_FRAC_L
+ movw A_FRAC_L, ax
+ mov a, B_FRAC_H
+ subc a, A_FRAC_H
+ mov A_FRAC_H, a
+ mov a, B_FRAC_HH
+ subc a, A_FRAC_HH
+ mov A_FRAC_HH, a
+
+ br $.L698
+.L696:
+ ;; B is negative, do A-B
+ movw ax, A_FRAC_L
+ subw ax, B_FRAC_L
+ movw A_FRAC_L, ax
+ mov a, A_FRAC_H
+ subc a, B_FRAC_H
+ mov A_FRAC_H, a
+ mov a, A_FRAC_HH
+ subc a, B_FRAC_HH
+ mov A_FRAC_HH, a
+
+.L698:
+	;; the A register still holds A_FRAC_HH here
+ bt a.7, $.L706
+
+ ;; subtraction was positive
+ mov a, #0
+ mov A_SIGN, a
+ br $.L712
+
+.L706:
+ ;; subtraction was negative
+ mov a, #0xff
+ mov A_SIGN, a
+
+ ;; This negates A_FRAC
+ mov a, A_FRAC_L
+ xor a, #0xff ; XOR doesn't mess with carry
+ add a, #1 ; INC doesn't set the carry
+ mov A_FRAC_L, a
+ mov a, A_FRAC_LH
+ xor a, #0xff
+ addc a, #0
+ mov A_FRAC_LH, a
+ mov a, A_FRAC_H
+ xor a, #0xff
+ addc a, #0
+ mov A_FRAC_H, a
+ mov a, A_FRAC_HH
+ xor a, #0xff
+ addc a, #0
+ mov A_FRAC_HH, a
+
+.L712:
+ ;; Renormalize the subtraction
+
+ mov a, A_FRAC_L
+ or a, A_FRAC_LH
+ or a, A_FRAC_H
+ or a, A_FRAC_HH
+ bz $.L728
+
+ ;; Mantissa is not zero, left shift until the MSB is in the
+ ;; right place
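+	;; In C terms (sketch):
+	;;   while (frac < 0x02000000UL) { frac <<= 1; exp--; }
+	;; the .L728 code below then shifts right once more, leaving the
+	;; leading 1 at bit 24.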
+1:
+ movw ax, A_FRAC_H
+ cmpw ax, #0x0200
+ bnc $.L728
+
+ decw A_EXP
+
+ movw ax, A_FRAC_L
+ shlw ax, 1
+ movw A_FRAC_L, ax
+ movw ax, A_FRAC_H
+ rolwc ax, 1
+ movw A_FRAC_H, ax
+ br $1b
+
+.L728:
+ ;; normalize A and pack it
+
+ movw ax, A_FRAC_H
+ cmpw ax, #0x01ff
+ bnh $1f
+ ;; overflow in the mantissa; adjust
+ movw ax, A_FRAC_H
+ shrw ax, 1
+ movw A_FRAC_H, ax
+ mov a, A_FRAC_LH
+ rorc a, 1
+ mov A_FRAC_LH, a
+ mov a, A_FRAC_L
+ rorc a, 1
+ mov A_FRAC_L, a
+ incw A_EXP
+1:
+
+ call $!__rl78_int_pack_a_r8
+ addw sp, #16
+ ret
+
+END_FUNC ___addsf3
+
+START_FUNC __rl78_int_pack_a_r8
+ ;; pack A to R8
+ movw ax, A_EXP
+ addw ax, #126 ; not 127, we want the "bt/bf" test to check for denormals
+
+ bf a.7, $1f
+ ;; make a denormal
+2:
+ movw bc, ax
+ movw ax, A_FRAC_H
+ shrw ax, 1
+ movw A_FRAC_H, ax
+ mov a, A_FRAC_LH
+ rorc a, 1
+ mov A_FRAC_LH, a
+ mov a, A_FRAC_L
+ rorc a, 1
+ mov A_FRAC_L, a
+ movw ax, bc
+ incw ax
+ bt a.7, $2b
+ decw ax
+1:
+ incw ax ; now it's as if we added 127
+ movw A_EXP, ax
+
+ cmpw ax, #0xfe
+ bnh $1f
+ ;; store #Inf instead
+ mov a, A_SIGN
+ or a, #0x7f
+ mov x, #0x80
+ movw r10, ax
+ movw r8, #0
+ ret
+
+1:
+ bf a.7, $1f ; note AX has EXP at top of loop
+ ;; underflow, denormal?
+ movw ax, A_FRAC_H
+ shrw ax, 1
+ movw A_FRAC_H, ax
+ mov a, A_FRAC_LH
+ rorc a, 1
+	mov A_FRAC_LH, a	; byte store - movw here would clobber A_FRAC_H
+	mov a, A_FRAC_L
+	rorc a, 1
+	mov A_FRAC_L, a
+ incw A_EXP
+ movw ax, A_EXP
+ br $1b
+
+1:
+ ;; undo the rounding-bit-shift
+ mov a, A_FRAC_L
+ bf a.0, $1f
+ ;; round up
+ movw ax, A_FRAC_L
+ addw ax, #1
+ movw A_FRAC_L, ax
+ bnc $1f
+ incw A_FRAC_H
+
+ ;; If the rounding set the bit beyond the end of the fraction, increment the exponent.
+ mov a, A_FRAC_HH
+ bf a.1, $1f
+ incw A_EXP
+
+1:
+ movw ax, A_FRAC_H
+ shrw ax, 1
+ movw A_FRAC_H, ax
+ mov a, A_FRAC_LH
+ rorc a, 1
+ mov A_FRAC_LH, a
+ mov a, A_FRAC_L
+ rorc a, 1
+ mov A_FRAC_L, a
+
+ movw ax, A_FRAC_L
+ movw r8, ax
+
+ or a, x
+ or a, A_FRAC_H
+ or a, A_FRAC_HH
+ bnz $1f
+ movw ax, #0
+ movw A_EXP, ax
+1:
+ mov a, A_FRAC_H
+ and a, #0x7f
+ mov b, a
+ mov a, A_EXP
+ shl a, 7
+ or a, b
+ mov r10, a
+
+ mov a, A_SIGN
+ and a, #0x80
+ mov b, a
+ mov a, A_EXP
+ shr a, 1
+ or a, b
+ mov r11, a
+
+ ret
+END_FUNC __rl78_int_pack_a_r8
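+
+; For reference, a hedged C sketch of the pack step (illustrative; on
+; entry frac still carries the extra rounding bit at bit 0, with the
+; leading 1 at bit 24; shift-count clamping is omitted):
+;
+;   unsigned long pack (struct unpacked *a)
+;   {
+;     unsigned long s = a->sign ? 0x80000000UL : 0;
+;     int exp = a->exp + 127;                /* re-bias */
+;     if (exp <= 0)                          /* too small: make a denormal */
+;       { a->frac >>= 1 - exp; exp = 0; }
+;     if (exp >= 0xff)                       /* too large: return +/-Inf */
+;       return s | 0x7f800000UL;
+;     if (a->frac & 1)                       /* round using the extra bit */
+;       {
+;         a->frac += 1;
+;         if (a->frac & 0x02000000UL)        /* carry ran off the top */
+;           exp++;
+;       }
+;     a->frac >>= 1;                         /* undo the rounding-bit-shift */
+;     if (a->frac == 0)
+;       exp = 0;
+;     return s | ((unsigned long) exp << 23) | (a->frac & 0x007fffffUL);
+;   }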
+
+START_FUNC ___mulsf3
+
+ ;; if (isnan(a)) return a
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_isnan
+ bnz $1f
+mret_a:
+ movw ax, [sp+4]
+ movw r8, ax
+ mov a, [sp+11]
+ and a, #0x80
+ mov b, a
+ movw ax, [sp+6]
+ xor a, b ; sign is always a ^ b
+ movw r10, ax
+ ret
+1:
+ ;; if (isnan (b)) return b;
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_isnan
+ bnz $1f
+mret_b:
+ movw ax, [sp+8]
+ movw r8, ax
+ mov a, [sp+7]
+ and a, #0x80
+ mov b, a
+ movw ax, [sp+10]
+ xor a, b ; sign is always a ^ b
+ movw r10, ax
+ ret
+1:
+ ;; if (isinf (a)) return (b==0) ? nan : a
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call $!__int_isinf
+ bnz $.L805
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_iszero
+ bnz $mret_a
+
+ movw r8, #0x0001 ; return NaN
+ movw r10, #0x7f80
+ ret
+
+.L805:
+ ;; if (isinf (b)) return (a==0) ? nan : b
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_isinf
+ bnz $.L814
+
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_iszero
+ bnz $mret_b
+
+ movw r8, #0x0001 ; return NaN
+ movw r10, #0x7f80
+ ret
+
+.L814:
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_iszero
+ bz $mret_a
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_iszero
+ bz $mret_b
+
+ ;; at this point, we're doing the multiplication.
+
+ subw sp, #16 ; save room for two unpacked values
+
+ movw ax, sp
+ movw hl, ax
+ addw ax, #16+4
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ addw ax, #16+8-8
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ movw hl, ax
+
+ ;; multiply SI a.FRAC * SI b.FRAC to DI r8
+
+ subw sp, #16
+ movw ax, A_FRAC_L
+ movw [sp+0], ax
+ movw ax, A_FRAC_H
+ movw [sp+2], ax
+
+ movw ax, B_FRAC_L
+ movw [sp+8], ax
+ movw ax, B_FRAC_H
+ movw [sp+10], ax
+
+ movw ax, #0
+ movw [sp+4], ax
+ movw [sp+6], ax
+ movw [sp+12], ax
+ movw [sp+14], ax
+
+ call !!___muldi3 ; MTMPa * MTMPb -> R8..R15
+ addw sp, #16
+
+ movw ax, sp
+ movw hl, ax
+
+ ;; add the exponents together
+ movw ax, A_EXP
+ addw ax, B_EXP
+ movw bc, ax ; exponent in BC
+
+ ;; now, re-normalize the DI value in R8..R15 to have the
+ ;; MSB in the "right" place, adjusting BC as we shift it.
+
+ ;; The value will normally be in this range:
+ ;; R15 R8
+ ;; 0001_0000_0000_0000
+ ;; 0003_ffff_fc00_0001
+
+ ;; so to speed it up, we normalize to:
+ ;; 0001_xxxx_xxxx_xxxx
+ ;; then extract the bytes we want (r11-r14)
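+	;; In C terms (sketch):
+	;;   while (p >= 0x0002000000000000ULL) { p >>= 1; exp++; }
+	;;   while (p <  0x0001000000000000ULL) { p <<= 1; exp--; }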
+
+1:
+ mov a, r15
+ cmp0 a
+ bnz $2f
+ mov a, r14
+ and a, #0xfe
+ bz $1f
+2:
+ ;; shift right, inc exponent
+ movw ax, r14
+ shrw ax, 1
+ movw r14, ax
+ mov a, r13
+ rorc a, 1
+ mov r13, a
+ mov a, r12
+ rorc a, 1
+ mov r12, a
+ mov a, r11
+ rorc a, 1
+ mov r11, a
+ ;; we don't care about r8/r9/r10 if we're shifting this way
+ incw bc
+ br $1b
+1:
+ mov a, r15
+ or a, r14
+ bnz $1f
+ ;; shift left, dec exponent
+ movw ax, r8
+ shlw ax, 1
+ movw r8, ax
+ movw ax, r10
+ rolwc ax, 1
+ movw r10, ax
+ movw ax, r12
+ rolwc ax, 1
+ movw r12, ax
+ movw ax, r14
+ rolwc ax, 1
+ movw r14, ax
+ decw bc
+ br $1b
+1:
+ ;; at this point, FRAC is in R11..R14 and EXP is in BC
+ movw ax, bc
+ movw A_EXP, ax
+
+ mov a, r11
+ mov A_FRAC_L, a
+ mov a, r12
+ mov A_FRAC_LH, a
+ mov a, r13
+ mov A_FRAC_H, a
+ mov a, r14
+ mov A_FRAC_HH, a
+
+ mov a, A_SIGN
+ xor a, B_SIGN
+ mov A_SIGN, a
+
+ call $!__rl78_int_pack_a_r8
+
+ addw sp, #16
+ ret
+
+END_FUNC ___mulsf3
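+
+; For reference, a hedged C sketch of the core multiply above
+; (illustrative, using the unpacked format from _int_unpack_sf, where
+; each frac has its leading 1 at bit 24):
+;
+;   unsigned long long p = (unsigned long long) a.frac * b.frac;
+;   int exp = a.exp + b.exp;
+;   while (p >= 0x0002000000000000ULL) { p >>= 1; exp++; }
+;   while (p <  0x0001000000000000ULL) { p <<= 1; exp--; }
+;   a.frac  = (unsigned long) (p >> 24);  /* leading 1 back at bit 24 */
+;   a.exp   = exp;
+;   a.sign ^= b.sign;
+;   /* then pack a, as in __rl78_int_pack_a_r8 */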
+
+START_FUNC ___divsf3
+
+ ;; if (isnan(a)) return a
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_isnan
+ bnz $1f
+dret_a:
+ movw ax, [sp+4]
+ movw r8, ax
+ mov a, [sp+11]
+ and a, #0x80
+ mov b, a
+ movw ax, [sp+6]
+ xor a, b ; sign is always a ^ b
+ movw r10, ax
+ ret
+1:
+ ;; if (isnan (b)) return b;
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_isnan
+ bnz $1f
+dret_b:
+ movw ax, [sp+8]
+ movw r8, ax
+ mov a, [sp+7]
+ and a, #0x80
+ mov b, a
+ movw ax, [sp+10]
+ xor a, b ; sign is always a ^ b
+ movw r10, ax
+ ret
+1:
+
+ ;; if (isinf (a)) return isinf(b) ? nan : a
+
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call $!__int_isinf
+ bnz $1f
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_isinf
+ bnz $dret_a
+dret_nan:
+ movw r8, #0x0001 ; return NaN
+ movw r10, #0x7f80
+ ret
+
+1:
+
+ ;; if (iszero (a)) return iszero(b) ? nan : a
+
+ movw ax, sp
+ addw ax, #4
+ movw hl, ax
+ call !!__int_iszero
+ bnz $1f
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_iszero
+ bnz $dret_a
+ br $dret_nan
+
+1:
+ ;; if (isinf (b)) return 0
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call $!__int_isinf
+ bnz $1f
+
+ mov a, [sp+7]
+ mov b, a
+ mov a, [sp+11]
+ xor a, b
+ and a, #0x80
+ mov r11, a
+ movw r8, #0
+ mov r10, #0
+ ret
+
+1:
+ ;; if (iszero (b)) return Inf
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ call !!__int_iszero
+ bnz $1f
+
+ mov a, [sp+7]
+ mov b, a
+ mov a, [sp+11]
+ xor a, b
+ or a, #0x7f
+ mov r11, a
+ movw r8, #0
+ mov r10, #0x80
+ ret
+1:
+
+ ;; at this point, we're doing the division. Normalized
+ ;; mantissas look like:
+ ;; 01.xx.xx.xx
+ ;; so we divide:
+ ;; 01.xx.xx.xx.00.00.00.00
+ ;; by 01.xx.xx.xx
+ ;; to get approx 00.80.00.00.00 to 01.ff.ff.ff.00
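+;
+; For reference, a hedged C sketch of the core divide (illustrative,
+; same unpacked format, each frac with its leading 1 at bit 24):
+;
+;   unsigned long long q = ((unsigned long long) a.frac << 32) / b.frac;
+;   int exp = a.exp - b.exp;
+;   while (q >= 0x200000000ULL) { q >>= 1; exp++; }
+;   while (q <  0x100000000ULL) { q <<= 1; exp--; }
+;   a.frac  = (unsigned long) (q >> 8);   /* leading 1 back at bit 24 */
+;   a.exp   = exp;
+;   a.sign ^= b.sign;
+;   /* then pack a, as in __rl78_int_pack_a_r8 */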
+
+
+ subw sp, #16 ; save room for two unpacked values
+
+ movw ax, sp
+ movw hl, ax
+ addw ax, #16+4
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ addw ax, #8
+ movw hl, ax
+ addw ax, #16+8-8
+ movw de, ax
+ call $!_int_unpack_sf
+
+ movw ax, sp
+ movw hl, ax
+
+ ;; divide DI a.FRAC / SI b.FRAC to DI r8
+
+ subw sp, #16
+ movw ax, A_FRAC_L
+ movw [sp+4], ax
+ movw ax, A_FRAC_H
+ movw [sp+6], ax
+
+ movw ax, B_FRAC_L
+ movw [sp+8], ax
+ movw ax, B_FRAC_H
+ movw [sp+10], ax
+
+ movw ax, #0
+ movw [sp+0], ax
+ movw [sp+2], ax
+ movw [sp+12], ax
+ movw [sp+14], ax
+
+ call !!___divdi3 ; MTMPa / MTMPb -> R8..R15
+ addw sp, #16
+
+ movw ax, sp
+ movw hl, ax
+
+ ;; subtract the exponents A - B
+ movw ax, A_EXP
+ subw ax, B_EXP
+ movw bc, ax ; exponent in BC
+
+ ;; now, re-normalize the DI value in R8..R15 to have the
+ ;; MSB in the "right" place, adjusting BC as we shift it.
+
+ ;; The value will normally be in this range:
+ ;; R15 R8
+ ;; 0000_0000_8000_0000
+ ;; 0000_0001_ffff_ff00
+
+ ;; so to speed it up, we normalize to:
+ ;; 0000_0001_xxxx_xxxx
+ ;; then extract the bytes we want (r9-r12)
+
+1:
+ movw ax, r14
+ cmpw ax, #0
+ bnz $2f
+ movw ax, r12
+ cmpw ax, #1
+ bnh $1f
+2:
+ ;; shift right, inc exponent
+ movw ax, r14
+ shrw ax, 1
+ movw r14, ax
+ mov a, r13
+ rorc a, 1
+ mov r13, a
+ mov a, r12
+ rorc a, 1
+ mov r12, a
+ mov a, r11
+ rorc a, 1
+ mov r11, a
+ mov a, r10
+ rorc a, 1
+ mov r10, a
+ mov a, r9
+ rorc a, 1
+ mov r9, a
+ mov a, r8
+ rorc a, 1
+ mov r8, a
+
+ incw bc
+ br $1b
+1:
+	;; the previous loop leaves r13..r15 zero (and r12 is 0 or 1)
+ mov a, r12
+ cmp0 a
+ bnz $1f
+ ;; shift left, dec exponent
+ movw ax, r8
+ shlw ax, 1
+ movw r8, ax
+ movw ax, r10
+ rolwc ax, 1
+ movw r10, ax
+ movw ax, r12
+ rolwc ax, 1
+ movw r12, ax
+ ;; don't need to do r14
+ decw bc
+ br $1b
+1:
+	;; at this point, FRAC is in R9..R12 and EXP is in BC
+ movw ax, bc
+ movw A_EXP, ax
+
+ mov a, r9
+ mov A_FRAC_L, a
+ mov a, r10
+ mov A_FRAC_LH, a
+ mov a, r11
+ mov A_FRAC_H, a
+ mov a, r12
+ mov A_FRAC_HH, a
+
+ mov a, A_SIGN
+ xor a, B_SIGN
+ mov A_SIGN, a
+
+ call $!__rl78_int_pack_a_r8
+
+ addw sp, #16
+ ret
+
+END_FUNC ___divsf3