summaryrefslogtreecommitdiff
path: root/libc/i386fp/fdiv.x
diff options
context:
space:
mode:
Diffstat (limited to 'libc/i386fp/fdiv.x')
-rw-r--r--libc/i386fp/fdiv.x312
1 files changed, 312 insertions, 0 deletions
diff --git a/libc/i386fp/fdiv.x b/libc/i386fp/fdiv.x
new file mode 100644
index 0000000..4a5cf74
--- /dev/null
+++ b/libc/i386fp/fdiv.x
@@ -0,0 +1,312 @@
+#define EF_SIZE 4
+
+! bcc 386 floating point routines (version 2) -- Fdiv, Fdivd, Fdivf
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+#define FRAME_SIZE (3 * GENREG_SIZE + PC_SIZE)
+
+ .extern Fpushf
+ .extern fpdivzero
+ .extern fpoverflow
+ .extern fpunderflow
+
+! double Fdiv(double x, double y) returns x / y
+
+! pop 2 doubles from stack, divide first by second, and push quotient on stack
+
+! we denote upper and lower dwords of x and y (or their fractions)
+! by (xu,xl), (yu,yl)
+
+ .globl Fdivf
+ .align ALIGNMENT
+Fdivf:
+ sub esp,#D_SIZE ! make space for dummy double on stack
+ push ebp
+ push edi ! save some regs
+ push esi
+ mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ...
+ mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot
+ call Fpushf
+ pop esi ! yl
+ pop edi ! yu
+ mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl
+ mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu
+ jmp division
+
+ .globl Fdiv
+ .align ALIGNMENT
+Fdiv:
+ push ebp
+ push edi ! save some regs
+ push esi
+ mov eax,FRAME_SIZE+D_LOW[esp] ! xl
+ mov edx,FRAME_SIZE+D_HIGH[esp] ! xu
+ mov esi,FRAME_SIZE+D_SIZE+D_LOW[esp] ! yl
+ mov edi,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! yu
+ jmp division
+
+ .align ALIGNMENT
+exp_y_0:
+ mov ebx,edi
+ or ebx,esi
+ beq zerodivide
+ mov ebx,#1
+fix_y:
+ test edi,edi ! XXX - sloow
+ js y_unpacked
+ shld edi,esi,#1
+ shl esi,#1
+ dec bx
+ jmp fix_y
+
+ .align ALIGNMENT
+exp_x_0:
+ mov ecx,edx
+ or ecx,eax
+ beq retz
+ mov ecx,#1 ! change exponent from 0 to 1
+fix_x:
+ test edx,#1 << (REG_BIT-1-2) ! XXX - sloow
+ jnz x_unpacked
+ shld edx,eax,#1
+ shl eax,#1
+ dec cx
+ jmp fix_x
+
+! Fdivd pops double from stack, divides it by double at [ebx],
+! and pushes quotient back on stack
+
+ .globl Fdivd
+ .align ALIGNMENT
+Fdivd:
+ sub esp,#D_SIZE ! make space for dummy double on stack
+ push ebp
+ push edi ! save some regs
+ push esi
+ mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ...
+ mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot
+ mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl
+ mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu
+ mov esi,D_LOW[ebx] ! yl
+ mov edi,D_HIGH[ebx] ! yu
+
+division:
+
+! The full calculations are
+
+! (xu,xl,0) = yu * (zu,zl) + (0,r,0) (normal 96/32 -> 64 bit division)
+! yl * zu = yu * q1 + r1 (32*32 -> 64 bit mul and 64/32 -> 32 bit div)
+
+! so
+
+! (xu,xl,0,0) = (yu,yl) * (zu,zl-q1) + (0,0,r-r1,yl*(q1-zl))
+
+! where the calculations zl-q1, r-r1 and yl*(q1-zl) are more complicated
+! than the notation suggests. They may be negative and the one with the
+! multiplication may not fit in 32 bits and in both cases the overflow
+! has to be moved into higher bit positions.
+
+! See Knuth for why (zu,zl-q1) is the correct 64-bit quotient to within
+! 1 bit either way (assuming the normalization x < 2 * y).
+
+! We only need to calculate the remainder (0,0,r-r1,yl*(q1-zl)) to resolve
+! tie cases. It tells whether the approximate quotient is too high or too
+! low.
+
+#define NTEMPS 5
+
+ sub esp,#NTEMPS*GENREG_SIZE ! space to remember values for rounding of tie case
+
+! Offsets from esp for these values (offsets using FRAME_SIZE are invalid
+! while these temps are active)
+r = 0
+q1 = 4
+r1 = 8
+yl = 12
+zl = 16
+
+! Step 1: unpack and normalize x to fraction in edx:eax (left shifted as
+! far as possible less 2 so that x < y, and later z < y); unpack and normalize
+! y to a fraction in edi:esi (left shifted as far as possible), put difference
+! of signs (= sign of quotient) in ecx(D_SIGN_MASK) and difference of exponents
+! (= exponent of quotient before normalization) in cx.
+
+ mov ebp,edx ! xu
+ xor ebp,edi ! xu ^ yu
+ and ebp,#D_SIGN_MASK ! sign of result is difference of signs
+
+! Unpack y first to trap 0 / 0
+
+ mov ebx,edi ! remember yu for exponent of y
+ shld edi,esi,#D_BIT-D_FRAC_BIT ! extract fraction of y ...
+ shl esi,#D_BIT-D_FRAC_BIT
+ and ebx,#D_EXP_MASK ! exponent of y
+ jz exp_y_0
+ shr ebx,#D_EXP_SHIFT ! in ebx (actually in bx, with high bits 0)
+ or edi,#D_NORM_MASK << (D_BIT-D_FRAC_BIT) ! normalize
+y_unpacked:
+
+! Unpack x
+
+ mov ecx,edx ! remember xu for exponent of x
+ shld edx,eax,#D_BIT-D_FRAC_BIT-2 ! extract fraction of x ...
+ shl eax,#D_BIT-D_FRAC_BIT-2
+ and edx,#(D_NORM_MASK << (D_BIT-D_FRAC_BIT-2+1))-1
+ ! XXX - above may be shifted 1 extra unnecessarily
+ and ecx,#D_EXP_MASK ! exponent of x
+ jz exp_x_0
+ shr ecx,#D_EXP_SHIFT ! in ecx (actually in cx, with high bits 0)
+ or edx,#D_NORM_MASK << (D_BIT-D_FRAC_BIT-2) ! normalize
+x_unpacked:
+
+ sub cx,bx ! not ecx,ebx because we want to use high bit for sign
+ add cx,#D_EXP_BIAS ! adjust exponent of quotient
+
+ or ecx,ebp ! include sign with exponent
+
+! Step 2: quotient of fractions -> (edx,eax)
+
+! 2a: (xu,xl,0) div yu = (zu,zl) -> (ebx,esi)
+
+ div eax,edi ! (xu,xl) div yu = zu in eax; remainder (rem) in edx
+ mov ebx,eax ! save zu in ebx
+ sub eax,eax ! clear eax: (edx,eax) = (rem,0)
+ div eax,edi ! (rem,0) div yu = zl in eax
+ mov r[esp],edx
+ mov zl[esp],eax
+ xchg eax,esi ! store zl in esi; save yl in eax
+ mov yl[esp],eax
+
+! 2b: (yl * zu) div yu -> (0,eax)
+
+ mul eax,ebx ! yl * zu -> (edx,eax)
+ div eax,edi ! (yl * zu) div yu in eax
+ mov q1[esp],eax
+ mov r1[esp],edx
+
+! 2c: (xu,xl) / (yu,yl) = (zu,zl) - (yl * zu) div yu -> (edx,eax)
+
+ mov edx,ebx ! zu
+ xchg eax,esi ! eax <- zl; esi <- (yl * zu) div yu
+ sub eax,esi
+ sbb edx,#0
+
+! Step 3: normalise quotient
+
+ test edx,#1 << (REG_BIT-2) ! is fraction too small? (can only be by 1 bit)
+ jnz div4
+ shld edx,eax,#1 ! yes; multiply fraction ...
+ shl eax,#1 ! ... by 2 ...
+ dec cx ! ... and decrement exponent
+
+! Step 4: shift and round
+
+div4:
+ mov ebx,eax ! save for rounding
+ shrd eax,edx,#D_BIT-D_FRAC_BIT-1 ! shift fraction of result ...
+ shr edx,#D_BIT-D_FRAC_BIT-1 ! ... to proper position
+ and ebx,#(1 << (D_BIT-D_FRAC_BIT-1))-1 ! look at bits shifted out
+ cmp ebx,#D_NORM_MASK >> (D_BIT-D_FRAC_BIT) ! compare with middle value
+ jb div5 ! below middle, don't round up
+ ja roundup ! above middle, round up
+
+! The low bits don't contain enough information to resolve the tie case,
+! because the quotient itself is only an approximation.
+! Calculate the exact remainder.
+! This case is not very common, so don't worry much about speed.
+! Unfortunately we had to save extra in all cases to prepare for it.
+
+ push edx
+ push eax
+
+ sub esi,esi ! the calculation requires 33 bits - carry to here
+ mov eax,2*GENREG_SIZE+q1[esp]
+ sub eax,2*GENREG_SIZE+zl[esp]
+ pushfd
+ mul dword EF_SIZE+2*GENREG_SIZE+yl[esp]
+ popfd
+ jnc foo
+ sub edx,2*GENREG_SIZE+yl[esp]
+ sbb esi,#0
+foo:
+ add edx,2*GENREG_SIZE+r[esp]
+ adc esi,#0
+ sub edx,2*GENREG_SIZE+r1[esp]
+ sbb esi,#0
+ mov ebx,eax
+ mov edi,edx
+
+ pop eax
+ pop edx
+
+! Can finally decide rounding of tie case
+
+ js div5 ! remainder < 0 from looking at top 64 bits
+ jnz roundup ! remainder > 0 from looking at top 64 bits
+ or edi,ebx ! test bottom 64 bits
+ jnz roundup ! remainder > 0
+
+ test al,#1 ! at last we know it is the tie case, check parity bit
+ jz div5 ! already even, otherwise round up to make even
+
+roundup:
+ add eax,#1 ! add rounding bit
+ adc edx,#0
+ test edx,#D_NORM_MASK << 1 ! has fraction overflowed (very unlikely)
+ jz div5
+! Why were the shifts commented out?
+ shrd eax,edx,#1 ! yes, divide fraction ...
+ shr edx,#1 ! ... by 2 ...
+ inc cx ! ... and increment exponent
+
+! Step 5: put it all together
+
+div5:
+ mov ebx,ecx ! extract sign
+ and ebx,D_SIGN_MASK
+ cmp cx,#D_EXP_INFINITE ! is exponent too big?
+ jge overflow
+ test cx,cx
+ jle underflow
+ shl ecx,#D_EXP_SHIFT
+
+ and edx,#D_FRAC_MASK ! remove norm bit
+ or edx,ecx ! include exponent ...
+ or edx,ebx ! ... and sign
+
+return:
+ add esp,#NTEMPS*GENREG_SIZE ! reclaim temp space
+ mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax ! "push" lower dword of product ...
+ mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx ! ... and upper dword
+ pop esi ! restore registers
+ pop edi
+ pop ebp
+ ret #D_SIZE
+
+retz:
+ sub edx,edx ! clear upper dword
+ sub eax,eax ! ... and lower dword
+ jmp return
+
+overflow:
+ mov edx,ecx ! put sign in usual reg
+ call fpoverflow
+ mov eax,ecx ! XXX - wrong reg
+ jmp return
+
+underflow:
+ mov esi,edx ! put upper part of fraction in usual reg
+ mov edx,ecx ! sign
+ movsx edi,cx ! put shift in usual reg
+ neg edi
+ inc edi
+ call fpunderflow
+ jmp return
+
+zerodivide:
+ mov edx,ebp ! sign
+ call fpdivzero
+ mov eax,ecx ! XXX - wrong reg
+ jmp return