diff options
Diffstat (limited to 'bin86-0.3/bccfp/fdiv.x')
-rw-r--r-- | bin86-0.3/bccfp/fdiv.x | 312 |
1 files changed, 312 insertions, 0 deletions
diff --git a/bin86-0.3/bccfp/fdiv.x b/bin86-0.3/bccfp/fdiv.x new file mode 100644 index 0000000..4a5cf74 --- /dev/null +++ b/bin86-0.3/bccfp/fdiv.x @@ -0,0 +1,312 @@ +#define EF_SIZE 4 + +! bcc 386 floating point routines (version 2) -- Fdiv, Fdivd, Fdivf +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + +#define FRAME_SIZE (3 * GENREG_SIZE + PC_SIZE) + + .extern Fpushf + .extern fpdivzero + .extern fpoverflow + .extern fpunderflow + +! double Fdiv(double x, double y) returns x / y + +! pop 2 doubles from stack, divide first by second, and push quotient on stack + +! we denote upper and lower dwords of x and y (or their fractions) +! by (xu,xl), (yu,yl) + + .globl Fdivf + .align ALIGNMENT +Fdivf: + sub esp,#D_SIZE ! make space for dummy double on stack + push ebp + push edi ! save some regs + push esi + mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ... + mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot + call Fpushf + pop esi ! yl + pop edi ! yu + mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl + mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu + jmp division + + .globl Fdiv + .align ALIGNMENT +Fdiv: + push ebp + push edi ! save some regs + push esi + mov eax,FRAME_SIZE+D_LOW[esp] ! xl + mov edx,FRAME_SIZE+D_HIGH[esp] ! xu + mov esi,FRAME_SIZE+D_SIZE+D_LOW[esp] ! yl + mov edi,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! yu + jmp division + + .align ALIGNMENT +exp_y_0: + mov ebx,edi + or ebx,esi + beq zerodivide + mov ebx,#1 +fix_y: + test edi,edi ! XXX - sloow + js y_unpacked + shld edi,esi,#1 + shl esi,#1 + dec bx + jmp fix_y + + .align ALIGNMENT +exp_x_0: + mov ecx,edx + or ecx,eax + beq retz + mov ecx,#1 ! change exponent from 0 to 1 +fix_x: + test edx,#1 << (REG_BIT-1-2) ! XXX - sloow + jnz x_unpacked + shld edx,eax,#1 + shl eax,#1 + dec cx + jmp fix_x + +! Fdivd pops double from stack, divides it by double at [ebx], +! and pushes quotient back on stack + + .globl Fdivd + .align ALIGNMENT +Fdivd: + sub esp,#D_SIZE ! make space for dummy double on stack + push ebp + push edi ! save some regs + push esi + mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ... + mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot + mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl + mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu + mov esi,D_LOW[ebx] ! yl + mov edi,D_HIGH[ebx] ! yu + +division: + +! The full calculations are + +! (xu,xl,0) = yu * (zu,zl) + (0,r,0) (normal 96/32 -> 64 bit division) +! yl * zu = yu * q1 + r1 (32*32 -> 64 bit mul and 64/32 -> 32 bit div) + +! so + +! (xu,xl,0,0) = (yu,yl) * (zu,zl-q1) + (0,0,r-r1,yl*(q1-zl)) + +! where the calculations zl-q1, r-r1 and yl*(q1-zl) are more complicated +! than the notation suggests. They may be negative and the one with the +! multiplication may not fit in 32 bits and in both cases the overflow +! has to be moved into higher bit positions. + +! See Knuth for why (zu,zl-q1) is the correct 64-bit quotient to within +! 1 bit either way (assuming the normalization x < 2 * y). + +! We only need to calculate the remainder (0,0,r-r1,yl*(q1-zl)) to resolve +! tie cases. It tells whether the approximate quotient is too high or too +! low. + +#define NTEMPS 5 + + sub esp,#NTEMPS*GENREG_SIZE ! space to remember values for rounding of tie case + +! Offsets from esp for these values (offsets using FRAME_SIZE are invalid +! while these temps are active) +r = 0 +q1 = 4 +r1 = 8 +yl = 12 +zl = 16 + +! Step 1: unpack and normalize x to fraction in edx:eax (left shifted as +! far as possible less 2 so that x < y, and later z < y); unpack and normalize +! y to a fraction in edi:esi (left shifted as far as possible), put difference +! of signs (= sign of quotient) in ecx(D_SIGN_MASK) and difference of exponents +! (= exponent of quotient before normalization) in cx. + + mov ebp,edx ! xu + xor ebp,edi ! xu ^ yu + and ebp,#D_SIGN_MASK ! sign of result is difference of signs + +! Unpack y first to trap 0 / 0 + + mov ebx,edi ! remember yu for exponent of y + shld edi,esi,#D_BIT-D_FRAC_BIT ! extract fraction of y ... + shl esi,#D_BIT-D_FRAC_BIT + and ebx,#D_EXP_MASK ! exponent of y + jz exp_y_0 + shr ebx,#D_EXP_SHIFT ! in ebx (actually in bx, with high bits 0) + or edi,#D_NORM_MASK << (D_BIT-D_FRAC_BIT) ! normalize +y_unpacked: + +! Unpack x + + mov ecx,edx ! remember xu for exponent of x + shld edx,eax,#D_BIT-D_FRAC_BIT-2 ! extract fraction of x ... + shl eax,#D_BIT-D_FRAC_BIT-2 + and edx,#(D_NORM_MASK << (D_BIT-D_FRAC_BIT-2+1))-1 + ! XXX - above may be shifted 1 extra unnecessarily + and ecx,#D_EXP_MASK ! exponent of x + jz exp_x_0 + shr ecx,#D_EXP_SHIFT ! in ecx (actually in cx, with high bits 0) + or edx,#D_NORM_MASK << (D_BIT-D_FRAC_BIT-2) ! normalize +x_unpacked: + + sub cx,bx ! not ecx,ebx because we want to use high bit for sign + add cx,#D_EXP_BIAS ! adjust exponent of quotient + + or ecx,ebp ! include sign with exponent + +! Step 2: quotient of fractions -> (edx,eax) + +! 2a: (xu,xl,0) div yu = (zu,zl) -> (ebx,esi) + + div eax,edi ! (xu,xl) div yu = zu in eax; remainder (rem) in edx + mov ebx,eax ! save zu in ebx + sub eax,eax ! clear eax: (edx,eax) = (rem,0) + div eax,edi ! (rem,0) div yu = zl in eax + mov r[esp],edx + mov zl[esp],eax + xchg eax,esi ! store zl in esi; save yl in eax + mov yl[esp],eax + +! 2b: (yl * zu) div yu -> (0,eax) + + mul eax,ebx ! yl * zu -> (edx,eax) + div eax,edi ! (yl * zu) div yu in eax + mov q1[esp],eax + mov r1[esp],edx + +! 2c: (xu,xl) / (yu,yl) = (zu,zl) - (yl * zu) div yu -> (edx,eax) + + mov edx,ebx ! zu + xchg eax,esi ! eax <- zl; esi <- (yl * zu) div yu + sub eax,esi + sbb edx,#0 + +! Step 3: normalise quotient + + test edx,#1 << (REG_BIT-2) ! is fraction too small? (can only be by 1 bit) + jnz div4 + shld edx,eax,#1 ! yes; multiply fraction ... + shl eax,#1 ! ... by 2 ... + dec cx ! ... and decrement exponent + +! Step 4: shift and round + +div4: + mov ebx,eax ! save for rounding + shrd eax,edx,#D_BIT-D_FRAC_BIT-1 ! shift fraction of result ... + shr edx,#D_BIT-D_FRAC_BIT-1 ! ... to proper position + and ebx,#(1 << (D_BIT-D_FRAC_BIT-1))-1 ! look at bits shifted out + cmp ebx,#D_NORM_MASK >> (D_BIT-D_FRAC_BIT) ! compare with middle value + jb div5 ! below middle, don't round up + ja roundup ! above middle, round up + +! The low bits don't contain enough information to resolve the tie case, +! because the quotient itself is only an approximation. +! Calculate the exact remainder. +! This case is not very common, so don't worry much about speed. +! Unfortunately we had to save extra in all cases to prepare for it. + + push edx + push eax + + sub esi,esi ! the calculation requires 33 bits - carry to here + mov eax,2*GENREG_SIZE+q1[esp] + sub eax,2*GENREG_SIZE+zl[esp] + pushfd + mul dword EF_SIZE+2*GENREG_SIZE+yl[esp] + popfd + jnc foo + sub edx,2*GENREG_SIZE+yl[esp] + sbb esi,#0 +foo: + add edx,2*GENREG_SIZE+r[esp] + adc esi,#0 + sub edx,2*GENREG_SIZE+r1[esp] + sbb esi,#0 + mov ebx,eax + mov edi,edx + + pop eax + pop edx + +! Can finally decide rounding of tie case + + js div5 ! remainder < 0 from looking at top 64 bits + jnz roundup ! remainder > 0 from looking at top 64 bits + or edi,ebx ! test bottom 64 bits + jnz roundup ! remainder > 0 + + test al,#1 ! at last we know it is the tie case, check parity bit + jz div5 ! already even, otherwise round up to make even + +roundup: + add eax,#1 ! add rounding bit + adc edx,#0 + test edx,#D_NORM_MASK << 1 ! has fraction overflowed (very unlikely) + jz div5 +! Why were the shifts commented out? + shrd eax,edx,#1 ! yes, divide fraction ... + shr edx,#1 ! ... by 2 ... + inc cx ! ... and increment exponent + +! Step 5: put it all together + +div5: + mov ebx,ecx ! extract sign + and ebx,D_SIGN_MASK + cmp cx,#D_EXP_INFINITE ! is exponent too big? + jge overflow + test cx,cx + jle underflow + shl ecx,#D_EXP_SHIFT + + and edx,#D_FRAC_MASK ! remove norm bit + or edx,ecx ! include exponent ... + or edx,ebx ! ... and sign + +return: + add esp,#NTEMPS*GENREG_SIZE ! reclaim temp space + mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax ! "push" lower dword of product ... + mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx ! ... and upper dword + pop esi ! restore registers + pop edi + pop ebp + ret #D_SIZE + +retz: + sub edx,edx ! clear upper dword + sub eax,eax ! ... and lower dword + jmp return + +overflow: + mov edx,ecx ! put sign in usual reg + call fpoverflow + mov eax,ecx ! XXX - wrong reg + jmp return + +underflow: + mov esi,edx ! put upper part of fraction in usual reg + mov edx,ecx ! sign + movsx edi,cx ! put shift in usual reg + neg edi + inc edi + call fpunderflow + jmp return + +zerodivide: + mov edx,ebp ! sign + call fpdivzero + mov eax,ecx ! XXX - wrong reg + jmp return |