#define EF_SIZE 4 ! bcc 386 floating point routines (version 2) -- Fdiv, Fdivd, Fdivf ! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans #include "fplib.h" #define FRAME_SIZE (3 * GENREG_SIZE + PC_SIZE) .extern Fpushf .extern fpdivzero .extern fpoverflow .extern fpunderflow ! double Fdiv(double x, double y) returns x / y ! pop 2 doubles from stack, divide first by second, and push quotient on stack ! we denote upper and lower dwords of x and y (or their fractions) ! by (xu,xl), (yu,yl) .globl Fdivf .align ALIGNMENT Fdivf: sub esp,#D_SIZE ! make space for dummy double on stack push ebp push edi ! save some regs push esi mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ... mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot call Fpushf pop esi ! yl pop edi ! yu mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu jmp division .globl Fdiv .align ALIGNMENT Fdiv: push ebp push edi ! save some regs push esi mov eax,FRAME_SIZE+D_LOW[esp] ! xl mov edx,FRAME_SIZE+D_HIGH[esp] ! xu mov esi,FRAME_SIZE+D_SIZE+D_LOW[esp] ! yl mov edi,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! yu jmp division .align ALIGNMENT exp_y_0: mov ebx,edi or ebx,esi beq zerodivide mov ebx,#1 fix_y: test edi,edi ! XXX - sloow js y_unpacked shld edi,esi,#1 shl esi,#1 dec bx jmp fix_y .align ALIGNMENT exp_x_0: mov ecx,edx or ecx,eax beq retz mov ecx,#1 ! change exponent from 0 to 1 fix_x: test edx,#1 << (REG_BIT-1-2) ! XXX - sloow jnz x_unpacked shld edx,eax,#1 shl eax,#1 dec cx jmp fix_x ! Fdivd pops double from stack, divides it by double at [ebx], ! and pushes quotient back on stack .globl Fdivd .align ALIGNMENT Fdivd: sub esp,#D_SIZE ! make space for dummy double on stack push ebp push edi ! save some regs push esi mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ... mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu mov esi,D_LOW[ebx] ! yl mov edi,D_HIGH[ebx] ! yu division: ! The full calculations are ! (xu,xl,0) = yu * (zu,zl) + (0,r,0) (normal 96/32 -> 64 bit division) ! yl * zu = yu * q1 + r1 (32*32 -> 64 bit mul and 64/32 -> 32 bit div) ! so ! (xu,xl,0,0) = (yu,yl) * (zu,zl-q1) + (0,0,r-r1,yl*(q1-zl)) ! where the calculations zl-q1, r-r1 and yl*(q1-zl) are more complicated ! than the notation suggests. They may be negative and the one with the ! multiplication may not fit in 32 bits and in both cases the overflow ! has to be moved into higher bit positions. ! See Knuth for why (zu,zl-q1) is the correct 64-bit quotient to within ! 1 bit either way (assuming the normalization x < 2 * y). ! We only need to calculate the remainder (0,0,r-r1,yl*(q1-zl)) to resolve ! tie cases. It tells whether the approximate quotient is too high or too ! low. #define NTEMPS 5 sub esp,#NTEMPS*GENREG_SIZE ! space to remember values for rounding of tie case ! Offsets from esp for these values (offsets using FRAME_SIZE are invalid ! while these temps are active) r = 0 q1 = 4 r1 = 8 yl = 12 zl = 16 ! Step 1: unpack and normalize x to fraction in edx:eax (left shifted as ! far as possible less 2 so that x < y, and later z < y); unpack and normalize ! y to a fraction in edi:esi (left shifted as far as possible), put difference ! of signs (= sign of quotient) in ecx(D_SIGN_MASK) and difference of exponents ! (= exponent of quotient before normalization) in cx. mov ebp,edx ! xu xor ebp,edi ! xu ^ yu and ebp,#D_SIGN_MASK ! sign of result is difference of signs ! Unpack y first to trap 0 / 0 mov ebx,edi ! remember yu for exponent of y shld edi,esi,#D_BIT-D_FRAC_BIT ! extract fraction of y ... shl esi,#D_BIT-D_FRAC_BIT and ebx,#D_EXP_MASK ! exponent of y jz exp_y_0 shr ebx,#D_EXP_SHIFT ! in ebx (actually in bx, with high bits 0) or edi,#D_NORM_MASK << (D_BIT-D_FRAC_BIT) ! normalize y_unpacked: ! Unpack x mov ecx,edx ! remember xu for exponent of x shld edx,eax,#D_BIT-D_FRAC_BIT-2 ! extract fraction of x ... shl eax,#D_BIT-D_FRAC_BIT-2 and edx,#(D_NORM_MASK << (D_BIT-D_FRAC_BIT-2+1))-1 ! XXX - above may be shifted 1 extra unnecessarily and ecx,#D_EXP_MASK ! exponent of x jz exp_x_0 shr ecx,#D_EXP_SHIFT ! in ecx (actually in cx, with high bits 0) or edx,#D_NORM_MASK << (D_BIT-D_FRAC_BIT-2) ! normalize x_unpacked: sub cx,bx ! not ecx,ebx because we want to use high bit for sign add cx,#D_EXP_BIAS ! adjust exponent of quotient or ecx,ebp ! include sign with exponent ! Step 2: quotient of fractions -> (edx,eax) ! 2a: (xu,xl,0) div yu = (zu,zl) -> (ebx,esi) div eax,edi ! (xu,xl) div yu = zu in eax; remainder (rem) in edx mov ebx,eax ! save zu in ebx sub eax,eax ! clear eax: (edx,eax) = (rem,0) div eax,edi ! (rem,0) div yu = zl in eax mov r[esp],edx mov zl[esp],eax xchg eax,esi ! store zl in esi; save yl in eax mov yl[esp],eax ! 2b: (yl * zu) div yu -> (0,eax) mul eax,ebx ! yl * zu -> (edx,eax) div eax,edi ! (yl * zu) div yu in eax mov q1[esp],eax mov r1[esp],edx ! 2c: (xu,xl) / (yu,yl) = (zu,zl) - (yl * zu) div yu -> (edx,eax) mov edx,ebx ! zu xchg eax,esi ! eax <- zl; esi <- (yl * zu) div yu sub eax,esi sbb edx,#0 ! Step 3: normalise quotient test edx,#1 << (REG_BIT-2) ! is fraction too small? (can only be by 1 bit) jnz div4 shld edx,eax,#1 ! yes; multiply fraction ... shl eax,#1 ! ... by 2 ... dec cx ! ... and decrement exponent ! Step 4: shift and round div4: mov ebx,eax ! save for rounding shrd eax,edx,#D_BIT-D_FRAC_BIT-1 ! shift fraction of result ... shr edx,#D_BIT-D_FRAC_BIT-1 ! ... to proper position and ebx,#(1 << (D_BIT-D_FRAC_BIT-1))-1 ! look at bits shifted out cmp ebx,#D_NORM_MASK >> (D_BIT-D_FRAC_BIT) ! compare with middle value jb div5 ! below middle, don't round up ja roundup ! above middle, round up ! The low bits don't contain enough information to resolve the tie case, ! because the quotient itself is only an approximation. ! Calculate the exact remainder. ! This case is not very common, so don't worry much about speed. ! Unfortunately we had to save extra in all cases to prepare for it. push edx push eax sub esi,esi ! the calculation requires 33 bits - carry to here mov eax,2*GENREG_SIZE+q1[esp] sub eax,2*GENREG_SIZE+zl[esp] pushfd mul dword EF_SIZE+2*GENREG_SIZE+yl[esp] popfd jnc foo sub edx,2*GENREG_SIZE+yl[esp] sbb esi,#0 foo: add edx,2*GENREG_SIZE+r[esp] adc esi,#0 sub edx,2*GENREG_SIZE+r1[esp] sbb esi,#0 mov ebx,eax mov edi,edx pop eax pop edx ! Can finally decide rounding of tie case js div5 ! remainder < 0 from looking at top 64 bits jnz roundup ! remainder > 0 from looking at top 64 bits or edi,ebx ! test bottom 64 bits jnz roundup ! remainder > 0 test al,#1 ! at last we know it is the tie case, check parity bit jz div5 ! already even, otherwise round up to make even roundup: add eax,#1 ! add rounding bit adc edx,#0 test edx,#D_NORM_MASK << 1 ! has fraction overflowed (very unlikely) jz div5 ! Why were the shifts commented out? shrd eax,edx,#1 ! yes, divide fraction ... shr edx,#1 ! ... by 2 ... inc cx ! ... and increment exponent ! Step 5: put it all together div5: mov ebx,ecx ! extract sign and ebx,D_SIGN_MASK cmp cx,#D_EXP_INFINITE ! is exponent too big? jge overflow test cx,cx jle underflow shl ecx,#D_EXP_SHIFT and edx,#D_FRAC_MASK ! remove norm bit or edx,ecx ! include exponent ... or edx,ebx ! ... and sign return: add esp,#NTEMPS*GENREG_SIZE ! reclaim temp space mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax ! "push" lower dword of product ... mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx ! ... and upper dword pop esi ! restore registers pop edi pop ebp ret #D_SIZE retz: sub edx,edx ! clear upper dword sub eax,eax ! ... and lower dword jmp return overflow: mov edx,ecx ! put sign in usual reg call fpoverflow mov eax,ecx ! XXX - wrong reg jmp return underflow: mov esi,edx ! put upper part of fraction in usual reg mov edx,ecx ! sign movsx edi,cx ! put shift in usual reg neg edi inc edi call fpunderflow jmp return zerodivide: mov edx,ebp ! sign call fpdivzero mov eax,ecx ! XXX - wrong reg jmp return