! bcc 386 floating point routines (version 2)
! -- Fadd, Faddd, Faddf, Fsub, Fsubd, Fsubf, normalize2
! author: Bruce Evans
!
! Syntax: as86 (operands are dst,src; # marks an immediate; ! starts a
! comment).  The file is run through the C preprocessor first.
!
! All six entry points load two doubles into registers, call the local
! routine "addition" and store the 64-bit result back on the stack.
! The Fsub variants flip the sign bit of y and then add.
!
! Register convention of "addition" (as used by every caller below):
!	in:	edx:eax = x (high:low), ecx:ebx = y (high:low)
!	out:	edx:eax = result (high:low)
!	writes:	ebx, ecx, esi, edi, ebp (the entry points save esi/edi/ebp)

#include "fplib.h"

#define FRAME_SIZE	(3 * GENREG_SIZE + PC_SIZE)

	.extern	Fpushf
	.extern	fpdenormal
	.extern	fpoverflow
	.extern	fpunderflow

! Fadd: both operands are on the stack.  x is the double nearest the return
! address, y is the one above it.  The result overwrites the y slot and the
! x slot is popped by the ret.

	.globl	Fadd
	.align	ALIGNMENT
Fadd:
	push	ebp
	push	edi
	push	esi
	mov	eax,FRAME_SIZE+D_LOW[esp]
	mov	edx,FRAME_SIZE+D_HIGH[esp]
	mov	ebx,FRAME_SIZE+D_SIZE+D_LOW[esp]
	mov	ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp]
	call	addition
	mov	FRAME_SIZE+D_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret	#D_SIZE

! Faddd: x is on the stack, y is the double in memory addressed by ebx.
! The result overwrites x in place; nothing is popped.

	.globl	Faddd
	.align	ALIGNMENT
Faddd:
	push	ebp
	push	edi
	push	esi
	mov	eax,FRAME_SIZE+D_LOW[esp]
	mov	edx,FRAME_SIZE+D_HIGH[esp]
	mov	ecx,D_HIGH[ebx]		! load high word first, ebx is still the pointer
	mov	ebx,D_LOW[ebx]
	call	addition
	mov	FRAME_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret

! Faddf: x is on the stack, y is obtained from Fpushf, which leaves two
! words on the stack that are popped into ebx (low) and ecx (high).
! NOTE(review): presumably Fpushf widens a float operand to a double and
! pushes it - confirm against the Fpushf source.

	.globl	Faddf
	.align	ALIGNMENT
Faddf:
	push	ebp
	push	edi
	push	esi
	call	Fpushf
	pop	ebx		! yl
	pop	ecx		! yu
	mov	eax,FRAME_SIZE+D_LOW[esp]	! xl
	mov	edx,FRAME_SIZE+D_HIGH[esp]	! xu
	call	addition
	mov	FRAME_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret

! Fsub: same stack layout as Fadd, with the sign of y complemented.

	.globl	Fsub
	.align	ALIGNMENT
Fsub:
	push	ebp
	push	edi
	push	esi
	mov	eax,FRAME_SIZE+D_LOW[esp]
	mov	edx,FRAME_SIZE+D_HIGH[esp]
	mov	ebx,FRAME_SIZE+D_SIZE+D_LOW[esp]
	mov	ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp]
	xor	ecx,#D_SIGN_MASK	! complement sign
	call	addition
	mov	FRAME_SIZE+D_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret	#D_SIZE

! Fsubd: same layout as Faddd, with the sign of y complemented.

	.globl	Fsubd
	.align	ALIGNMENT
Fsubd:
	push	ebp
	push	edi
	push	esi
	mov	eax,FRAME_SIZE+D_LOW[esp]
	mov	edx,FRAME_SIZE+D_HIGH[esp]
	mov	ecx,D_HIGH[ebx]
	mov	ebx,D_LOW[ebx]
	xor	ecx,#D_SIGN_MASK	! complement sign
	call	addition
	mov	FRAME_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret

! Fsubf: same layout as Faddf, with the sign of y complemented.

	.globl	Fsubf
	.align	ALIGNMENT
Fsubf:
	push	ebp
	push	edi
	push	esi
	call	Fpushf
	pop	ebx		! yl
	pop	ecx		! yu
	mov	eax,FRAME_SIZE+D_LOW[esp]	! xl
	mov	edx,FRAME_SIZE+D_HIGH[esp]	! xu
	xor	ecx,#D_SIGN_MASK	! complement sign
	call	addition
	mov	FRAME_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret

! Reached from "addition" when the exponent field of y is zero.
! At this point: esi = x high word without sign, edi = y high word without
! sign, ebp = x high xor y high (sign difference), edx = x high word intact.

	.align	ALIGNMENT
exp_y_0:

! Check for x denormal, to split off special case where both are denormal,
! so the norm bit (or 1 higher) is known to be set for addition, so addition
! can be done faster

	test	esi,#D_EXP_MASK
	jnz	x_normal_exp_y_0
	test	esi,esi		! test top bits of x fraction
	jnz	both_denorm	! denormal iff nonzero fraction with zero exp
	test	eax,eax		! test rest of fraction
	jz	return_edx_eax	! everything 0 (XXX - do signs matter?)
both_denorm:
	call	fpdenormal
	test	ebp,#D_SIGN_MASK
	jnz	denorm_subtract

! Add denormal x to denormal or zero y

#if D_NORM_BIT != D_EXP_SHIFT
#include "error, carry into norm bit does not go into exponent"
#endif

	add	eax,ebx
	adc	esi,edi
	and	edx,#D_SIGN_MASK	! keep only the sign: edx still holds the old
				! fraction of x, which must not be OR-ed into
				! the new fraction
	or	edx,esi
	ret

denorm_subtract:
	sub	eax,ebx		! no borrow out: magnitude of x >= magnitude of y
	sbb	esi,edi
	and	edx,#D_SIGN_MASK	! discard old fraction of x, as above
	or	edx,esi
	ret

	.align	ALIGNMENT
x_normal_exp_y_0:
	test	edi,edi		! this is like the check for x denormal
	jnz	y_denorm
	test	ebx,ebx
	jz	return_edx_eax	! y = 0
y_denorm:
	call	fpdenormal
	or	ecx,#1 << D_EXP_SHIFT	! normalize y by setting exponent to 1
	jmp	got_y

	.align	ALIGNMENT
return_edx_eax:
	ret

! Same-sign addition with an exponent difference of REG_BIT or more.
! y is shifted right by (difference - REG_BIT) and lands one register lower.

	.align	ALIGNMENT
add_bigshift:
	cmp	ecx,#D_FRAC_BIT+2
	jae	return_edx_eax	! x dominates y
	sub	ecx,#REG_BIT
	shrd	ebp,ebx,cl
	shrd	ebx,edi,cl
	shr	edi,cl
	add	eax,edi
	adc	esi,#0
	xchg	ebp,ebx		! put guard words in the order normalize expects
	br	normalize

! addition: edx:eax = x, ecx:ebx = y; returns the sum in edx:eax.
! The operands are first ordered so that x has the larger magnitude.

	.align	ALIGNMENT
addition:
	mov	esi,edx		! this mainly for consistent naming
	and	esi,#D_EXP_MASK | D_FRAC_MASK	! discard sign so comparison is simple
	mov	edi,ecx		! free cl for shifts
	and	edi,#D_EXP_MASK | D_FRAC_MASK
	cmp	esi,edi
	ja	xbigger
	jb	swap
	cmp	eax,ebx
	jae	xbigger
swap:
	xchg	edx,ecx
	xchg	eax,ebx
	xchg	esi,edi
xbigger:

! edx holds sign of result from here on
! and exponent of result before the normalization step

	mov	ebp,edx		! prepare difference of signs
	xor	ebp,ecx

	and	ecx,#D_EXP_MASK	! extract exp_y and check for y 0 or denormal
	beq	exp_y_0		! otherwise x is not 0 or denormal either
	and	edi,#D_FRAC_MASK	! extract fraction
	or	edi,#D_NORM_MASK	! normalize
got_y:
	and	esi,#D_FRAC_MASK	! extract fraction
	or	esi,#D_NORM_MASK	! normalize

	sub	ecx,edx		! carries from non-exp bits in edx killed later
	neg	ecx
	and	ecx,#D_EXP_MASK
	shr	ecx,#D_EXP_SHIFT	! difference of exponents

got_x_and_y:
	and	ebp,#D_SIGN_MASK	! see if signs are same
	bne	subtract	! else roundoff reg ebp has been cleared

	cmp	ecx,#REG_BIT	! compare the whole difference: it can exceed
				! 255, so testing only cl would let a large
				! difference pass as a small shift count
	bhis	add_bigshift
	shrd	ebp,ebx,cl
	shrd	ebx,edi,cl
	shr	edi,cl
	add	eax,ebx
	adc	esi,edi

! result edx(D_SIGN_MASK | D_EXP_MASK bits):esi:eax:ebp but needs normalization

	mov	edi,edx
	and	edi,#D_EXP_MASK
	test	esi,#D_NORM_MASK << 1
	jnz	add_loverflow

add_round:
	cmp	ebp,#1 << (REG_BIT-1)	! test roundoff register
	jb	add_done	! no rounding
	jz	tie
add_roundup:
	add	eax,#1
	adc	esi,#0
	test	esi,#D_NORM_MASK << 1
	jnz	pre_add_loverflow	! rounding may cause overflow!
add_done:
	mov	ecx,edx		! duplicated code from 'done'
	and	edx,#D_SIGN_MASK
	or	edx,edi
	and	esi,#D_FRAC_MASK
	or	edx,esi
	ret

	.align	ALIGNMENT
tie:
	test	al,#1		! tie case, round to even
	jz	add_done	! even, no rounding
	jmp	add_roundup

	.align	ALIGNMENT
pre_add_loverflow:
	sub	ebp,ebp		! clear rounding register
				! probably avoiding tests for more rounding
add_loverflow:
	shrd	ebp,eax,#1
	jnc	over_set_sticky_bit
	or	ebp,#1		! bit shifted out of ebp is kept as a sticky bit
over_set_sticky_bit:
	shrd	eax,esi,#1
	shr	esi,#1
	add	edi,#1 << D_EXP_SHIFT	! bump exponent by one (immediate operand)
	cmp	edi,#D_EXP_INFINITE << D_EXP_SHIFT
	jl	add_round
overflow:
	call	fpoverflow
	mov	eax,ecx		! XXX - wrong reg
				! NOTE(review): left as the author wrote it; the
				! register in which fpoverflow returns its value
				! is not visible from this file
	ret

! result edx(D_SIGN_MASK | D_EXP_MASK bits):
! esi((D_NORM_MASK << 1) | D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx
! but needs normalization

	.align	ALIGNMENT
normalize:
	mov	edi,edx
	and	edi,#D_EXP_MASK
	test	esi,#D_NORM_MASK << 1
	bne	loverflow

! result edx(D_SIGN_MASK bit):edi(D_EXP_MASK bits):
! esi(D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx
! but needs normalization

	.globl	normalize2
normalize2:
	test	esi,#D_NORM_MASK	! already-normalized is very common
	jz	normalize3
round:
	cmp	ebp,#1 << (REG_BIT-1)	! test roundoff register
	jb	done		! no rounding
	jz	near_tie
roundup:
	add	eax,#1
	adc	esi,#0
	test	esi,#D_NORM_MASK << 1
	bne	pre_loverflow	! rounding may cause overflow!
done:
	cmp	edi,#D_EXP_INFINITE << D_EXP_SHIFT
	jae	overflow
	and	edx,#D_SIGN_MASK	! extract sign of largest and result
	or	edx,edi		! include exponent with sign
	and	esi,#D_FRAC_MASK	! discard norm bit
	or	edx,esi		! include fraction with sign and exponent
	ret

	.align	ALIGNMENT
near_tie:
	test	ebx,ebx		! any bits below the roundoff register?
	jnz	roundup
	test	al,#1		! tie case, round to even
	jz	done		! even, no rounding
	jmp	roundup

	.align	ALIGNMENT
not_in_8_below:
	shld	ecx,esi,#REG_BIT-D_NORM_BIT+16	! in 9 to 16 below?
	jz	not_in_16_below	! must be way below (17-20 for usual D_NORM_BIT)
	mov	cl,bsr_table[ecx]	! bsr(esi) - (D_NORM_BIT-16)
	neg	ecx		! (D_NORM_BIT-16) - bsr(esi)
	add	ecx,#16
	jmp	got_shift

	.align	ALIGNMENT
not_in_16_below:
	mov	cl,bsr_table[esi]	! bsr(esi) directly
	neg	ecx		! -bsr(esi)
	add	ecx,#D_NORM_BIT	! D_NORM_BIT - bsr(esi)
	jmp	got_shift

	.align	ALIGNMENT
normalize3:
	test	esi,esi
	jz	shift32

! Find first nonzero bit in esi
! Don't use bsr, it is very slow (const + 3 * bit_found)
! We know that there is some nonzero bit, and the norm bit and above are clear

	sub	ecx,ecx		! prepare unsigned extension of cl
	shld	ecx,esi,#REG_BIT-D_NORM_BIT+8	! any bits in 8 below norm bit?
	jz	not_in_8_below
	mov	cl,bsr_table[ecx]	! bsr(esi) - (D_NORM_BIT-8)
	neg	ecx		! (D_NORM_BIT-8) - bsr(esi)
	add	ecx,#8		! D_NORM_BIT - bsr(esi)
got_shift:
	shld	esi,eax,cl
	shld	eax,ebp,cl
	shld	ebp,ebx,cl
	shl	ebx,cl
	shl	ecx,#D_EXP_SHIFT	! move shift count into exponent position
	sub	edi,ecx
	bhi	round		! XXX - can rounding change the exponent to > 0?
				! not bgt since edi may be 0x80000000
	neg	edi
	shr	edi,#D_EXP_SHIFT
	inc	edi
	br	fpunderflow

	.align	ALIGNMENT
pre_loverflow:
	sub	ebp,ebp		! clear rounding registers
	sub	ebx,ebx		! probably avoiding tests for more rounding

loverflow:
	shr	esi,#1		! carry bit stayed in the reg
	rcr	eax,#1
	rcr	ebp,#1
	rcr	ebx,#1
	add	edi,#1 << D_EXP_SHIFT	! bump exponent by one (immediate operand)
	cmp	edi,#D_EXP_INFINITE << D_EXP_SHIFT
	blt	round
	call	fpoverflow
	mov	eax,ecx		! XXX - wrong reg
				! NOTE(review): same open question as at the
				! overflow label
	ret

! The top fraction word is zero: move the lower words up by whole registers
! and adjust the exponent in edi to match, then renormalize.

	.align	ALIGNMENT
shift32:
	test	eax,eax
	jz	shift64
	mov	esi,eax
	mov	eax,ebp
	mov	ebp,ebx
	sub	ebx,ebx
	sub	edi,#REG_BIT << D_EXP_SHIFT
shiftxx:
	test	esi,#~(D_NORM_MASK | D_FRAC_MASK)
	jz	over_adjust	! else too big already
	shrd	ebx,ebp,#D_BIT-D_FRAC_BIT
	shrd	ebp,eax,#D_BIT-D_FRAC_BIT
	shrd	eax,esi,#D_BIT-D_FRAC_BIT
	shr	esi,#D_BIT-D_FRAC_BIT
	add	edi,#(D_BIT-D_FRAC_BIT) << D_EXP_SHIFT
over_adjust:
	test	edi,edi
	bgt	normalize2
	neg	edi
	shr	edi,#D_EXP_SHIFT
	inc	edi
	br	fpunderflow

	.align	ALIGNMENT
shift64:
	test	ebp,ebp
	jz	shift96
	mov	esi,ebp
	mov	eax,ebx
	sub	ebp,ebp
	mov	ebx,ebp
	sub	edi,#(2*REG_BIT) << D_EXP_SHIFT
	jmp	shiftxx

	.align	ALIGNMENT
shift96:
	test	ebx,ebx		! XXX - this test is probably unnecessary
				! since the shift must be small unless we
				! are subtracting 2 almost-equal numbers,
				! and then the bits beyond 64 will mostly
				! be 0
	jz	return_esi_eax	! all zero
	mov	esi,ebx
	sub	ebx,ebx
	sub	edi,#(3*REG_BIT) << D_EXP_SHIFT
	jmp	shiftxx

	.align	ALIGNMENT
return_esi_eax:
	mov	edx,esi		! esi and eax are both zero here
	ret

! Signs differ: compute x - y on the magnitudes (x is the larger one).

	.align	ALIGNMENT
subtract:
	sub	ebp,ebp		! set up roundoff register
	cmp	ecx,#REG_BIT
	jae	subtract_bigshift
	shrd	ebp,ebx,cl
	shrd	ebx,edi,cl
	shr	edi,cl
	neg	ebp		! begin subtraction esi:eax:0 - edi:ebx:ebp
	sbb	eax,ebx
	sbb	esi,edi
	sub	ebx,ebx
	mov	edi,edx
	and	edi,#D_EXP_MASK
	br	normalize2

	.align	ALIGNMENT
subtract_bigshift:
	cmp	ecx,#D_FRAC_BIT+2
	bhis	return_edx_eax	! x dominates y
	sub	ecx,#REG_BIT
	shrd	ebp,ebx,cl
	shrd	ebx,edi,cl
	shr	edi,cl
	not	ebp		! begin subtraction esi:eax:0:0 - 0:edi:ebx:ebp
	not	ebx
	add	ebp,#1
	adc	ebx,#0
	cmc			! carry is now set iff ebx:ebp was nonzero,
				! i.e. it is the borrow for the words above
	sbb	eax,edi
	sbb	esi,#0
	xchg	ebp,ebx
	mov	edi,edx
	and	edi,#D_EXP_MASK
	br	normalize2

	.data
	.extern	bsr_table