diff options
Diffstat (limited to 'bin86-0.3/bccfp')
-rw-r--r-- | bin86-0.3/bccfp/Makefile | 44 | ||||
-rw-r--r-- | bin86-0.3/bccfp/bccfp.tex | 0 | ||||
-rw-r--r-- | bin86-0.3/bccfp/changes | 30 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fabs.x | 17 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fadd.x | 485 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fcomp.x | 89 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fdiv.x | 312 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fmul.x | 150 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fpbsr.x | 25 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fperr.c | 50 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fperr.h | 14 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fperror.x | 126 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fplib.h | 49 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fptoi.x | 117 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fpulld.x | 20 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fpullf.x | 101 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fpushd.x | 60 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fpushf.x | 74 | ||||
-rw-r--r-- | bin86-0.3/bccfp/fpushi.x | 126 | ||||
-rw-r--r-- | bin86-0.3/bccfp/frexp.x | 66 | ||||
-rw-r--r-- | bin86-0.3/bccfp/ftst.x | 28 | ||||
-rw-r--r-- | bin86-0.3/bccfp/ldexp.x | 74 | ||||
-rw-r--r-- | bin86-0.3/bccfp/modf.c | 20 | ||||
-rw-r--r-- | bin86-0.3/bccfp/test.c | 124 |
24 files changed, 2201 insertions, 0 deletions
diff --git a/bin86-0.3/bccfp/Makefile b/bin86-0.3/bccfp/Makefile new file mode 100644 index 0000000..6caa362 --- /dev/null +++ b/bin86-0.3/bccfp/Makefile @@ -0,0 +1,44 @@ +# Makefile for bcc 386 software floating point library + +.SUFFIXES: .x # .x files are .s files that need C-preprocessing +.x.o: + $(CPP) $(CPPFLAGS) $< >tmp + $(AS) tmp -n $* -o $@ + +AS =as -3 +CFLAGS =-O +CPP =/lib/cpp +CPPFLAGS =-P +FPDIST =Makefile $(FPSRC) test.c bccfp.tex +FPSRC =fadd.x fcomp.x fdiv.x fmul.x fbsr.x \ + fperr.c fperror.x fptoi.x fpushd.x fpulld.x \ + fpushi.x fpushf.x fpullf.x frexp.x ftst.x \ + gcclib.x \ + fabs.x ldexp.x modf.c \ + fperr.h fplib.h +FPOBJ =fadd.o fcomp.o fdiv.o fmul.o fpbsr.o \ + fperr.o fperror.o fptoi.o fpushd.o fpulld.o \ + fpushi.o fpushf.o fpullf.o frexp.o ftst.o \ + fabs.o ldexp.o modf.o +JUNK =tmp +LIB =. + +test: test.c $(LIB)/libfp.a + $(CC) -o $@ test.c $(LIB)/libfp.a -lm + +$(FPOBJ): fplib.h +fperr.c fperror.x: fperr.h + +$(LIB)/libfp.a: $(FPOBJ) + ar rc $(LIB)/libfp.a $(FPOBJ) + rm -f $(JUNK) + +dist: $(FPDIST) + /bin/tar cvf - $(FPDIST) | /bin/compress -b 13 >bccfp.tar.Z + uue bccfp.tar.Z + +clean: + rm -f $(FPOBJ) $(JUNK) test + +realclean: clean + rm -f $(LIB)/libfp.a bccfp.tar.Z bccfp.uue diff --git a/bin86-0.3/bccfp/bccfp.tex b/bin86-0.3/bccfp/bccfp.tex new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/bin86-0.3/bccfp/bccfp.tex diff --git a/bin86-0.3/bccfp/changes b/bin86-0.3/bccfp/changes new file mode 100644 index 0000000..2cc632a --- /dev/null +++ b/bin86-0.3/bccfp/changes @@ -0,0 +1,30 @@ +fcomp: +Fixes for negative 0 (perhaps this shouldn't be generated, like denormals +and infinities (these would cause even more trouble) but Fsub routine or +something generated one). + +frexp.x: +Deleted 3rd arg (used to return value when bcc wasn't doing it right). + +Fixed frexp(value = 0) and ldexp(value = 0) returning nonzero. + +Most files: +Changed comment symbol to '!' for new assembler (not the native ';' in +case this is ported to ACK someday). + +Avoided using ebp and unnecessary register saves. + +Changed assembler style to make it a bit more portable or like I do it +(no '$' for hex, 8[esp] instead of [esp+8], use only .define and not export +or .globl, use '#' (could use nothing) instead of '*' for immediate). +The partly-supported 8(ebp) and .globl would be still more portable. + +Changed terminology 'mantissa' to 'fraction'. + +Round to even. Required for 'paranioa' not to find any defects. + +Used preprocessor. + +Parametrized most of the magic numbers. Phew! + +Supported denormals. Now 'paranioa' doesn't find any flaws. diff --git a/bin86-0.3/bccfp/fabs.x b/bin86-0.3/bccfp/fabs.x new file mode 100644 index 0000000..fe81676 --- /dev/null +++ b/bin86-0.3/bccfp/fabs.x @@ -0,0 +1,17 @@ +! bcc 386 floating point routines (version 2) -- _fabs +! author: Bruce Evans + +#include "fplib.h" + +! double fabs(double value); +! returns the absolute value of a number +! this works for all NaNs, like the 80*87 fabs, but perhaps we should check +! for exceptions that can happen when an 80*87 register is loaded + + .globl _fabs + .align ALIGNMENT +_fabs: + mov eax,PC_SIZE+D_LOW[esp] + mov edx,PC_SIZE+D_HIGH[esp] + and edx,~D_SIGN_MASK + ret diff --git a/bin86-0.3/bccfp/fadd.x b/bin86-0.3/bccfp/fadd.x new file mode 100644 index 0000000..d1e60b1 --- /dev/null +++ b/bin86-0.3/bccfp/fadd.x @@ -0,0 +1,485 @@ +! bcc 386 floating point routines (version 2) +! -- Fadd, Faddd, Faddf, Fsub, Fsubd, Fsubf, normalize2 +! author: Bruce Evans + +#include "fplib.h" + +#define FRAME_SIZE (3 * GENREG_SIZE + PC_SIZE) + + .extern Fpushf + .extern fpdenormal + .extern fpoverflow + .extern fpunderflow + + .globl Fadd + .align ALIGNMENT +Fadd: + push ebp + push edi + push esi + mov eax,FRAME_SIZE+D_LOW[esp] + mov edx,FRAME_SIZE+D_HIGH[esp] + mov ebx,FRAME_SIZE+D_SIZE+D_LOW[esp] + mov ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp] + call addition + mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax + mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx + pop esi + pop edi + pop ebp + ret #D_SIZE + + .globl Faddd + .align ALIGNMENT +Faddd: + push ebp + push edi + push esi + mov eax,FRAME_SIZE+D_LOW[esp] + mov edx,FRAME_SIZE+D_HIGH[esp] + mov ecx,D_HIGH[ebx] + mov ebx,D_LOW[ebx] + call addition + mov FRAME_SIZE+D_LOW[esp],eax + mov FRAME_SIZE+D_HIGH[esp],edx + pop esi + pop edi + pop ebp + ret + + .globl Faddf + .align ALIGNMENT +Faddf: + push ebp + push edi + push esi + call Fpushf + pop ebx ! yl + pop ecx ! yu + mov eax,FRAME_SIZE+D_LOW[esp] ! xl + mov edx,FRAME_SIZE+D_HIGH[esp] ! xu + call addition + mov FRAME_SIZE+D_LOW[esp],eax + mov FRAME_SIZE+D_HIGH[esp],edx + pop esi + pop edi + pop ebp + ret + + .globl Fsub + .align ALIGNMENT +Fsub: + push ebp + push edi + push esi + mov eax,FRAME_SIZE+D_LOW[esp] + mov edx,FRAME_SIZE+D_HIGH[esp] + mov ebx,FRAME_SIZE+D_SIZE+D_LOW[esp] + mov ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp] + xor ecx,#D_SIGN_MASK ! complement sign + call addition + mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax + mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx + pop esi + pop edi + pop ebp + ret #D_SIZE + + .globl Fsubd + .align ALIGNMENT +Fsubd: + push ebp + push edi + push esi + mov eax,FRAME_SIZE+D_LOW[esp] + mov edx,FRAME_SIZE+D_HIGH[esp] + mov ecx,D_HIGH[ebx] + mov ebx,D_LOW[ebx] + xor ecx,#D_SIGN_MASK ! complement sign + call addition + mov FRAME_SIZE+D_LOW[esp],eax + mov FRAME_SIZE+D_HIGH[esp],edx + pop esi + pop edi + pop ebp + ret + + .globl Fsubf + .align ALIGNMENT +Fsubf: + push ebp + push edi + push esi + call Fpushf + pop ebx ! yl + pop ecx ! yu + mov eax,FRAME_SIZE+D_LOW[esp] ! xl + mov edx,FRAME_SIZE+D_HIGH[esp] ! xu + xor ecx,#D_SIGN_MASK ! complement sign + call addition + mov FRAME_SIZE+D_LOW[esp],eax + mov FRAME_SIZE+D_HIGH[esp],edx + pop esi + pop edi + pop ebp + ret + + .align ALIGNMENT +exp_y_0: + +! Check for x denormal, to split off special case where both are denormal, +! so the norm bit (or 1 higher) is known to be set for addition, so addition +! can be done faster + + test esi,#D_EXP_MASK + jnz x_normal_exp_y_0 + test esi,esi ! test top bits of x fraction + jnz both_denorm ! denormal iff nonzero fraction with zero exp + test eax,eax ! test rest of fraction + jz return_edx_eax ! everything 0 (XXX - do signs matter?) +both_denorm: + call fpdenormal + test ebp,#D_SIGN_MASK + jnz denorm_subtract + +! Add denormal x to denormal or zero y + +#if D_NORM_BIT != D_EXP_SHIFT +#include "error, carry into norm bit does not go into exponent" +#endif + + add eax,ebx + adc esi,edi + or edx,esi + ret + +denorm_subtract: + sub eax,ebx + sbb esi,edi + or edx,esi + ret + + .align ALIGNMENT +x_normal_exp_y_0: + test edi,edi ! this is like the check for x denormal + jnz y_denorm + test ebx,ebx + jz return_edx_eax ! y = 0 +y_denorm: + call fpdenormal + or ecx,#1 << D_EXP_SHIFT ! normalize y by setting exponent to 1 + jmp got_y + + .align ALIGNMENT +return_edx_eax: + ret + + .align ALIGNMENT +add_bigshift: + cmp ecx,#D_FRAC_BIT+2 + jae return_edx_eax ! x dominates y + sub ecx,#REG_BIT + shrd ebp,ebx,cl + shrd ebx,edi,cl + shr edi,cl + add eax,edi + adc esi,#0 + xchg ebp,ebx + br normalize + + .align ALIGNMENT +addition: + mov esi,edx ! this mainly for consistent naming + and esi,#D_EXP_MASK | D_FRAC_MASK ! discard sign so comparison is simple + mov edi,ecx ! free cl for shifts + and edi,#D_EXP_MASK | D_FRAC_MASK + cmp esi,edi + ja xbigger + jb swap + cmp eax,ebx + jae xbigger +swap: + xchg edx,ecx + xchg eax,ebx + xchg esi,edi +xbigger: + +! edx holds sign of result from here on +! and exponent of result before the normalization step + + mov ebp,edx ! prepare difference of signs + xor ebp,ecx + + and ecx,#D_EXP_MASK ! extract exp_y and check for y 0 or denormal + beq exp_y_0 ! otherwise x is not 0 or denormal either + and edi,#D_FRAC_MASK ! extract fraction + or edi,#D_NORM_MASK ! normalize +got_y: + and esi,#D_FRAC_MASK ! extract fraction + or esi,#D_NORM_MASK ! normalize + + sub ecx,edx ! carries from non-exp bits in edx killed later + neg ecx + and ecx,#D_EXP_MASK + shr ecx,#D_EXP_SHIFT ! difference of exponents + +got_x_and_y: + and ebp,#D_SIGN_MASK ! see if signs are same + bne subtract ! else roundoff reg ebp has been cleared + + cmp cl,#REG_BIT + bhis add_bigshift + shrd ebp,ebx,cl + shrd ebx,edi,cl + shr edi,cl + add eax,ebx + adc esi,edi + +! result edx(D_SIGN_MASK | D_EXP_MASK bits):esi:eax:ebp but needs normalization + + mov edi,edx + and edi,#D_EXP_MASK + test esi,#D_NORM_MASK << 1 + jnz add_loverflow + +add_round: + cmp ebp,#1 << (REG_BIT-1) ! test roundoff register + jb add_done ! no rounding + jz tie +add_roundup: + add eax,#1 + adc esi,#0 + test esi,#D_NORM_MASK << 1 + jnz pre_add_loverflow ! rounding may cause overflow! +add_done: + mov ecx,edx ! duplicated code from 'done' + and edx,#D_SIGN_MASK + or edx,edi + and esi,#D_FRAC_MASK + or edx,esi + ret + + .align ALIGNMENT +tie: + test al,#1 ! tie case, round to even + jz add_done ! even, no rounding + jmp add_roundup + + .align ALIGNMENT +pre_add_loverflow: + sub ebp,ebp ! clear rounding register + ! probably avoiding tests for more rounding +add_loverflow: + shrd ebp,eax,#1 + jnc over_set_sticky_bit + or ebp,#1 +over_set_sticky_bit: + shrd eax,esi,#1 + shr esi,#1 + add edi,1 << D_EXP_SHIFT + cmp edi,#D_EXP_INFINITE << D_EXP_SHIFT + jl add_round +overflow: + call fpoverflow + mov eax,ecx ! XXX - wrong reg + ret + +! result edx(D_SIGN_MASK | D_EXP_MASK bits): +! esi((D_NORM_MASK << 1) | D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx +! but needs normalization + + .align ALIGNMENT +normalize: + mov edi,edx + and edi,#D_EXP_MASK + test esi,#D_NORM_MASK << 1 + bne loverflow + +! result edx(D_SIGN_MASK bit):edi(D_EXP_MASK bits): +! esi(D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx +! but needs normalization + + .globl normalize2 +normalize2: + test esi,#D_NORM_MASK ! already-normalized is very common + jz normalize3 +round: + cmp ebp,#1 << (REG_BIT-1) ! test roundoff register + jb done ! no rounding + jz near_tie +roundup: + add eax,#1 + adc esi,#0 + test esi,#D_NORM_MASK << 1 + bne pre_loverflow ! rounding may cause overflow! +done: +cmp edi,#D_EXP_INFINITE << D_EXP_SHIFT +jae overflow + and edx,#D_SIGN_MASK ! extract sign of largest and result + or edx,edi ! include exponent with sign + and esi,#D_FRAC_MASK ! discard norm bit + or edx,esi ! include fraction with sign and exponent + ret + + .align ALIGNMENT +near_tie: + test ebx,ebx + jnz roundup + test al,#1 ! tie case, round to even + jz done ! even, no rounding + jmp roundup + + .align ALIGNMENT +not_in_8_below: + shld ecx,esi,#REG_BIT-D_NORM_BIT+16 ! in 9 to 16 below? + jz not_in_16_below ! must be way below (17-20 for usual D_NORM_BIT) + mov cl,bsr_table[ecx] ! bsr(esi) - (D_NORM_BIT-16) + neg ecx ! (D_NORM_BIT-16) - bsr(esi) + add ecx,#16 + jmp got_shift + + .align ALIGNMENT +not_in_16_below: + mov cl,bsr_table[esi] ! bsr(esi) directly + neg ecx ! -bsr(esi) + add ecx,#D_NORM_BIT ! D_NORM_BIT - bsr(esi) + jmp got_shift + + .align ALIGNMENT +normalize3: + test esi,esi + jz shift32 + +! Find first nonzero bit in esi +! Don't use bsr, it is very slow (const + 3 * bit_found) +! We know that there is some nonzero bit, and the norm bit and above are clear + + sub ecx,ecx ! prepare unsigned extension of cl + shld ecx,esi,#REG_BIT-D_NORM_BIT+8 ! any bits in 8 below norm bit? + jz not_in_8_below + mov cl,bsr_table[ecx] ! bsr(esi) - (D_NORM_BIT-8) + neg ecx ! (D_NORM_BIT-8) - bsr(esi) + add ecx,#8 ! D_NORM_BIT - bsr(esi) +got_shift: + shld esi,eax,cl + shld eax,ebp,cl + shld ebp,ebx,cl + shl ebx,cl + shl ecx,D_EXP_SHIFT + sub edi,ecx + bhi round ! XXX - can rounding change the exponent to > 0? + ! not bgt since edi may be 0x80000000 + neg edi + shr edi,#D_EXP_SHIFT + inc edi + br fpunderflow + + .align ALIGNMENT +pre_loverflow: + sub ebp,ebp ! clear rounding registers + sub ebx,ebx ! probably avoiding tests for more rounding + +loverflow: + shr esi,#1 ! carry bit stayed in the reg + rcr eax,#1 + rcr ebp,#1 + rcr ebx,#1 + add edi,1 << D_EXP_SHIFT + cmp edi,#D_EXP_INFINITE << D_EXP_SHIFT + blt round + call fpoverflow + mov eax,ecx ! XXX - wrong reg + ret + + .align ALIGNMENT +shift32: + test eax,eax + jz shift64 + mov esi,eax + mov eax,ebp + mov ebp,ebx + sub ebx,ebx + sub edi,#REG_BIT << D_EXP_SHIFT +shiftxx: + test esi,#~(D_NORM_MASK | D_FRAC_MASK) + jz over_adjust ! else too big already + shrd ebx,ebp,#D_BIT-D_FRAC_BIT + shrd ebp,eax,#D_BIT-D_FRAC_BIT + shrd eax,esi,#D_BIT-D_FRAC_BIT + shr esi,#D_BIT-D_FRAC_BIT + add edi,#(D_BIT-D_FRAC_BIT) << D_EXP_SHIFT +over_adjust: + test edi,edi + bgt normalize2 + neg edi + shr edi,#D_EXP_SHIFT + inc edi + br fpunderflow + + .align ALIGNMENT +shift64: + test ebp,ebp + jz shift96 + mov esi,ebp + mov eax,ebx + sub ebp,ebp + mov ebx,ebp + sub edi,#(2*REG_BIT) << D_EXP_SHIFT + jmp shiftxx + + .align ALIGNMENT +shift96: + test ebx,ebx ! XXX - this test is probably unnecessary + ! since the shift must be small unless we + ! are subtracting 2 almost-equal numbers, + ! and then the bits beyond 64 will mostly + ! be 0 + jz return_esi_eax ! all zero + mov esi,ebx + sub ebx,ebx + sub edi,#(3*REG_BIT) << D_EXP_SHIFT + jmp shiftxx + + .align ALIGNMENT +return_esi_eax: + mov edx,esi + ret + + .align ALIGNMENT +subtract: + sub ebp,ebp ! set up roundoff register + cmp ecx,#REG_BIT + jae subtract_bigshift + shrd ebp,ebx,cl + shrd ebx,edi,cl + shr edi,cl + neg ebp ! begin subtraction esi:eax:0 - edi:ebx:ebp + sbb eax,ebx + sbb esi,edi + sub ebx,ebx + mov edi,edx + and edi,#D_EXP_MASK + br normalize2 + + .align ALIGNMENT +subtract_bigshift: + cmp ecx,#D_FRAC_BIT+2 + bhis return_edx_eax ! x dominates y + sub ecx,#REG_BIT + shrd ebp,ebx,cl + shrd ebx,edi,cl + shr edi,cl + not ebp ! begin subtraction esi:eax:0:0 - 0:edi:ebx:ebp + not ebx + add ebp,#1 + adc ebx,#0 + cmc + sbb eax,edi + sbb esi,#0 + xchg ebp,ebx + mov edi,edx + and edi,#D_EXP_MASK + br normalize2 + + .data + .extern bsr_table diff --git a/bin86-0.3/bccfp/fcomp.x b/bin86-0.3/bccfp/fcomp.x new file mode 100644 index 0000000..71148ab --- /dev/null +++ b/bin86-0.3/bccfp/fcomp.x @@ -0,0 +1,89 @@ +! bcc 386 floating point routines (version 2) -- Fcomp, Fcompd, Fcompf +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + + .extern Fpushf + +! Pop 2 doubles from stack and compare them, return result in flags so +! normal signed branches work (unlike 80x87 which returns the result in +! the zero and carry flags). + + .globl Fcomp + .align ALIGNMENT +Fcomp: + pop ecx ! get return address + pop eax ! xl + pop edx ! xu + push ecx ! put back ret address - pop 2nd double later + +! All this popping is bad on 486's since plain mov takes 1+ cycle and pop +! takes 4 cycles. But this code is designed for 386's where popping is +! nominally the same speed and saves code space and so maybe instruction +! fetch time as well as the instruction to adjust the stack (ret #n takes +! no longer than plain ret but inhibits gotos). + + mov ebx,PC_SIZE+D_LOW[esp] ! yl + mov ecx,PC_SIZE+D_HIGH[esp] ! yu + jmp compare + +! Pop double from stack and compare with double at [ebx] + + .globl Fcompd + .align ALIGNMENT +Fcompd: + mov eax,PC_SIZE+D_LOW[esp] ! xl + mov edx,PC_SIZE+D_HIGH[esp] ! xu + mov ecx,D_HIGH[ebx] ! yu + mov ebx,D_LOW[ebx] ! yl + +compare: + test edx,#D_SIGN_MASK ! is x >= 0? + jz cmp0 ! yes; just compare x and y + test ecx,#D_SIGN_MASK ! no; but is y >= 0? + jz cmp0 ! yes; just compare x and y + + xchg edx,ecx ! x, y < 0, so ... + xchg eax,ebx ! ... swap x and y ... + xor edx,#D_SIGN_MASK ! ... and toggle signs + xor ecx,#D_SIGN_MASK + +cmp0: + cmp edx,ecx ! compare upper dwords + jnz checkneg0 ! if upper dwords differ, job is almost done + mov edx,eax ! upper dwords equal, so ... + mov ecx,ebx ! ... must make unsigned comparison of lower dwords + shr edx,#1 ! shift past sign + shr ecx,#1 + cmp edx,ecx ! compare top 31 bits of lower dwords + jnz return ! if these differ, job is done + and eax,#1 ! compare lowest bits + and ebx,#1 + cmp eax,ebx + +return: + ret #D_SIZE ! return, popping 1 double from stack + +checkneg0: + test edx,#D_EXP_MASK | D_FRAC_MASK ! check to catch unusual case ... + jnz recheck + test eax,eax + jnz recheck + test ecx,#D_EXP_MASK | D_FRAC_MASK + jnz recheck + test ebx,ebx + jz return ! ... both are (+-) zero, return 'z' + +recheck: + cmp edx,ecx ! the upper words were really different + ret #D_SIZE + + .globl Fcompf + .align ALIGNMENT +Fcompf: + call Fpushf + pop ebx ! yl + pop ecx ! yu + mov eax,PC_SIZE+D_LOW[esp] ! xl + mov edx,PC_SIZE+D_HIGH[esp] ! xu + jmp compare diff --git a/bin86-0.3/bccfp/fdiv.x b/bin86-0.3/bccfp/fdiv.x new file mode 100644 index 0000000..4a5cf74 --- /dev/null +++ b/bin86-0.3/bccfp/fdiv.x @@ -0,0 +1,312 @@ +#define EF_SIZE 4 + +! bcc 386 floating point routines (version 2) -- Fdiv, Fdivd, Fdivf +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + +#define FRAME_SIZE (3 * GENREG_SIZE + PC_SIZE) + + .extern Fpushf + .extern fpdivzero + .extern fpoverflow + .extern fpunderflow + +! double Fdiv(double x, double y) returns x / y + +! pop 2 doubles from stack, divide first by second, and push quotient on stack + +! we denote upper and lower dwords of x and y (or their fractions) +! by (xu,xl), (yu,yl) + + .globl Fdivf + .align ALIGNMENT +Fdivf: + sub esp,#D_SIZE ! make space for dummy double on stack + push ebp + push edi ! save some regs + push esi + mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ... + mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot + call Fpushf + pop esi ! yl + pop edi ! yu + mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl + mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu + jmp division + + .globl Fdiv + .align ALIGNMENT +Fdiv: + push ebp + push edi ! save some regs + push esi + mov eax,FRAME_SIZE+D_LOW[esp] ! xl + mov edx,FRAME_SIZE+D_HIGH[esp] ! xu + mov esi,FRAME_SIZE+D_SIZE+D_LOW[esp] ! yl + mov edi,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! yu + jmp division + + .align ALIGNMENT +exp_y_0: + mov ebx,edi + or ebx,esi + beq zerodivide + mov ebx,#1 +fix_y: + test edi,edi ! XXX - sloow + js y_unpacked + shld edi,esi,#1 + shl esi,#1 + dec bx + jmp fix_y + + .align ALIGNMENT +exp_x_0: + mov ecx,edx + or ecx,eax + beq retz + mov ecx,#1 ! change exponent from 0 to 1 +fix_x: + test edx,#1 << (REG_BIT-1-2) ! XXX - sloow + jnz x_unpacked + shld edx,eax,#1 + shl eax,#1 + dec cx + jmp fix_x + +! Fdivd pops double from stack, divides it by double at [ebx], +! and pushes quotient back on stack + + .globl Fdivd + .align ALIGNMENT +Fdivd: + sub esp,#D_SIZE ! make space for dummy double on stack + push ebp + push edi ! save some regs + push esi + mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ... + mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot + mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl + mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu + mov esi,D_LOW[ebx] ! yl + mov edi,D_HIGH[ebx] ! yu + +division: + +! The full calculations are + +! (xu,xl,0) = yu * (zu,zl) + (0,r,0) (normal 96/32 -> 64 bit division) +! yl * zu = yu * q1 + r1 (32*32 -> 64 bit mul and 64/32 -> 32 bit div) + +! so + +! (xu,xl,0,0) = (yu,yl) * (zu,zl-q1) + (0,0,r-r1,yl*(q1-zl)) + +! where the calculations zl-q1, r-r1 and yl*(q1-zl) are more complicated +! than the notation suggests. They may be negative and the one with the +! multiplication may not fit in 32 bits and in both cases the overflow +! has to be moved into higher bit positions. + +! See Knuth for why (zu,zl-q1) is the correct 64-bit quotient to within +! 1 bit either way (assuming the normalization x < 2 * y). + +! We only need to calculate the remainder (0,0,r-r1,yl*(q1-zl)) to resolve +! tie cases. It tells whether the approximate quotient is too high or too +! low. + +#define NTEMPS 5 + + sub esp,#NTEMPS*GENREG_SIZE ! space to remember values for rounding of tie case + +! Offsets from esp for these values (offsets using FRAME_SIZE are invalid +! while these temps are active) +r = 0 +q1 = 4 +r1 = 8 +yl = 12 +zl = 16 + +! Step 1: unpack and normalize x to fraction in edx:eax (left shifted as +! far as possible less 2 so that x < y, and later z < y); unpack and normalize +! y to a fraction in edi:esi (left shifted as far as possible), put difference +! of signs (= sign of quotient) in ecx(D_SIGN_MASK) and difference of exponents +! (= exponent of quotient before normalization) in cx. + + mov ebp,edx ! xu + xor ebp,edi ! xu ^ yu + and ebp,#D_SIGN_MASK ! sign of result is difference of signs + +! Unpack y first to trap 0 / 0 + + mov ebx,edi ! remember yu for exponent of y + shld edi,esi,#D_BIT-D_FRAC_BIT ! extract fraction of y ... + shl esi,#D_BIT-D_FRAC_BIT + and ebx,#D_EXP_MASK ! exponent of y + jz exp_y_0 + shr ebx,#D_EXP_SHIFT ! in ebx (actually in bx, with high bits 0) + or edi,#D_NORM_MASK << (D_BIT-D_FRAC_BIT) ! normalize +y_unpacked: + +! Unpack x + + mov ecx,edx ! remember xu for exponent of x + shld edx,eax,#D_BIT-D_FRAC_BIT-2 ! extract fraction of x ... + shl eax,#D_BIT-D_FRAC_BIT-2 + and edx,#(D_NORM_MASK << (D_BIT-D_FRAC_BIT-2+1))-1 + ! XXX - above may be shifted 1 extra unnecessarily + and ecx,#D_EXP_MASK ! exponent of x + jz exp_x_0 + shr ecx,#D_EXP_SHIFT ! in ecx (actually in cx, with high bits 0) + or edx,#D_NORM_MASK << (D_BIT-D_FRAC_BIT-2) ! normalize +x_unpacked: + + sub cx,bx ! not ecx,ebx because we want to use high bit for sign + add cx,#D_EXP_BIAS ! adjust exponent of quotient + + or ecx,ebp ! include sign with exponent + +! Step 2: quotient of fractions -> (edx,eax) + +! 2a: (xu,xl,0) div yu = (zu,zl) -> (ebx,esi) + + div eax,edi ! (xu,xl) div yu = zu in eax; remainder (rem) in edx + mov ebx,eax ! save zu in ebx + sub eax,eax ! clear eax: (edx,eax) = (rem,0) + div eax,edi ! (rem,0) div yu = zl in eax + mov r[esp],edx + mov zl[esp],eax + xchg eax,esi ! store zl in esi; save yl in eax + mov yl[esp],eax + +! 2b: (yl * zu) div yu -> (0,eax) + + mul eax,ebx ! yl * zu -> (edx,eax) + div eax,edi ! (yl * zu) div yu in eax + mov q1[esp],eax + mov r1[esp],edx + +! 2c: (xu,xl) / (yu,yl) = (zu,zl) - (yl * zu) div yu -> (edx,eax) + + mov edx,ebx ! zu + xchg eax,esi ! eax <- zl; esi <- (yl * zu) div yu + sub eax,esi + sbb edx,#0 + +! Step 3: normalise quotient + + test edx,#1 << (REG_BIT-2) ! is fraction too small? (can only be by 1 bit) + jnz div4 + shld edx,eax,#1 ! yes; multiply fraction ... + shl eax,#1 ! ... by 2 ... + dec cx ! ... and decrement exponent + +! Step 4: shift and round + +div4: + mov ebx,eax ! save for rounding + shrd eax,edx,#D_BIT-D_FRAC_BIT-1 ! shift fraction of result ... + shr edx,#D_BIT-D_FRAC_BIT-1 ! ... to proper position + and ebx,#(1 << (D_BIT-D_FRAC_BIT-1))-1 ! look at bits shifted out + cmp ebx,#D_NORM_MASK >> (D_BIT-D_FRAC_BIT) ! compare with middle value + jb div5 ! below middle, don't round up + ja roundup ! above middle, round up + +! The low bits don't contain enough information to resolve the tie case, +! because the quotient itself is only an approximation. +! Calculate the exact remainder. +! This case is not very common, so don't worry much about speed. +! Unfortunately we had to save extra in all cases to prepare for it. + + push edx + push eax + + sub esi,esi ! the calculation requires 33 bits - carry to here + mov eax,2*GENREG_SIZE+q1[esp] + sub eax,2*GENREG_SIZE+zl[esp] + pushfd + mul dword EF_SIZE+2*GENREG_SIZE+yl[esp] + popfd + jnc foo + sub edx,2*GENREG_SIZE+yl[esp] + sbb esi,#0 +foo: + add edx,2*GENREG_SIZE+r[esp] + adc esi,#0 + sub edx,2*GENREG_SIZE+r1[esp] + sbb esi,#0 + mov ebx,eax + mov edi,edx + + pop eax + pop edx + +! Can finally decide rounding of tie case + + js div5 ! remainder < 0 from looking at top 64 bits + jnz roundup ! remainder > 0 from looking at top 64 bits + or edi,ebx ! test bottom 64 bits + jnz roundup ! remainder > 0 + + test al,#1 ! at last we know it is the tie case, check parity bit + jz div5 ! already even, otherwise round up to make even + +roundup: + add eax,#1 ! add rounding bit + adc edx,#0 + test edx,#D_NORM_MASK << 1 ! has fraction overflowed (very unlikely) + jz div5 +! Why were the shifts commented out? + shrd eax,edx,#1 ! yes, divide fraction ... + shr edx,#1 ! ... by 2 ... + inc cx ! ... and increment exponent + +! Step 5: put it all together + +div5: + mov ebx,ecx ! extract sign + and ebx,D_SIGN_MASK + cmp cx,#D_EXP_INFINITE ! is exponent too big? + jge overflow + test cx,cx + jle underflow + shl ecx,#D_EXP_SHIFT + + and edx,#D_FRAC_MASK ! remove norm bit + or edx,ecx ! include exponent ... + or edx,ebx ! ... and sign + +return: + add esp,#NTEMPS*GENREG_SIZE ! reclaim temp space + mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax ! "push" lower dword of product ... + mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx ! ... and upper dword + pop esi ! restore registers + pop edi + pop ebp + ret #D_SIZE + +retz: + sub edx,edx ! clear upper dword + sub eax,eax ! ... and lower dword + jmp return + +overflow: + mov edx,ecx ! put sign in usual reg + call fpoverflow + mov eax,ecx ! XXX - wrong reg + jmp return + +underflow: + mov esi,edx ! put upper part of fraction in usual reg + mov edx,ecx ! sign + movsx edi,cx ! put shift in usual reg + neg edi + inc edi + call fpunderflow + jmp return + +zerodivide: + mov edx,ebp ! sign + call fpdivzero + mov eax,ecx ! XXX - wrong reg + jmp return diff --git a/bin86-0.3/bccfp/fmul.x b/bin86-0.3/bccfp/fmul.x new file mode 100644 index 0000000..aa62b5c --- /dev/null +++ b/bin86-0.3/bccfp/fmul.x @@ -0,0 +1,150 @@ +! bcc 386 floating point routines (version 2) -- Fmul, Fmuld, Fmulf +! author: Bruce Evans + +#include "fplib.h" + +#define FRAME_SIZE (3 * GENREG_SIZE + PC_SIZE) + + .extern Fpushf + .extern fpoverflow + .extern fpunderflow + .extern normalize2 + + .globl Fmul + .align ALIGNMENT +Fmul: + push ebp + push edi + push esi + mov eax,FRAME_SIZE+D_LOW[esp] + mov edx,FRAME_SIZE+D_HIGH[esp] + mov ebx,FRAME_SIZE+D_SIZE+D_LOW[esp] + mov ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp] + call multiplication + mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax + mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx + pop esi + pop edi + pop ebp + ret #D_SIZE + + .globl Fmuld + .align ALIGNMENT +Fmuld: + push ebp + push edi + push esi + mov eax,FRAME_SIZE+D_LOW[esp] + mov edx,FRAME_SIZE+D_HIGH[esp] + mov ecx,D_HIGH[ebx] + mov ebx,D_LOW[ebx] + call multiplication + mov FRAME_SIZE+D_LOW[esp],eax + mov FRAME_SIZE+D_HIGH[esp],edx + pop esi + pop edi + pop ebp + ret + + .globl Fmulf + .align ALIGNMENT +Fmulf: + push ebp + push edi + push esi + call Fpushf + pop ebx ! yl + pop ecx ! xu + mov eax,FRAME_SIZE+D_LOW[esp] ! xl + mov edx,FRAME_SIZE+D_HIGH[esp] ! xu + call multiplication + mov FRAME_SIZE+D_LOW[esp],eax + mov FRAME_SIZE+D_HIGH[esp],edx + pop esi + pop edi + pop ebp + ret + + .align ALIGNMENT +exp_x_0: + mov edx,#1 << D_EXP_SHIFT ! change exponent from 0 to 1 + jmp x_unpacked ! XXX - check for denormal? + + .align ALIGNMENT +exp_y_0: + mov ecx,#1 << D_EXP_SHIFT + jmp y_unpacked + + .align ALIGNMENT +multiplication: + mov ebp,edx ! xu + xor ebp,ecx ! xu ^ yu + and ebp,#D_SIGN_MASK ! sign of result is difference of signs + + mov esi,edx ! free edx for multiplications + and esi,#D_FRAC_MASK ! discard sign and exponent + and edx,#D_EXP_MASK ! exponent(x) + jz exp_x_0 + or esi,#D_NORM_MASK ! normalize +x_unpacked: + + mov edi,ecx ! this mainly for consistent naming + and edi,#D_FRAC_MASK + and ecx,#D_EXP_MASK ! exponent(y) + jz exp_y_0 + or edi,#D_NORM_MASK +y_unpacked: + + add ecx,edx ! add exponents + +! exponent is in ecx, sign in ebp, operands in esi:eax and edi:ebx, edx is free +! product to go in esi:eax:ebp:ebx +! terminology: x * y = (xu,xl) * (yu,yl) +! = (xu * yu,0,0) + (0,xu * yl + xl * yu,0) + (0,0,xl * yl) + + push ecx + push ebp + mov ecx,eax + mul ebx ! xl * yl + mov ebp,edx ! (xl * yl).u in ebp + xchg ebx,eax ! (xl * yl).l in ebx (final), yl in eax + mul esi ! xu * yl + push eax ! (xu * yl).l on stack + push edx ! (xu * yl).u on stack + mov eax,esi ! xu + mul edi ! xu * yu + mov esi,edx ! (xu * yu).u in esi (final except carries) + xchg ecx,eax ! (xu * yu).l in ecx, xl in eax + mul edi ! xl * yu + + add ebp,eax ! (xl * yl).u + (xl * yu).l + pop eax ! (xu * yl).u + adc eax,edx ! (xu * yl).u + (xl * yu).u + adc esi,#0 + pop edx ! (xu * yl).l + add ebp,edx ! ((xl * yl).u + (xl * yu).l) + (xu * yl).l + adc eax,ecx ! ((xu * yl).u + (xl * yu).u) + (xu * yu).l + adc esi,#0 + pop edx ! sign + pop edi ! exponent + sub edi,#(D_EXP_BIAS+1-(D_EXP_BIT+2)) << D_EXP_SHIFT ! adjust +! cmp edi,#(D_EXP_INFINITE-1+(D_EXP_BIT+2)) << D_EXP_SHIFT +! jae outofbounds ! 0 will be caught as underflow by normalize2 +cmp edi,#(2*D_EXP_INFINITE-(D_EXP_BIAS+1)+(D_EXP_BIT+2)) << D_EXP_SHIFT +ja underflow + br normalize2 + + .align ALIGNMENT +overflow: + mov edx,ebp ! put sign in usual reg + call fpoverflow + mov eax,ecx ! XXX - wrong reg + ret + + .align ALIGNMENT +underflow: + mov edx,ebp ! put sign in usual reg + neg edi + shr edi,#D_EXP_SHIFT + inc edi + br fpunderflow diff --git a/bin86-0.3/bccfp/fpbsr.x b/bin86-0.3/bccfp/fpbsr.x new file mode 100644 index 0000000..8ff38d7 --- /dev/null +++ b/bin86-0.3/bccfp/fpbsr.x @@ -0,0 +1,25 @@ +! bcc 386 floating point routines (version 2) -- bsr_table +! author: Bruce Evans + +#include "fplib.h" + + .globl bsr_table + .data + .align ALIGNMENT +bsr_table: ! table to replace bsr on range 0-255 +.byte -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 +.byte 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 +.byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 +.byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 +.byte 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 +.byte 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 +.byte 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 +.byte 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 +.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 diff --git a/bin86-0.3/bccfp/fperr.c b/bin86-0.3/bccfp/fperr.c new file mode 100644 index 0000000..d5372dc --- /dev/null +++ b/bin86-0.3/bccfp/fperr.c @@ -0,0 +1,50 @@ +/* + * bin86/bccfp/fperr.c + * + * Copyright (C) 1992 Bruce Evans + */ + +#include <stdio.h> +#include <signal.h> + +#include "fperr.h" + +void fperr(errno) +int errno; +{ + +#if defined(DEBUG) || 0 + switch(errno) { + + case EFDENORMAL: + fputs("\nDenormal - ", stderr); + break; + + case EFINFINITY: + fputs("\nInfinity - ", stderr); + break; + + case EFNAN: + fputs("\nNaN - ", stderr); + break; + + case EFOVERFLOW: + fputs("\nOverflow - ", stderr); + break; + + case EFUNDERFLOW: + fputs("\nUnderflow - ", stderr); + break; + + case EFDIVZERO: + fputs("\nZero divide - ", stderr); + break; + + default: + fprintf(stderr, "\nUnknown error 0x%x - ", errno); + } + fflush(stderr); +#endif + + kill(getpid(), SIGFPE); +} diff --git a/bin86-0.3/bccfp/fperr.h b/bin86-0.3/bccfp/fperr.h new file mode 100644 index 0000000..42d54fd --- /dev/null +++ b/bin86-0.3/bccfp/fperr.h @@ -0,0 +1,14 @@ +/* + * bin86/bccfp/fperr.h + * + * Copyright (C) 1992 Bruce Evans + */ + +/* fperr.h */ + +#define EFDENORMAL 1 +#define EFINFINITY 2 +#define EFNAN 3 +#define EFOVERFLOW 4 +#define EFUNDERFLOW 5 +#define EFDIVZERO 6 diff --git a/bin86-0.3/bccfp/fperror.x b/bin86-0.3/bccfp/fperror.x new file mode 100644 index 0000000..04f3f74 --- /dev/null +++ b/bin86-0.3/bccfp/fperror.x @@ -0,0 +1,126 @@ +! bcc 386 floating point routines (version 2) +! --- fpdenormal, fperror, fpinfinity, fpNaN, fpoverflow, fpunderflow,fpdivzero +! author: Bruce Evans + +#include "fperr.h" +#include "fplib.h" + + .extern _fperr + +! Cause a denormal-operand exception +! Preserves all general registers if signal handler returns + + .globl fpdenormal + .align ALIGNMENT +fpdenormal: +#if 0 + push eax + mov eax,#EFDENORMAL + call fperror + pop eax +#endif + ret + +! Cause an exception with error code eax, preserving all genregs except eax + + .globl fperror + .align ALIGNMENT +fperror: + push ebp ! set up usual frame ... + mov ebp,esp ! ... for debugging + push edx ! save default + push ecx + push eax ! error code is arg to C routine + call _fperr + add esp,#GENREG_SIZE + pop ecx ! restore default + pop edx + pop ebp + ret + + .align ALIGNMENT +fphuge: + mov ecx,#D_HUGE_LOW ! prepare number +-HUGEVAL + or edx,#D_HUGE_HIGH ! ... in case signal handler returns + jmp fperror + +! Cause an infinite-operand exception +! Return +-HUGEVAL in edx:ecx with sign from edx + + .globl fpinfinity + .align ALIGNMENT +fpinfinity: + mov eax,#EFINFINITY + jmp fphuge ! almost right + +! Cause an NaN-operand exception +! Return +-HUGEVAL in edx:ecx with sign from edx + + .globl fpNaN + .align ALIGNMENT +fpNaN: + mov eax,#EFNAN ! there are different types of NaNs but... + jmp fphuge ! WRONG + +! Cause an overflow exception +! Return +-HUGEVAL in edx:ecx with sign from edx + + .globl fpoverflow + .align ALIGNMENT +fpoverflow: + mov eax,#EFOVERFLOW + jmp fphuge ! almost right + +! Cause an underflow exception (actually assume it is masked for now) +! Return denormal or 0.0 in edx:ecx +! XXX - this should cause a denormal exception or none for the denormal case +! Args: sign in edx, fraction in esi:eax, right shift in edi +! Returns: denormalized number in edx:eax + + .globl fpunderflow + .align ALIGNMENT +fpunderflow: +#if 0 + mov eax,#EFUNDERFLOW + jmp fperror +#endif + cmp edi,#REG_BIT + jb denormalize1 + mov eax,esi + sub esi,esi + sub edi,#REG_BIT + cmp edi,#REG_BIT + jb denormalize1 +denormalize_underflow: +#if 0 + mov eax,#EFUNDERFLOW + jmp fperror +#endif + sub eax,eax + mov edx,eax + ret + + .align ALIGNMENT +denormalize1: + mov ecx,edi + shrd eax,esi,cl + shr esi,cl + mov ecx,esi + or ecx,eax + jz denormalize_underflow + and edx,#D_SIGN_MASK + or edx,esi + ret + +! Cause an fp division by zero exception +! Return +-HUGEVAL in edx:ecx with sign from edx + + .globl fpdivzero + .align ALIGNMENT +fpdivzero: + mov eax,#EFDIVZERO + test edx,#D_EXP_MASK + jnz fphuge ! almost right + sub ecx,ecx + mov edx,ecx + jmp fperror diff --git a/bin86-0.3/bccfp/fplib.h b/bin86-0.3/bccfp/fplib.h new file mode 100644 index 0000000..b346c61 --- /dev/null +++ b/bin86-0.3/bccfp/fplib.h @@ -0,0 +1,49 @@ +/* + * bin86/bccfp/fplib.h + * + * Copyright (C) 1992 Bruce Evans + */ + +#define ALIGNMENT 4 +#define CHAR_BIT 8 +#define D_BIT (D_SIZE * CHAR_BIT) +#define D_EXP_BIAS ((1 << (D_EXP_BIT - 1)) - 1) +#define D_EXP_BIT 11 +#define D_EXP_INFINITE ((1 << D_EXP_BIT) - 1) +#define D_EXP_MASK (((1 << D_EXP_BIT) - 1) << D_EXP_SHIFT) +#define D_EXP_SHIFT (REG_BIT - (1 + D_EXP_BIT)) +#define D_FRAC_BIT 53 +#define D_FRAC_MASK (D_NORM_MASK - 1) +#define D_HIGH 4 +#define D_HUGE_HIGH (D_EXP_MASK - 1) +#define D_HUGE_LOW 0xFFFFFFFF +#define D_LOW 0 +#define D_NORM_BIT (D_FRAC_BIT - 1 - REG_BIT) +#define D_NORM_MASK (1 << D_NORM_BIT) +#define D_SIGN_BIT 63 +#define D_SIGN_MASK (1 << (D_SIGN_BIT - REG_BIT)) +#define D_SIZE 8 +#define F_BIT (F_SIZE * CHAR_BIT) +#define F_EXP_BIAS ((1 << (F_EXP_BIT - 1)) - 1) +#define F_EXP_BIT 8 +#define F_EXP_INFINITE ((1 << F_EXP_BIT) - 1) +#define F_EXP_MASK (((1 << F_EXP_BIT) - 1) << F_EXP_SHIFT) +#define F_EXP_SHIFT (REG_BIT - (1 + F_EXP_BIT)) +#define F_FRAC_BIT 24 +#define F_FRAC_MASK (F_NORM_MASK - 1) +#define F_HIGH 0 +#define F_HUGE_HIGH (F_EXP_MASK - 1) +#define F_NORM_BIT (F_FRAC_BIT - 1) +#define F_NORM_MASK (1 << F_NORM_BIT) +#define F_SIGN_BIT 31 +#define F_SIGN_MASK (1 << F_SIGN_BIT) +#define F_SIZE 4 +#define FREE_D_SIGN_BIT_TEST (D_SIGN_BIT % REG_BIT == REG_BIT - 1) +#define GENREG_SIZE 4 +#define INT_BIT 32 +#define INT_MAX 0x7FFFFFFF +#define INT_MIN (-0x7FFFFFFF - 1) +#define PC_SIZE 4 +#define REG_BIT 32 +#define SHORT_BIT 16 +#define UINT_MAX 0xFFFFFFFF diff --git a/bin86-0.3/bccfp/fptoi.x b/bin86-0.3/bccfp/fptoi.x new file mode 100644 index 0000000..30de729 --- /dev/null +++ b/bin86-0.3/bccfp/fptoi.x @@ -0,0 +1,117 @@ +! bcc 386 floating point routines (version 2) +! -- dtoi, dtol, dtoui, dtoul, ftoi, ftol (todo: ftoui, ftoul) +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + + .extern fpoverflow + .extern Fpushf + +! Convert double x at [ebx] to int and return in eax + + .globl dtoi + .globl dtol + .align ALIGNMENT +dtoi: +dtol: + mov eax,D_HIGH[ebx] + mov ecx,eax + and ecx,#D_EXP_MASK ! extract exponent + jz retz ! if 0 return 0 + test eax,#D_SIGN_MASK + jnz negative + call into_dtoui + cmp eax,#INT_MAX + ja overflow_int_max + ret + + .align ALIGNMENT +negative: + and eax,#~D_SIGN_MASK + call into_dtoui + cmp eax,#INT_MIN + ja overflow_int_min + neg eax + ret + + .align ALIGNMENT +overflow_int_max: + call fpoverflow + mov eax,#INT_MAX + ret + + .align ALIGNMENT +overflow_int_min: + js return ! actually INT_MIN is OK + call fpoverflow + mov eax,#INT_MIN +return: + ret + + .align ALIGNMENT +retz: + sub eax,eax ! clear return value + ret + +! Convert double x at [ebx] to unsigned and return in eax + + .globl dtoui + .globl dtoul + .align ALIGNMENT +dtoui: +dtoul: + mov eax,D_HIGH[ebx] + mov ecx,eax + and ecx,#D_EXP_MASK ! extract exponent + jz retz ! if 0 return 0 + test eax,#D_SIGN_MASK + jnz overflow_0 +into_dtoui: + mov edx,D_LOW[ebx] + + and eax,#D_FRAC_MASK ! extract fraction + or eax,#D_NORM_MASK ! restore normalization bit + + shr ecx,#D_EXP_SHIFT ! convert exponent to number + sub ecx,#D_EXP_BIAS+D_NORM_BIT ! adjust radix point + jl dtoui_rightshift ! should we shift left or right? + cmp ecx,#D_BIT-D_FRAC_BIT ! can shift left by at most this + ja overflow_uint_max ! if more, overflow + shld eax,edx,cl + ret + + .align ALIGNMENT +dtoui_rightshift: + neg ecx ! make shift count > 0 + cmp ecx,#REG_BIT ! big shifts would be taken mod REG_BIT ... + jae retz ! ... no good + shr eax,cl ! otherwise it is faster to do the shift ... + ret ! ... then to jump for the slightly smaller + ! ... shift counts that shift out all bits + + .align ALIGNMENT +overflow_0: + call fpoverflow + sub eax,eax + ret + + .align ALIGNMENT +overflow_uint_max: + call fpoverflow + mov eax,#UINT_MAX + ret + +! ftoi is like dtoi except ebx points to a float instead of a double. +! This is a quickly-written slowish version that does not take advantage +! of the float being smaller. + + .globl ftoi + .globl ftol + .align ALIGNMENT +ftoi: +ftol: + call Fpushf + mov ebx,esp + call dtoi + add esp,#D_SIZE + ret diff --git a/bin86-0.3/bccfp/fpulld.x b/bin86-0.3/bccfp/fpulld.x new file mode 100644 index 0000000..928a846 --- /dev/null +++ b/bin86-0.3/bccfp/fpulld.x @@ -0,0 +1,20 @@ +! bcc 386 floating point routines (version 2) -- Fpulld +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + +! Pop double from stack and store at address [ebx] + + .globl Fpulld + .align ALIGNMENT +Fpulld: + pop ecx + pop dword D_LOW[ebx] + pop dword D_HIGH[ebx] + jmp ecx ! return + +! This popping method is much slower on 486's because popping to memory +! takes 5+ while moving twice takes 2 and the return address doesn't +! have to be moved. However, popping is a little faster on a non-cached +! 386/20 with static column RAM although the memory access pattern is +! better for a double-width move than for popping. What about a cached 386? diff --git a/bin86-0.3/bccfp/fpullf.x b/bin86-0.3/bccfp/fpullf.x new file mode 100644 index 0000000..417ef92 --- /dev/null +++ b/bin86-0.3/bccfp/fpullf.x @@ -0,0 +1,101 @@ +! bcc 386 floating point routines (version 2) -- Fpullf +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + + .extern fpoverflow + .extern fpunderflow + +! pop double from stack, convert to float and store at address [ebx] + + .globl Fpullf + .align ALIGNMENT +Fpullf: + +! Step 1: load and shift left + + mov eax,PC_SIZE+D_LOW[esp] ! lower dword + mov edx,PC_SIZE+D_HIGH[esp] ! upper dword + mov ecx,edx ! copy upper dword into ecx ... + and ecx,#D_SIGN_MASK ! ... and extract sign + and edx,#D_EXP_MASK | D_FRAC_MASK ! extract exponent and fraction + sub edx,#(D_EXP_BIAS-F_EXP_BIAS) << D_EXP_SHIFT ! adjust exponent bias + jz underflow + cmp edx,#F_EXP_INFINITE << D_EXP_SHIFT ! check if exponent lies in reduced range + jae outofbounds + shld edx,eax,#D_EXP_BIT-F_EXP_BIT ! shift exponent and fraction + +! Step 2: round + + test eax,#1 << (REG_BIT-1-(D_EXP_BIT-F_EXP_BIT)) ! test upper rounding bit + jz step3 ! below middle, don't round up + test eax,#(1 << (REG_BIT-1-(D_EXP_BIT-F_EXP_BIT)))-1 ! test other rounding bits + jnz roundup ! above middle, round up + test dl,#1 ! in middle, check parity bit + jz step3 ! already even, otherwise round up to make even + +roundup: + inc edx ! carry 1 + test edx,#F_FRAC_MASK ! is fraction now 0? (carry into F_EXPMASK) + jnz step3 ! no -- carry complete + cmp edx,#(F_EXP_INFINITE << F_EXP_SHIFT) & ~F_NORM_MASK ! yes (very unlikely): check for overflow + ! XXX - I think these tests say 0x7e7fffff overflows + jae overflow + +! Step 3: put it all together + +step3: + or edx,ecx ! include sign + mov F_HIGH[ebx],edx ! store the result in [ebx] + ret #D_SIZE ! return and release double from stack + + .align ALIGNMENT +outofbounds: + jns overflow ! have just compared exponent with the max +underflow: +! call fpunderflow ! XXX + push ecx ! save sign + mov ecx,edx + and ecx,#~D_FRAC_MASK ! assume fraction is below exp + cmp ecx,#-((D_EXP_BIAS-F_EXP_BIAS) << D_EXP_SHIFT) ! was exp = 0? + jz exp_x_0 + shr ecx,#D_EXP_SHIFT + neg ecx + and edx,#D_FRAC_MASK + or edx,#D_NORM_MASK + shld edx,eax,#D_EXP_BIT-F_EXP_BIT-1 + shl eax,#D_EXP_BIT-F_EXP_BIT-1 + push ebx ! save to use for rounding + sub ebx,ebx + shrd ebx,eax,cl + shrd eax,edx,cl + shr edx,cl + cmp eax,#1 << (REG_BIT-1) + jb over_denorm_roundup + ja denorm_roundup + test dl,#1 + jz over_denorm_roundup +denorm_roundup: +#if F_NORM_BIT != F_EXP_SHIFT +#include "carry into norm bit doesn't go into low exp bit" +#endif + inc edx +over_denorm_roundup: + pop ebx + pop ecx + or edx,ecx + mov F_HIGH[ebx],edx + ret #D_SIZE + + .align ALIGNMENT +exp_x_0: ! XXX check for denormals - they underflow + pop ecx + mov dword F_HIGH[ebx],#0 + ret #D_SIZE + + .align ALIGNMENT +overflow: + mov edx,ebx ! put sign in usual reg + call fpoverflow + mov F_HIGH[ebx],dword #F_HUGE_HIGH ! XXX - should use infinity + ret #D_SIZE ! ... if fpoverflow does diff --git a/bin86-0.3/bccfp/fpushd.x b/bin86-0.3/bccfp/fpushd.x new file mode 100644 index 0000000..68caab0 --- /dev/null +++ b/bin86-0.3/bccfp/fpushd.x @@ -0,0 +1,60 @@ +! bcc 386 floating point routines (version 2) -- dtof, Fpushd, Fneg, Fnegd +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + + .extern Fpullf + +! dtof converts the double at [ebx] to a float and pushes the float onto +! the stack (D_SIZE bytes are allocated for the float although only the bottom +! F_SIZE are used). +! This is a quickly-written slowish version. + + .globl dtof + .align ALIGNMENT +dtof: + pop eax + sub esp,#D_SIZE ! build result here + push eax ! put back return address + call Fpushd + lea ebx,D_SIZE+PC_SIZE[esp] + call Fpullf + ret + +! Push double at address [ebx] onto stack + + .globl Fpushd + .align ALIGNMENT +Fpushd: + pop ecx + push dword D_HIGH[ebx] + push dword D_LOW[ebx] + jmp ecx ! return + +! Push double at address [ebx] onto stack, negating it on the way. + +! Don't worry about generating -0 because other routines have to allow for +! it anyway. + +! Perhaps this and Fneg should check for denormals and illegal operands +! (I think only signalling NaNs are illegal). +! fchs doesn't check, but fld does. +! Our Fpushd is not quite like fld because no conversions are involved. + + .globl Fnegd + .align ALIGNMENT +Fnegd: + pop ecx + mov eax,D_HIGH[ebx] + xor eax,#D_SIGN_MASK ! toggle sign + push eax + push dword D_LOW[ebx] + jmp ecx ! return + +! Negate double on stack + + .globl Fneg + .align ALIGNMENT +Fneg: + xorb PC_SIZE+D_SIZE-1[esp],D_SIGN_MASK >> (REG_BIT-CHAR_BIT) ! toggle sign + ret diff --git a/bin86-0.3/bccfp/fpushf.x b/bin86-0.3/bccfp/fpushf.x new file mode 100644 index 0000000..7cb2f8d --- /dev/null +++ b/bin86-0.3/bccfp/fpushf.x @@ -0,0 +1,74 @@ +! bcc 386 floating point routines (version 2) -- Fpushf, Fnegf +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + + .extern fpdenormal + +! Load float at [ebx], convert to double and push on stack + + .globl Fpushf + .align ALIGNMENT +Fpushf: + mov edx,F_HIGH[ebx] +into_Fpushf: + test edx,#F_EXP_MASK ! is exponent 0? + jz exp_x_0 + + mov ecx,edx ! extract sign + and ecx,#F_SIGN_MASK + + and edx,#F_EXP_MASK | F_FRAC_MASK ! extract exponent and fraction + sub eax,eax ! clear lower dword + shrd eax,edx,#D_EXP_BIT-F_EXP_BIT ! shift exponent and fraction to new position + shr edx,#D_EXP_BIT-F_EXP_BIT + + add edx,#(D_EXP_BIAS-F_EXP_BIAS) << D_EXP_SHIFT ! adjust exponent bias + or edx,ecx ! include sign + + pop ecx + push edx ! upper dword + push eax ! lower dword + jmp ecx ! return + + .align ALIGNMENT +exp_x_0: + mov eax,edx + and eax,#F_FRAC_MASK + jnz x_denorm + pop ecx + push eax ! upper dword = 0 + push eax ! lower dword = 0 + jmp ecx ! return + + .align ALIGNMENT +x_denorm: + call fpdenormal + bsr ecx,eax ! zzzz + neg ecx + add ecx,#F_NORM_BIT + shl eax,cl + and eax,#F_FRAC_MASK + neg ecx + add ecx,#D_EXP_BIAS-F_EXP_BIAS+1 + shl ecx,#D_EXP_SHIFT + and edx,#F_SIGN_MASK ! assumed same as D_SIGN_MASK + or edx,ecx + sub ecx,ecx + shrd ecx,eax,#D_EXP_BIT-F_EXP_BIT + shr eax,#D_EXP_BIT-F_EXP_BIT + or edx,eax + + pop eax + push edx ! upper dword + push ecx ! lower dword + jmp eax ! return + +! Fnegf: as Fpushf, but negate double before pushing onto stack + + .globl Fnegf + .align ALIGNMENT +Fnegf: + mov edx,F_HIGH[ebx] + xor edx,#F_SIGN_MASK ! toggle sign + jmp into_Fpushf ! join Fpushf diff --git a/bin86-0.3/bccfp/fpushi.x b/bin86-0.3/bccfp/fpushi.x new file mode 100644 index 0000000..b19aae2 --- /dev/null +++ b/bin86-0.3/bccfp/fpushi.x @@ -0,0 +1,126 @@ +! bcc 386 floating point routines (version 2) +! -- Fpushi, Fpushl, Fpushs, Fpushc, Fpushuc, Fpushui, Fpushul, Fpushus +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + +! Convert the short in ax to double and push on stack + + .globl Fpushs + .align ALIGNMENT +Fpushs: + cwde + add eax,#0 ! fast 3-byte instruction to align + +! Convert the int or long in eax to double and push on stack + + .globl Fpushi + .globl Fpushl +! .align ALIGNMENT ! don't do this until it pads with nop's +Fpushi: +Fpushl: + test eax,eax + jz return_eax ! got 0 in eax + mov ebx,#(D_EXP_BIAS+D_NORM_BIT) << D_EXP_SHIFT ! set no-sign and exponent + jns normalize ! sign and fraction bits already set up + mov ebx,#D_SIGN_MASK | ((D_EXP_BIAS+D_NORM_BIT) << D_EXP_SHIFT) ! adjust sign + neg eax ! adjust fraction + jmp normalize + + .align ALIGNMENT +ret1: + mov eax,#D_EXP_BIAS << D_EXP_SHIFT + add eax,#0 ! fast 3-byte instruction to align + +! .align ALIGNMENT ! don't do this until it pads with nop's +return_eax: + pop ecx + push eax ! upper dword + push dword #0 ! lower dword = 0 + jmp ecx ! return + +! Convert the (unsigned) char in al to double and push on stack + + .globl Fpushc + .globl Fpushuc + .align ALIGNMENT +Fpushc: +Fpushuc: + and eax,#(1 << CHAR_BIT)-1 + add eax,#0 ! fast 3-byte instruction to align + +! Convert the unsigned short in ax to double and push on stack + + .globl Fpushus +! .align ALIGNMENT ! don't do this until it pads with nop's +Fpushus: + and eax,#(1 << SHORT_BIT)-1 + add eax,#0 ! fast 3-byte instruction to align + +! Convert the unsigned int or long in eax to double and push on stack + + .globl Fpushui + .globl Fpushul +! .align ALIGNMENT ! don't do this until it pads with nop's +Fpushui: +Fpushul: + cmp eax,#1 ! this tests for both 0 and 1 + jb return_eax ! got 0 in eax + jz ret1 + mov ebx,#(D_EXP_BIAS+D_NORM_BIT) << D_EXP_SHIFT ! set no-sign and exponent + +! .align ALIGNMENT ! don't do this until it pads with nop's +normalize: + sub edx,edx ! clear lower dword of result + +! Find first nonzero bit +! Don't use bsr, it is slow (const + 3n on 386, const + n on 486) + + sub ecx,ecx ! prepare unsigned extension of cl + test eax,#~D_FRAC_MASK + jnz large + test eax,#0xFF << (D_NORM_BIT-8) + jnz middle + shl eax,#8 + sub ebx,#8 << D_EXP_SHIFT + test eax,#0xFF << (D_NORM_BIT-8) + jnz middle + shl eax,#8 + sub ebx,#8 << D_EXP_SHIFT +middle: + shld ecx,eax,#D_NORM_BIT + mov cl,bsr_table[ecx] + add ecx,#REG_BIT-D_NORM_BIT-D_NORM_BIT + neg ecx + shl eax,cl + shl ecx,#D_EXP_SHIFT + sub ebx,ecx +return: + and eax,#D_FRAC_MASK ! remove normalization bit + or eax,ebx ! include exponent (and sign) to fraction + pop ecx + push eax ! upper dword + push edx ! lower dword + jmp ecx ! return + + .align ALIGNMENT +large: + shld ecx,eax,#REG_BIT-(D_NORM_BIT+8) + jnz huge + shld ecx,eax,#REG_BIT-D_NORM_BIT + mov cl,bsr_table[ecx] +got_shift_right: + shrd edx,eax,cl + shr eax,cl + shl ecx,#D_EXP_SHIFT + add ebx,ecx + jmp return + + .align ALIGNMENT +huge: + mov cl,bsr_table[ecx] + add cl,#8 + jmp got_shift_right + + .data + .extern bsr_table diff --git a/bin86-0.3/bccfp/frexp.x b/bin86-0.3/bccfp/frexp.x new file mode 100644 index 0000000..318fc34 --- /dev/null +++ b/bin86-0.3/bccfp/frexp.x @@ -0,0 +1,66 @@ +! bcc 386 floating point routines (version 2) -- _frexp +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + + .extern fpdenormal + +! void frexp(double value, int *exponent); +! splits a double into exponent and fraction (where 0.5 <= fraction < 1.0) + + .globl _frexp + .align ALIGNMENT +_frexp: +push ebx +#undef PC_SIZE +#define PC_SIZE 8 + mov eax,PC_SIZE+D_LOW[esp] ! lower dword of x + mov ebx,PC_SIZE+D_HIGH[esp] ! upper dword of x + mov edx,PC_SIZE+D_SIZE[esp] ! exponent pointer + mov ecx,ebx ! extract exponent here + and ecx,#D_EXP_MASK + jz exp_x_0 + + shr ecx,#D_EXP_SHIFT ! exponent + bias +got_x: + sub ecx,#D_EXP_BIAS-1 ! D_EXP_BIAS is for 1.x form, we want 0.1x form + mov [edx],ecx ! return exponent + and ebx,#D_SIGN_MASK | D_FRAC_MASK ! extract sign and fraction + or ebx,#(D_EXP_BIAS-1) << D_EXP_SHIFT ! set new exponent for 0.1x +mov edx,ebx +pop ebx + ret + + .align ALIGNMENT +exp_x_0: + test ebx,#D_FRAC_MASK + jnz xu_denorm + test eax,eax + jnz xl_denorm + mov [edx],ecx ! return zero exponent + mov ebx,ecx ! guard against -0 (may not be necessary) +mov edx,ebx +pop ebx + ret + + .align ALIGNMENT +xl_denorm: + call fpdenormal + bsr ecx,eax ! zzzz + neg ecx + add ecx,#REG_BIT-1 + shl eax,cl + shld ebx,eax,#D_NORM_BIT+1 + shl eax,#D_NORM_BIT+1 + sub ecx,#D_NORM_BIT+1 + jmp got_x + + .align ALIGNMENT +xu_denorm: + call fpdenormal + bsr ecx,ebx + neg ecx + add ecx,#D_NORM_BIT + shld ebx,eax,cl + shl eax,cl + jmp got_x diff --git a/bin86-0.3/bccfp/ftst.x b/bin86-0.3/bccfp/ftst.x new file mode 100644 index 0000000..2a92ef1 --- /dev/null +++ b/bin86-0.3/bccfp/ftst.x @@ -0,0 +1,28 @@ +! bcc 386 floating point routines (version 2) -- Ftst, Ftstd, Ftstf +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + +#if 0 /* bcc doesn't generate Ftst (but it might in future) */ + .globl Ftst +#endif + .align ALIGNMENT +Ftst: + cmp dword PC_SIZE+D_HIGH[esp],#0 ! need only test upper dword of x + ret #D_SIZE + +! Compare double at address [ebx] with 0 + + .globl Ftstd + .align ALIGNMENT +Ftstd: + cmp dword D_HIGH[ebx],#0 ! need only test upper dword of x + ret + +! Compare float at address [ebx] with 0 + + .globl Ftstf + .align ALIGNMENT +Ftstf: + cmp dword F_HIGH[ebx],#0 + ret diff --git a/bin86-0.3/bccfp/ldexp.x b/bin86-0.3/bccfp/ldexp.x new file mode 100644 index 0000000..bc9dd03 --- /dev/null +++ b/bin86-0.3/bccfp/ldexp.x @@ -0,0 +1,74 @@ +! bcc 386 floating point routines (version 2) -- _ldexp +! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans + +#include "fplib.h" + + .extern fpoverflow + .extern fpunderflow + +! void ldexp(double value, int exponent); +! returns value * (2 ** exponent) + + .globl _ldexp + .align ALIGNMENT +_ldexp: +push ebx +#undef PC_SIZE +#define PC_SIZE 8 + mov ebx,PC_SIZE+D_HIGH[esp] ! upper dword of x + mov ecx,PC_SIZE+D_SIZE[esp] ! exponent arg + mov eax,ebx ! extract exponent (of x) here + and eax,#D_EXP_MASK +! jz exp_y_0 ! may need check for preposterous exponent arg too + + shr eax,#D_EXP_SHIFT ! shift to low bits just for testing + jz underflow ! denormal? + add eax,ecx ! test-add the exponents + jz underflow ! XXX probably need to fiddle norm bit + cmp eax,#D_EXP_INFINITE ! check if still within range + jae outofbounds ! the unsigned compare catches all overflow cases + ! because the exponent of x is non-negative + + shl ecx,#D_EXP_SHIFT ! shift exponent arg bits into final position ... + add ebx,ecx ! ... safe to add it to exponent of x now + mov eax,PC_SIZE+D_LOW[esp] ! lower dword of x +mov edx,ebx +pop ebx + ret + + + .align ALIGNMENT +outofbounds: + test ecx,ecx ! overflow or underflow? + jns overflow +underflow: + mov edx,ebx ! put sign in usual reg + push edi + push esi + mov edi,eax ! put exponent in usual reg + mov eax,2*GENREG_SIZE+PC_SIZE+D_LOW[esp] + ! put lower dword of x in usual reg + mov esi,ebx ! put upper dword of x in usual reg + and esi,#D_EXP_MASK | D_FRAC_MASK + test esi,#D_EXP_MASK + jz foo + and esi,#D_FRAC_MASK + or esi,#D_NORM_MASK +foo: + neg edi +! inc edi ! XXX ? + call fpunderflow + pop esi + pop edi + mov ebx,edx ! XXX = wrong reg +pop ebx + ret + + .align ALIGNMENT +overflow: + mov edx,ebx ! put sign in usual reg + call fpoverflow + mov eax,ecx ! XXX = wrong reg + mov ebx,edx ! XXX = wrong reg +pop ebx + ret diff --git a/bin86-0.3/bccfp/modf.c b/bin86-0.3/bccfp/modf.c new file mode 100644 index 0000000..a83f801 --- /dev/null +++ b/bin86-0.3/bccfp/modf.c @@ -0,0 +1,20 @@ +/* + * bin86/bccfp/modf.c + * + * Copyright (C) 1992 Bruce Evans + */ + +#include <math.h> + +/* Slooow version. */ + +double modf(x, pint) +double x; +double *pint; +{ + if (x >= 0) + *pint = floor(x); + else + *pint = ceil(x); + return x - *pint; +} diff --git a/bin86-0.3/bccfp/test.c b/bin86-0.3/bccfp/test.c new file mode 100644 index 0000000..05b5d84 --- /dev/null +++ b/bin86-0.3/bccfp/test.c @@ -0,0 +1,124 @@ +/* + * bin86/bccfp/test.c + * + * Copyright (C) 1992 Bruce Evans + */ + +#include <sys/times.h> +#include <limits.h> +#include <stdio.h> +#include <time.h> + +#define CONVTYPE int +#define MAX (MIN + NITER - 1) +#define MIN INT_MIN + +#define NITER 100000 + +double one = 1; +double two = 2; +double big = 1e99; + +double d; +double d1; +float f; + +int main() +{ + CONVTYPE cti; + CONVTYPE cto; + clock_t delta; + struct tms finish; + int i; + struct tms start; + +#if 0 + times(&start); + for (cti = MIN; cti <= MAX; ++cti) + { + d = cti; + cto = d; + if (cti != cto) + printf("%08x %08x\n", cti, cto); + if (cti % 10000000 == 0) + { + printf("%8x ok ", cti); + fflush(stdout); + } + } + times(&finish); + delta = finish.tms_utime - start.tms_utime; + printf("Time for %d i -> d and d -> i conversions was %g s (%d t)\n", + MAX - MIN + 1, delta / (double) CLOCKS_PER_SEC, delta); +#endif + + times(&start); + for (cti = MIN; cti <= MAX; ++cti) + d = cti; + times(&finish); + delta = finish.tms_utime - start.tms_utime; + printf("Time for %d i -> d conversions was %g s (%d t)\n", + MAX - MIN + 1, delta / (double) CLOCKS_PER_SEC, delta); + + times(&start); + for (cti = MIN; cti <= MAX; ++cti) + { + d = cti; + cto = d; + } + times(&finish); + delta = finish.tms_utime - start.tms_utime - delta; + printf("Time for %d d -> i conversions was %g s (%d t)\n", + MAX - MIN + 1, delta / (double) CLOCKS_PER_SEC, delta); + + d = 0; + times(&start); + for (i = 0; i < NITER; ++i) + d = d + 1; + times(&finish); + delta = finish.tms_utime - start.tms_utime; + printf("Time for adding %d 1.0's to 0.0 was %g s (%d t), result = %g\n", + NITER, delta / (double) CLOCKS_PER_SEC, delta, d); + + d = 0; + times(&start); + for (; d < NITER;) + d = d + 1; + times(&finish); + delta = finish.tms_utime - start.tms_utime; + printf("Time for adding %d 1.0's to 0.0 (d index) was %g s (%d t), result = %g\n", + NITER, delta / (double) CLOCKS_PER_SEC, delta, d); + + times(&start); + for (i = 1; i <= NITER; ++i) + { + d1 = i; + d = d1 * d1; + } + times(&finish); + delta = finish.tms_utime - start.tms_utime; + printf("Time for %d mults was %g s (%d t), result = %g\n", + NITER, delta / (double) CLOCKS_PER_SEC, delta, d); + + times(&start); + for (i = 1; i <= NITER; ++i) + { + d1 = i; + d = 1 / d1; + } + times(&finish); + delta = finish.tms_utime - start.tms_utime; + printf("Time for %d divs was %g s (%d t), result = %g\n", + NITER, delta / (double) CLOCKS_PER_SEC, delta, d); + + f = 0; + times(&start); + for (i = 0; i < NITER; ++i) + f = f + 1; + times(&finish); + delta = finish.tms_utime - start.tms_utime; + printf("Time for adding %d 1.0f's to 0.0f was %g s (%d t), result = %g\n", + NITER, delta / (double) CLOCKS_PER_SEC, delta, f); + + return 0; +} |