path: root/bin86-0.3/bccfp
Diffstat (limited to 'bin86-0.3/bccfp')
-rw-r--r--  bin86-0.3/bccfp/Makefile     44
-rw-r--r--  bin86-0.3/bccfp/bccfp.tex     0
-rw-r--r--  bin86-0.3/bccfp/changes      30
-rw-r--r--  bin86-0.3/bccfp/fabs.x       17
-rw-r--r--  bin86-0.3/bccfp/fadd.x      485
-rw-r--r--  bin86-0.3/bccfp/fcomp.x      89
-rw-r--r--  bin86-0.3/bccfp/fdiv.x      312
-rw-r--r--  bin86-0.3/bccfp/fmul.x      150
-rw-r--r--  bin86-0.3/bccfp/fpbsr.x      25
-rw-r--r--  bin86-0.3/bccfp/fperr.c      50
-rw-r--r--  bin86-0.3/bccfp/fperr.h      14
-rw-r--r--  bin86-0.3/bccfp/fperror.x   126
-rw-r--r--  bin86-0.3/bccfp/fplib.h      49
-rw-r--r--  bin86-0.3/bccfp/fptoi.x     117
-rw-r--r--  bin86-0.3/bccfp/fpulld.x     20
-rw-r--r--  bin86-0.3/bccfp/fpullf.x    101
-rw-r--r--  bin86-0.3/bccfp/fpushd.x     60
-rw-r--r--  bin86-0.3/bccfp/fpushf.x     74
-rw-r--r--  bin86-0.3/bccfp/fpushi.x    126
-rw-r--r--  bin86-0.3/bccfp/frexp.x      66
-rw-r--r--  bin86-0.3/bccfp/ftst.x       28
-rw-r--r--  bin86-0.3/bccfp/ldexp.x      74
-rw-r--r--  bin86-0.3/bccfp/modf.c       20
-rw-r--r--  bin86-0.3/bccfp/test.c      124
24 files changed, 2201 insertions, 0 deletions
diff --git a/bin86-0.3/bccfp/Makefile b/bin86-0.3/bccfp/Makefile
new file mode 100644
index 0000000..6caa362
--- /dev/null
+++ b/bin86-0.3/bccfp/Makefile
@@ -0,0 +1,44 @@
+# Makefile for bcc 386 software floating point library
+
+.SUFFIXES: .x # .x files are .s files that need C-preprocessing
+.x.o:
+ $(CPP) $(CPPFLAGS) $< >tmp
+ $(AS) tmp -n $* -o $@
+
+AS =as -3
+CFLAGS =-O
+CPP =/lib/cpp
+CPPFLAGS =-P
+FPDIST =Makefile $(FPSRC) test.c bccfp.tex
+FPSRC =fadd.x fcomp.x fdiv.x fmul.x fpbsr.x \
+ fperr.c fperror.x fptoi.x fpushd.x fpulld.x \
+ fpushi.x fpushf.x fpullf.x frexp.x ftst.x \
+ gcclib.x \
+ fabs.x ldexp.x modf.c \
+ fperr.h fplib.h
+FPOBJ =fadd.o fcomp.o fdiv.o fmul.o fpbsr.o \
+ fperr.o fperror.o fptoi.o fpushd.o fpulld.o \
+ fpushi.o fpushf.o fpullf.o frexp.o ftst.o \
+ fabs.o ldexp.o modf.o
+JUNK =tmp
+LIB =.
+
+test: test.c $(LIB)/libfp.a
+ $(CC) -o $@ test.c $(LIB)/libfp.a -lm
+
+$(FPOBJ): fplib.h
+fperr.c fperror.x: fperr.h
+
+$(LIB)/libfp.a: $(FPOBJ)
+ ar rc $(LIB)/libfp.a $(FPOBJ)
+ rm -f $(JUNK)
+
+dist: $(FPDIST)
+ /bin/tar cvf - $(FPDIST) | /bin/compress -b 13 >bccfp.tar.Z
+ uue bccfp.tar.Z
+
+clean:
+ rm -f $(FPOBJ) $(JUNK) test
+
+realclean: clean
+ rm -f $(LIB)/libfp.a bccfp.tar.Z bccfp.uue
diff --git a/bin86-0.3/bccfp/bccfp.tex b/bin86-0.3/bccfp/bccfp.tex
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/bin86-0.3/bccfp/bccfp.tex
diff --git a/bin86-0.3/bccfp/changes b/bin86-0.3/bccfp/changes
new file mode 100644
index 0000000..2cc632a
--- /dev/null
+++ b/bin86-0.3/bccfp/changes
@@ -0,0 +1,30 @@
+fcomp:
+Fixes for negative 0 (perhaps it shouldn't be generated at all, like denormals
+and infinities, which would cause even more trouble, but the Fsub routine or
+something generated one).
+
+frexp.x:
+Deleted 3rd arg (used to return value when bcc wasn't doing it right).
+
+Fixed frexp(value = 0) and ldexp(value = 0) returning nonzero.
+
+Most files:
+Changed comment symbol to '!' for new assembler (not the native ';' in
+case this is ported to ACK someday).
+
+Avoided using ebp and unnecessary register saves.
+
+Changed assembler style to make it a bit more portable or like I do it
+(no '$' for hex, 8[esp] instead of [esp+8], use only .define and not export
+or .globl, use '#' (could use nothing) instead of '*' for immediate).
+The partly-supported 8(ebp) and .globl would be still more portable.
+
+Changed terminology 'mantissa' to 'fraction'.
+
+Round to even. Required for 'paranoia' not to find any defects.
+
+Used preprocessor.
+
+Parametrized most of the magic numbers. Phew!
+
+Supported denormals. Now 'paranoia' doesn't find any flaws.
diff --git a/bin86-0.3/bccfp/fabs.x b/bin86-0.3/bccfp/fabs.x
new file mode 100644
index 0000000..fe81676
--- /dev/null
+++ b/bin86-0.3/bccfp/fabs.x
@@ -0,0 +1,17 @@
+! bcc 386 floating point routines (version 2) -- _fabs
+! author: Bruce Evans
+
+#include "fplib.h"
+
+! double fabs(double value);
+! returns the absolute value of a number
+! this works for all NaNs, like the 80*87 fabs, but perhaps we should check
+! for exceptions that can happen when an 80*87 register is loaded
+
+ .globl _fabs
+ .align ALIGNMENT
+_fabs:
+ mov eax,PC_SIZE+D_LOW[esp]
+ mov edx,PC_SIZE+D_HIGH[esp]
+ and edx,~D_SIGN_MASK
+ ret
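The routine above just clears the sign bit in the high dword. A minimal C sketch of the same operation, for illustration only (my_fabs is a made-up name, not part of the library):

#include <stdint.h>
#include <string.h>

double my_fabs(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);      /* view the double as raw bits        */
    bits &= ~(1ULL << 63);               /* clear the sign bit (D_SIGN_BIT 63) */
    memcpy(&x, &bits, sizeof x);
    return x;
}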
diff --git a/bin86-0.3/bccfp/fadd.x b/bin86-0.3/bccfp/fadd.x
new file mode 100644
index 0000000..d1e60b1
--- /dev/null
+++ b/bin86-0.3/bccfp/fadd.x
@@ -0,0 +1,485 @@
+! bcc 386 floating point routines (version 2)
+! -- Fadd, Faddd, Faddf, Fsub, Fsubd, Fsubf, normalize2
+! author: Bruce Evans
+
+#include "fplib.h"
+
+#define FRAME_SIZE (3 * GENREG_SIZE + PC_SIZE)
+
+ .extern Fpushf
+ .extern fpdenormal
+ .extern fpoverflow
+ .extern fpunderflow
+
+ .globl Fadd
+ .align ALIGNMENT
+Fadd:
+ push ebp
+ push edi
+ push esi
+ mov eax,FRAME_SIZE+D_LOW[esp]
+ mov edx,FRAME_SIZE+D_HIGH[esp]
+ mov ebx,FRAME_SIZE+D_SIZE+D_LOW[esp]
+ mov ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp]
+ call addition
+ mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax
+ mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx
+ pop esi
+ pop edi
+ pop ebp
+ ret #D_SIZE
+
+ .globl Faddd
+ .align ALIGNMENT
+Faddd:
+ push ebp
+ push edi
+ push esi
+ mov eax,FRAME_SIZE+D_LOW[esp]
+ mov edx,FRAME_SIZE+D_HIGH[esp]
+ mov ecx,D_HIGH[ebx]
+ mov ebx,D_LOW[ebx]
+ call addition
+ mov FRAME_SIZE+D_LOW[esp],eax
+ mov FRAME_SIZE+D_HIGH[esp],edx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ .globl Faddf
+ .align ALIGNMENT
+Faddf:
+ push ebp
+ push edi
+ push esi
+ call Fpushf
+ pop ebx ! yl
+ pop ecx ! yu
+ mov eax,FRAME_SIZE+D_LOW[esp] ! xl
+ mov edx,FRAME_SIZE+D_HIGH[esp] ! xu
+ call addition
+ mov FRAME_SIZE+D_LOW[esp],eax
+ mov FRAME_SIZE+D_HIGH[esp],edx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ .globl Fsub
+ .align ALIGNMENT
+Fsub:
+ push ebp
+ push edi
+ push esi
+ mov eax,FRAME_SIZE+D_LOW[esp]
+ mov edx,FRAME_SIZE+D_HIGH[esp]
+ mov ebx,FRAME_SIZE+D_SIZE+D_LOW[esp]
+ mov ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp]
+ xor ecx,#D_SIGN_MASK ! complement sign
+ call addition
+ mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax
+ mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx
+ pop esi
+ pop edi
+ pop ebp
+ ret #D_SIZE
+
+ .globl Fsubd
+ .align ALIGNMENT
+Fsubd:
+ push ebp
+ push edi
+ push esi
+ mov eax,FRAME_SIZE+D_LOW[esp]
+ mov edx,FRAME_SIZE+D_HIGH[esp]
+ mov ecx,D_HIGH[ebx]
+ mov ebx,D_LOW[ebx]
+ xor ecx,#D_SIGN_MASK ! complement sign
+ call addition
+ mov FRAME_SIZE+D_LOW[esp],eax
+ mov FRAME_SIZE+D_HIGH[esp],edx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ .globl Fsubf
+ .align ALIGNMENT
+Fsubf:
+ push ebp
+ push edi
+ push esi
+ call Fpushf
+ pop ebx ! yl
+ pop ecx ! yu
+ mov eax,FRAME_SIZE+D_LOW[esp] ! xl
+ mov edx,FRAME_SIZE+D_HIGH[esp] ! xu
+ xor ecx,#D_SIGN_MASK ! complement sign
+ call addition
+ mov FRAME_SIZE+D_LOW[esp],eax
+ mov FRAME_SIZE+D_HIGH[esp],edx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ .align ALIGNMENT
+exp_y_0:
+
+! Check for x denormal, to split off special case where both are denormal,
+! so the norm bit (or 1 higher) is known to be set for addition, so addition
+! can be done faster
+
+ test esi,#D_EXP_MASK
+ jnz x_normal_exp_y_0
+ test esi,esi ! test top bits of x fraction
+ jnz both_denorm ! denormal iff nonzero fraction with zero exp
+ test eax,eax ! test rest of fraction
+ jz return_edx_eax ! everything 0 (XXX - do signs matter?)
+both_denorm:
+ call fpdenormal
+ test ebp,#D_SIGN_MASK
+ jnz denorm_subtract
+
+! Add denormal x to denormal or zero y
+
+#if D_NORM_BIT != D_EXP_SHIFT
+#include "error, carry into norm bit does not go into exponent"
+#endif
+
+ add eax,ebx
+ adc esi,edi
+ or edx,esi
+ ret
+
+denorm_subtract:
+ sub eax,ebx
+ sbb esi,edi
+ or edx,esi
+ ret
+
+ .align ALIGNMENT
+x_normal_exp_y_0:
+ test edi,edi ! this is like the check for x denormal
+ jnz y_denorm
+ test ebx,ebx
+ jz return_edx_eax ! y = 0
+y_denorm:
+ call fpdenormal
+ or ecx,#1 << D_EXP_SHIFT ! normalize y by setting exponent to 1
+ jmp got_y
+
+ .align ALIGNMENT
+return_edx_eax:
+ ret
+
+ .align ALIGNMENT
+add_bigshift:
+ cmp ecx,#D_FRAC_BIT+2
+ jae return_edx_eax ! x dominates y
+ sub ecx,#REG_BIT
+ shrd ebp,ebx,cl
+ shrd ebx,edi,cl
+ shr edi,cl
+ add eax,edi
+ adc esi,#0
+ xchg ebp,ebx
+ br normalize
+
+ .align ALIGNMENT
+addition:
+ mov esi,edx ! this mainly for consistent naming
+ and esi,#D_EXP_MASK | D_FRAC_MASK ! discard sign so comparison is simple
+ mov edi,ecx ! free cl for shifts
+ and edi,#D_EXP_MASK | D_FRAC_MASK
+ cmp esi,edi
+ ja xbigger
+ jb swap
+ cmp eax,ebx
+ jae xbigger
+swap:
+ xchg edx,ecx
+ xchg eax,ebx
+ xchg esi,edi
+xbigger:
+
+! edx holds sign of result from here on
+! and exponent of result before the normalization step
+
+ mov ebp,edx ! prepare difference of signs
+ xor ebp,ecx
+
+ and ecx,#D_EXP_MASK ! extract exp_y and check for y 0 or denormal
+ beq exp_y_0 ! otherwise x is not 0 or denormal either
+ and edi,#D_FRAC_MASK ! extract fraction
+ or edi,#D_NORM_MASK ! normalize
+got_y:
+ and esi,#D_FRAC_MASK ! extract fraction
+ or esi,#D_NORM_MASK ! normalize
+
+ sub ecx,edx ! carries from non-exp bits in edx killed later
+ neg ecx
+ and ecx,#D_EXP_MASK
+ shr ecx,#D_EXP_SHIFT ! difference of exponents
+
+got_x_and_y:
+ and ebp,#D_SIGN_MASK ! see if signs are same
+ bne subtract ! else roundoff reg ebp has been cleared
+
+ cmp cl,#REG_BIT
+ bhis add_bigshift
+ shrd ebp,ebx,cl
+ shrd ebx,edi,cl
+ shr edi,cl
+ add eax,ebx
+ adc esi,edi
+
+! result edx(D_SIGN_MASK | D_EXP_MASK bits):esi:eax:ebp but needs normalization
+
+ mov edi,edx
+ and edi,#D_EXP_MASK
+ test esi,#D_NORM_MASK << 1
+ jnz add_loverflow
+
+add_round:
+ cmp ebp,#1 << (REG_BIT-1) ! test roundoff register
+ jb add_done ! no rounding
+ jz tie
+add_roundup:
+ add eax,#1
+ adc esi,#0
+ test esi,#D_NORM_MASK << 1
+ jnz pre_add_loverflow ! rounding may cause overflow!
+add_done:
+ mov ecx,edx ! duplicated code from 'done'
+ and edx,#D_SIGN_MASK
+ or edx,edi
+ and esi,#D_FRAC_MASK
+ or edx,esi
+ ret
+
+ .align ALIGNMENT
+tie:
+ test al,#1 ! tie case, round to even
+ jz add_done ! even, no rounding
+ jmp add_roundup
+
+ .align ALIGNMENT
+pre_add_loverflow:
+ sub ebp,ebp ! clear rounding register
+ ! probably avoiding tests for more rounding
+add_loverflow:
+ shrd ebp,eax,#1
+ jnc over_set_sticky_bit
+ or ebp,#1
+over_set_sticky_bit:
+ shrd eax,esi,#1
+ shr esi,#1
+ add edi,1 << D_EXP_SHIFT
+ cmp edi,#D_EXP_INFINITE << D_EXP_SHIFT
+ jl add_round
+overflow:
+ call fpoverflow
+ mov eax,ecx ! XXX - wrong reg
+ ret
+
+! result edx(D_SIGN_MASK | D_EXP_MASK bits):
+! esi((D_NORM_MASK << 1) | D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx
+! but needs normalization
+
+ .align ALIGNMENT
+normalize:
+ mov edi,edx
+ and edi,#D_EXP_MASK
+ test esi,#D_NORM_MASK << 1
+ bne loverflow
+
+! result edx(D_SIGN_MASK bit):edi(D_EXP_MASK bits):
+! esi(D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx
+! but needs normalization
+
+ .globl normalize2
+normalize2:
+ test esi,#D_NORM_MASK ! already-normalized is very common
+ jz normalize3
+round:
+ cmp ebp,#1 << (REG_BIT-1) ! test roundoff register
+ jb done ! no rounding
+ jz near_tie
+roundup:
+ add eax,#1
+ adc esi,#0
+ test esi,#D_NORM_MASK << 1
+ bne pre_loverflow ! rounding may cause overflow!
+done:
+ cmp edi,#D_EXP_INFINITE << D_EXP_SHIFT
+ jae overflow
+ and edx,#D_SIGN_MASK ! extract sign of largest and result
+ or edx,edi ! include exponent with sign
+ and esi,#D_FRAC_MASK ! discard norm bit
+ or edx,esi ! include fraction with sign and exponent
+ ret
+
+ .align ALIGNMENT
+near_tie:
+ test ebx,ebx
+ jnz roundup
+ test al,#1 ! tie case, round to even
+ jz done ! even, no rounding
+ jmp roundup
+
+ .align ALIGNMENT
+not_in_8_below:
+ shld ecx,esi,#REG_BIT-D_NORM_BIT+16 ! in 9 to 16 below?
+ jz not_in_16_below ! must be way below (17-20 for usual D_NORM_BIT)
+ mov cl,bsr_table[ecx] ! bsr(esi) - (D_NORM_BIT-16)
+ neg ecx ! (D_NORM_BIT-16) - bsr(esi)
+ add ecx,#16
+ jmp got_shift
+
+ .align ALIGNMENT
+not_in_16_below:
+ mov cl,bsr_table[esi] ! bsr(esi) directly
+ neg ecx ! -bsr(esi)
+ add ecx,#D_NORM_BIT ! D_NORM_BIT - bsr(esi)
+ jmp got_shift
+
+ .align ALIGNMENT
+normalize3:
+ test esi,esi
+ jz shift32
+
+! Find first nonzero bit in esi
+! Don't use bsr, it is very slow (const + 3 * bit_found)
+! We know that there is some nonzero bit, and the norm bit and above are clear
+
+ sub ecx,ecx ! prepare unsigned extension of cl
+ shld ecx,esi,#REG_BIT-D_NORM_BIT+8 ! any bits in 8 below norm bit?
+ jz not_in_8_below
+ mov cl,bsr_table[ecx] ! bsr(esi) - (D_NORM_BIT-8)
+ neg ecx ! (D_NORM_BIT-8) - bsr(esi)
+ add ecx,#8 ! D_NORM_BIT - bsr(esi)
+got_shift:
+ shld esi,eax,cl
+ shld eax,ebp,cl
+ shld ebp,ebx,cl
+ shl ebx,cl
+ shl ecx,D_EXP_SHIFT
+ sub edi,ecx
+ bhi round ! XXX - can rounding change the exponent to > 0?
+ ! not bgt since edi may be 0x80000000
+ neg edi
+ shr edi,#D_EXP_SHIFT
+ inc edi
+ br fpunderflow
+
+ .align ALIGNMENT
+pre_loverflow:
+ sub ebp,ebp ! clear rounding registers
+ sub ebx,ebx ! probably avoiding tests for more rounding
+
+loverflow:
+ shr esi,#1 ! carry bit stayed in the reg
+ rcr eax,#1
+ rcr ebp,#1
+ rcr ebx,#1
+ add edi,1 << D_EXP_SHIFT
+ cmp edi,#D_EXP_INFINITE << D_EXP_SHIFT
+ blt round
+ call fpoverflow
+ mov eax,ecx ! XXX - wrong reg
+ ret
+
+ .align ALIGNMENT
+shift32:
+ test eax,eax
+ jz shift64
+ mov esi,eax
+ mov eax,ebp
+ mov ebp,ebx
+ sub ebx,ebx
+ sub edi,#REG_BIT << D_EXP_SHIFT
+shiftxx:
+ test esi,#~(D_NORM_MASK | D_FRAC_MASK)
+ jz over_adjust ! else too big already
+ shrd ebx,ebp,#D_BIT-D_FRAC_BIT
+ shrd ebp,eax,#D_BIT-D_FRAC_BIT
+ shrd eax,esi,#D_BIT-D_FRAC_BIT
+ shr esi,#D_BIT-D_FRAC_BIT
+ add edi,#(D_BIT-D_FRAC_BIT) << D_EXP_SHIFT
+over_adjust:
+ test edi,edi
+ bgt normalize2
+ neg edi
+ shr edi,#D_EXP_SHIFT
+ inc edi
+ br fpunderflow
+
+ .align ALIGNMENT
+shift64:
+ test ebp,ebp
+ jz shift96
+ mov esi,ebp
+ mov eax,ebx
+ sub ebp,ebp
+ mov ebx,ebp
+ sub edi,#(2*REG_BIT) << D_EXP_SHIFT
+ jmp shiftxx
+
+ .align ALIGNMENT
+shift96:
+ test ebx,ebx ! XXX - this test is probably unnecessary
+ ! since the shift must be small unless we
+ ! are subtracting 2 almost-equal numbers,
+ ! and then the bits beyond 64 will mostly
+ ! be 0
+ jz return_esi_eax ! all zero
+ mov esi,ebx
+ sub ebx,ebx
+ sub edi,#(3*REG_BIT) << D_EXP_SHIFT
+ jmp shiftxx
+
+ .align ALIGNMENT
+return_esi_eax:
+ mov edx,esi
+ ret
+
+ .align ALIGNMENT
+subtract:
+ sub ebp,ebp ! set up roundoff register
+ cmp ecx,#REG_BIT
+ jae subtract_bigshift
+ shrd ebp,ebx,cl
+ shrd ebx,edi,cl
+ shr edi,cl
+ neg ebp ! begin subtraction esi:eax:0 - edi:ebx:ebp
+ sbb eax,ebx
+ sbb esi,edi
+ sub ebx,ebx
+ mov edi,edx
+ and edi,#D_EXP_MASK
+ br normalize2
+
+ .align ALIGNMENT
+subtract_bigshift:
+ cmp ecx,#D_FRAC_BIT+2
+ bhis return_edx_eax ! x dominates y
+ sub ecx,#REG_BIT
+ shrd ebp,ebx,cl
+ shrd ebx,edi,cl
+ shr edi,cl
+ not ebp ! begin subtraction esi:eax:0:0 - 0:edi:ebx:ebp
+ not ebx
+ add ebp,#1
+ adc ebx,#0
+ cmc
+ sbb eax,edi
+ sbb esi,#0
+ xchg ebp,ebx
+ mov edi,edx
+ and edi,#D_EXP_MASK
+ br normalize2
+
+ .data
+ .extern bsr_table
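The add_round/tie and round/near_tie paths above implement round to nearest, ties to even, as noted in the 'changes' file. A rough C sketch of just that decision, assuming the roundoff register holds the shifted-out bits with the round bit at its top (the real code also propagates the carry into the upper fraction word; round_to_even is a made-up helper name):

#include <stdint.h>

uint32_t round_to_even(uint32_t frac, uint32_t round_reg)
{
    if (round_reg > 0x80000000u)          /* above halfway: round up       */
        return frac + 1;
    if (round_reg < 0x80000000u)          /* below halfway: truncate       */
        return frac;
    return (frac & 1) ? frac + 1 : frac;  /* exact tie: round to even      */
}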
diff --git a/bin86-0.3/bccfp/fcomp.x b/bin86-0.3/bccfp/fcomp.x
new file mode 100644
index 0000000..71148ab
--- /dev/null
+++ b/bin86-0.3/bccfp/fcomp.x
@@ -0,0 +1,89 @@
+! bcc 386 floating point routines (version 2) -- Fcomp, Fcompd, Fcompf
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+ .extern Fpushf
+
+! Pop 2 doubles from stack and compare them, return result in flags so
+! normal signed branches work (unlike 80x87 which returns the result in
+! the zero and carry flags).
+
+ .globl Fcomp
+ .align ALIGNMENT
+Fcomp:
+ pop ecx ! get return address
+ pop eax ! xl
+ pop edx ! xu
+ push ecx ! put back ret address - pop 2nd double later
+
+! All this popping is bad on 486's since plain mov takes 1+ cycle and pop
+! takes 4 cycles. But this code is designed for 386's where popping is
+! nominally the same speed and saves code space and so maybe instruction
+! fetch time as well as the instruction to adjust the stack (ret #n takes
+! no longer than plain ret but inhibits gotos).
+
+ mov ebx,PC_SIZE+D_LOW[esp] ! yl
+ mov ecx,PC_SIZE+D_HIGH[esp] ! yu
+ jmp compare
+
+! Pop double from stack and compare with double at [ebx]
+
+ .globl Fcompd
+ .align ALIGNMENT
+Fcompd:
+ mov eax,PC_SIZE+D_LOW[esp] ! xl
+ mov edx,PC_SIZE+D_HIGH[esp] ! xu
+ mov ecx,D_HIGH[ebx] ! yu
+ mov ebx,D_LOW[ebx] ! yl
+
+compare:
+ test edx,#D_SIGN_MASK ! is x >= 0?
+ jz cmp0 ! yes; just compare x and y
+ test ecx,#D_SIGN_MASK ! no; but is y >= 0?
+ jz cmp0 ! yes; just compare x and y
+
+ xchg edx,ecx ! x, y < 0, so ...
+ xchg eax,ebx ! ... swap x and y ...
+ xor edx,#D_SIGN_MASK ! ... and toggle signs
+ xor ecx,#D_SIGN_MASK
+
+cmp0:
+ cmp edx,ecx ! compare upper dwords
+ jnz checkneg0 ! if upper dwords differ, job is almost done
+ mov edx,eax ! upper dwords equal, so ...
+ mov ecx,ebx ! ... must make unsigned comparison of lower dwords
+ shr edx,#1 ! shift past sign
+ shr ecx,#1
+ cmp edx,ecx ! compare top 31 bits of lower dwords
+ jnz return ! if these differ, job is done
+ and eax,#1 ! compare lowest bits
+ and ebx,#1
+ cmp eax,ebx
+
+return:
+ ret #D_SIZE ! return, popping 1 double from stack
+
+checkneg0:
+ test edx,#D_EXP_MASK | D_FRAC_MASK ! check to catch unusual case ...
+ jnz recheck
+ test eax,eax
+ jnz recheck
+ test ecx,#D_EXP_MASK | D_FRAC_MASK
+ jnz recheck
+ test ebx,ebx
+ jz return ! ... both are (+-) zero, return 'z'
+
+recheck:
+ cmp edx,ecx ! the upper words were really different
+ ret #D_SIZE
+
+ .globl Fcompf
+ .align ALIGNMENT
+Fcompf:
+ call Fpushf
+ pop ebx ! yl
+ pop ecx ! yu
+ mov eax,PC_SIZE+D_LOW[esp] ! xl
+ mov edx,PC_SIZE+D_HIGH[esp] ! xu
+ jmp compare
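As the comments above say, Fcomp arranges things so that ordinary signed branches on the bit patterns give the right answer: when both operands are negative it swaps them and toggles their signs, and +0/-0 compare equal. A C model of that idea, for illustration only (compare_doubles is a made-up helper; finite values assumed):

#include <stdint.h>
#include <string.h>

int compare_doubles(double x, double y)
{
    int64_t a, b, t;
    memcpy(&a, &x, sizeof a);
    memcpy(&b, &y, sizeof b);
    if ((a & INT64_MAX) == 0 && (b & INT64_MAX) == 0)
        return 0;                          /* +0 and -0 compare equal        */
    if (a < 0 && b < 0) {                  /* both negative: swap and ...    */
        t = a & INT64_MAX;                 /* ... toggle signs               */
        a = b & INT64_MAX;
        b = t;
    }
    return (a > b) - (a < b);              /* <0, 0, >0 like the flag result */
}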
diff --git a/bin86-0.3/bccfp/fdiv.x b/bin86-0.3/bccfp/fdiv.x
new file mode 100644
index 0000000..4a5cf74
--- /dev/null
+++ b/bin86-0.3/bccfp/fdiv.x
@@ -0,0 +1,312 @@
+#define EF_SIZE 4
+
+! bcc 386 floating point routines (version 2) -- Fdiv, Fdivd, Fdivf
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+#define FRAME_SIZE (3 * GENREG_SIZE + PC_SIZE)
+
+ .extern Fpushf
+ .extern fpdivzero
+ .extern fpoverflow
+ .extern fpunderflow
+
+! double Fdiv(double x, double y) returns x / y
+
+! pop 2 doubles from stack, divide first by second, and push quotient on stack
+
+! we denote upper and lower dwords of x and y (or their fractions)
+! by (xu,xl), (yu,yl)
+
+ .globl Fdivf
+ .align ALIGNMENT
+Fdivf:
+ sub esp,#D_SIZE ! make space for dummy double on stack
+ push ebp
+ push edi ! save some regs
+ push esi
+ mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ...
+ mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot
+ call Fpushf
+ pop esi ! yl
+ pop edi ! yu
+ mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl
+ mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu
+ jmp division
+
+ .globl Fdiv
+ .align ALIGNMENT
+Fdiv:
+ push ebp
+ push edi ! save some regs
+ push esi
+ mov eax,FRAME_SIZE+D_LOW[esp] ! xl
+ mov edx,FRAME_SIZE+D_HIGH[esp] ! xu
+ mov esi,FRAME_SIZE+D_SIZE+D_LOW[esp] ! yl
+ mov edi,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! yu
+ jmp division
+
+ .align ALIGNMENT
+exp_y_0:
+ mov ebx,edi
+ or ebx,esi
+ beq zerodivide
+ mov ebx,#1
+fix_y:
+ test edi,edi ! XXX - sloow
+ js y_unpacked
+ shld edi,esi,#1
+ shl esi,#1
+ dec bx
+ jmp fix_y
+
+ .align ALIGNMENT
+exp_x_0:
+ mov ecx,edx
+ or ecx,eax
+ beq retz
+ mov ecx,#1 ! change exponent from 0 to 1
+fix_x:
+ test edx,#1 << (REG_BIT-1-2) ! XXX - sloow
+ jnz x_unpacked
+ shld edx,eax,#1
+ shl eax,#1
+ dec cx
+ jmp fix_x
+
+! Fdivd pops double from stack, divides it by double at [ebx],
+! and pushes quotient back on stack
+
+ .globl Fdivd
+ .align ALIGNMENT
+Fdivd:
+ sub esp,#D_SIZE ! make space for dummy double on stack
+ push ebp
+ push edi ! save some regs
+ push esi
+ mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ...
+ mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot
+ mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl
+ mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu
+ mov esi,D_LOW[ebx] ! yl
+ mov edi,D_HIGH[ebx] ! yu
+
+division:
+
+! The full calculations are
+
+! (xu,xl,0) = yu * (zu,zl) + (0,r,0) (normal 96/32 -> 64 bit division)
+! yl * zu = yu * q1 + r1 (32*32 -> 64 bit mul and 64/32 -> 32 bit div)
+
+! so
+
+! (xu,xl,0,0) = (yu,yl) * (zu,zl-q1) + (0,0,r-r1,yl*(q1-zl))
+
+! where the calculations zl-q1, r-r1 and yl*(q1-zl) are more complicated
+! than the notation suggests. They may be negative and the one with the
+! multiplication may not fit in 32 bits and in both cases the overflow
+! has to be moved into higher bit positions.
+
+! See Knuth for why (zu,zl-q1) is the correct 64-bit quotient to within
+! 1 bit either way (assuming the normalization x < 2 * y).
+
+! We only need to calculate the remainder (0,0,r-r1,yl*(q1-zl)) to resolve
+! tie cases. It tells whether the approximate quotient is too high or too
+! low.
+
+#define NTEMPS 5
+
+ sub esp,#NTEMPS*GENREG_SIZE ! space to remember values for rounding of tie case
+
+! Offsets from esp for these values (offsets using FRAME_SIZE are invalid
+! while these temps are active)
+r = 0
+q1 = 4
+r1 = 8
+yl = 12
+zl = 16
+
+! Step 1: unpack and normalize x to fraction in edx:eax (left shifted as
+! far as possible less 2 so that x < y, and later z < y); unpack and normalize
+! y to a fraction in edi:esi (left shifted as far as possible), put difference
+! of signs (= sign of quotient) in ecx(D_SIGN_MASK) and difference of exponents
+! (= exponent of quotient before normalization) in cx.
+
+ mov ebp,edx ! xu
+ xor ebp,edi ! xu ^ yu
+ and ebp,#D_SIGN_MASK ! sign of result is difference of signs
+
+! Unpack y first to trap 0 / 0
+
+ mov ebx,edi ! remember yu for exponent of y
+ shld edi,esi,#D_BIT-D_FRAC_BIT ! extract fraction of y ...
+ shl esi,#D_BIT-D_FRAC_BIT
+ and ebx,#D_EXP_MASK ! exponent of y
+ jz exp_y_0
+ shr ebx,#D_EXP_SHIFT ! in ebx (actually in bx, with high bits 0)
+ or edi,#D_NORM_MASK << (D_BIT-D_FRAC_BIT) ! normalize
+y_unpacked:
+
+! Unpack x
+
+ mov ecx,edx ! remember xu for exponent of x
+ shld edx,eax,#D_BIT-D_FRAC_BIT-2 ! extract fraction of x ...
+ shl eax,#D_BIT-D_FRAC_BIT-2
+ and edx,#(D_NORM_MASK << (D_BIT-D_FRAC_BIT-2+1))-1
+ ! XXX - above may be shifted 1 extra unnecessarily
+ and ecx,#D_EXP_MASK ! exponent of x
+ jz exp_x_0
+ shr ecx,#D_EXP_SHIFT ! in ecx (actually in cx, with high bits 0)
+ or edx,#D_NORM_MASK << (D_BIT-D_FRAC_BIT-2) ! normalize
+x_unpacked:
+
+ sub cx,bx ! not ecx,ebx because we want to use high bit for sign
+ add cx,#D_EXP_BIAS ! adjust exponent of quotient
+
+ or ecx,ebp ! include sign with exponent
+
+! Step 2: quotient of fractions -> (edx,eax)
+
+! 2a: (xu,xl,0) div yu = (zu,zl) -> (ebx,esi)
+
+ div eax,edi ! (xu,xl) div yu = zu in eax; remainder (rem) in edx
+ mov ebx,eax ! save zu in ebx
+ sub eax,eax ! clear eax: (edx,eax) = (rem,0)
+ div eax,edi ! (rem,0) div yu = zl in eax
+ mov r[esp],edx
+ mov zl[esp],eax
+ xchg eax,esi ! store zl in esi; save yl in eax
+ mov yl[esp],eax
+
+! 2b: (yl * zu) div yu -> (0,eax)
+
+ mul eax,ebx ! yl * zu -> (edx,eax)
+ div eax,edi ! (yl * zu) div yu in eax
+ mov q1[esp],eax
+ mov r1[esp],edx
+
+! 2c: (xu,xl) / (yu,yl) = (zu,zl) - (yl * zu) div yu -> (edx,eax)
+
+ mov edx,ebx ! zu
+ xchg eax,esi ! eax <- zl; esi <- (yl * zu) div yu
+ sub eax,esi
+ sbb edx,#0
+
+! Step 3: normalise quotient
+
+ test edx,#1 << (REG_BIT-2) ! is fraction too small? (can only be by 1 bit)
+ jnz div4
+ shld edx,eax,#1 ! yes; multiply fraction ...
+ shl eax,#1 ! ... by 2 ...
+ dec cx ! ... and decrement exponent
+
+! Step 4: shift and round
+
+div4:
+ mov ebx,eax ! save for rounding
+ shrd eax,edx,#D_BIT-D_FRAC_BIT-1 ! shift fraction of result ...
+ shr edx,#D_BIT-D_FRAC_BIT-1 ! ... to proper position
+ and ebx,#(1 << (D_BIT-D_FRAC_BIT-1))-1 ! look at bits shifted out
+ cmp ebx,#D_NORM_MASK >> (D_BIT-D_FRAC_BIT) ! compare with middle value
+ jb div5 ! below middle, don't round up
+ ja roundup ! above middle, round up
+
+! The low bits don't contain enough information to resolve the tie case,
+! because the quotient itself is only an approximation.
+! Calculate the exact remainder.
+! This case is not very common, so don't worry much about speed.
+! Unfortunately we had to save extra in all cases to prepare for it.
+
+ push edx
+ push eax
+
+ sub esi,esi ! the calculation requires 33 bits - carry to here
+ mov eax,2*GENREG_SIZE+q1[esp]
+ sub eax,2*GENREG_SIZE+zl[esp]
+ pushfd
+ mul dword EF_SIZE+2*GENREG_SIZE+yl[esp]
+ popfd
+ jnc foo
+ sub edx,2*GENREG_SIZE+yl[esp]
+ sbb esi,#0
+foo:
+ add edx,2*GENREG_SIZE+r[esp]
+ adc esi,#0
+ sub edx,2*GENREG_SIZE+r1[esp]
+ sbb esi,#0
+ mov ebx,eax
+ mov edi,edx
+
+ pop eax
+ pop edx
+
+! Can finally decide rounding of tie case
+
+ js div5 ! remainder < 0 from looking at top 64 bits
+ jnz roundup ! remainder > 0 from looking at top 64 bits
+ or edi,ebx ! test bottom 64 bits
+ jnz roundup ! remainder > 0
+
+ test al,#1 ! at last we know it is the tie case, check parity bit
+ jz div5 ! already even, otherwise round up to make even
+
+roundup:
+ add eax,#1 ! add rounding bit
+ adc edx,#0
+ test edx,#D_NORM_MASK << 1 ! has fraction overflowed (very unlikely)
+ jz div5
+! Why were the shifts commented out?
+ shrd eax,edx,#1 ! yes, divide fraction ...
+ shr edx,#1 ! ... by 2 ...
+ inc cx ! ... and increment exponent
+
+! Step 5: put it all together
+
+div5:
+ mov ebx,ecx ! extract sign
+ and ebx,D_SIGN_MASK
+ cmp cx,#D_EXP_INFINITE ! is exponent too big?
+ jge overflow
+ test cx,cx
+ jle underflow
+ shl ecx,#D_EXP_SHIFT
+
+ and edx,#D_FRAC_MASK ! remove norm bit
+ or edx,ecx ! include exponent ...
+ or edx,ebx ! ... and sign
+
+return:
+ add esp,#NTEMPS*GENREG_SIZE ! reclaim temp space
+ mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax ! "push" lower dword of product ...
+ mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx ! ... and upper dword
+ pop esi ! restore registers
+ pop edi
+ pop ebp
+ ret #D_SIZE
+
+retz:
+ sub edx,edx ! clear upper dword
+ sub eax,eax ! ... and lower dword
+ jmp return
+
+overflow:
+ mov edx,ecx ! put sign in usual reg
+ call fpoverflow
+ mov eax,ecx ! XXX - wrong reg
+ jmp return
+
+underflow:
+ mov esi,edx ! put upper part of fraction in usual reg
+ mov edx,ecx ! sign
+ movsx edi,cx ! put shift in usual reg
+ neg edi
+ inc edi
+ call fpunderflow
+ jmp return
+
+zerodivide:
+ mov edx,ebp ! sign
+ call fpdivzero
+ mov eax,ecx ! XXX - wrong reg
+ jmp return
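The "full calculations" comment block above is the heart of the routine: divide twice by yu to get a 64-bit estimate (zu,zl), then subtract q1 = (yl*zu)/yu to correct for the ignored yl. A scaled-down C model using 16-bit "words" so plain 64-bit arithmetic can check it, with made-up example values (illustration only, not part of the source):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t x = 0x40000000u;             /* x fraction, x < y              */
    uint32_t y = 0x8000ffffu;             /* y fraction, normalized         */
    uint32_t yu = y >> 16, yl = y & 0xffffu;

    uint32_t zu = x / yu;                 /* first division by yu           */
    uint32_t r  = x - zu * yu;            /* its remainder                  */
    uint32_t zl = (r << 16) / yu;         /* second division: (r,0) / yu    */
    uint32_t z  = (zu << 16) | zl;        /* (zu,zl) ~ x * 2^32 / y         */
    uint32_t q1 = (yl * zu) / yu;         /* correction term                */

    uint32_t approx = z - q1;
    uint32_t exact  = (uint32_t)(((uint64_t)x << 32) / y);
    printf("approx %08x exact %08x\n", approx, exact);
    return 0;
}

With these values the estimate is 7fff0001 and the exact quotient 7fff0002, one low; Knuth's argument cited in the comments bounds the error at one bit either way, which is why only tie cases need the exact remainder.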
diff --git a/bin86-0.3/bccfp/fmul.x b/bin86-0.3/bccfp/fmul.x
new file mode 100644
index 0000000..aa62b5c
--- /dev/null
+++ b/bin86-0.3/bccfp/fmul.x
@@ -0,0 +1,150 @@
+! bcc 386 floating point routines (version 2) -- Fmul, Fmuld, Fmulf
+! author: Bruce Evans
+
+#include "fplib.h"
+
+#define FRAME_SIZE (3 * GENREG_SIZE + PC_SIZE)
+
+ .extern Fpushf
+ .extern fpoverflow
+ .extern fpunderflow
+ .extern normalize2
+
+ .globl Fmul
+ .align ALIGNMENT
+Fmul:
+ push ebp
+ push edi
+ push esi
+ mov eax,FRAME_SIZE+D_LOW[esp]
+ mov edx,FRAME_SIZE+D_HIGH[esp]
+ mov ebx,FRAME_SIZE+D_SIZE+D_LOW[esp]
+ mov ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp]
+ call multiplication
+ mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax
+ mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx
+ pop esi
+ pop edi
+ pop ebp
+ ret #D_SIZE
+
+ .globl Fmuld
+ .align ALIGNMENT
+Fmuld:
+ push ebp
+ push edi
+ push esi
+ mov eax,FRAME_SIZE+D_LOW[esp]
+ mov edx,FRAME_SIZE+D_HIGH[esp]
+ mov ecx,D_HIGH[ebx]
+ mov ebx,D_LOW[ebx]
+ call multiplication
+ mov FRAME_SIZE+D_LOW[esp],eax
+ mov FRAME_SIZE+D_HIGH[esp],edx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ .globl Fmulf
+ .align ALIGNMENT
+Fmulf:
+ push ebp
+ push edi
+ push esi
+ call Fpushf
+ pop ebx ! yl
+ pop ecx ! yu
+ mov eax,FRAME_SIZE+D_LOW[esp] ! xl
+ mov edx,FRAME_SIZE+D_HIGH[esp] ! xu
+ call multiplication
+ mov FRAME_SIZE+D_LOW[esp],eax
+ mov FRAME_SIZE+D_HIGH[esp],edx
+ pop esi
+ pop edi
+ pop ebp
+ ret
+
+ .align ALIGNMENT
+exp_x_0:
+ mov edx,#1 << D_EXP_SHIFT ! change exponent from 0 to 1
+ jmp x_unpacked ! XXX - check for denormal?
+
+ .align ALIGNMENT
+exp_y_0:
+ mov ecx,#1 << D_EXP_SHIFT
+ jmp y_unpacked
+
+ .align ALIGNMENT
+multiplication:
+ mov ebp,edx ! xu
+ xor ebp,ecx ! xu ^ yu
+ and ebp,#D_SIGN_MASK ! sign of result is difference of signs
+
+ mov esi,edx ! free edx for multiplications
+ and esi,#D_FRAC_MASK ! discard sign and exponent
+ and edx,#D_EXP_MASK ! exponent(x)
+ jz exp_x_0
+ or esi,#D_NORM_MASK ! normalize
+x_unpacked:
+
+ mov edi,ecx ! this mainly for consistent naming
+ and edi,#D_FRAC_MASK
+ and ecx,#D_EXP_MASK ! exponent(y)
+ jz exp_y_0
+ or edi,#D_NORM_MASK
+y_unpacked:
+
+ add ecx,edx ! add exponents
+
+! exponent is in ecx, sign in ebp, operands in esi:eax and edi:ebx, edx is free
+! product to go in esi:eax:ebp:ebx
+! terminology: x * y = (xu,xl) * (yu,yl)
+! = (xu * yu,0,0) + (0,xu * yl + xl * yu,0) + (0,0,xl * yl)
+
+ push ecx
+ push ebp
+ mov ecx,eax
+ mul ebx ! xl * yl
+ mov ebp,edx ! (xl * yl).u in ebp
+ xchg ebx,eax ! (xl * yl).l in ebx (final), yl in eax
+ mul esi ! xu * yl
+ push eax ! (xu * yl).l on stack
+ push edx ! (xu * yl).u on stack
+ mov eax,esi ! xu
+ mul edi ! xu * yu
+ mov esi,edx ! (xu * yu).u in esi (final except carries)
+ xchg ecx,eax ! (xu * yu).l in ecx, xl in eax
+ mul edi ! xl * yu
+
+ add ebp,eax ! (xl * yl).u + (xl * yu).l
+ pop eax ! (xu * yl).u
+ adc eax,edx ! (xu * yl).u + (xl * yu).u
+ adc esi,#0
+ pop edx ! (xu * yl).l
+ add ebp,edx ! ((xl * yl).u + (xl * yu).l) + (xu * yl).l
+ adc eax,ecx ! ((xu * yl).u + (xl * yu).u) + (xu * yu).l
+ adc esi,#0
+ pop edx ! sign
+ pop edi ! exponent
+ sub edi,#(D_EXP_BIAS+1-(D_EXP_BIT+2)) << D_EXP_SHIFT ! adjust
+! cmp edi,#(D_EXP_INFINITE-1+(D_EXP_BIT+2)) << D_EXP_SHIFT
+! jae outofbounds ! 0 will be caught as underflow by normalize2
+ cmp edi,#(2*D_EXP_INFINITE-(D_EXP_BIAS+1)+(D_EXP_BIT+2)) << D_EXP_SHIFT
+ ja underflow
+ br normalize2
+
+ .align ALIGNMENT
+overflow:
+ mov edx,ebp ! put sign in usual reg
+ call fpoverflow
+ mov eax,ecx ! XXX - wrong reg
+ ret
+
+ .align ALIGNMENT
+underflow:
+ mov edx,ebp ! put sign in usual reg
+ neg edi
+ shr edi,#D_EXP_SHIFT
+ inc edi
+ br fpunderflow
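The terminology comment above ("x * y = (xu,xl) * (yu,yl) = ...") is the usual split of a 64x64-bit multiply into four 32x32 products. A C sketch of the same bookkeeping, for illustration only (mul64x64 is a made-up name; the real code keeps the four result words in esi:eax:ebp:ebx):

#include <stdint.h>

void mul64x64(uint32_t xu, uint32_t xl, uint32_t yu, uint32_t yl,
              uint32_t out[4])            /* out[0] lowest .. out[3] highest */
{
    uint64_t ll = (uint64_t)xl * yl;      /* xl * yl */
    uint64_t lu = (uint64_t)xl * yu;      /* xl * yu */
    uint64_t ul = (uint64_t)xu * yl;      /* xu * yl */
    uint64_t uu = (uint64_t)xu * yu;      /* xu * yu */

    uint64_t mid = (ll >> 32) + (uint32_t)lu + (uint32_t)ul;    /* middle word + carries */
    uint64_t hi  = uu + (lu >> 32) + (ul >> 32) + (mid >> 32);  /* top 64 bits           */

    out[0] = (uint32_t)ll;
    out[1] = (uint32_t)mid;
    out[2] = (uint32_t)hi;
    out[3] = (uint32_t)(hi >> 32);
}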
diff --git a/bin86-0.3/bccfp/fpbsr.x b/bin86-0.3/bccfp/fpbsr.x
new file mode 100644
index 0000000..8ff38d7
--- /dev/null
+++ b/bin86-0.3/bccfp/fpbsr.x
@@ -0,0 +1,25 @@
+! bcc 386 floating point routines (version 2) -- bsr_table
+! author: Bruce Evans
+
+#include "fplib.h"
+
+ .globl bsr_table
+ .data
+ .align ALIGNMENT
+bsr_table: ! table to replace bsr on range 0-255
+.byte -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3
+.byte 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
+.byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+.byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+.byte 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
+.byte 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
+.byte 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
+.byte 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
+.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+.byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
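The table above replaces the slow bsr instruction for byte-sized values; fadd.x and fpushi.x then test a byte at a time to cover 32 bits. A C sketch of how such a table can be built and used (illustration only; init_table and bsr32 are made-up names):

#include <stdint.h>

static signed char table[256];            /* same contents as bsr_table above */

static void init_table(void)
{
    int i;
    table[0] = -1;
    for (i = 1; i < 256; i++) {
        int b = 7;
        while (!(i & (1 << b)))
            b--;
        table[i] = (signed char)b;        /* index of the highest set bit */
    }
}

static int bsr32(uint32_t v)              /* returns -1 for v == 0 */
{
    if (v >> 24) return 24 + table[v >> 24];
    if (v >> 16) return 16 + table[v >> 16];
    if (v >> 8)  return  8 + table[v >> 8];
    return table[v];
}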
diff --git a/bin86-0.3/bccfp/fperr.c b/bin86-0.3/bccfp/fperr.c
new file mode 100644
index 0000000..d5372dc
--- /dev/null
+++ b/bin86-0.3/bccfp/fperr.c
@@ -0,0 +1,50 @@
+/*
+ * bin86/bccfp/fperr.c
+ *
+ * Copyright (C) 1992 Bruce Evans
+ */
+
+#include <stdio.h>
+#include <signal.h>
+
+#include "fperr.h"
+
+void fperr(errno)
+int errno;
+{
+
+#if defined(DEBUG) || 0
+ switch(errno) {
+
+ case EFDENORMAL:
+ fputs("\nDenormal - ", stderr);
+ break;
+
+ case EFINFINITY:
+ fputs("\nInfinity - ", stderr);
+ break;
+
+ case EFNAN:
+ fputs("\nNaN - ", stderr);
+ break;
+
+ case EFOVERFLOW:
+ fputs("\nOverflow - ", stderr);
+ break;
+
+ case EFUNDERFLOW:
+ fputs("\nUnderflow - ", stderr);
+ break;
+
+ case EFDIVZERO:
+ fputs("\nZero divide - ", stderr);
+ break;
+
+ default:
+ fprintf(stderr, "\nUnknown error 0x%x - ", errno);
+ }
+ fflush(stderr);
+#endif
+
+ kill(getpid(), SIGFPE);
+}
diff --git a/bin86-0.3/bccfp/fperr.h b/bin86-0.3/bccfp/fperr.h
new file mode 100644
index 0000000..42d54fd
--- /dev/null
+++ b/bin86-0.3/bccfp/fperr.h
@@ -0,0 +1,14 @@
+/*
+ * bin86/bccfp/fperr.h
+ *
+ * Copyright (C) 1992 Bruce Evans
+ */
+
+/* fperr.h */
+
+#define EFDENORMAL 1
+#define EFINFINITY 2
+#define EFNAN 3
+#define EFOVERFLOW 4
+#define EFUNDERFLOW 5
+#define EFDIVZERO 6
diff --git a/bin86-0.3/bccfp/fperror.x b/bin86-0.3/bccfp/fperror.x
new file mode 100644
index 0000000..04f3f74
--- /dev/null
+++ b/bin86-0.3/bccfp/fperror.x
@@ -0,0 +1,126 @@
+! bcc 386 floating point routines (version 2)
+! --- fpdenormal, fperror, fpinfinity, fpNaN, fpoverflow, fpunderflow,fpdivzero
+! author: Bruce Evans
+
+#include "fperr.h"
+#include "fplib.h"
+
+ .extern _fperr
+
+! Cause a denormal-operand exception
+! Preserves all general registers if signal handler returns
+
+ .globl fpdenormal
+ .align ALIGNMENT
+fpdenormal:
+#if 0
+ push eax
+ mov eax,#EFDENORMAL
+ call fperror
+ pop eax
+#endif
+ ret
+
+! Cause an exception with error code eax, preserving all genregs except eax
+
+ .globl fperror
+ .align ALIGNMENT
+fperror:
+ push ebp ! set up usual frame ...
+ mov ebp,esp ! ... for debugging
+ push edx ! save default
+ push ecx
+ push eax ! error code is arg to C routine
+ call _fperr
+ add esp,#GENREG_SIZE
+ pop ecx ! restore default
+ pop edx
+ pop ebp
+ ret
+
+ .align ALIGNMENT
+fphuge:
+ mov ecx,#D_HUGE_LOW ! prepare number +-HUGEVAL
+ or edx,#D_HUGE_HIGH ! ... in case signal handler returns
+ jmp fperror
+
+! Cause an infinite-operand exception
+! Return +-HUGEVAL in edx:ecx with sign from edx
+
+ .globl fpinfinity
+ .align ALIGNMENT
+fpinfinity:
+ mov eax,#EFINFINITY
+ jmp fphuge ! almost right
+
+! Cause an NaN-operand exception
+! Return +-HUGEVAL in edx:ecx with sign from edx
+
+ .globl fpNaN
+ .align ALIGNMENT
+fpNaN:
+ mov eax,#EFNAN ! there are different types of NaNs but...
+ jmp fphuge ! WRONG
+
+! Cause an overflow exception
+! Return +-HUGEVAL in edx:ecx with sign from edx
+
+ .globl fpoverflow
+ .align ALIGNMENT
+fpoverflow:
+ mov eax,#EFOVERFLOW
+ jmp fphuge ! almost right
+
+! Cause an underflow exception (actually assume it is masked for now)
+! Return denormal or 0.0 in edx:ecx
+! XXX - this should cause a denormal exception or none for the denormal case
+! Args: sign in edx, fraction in esi:eax, right shift in edi
+! Returns: denormalized number in edx:eax
+
+ .globl fpunderflow
+ .align ALIGNMENT
+fpunderflow:
+#if 0
+ mov eax,#EFUNDERFLOW
+ jmp fperror
+#endif
+ cmp edi,#REG_BIT
+ jb denormalize1
+ mov eax,esi
+ sub esi,esi
+ sub edi,#REG_BIT
+ cmp edi,#REG_BIT
+ jb denormalize1
+denormalize_underflow:
+#if 0
+ mov eax,#EFUNDERFLOW
+ jmp fperror
+#endif
+ sub eax,eax
+ mov edx,eax
+ ret
+
+ .align ALIGNMENT
+denormalize1:
+ mov ecx,edi
+ shrd eax,esi,cl
+ shr esi,cl
+ mov ecx,esi
+ or ecx,eax
+ jz denormalize_underflow
+ and edx,#D_SIGN_MASK
+ or edx,esi
+ ret
+
+! Cause an fp division by zero exception
+! Return +-HUGEVAL in edx:ecx with sign from edx
+
+ .globl fpdivzero
+ .align ALIGNMENT
+fpdivzero:
+ mov eax,#EFDIVZERO
+ test edx,#D_EXP_MASK
+ jnz fphuge ! almost right
+ sub ecx,ecx
+ mov edx,ecx
+ jmp fperror
diff --git a/bin86-0.3/bccfp/fplib.h b/bin86-0.3/bccfp/fplib.h
new file mode 100644
index 0000000..b346c61
--- /dev/null
+++ b/bin86-0.3/bccfp/fplib.h
@@ -0,0 +1,49 @@
+/*
+ * bin86/bccfp/fplib.h
+ *
+ * Copyright (C) 1992 Bruce Evans
+ */
+
+#define ALIGNMENT 4
+#define CHAR_BIT 8
+#define D_BIT (D_SIZE * CHAR_BIT)
+#define D_EXP_BIAS ((1 << (D_EXP_BIT - 1)) - 1)
+#define D_EXP_BIT 11
+#define D_EXP_INFINITE ((1 << D_EXP_BIT) - 1)
+#define D_EXP_MASK (((1 << D_EXP_BIT) - 1) << D_EXP_SHIFT)
+#define D_EXP_SHIFT (REG_BIT - (1 + D_EXP_BIT))
+#define D_FRAC_BIT 53
+#define D_FRAC_MASK (D_NORM_MASK - 1)
+#define D_HIGH 4
+#define D_HUGE_HIGH (D_EXP_MASK - 1)
+#define D_HUGE_LOW 0xFFFFFFFF
+#define D_LOW 0
+#define D_NORM_BIT (D_FRAC_BIT - 1 - REG_BIT)
+#define D_NORM_MASK (1 << D_NORM_BIT)
+#define D_SIGN_BIT 63
+#define D_SIGN_MASK (1 << (D_SIGN_BIT - REG_BIT))
+#define D_SIZE 8
+#define F_BIT (F_SIZE * CHAR_BIT)
+#define F_EXP_BIAS ((1 << (F_EXP_BIT - 1)) - 1)
+#define F_EXP_BIT 8
+#define F_EXP_INFINITE ((1 << F_EXP_BIT) - 1)
+#define F_EXP_MASK (((1 << F_EXP_BIT) - 1) << F_EXP_SHIFT)
+#define F_EXP_SHIFT (REG_BIT - (1 + F_EXP_BIT))
+#define F_FRAC_BIT 24
+#define F_FRAC_MASK (F_NORM_MASK - 1)
+#define F_HIGH 0
+#define F_HUGE_HIGH (F_EXP_MASK - 1)
+#define F_NORM_BIT (F_FRAC_BIT - 1)
+#define F_NORM_MASK (1 << F_NORM_BIT)
+#define F_SIGN_BIT 31
+#define F_SIGN_MASK (1 << F_SIGN_BIT)
+#define F_SIZE 4
+#define FREE_D_SIGN_BIT_TEST (D_SIGN_BIT % REG_BIT == REG_BIT - 1)
+#define GENREG_SIZE 4
+#define INT_BIT 32
+#define INT_MAX 0x7FFFFFFF
+#define INT_MIN (-0x7FFFFFFF - 1)
+#define PC_SIZE 4
+#define REG_BIT 32
+#define SHORT_BIT 16
+#define UINT_MAX 0xFFFFFFFF
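A small C check of how these constants carve up an IEEE double on the 386 (little-endian assumed): D_HIGH is the dword holding the sign (D_SIGN_MASK), 11 exponent bits (D_EXP_MASK, at D_EXP_SHIFT = 20) and the top 20 fraction bits (D_FRAC_MASK); D_LOW holds the remaining 32 fraction bits. Illustration only:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    double x = -1.5;
    unsigned char buf[8];
    uint32_t lo, hi;
    memcpy(buf, &x, 8);
    memcpy(&lo, buf, 4);                  /* D_LOW  = offset 0 */
    memcpy(&hi, buf + 4, 4);              /* D_HIGH = offset 4 */

    printf("sign=%u exp=%u frac_hi=%05x frac_lo=%08x\n",
           hi >> 31,                      /* D_SIGN_MASK bit                */
           (hi & 0x7ff00000u) >> 20,      /* D_EXP_MASK >> D_EXP_SHIFT      */
           hi & 0x000fffffu,              /* D_FRAC_MASK (top 20 frac bits) */
           lo);                           /* remaining 32 fraction bits     */
    return 0;                             /* -1.5: sign=1 exp=1023 frac_hi=80000 */
}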
diff --git a/bin86-0.3/bccfp/fptoi.x b/bin86-0.3/bccfp/fptoi.x
new file mode 100644
index 0000000..30de729
--- /dev/null
+++ b/bin86-0.3/bccfp/fptoi.x
@@ -0,0 +1,117 @@
+! bcc 386 floating point routines (version 2)
+! -- dtoi, dtol, dtoui, dtoul, ftoi, ftol (todo: ftoui, ftoul)
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+ .extern fpoverflow
+ .extern Fpushf
+
+! Convert double x at [ebx] to int and return in eax
+
+ .globl dtoi
+ .globl dtol
+ .align ALIGNMENT
+dtoi:
+dtol:
+ mov eax,D_HIGH[ebx]
+ mov ecx,eax
+ and ecx,#D_EXP_MASK ! extract exponent
+ jz retz ! if 0 return 0
+ test eax,#D_SIGN_MASK
+ jnz negative
+ call into_dtoui
+ cmp eax,#INT_MAX
+ ja overflow_int_max
+ ret
+
+ .align ALIGNMENT
+negative:
+ and eax,#~D_SIGN_MASK
+ call into_dtoui
+ cmp eax,#INT_MIN
+ ja overflow_int_min
+ neg eax
+ ret
+
+ .align ALIGNMENT
+overflow_int_max:
+ call fpoverflow
+ mov eax,#INT_MAX
+ ret
+
+ .align ALIGNMENT
+overflow_int_min:
+ js return ! actually INT_MIN is OK
+ call fpoverflow
+ mov eax,#INT_MIN
+return:
+ ret
+
+ .align ALIGNMENT
+retz:
+ sub eax,eax ! clear return value
+ ret
+
+! Convert double x at [ebx] to unsigned and return in eax
+
+ .globl dtoui
+ .globl dtoul
+ .align ALIGNMENT
+dtoui:
+dtoul:
+ mov eax,D_HIGH[ebx]
+ mov ecx,eax
+ and ecx,#D_EXP_MASK ! extract exponent
+ jz retz ! if 0 return 0
+ test eax,#D_SIGN_MASK
+ jnz overflow_0
+into_dtoui:
+ mov edx,D_LOW[ebx]
+
+ and eax,#D_FRAC_MASK ! extract fraction
+ or eax,#D_NORM_MASK ! restore normalization bit
+
+ shr ecx,#D_EXP_SHIFT ! convert exponent to number
+ sub ecx,#D_EXP_BIAS+D_NORM_BIT ! adjust radix point
+ jl dtoui_rightshift ! should we shift left or right?
+ cmp ecx,#D_BIT-D_FRAC_BIT ! can shift left by at most this
+ ja overflow_uint_max ! if more, overflow
+ shld eax,edx,cl
+ ret
+
+ .align ALIGNMENT
+dtoui_rightshift:
+ neg ecx ! make shift count > 0
+ cmp ecx,#REG_BIT ! big shifts would be taken mod REG_BIT ...
+ jae retz ! ... no good
+ shr eax,cl ! otherwise it is faster to do the shift ...
+ ret ! ... than to jump for the slightly smaller
+ ! ... shift counts that shift out all bits
+
+ .align ALIGNMENT
+overflow_0:
+ call fpoverflow
+ sub eax,eax
+ ret
+
+ .align ALIGNMENT
+overflow_uint_max:
+ call fpoverflow
+ mov eax,#UINT_MAX
+ ret
+
+! ftoi is like dtoi except ebx points to a float instead of a double.
+! This is a quickly-written slowish version that does not take advantage
+! of the float being smaller.
+
+ .globl ftoi
+ .globl ftol
+ .align ALIGNMENT
+ftoi:
+ftol:
+ call Fpushf
+ mov ebx,esp
+ call dtoi
+ add esp,#D_SIZE
+ ret
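dtoui above converts by extracting the exponent, restoring the norm bit and shifting the fraction to the radix point, saturating through fpoverflow when the value doesn't fit. A C model of the same idea done on the whole 64-bit pattern (illustration only; double_to_uint is a made-up helper and the error hooks are left out):

#include <stdint.h>
#include <string.h>

uint32_t double_to_uint(double x)
{
    uint64_t bits, frac;
    int e, s;
    memcpy(&bits, &x, sizeof bits);
    e = (int)((bits >> 52) & 0x7ff);                    /* biased exponent           */
    if (e == 0 || (bits >> 63))
        return 0;                                       /* zero, denormal or negative */
    frac = (bits & 0xfffffffffffffULL) | (1ULL << 52);  /* restore the norm bit       */
    s = 1075 - e;                                       /* 1075 = bias + 52 frac bits */
    if (s < 21)
        return 0xffffffffu;                             /* >= 2^32: saturate          */
    if (s > 52)
        return 0;                                       /* entirely fractional        */
    return (uint32_t)(frac >> s);                       /* truncate toward zero       */
}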
diff --git a/bin86-0.3/bccfp/fpulld.x b/bin86-0.3/bccfp/fpulld.x
new file mode 100644
index 0000000..928a846
--- /dev/null
+++ b/bin86-0.3/bccfp/fpulld.x
@@ -0,0 +1,20 @@
+! bcc 386 floating point routines (version 2) -- Fpulld
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+! Pop double from stack and store at address [ebx]
+
+ .globl Fpulld
+ .align ALIGNMENT
+Fpulld:
+ pop ecx
+ pop dword D_LOW[ebx]
+ pop dword D_HIGH[ebx]
+ jmp ecx ! return
+
+! This popping method is much slower on 486's because popping to memory
+! takes 5+ while moving twice takes 2 and the return address doesn't
+! have to be moved. However, popping is a little faster on a non-cached
+! 386/20 with static column RAM although the memory access pattern is
+! better for a double-width move than for popping. What about a cached 386?
diff --git a/bin86-0.3/bccfp/fpullf.x b/bin86-0.3/bccfp/fpullf.x
new file mode 100644
index 0000000..417ef92
--- /dev/null
+++ b/bin86-0.3/bccfp/fpullf.x
@@ -0,0 +1,101 @@
+! bcc 386 floating point routines (version 2) -- Fpullf
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+ .extern fpoverflow
+ .extern fpunderflow
+
+! pop double from stack, convert to float and store at address [ebx]
+
+ .globl Fpullf
+ .align ALIGNMENT
+Fpullf:
+
+! Step 1: load and shift left
+
+ mov eax,PC_SIZE+D_LOW[esp] ! lower dword
+ mov edx,PC_SIZE+D_HIGH[esp] ! upper dword
+ mov ecx,edx ! copy upper dword into ecx ...
+ and ecx,#D_SIGN_MASK ! ... and extract sign
+ and edx,#D_EXP_MASK | D_FRAC_MASK ! extract exponent and fraction
+ sub edx,#(D_EXP_BIAS-F_EXP_BIAS) << D_EXP_SHIFT ! adjust exponent bias
+ jz underflow
+ cmp edx,#F_EXP_INFINITE << D_EXP_SHIFT ! check if exponent lies in reduced range
+ jae outofbounds
+ shld edx,eax,#D_EXP_BIT-F_EXP_BIT ! shift exponent and fraction
+
+! Step 2: round
+
+ test eax,#1 << (REG_BIT-1-(D_EXP_BIT-F_EXP_BIT)) ! test upper rounding bit
+ jz step3 ! below middle, don't round up
+ test eax,#(1 << (REG_BIT-1-(D_EXP_BIT-F_EXP_BIT)))-1 ! test other rounding bits
+ jnz roundup ! above middle, round up
+ test dl,#1 ! in middle, check parity bit
+ jz step3 ! already even, otherwise round up to make even
+
+roundup:
+ inc edx ! carry 1
+ test edx,#F_FRAC_MASK ! is fraction now 0? (carry into F_EXP_MASK)
+ jnz step3 ! no -- carry complete
+ cmp edx,#(F_EXP_INFINITE << F_EXP_SHIFT) & ~F_NORM_MASK ! yes (very unlikely): check for overflow
+ ! XXX - I think these tests say 0x7e7fffff overflows
+ jae overflow
+
+! Step 3: put it all together
+
+step3:
+ or edx,ecx ! include sign
+ mov F_HIGH[ebx],edx ! store the result in [ebx]
+ ret #D_SIZE ! return and release double from stack
+
+ .align ALIGNMENT
+outofbounds:
+ jns overflow ! have just compared exponent with the max
+underflow:
+! call fpunderflow ! XXX
+ push ecx ! save sign
+ mov ecx,edx
+ and ecx,#~D_FRAC_MASK ! assume fraction is below exp
+ cmp ecx,#-((D_EXP_BIAS-F_EXP_BIAS) << D_EXP_SHIFT) ! was exp = 0?
+ jz exp_x_0
+ shr ecx,#D_EXP_SHIFT
+ neg ecx
+ and edx,#D_FRAC_MASK
+ or edx,#D_NORM_MASK
+ shld edx,eax,#D_EXP_BIT-F_EXP_BIT-1
+ shl eax,#D_EXP_BIT-F_EXP_BIT-1
+ push ebx ! save to use for rounding
+ sub ebx,ebx
+ shrd ebx,eax,cl
+ shrd eax,edx,cl
+ shr edx,cl
+ cmp eax,#1 << (REG_BIT-1)
+ jb over_denorm_roundup
+ ja denorm_roundup
+ test dl,#1
+ jz over_denorm_roundup
+denorm_roundup:
+#if F_NORM_BIT != F_EXP_SHIFT
+#include "carry into norm bit doesn't go into low exp bit"
+#endif
+ inc edx
+over_denorm_roundup:
+ pop ebx
+ pop ecx
+ or edx,ecx
+ mov F_HIGH[ebx],edx
+ ret #D_SIZE
+
+ .align ALIGNMENT
+exp_x_0: ! XXX check for denormals - they underflow
+ pop ecx
+ mov dword F_HIGH[ebx],#0
+ ret #D_SIZE
+
+ .align ALIGNMENT
+overflow:
+ mov edx,ebx ! put sign in usual reg
+ call fpoverflow
+ mov F_HIGH[ebx],dword #F_HUGE_HIGH ! XXX - should use infinity
+ ret #D_SIZE ! ... if fpoverflow does
diff --git a/bin86-0.3/bccfp/fpushd.x b/bin86-0.3/bccfp/fpushd.x
new file mode 100644
index 0000000..68caab0
--- /dev/null
+++ b/bin86-0.3/bccfp/fpushd.x
@@ -0,0 +1,60 @@
+! bcc 386 floating point routines (version 2) -- dtof, Fpushd, Fneg, Fnegd
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+ .extern Fpullf
+
+! dtof converts the double at [ebx] to a float and pushes the float onto
+! the stack (D_SIZE bytes are allocated for the float although only the bottom
+! F_SIZE are used).
+! This is a quickly-written slowish version.
+
+ .globl dtof
+ .align ALIGNMENT
+dtof:
+ pop eax
+ sub esp,#D_SIZE ! build result here
+ push eax ! put back return address
+ call Fpushd
+ lea ebx,D_SIZE+PC_SIZE[esp]
+ call Fpullf
+ ret
+
+! Push double at address [ebx] onto stack
+
+ .globl Fpushd
+ .align ALIGNMENT
+Fpushd:
+ pop ecx
+ push dword D_HIGH[ebx]
+ push dword D_LOW[ebx]
+ jmp ecx ! return
+
+! Push double at address [ebx] onto stack, negating it on the way.
+
+! Don't worry about generating -0 because other routines have to allow for
+! it anyway.
+
+! Perhaps this and Fneg should check for denormals and illegal operands
+! (I think only signalling NaNs are illegal).
+! fchs doesn't check, but fld does.
+! Our Fpushd is not quite like fld because no conversions are involved.
+
+ .globl Fnegd
+ .align ALIGNMENT
+Fnegd:
+ pop ecx
+ mov eax,D_HIGH[ebx]
+ xor eax,#D_SIGN_MASK ! toggle sign
+ push eax
+ push dword D_LOW[ebx]
+ jmp ecx ! return
+
+! Negate double on stack
+
+ .globl Fneg
+ .align ALIGNMENT
+Fneg:
+ xorb PC_SIZE+D_SIZE-1[esp],D_SIGN_MASK >> (REG_BIT-CHAR_BIT) ! toggle sign
+ ret
diff --git a/bin86-0.3/bccfp/fpushf.x b/bin86-0.3/bccfp/fpushf.x
new file mode 100644
index 0000000..7cb2f8d
--- /dev/null
+++ b/bin86-0.3/bccfp/fpushf.x
@@ -0,0 +1,74 @@
+! bcc 386 floating point routines (version 2) -- Fpushf, Fnegf
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+ .extern fpdenormal
+
+! Load float at [ebx], convert to double and push on stack
+
+ .globl Fpushf
+ .align ALIGNMENT
+Fpushf:
+ mov edx,F_HIGH[ebx]
+into_Fpushf:
+ test edx,#F_EXP_MASK ! is exponent 0?
+ jz exp_x_0
+
+ mov ecx,edx ! extract sign
+ and ecx,#F_SIGN_MASK
+
+ and edx,#F_EXP_MASK | F_FRAC_MASK ! extract exponent and fraction
+ sub eax,eax ! clear lower dword
+ shrd eax,edx,#D_EXP_BIT-F_EXP_BIT ! shift exponent and fraction to new position
+ shr edx,#D_EXP_BIT-F_EXP_BIT
+
+ add edx,#(D_EXP_BIAS-F_EXP_BIAS) << D_EXP_SHIFT ! adjust exponent bias
+ or edx,ecx ! include sign
+
+ pop ecx
+ push edx ! upper dword
+ push eax ! lower dword
+ jmp ecx ! return
+
+ .align ALIGNMENT
+exp_x_0:
+ mov eax,edx
+ and eax,#F_FRAC_MASK
+ jnz x_denorm
+ pop ecx
+ push eax ! upper dword = 0
+ push eax ! lower dword = 0
+ jmp ecx ! return
+
+ .align ALIGNMENT
+x_denorm:
+ call fpdenormal
+ bsr ecx,eax ! zzzz
+ neg ecx
+ add ecx,#F_NORM_BIT
+ shl eax,cl
+ and eax,#F_FRAC_MASK
+ neg ecx
+ add ecx,#D_EXP_BIAS-F_EXP_BIAS+1
+ shl ecx,#D_EXP_SHIFT
+ and edx,#F_SIGN_MASK ! assumed same as D_SIGN_MASK
+ or edx,ecx
+ sub ecx,ecx
+ shrd ecx,eax,#D_EXP_BIT-F_EXP_BIT
+ shr eax,#D_EXP_BIT-F_EXP_BIT
+ or edx,eax
+
+ pop eax
+ push edx ! upper dword
+ push ecx ! lower dword
+ jmp eax ! return
+
+! Fnegf: as Fpushf, but negate double before pushing onto stack
+
+ .globl Fnegf
+ .align ALIGNMENT
+Fnegf:
+ mov edx,F_HIGH[ebx]
+ xor edx,#F_SIGN_MASK ! toggle sign
+ jmp into_Fpushf ! join Fpushf
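For a normal float, Fpushf just rebiases the exponent (by 1023 - 127) and moves the sign and fraction into their double positions; zero and denormal inputs take the special paths above. A C sketch of that exact widening for the normal case (illustration only; widen is a made-up name):

#include <stdint.h>
#include <string.h>

double widen(float f)
{
    uint32_t s;
    uint64_t bits;
    double d;
    memcpy(&s, &f, sizeof s);
    bits = ((uint64_t)(s & 0x80000000u) << 32)                   /* sign            */
         | ((uint64_t)(((s >> 23) & 0xffu) - 127 + 1023) << 52)  /* rebias exponent */
         | ((uint64_t)(s & 0x7fffffu) << 29);                    /* move fraction   */
    memcpy(&d, &bits, sizeof d);
    return d;
}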
diff --git a/bin86-0.3/bccfp/fpushi.x b/bin86-0.3/bccfp/fpushi.x
new file mode 100644
index 0000000..b19aae2
--- /dev/null
+++ b/bin86-0.3/bccfp/fpushi.x
@@ -0,0 +1,126 @@
+! bcc 386 floating point routines (version 2)
+! -- Fpushi, Fpushl, Fpushs, Fpushc, Fpushuc, Fpushui, Fpushul, Fpushus
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+! Convert the short in ax to double and push on stack
+
+ .globl Fpushs
+ .align ALIGNMENT
+Fpushs:
+ cwde
+ add eax,#0 ! fast 3-byte instruction to align
+
+! Convert the int or long in eax to double and push on stack
+
+ .globl Fpushi
+ .globl Fpushl
+! .align ALIGNMENT ! don't do this until it pads with nop's
+Fpushi:
+Fpushl:
+ test eax,eax
+ jz return_eax ! got 0 in eax
+ mov ebx,#(D_EXP_BIAS+D_NORM_BIT) << D_EXP_SHIFT ! set no-sign and exponent
+ jns normalize ! sign and fraction bits already set up
+ mov ebx,#D_SIGN_MASK | ((D_EXP_BIAS+D_NORM_BIT) << D_EXP_SHIFT) ! adjust sign
+ neg eax ! adjust fraction
+ jmp normalize
+
+ .align ALIGNMENT
+ret1:
+ mov eax,#D_EXP_BIAS << D_EXP_SHIFT
+ add eax,#0 ! fast 3-byte instruction to align
+
+! .align ALIGNMENT ! don't do this until it pads with nop's
+return_eax:
+ pop ecx
+ push eax ! upper dword
+ push dword #0 ! lower dword = 0
+ jmp ecx ! return
+
+! Convert the (unsigned) char in al to double and push on stack
+
+ .globl Fpushc
+ .globl Fpushuc
+ .align ALIGNMENT
+Fpushc:
+Fpushuc:
+ and eax,#(1 << CHAR_BIT)-1
+ add eax,#0 ! fast 3-byte instruction to align
+
+! Convert the unsigned short in ax to double and push on stack
+
+ .globl Fpushus
+! .align ALIGNMENT ! don't do this until it pads with nop's
+Fpushus:
+ and eax,#(1 << SHORT_BIT)-1
+ add eax,#0 ! fast 3-byte instruction to align
+
+! Convert the unsigned int or long in eax to double and push on stack
+
+ .globl Fpushui
+ .globl Fpushul
+! .align ALIGNMENT ! don't do this until it pads with nop's
+Fpushui:
+Fpushul:
+ cmp eax,#1 ! this tests for both 0 and 1
+ jb return_eax ! got 0 in eax
+ jz ret1
+ mov ebx,#(D_EXP_BIAS+D_NORM_BIT) << D_EXP_SHIFT ! set no-sign and exponent
+
+! .align ALIGNMENT ! don't do this until it pads with nop's
+normalize:
+ sub edx,edx ! clear lower dword of result
+
+! Find first nonzero bit
+! Don't use bsr, it is slow (const + 3n on 386, const + n on 486)
+
+ sub ecx,ecx ! prepare unsigned extension of cl
+ test eax,#~D_FRAC_MASK
+ jnz large
+ test eax,#0xFF << (D_NORM_BIT-8)
+ jnz middle
+ shl eax,#8
+ sub ebx,#8 << D_EXP_SHIFT
+ test eax,#0xFF << (D_NORM_BIT-8)
+ jnz middle
+ shl eax,#8
+ sub ebx,#8 << D_EXP_SHIFT
+middle:
+ shld ecx,eax,#D_NORM_BIT
+ mov cl,bsr_table[ecx]
+ add ecx,#REG_BIT-D_NORM_BIT-D_NORM_BIT
+ neg ecx
+ shl eax,cl
+ shl ecx,#D_EXP_SHIFT
+ sub ebx,ecx
+return:
+ and eax,#D_FRAC_MASK ! remove normalization bit
+ or eax,ebx ! include exponent (and sign) to fraction
+ pop ecx
+ push eax ! upper dword
+ push edx ! lower dword
+ jmp ecx ! return
+
+ .align ALIGNMENT
+large:
+ shld ecx,eax,#REG_BIT-(D_NORM_BIT+8)
+ jnz huge
+ shld ecx,eax,#REG_BIT-D_NORM_BIT
+ mov cl,bsr_table[ecx]
+got_shift_right:
+ shrd edx,eax,cl
+ shr eax,cl
+ shl ecx,#D_EXP_SHIFT
+ add ebx,ecx
+ jmp return
+
+ .align ALIGNMENT
+huge:
+ mov cl,bsr_table[ecx]
+ add cl,#8
+ jmp got_shift_right
+
+ .data
+ .extern bsr_table
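Fpushi/Fpushl above build the double directly: take the magnitude, find its top set bit (via bsr_table), shift so that bit becomes the norm bit, and set the exponent to D_EXP_BIAS plus the bit position. A C sketch of the same construction (illustration only; int_to_double is a made-up name):

#include <stdint.h>
#include <string.h>

double int_to_double(int32_t i)
{
    uint64_t sign = 0, mag, bits;
    double d;
    int top;
    if (i == 0)
        return 0.0;                                /* the early-out for 0        */
    if (i < 0) {
        sign = 1ULL << 63;
        mag = (uint64_t)-(int64_t)i;               /* magnitude, INT_MIN is safe */
    } else
        mag = (uint64_t)i;
    for (top = 31; !(mag >> top); top--)           /* the code uses bsr_table    */
        ;
    bits = sign
         | ((uint64_t)(1023 + top) << 52)          /* exponent = bias + position */
         | ((mag << (52 - top)) & ((1ULL << 52) - 1)); /* fraction, norm bit off */
    memcpy(&d, &bits, sizeof d);
    return d;
}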
diff --git a/bin86-0.3/bccfp/frexp.x b/bin86-0.3/bccfp/frexp.x
new file mode 100644
index 0000000..318fc34
--- /dev/null
+++ b/bin86-0.3/bccfp/frexp.x
@@ -0,0 +1,66 @@
+! bcc 386 floating point routines (version 2) -- _frexp
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+ .extern fpdenormal
+
+! double frexp(double value, int *exponent);
+! splits a double into exponent and fraction (where 0.5 <= |fraction| < 1.0)
+
+ .globl _frexp
+ .align ALIGNMENT
+_frexp:
+ push ebx
+#undef PC_SIZE
+#define PC_SIZE 8
+ mov eax,PC_SIZE+D_LOW[esp] ! lower dword of x
+ mov ebx,PC_SIZE+D_HIGH[esp] ! upper dword of x
+ mov edx,PC_SIZE+D_SIZE[esp] ! exponent pointer
+ mov ecx,ebx ! extract exponent here
+ and ecx,#D_EXP_MASK
+ jz exp_x_0
+
+ shr ecx,#D_EXP_SHIFT ! exponent + bias
+got_x:
+ sub ecx,#D_EXP_BIAS-1 ! D_EXP_BIAS is for 1.x form, we want 0.1x form
+ mov [edx],ecx ! return exponent
+ and ebx,#D_SIGN_MASK | D_FRAC_MASK ! extract sign and fraction
+ or ebx,#(D_EXP_BIAS-1) << D_EXP_SHIFT ! set new exponent for 0.1x
+ mov edx,ebx
+ pop ebx
+ ret
+
+ .align ALIGNMENT
+exp_x_0:
+ test ebx,#D_FRAC_MASK
+ jnz xu_denorm
+ test eax,eax
+ jnz xl_denorm
+ mov [edx],ecx ! return zero exponent
+ mov ebx,ecx ! guard against -0 (may not be necessary)
+ mov edx,ebx
+ pop ebx
+ ret
+
+ .align ALIGNMENT
+xl_denorm:
+ call fpdenormal
+ bsr ecx,eax ! zzzz
+ neg ecx
+ add ecx,#REG_BIT-1
+ shl eax,cl
+ shld ebx,eax,#D_NORM_BIT+1
+ shl eax,#D_NORM_BIT+1
+ sub ecx,#D_NORM_BIT+1
+ jmp got_x
+
+ .align ALIGNMENT
+xu_denorm:
+ call fpdenormal
+ bsr ecx,ebx
+ neg ecx
+ add ecx,#D_NORM_BIT
+ shld ebx,eax,cl
+ shl eax,cl
+ jmp got_x
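A small usage check for the frexp above (same contract as the C library frexp), together with ldexp from ldexp.x, which reassembles the value:

#include <math.h>
#include <stdio.h>

int main(void)
{
    int e;
    double f = frexp(12.0, &e);
    printf("%g * 2^%d = %g\n", f, e, ldexp(f, e));   /* 0.75 * 2^4 = 12 */
    return 0;
}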
diff --git a/bin86-0.3/bccfp/ftst.x b/bin86-0.3/bccfp/ftst.x
new file mode 100644
index 0000000..2a92ef1
--- /dev/null
+++ b/bin86-0.3/bccfp/ftst.x
@@ -0,0 +1,28 @@
+! bcc 386 floating point routines (version 2) -- Ftst, Ftstd, Ftstf
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+#if 0 /* bcc doesn't generate Ftst (but it might in future) */
+ .globl Ftst
+#endif
+ .align ALIGNMENT
+Ftst:
+ cmp dword PC_SIZE+D_HIGH[esp],#0 ! need only test upper dword of x
+ ret #D_SIZE
+
+! Compare double at address [ebx] with 0
+
+ .globl Ftstd
+ .align ALIGNMENT
+Ftstd:
+ cmp dword D_HIGH[ebx],#0 ! need only test upper dword of x
+ ret
+
+! Compare float at address [ebx] with 0
+
+ .globl Ftstf
+ .align ALIGNMENT
+Ftstf:
+ cmp dword F_HIGH[ebx],#0
+ ret
diff --git a/bin86-0.3/bccfp/ldexp.x b/bin86-0.3/bccfp/ldexp.x
new file mode 100644
index 0000000..bc9dd03
--- /dev/null
+++ b/bin86-0.3/bccfp/ldexp.x
@@ -0,0 +1,74 @@
+! bcc 386 floating point routines (version 2) -- _ldexp
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+ .extern fpoverflow
+ .extern fpunderflow
+
+! double ldexp(double value, int exponent);
+! returns value * (2 ** exponent)
+
+ .globl _ldexp
+ .align ALIGNMENT
+_ldexp:
+ push ebx
+#undef PC_SIZE
+#define PC_SIZE 8
+ mov ebx,PC_SIZE+D_HIGH[esp] ! upper dword of x
+ mov ecx,PC_SIZE+D_SIZE[esp] ! exponent arg
+ mov eax,ebx ! extract exponent (of x) here
+ and eax,#D_EXP_MASK
+! jz exp_y_0 ! may need check for preposterous exponent arg too
+
+ shr eax,#D_EXP_SHIFT ! shift to low bits just for testing
+ jz underflow ! denormal?
+ add eax,ecx ! test-add the exponents
+ jz underflow ! XXX probably need to fiddle norm bit
+ cmp eax,#D_EXP_INFINITE ! check if still within range
+ jae outofbounds ! the unsigned compare catches all overflow cases
+ ! because the exponent of x is non-negative
+
+ shl ecx,#D_EXP_SHIFT ! shift exponent arg bits into final position ...
+ add ebx,ecx ! ... safe to add it to exponent of x now
+ mov eax,PC_SIZE+D_LOW[esp] ! lower dword of x
+ mov edx,ebx
+ pop ebx
+ ret
+
+
+ .align ALIGNMENT
+outofbounds:
+ test ecx,ecx ! overflow or underflow?
+ jns overflow
+underflow:
+ mov edx,ebx ! put sign in usual reg
+ push edi
+ push esi
+ mov edi,eax ! put exponent in usual reg
+ mov eax,2*GENREG_SIZE+PC_SIZE+D_LOW[esp]
+ ! put lower dword of x in usual reg
+ mov esi,ebx ! put upper dword of x in usual reg
+ and esi,#D_EXP_MASK | D_FRAC_MASK
+ test esi,#D_EXP_MASK
+ jz foo
+ and esi,#D_FRAC_MASK
+ or esi,#D_NORM_MASK
+foo:
+ neg edi
+! inc edi ! XXX ?
+ call fpunderflow
+ pop esi
+ pop edi
+ mov ebx,edx ! XXX = wrong reg
+ pop ebx
+ ret
+
+ .align ALIGNMENT
+overflow:
+ mov edx,ebx ! put sign in usual reg
+ call fpoverflow
+ mov eax,ecx ! XXX = wrong reg
+ mov ebx,edx ! XXX = wrong reg
+ pop ebx
+ ret
diff --git a/bin86-0.3/bccfp/modf.c b/bin86-0.3/bccfp/modf.c
new file mode 100644
index 0000000..a83f801
--- /dev/null
+++ b/bin86-0.3/bccfp/modf.c
@@ -0,0 +1,20 @@
+/*
+ * bin86/bccfp/modf.c
+ *
+ * Copyright (C) 1992 Bruce Evans
+ */
+
+#include <math.h>
+
+/* Slooow version. */
+
+double modf(x, pint)
+double x;
+double *pint;
+{
+ if (x >= 0)
+ *pint = floor(x);
+ else
+ *pint = ceil(x);
+ return x - *pint;
+}
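A quick usage check of the modf above: the integral part is taken toward zero, so both pieces keep the sign of the argument.

#include <math.h>
#include <stdio.h>

int main(void)
{
    double ipart;
    double frac = modf(-2.75, &ipart);
    printf("%g and %g\n", ipart, frac);   /* prints -2 and -0.75 */
    return 0;
}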
diff --git a/bin86-0.3/bccfp/test.c b/bin86-0.3/bccfp/test.c
new file mode 100644
index 0000000..05b5d84
--- /dev/null
+++ b/bin86-0.3/bccfp/test.c
@@ -0,0 +1,124 @@
+/*
+ * bin86/bccfp/test.c
+ *
+ * Copyright (C) 1992 Bruce Evans
+ */
+
+#include <sys/times.h>
+#include <limits.h>
+#include <stdio.h>
+#include <time.h>
+
+#define CONVTYPE int
+#define MAX (MIN + NITER - 1)
+#define MIN INT_MIN
+
+#define NITER 100000
+
+double one = 1;
+double two = 2;
+double big = 1e99;
+
+double d;
+double d1;
+float f;
+
+int main()
+{
+ CONVTYPE cti;
+ CONVTYPE cto;
+ clock_t delta;
+ struct tms finish;
+ int i;
+ struct tms start;
+
+#if 0
+ times(&start);
+ for (cti = MIN; cti <= MAX; ++cti)
+ {
+ d = cti;
+ cto = d;
+ if (cti != cto)
+ printf("%08x %08x\n", cti, cto);
+ if (cti % 10000000 == 0)
+ {
+ printf("%8x ok ", cti);
+ fflush(stdout);
+ }
+ }
+ times(&finish);
+ delta = finish.tms_utime - start.tms_utime;
+ printf("Time for %d i -> d and d -> i conversions was %g s (%d t)\n",
+ MAX - MIN + 1, delta / (double) CLOCKS_PER_SEC, delta);
+#endif
+
+ times(&start);
+ for (cti = MIN; cti <= MAX; ++cti)
+ d = cti;
+ times(&finish);
+ delta = finish.tms_utime - start.tms_utime;
+ printf("Time for %d i -> d conversions was %g s (%d t)\n",
+ MAX - MIN + 1, delta / (double) CLOCKS_PER_SEC, delta);
+
+ times(&start);
+ for (cti = MIN; cti <= MAX; ++cti)
+ {
+ d = cti;
+ cto = d;
+ }
+ times(&finish);
+ delta = finish.tms_utime - start.tms_utime - delta;
+ printf("Time for %d d -> i conversions was %g s (%d t)\n",
+ MAX - MIN + 1, delta / (double) CLOCKS_PER_SEC, delta);
+
+ d = 0;
+ times(&start);
+ for (i = 0; i < NITER; ++i)
+ d = d + 1;
+ times(&finish);
+ delta = finish.tms_utime - start.tms_utime;
+ printf("Time for adding %d 1.0's to 0.0 was %g s (%d t), result = %g\n",
+ NITER, delta / (double) CLOCKS_PER_SEC, delta, d);
+
+ d = 0;
+ times(&start);
+ for (; d < NITER;)
+ d = d + 1;
+ times(&finish);
+ delta = finish.tms_utime - start.tms_utime;
+ printf("Time for adding %d 1.0's to 0.0 (d index) was %g s (%d t), result = %g\n",
+ NITER, delta / (double) CLOCKS_PER_SEC, delta, d);
+
+ times(&start);
+ for (i = 1; i <= NITER; ++i)
+ {
+ d1 = i;
+ d = d1 * d1;
+ }
+ times(&finish);
+ delta = finish.tms_utime - start.tms_utime;
+ printf("Time for %d mults was %g s (%d t), result = %g\n",
+ NITER, delta / (double) CLOCKS_PER_SEC, delta, d);
+
+ times(&start);
+ for (i = 1; i <= NITER; ++i)
+ {
+ d1 = i;
+ d = 1 / d1;
+ }
+ times(&finish);
+ delta = finish.tms_utime - start.tms_utime;
+ printf("Time for %d divs was %g s (%d t), result = %g\n",
+ NITER, delta / (double) CLOCKS_PER_SEC, delta, d);
+
+ f = 0;
+ times(&start);
+ for (i = 0; i < NITER; ++i)
+ f = f + 1;
+ times(&finish);
+ delta = finish.tms_utime - start.tms_utime;
+ printf("Time for adding %d 1.0f's to 0.0f was %g s (%d t), result = %g\n",
+ NITER, delta / (double) CLOCKS_PER_SEC, delta, f);
+
+ return 0;
+}