1 files changed, 485 insertions, 0 deletions
diff --git a/libc/i386fp/fadd.x b/libc/i386fp/fadd.x
new file mode 100644
index 0000000..d1e60b1
--- /dev/null
+++ b/libc/i386fp/fadd.x
@@ -0,0 +1,485 @@
+! bcc 386 floating point routines (version 2)
+! -- Fadd, Faddd, Faddf, Fsub, Fsubd, Fsubf, normalize2
+! author: Bruce Evans
+
+#include "fplib.h"
+
+#define FRAME_SIZE	(3 * GENREG_SIZE + PC_SIZE)
+
+	.extern	Fpushf
+	.extern	fpdenormal
+	.extern	fpoverflow
+	.extern	fpunderflow
+
+	.globl	Fadd
+	.align	ALIGNMENT
+Fadd:
+	push	ebp
+	push	edi
+	push	esi
+	mov	eax,FRAME_SIZE+D_LOW[esp]
+	mov	edx,FRAME_SIZE+D_HIGH[esp]
+	mov	ebx,FRAME_SIZE+D_SIZE+D_LOW[esp]
+	mov	ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp]
+	call	addition
+	mov	FRAME_SIZE+D_SIZE+D_LOW[esp],eax
+	mov	FRAME_SIZE+D_SIZE+D_HIGH[esp],edx
+	pop	esi
+	pop	edi
+	pop	ebp
+	ret	#D_SIZE
+
+	.globl	Faddd
+	.align	ALIGNMENT
+Faddd:
+	push	ebp
+	push	edi
+	push	esi
+	mov	eax,FRAME_SIZE+D_LOW[esp]
+	mov	edx,FRAME_SIZE+D_HIGH[esp]
+	mov	ecx,D_HIGH[ebx]
+	mov	ebx,D_LOW[ebx]
+	call	addition
+	mov	FRAME_SIZE+D_LOW[esp],eax
+	mov	FRAME_SIZE+D_HIGH[esp],edx
+	pop	esi
+	pop	edi
+	pop	ebp
+	ret
+
+	.globl	Faddf
+	.align	ALIGNMENT
+Faddf:
+	push	ebp
+	push	edi
+	push	esi
+	call	Fpushf
+	pop	ebx		! yl
+	pop	ecx		! yu
+	mov	eax,FRAME_SIZE+D_LOW[esp]	! xl
+	mov	edx,FRAME_SIZE+D_HIGH[esp]	! xu
+	call	addition
+	mov	FRAME_SIZE+D_LOW[esp],eax
+	mov	FRAME_SIZE+D_HIGH[esp],edx
+	pop	esi
+	pop	edi
+	pop	ebp
+	ret
+
+	.globl	Fsub
+	.align	ALIGNMENT
+Fsub:
+	push	ebp
+	push	edi
+	push	esi
+	mov	eax,FRAME_SIZE+D_LOW[esp]
+	mov	edx,FRAME_SIZE+D_HIGH[esp]
+	mov	ebx,FRAME_SIZE+D_SIZE+D_LOW[esp]
+	mov	ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp]
+	xor	ecx,#D_SIGN_MASK	! complement sign
+	call	addition
+	mov	FRAME_SIZE+D_SIZE+D_LOW[esp],eax
+	mov	FRAME_SIZE+D_SIZE+D_HIGH[esp],edx
+	pop	esi
+	pop	edi
+	pop	ebp
+	ret	#D_SIZE
+
+	.globl	Fsubd
+	.align	ALIGNMENT
+Fsubd:
+	push	ebp
+	push	edi
+	push	esi
+	mov	eax,FRAME_SIZE+D_LOW[esp]
+	mov	edx,FRAME_SIZE+D_HIGH[esp]
+	mov	ecx,D_HIGH[ebx]
+	mov	ebx,D_LOW[ebx]
+	xor	ecx,#D_SIGN_MASK	! complement sign
+	call	addition
+	mov	FRAME_SIZE+D_LOW[esp],eax
+	mov	FRAME_SIZE+D_HIGH[esp],edx
+	pop	esi
+	pop	edi
+	pop	ebp
+	ret
+
+	.globl	Fsubf
+	.align	ALIGNMENT
+Fsubf:
+	push	ebp
+	push	edi
+	push	esi
+	call	Fpushf
+	pop	ebx		! yl
+	pop	ecx		! yu
+	mov	eax,FRAME_SIZE+D_LOW[esp]	! xl
+	mov	edx,FRAME_SIZE+D_HIGH[esp]	! xu
+	xor	ecx,#D_SIGN_MASK	! complement sign
+	call	addition
+	mov	FRAME_SIZE+D_LOW[esp],eax
+	mov	FRAME_SIZE+D_HIGH[esp],edx
+	pop	esi
+	pop	edi
+	pop	ebp
+	ret
+
+	.align	ALIGNMENT
+exp_y_0:
+
+! Check for x denormal, to split off special case where both are denormal,
+! so the norm bit (or 1 higher) is known to be set for addition, so addition
+! can be done faster
+
+	test	esi,#D_EXP_MASK
+	jnz	x_normal_exp_y_0
+	test	esi,esi		! test top bits of x fraction
+	jnz	both_denorm	! denormal iff nonzero fraction with zero exp
+	test	eax,eax		! test rest of fraction
+	jz	return_edx_eax	! everything 0 (XXX - do signs matter?)
+both_denorm:
+	call	fpdenormal
+	test	ebp,#D_SIGN_MASK
+	jnz	denorm_subtract
+
+! Add denormal x to denormal or zero y
+
+#if D_NORM_BIT != D_EXP_SHIFT
+#include "error, carry into norm bit does not go into exponent"
+#endif
+
+	add	eax,ebx
+	adc	esi,edi
+	or	edx,esi
+	ret
+
+denorm_subtract:
+	sub	eax,ebx
+	sbb	esi,edi
+	or	edx,esi
+	ret
+
+	.align	ALIGNMENT
+x_normal_exp_y_0:
+	test	edi,edi		! this is like the check for x denormal
+	jnz	y_denorm
+	test	ebx,ebx	
+	jz	return_edx_eax	! y = 0
+y_denorm:
+	call	fpdenormal
+	or	ecx,#1 << D_EXP_SHIFT	! normalize y by setting exponent to 1
+	jmp	got_y
+
+	.align	ALIGNMENT
+return_edx_eax:
+	ret
+
+	.align	ALIGNMENT
+add_bigshift:
+	cmp	ecx,#D_FRAC_BIT+2
+	jae	return_edx_eax	! x dominates y
+	sub	ecx,#REG_BIT
+	shrd	ebp,ebx,cl
+	shrd	ebx,edi,cl
+	shr	edi,cl
+	add	eax,edi
+	adc	esi,#0
+	xchg	ebp,ebx
+	br	normalize
+
+	.align	ALIGNMENT
+addition:
+	mov	esi,edx		! this mainly for consistent naming
+	and	esi,#D_EXP_MASK | D_FRAC_MASK	! discard sign so comparison is simple
+	mov	edi,ecx		! free cl for shifts
+	and	edi,#D_EXP_MASK | D_FRAC_MASK
+	cmp	esi,edi
+	ja	xbigger
+	jb	swap
+	cmp	eax,ebx
+	jae	xbigger
+swap:
+	xchg	edx,ecx
+	xchg	eax,ebx
+	xchg	esi,edi
+xbigger:
+
+! edx holds sign of result from here on
+! and exponent of result before the normalization step
+
+	mov	ebp,edx		! prepare difference of signs
+	xor	ebp,ecx
+
+	and	ecx,#D_EXP_MASK	! extract exp_y and check for y 0 or denormal
+	beq	exp_y_0		! otherwise x is not 0 or denormal either
+	and	edi,#D_FRAC_MASK	! extract fraction
+	or	edi,#D_NORM_MASK	! normalize
+got_y:
+	and	esi,#D_FRAC_MASK	! extract fraction
+	or	esi,#D_NORM_MASK	! normalize
+
+	sub	ecx,edx		! carries from non-exp bits in edx killed later
+	neg	ecx
+	and	ecx,#D_EXP_MASK
+	shr	ecx,#D_EXP_SHIFT	! difference of exponents
+
+got_x_and_y:
+	and	ebp,#D_SIGN_MASK 	! see if signs are same
+	bne	subtract	! else roundoff reg ebp has been cleared
+
+	cmp	cl,#REG_BIT
+	bhis	add_bigshift
+	shrd	ebp,ebx,cl
+	shrd	ebx,edi,cl
+	shr	edi,cl
+	add	eax,ebx
+	adc	esi,edi
+
+! result edx(D_SIGN_MASK | D_EXP_MASK bits):esi:eax:ebp but needs normalization
+
+	mov	edi,edx
+	and	edi,#D_EXP_MASK
+	test	esi,#D_NORM_MASK << 1
+	jnz	add_loverflow
+
+add_round:
+	cmp	ebp,#1 << (REG_BIT-1)	! test roundoff register
+	jb	add_done	! no rounding
+	jz	tie
+add_roundup:
+	add	eax,#1
+	adc	esi,#0
+	test	esi,#D_NORM_MASK << 1
+	jnz	pre_add_loverflow	! rounding may cause overflow!
+add_done:
+ 	mov	ecx,edx		! duplicated code from 'done'
+	and	edx,#D_SIGN_MASK
+	or	edx,edi
+	and	esi,#D_FRAC_MASK
+	or	edx,esi
+	ret
+
+	.align	ALIGNMENT
+tie:
+	test	al,#1		! tie case, round to even
+	jz	add_done	! even, no rounding
+	jmp	add_roundup
+
+	.align	ALIGNMENT
+pre_add_loverflow:
+	sub	ebp,ebp		! clear rounding register
+				! probably avoiding tests for more rounding
+add_loverflow:
+	shrd	ebp,eax,#1
+	jnc	over_set_sticky_bit
+	or	ebp,#1
+over_set_sticky_bit:
+	shrd	eax,esi,#1
+	shr	esi,#1
+	add	edi,1 << D_EXP_SHIFT
+	cmp	edi,#D_EXP_INFINITE << D_EXP_SHIFT
+	jl	add_round
+overflow:
+	call	fpoverflow
+	mov	eax,ecx		! XXX - wrong reg
+	ret
+
+! result edx(D_SIGN_MASK | D_EXP_MASK bits):
+!        esi((D_NORM_MASK << 1) | D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx
+! but needs normalization
+
+	.align	ALIGNMENT
+normalize:
+	mov	edi,edx
+	and	edi,#D_EXP_MASK
+	test	esi,#D_NORM_MASK << 1
+	bne	loverflow
+
+! result edx(D_SIGN_MASK bit):edi(D_EXP_MASK bits):
+!        esi(D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx
+! but needs normalization
+
+	.globl	normalize2
+normalize2:
+	test	esi,#D_NORM_MASK	! already-normalized is very common
+	jz	normalize3
+round:
+	cmp	ebp,#1 << (REG_BIT-1)	! test roundoff register
+	jb	done		! no rounding
+	jz	near_tie
+roundup:
+	add	eax,#1
+	adc	esi,#0
+	test	esi,#D_NORM_MASK << 1
+	bne	pre_loverflow	! rounding may cause overflow!
+done:
+cmp	edi,#D_EXP_INFINITE << D_EXP_SHIFT
+jae	overflow
+	and	edx,#D_SIGN_MASK	! extract sign of largest and result
+	or	edx,edi		! include exponent with sign
+	and	esi,#D_FRAC_MASK	! discard norm bit
+	or	edx,esi		! include fraction with sign and exponent
+	ret
+
+	.align	ALIGNMENT
+near_tie:
+	test	ebx,ebx
+	jnz	roundup
+	test	al,#1		! tie case, round to even
+	jz	done		! even, no rounding
+	jmp	roundup
+
+	.align	ALIGNMENT
+not_in_8_below:
+	shld	ecx,esi,#REG_BIT-D_NORM_BIT+16	! in 9 to 16 below?
+	jz	not_in_16_below	! must be way below (17-20 for usual D_NORM_BIT)
+	mov	cl,bsr_table[ecx]	! bsr(esi) - (D_NORM_BIT-16)
+	neg	ecx		! (D_NORM_BIT-16) - bsr(esi)
+	add	ecx,#16
+	jmp	got_shift
+
+	.align	ALIGNMENT
+not_in_16_below:
+	mov	cl,bsr_table[esi]	! bsr(esi) directly
+	neg	ecx			! -bsr(esi)
+	add	ecx,#D_NORM_BIT		! D_NORM_BIT - bsr(esi)
+	jmp	got_shift
+
+	.align	ALIGNMENT
+normalize3:
+	test	esi,esi
+	jz	shift32
+
+! Find first nonzero bit in esi
+! Don't use bsr, it is very slow (const + 3 * bit_found)
+! We know that there is some nonzero bit, and the norm bit and above are clear
+
+	sub	ecx,ecx		! prepare unsigned extension of cl
+	shld	ecx,esi,#REG_BIT-D_NORM_BIT+8	! any bits in 8 below norm bit?
+	jz	not_in_8_below
+	mov	cl,bsr_table[ecx]	! bsr(esi) - (D_NORM_BIT-8)
+	neg	ecx		! (D_NORM_BIT-8) - bsr(esi)
+	add	ecx,#8		! D_NORM_BIT - bsr(esi)
+got_shift:
+	shld	esi,eax,cl
+	shld	eax,ebp,cl
+	shld	ebp,ebx,cl
+	shl	ebx,cl
+	shl	ecx,D_EXP_SHIFT
+	sub	edi,ecx
+	bhi	round		! XXX - can rounding change the exponent to > 0?
+				! not bgt since edi may be 0x80000000
+	neg	edi
+	shr	edi,#D_EXP_SHIFT
+	inc	edi
+	br	fpunderflow
+
+	.align	ALIGNMENT
+pre_loverflow:
+	sub	ebp,ebp		! clear rounding registers
+	sub	ebx,ebx		! probably avoiding tests for more rounding
+
+loverflow:
+	shr	esi,#1		! carry bit stayed in the reg
+	rcr	eax,#1
+	rcr	ebp,#1
+	rcr	ebx,#1
+	add	edi,1 << D_EXP_SHIFT
+	cmp	edi,#D_EXP_INFINITE << D_EXP_SHIFT
+	blt	round
+	call	fpoverflow
+	mov	eax,ecx		! XXX - wrong reg
+	ret
+
+	.align	ALIGNMENT
+shift32:
+	test	eax,eax
+	jz	shift64
+	mov	esi,eax
+	mov	eax,ebp
+	mov	ebp,ebx
+	sub	ebx,ebx
+	sub	edi,#REG_BIT << D_EXP_SHIFT
+shiftxx:
+	test	esi,#~(D_NORM_MASK | D_FRAC_MASK)
+	jz	over_adjust	! else too big already
+	shrd	ebx,ebp,#D_BIT-D_FRAC_BIT
+	shrd	ebp,eax,#D_BIT-D_FRAC_BIT
+	shrd	eax,esi,#D_BIT-D_FRAC_BIT
+	shr	esi,#D_BIT-D_FRAC_BIT
+	add	edi,#(D_BIT-D_FRAC_BIT) << D_EXP_SHIFT
+over_adjust:
+	test	edi,edi
+	bgt	normalize2
+	neg	edi
+	shr	edi,#D_EXP_SHIFT
+	inc	edi
+	br	fpunderflow
+
+	.align	ALIGNMENT
+shift64:
+	test	ebp,ebp
+	jz	shift96
+	mov	esi,ebp
+	mov	eax,ebx
+	sub	ebp,ebp
+	mov	ebx,ebp
+	sub	edi,#(2*REG_BIT) << D_EXP_SHIFT
+	jmp	shiftxx
+
+	.align	ALIGNMENT
+shift96:
+	test	ebx,ebx		! XXX - this test is probably unnecessary
+				! since the shift must be small unless we
+				! are subtracting 2 almost-equal numbers,
+				! and then the bits beyond 64 will mostly
+				! be 0
+	jz	return_esi_eax	! all zero
+	mov	esi,ebx
+	sub	ebx,ebx
+	sub	edi,#(3*REG_BIT) << D_EXP_SHIFT
+	jmp	shiftxx
+
+	.align	ALIGNMENT
+return_esi_eax:
+	mov	edx,esi
+	ret
+
+	.align	ALIGNMENT
+subtract:
+	sub	ebp,ebp		! set up roundoff register
+	cmp	ecx,#REG_BIT
+	jae	subtract_bigshift
+	shrd	ebp,ebx,cl
+	shrd	ebx,edi,cl
+	shr	edi,cl
+	neg	ebp		! begin subtraction esi:eax:0 - edi:ebx:ebp
+	sbb	eax,ebx
+	sbb	esi,edi
+	sub	ebx,ebx
+	mov	edi,edx
+	and	edi,#D_EXP_MASK
+	br	normalize2
+
+	.align	ALIGNMENT
+subtract_bigshift:
+	cmp	ecx,#D_FRAC_BIT+2
+	bhis	return_edx_eax	! x dominates y
+	sub	ecx,#REG_BIT
+	shrd	ebp,ebx,cl
+	shrd	ebx,edi,cl
+	shr	edi,cl
+	not	ebp		! begin subtraction esi:eax:0:0 - 0:edi:ebx:ebp
+	not	ebx
+	add	ebp,#1
+	adc	ebx,#0
+	cmc
+	sbb	eax,edi
+	sbb	esi,#0
+	xchg	ebp,ebx
+	mov	edi,edx
+	and	edi,#D_EXP_MASK
+	br	normalize2
+
+	.data
+	.extern	bsr_table