1 files changed, 312 insertions, 0 deletions
diff --git a/libc/i386fp/fdiv.x b/libc/i386fp/fdiv.x
new file mode 100644
index 0000000..4a5cf74
--- /dev/null
+++ b/libc/i386fp/fdiv.x
@@ -0,0 +1,312 @@
+#define EF_SIZE		4
+
+! bcc 386 floating point routines (version 2) -- Fdiv, Fdivd, Fdivf
+! authors: Timothy Murphy (tim@maths.tcd.ie), Bruce Evans
+
+#include "fplib.h"
+
+#define FRAME_SIZE	(3 * GENREG_SIZE + PC_SIZE)
+
+	.extern	Fpushf
+	.extern	fpdivzero
+	.extern	fpoverflow
+	.extern	fpunderflow
+
+! double Fdiv(double x, double y) returns x / y
+
+! pop 2 doubles from stack, divide first by second, and push quotient on stack
+
+! we denote upper and lower dwords of x and y (or their fractions)
+! by (xu,xl), (yu,yl)
+
+	.globl	Fdivf
+	.align	ALIGNMENT
+Fdivf:
+	sub	esp,#D_SIZE	! make space for dummy double on stack
+	push	ebp
+	push	edi		! save some regs
+	push	esi
+	mov	eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp]	! move return address ...
+	mov	FRAME_SIZE-PC_SIZE[esp],eax	! ... to usual spot
+	call	Fpushf
+	pop	esi		! yl
+	pop	edi		! yu
+	mov	eax,FRAME_SIZE+D_SIZE+D_LOW[esp]	! xl
+	mov	edx,FRAME_SIZE+D_SIZE+D_HIGH[esp]	! xu
+	jmp	division
+
+	.globl	Fdiv
+	.align	ALIGNMENT
+Fdiv:
+	push	ebp
+	push	edi		! save some regs
+	push	esi
+	mov	eax,FRAME_SIZE+D_LOW[esp]	! xl
+	mov	edx,FRAME_SIZE+D_HIGH[esp]	! xu
+	mov	esi,FRAME_SIZE+D_SIZE+D_LOW[esp]	! yl
+	mov	edi,FRAME_SIZE+D_SIZE+D_HIGH[esp]	! yu
+	jmp	division
+
+	.align	ALIGNMENT
+exp_y_0:
+	mov	ebx,edi
+	or	ebx,esi
+	beq	zerodivide
+	mov	ebx,#1
+fix_y:
+	test	edi,edi		! XXX - sloow
+	js	y_unpacked
+	shld	edi,esi,#1
+	shl	esi,#1
+	dec	bx
+	jmp	fix_y
+
+	.align	ALIGNMENT
+exp_x_0:
+	mov	ecx,edx
+	or	ecx,eax
+	beq	retz
+	mov	ecx,#1	! change exponent from 0 to 1
+fix_x:
+	test	edx,#1 << (REG_BIT-1-2)		! XXX - sloow
+	jnz	x_unpacked
+	shld	edx,eax,#1
+	shl	eax,#1
+	dec	cx
+	jmp	fix_x
+
+! Fdivd pops double from stack, divides it by double at [ebx],
+! and pushes quotient back on stack
+
+	.globl	Fdivd
+	.align	ALIGNMENT
+Fdivd:
+	sub	esp,#D_SIZE	! make space for dummy double on stack
+	push	ebp
+	push	edi		! save some regs
+	push	esi
+	mov	eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp]	! move return address ...
+	mov	FRAME_SIZE-PC_SIZE[esp],eax	! ... to usual spot
+	mov	eax,FRAME_SIZE+D_SIZE+D_LOW[esp]	! xl
+	mov	edx,FRAME_SIZE+D_SIZE+D_HIGH[esp]	! xu
+	mov	esi,D_LOW[ebx]	! yl
+	mov	edi,D_HIGH[ebx]	! yu
+
+division:
+
+! The full calculations are
+
+!	(xu,xl,0) = yu * (zu,zl) + (0,r,0)  (normal 96/32 -> 64 bit division)
+!	yl * zu = yu * q1 + r1  (32*32 -> 64 bit mul and 64/32 -> 32 bit div)
+
+! so
+
+!	(xu,xl,0,0) = (yu,yl) * (zu,zl-q1) + (0,0,r-r1,yl*(q1-zl))
+
+! where the calculations zl-q1, r-r1 and yl*(q1-zl) are more complicated
+! than the notation suggests. They may be negative and the one with the
+! multiplication may not fit in 32 bits and in both cases the overflow
+! has to be moved into higher bit positions.
+
+! See Knuth for why (zu,zl-q1) is the correct 64-bit quotient to within
+! 1 bit either way (assuming the normalization x < 2 * y).
+
+! We only need to calculate the remainder (0,0,r-r1,yl*(q1-zl)) to resolve
+! tie cases. It tells whether the approximate quotient is too high or too
+! low.
+
+#define NTEMPS	5
+
+	sub	esp,#NTEMPS*GENREG_SIZE	! space to remember values for rounding of tie case
+
+! Offsets from esp for these values (offsets using FRAME_SIZE are invalid
+! while these temps are active)
+r	=	0
+q1	=	4
+r1	=	8
+yl	=	12
+zl	=	16
+
+! Step 1: unpack and normalize x to fraction in edx:eax (left shifted as
+! far as possible less 2 so that x < y, and later z < y); unpack and normalize
+! y to a fraction in edi:esi (left shifted as far as possible), put difference
+! of signs (= sign of quotient) in ecx(D_SIGN_MASK) and difference of exponents 
+! (= exponent of quotient before normalization) in cx.
+
+	mov	ebp,edx		! xu
+	xor	ebp,edi		! xu ^ yu
+	and	ebp,#D_SIGN_MASK	! sign of result is difference of signs
+
+! Unpack y first to trap 0 / 0
+
+	mov	ebx,edi		! remember yu for exponent of y
+	shld	edi,esi,#D_BIT-D_FRAC_BIT	! extract fraction of y ...
+	shl	esi,#D_BIT-D_FRAC_BIT
+	and	ebx,#D_EXP_MASK	! exponent of y
+	jz	exp_y_0
+	shr	ebx,#D_EXP_SHIFT	! in ebx (actually in bx, with high bits 0)
+	or	edi,#D_NORM_MASK << (D_BIT-D_FRAC_BIT)	! normalize
+y_unpacked:
+
+! Unpack x
+
+	mov	ecx,edx		! remember xu for exponent of x
+	shld	edx,eax,#D_BIT-D_FRAC_BIT-2	! extract fraction of x ...
+	shl	eax,#D_BIT-D_FRAC_BIT-2
+	and	edx,#(D_NORM_MASK << (D_BIT-D_FRAC_BIT-2+1))-1
+				! XXX - above may be shifted 1 extra unnecessarily
+	and	ecx,#D_EXP_MASK	! exponent of x
+	jz	exp_x_0
+	shr	ecx,#D_EXP_SHIFT	! in ecx (actually in cx, with high bits 0)
+	or	edx,#D_NORM_MASK << (D_BIT-D_FRAC_BIT-2) ! normalize
+x_unpacked:
+
+	sub	cx,bx		! not ecx,ebx because we want to use high bit for sign
+	add	cx,#D_EXP_BIAS	! adjust exponent of quotient
+
+	or	ecx,ebp		! include sign with exponent
+
+! Step 2: quotient of fractions -> (edx,eax)
+
+! 2a: (xu,xl,0) div yu = (zu,zl) -> (ebx,esi)
+
+	div	eax,edi		! (xu,xl) div yu = zu in eax; remainder (rem) in edx
+	mov	ebx,eax		! save zu in ebx
+	sub	eax,eax		! clear eax: (edx,eax) = (rem,0)
+	div	eax,edi		! (rem,0) div yu = zl in eax
+	mov	r[esp],edx
+	mov	zl[esp],eax
+	xchg	eax,esi		! store zl in esi; save yl in eax
+	mov	yl[esp],eax
+
+! 2b: (yl * zu) div yu -> (0,eax)
+
+	mul	eax,ebx		! yl * zu -> (edx,eax)
+	div	eax,edi		! (yl * zu) div yu in eax
+	mov	q1[esp],eax
+	mov	r1[esp],edx
+
+! 2c: (xu,xl) / (yu,yl) = (zu,zl) - (yl * zu) div yu -> (edx,eax)
+
+	mov	edx,ebx		! zu
+	xchg	eax,esi		! eax <- zl; esi <- (yl * zu) div yu
+	sub	eax,esi
+	sbb	edx,#0
+
+! Step 3: normalise quotient
+
+	test	edx,#1 << (REG_BIT-2)	! is fraction too small? (can only be by 1 bit)
+	jnz	div4
+	shld	edx,eax,#1	! yes; multiply fraction ...
+	shl	eax,#1		! ... by 2 ...
+	dec	cx		! ... and decrement exponent
+
+! Step 4: shift and round
+
+div4:
+	mov	ebx,eax		! save for rounding
+	shrd	eax,edx,#D_BIT-D_FRAC_BIT-1	! shift fraction of result ...
+	shr	edx,#D_BIT-D_FRAC_BIT-1		! ... to proper position
+	and	ebx,#(1 << (D_BIT-D_FRAC_BIT-1))-1	! look at bits shifted out
+	cmp	ebx,#D_NORM_MASK >> (D_BIT-D_FRAC_BIT)	! compare with middle value
+	jb	div5		! below middle, don't round up
+	ja	roundup		! above middle, round up
+
+! The low bits don't contain enough information to resolve the tie case,
+! because the quotient itself is only an approximation.
+! Calculate the exact remainder.
+! This case is not very common, so don't worry much about speed.
+! Unfortunately we had to save extra in all cases to prepare for it.
+
+	push	edx
+	push	eax
+
+	sub	esi,esi		! the calculation requires 33 bits - carry to here
+	mov	eax,2*GENREG_SIZE+q1[esp]
+	sub	eax,2*GENREG_SIZE+zl[esp]
+	pushfd
+	mul	dword EF_SIZE+2*GENREG_SIZE+yl[esp]
+	popfd
+	jnc	foo
+	sub	edx,2*GENREG_SIZE+yl[esp]
+	sbb	esi,#0
+foo:
+	add	edx,2*GENREG_SIZE+r[esp]
+	adc	esi,#0
+	sub	edx,2*GENREG_SIZE+r1[esp]
+	sbb	esi,#0
+	mov	ebx,eax
+	mov	edi,edx
+
+	pop	eax
+	pop	edx
+
+! Can finally decide rounding of tie case
+
+	js	div5		! remainder < 0 from looking at top 64 bits
+	jnz	roundup		! remainder > 0 from looking at top 64 bits
+	or	edi,ebx		! test bottom 64 bits
+	jnz	roundup		! remainder > 0
+
+	test	al,#1		! at last we know it is the tie case, check parity bit
+	jz	div5		! already even, otherwise round up to make even
+
+roundup:
+	add	eax,#1		! add rounding bit
+	adc	edx,#0
+	test	edx,#D_NORM_MASK << 1	! has fraction overflowed (very unlikely)
+	jz	div5
+! Why were the shifts commented out?
+	shrd	eax,edx,#1	! yes, divide fraction ...
+	shr	edx,#1		! ... by 2 ...
+	inc	cx		! ... and increment exponent
+
+! Step 5: put it all together
+
+div5:
+	mov	ebx,ecx		! extract sign
+	and	ebx,D_SIGN_MASK
+	cmp	cx,#D_EXP_INFINITE	! is exponent too big?
+	jge	overflow
+	test	cx,cx
+	jle	underflow
+	shl	ecx,#D_EXP_SHIFT
+
+	and	edx,#D_FRAC_MASK	! remove norm bit
+	or	edx,ecx		! include exponent ...
+	or	edx,ebx		! ... and sign
+
+return:
+	add	esp,#NTEMPS*GENREG_SIZE	! reclaim temp space
+	mov	FRAME_SIZE+D_SIZE+D_LOW[esp],eax	! "push" lower dword of product ...
+	mov	FRAME_SIZE+D_SIZE+D_HIGH[esp],edx	! ... and upper dword
+	pop	esi		! restore registers
+	pop	edi
+	pop	ebp
+	ret	#D_SIZE
+
+retz:
+	sub	edx,edx		! clear upper dword
+	sub	eax,eax		! ... and lower dword
+	jmp	return
+
+overflow:
+	mov	edx,ecx		! put sign in usual reg
+	call	fpoverflow
+	mov	eax,ecx		! XXX - wrong reg
+	jmp	return
+
+underflow:
+	mov	esi,edx		! put upper part of fraction in usual reg
+	mov	edx,ecx		! sign
+	movsx	edi,cx		! put shift in usual reg
+	neg	edi
+	inc	edi
+	call	fpunderflow
+	jmp	return
+
+zerodivide:
+	mov	edx,ebp		! sign
+	call	fpdivzero
+	mov	eax,ecx		! XXX - wrong reg
+	jmp	return