path: root/libc/i386fp/fadd.x
! bcc 386 floating point routines (version 2)
! -- Fadd, Faddd, Faddf, Fsub, Fsubd, Fsubf, normalize2
! author: Bruce Evans

#include "fplib.h"

#define FRAME_SIZE	(3 * GENREG_SIZE + PC_SIZE)

	.extern	Fpushf
	.extern	fpdenormal
	.extern	fpoverflow
	.extern	fpunderflow
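
! Overview: Fadd and Fsub take both doubles on the stack, store the result
! over the second operand's slot and pop the first with `ret #D_SIZE', so the
! caller apparently picks the result up from the top of the stack.  Faddd and
! Fsubd take the second operand through a pointer in ebx and overwrite the
! stack operand; Faddf and Fsubf get their second operand from Fpushf, which
! appears to push the float argument widened to a double.  All six entry
! points load x into edx:eax (high:low) and y into ecx:ebx, then funnel into
! the common `addition' routine below.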

	.globl	Fadd
	.align	ALIGNMENT
Fadd:
	push	ebp
	push	edi
	push	esi
	mov	eax,FRAME_SIZE+D_LOW[esp]
	mov	edx,FRAME_SIZE+D_HIGH[esp]
	mov	ebx,FRAME_SIZE+D_SIZE+D_LOW[esp]
	mov	ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp]
	call	addition
	mov	FRAME_SIZE+D_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret	#D_SIZE

	.globl	Faddd
	.align	ALIGNMENT
Faddd:
	push	ebp
	push	edi
	push	esi
	mov	eax,FRAME_SIZE+D_LOW[esp]
	mov	edx,FRAME_SIZE+D_HIGH[esp]
	mov	ecx,D_HIGH[ebx]
	mov	ebx,D_LOW[ebx]
	call	addition
	mov	FRAME_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret

	.globl	Faddf
	.align	ALIGNMENT
Faddf:
	push	ebp
	push	edi
	push	esi
	call	Fpushf
	pop	ebx		! yl
	pop	ecx		! yu
	mov	eax,FRAME_SIZE+D_LOW[esp]	! xl
	mov	edx,FRAME_SIZE+D_HIGH[esp]	! xu
	call	addition
	mov	FRAME_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret

	.globl	Fsub
	.align	ALIGNMENT
Fsub:
	push	ebp
	push	edi
	push	esi
	mov	eax,FRAME_SIZE+D_LOW[esp]
	mov	edx,FRAME_SIZE+D_HIGH[esp]
	mov	ebx,FRAME_SIZE+D_SIZE+D_LOW[esp]
	mov	ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp]
	xor	ecx,#D_SIGN_MASK	! complement sign
	call	addition
	mov	FRAME_SIZE+D_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret	#D_SIZE

	.globl	Fsubd
	.align	ALIGNMENT
Fsubd:
	push	ebp
	push	edi
	push	esi
	mov	eax,FRAME_SIZE+D_LOW[esp]
	mov	edx,FRAME_SIZE+D_HIGH[esp]
	mov	ecx,D_HIGH[ebx]
	mov	ebx,D_LOW[ebx]
	xor	ecx,#D_SIGN_MASK	! complement sign
	call	addition
	mov	FRAME_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret

	.globl	Fsubf
	.align	ALIGNMENT
Fsubf:
	push	ebp
	push	edi
	push	esi
	call	Fpushf
	pop	ebx		! yl
	pop	ecx		! yu
	mov	eax,FRAME_SIZE+D_LOW[esp]	! xl
	mov	edx,FRAME_SIZE+D_HIGH[esp]	! xu
	xor	ecx,#D_SIGN_MASK	! complement sign
	call	addition
	mov	FRAME_SIZE+D_LOW[esp],eax
	mov	FRAME_SIZE+D_HIGH[esp],edx
	pop	esi
	pop	edi
	pop	ebp
	ret

	.align	ALIGNMENT
exp_y_0:

! Check for x denormal, to split off the special case where both operands are
! denormal.  With that case handled separately, the norm bit (or the bit 1
! higher) is known to be set for the addition, so the addition can be done
! faster.

	test	esi,#D_EXP_MASK
	jnz	x_normal_exp_y_0
	test	esi,esi		! test top bits of x fraction
	jnz	both_denorm	! denormal iff nonzero fraction with zero exp
	test	eax,eax		! test rest of fraction
	jz	return_edx_eax	! everything 0 (XXX - do signs matter?)
both_denorm:
	call	fpdenormal
	and	edx,#D_SIGN_MASK	! keep only the sign of x; the summed fraction is or'ed in below
	test	ebp,#D_SIGN_MASK
	jnz	denorm_subtract

! Add denormal x to denormal or zero y

#if D_NORM_BIT != D_EXP_SHIFT
#include "error, carry into norm bit does not go into exponent"
#endif
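
! Both operands are denormal here, so both have true exponent 1 and no hidden
! bit: no alignment shift is needed and the fractions can simply be added.
! A carry out of the top fraction word lands in the exponent field (that is
! what the #if above guards), turning the result into the smallest normal,
! which is exactly the right value.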

	add	eax,ebx
	adc	esi,edi
	or	edx,esi
	ret

denorm_subtract:
	sub	eax,ebx
	sbb	esi,edi
	or	edx,esi
	ret

	.align	ALIGNMENT
x_normal_exp_y_0:
	test	edi,edi		! this is like the check for x denormal
	jnz	y_denorm
	test	ebx,ebx	
	jz	return_edx_eax	! y = 0
y_denorm:
	call	fpdenormal
	or	ecx,#1 << D_EXP_SHIFT	! normalize y by setting exponent to 1
	jmp	got_y

	.align	ALIGNMENT
return_edx_eax:
	ret

	.align	ALIGNMENT
add_bigshift:
	cmp	ecx,#D_FRAC_BIT+2
	jae	return_edx_eax	! x dominates y
	sub	ecx,#REG_BIT
	shrd	ebp,ebx,cl
	shrd	ebx,edi,cl
	shr	edi,cl
	add	eax,edi
	adc	esi,#0
	xchg	ebp,ebx
	br	normalize
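
! addition is the common core: x arrives in edx:eax and y in ecx:ebx
! (high:low), and the result is returned in edx:eax.  esi and edi get
! sign-stripped copies so the operands can be compared and, if need be,
! swapped to make x the one with the larger magnitude; ebp picks up the XOR
! of the signs (to choose between adding and subtracting magnitudes) and
! later doubles as the roundoff word.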

	.align	ALIGNMENT
addition:
	mov	esi,edx		! this mainly for consistent naming
	and	esi,#D_EXP_MASK | D_FRAC_MASK	! discard sign so comparison is simple
	mov	edi,ecx		! free cl for shifts
	and	edi,#D_EXP_MASK | D_FRAC_MASK
	cmp	esi,edi
	ja	xbigger
	jb	swap
	cmp	eax,ebx
	jae	xbigger
swap:
	xchg	edx,ecx
	xchg	eax,ebx
	xchg	esi,edi
xbigger:

! edx holds sign of result from here on
! and exponent of result before the normalization step

	mov	ebp,edx		! prepare difference of signs
	xor	ebp,ecx

	and	ecx,#D_EXP_MASK	! extract exp_y and check for y 0 or denormal
	beq	exp_y_0		! otherwise x is not 0 or denormal either
	and	edi,#D_FRAC_MASK	! extract fraction
	or	edi,#D_NORM_MASK	! normalize
got_y:
	and	esi,#D_FRAC_MASK	! extract fraction
	or	esi,#D_NORM_MASK	! normalize

	sub	ecx,edx		! carries from non-exp bits in edx killed later
	neg	ecx
	and	ecx,#D_EXP_MASK
	shr	ecx,#D_EXP_SHIFT	! difference of exponents
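
! State on reaching got_x_and_y: esi:eax is the larger fraction and edi:ebx
! the smaller, each with the norm bit set (except for a denormal y arriving
! via got_y); ecx is the exponent difference, i.e. how far y must be shifted
! right to line up with x; the sign bit of ebp distinguishes add from
! subtract.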

got_x_and_y:
	and	ebp,#D_SIGN_MASK 	! see if signs are same
	bne	subtract	! else roundoff reg ebp has been cleared

	cmp	ecx,#REG_BIT	! compare the full difference - it can exceed 255 (cf. subtract)
	bhis	add_bigshift
	shrd	ebp,ebx,cl
	shrd	ebx,edi,cl
	shr	edi,cl
	add	eax,ebx
	adc	esi,edi

! result edx(D_SIGN_MASK | D_EXP_MASK bits):esi:eax:ebp but needs normalization

	mov	edi,edx
	and	edi,#D_EXP_MASK
	test	esi,#D_NORM_MASK << 1
	jnz	add_loverflow
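
! Round to nearest, ties to even.  ebp holds the bits of y that were shifted
! out below the 64-bit sum esi:eax, so it is compared against half an ulp,
! 1 << (REG_BIT-1): below means truncate, above means increment, and exactly
! half falls through to `tie', which rounds up only when the low bit of al is
! odd.  Roughly, as a C-like sketch (the names are illustrative only):
!	if (lost > HALF || (lost == HALF && (frac & 1)))
!		++frac;			/* HALF == 1 << (REG_BIT-1) */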

add_round:
	cmp	ebp,#1 << (REG_BIT-1)	! test roundoff register
	jb	add_done	! no rounding
	jz	tie
add_roundup:
	add	eax,#1
	adc	esi,#0
	test	esi,#D_NORM_MASK << 1
	jnz	pre_add_loverflow	! rounding may cause overflow!
add_done:
	mov	ecx,edx		! duplicated code from 'done'
	and	edx,#D_SIGN_MASK
	or	edx,edi
	and	esi,#D_FRAC_MASK
	or	edx,esi
	ret

	.align	ALIGNMENT
tie:
	test	al,#1		! tie case, round to even
	jz	add_done	! even, no rounding
	jmp	add_roundup

	.align	ALIGNMENT
pre_add_loverflow:
	sub	ebp,ebp		! clear rounding register
				! probably avoiding tests for more rounding
add_loverflow:
	shrd	ebp,eax,#1
	jnc	over_set_sticky_bit
	or	ebp,#1
over_set_sticky_bit:
	shrd	eax,esi,#1
	shr	esi,#1
	add	edi,#1 << D_EXP_SHIFT
	cmp	edi,#D_EXP_INFINITE << D_EXP_SHIFT
	jl	add_round
overflow:
	call	fpoverflow
	mov	eax,ecx		! XXX - wrong reg
	ret

! result edx(D_SIGN_MASK | D_EXP_MASK bits):
!        esi((D_NORM_MASK << 1) | D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx
! but needs normalization

	.align	ALIGNMENT
normalize:
	mov	edi,edx
	and	edi,#D_EXP_MASK
	test	esi,#D_NORM_MASK << 1
	bne	loverflow

! result edx(D_SIGN_MASK bit):edi(D_EXP_MASK bits):
!        esi(D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx
! but needs normalization

	.globl	normalize2
normalize2:
	test	esi,#D_NORM_MASK	! already-normalized is very common
	jz	normalize3
round:
	cmp	ebp,#1 << (REG_BIT-1)	! test roundoff register
	jb	done		! no rounding
	jz	near_tie
roundup:
	add	eax,#1
	adc	esi,#0
	test	esi,#D_NORM_MASK << 1
	bne	pre_loverflow	! rounding may cause overflow!
done:
	cmp	edi,#D_EXP_INFINITE << D_EXP_SHIFT
	jae	overflow
	and	edx,#D_SIGN_MASK	! extract sign of largest and result
	or	edx,edi		! include exponent with sign
	and	esi,#D_FRAC_MASK	! discard norm bit
	or	edx,esi		! include fraction with sign and exponent
	ret

	.align	ALIGNMENT
near_tie:
	test	ebx,ebx
	jnz	roundup
	test	al,#1		! tie case, round to even
	jz	done		! even, no rounding
	jmp	roundup

	.align	ALIGNMENT
not_in_8_below:
	shld	ecx,esi,#REG_BIT-D_NORM_BIT+16	! in 9 to 16 below?
	jz	not_in_16_below	! must be way below (17-20 for usual D_NORM_BIT)
	mov	cl,bsr_table[ecx]	! bsr(esi) - (D_NORM_BIT-16)
	neg	ecx		! (D_NORM_BIT-16) - bsr(esi)
	add	ecx,#16
	jmp	got_shift

	.align	ALIGNMENT
not_in_16_below:
	mov	cl,bsr_table[esi]	! bsr(esi) directly
	neg	ecx			! -bsr(esi)
	add	ecx,#D_NORM_BIT		! D_NORM_BIT - bsr(esi)
	jmp	got_shift

	.align	ALIGNMENT
normalize3:
	test	esi,esi
	jz	shift32

! Find first nonzero bit in esi
! Don't use bsr, it is very slow (const + 3 * bit_found)
! We know that there is some nonzero bit, and the norm bit and above are clear
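! Instead, the search uses byte-wide lookups in the external bsr_table
! (presumably 256 bytes mapping a value to the index of its highest set bit).
! The first shld picks out the 8 bits just below the norm bit, since small
! shifts are by far the most common case, and the code only falls back to the
! next 8 bits, and then to what is left of the word, when those are zero.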

	sub	ecx,ecx		! prepare unsigned extension of cl
	shld	ecx,esi,#REG_BIT-D_NORM_BIT+8	! any bits in 8 below norm bit?
	jz	not_in_8_below
	mov	cl,bsr_table[ecx]	! bsr(esi) - (D_NORM_BIT-8)
	neg	ecx		! (D_NORM_BIT-8) - bsr(esi)
	add	ecx,#8		! D_NORM_BIT - bsr(esi)
got_shift:
	shld	esi,eax,cl
	shld	eax,ebp,cl
	shld	ebp,ebx,cl
	shl	ebx,cl
	shl	ecx,#D_EXP_SHIFT
	sub	edi,ecx
	bhi	round		! XXX - can rounding change the exponent to > 0?
				! not bgt since edi may be 0x80000000
	neg	edi
	shr	edi,#D_EXP_SHIFT
	inc	edi
	br	fpunderflow
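
! pre_loverflow/loverflow: the fraction has grown one bit past the norm bit,
! either from the addition itself or from rounding up.  Everything is shifted
! right one bit and the exponent is bumped; if it reaches D_EXP_INFINITE the
! result has overflowed.  When the carry came from rounding up, the guard
! words are cleared first so the value is not rounded up a second time.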

	.align	ALIGNMENT
pre_loverflow:
	sub	ebp,ebp		! clear rounding registers
	sub	ebx,ebx		! probably avoiding tests for more rounding

loverflow:
	shr	esi,#1		! carry bit stayed in the reg
	rcr	eax,#1
	rcr	ebp,#1
	rcr	ebx,#1
	add	edi,#1 << D_EXP_SHIFT
	cmp	edi,#D_EXP_INFINITE << D_EXP_SHIFT
	blt	round
	call	fpoverflow
	mov	eax,ecx		! XXX - wrong reg
	ret
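
! shift32/shift64/shift96: after a subtraction whole high words of the
! fraction can be zero, so complete 32-bit words are moved up and REG_BIT
! subtracted from the exponent per word before the bit-level shift in
! normalize3; shiftxx then shifts back right by D_BIT-D_FRAC_BIT when bits
! ended up above the norm bit, and over_adjust reports underflow when the
! exponent has gone non-positive.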

	.align	ALIGNMENT
shift32:
	test	eax,eax
	jz	shift64
	mov	esi,eax
	mov	eax,ebp
	mov	ebp,ebx
	sub	ebx,ebx
	sub	edi,#REG_BIT << D_EXP_SHIFT
shiftxx:
	test	esi,#~(D_NORM_MASK | D_FRAC_MASK)
	jz	over_adjust	! else too big already
	shrd	ebx,ebp,#D_BIT-D_FRAC_BIT
	shrd	ebp,eax,#D_BIT-D_FRAC_BIT
	shrd	eax,esi,#D_BIT-D_FRAC_BIT
	shr	esi,#D_BIT-D_FRAC_BIT
	add	edi,#(D_BIT-D_FRAC_BIT) << D_EXP_SHIFT
over_adjust:
	test	edi,edi
	bgt	normalize2
	neg	edi
	shr	edi,#D_EXP_SHIFT
	inc	edi
	br	fpunderflow

	.align	ALIGNMENT
shift64:
	test	ebp,ebp
	jz	shift96
	mov	esi,ebp
	mov	eax,ebx
	sub	ebp,ebp
	mov	ebx,ebp
	sub	edi,#(2*REG_BIT) << D_EXP_SHIFT
	jmp	shiftxx

	.align	ALIGNMENT
shift96:
	test	ebx,ebx		! XXX - this test is probably unnecessary
				! since the shift must be small unless we
				! are subtracting 2 almost-equal numbers,
				! and then the bits beyond 64 will mostly
				! be 0
	jz	return_esi_eax	! all zero
	mov	esi,ebx
	sub	ebx,ebx
	sub	edi,#(3*REG_BIT) << D_EXP_SHIFT
	jmp	shiftxx

	.align	ALIGNMENT
return_esi_eax:
	mov	edx,esi
	ret
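
! subtract: the operands were ordered so that esi:eax holds the larger
! magnitude, so the aligned difference is non-negative and simply keeps the
! sign left in edx.  The low bits of y that were shifted out into ebp are
! folded into the borrow chain (the neg / two's-complement steps below), and
! normalize2 then does the possibly long left shift and the rounding.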

	.align	ALIGNMENT
subtract:
	sub	ebp,ebp		! set up roundoff register
	cmp	ecx,#REG_BIT
	jae	subtract_bigshift
	shrd	ebp,ebx,cl
	shrd	ebx,edi,cl
	shr	edi,cl
	neg	ebp		! begin subtraction esi:eax:0 - edi:ebx:ebp
	sbb	eax,ebx
	sbb	esi,edi
	sub	ebx,ebx
	mov	edi,edx
	and	edi,#D_EXP_MASK
	br	normalize2

	.align	ALIGNMENT
subtract_bigshift:
	cmp	ecx,#D_FRAC_BIT+2
	bhis	return_edx_eax	! x dominates y
	sub	ecx,#REG_BIT
	shrd	ebp,ebx,cl
	shrd	ebx,edi,cl
	shr	edi,cl
	not	ebp		! begin subtraction esi:eax:0:0 - 0:edi:ebx:ebp
	not	ebx
	add	ebp,#1
	adc	ebx,#0
	cmc
	sbb	eax,edi
	sbb	esi,#0
	xchg	ebp,ebx
	mov	edi,edx
	and	edi,#D_EXP_MASK
	br	normalize2

	.data
	.extern	bsr_table