summaryrefslogtreecommitdiff
path: root/as/asm/fadd.asm
blob: d18f002c2aa96d12416109eb5739c421d54d112a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
; _fadd: stack-argument entry point for the register-based add routine.
;
; Two 8-byte operands are read from the caller's frame: the first at
; [BP+4] (low dword) and [BP+8] (high dword), the second at [BP+12] and
; [BP+16].  The frame offsets imply a 2-byte saved BP plus a 2-byte
; return address -- NOTE(review): i.e. a 16-bit near call; confirm against
; the callers.
;
; The 8-byte sum is stored at _facc (low dword) and _facc+4 (high dword).
; EAX, EBX, ECX, EDX and the flags are not preserved.
_fadd:
	PUSH	BP
	MOV	BP,SP
	MOV	ECX,DWORD PTR [BP+16]	; second operand, high dword
	MOV	EBX,DWORD PTR [BP+12]	; second operand, low dword
	MOV	EDX,DWORD PTR [BP+8]	; first operand, high dword
	MOV	EAX,DWORD PTR [BP+4]	; first operand, low dword
	CALL	faddfxfy		; EDX:EAX = first + second
	MOV	DWORD PTR _facc+4,EDX	; publish high dword of the sum
	MOV	DWORD PTR _facc,EAX	; publish low dword of the sum
	POP	BP
	RET

; fsubfxfy / faddfxfy: subtract or add two 64-bit floating-point values.
;
; Layout, as decoded by the masks below: bit 31 of the high dword is the
; sign, bits 30..20 the exponent, and the remaining 52 bits the fraction;
; an implicit leading 1 is ORed in at bit 20 of the high dword.
;
; In:	EDX:EAX = x, ECX:EBX = y
; Out:	EDX:EAX = x - y (fsubfxfy) or x + y (faddfxfy)
; EBP, EDI and ESI are saved and restored; EBX, ECX and the flags are not.
;
; Zero, denormal, infinity and NaN encodings get no special treatment:
; every operand is handled as a normalized number.  Exponent underflow
; returns 0 and exponent overflow returns +infinity, with no error report.
;
; The labels NORMALIZE2, ANSWER0, UNDERFLOW and OVERFLOW are shared exits
; that expect EBP, EDI and ESI to have been pushed in that order.

fsubfxfy:
	XOR	ECX,#$80000000	; complement sign bit, fall into add routine
faddfxfy:
	PUSH	EBP
	PUSH	EDI
	PUSH	ESI
	MOV	EDI,ECX		; free CL for shifts
	MOV	ESI,EDX		; this mainly for consistent naming
	AND	ESI,#$7FFFFFFF	; discard sign so comparison is simple
	AND	EDI,#$7FFFFFFF

; Arrange for x (EDX/ESI:EAX) to be the operand of larger magnitude.

	CMP	ESI,EDI
	JA	XBIG
	JB	SWAP
	CMP	EAX,EBX
	JAE	XBIG
SWAP:
	XCHG	EDX,ECX
	XCHG	ESI,EDI
	XCHG	EAX,EBX
XBIG:
	AND	ESI,#$000FFFFF	; discard exponent
	AND	EDI,#$000FFFFF
	OR	ESI,#$00100000	; normalize
	OR	EDI,#$00100000

	SHR	ECX,32-(1+11)	; CX = sign:exponent of y (12 bits)
	SHR	EDX,32-(1+11)	; DX = sign:exponent of x, kept throughout
	MOV	EBP,ECX		; prepare to compare signs (want high bits 0)
	SUB	CX,DX
	NEG	CX		; CX = (sign:exp of x) - (sign:exp of y)
	AND	CX,#$07FF	; differing signs add +-$0800 to the difference;
				; keep only the exponent distance, which is
				; 0..$7FF because x has the larger magnitude
	CMP	CX,#(64-11)+2
	JAE	TO_DONE1	; x dominates y
	XOR	BP,DX
	AND	BP,#$0800	; see if signs are same
	JNZ	TO_SUBTRACT	; else roundoff reg EBP is 0

; Same signs: shift y right by CL bits and add the magnitudes.

	CMP	CL,#32
	JAE	TO_ADD_BIGSHIFT
	SHRD	EBP,EBX,CL	; bits shifted out of y land in EBP
	SHRD	EBX,EDI,CL
	SHR	EDI,CL
	ADD	EAX,EBX
	ADC	ESI,EDI
	SUB	EBX,EBX

; result DX(1+11):SI:AX:BP:BX but needs normalization

NORMALIZE:
	MOV	CX,DX
	AND	CX,#$07FF	; CX = exponent of x alone
	TEST	ESI,#$00200000	; did the add carry past the leading 1?
	JZ	NORMALIZE2
	BR	LOVERFLOW

; Relay labels forwarding the conditional jumps above and below to their
; real targets with unconditional jumps.

TO_DONE1:
	JMP	DONE1

TO_SUBTRACT:
	BR	SUBTRACT

TO_ADD_BIGSHIFT:
	BR	ADD_BIGSHIFT

TO_NORMLITTLE:
	BR	NORMLITTLE

; result DX(1):CX(11):SI:AX:BP:BX

NORMALIZE2:
	SHRD	EDI,ESI,32-11
				; top 11 bits of ESI known 0 and BSR is slooow
	BSR	EDI,EDI		; index of leading 1 bit in EDI is 11..31 in DI
	JZ	TO_NORMLITTLE	; ESI is zero (flag wrong in Intel Manual)
	SUB	DI,#31
	NEG	DI		; DI = left shift that puts the leading 1 at bit 20
	PUSH	CX		; park the exponent, CL is needed as shift count
	MOV	CX,DI
	SHLD	ESI,EAX,CL
	SHLD	EAX,EBP,CL
	SHLD	EBP,EBX,CL
	SHL	EBX,CL
	POP	CX		; exponent back
	SUB	CX,DI		; each bit shifted left costs one exponent step
	JC	UNDERFLOW

; Round to nearest, ties to even.  EBP:EBX hold the bits below EAX.

ROUND:
	CMP	EBP,#$80000000	; test roundoff register
	JA	ROUNDUP
	JB	DONE		; no rounding
	TEST	EBX,EBX
	JNZ	ROUNDUP
	TEST	AL,#1		; ambiguous case, round to even
	JZ	DONE		; even, no rounding
ROUNDUP:
	ADD	EAX,#1
	ADC	ESI,#0
	SUB	EBP,EBP
	SUB	EBX,EBX
	TEST	ESI,#$00200000
	JNZ	LOVERFLOW	; rounding may cause overflow!

DONE:
	AND	DX,#$0800	; extract sign of largest and result
	OR	DX,CX		; include exponent with sign
DONE1:
	SHL	EDX,32-(1+11)	; sign:exponent back to the top 12 bits
	AND	ESI,#$000FFFFF	; discard normalization bit
	OR	EDX,ESI
	POP	ESI
	POP	EDI
	POP	EBP
	RET

UNDERFLOW:				; should have error message here
ANSWER0:
	SUB	EDX,EDX		; return 0
	MOV	EAX,EDX
	POP	ESI
	POP	EDI
	POP	EBP
	RET

LOVERFLOW:			; carry bit must be right-shifted back in
	SHR	ESI,1
	RCR	EAX,1
	RCR	EBP,1
	RCR	EBX,1
	INC	CX
	CMP	CX,#$0800	; exponent no longer fits in 11 bits?
	JNZ	ROUND

OVERFLOW:			; should have error message here
	MOV	EDX,#$7FF00000	; + infinity: sign 0, exponent $7FF, fraction 0
	SUB	EAX,EAX
	POP	ESI
	POP	EDI
	POP	EBP
	RET

; Same signs, shift count 32..54: y is first moved down one whole dword,
; then by the remaining CL bits, so only EDI overlaps x (at EAX).

ADD_BIGSHIFT:
	SUB	CL,#32
	SHRD	EBP,EBX,CL
	SHRD	EBX,EDI,CL
	SHR	EDI,CL
	ADD	EAX,EDI
	ADC	ESI,#0
	XCHG	EBP,EBX		; roundoff words into BP:BX order
	BR	NORMALIZE

; ESI is entirely zero: move everything up 20 bits and try again.

NORMLITTLE:
	SHLD	ESI,EAX,32-(1+11)
	SHLD	EAX,EBP,32-(1+11)
	SHLD	EBP,EBX,32-(1+11)
	SHL	EBX,20
	SUB	CX,#32-(1+11)	; the exponent is 11 bits wide, so adjust all of
				; CX; a byte-wide subtract would borrow as soon
				; as the low byte alone is below 20
	JC	UNDERFLOW
	BR	NORMALIZE2

; Different signs: shift y right by CL bits and subtract it from x.
; x has the larger magnitude, so the difference cannot go negative.

SUBTRACT:
	SUB	EBP,EBP		; set up roundoff register
	CMP	CL,#32
	JAE	SUBTRACT_BIGSHIFT
	SHRD	EBP,EBX,CL
	SHRD	EBX,EDI,CL
	SHR	EDI,CL
	NEG	EBP		; 0 - shifted-out bits, CF = borrow
	SBB	EAX,EBX
	SBB	ESI,EDI
	SUB	EBX,EBX
	MOV	CX,DX
	AND	CX,#$07FF
	BR	NORMALIZE2

; Different signs, shift count 32..54.  After the shifts y occupies
; EDI:EBX:EBP, lined up under x's EAX and the two (zero) roundoff words.

SUBTRACT_BIGSHIFT:
	SUB	CL,#32
	SHRD	EBP,EBX,CL
	SHRD	EBX,EDI,CL
	SHR	EDI,CL
	NEG	EBP		; lowest word: 0 - EBP, CF = borrow
	MOV	ECX,#0		; shift count is dead; MOV leaves CF alone
	SBB	ECX,EBX		; next word: 0 - EBX - borrow, CF = borrow out
	MOV	EBX,ECX		; (NEG/NEG/SBB #0 would negate the pair but
				; drop this borrow whenever EBX is non-zero)
	SBB	EAX,EDI
	SBB	ESI,#0
	XCHG	EBP,EBX		; roundoff words into BP:BX order
	MOV	CX,DX
	AND	CX,#$07FF
	BR	NORMALIZE2

; Relay labels used by the conditional jumps in fmulfxfy.  Each one simply
; forwards to the matching shared exit of the add routine (ANSWER0,
; OVERFLOW, UNDERFLOW), which restores ESI, EDI and EBP and returns.
; NOTE(review): presumably needed because the conditional jumps cannot
; reach those exits directly -- confirm against the assembler's jump sizes.

TO_ANSWER0:
	BR	ANSWER0

TO_OVERFLOW:
	BR	OVERFLOW	; must forward to OVERFLOW: a jump back to this
				; label would spin forever on overflow

TO_UNDERFLOW:
	BR	UNDERFLOW

; fmulfxfy: multiply two 64-bit floating-point values.
;
; In:	EDX:EAX = x, ECX:EBX = y.  The high dword of each is split below
;	into a sign (bit 31), an 11-bit exponent (bits 30..20) and the top
;	20 fraction bits.
;
; The routine pushes EBP, EDI and ESI, builds the 128-bit product of the
; two mantissas in ESI:EAX:EBP:EBX, and then branches to NORMALIZE2 with
; the sign in DX and the exponent in CX; normalization, rounding, register
; restore and the RET all happen there.
;
; An operand whose exponent field is 0 branches to TO_ANSWER0.  An
; exponent sum below $0400 branches to TO_UNDERFLOW, and an adjusted
; exponent above $07FF branches to TO_OVERFLOW.
fmulfxfy:
	PUSH	EBP
	PUSH	EDI
	PUSH	ESI
	MOV	ESI,EDX		; free DX for multiplications
	MOV	EDI,ECX		; this mainly for consistent naming
	SHR	EDX,32-(1+11)
	SHR	ECX,32-(1+11)	
	MOV	BP,DX
	XOR	BP,CX
	AND	BP,#$0800	; extract sign
	AND	DX,#$07FF	; exp(x)
	JZ	TO_ANSWER0
	AND	CX,#$07FF	; exp(y)
	JZ	TO_ANSWER0
	ADD	CX,DX
	SUB	CX,#$0400
	JB	TO_UNDERFLOW
	CMP	CX,#$07FF
	JA	TO_OVERFLOW	; probably not quite right

	AND	ESI,#$000FFFFF	; discard sign and exponent
	AND	EDI,#$000FFFFF
	OR	ESI,#$00100000	; normalize
	OR	EDI,#$00100000

; exponent is in CX, sign in BP, operands in ESI:EAX and EDI:EBX, DX is free
; product to go in ESI:EAX:EBP:EBX
; terminology: x * y = (x32:x0) * (y32:y0) = x32y32 + x32y0 + x0y32 +x0y0

; CX and BP are parked on the stack because the code below uses ECX and EBP
; for partial products.  The four pushes are undone in reverse order
; further down: two dword pops, then two word pops.

	PUSH	CX
	PUSH	BP
	MOV	ECX,EAX
	MUL	EBX		; x0y0
	MOV	EBP,EDX		; x0y0.high in EBP
	XCHG	EBX,EAX		; x0y0.low in EBX (final), y0 in EAX
	MUL	ESI		; x32y0
	PUSH	EAX		; x32y0.low on stack
	PUSH	EDX		; x32y0.high on stack
	MOV	EAX,ESI
	MUL	EDI		; x32y32
	MOV	ESI,EDX		; x32y32.high in ESI (final except carries)
	XCHG	ECX,EAX		; x32y32.low in ECX, x0 in EAX
	MUL	EDI		; x0y32

; Sum the partial products column by column; each ADC carries the
; overflow of the column below it into the next one up.

	ADD	EBP,EAX		; x0y0.high + x0y32.low
	POP	EAX		; x32y0.high
	ADC	EAX,EDX		; x32y0.high + x0y32.high
	ADC	ESI,#0
	POP	EDX		; x32y0.low
	ADD	EBP,EDX		; (x0y0.high + x0y32.low) + x32y0.low
	ADC	EAX,ECX		; (x32y0.high + x0y32.high) + x32y32.low
	ADC	ESI,#0
	POP	DX		; sign
	POP	CX		; exponent
				; NOTE(review): the +13 below looks like it offsets
				; the left shift NORMALIZE2 will apply to the product
				; (and charge to CX), together with the $0400 used
				; above -- confirm before relying on it
	ADD	CX,#13		; temp fixup
	BR	NORMALIZE2

; _facc: 8-byte result cell written by _fadd -- low dword at _facc, high
; dword at _facc+4.  Four 2-byte words are reserved so that both dword
; stores stay inside the cell.
_facc:
	.word	0,0,0,0