1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
|
/* Copyright 2018 The Chromium OS Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*
* __divdi3.S: signed 64 bit division
*/
/*
 * Register aliases used by every routine in this file.
 *
 * Several aliases name the same physical register, so a write through
 * one alias destroys the value seen through all other aliases of that
 * register:
 *
 *   $r0  : P1L, MLO, T2, NLO, R, M
 *   $r1  : P1H, MHI, NHI, Q, RHI
 *   $r2  : P2L, D
 *   $r3  : P2H, W2, DLO, M2
 *   $r4  : W0, QHI
 *   $r5  : W1
 *   $r6  : NUMLO (NREGS names the $r6/$r7 pair)
 *   $r7  : NUMHI
 *   $r8  : DENLO (DREGS names the $r8/$r9 pair)
 *   $r9  : DENHI
 *   $r10 : DHI
 */

/* Register pairs that hold the 64-bit numerator / denominator copies
 * inside __udivmoddi4 (targets of movd44). */
#define NREGS $r6
#define DREGS $r8
/* Halves of the first ($r1:$r0) and second ($r3:$r2) 64-bit operand as
 * they arrive in registers; P1H:P1L also carries 64-bit results. */
#define P1H $r1
#define P1L $r0
#define P2H $r3
#define P2L $r2
/* Working copies of numerator and denominator halves in __udivmoddi4. */
#define NUMHI $r7
#define NUMLO $r6
#define DENHI $r9
#define DENLO $r8
/* Byte offsets of the low and high word of a 64-bit value in memory. */
#define OFFSET_L 0
#define OFFSET_H 4
/* 64-bit product returned by umul_ppmm (same registers as P1H:P1L). */
#define MHI P1H
#define MLO P1L
/* Scratch registers. W2 shares $r3 with P2H/DLO/M2. */
#define W2 $r3
#define W1 $r5
#define W0 $r4
#define T2 P1L
/* fudiv_qrnnd operands: numerator halves, divisor and its 16-bit halves. */
#define NHI P1H
#define NLO P1L
#define D $r2
#define DLO $r3
#define DHI $r10
/* fudiv_qrnnd results and temporaries: quotient (Q, with QHI holding the
 * upper 16-bit digit while it is being built), remainder (R, RHI) and the
 * two partial products (M, M2). */
#define Q NHI
#define QHI W0
#define R NLO
#define RHI NHI
#define M T2
#define M2 DLO
.text
.align 2
.globl umul_ppmm
.type umul_ppmm, @function
! =====================================================================
! uint64_t umul_ppmm(uint32_t a, uint32_t b)
!
! Multiplies two unsigned 32-bit values and returns the full 64-bit
! product.
!
! In:      $r0 = a, $r1 = b
! Out:     P1L = low 32 bits of the product, P1H = high 32 bits
! Scratch: P2L, P2H, W1 and $ta are overwritten; no stack is used.
!
! Method: both operands are split into 16-bit halves (al, ah, bl, bh).
! The two middle terms al*bh and ah*bl are summed into zA; a carry out
! of that 32-bit sum is worth 1<<16 in the high word. zA is then split
! across the two result words, and a carry out of the low-word addition
! is added to the high word.
! =====================================================================
umul_ppmm:
zeh P2L, $r0 ! al=a&0xffff
srli P2H, $r0, 16 ! ah=a>>16
zeh P1L, $r1 ! bl=b&0xffff
srli P1H, $r1, 16 ! bh=b>>16
mul W1, P2L, P1H ! zA=al*bh
mul P2L, P2L, P1L ! zl=al*bl (al is no longer needed)
mul P1L, P2H, P1L ! zB=ah*bl (bl is no longer needed)
add W1, W1, P1L ! zA+=zB
slt $ta, W1, P1L ! carry out of zA+zB: sum < zB
slli $ta, $ta, 16 ! (zA<zB)<<16
maddr32 $ta, P2H, P1H ! zh=ah*bh+((zA<zB)<<16)
srli P1H, W1, 16 ! zA>>16
add P1H, P1H, $ta ! zh+=(zA>>16)
slli P1L, W1, 16 ! zA<<=16
add P1L, P1L, P2L ! zl+=zA
slt $ta, P1L, P2L ! carry out of the low word: sum < al*bl
add P1H, P1H, $ta ! zh+=carry
ret
.size umul_ppmm, .-umul_ppmm
.text
.align 2
.type fudiv_qrnnd, @function
! =====================================================================
! uint64_t fudiv_qrnnd(uint64_t n, uint32_t d)
!
! Divides the 64-bit numerator n by the 32-bit denominator d. The
! 64-bit return value contains the remainder (low part, P1L) and the
! quotient (high part, P1H).
!
! In:      NHI:NLO = n (high:low), D = d
! Out:     Q = quotient, R = remainder
! Scratch: DLO/M2, QHI/W0, W1, DHI and $ta are overwritten.
!
! This function uses a custom calling convention,
! with register DHI ($r10) call-clobbered instead of callee-saved.
! It is not exported (.globl is absent) and is only called from
! __udivmoddi4, which saves $r10 in its prologue.
!
! Method: the quotient is produced as two 16-bit digits (q1, then q0).
! Each digit is first estimated by dividing by the upper half of d and
! is then decreased at most twice, using the product of the estimate
! and the lower half of d. The inline comments give the equivalent
! C-like statements.
!
! NOTE(review): every call site in this file passes a divisor that was
! first shifted left by the __clzsi2 result (or had a zero shift count);
! presumably the routine requires the top bit of d to be set and
! n1 < d - confirm before calling it from new code.
! =====================================================================
fudiv_qrnnd:
srli DHI, D, 16 ! d1 = ll_highpart (d)
zeh W1, NLO ! ll_lowpart (n0), kept for the second digit
srli T2, NLO, 16 ! ll_highpart (n0); overwrites NLO (same register)
divr QHI, RHI, NHI, DHI ! q1 = n1 / __d1, r1 = n1 % __d1
zeh DLO, D ! d0 = ll_lowpart (d)
slli RHI, RHI, 16 ! r1 << 16
or RHI, RHI, T2 ! __r1 = (__r1 << 16) | ll_highpart(n0)
mul M, QHI, DLO ! m = __q1*__d0
slt $ta, RHI, M ! __r1 < __m
beqz $ta, .L2 ! if no, the estimate is good
addi QHI, QHI, -1 ! __q1--
add RHI, RHI, D ! __r1 += d
slt $ta, RHI, D ! __r1 < d, i.e. the addition wrapped around
bnez $ta, .L2 ! if yes, skip the second correction
slt $ta, RHI, M ! __r1 < __m
beqz $ta, .L2 ! if no, skip
addi QHI, QHI, -1 ! __q1--
add RHI, RHI, D ! __r1 += d
.L2:
sub RHI, RHI, M ! __r1 -= __m
! Q and RHI are the same register, so the dividend is replaced by the
! quotient digit; the remainder lands in T2.
divr Q, T2, RHI, DHI ! __q0 = r1 / __d1, __r0 = r1 % __d1
slli T2, T2, 16 ! __r0 << 16
or R, T2, W1 ! __r0 = (__r0 << 16) | ll_lowpart(n0)
mul M2, DLO, Q ! __m = __q0 * __d0 (overwrites d0, last use)
slt $ta, R, M2 ! __r0 < __m
beqz $ta, .L5 ! if no, the estimate is good
add R, R, D ! __r0 += d
addi Q, Q, -1 ! __q0--
slt $ta, R, D ! __r0 < d, i.e. the addition wrapped around
bnez $ta, .L5 ! if yes, skip the second correction
slt $ta, R, M2 ! __r0 < __m
beqz $ta, .L5 ! if no, skip
add R, R, D ! __r0 += d
addi Q, Q, -1 ! __q0--
.L5:
sub R, R, M2 ! r = r0 = __r0 - __m
slli QHI, QHI, 16 ! __q1 << 16
or Q, Q, QHI ! q = (__q1 << 16) | __q0
ret
.size fudiv_qrnnd, .-fudiv_qrnnd
.align 2
.globl __udivmoddi4
.type __udivmoddi4, @function
! =====================================================================
! uint64_t __udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)
!
! This function divides 64-bit numerator n by 64-bit denominator d. The
! quotient is returned as 64-bit return value and the 64-bit remainder
! is stored at the input address r. The store is skipped when r is 0.
!
! In:   $r1:$r0 = n (n1:n0), $r3:$r2 = d (d1:d0), $r4 = r
! Out:  P1H:P1L = quotient (q1:q0)
! Uses: $r6-$r10, $fp and $lp are saved and restored; $r0-$r5 and $ta
!       are overwritten. Calls __clzsi2 (defined outside this file,
!       presumably count-leading-zeros), fudiv_qrnnd and umul_ppmm.
!
! Cases handled below (bm is the value returned by __clzsi2):
!   d1 == 0, n1 <  d0 : one fudiv_qrnnd call, q1 = 0
!   d1 == 0, n1 >= d0 : d0 == 0 goes to .LZdivzero; otherwise q1 and
!                       q0 are produced by one or two fudiv_qrnnd calls
!   d1 != 0, n1 <  d1 : quotient 0, remainder n (.LZqzero)
!   d1 != 0, bm == 0  : quotient is 0 or 1 (.LZskipnorm2)
!   d1 != 0, bm != 0  : one fudiv_qrnnd call on the shifted operands,
!                       then the digit is checked against q0*d0 from
!                       umul_ppmm and decreased once if it is too large
!
! stack allocation:
! sp+40 +------------------+
! | q |
! sp+32 +------------------+
! | bm |
! sp+28 +------------------+
! | $lp |
! sp+24 +------------------+
! | $fp |
! sp+20 +------------------+
! | $r10 |
! sp+16 +------------------+
! | $r6 - $r9 |
! sp +------------------+
! =====================================================================
__udivmoddi4:
addi $sp, $sp, -40
smw.bi $r6, [$sp], $r10 , 10 ! save $r6-$r10, $fp, $lp (see layout above)
movd44 NREGS, $r0 ! (n1,n0)
movd44 DREGS, $r2 ! (d1,d0)
move $fp, $r4 ! rp
bnez P2H, .L9 ! if d1 != 0, skip
! ---- d1 == 0 --------------------------------------------------------
slt $ta, NUMHI, DENLO ! n1 < d0
beqz $ta, .L10 ! if no, skip
! ---- d1 == 0 and n1 < d0: the quotient fits in 32 bits ---------------
move $r0, DENLO
bal __clzsi2
swi $r0, [$sp+(28)] ! bm
beqz $r0, .LZskipnorm1 ! if bm == 0, skip (also avoids a shift by 32)
sll DENLO, DENLO, $r0 ! d0 <<= bm
subri W1, $r0, 32 ! 32 - bm
srl W1, NUMLO, W1 ! n0 >> (32 - bm)
sll NUMHI, NUMHI, $r0 ! n1 << bm
or NUMHI, NUMHI, W1 ! n1 = (n1 << bm) | (n0 >> (32 - bm))
sll NUMLO, NUMLO, $r0 ! n0 <<= bm
.LZskipnorm1:
movd44 $r0, NREGS ! (n1,n0)
move $r2, DENLO ! d0
bal fudiv_qrnnd ! calculate q0 n0
swi P1H, [$sp+(32+OFFSET_L)]! q0
move NUMLO, P1L ! n0
move W1, 0
swi W1, [$sp+(32+OFFSET_H)] ! q1 = 0
b .L19
.L10:
! ---- d1 == 0 and n1 >= d0: the quotient needs two words --------------
! P2L still holds d0 from function entry.
beqz P2L, .LZdivzero ! if d0 == 0, take the divide-by-zero path
move $r0, DENLO
bal __clzsi2
swi $r0, [$sp+(28)] ! bm
bnez $r0, .LZnorm1 ! if bm != 0, skip
! bm == 0: q1 is set to 1 and d0 is subtracted from n1 once
sub NUMHI, NUMHI, DENLO ! n1 -= d0
movi W1, 1
swi W1, [$sp+(32+OFFSET_H)] ! q1 = 1
b .L29
! to eliminate unaligned branch target
.align 2
.LZnorm1:
subri $ta, $r0, 32 ! b = 32 - bm
sll DENLO, DENLO, $r0 ! d0 <<= bm
move $r2, DENLO
srl W0, NUMLO, $ta ! n0 >> b
sll W1, NUMHI, $r0 ! n1 << bm
sll NUMLO, NUMLO, $r0 ! n0 <<= bm
or P1L, W1, W0 ! n1 = (n1 << bm) | (n0 >> b)
srl P1H, NUMHI, $ta ! n2 = n1 >> b
bal fudiv_qrnnd ! calculate q1, n1
swi P1H, [$sp+(32+OFFSET_H)]! q1
move NUMHI, P1L ! n1
.L29:
movd44 $r0, NREGS ! (n1,n0)
move $r2, DENLO ! d0
bal fudiv_qrnnd ! calculate q0, n0
swi P1H, [$sp+(32+OFFSET_L)]
move NUMLO, P1L
! to eliminate unaligned branch target
.align 2
.L19:
! Common tail of the d1 == 0 cases: the remainder is n0 >> bm.
beqz $fp, .LZsetq ! if rp == 0, skip
lwi W2, [$sp+(28)] ! bm
movi NUMHI, 0
srl NUMLO, NUMLO, W2 ! n0 >> bm
b .LZsetr
! to eliminate unaligned branch target
.align 2
.LZdivzero:
! divide-by-zero exception or quotient = 0 and remainder = 0 returned
divr NUMHI, NUMLO, DENLO, DENLO
.LZqzero:
! Return quotient 0 and store NUMHI:NUMLO as the remainder. When this
! label is reached from .L9, NUMHI:NUMLO still hold the unmodified n.
movi P1H, 0
movi P1L, 0
beqz $fp, .LZret ! if rp == NULL, skip
swi NUMLO, [$fp+OFFSET_L] ! *rp
swi NUMHI, [$fp+OFFSET_H]
b .LZret
.L9:
! ---- d1 != 0 --------------------------------------------------------
slt $ta, NUMHI, DENHI ! n1 < d1
bnez $ta, .LZqzero ! if yes, the quotient is 0
move $r0, DENHI
bal __clzsi2
swi $r0, [$sp+(28)] ! bm
beqz $r0, .LZskipnorm2 ! if bm == 0, skip
subri W0, $r0, 32 ! b = 32 - bm
srl W1, DENLO, W0 ! d0 >> b
sll $r2, DENHI, $r0 ! d1 << bm
or $r2, $r2, W1 ! d1 = (d0 >> b) | (d1 << bm)
move DENHI, $r2
sll DENLO, DENLO, $r0 ! d0 <<= bm
srl W2, NUMLO, W0 ! n0 >> b
sll NUMLO, NUMLO, $r0 ! n0 <<= bm
sll P1L, NUMHI, $r0 ! n1 << bm (overwrites bm in $r0, last use)
srl P1H, NUMHI, W0 ! n2 = n1 >> b
or P1L, P1L, W2 ! n1 = (n0 >> b) | (n1 << bm)
bal fudiv_qrnnd ! calculate q0, n1
swi P1H, [$sp+(32+OFFSET_L)]
move NUMHI, P1L
! P1H still holds q0, so the call below computes (m1,m0) = d0 * q0
move P1L, DENLO ! d0
bal umul_ppmm
! q0 is too large when (m1,m0) > (n1,n0)
slt $ta, NUMHI, MHI ! n1 < m1
bnez $ta, .L46 ! if yes, correct q0
bne MHI, NUMHI, .L45 ! if m1 != n1, skip
slt $ta, NUMLO, MLO ! n0 < m0
beqz $ta, .L45 ! if no, skip
.L46:
! q0-- and (m1,m0) -= (d1,d0)
lwi W2, [$sp+(32+OFFSET_L)]
sub MHI, MHI, DENHI ! m1 - d1
addi W2, W2, -1 ! q0--
swi W2, [$sp+(32+OFFSET_L)]
sub W2, MLO, DENLO ! __x = m0 - d0
slt $ta, MLO, W2 ! m0 < __x, i.e. the subtraction borrowed
sub MHI, MHI, $ta ! m1 = m1 - d1 - (__x > m0)
move MLO, W2 ! m0 = __x
.L45:
movi W2, 0
swi W2, [$sp+(32+OFFSET_H)] ! q1 = 0
beqz $fp, .LZsetq ! if rp == 0, skip the remainder
! remainder = ((n1,n0) - (m1,m0)) >> bm
sub P1L, NUMLO, MLO ! __x = n0 - m0
sub P1H, NUMHI, MHI ! n1 - m1
slt $ta, NUMLO, P1L ! n0 < __x, i.e. the subtraction borrowed
sub P1H, P1H, $ta ! n1 = n1 - m1 - (__x > n0)
lwi W2, [$sp+(28)] ! bm
subri W0, W2, 32 ! b
sll NUMHI, P1H, W0 ! n1 << b
srl NUMLO, P1L, W2 ! n0 >> bm
or NUMLO, NUMLO, NUMHI ! (n1 << b) | (n0 >> bm)
srl NUMHI, P1H, W2 ! n1 >> bm
.LZsetr:
swi NUMLO, [$fp+OFFSET_L] ! remainder
swi NUMHI, [$fp+OFFSET_H]
.LZsetq:
lwi P1L, [$sp+(32+OFFSET_L)]! quotient
lwi P1H, [$sp+(32+OFFSET_H)]
! to eliminate unaligned branch target
.align 2
.LZret:
lmw.bi $r6, [$sp], $r10 , 10 ! restore the registers saved on entry
addi $sp, $sp, 40
ret
.LZskipnorm2:
! ---- d1 != 0 and bm == 0: the quotient is 0 or 1 ---------------------
move W2, 0
slt $ta, DENHI, NUMHI ! n1 > d1
bnez $ta, .L52 ! if yes, the quotient is 1
slt $ta, NUMLO, DENLO ! n0 < d0
bnez $ta, .L51 ! if yes, the quotient is 0
.L52:
move W1, 1
swi W1, [$sp+(32+OFFSET_L)] ! q0 = 1
sub W0, NUMLO, DENLO ! __x = n0 - d0
sub NUMHI, NUMHI, DENHI ! n1 - d1
slt $ta, NUMLO, W0 ! n0 < __x, i.e. the subtraction borrowed
sub NUMHI, NUMHI, $ta ! n1 = n1 - d1 - (__x > n0)
move NUMLO, W0 ! n0 = __x
b .L54
.L51:
swi W2, [$sp+(32+OFFSET_L)] ! q0 = 0
.L54:
swi W2, [$sp+(32+OFFSET_H)] ! q1 = 0
bnez $fp, .LZsetr ! store the remainder only when rp != 0
b .LZsetq
.size __udivmoddi4, .-__udivmoddi4
.text
.align 2
.globl __divdi3
.type __divdi3, @function
__divdi3:
! =====================================================================
! int64_t __divdi3(int64_t n, int64_t d)
!
! This function divides the signed 64-bit value n by the signed 64-bit
! value d and returns the quotient.
!
! In:   P1H:P1L = n, P2H:P2L = d
! Out:  P1H:P1L = quotient
!
! Method: $r6 records whether the sign bits of n and d differ, both
! operands are replaced by their absolute values, __udivmoddi4 is
! called with a zero remainder pointer, and the quotient is negated
! when $r6 is nonzero.
!
! Each 64-bit negation is done as: negate the high word; if the low
! word is nonzero, negate it too and subtract 1 from the high word.
!
! stack allocation:
! sp+8 +-----------------------+
! | $lp |
! sp+4 +-----------------------+
! | $r6 |
! sp +-----------------------+
! =====================================================================
smw.adm $r6, [$sp], $r6, 2 ! push $r6 and $lp (see layout above)
xor $r6, P1H, P2H
srai45 $r6, 31 ! signof(numerator xor denominator)
! abs(denominator)
bgez P2H, .L80
neg P2H, P2H
beqz P2L, .L80
neg P2L, P2L
addi P2H, P2H, -1
.L80:
! abs(numerator)
bgez P1H, .L81
neg P1H, P1H
beqz P1L, .L81
neg P1L, P1L
addi P1H, P1H, -1
.L81:
! abs(numerator) / abs(denominator)
movi $r4, 0 ! ignore remainder
bal __udivmoddi4
! numerator / denominator
beqz $r6, .L82 ! signs equal: quotient is already correct
or $r4, P1H, P1L
beqz $r4, .L82 ! quotient is zero: nothing to negate
neg P1H, P1H
beqz P1L, .L82
neg P1L, P1L
addi P1H, P1H, -1
! to eliminate unaligned branch target
.align 2
.L82:
lmw.bim $r6, [$sp], $r6, 2 ! pop $r6 and $lp
ret
.size __divdi3, .-__divdi3
|