1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
/* Runtime ABI for the ARM Cortex-M0
* uldivmod.S: unsigned 64 bit division
*
* Copyright (c) 2012 Jörg Mische <bobbl@gmx.de>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "software_panic.h"
.syntax unified
.text
.thumb
.cpu cortex-m0
@ {unsigned long long quotient, unsigned long long remainder}
@ __aeabi_uldivmod(unsigned long long numerator, unsigned long long denominator)
@
@ Divide r1:r0 by r3:r2 and return the quotient in r1:r0 and the remainder
@ in r3:r2 (all unsigned).
@
@ In:    r1:r0 = numerator, r3:r2 = denominator
@ Out:   r1:r0 = quotient,  r3:r2 = remainder
@ Saved: r4-r7 are pushed and restored on the long division paths.
@ A zero denominator branches to __aeabi_ldiv0 instead of returning here.
@
@ The work is split into four cases:
@   case 1: num >= 2^32 and denom <  2^32  (upper pass, then lower pass)
@   case 2: denom == 0
@   case 3: num <  2^32 and denom <  2^32  (32 bit division)
@   case 4: denom >= 2^32                  (lower pass only)
@
	.thumb_func
	.section .text.__aeabi_uldivmod
	.global __aeabi_uldivmod
__aeabi_uldivmod:
	cmp	r3, #0
	bne	L_large_denom
	cmp	r2, #0
	beq	L_divison_by_0
	cmp	r1, #0
	beq	L_fallback_32bits

	@ case 1: num >= 2^32 and denom < 2^32
	@ Result might be > 2^32, therefore we first calculate the upper 32
	@ bits of the result. It is done similar to the calculation of the
	@ lower 32 bits, but with a denominator that is shifted by 32.
	@ Hence the lower 32 bits of the denominator are always 0 and the
	@ costly 64 bit shift and sub operations can be replaced by cheap 32
	@ bit operations.
	push	{r4, r5, r6, r7, lr}

	@ Keep the unshifted denominator d in r6. r2 is reused for the
	@ numerator below and the upper loop shifts bit 0 of d out of r7,
	@ but the lower pass still needs that bit.
	mov	r6, r2

	@ shift left the denominator until it is greater than the numerator
	@ denom = d << 32, so only its high word (r7) has to be tracked here
	movs	r5, #1		@ bitmask
	adds	r7, r2, #0	@ dont shift if denominator would overflow
	bmi	L_upper_result
	cmp	r1, r7
	blo	L_upper_result

L_denom_shift_loop1:
	lsls	r5, #1
	lsls	r7, #1
	bmi	L_upper_result	@ dont shift if overflow
	cmp	r1, r7
	bhs	L_denom_shift_loop1

L_upper_result:
	mov	r3, r1		@ numerator moves to r3:r2
	mov	r2, r0
	movs	r1, #0		@ upper result = 0

L_sub_loop1:
	cmp	r3, r7
	bcc	L_dont_sub1	@ if (num >= denom)

	subs	r3, r7		@   num -= denom
	orrs	r1, r5		@   result(r1) |= bitmask(r5)
L_dont_sub1:
	lsrs	r7, #1		@ denom(r7) >>= 1
	lsrs	r5, #1		@ bitmask(r5) >>= 1
	bne	L_sub_loop1

	@ The upper pass leaves r3 < d and r7 = d >> 1. The lower pass
	@ continues at quotient bit 31 with denom(r7:r6) = d << 31, so the
	@ low word has to carry bit 0 of d. Clearing r6 here instead would
	@ make the lower pass use d - 1 for every odd d (2^32 / 3 would come
	@ out as 2^31 with remainder 0).
	movs	r5, #1
	lsls	r5, #31		@ bitmask = 1 << 31
	lsls	r6, #31		@ r6 = (d & 1) << 31
	b	L_lower_result

	@ case 2: division by 0
	@ branch to __aeabi_ldiv0
L_divison_by_0:
	b	__aeabi_ldiv0

	@ case 3: num < 2^32 and denom < 2^32
	@ fallback to 32 bit division
L_fallback_32bits:
	mov	r1, r2		@ 32 bit denominator goes in r1, numerator stays in r0
	push	{r4, lr}	@ r4 is only padding: keeps sp 8 byte aligned for the call
	bl	__aeabi_uidivmod
	mov	r2, r1		@ r1 is moved to the remainder low word
	movs	r1, #0		@ quotient high word = 0
	movs	r3, #0		@ remainder high word = 0
	pop	{r4, pc}

	@ case 4: denom >= 2^32
	@ result is smaller than 2^32
L_large_denom:
	push	{r4, r5, r6, r7, lr}

	mov	r7, r3		@ denom = r7:r6
	mov	r6, r2
	mov	r3, r1		@ num = r3:r2
	mov	r2, r0

	@ Shift left the denominator until it is greater than the numerator
	movs	r1, #0		@ high word of result is 0
	movs	r5, #1		@ bitmask
	adds	r7, #0		@ dont shift if denominator would overflow
	bmi	L_lower_result
	cmp	r3, r7
	blo	L_lower_result

L_denom_shift_loop4:
	lsls	r5, #1
	lsls	r7, #1
	lsls	r6, #1		@ carry = bit shifted out of the low word
	adcs	r7, r1		@ r1=0, so this only adds the carry into r7
	bmi	L_lower_result	@ dont shift if overflow
	cmp	r3, r7
	bhs	L_denom_shift_loop4

	@ Lower pass, shared by case 1 and case 4.
	@ On entry: num = r3:r2, denom = r7:r6, bitmask = r5, r1 = high
	@ word of the result. Builds the low word of the result in r0 and
	@ leaves the remainder in r3:r2.
L_lower_result:
	movs	r0, #0

L_sub_loop4:
	mov	r4, r3		@ 64 bit compare of num with denom via r4
	cmp	r2, r6
	sbcs	r4, r7
	bcc	L_dont_sub4	@ if (num >= denom)

	subs	r2, r6		@   numerator -= denom
	sbcs	r3, r7
	orrs	r0, r5		@   result(r1:r0) |= bitmask(r5)
L_dont_sub4:
	lsls	r4, r7, #31	@ denom(r7:r6) >>= 1
	lsrs	r6, #1
	lsrs	r7, #1
	orrs	r6, r4
	lsrs	r5, #1		@ bitmask(r5) >>= 1
	bne	L_sub_loop4

	pop	{r4, r5, r6, r7, pc}
@ Division by zero handler. __aeabi_uldivmod branches here when its
@ denominator is 0.
@ Loads the divide-by-zero reason constant into the panic reason register
@ (both names are macros, presumably provided by software_panic.h) and then
@ calls exception_panic.
@ NOTE(review): nothing follows the bl, so exception_panic is presumably
@ expected not to return - confirm against its definition.
__aeabi_ldiv0:
ldr SOFTWARE_PANIC_REASON_REG, =DIV_ZERO_PANIC
bl exception_panic
|