third_party/libaeabi-cortexm0/core/cortex-m/uldivmod.S


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180

/* Runtime ABI for the ARM Cortex-M
 * uldivmod.S: unsigned 64 bit division
 *
 * Copyright (c) 2012 Jörg Mische <bobbl@gmx.de>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "software_panic.h"


	.syntax unified
	.text
	.code 16


@ {unsigned long long quotient, unsigned long long remainder}
@ __aeabi_uldivmod(unsigned long long numerator, unsigned long long denominator)
@
@ Divide r1:r0 by r3:r2 and return the quotient in r1:r0 and the remainder
@ in r3:r2 (all unsigned)
@
	.thumb_func
	.section .text.__aeabi_uldivmod
        .global __aeabi_uldivmod
__aeabi_uldivmod:

	cmp	r3, #0
	bne	L_large_denom
	cmp	r2, #0
	beq	L_divison_by_0
	cmp	r1, #0
	beq	L_fallback_32bits


	@ case 1: num >= 2^32 and denom < 2^32
	@ Result might be > 2^32, therefore we first calculate the upper 32
	@ bits of the result. It is done similar to the calculation of the
	@ lower 32 bits, but with a denominator that is shifted by 32.
	@ Hence the lower 32 bits of the denominator are always 0 and the
	@ costly 64 bit shift and sub operations can be replaced by cheap 32
	@ bit operations.

	push	{r4, r5, r6, r7, lr}

	@ shift left the denominator until it is greater than the numerator
	@ denom(r7:r6) = r3:r2 << 32

	@ TODO(crosbug.com/p/36128): Loops like this (which occur in several
	@ places in this file) are inefficent in ARMv6-m.

	movs	r5, #1		@ bitmask
	adds	r7, r2, #0	@ dont shift if denominator would overflow
	bmi	L_upper_result
	cmp	r1, r7
	blo	L_upper_result

L_denom_shift_loop1:
	lsl	r5, #1
	lsls	r7, #1
	bmi	L_upper_result	@ dont shift if overflow
	cmp	r1, r7
	bhs	L_denom_shift_loop1

L_upper_result:
	mov	r3, r1
	mov	r2, r0
	mov	r1, #0		@ upper result = 0
	mov	r6, #0

L_sub_loop1:
	orr	r1, r5		@ result(r7:r6) |= bitmask(r5)
	subs	r2, r6		@ num -= denom
	sbcs	r3, r7
	bhs	L_done_sub1

	eor	r1, r5		@ undo add mask
	adds	r2, r6		@ undo subtract
	adc	r3, r3, r7

L_done_sub1:
	lsrs	r7, #1		@ denom(r7:r6) >>= 1
	rrx	r6, r6
	lsrs	r5, #1		@ bitmask(r5) >>= 1
	bne	L_sub_loop1

	rrx r5, r5
	b	L_lower_result


	@ case 2: division by 0
	@ call __aeabi_ldiv0

L_divison_by_0:
	b	__aeabi_ldiv0


	@ case 3: num < 2^32 and denom < 2^32
	@ fallback to 32 bit division

L_fallback_32bits:
	mov	r1, r0
	udiv	r0, r0, r2	@ r0 = quotient
	mul	r3, r0, r2	@ r3 = quotient * divisor
	sub	r2, r1, r3	@ r2 = remainder
	mov	r1, #0
	mov	r3, #0
	bx	lr


	@ case 4: denom >= 2^32
	@ result is smaller than 2^32

L_large_denom:
	push	{r4, r5, r6, r7, lr}

	mov	r7, r3
	mov	r6, r2
	mov	r3, r1
	mov	r2, r0

	@ Shift left the denominator until it is greater than the numerator

	mov	r1, #0		@ high word of result is 0
	mov	r5, #1		@ bitmask
	adds	r7, #0		@ dont shift if denominator would overflow
	bmi	L_lower_result
	cmp	r3, r7
	blo	L_lower_result

L_denom_shift_loop4:
	lsl	r5, #1
	lsls	r6, #1		@ denom(r7:r6) <<= 1
	adcs	r7, r7
	bmi	L_lower_result	@ dont shift if overflow
	cmp	r3, r7
	bhs	L_denom_shift_loop4


L_lower_result:
	movs	r0, #0

L_sub_loop4:
	orr	r0, r5		@ result(r1:r0) |= bitmask(r5)
	subs	r2, r6		@ numerator -= denom
	sbcs	r3, r7
	bhs	L_done_sub4

	eor	r0, r5		 @ undo add mask
	adds	r2, r6		 @ undo subtract
	adc	r3, r3, r7

L_done_sub4:
	lsrs	r7, #1		@ denom(r7:r6) >>= 1
	rrx	r6, r6
	lsrs	r5, #1		@ bitmask(r5) >>= 1
	bne	L_sub_loop4

	pop	{r4, r5, r6, r7, pc}

__aeabi_ldiv0:
	ldr	SOFTWARE_PANIC_REASON_REG, =PANIC_SW_DIV_ZERO
	mov	SOFTWARE_PANIC_INFO_REG, r14
	bl	exception_panic