summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/builtins/arm/udivmodsi4.S62
-rw-r--r--lib/builtins/arm/udivsi3.S65
-rw-r--r--lib/builtins/arm/umodsi3.S61
-rw-r--r--lib/builtins/assembly.h14
4 files changed, 182 insertions, 20 deletions
diff --git a/lib/builtins/arm/udivmodsi4.S b/lib/builtins/arm/udivmodsi4.S
index ddc875219..b93fb0a3a 100644
--- a/lib/builtins/arm/udivmodsi4.S
+++ b/lib/builtins/arm/udivmodsi4.S
@@ -16,6 +16,9 @@
.syntax unified
.text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
@@ -38,11 +41,15 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
*
* r0 is the numerator, r1 the denominator.
*
+ * ARM:
* The code before JMP computes the correct shift I, so that
* r0 and (r1 << I) have the highest bit set in the same position.
* At the time of JMP, ip := .Ldiv0block - 12 * I.
* This depends on the fixed instruction size of block.
*
+ * Thumb 2:
+ * Uses a TBB jump table, indexed by the shift I, to branch to block(I).
+ *
* block(shift) implements the test-and-update-quotient core.
* It assumes (r0 << shift) can be computed without overflow and
* that (r0 << shift) < 2 * r1. The quotient is stored in r3.
@@ -52,17 +59,59 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
clz ip, r0
clz r3, r1
/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
+#if __ARM_ARCH_ISA_THUMB == 2
+ sub ip, r3, ip
+ mov r3, #0
+ tbb [pc, ip]
+LOCAL_LABEL(JT):
+ .byte (LOCAL_LABEL( 0) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 1) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 2) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 3) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 4) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 5) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 6) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 7) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 8) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 9) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(10) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(11) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(12) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(13) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(14) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(15) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(16) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(17) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(18) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(19) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(20) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(21) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(22) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(23) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(24) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(25) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(26) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(27) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(28) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(29) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(30) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(31) - LOCAL_LABEL(JT)) / 2
+#else
sub r3, r3, ip
- adr ip, LOCAL_LABEL(div0block)
+ adr ip, LOCAL_LABEL(0)
sub ip, ip, r3, lsl #2
sub ip, ip, r3, lsl #3
mov r3, #0
bx ip
+#endif
# else
+#if __ARM_ARCH_ISA_THUMB == 2
+#error unsupported configuration
+#endif
str r4, [sp, #-8]!
mov r4, r0
- adr ip, LOCAL_LABEL(div0block)
+ adr ip, LOCAL_LABEL(0)
lsr r3, r4, #16
cmp r3, r1
@@ -96,9 +145,11 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#define IMM #
-#define block(shift) \
- cmp r0, r1, lsl IMM shift; \
- addhs r3, r3, IMM (1 << shift); \
+#define block(shift) \
+LOCAL_LABEL(shift): \
+ cmp r0, r1, lsl IMM shift; \
+ ITT hs; \
+ addhs r3, r3, IMM (1 << shift); \
subhs r0, r0, r1, lsl IMM shift
block(31)
@@ -132,7 +183,6 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
block(3)
block(2)
block(1)
-LOCAL_LABEL(div0block):
block(0)
str r0, [r2]
diff --git a/lib/builtins/arm/udivsi3.S b/lib/builtins/arm/udivsi3.S
index 8fb1dca0f..c184b513a 100644
--- a/lib/builtins/arm/udivsi3.S
+++ b/lib/builtins/arm/udivsi3.S
@@ -16,6 +16,9 @@
.syntax unified
.text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
.p2align 2
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
@@ -32,6 +35,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
bcc LOCAL_LABEL(divby0)
JMPc(lr, eq)
cmp r0, r1
+ IT cc
movcc r0, #0
JMPc(lr, cc)
/*
@@ -39,11 +43,15 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
*
* r0 is the numerator, r1 the denominator.
*
+ * ARM:
* The code before JMP computes the correct shift I, so that
* r0 and (r1 << I) have the highest bit set in the same position.
* At the time of JMP, ip := .Ldiv0block - 12 * I.
* This depends on the fixed instruction size of block.
*
+ * Thumb 2:
+ * Uses a TBB jump table, indexed by the shift I, to branch to block(I).
+ *
* block(shift) implements the test-and-update-quotient core.
* It assumes (r0 << shift) can be computed without overflow and
* that (r0 << shift) < 2 * r1. The quotient is stored in r3.
@@ -53,15 +61,57 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
clz ip, r0
clz r3, r1
/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
+#if __ARM_ARCH_ISA_THUMB == 2
+ sub ip, r3, ip
+ mov r3, #0
+ tbb [pc, ip]
+LOCAL_LABEL(JT):
+ .byte (LOCAL_LABEL( 0) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 1) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 2) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 3) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 4) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 5) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 6) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 7) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 8) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 9) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(10) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(11) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(12) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(13) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(14) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(15) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(16) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(17) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(18) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(19) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(20) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(21) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(22) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(23) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(24) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(25) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(26) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(27) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(28) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(29) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(30) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(31) - LOCAL_LABEL(JT)) / 2
+#else
sub r3, r3, ip
- adr ip, LOCAL_LABEL(div0block)
+ adr ip, LOCAL_LABEL(0)
sub ip, ip, r3, lsl #2
sub ip, ip, r3, lsl #3
mov r3, #0
bx ip
+#endif
# else
+#if __ARM_ARCH_ISA_THUMB == 2
+#error unsupported configuration
+#endif
mov r2, r0
- adr ip, LOCAL_LABEL(div0block)
+ adr ip, LOCAL_LABEL(0)
lsr r3, r2, #16
cmp r3, r1
@@ -94,10 +144,12 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#define IMM #
-#define block(shift) \
- cmp r0, r1, lsl IMM shift; \
- addhs r3, r3, IMM (1 << shift); \
- subhs r0, r0, r1, lsl IMM shift
+#define block(shift) \
+LOCAL_LABEL(shift): \
+ cmp r0, r1, lsl IMM shift; \
+ ITT hs; \
+ addhs r3, r3, IMM(1 << shift); \
+ subhs r0, r0, r1, lsl IMM shift
block(31)
block(30)
@@ -130,7 +182,6 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
block(3)
block(2)
block(1)
-LOCAL_LABEL(div0block):
block(0)
mov r0, r3
diff --git a/lib/builtins/arm/umodsi3.S b/lib/builtins/arm/umodsi3.S
index 164646b1f..8a979e56c 100644
--- a/lib/builtins/arm/umodsi3.S
+++ b/lib/builtins/arm/umodsi3.S
@@ -16,6 +16,9 @@
.syntax unified
.text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
@@ -30,6 +33,7 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#else
cmp r1, #1
bcc LOCAL_LABEL(divby0)
+ IT eq
moveq r0, #0
JMPc(lr, eq)
cmp r0, r1
@@ -39,11 +43,15 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
*
* r0 is the numerator, r1 the denominator.
*
+ * ARM:
* The code before JMP computes the correct shift I, so that
* r0 and (r1 << I) have the highest bit set in the same position.
* At the time of JMP, ip := .Ldiv0block - 8 * I.
* This depends on the fixed instruction size of block.
*
+ * Thumb 2:
+ * Uses a TBB jump table, indexed by the shift I, to branch to block(I).
+ *
* block(shift) implements the test-and-update-quotient core.
* It assumes (r0 << shift) can be computed without overflow and
* that (r0 << shift) < 2 * r1. The quotient is stored in r3.
@@ -54,12 +62,52 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
clz r3, r1
/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
sub r3, r3, ip
- adr ip, LOCAL_LABEL(div0block)
+#if __ARM_ARCH_ISA_THUMB == 2
+ tbb [pc, r3]
+LOCAL_LABEL(JT):
+ .byte (LOCAL_LABEL( 0) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 1) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 2) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 3) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 4) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 5) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 6) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 7) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 8) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL( 9) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(10) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(11) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(12) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(13) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(14) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(15) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(16) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(17) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(18) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(19) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(20) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(21) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(22) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(23) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(24) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(25) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(26) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(27) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(28) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(29) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(30) - LOCAL_LABEL(JT)) / 2
+ .byte (LOCAL_LABEL(31) - LOCAL_LABEL(JT)) / 2
+#else
+ adr ip, LOCAL_LABEL(0)
sub ip, ip, r3, lsl #3
bx ip
+#endif
# else
+#if __ARM_ARCH_ISA_THUMB == 2
+#error unsupported configuration
+#endif
mov r2, r0
- adr ip, LOCAL_LABEL(div0block)
+ adr ip, LOCAL_LABEL(0)
lsr r3, r2, #16
cmp r3, r1
@@ -90,9 +138,11 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#define IMM #
-#define block(shift) \
- cmp r0, r1, lsl IMM shift; \
- subhs r0, r0, r1, lsl IMM shift
+#define block(shift) \
+LOCAL_LABEL(shift): \
+ cmp r0, r1, lsl IMM shift; \
+ IT hs; \
+ subhs r0, r0, r1, lsl IMM shift
block(31)
block(30)
@@ -125,7 +175,6 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
block(3)
block(2)
block(1)
-LOCAL_LABEL(div0block):
block(0)
JMP(lr)
#endif /* __ARM_ARCH_EXT_IDIV__ */
diff --git a/lib/builtins/assembly.h b/lib/builtins/assembly.h
index d415a5f8d..b09fcd55a 100644
--- a/lib/builtins/assembly.h
+++ b/lib/builtins/assembly.h
@@ -22,6 +22,16 @@
#define SEPARATOR ;
#endif
+#if defined(__arm__)
+/* IT/ITT must expand to a real `it`/`itt` in every mode.  Thumb-2 needs the
+ * instruction; in ARM state the assembler checks it against the conditional
+ * instructions that follow and emits no code.  Never map these to the `@`
+ * comment character: JMPc() and block() expand to one line of `;`-separated
+ * statements, so `@` would comment out every instruction after it. */
+#define IT it
+#define ITT itt
+#endif
+
#if defined(__APPLE__)
#define HIDDEN(name) .private_extern name
#define LOCAL_LABEL(name) L_##name
@@ -86,7 +96,9 @@
#ifdef ARM_HAS_BX
#define JMP(r) bx r
-#define JMPc(r, c) bx##c r
+#define JMPc(r, c) \
+ IT c; \
+ bx##c r
#else
#define JMP(r) mov pc, r
#define JMPc(r, c) mov##c pc, r