diff options
-rw-r--r-- | lib/builtins/arm/udivmodsi4.S | 62 | ||||
-rw-r--r-- | lib/builtins/arm/udivsi3.S | 65 | ||||
-rw-r--r-- | lib/builtins/arm/umodsi3.S | 61 | ||||
-rw-r--r-- | lib/builtins/assembly.h | 14 |
4 files changed, 20 insertions, 182 deletions
diff --git a/lib/builtins/arm/udivmodsi4.S b/lib/builtins/arm/udivmodsi4.S index b93fb0a3a..ddc875219 100644 --- a/lib/builtins/arm/udivmodsi4.S +++ b/lib/builtins/arm/udivmodsi4.S @@ -16,9 +16,6 @@ .syntax unified .text -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif .p2align 2 DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) @@ -41,15 +38,11 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) * * r0 is the numerator, r1 the denominator. * - * ARM: * The code before JMP computes the correct shift I, so that * r0 and (r1 << I) have the highest bit set in the same position. * At the time of JMP, ip := .Ldiv0block - 12 * I. * This depends on the fixed instruction size of block. * - * Thumb 2: - * Uses a jumptable to jump to the appropriate block. - * * block(shift) implements the test-and-update-quotient core. * It assumes (r0 << shift) can be computed without overflow and * that (r0 << shift) < 2 * r1. The quotient is stored in r3. @@ -59,59 +52,17 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) clz ip, r0 clz r3, r1 /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ -#if __ARM_ARCH_ISA_THUMB == 2 - sub ip, r3, ip - mov r3, #0 - tbb [pc, ip] -LOCAL_LABEL(JT): - .byte (LOCAL_LABEL( 0) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 1) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 2) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 3) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 4) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 5) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 6) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 7) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 8) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 9) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(10) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(11) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(12) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(13) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(14) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(15) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(16) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(17) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(18) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(19) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(20) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(21) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(22) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(23) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(24) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(25) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(26) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(27) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(28) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(29) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(30) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(31) - LOCAL_LABEL(JT)) / 2 -#else sub r3, r3, ip - adr ip, LOCAL_LABEL(0) + adr ip, LOCAL_LABEL(div0block) sub ip, ip, r3, lsl #2 sub ip, ip, r3, lsl #3 mov r3, #0 bx ip -#endif # else -#if __ARM_ARCH_ISA_THUMB == 2 -#error unsupported configuration -#endif str r4, [sp, #-8]! mov r4, r0 - adr ip, LOCAL_LABEL(0) + adr ip, LOCAL_LABEL(div0block) lsr r3, r4, #16 cmp r3, r1 @@ -145,11 +96,9 @@ LOCAL_LABEL(JT): #define IMM # -#define block(shift) \ -LOCAL_LABEL(shift): \ - cmp r0, r1, lsl IMM shift; \ - ITT hs; \ - addhs r3, r3, IMM (1 << shift); \ +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + addhs r3, r3, IMM (1 << shift); \ subhs r0, r0, r1, lsl IMM shift block(31) @@ -183,6 +132,7 @@ LOCAL_LABEL(shift): \ block(3) block(2) block(1) +LOCAL_LABEL(div0block): block(0) str r0, [r2] diff --git a/lib/builtins/arm/udivsi3.S b/lib/builtins/arm/udivsi3.S index c184b513a..8fb1dca0f 100644 --- a/lib/builtins/arm/udivsi3.S +++ b/lib/builtins/arm/udivsi3.S @@ -16,9 +16,6 @@ .syntax unified .text -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif .p2align 2 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) @@ -35,7 +32,6 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3) bcc LOCAL_LABEL(divby0) JMPc(lr, eq) cmp r0, r1 - IT cc movcc r0, #0 JMPc(lr, cc) /* @@ -43,15 +39,11 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3) * * r0 is the numerator, r1 the denominator. * - * ARM: * The code before JMP computes the correct shift I, so that * r0 and (r1 << I) have the highest bit set in the same position. * At the time of JMP, ip := .Ldiv0block - 12 * I. * This depends on the fixed instruction size of block. * - * Thumb 2: - * Uses a jumptable to jump to the appropriate block. - * * block(shift) implements the test-and-update-quotient core. * It assumes (r0 << shift) can be computed without overflow and * that (r0 << shift) < 2 * r1. The quotient is stored in r3. @@ -61,57 +53,15 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3) clz ip, r0 clz r3, r1 /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ -#if __ARM_ARCH_ISA_THUMB == 2 - sub ip, r3, ip - mov r3, #0 - tbb [pc, ip] -LOCAL_LABEL(JT): - .byte (LOCAL_LABEL( 0) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 1) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 2) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 3) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 4) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 5) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 6) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 7) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 8) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 9) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(10) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(11) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(12) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(13) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(14) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(15) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(16) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(17) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(18) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(19) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(20) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(21) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(22) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(23) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(24) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(25) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(26) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(27) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(28) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(29) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(30) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(31) - LOCAL_LABEL(JT)) / 2 -#else sub r3, r3, ip - adr ip, LOCAL_LABEL(0) + adr ip, LOCAL_LABEL(div0block) sub ip, ip, r3, lsl #2 sub ip, ip, r3, lsl #3 mov r3, #0 bx ip -#endif # else -#if __ARM_ARCH_ISA_THUMB == 2 -#error unsupported configuration -#endif mov r2, r0 - adr ip, LOCAL_LABEL(0) + adr ip, LOCAL_LABEL(div0block) lsr r3, r2, #16 cmp r3, r1 @@ -144,12 +94,10 @@ LOCAL_LABEL(JT): #define IMM # -#define block(shift) \ -LOCAL_LABEL(shift): \ - cmp r0, r1, lsl IMM shift; \ - ITT hs; \ - addhs r3, r3, IMM(1 << shift); \ - subhs r0, r0, r1, lsl IMM shift +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + addhs r3, r3, IMM (1 << shift); \ + subhs r0, r0, r1, lsl IMM shift block(31) block(30) @@ -182,6 +130,7 @@ LOCAL_LABEL(shift): \ block(3) block(2) block(1) +LOCAL_LABEL(div0block): block(0) mov r0, r3 diff --git a/lib/builtins/arm/umodsi3.S b/lib/builtins/arm/umodsi3.S index 8a979e56c..164646b1f 100644 --- a/lib/builtins/arm/umodsi3.S +++ b/lib/builtins/arm/umodsi3.S @@ -16,9 +16,6 @@ .syntax unified .text -#if __ARM_ARCH_ISA_THUMB == 2 - .thumb -#endif .p2align 2 DEFINE_COMPILERRT_FUNCTION(__umodsi3) @@ -33,7 +30,6 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3) #else cmp r1, #1 bcc LOCAL_LABEL(divby0) - IT eq moveq r0, #0 JMPc(lr, eq) cmp r0, r1 @@ -43,15 +39,11 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3) * * r0 is the numerator, r1 the denominator. * - * For ARM: * The code before JMP computes the correct shift I, so that * r0 and (r1 << I) have the highest bit set in the same position. * At the time of JMP, ip := .Ldiv0block - 8 * I. * This depends on the fixed instruction size of block. * - * For Thumb: - * Uses a jumptable to jump to the appropriate block. - * * block(shift) implements the test-and-update-quotient core. * It assumes (r0 << shift) can be computed without overflow and * that (r0 << shift) < 2 * r1. The quotient is stored in r3. @@ -62,52 +54,12 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3) clz r3, r1 /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ sub r3, r3, ip -#if __ARM_ARCH_ISA_THUMB == 2 - tbb [pc, r3] -LOCAL_LABEL(JT): - .byte (LOCAL_LABEL( 0) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 1) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 2) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 3) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 4) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 5) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 6) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 7) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 8) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL( 9) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(10) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(11) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(12) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(13) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(14) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(15) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(16) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(17) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(18) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(19) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(20) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(21) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(22) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(23) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(24) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(25) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(26) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(27) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(28) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(29) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(30) - LOCAL_LABEL(JT)) / 2 - .byte (LOCAL_LABEL(31) - LOCAL_LABEL(JT)) / 2 -#else - adr ip, LOCAL_LABEL(0) + adr ip, LOCAL_LABEL(div0block) sub ip, ip, r3, lsl #3 bx ip -#endif # else -#if __ARM_ARCH_ISA_THUMB == 2 -#error unsupported configuration -#endif mov r2, r0 - adr ip, LOCAL_LABEL(0) + adr ip, LOCAL_LABEL(div0block) lsr r3, r2, #16 cmp r3, r1 @@ -138,11 +90,9 @@ LOCAL_LABEL(JT): #define IMM # -#define block(shift) \ -LOCAL_LABEL(shift): \ - cmp r0, r1, lsl IMM shift; \ - IT hs; \ - subhs r0, r0, r1, lsl IMM shift +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + subhs r0, r0, r1, lsl IMM shift block(31) block(30) @@ -175,6 +125,7 @@ LOCAL_LABEL(shift): \ block(3) block(2) block(1) +LOCAL_LABEL(div0block): block(0) JMP(lr) #endif /* __ARM_ARCH_EXT_IDIV__ */ diff --git a/lib/builtins/assembly.h b/lib/builtins/assembly.h index b09fcd55a..d415a5f8d 100644 --- a/lib/builtins/assembly.h +++ b/lib/builtins/assembly.h @@ -22,16 +22,6 @@ #define SEPARATOR ; #endif -#if defined(__arm__) -#if __ARM_ARCH_ISA_THUMB == 2 -#define IT it -#define ITT itt -#else -#define IT @ -#define ITT @ -#endif -#endif - #if defined(__APPLE__) #define HIDDEN(name) .private_extern name #define LOCAL_LABEL(name) L_##name @@ -96,9 +86,7 @@ #ifdef ARM_HAS_BX #define JMP(r) bx r -#define JMPc(r, c) \ - IT c; \ - bx##c r +#define JMPc(r, c) bx##c r #else #define JMP(r) mov pc, r #define JMPc(r, c) mov##c pc, r |