summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Canon <scanon@apple.com>2011-03-18 16:35:02 +0000
committerStephen Canon <scanon@apple.com>2011-03-18 16:35:02 +0000
commit6bbe0bb0856f7e7cabe11bc0a10c268db034142b (patch)
tree2ebf6ee0a369de962191666d930df0f946a26060
parent5c0809916b70dbb754cf0c999e2923909e5603c6 (diff)
downloadcompiler-rt-6bbe0bb0856f7e7cabe11bc0a10c268db034142b.tar.gz
Carefully written implementations of the 32-bit integer divide and modulus functions for ARM. These are still using a naive digit-by-digit algorithm, but the core loop has been carefully written.
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@127882 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/arm/divmodsi4.S47
-rw-r--r--lib/arm/divsi3.S39
-rw-r--r--lib/arm/modsi3.S67
-rw-r--r--lib/arm/udivmodsi4.S80
-rw-r--r--lib/arm/udivsi3.S78
-rw-r--r--lib/arm/umodsi3.S58
6 files changed, 337 insertions, 32 deletions
diff --git a/lib/arm/divmodsi4.S b/lib/arm/divmodsi4.S
new file mode 100644
index 000000000..6e72eabbd
--- /dev/null
+++ b/lib/arm/divmodsi4.S
@@ -0,0 +1,47 @@
+/*===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __divmodsi4 (32-bit signed integer divide and
+ * modulus) function for the ARM architecture. A naive digit-by-digit
+ * computation is employed for simplicity.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+ push {r4-r7, lr} ;\
+ add r7, sp, #12
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r4-r7, pc}
+
+.syntax unified
+.align 3
+DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
+ ESTABLISH_FRAME
+// Set aside the sign of the quotient and modulus, and the address for the
+// modulus.
+ eor r4, r0, r1
+ mov r5, r0
+ mov r6, r2
+// Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+ eor ip, r0, r0, asr #31
+ eor lr, r1, r1, asr #31
+ sub r0, ip, r0, asr #31
+ sub r1, lr, r1, asr #31
+// Unsigned divmod:
+ bl ___udivmodsi4
+// Apply the sign of quotient and modulus
+ ldr r1, [r6]
+ eor r0, r0, r4, asr #31
+ sub r0, r0, r4, asr #31
+ eor r1, r1, r5, asr #31
+ sub r1, r1, r5, asr #31
+ str r1, [r6]
+ CLEAR_FRAME_AND_RETURN
diff --git a/lib/arm/divsi3.S b/lib/arm/divsi3.S
new file mode 100644
index 000000000..69ef20c4a
--- /dev/null
+++ b/lib/arm/divsi3.S
@@ -0,0 +1,39 @@
+/*===-- divsi3.S - 32-bit signed integer divide ---------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __divsi3 (32-bit signed integer divide) function
+ * for the ARM architecture as a wrapper around the unsigned routine.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+ push {r4, r7, lr} ;\
+ add r7, sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r4, r7, pc}
+
+.syntax unified
+.align 3
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+ ESTABLISH_FRAME
+// Set aside the sign of the quotient.
+ eor r4, r0, r1
+// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+ eor r2, r0, r0, asr #31
+ eor r3, r1, r1, asr #31
+ sub r0, r2, r0, asr #31
+ sub r1, r3, r1, asr #31
+// abs(a) / abs(b)
+ bl ___udivsi3
+// Apply sign of quotient to result and return.
+ eor r0, r0, r4, asr #31
+ sub r0, r0, r4, asr #31
+ CLEAR_FRAME_AND_RETURN
diff --git a/lib/arm/modsi3.S b/lib/arm/modsi3.S
index 40ba856e3..97573395d 100644
--- a/lib/arm/modsi3.S
+++ b/lib/arm/modsi3.S
@@ -1,36 +1,39 @@
-//===-------- modsi3.S - Implement modsi3 ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
+/*===-- modsi3.S - 32-bit signed integer modulus --------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __modsi3 (32-bit signed integer modulus) function
+ * for the ARM architecture as a wrapper around the unsigned routine.
+ *
+ *===----------------------------------------------------------------------===*/
#include "../assembly.h"
-//
-// extern int32_t __modsi3(int32_t a, int32_t b);
-//
-// Returns the remainder when dividing two 32-bit signed integers.
-// Conceptually, the function is: { return a - (a / b) * b; }
-// But if you write that in C, llvm compiles it to a call to __modsi3...
-//
- .align 2
-DEFINE_COMPILERRT_FUNCTION(__modsi3)
- push {r4, r5, r7, lr}
- add r7, sp, #8 // set stack frame
- mov r5, r0 // save a
- mov r4, r1 // save b
- bl ___divsi3 // compute a/b
-#if __ARM_ARCH_7A__
- mls r0, r4, r0, r5 // mulitple result * b and subtract from a
-#else
- // before armv7, does not have "mls" instruction
- mul r3, r0, r4 // multiple result * b
- sub r0, r5, r3 // a - result
-#endif
- pop {r4, r5, r7, pc}
-
-
+#define ESTABLISH_FRAME \
+ push {r4, r7, lr} ;\
+ add r7, sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r4, r7, pc}
+.syntax unified
+.align 3
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+ ESTABLISH_FRAME
+ // Set aside the sign of the dividend.
+ mov r4, r0
+ // Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+ eor r2, r0, r0, asr #31
+ eor r3, r1, r1, asr #31
+ sub r0, r2, r0, asr #31
+ sub r1, r3, r1, asr #31
+ // abs(a) % abs(b)
+ bl ___umodsi3
+ // Apply sign of dividend to result and return.
+ eor r0, r0, r4, asr #31
+ sub r0, r0, r4, asr #31
+ CLEAR_FRAME_AND_RETURN
diff --git a/lib/arm/udivmodsi4.S b/lib/arm/udivmodsi4.S
new file mode 100644
index 000000000..a2bda6312
--- /dev/null
+++ b/lib/arm/udivmodsi4.S
@@ -0,0 +1,80 @@
+/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
+ * modulus) function for the ARM architecture. A naive digit-by-digit
+ * computation is employed for simplicity.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+ push {r4, r7, lr} ;\
+ add r7, sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r4, r7, pc}
+
+#define a r0
+#define b r1
+#define i r3
+#define r r4
+#define q ip
+#define one lr
+
+.syntax unified
+.align 3
+DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
+// We use a simple digit by digit algorithm; before we get into the actual
+// divide loop, we must calculate the left-shift amount necessary to align
+// the MSB of the divisor with that of the dividend (If this shift is
+// negative, then the result is zero, and we early out). We also conjure a
+// bit mask of 1 to use in constructing the quotient, and initialize the
+// quotient to zero.
+ ESTABLISH_FRAME
+ clz r4, a
+ tst b, b // detect divide-by-zero
+ clz r3, b
+ mov q, #0
+ beq L_return // return 0 if b is zero.
+ mov one, #1
+ subs i, r3, r4
+ blt L_return // return 0 if MSB(a) < MSB(b)
+
+L_mainLoop:
+// This loop basically implements the following:
+//
+// do {
+// if (a >= b << i) {
+// a -= b << i;
+// q |= 1 << i;
+// if (a == 0) break;
+// }
+// } while (--i)
+//
+// Note that this does not perform the final iteration (i == 0); by doing it
+// this way, we can merge the two branches which is a substantial win for
+// such a tight loop on current ARM architectures.
+ subs r, a, b, lsl i
+ orrhs q, q,one, lsl i
+ movhs a, r
+ subsne i, i, #1
+ bhi L_mainLoop
+
+// Do the final test subtraction and update of quotient (i == 0), as it is
+// not performed in the main loop.
+ subs r, a, b
+ orrhs q, #1
+ movhs a, r
+
+L_return:
+// Store the remainder, and move the quotient to r0, then return.
+ str a, [r2]
+ mov r0, q
+ CLEAR_FRAME_AND_RETURN
diff --git a/lib/arm/udivsi3.S b/lib/arm/udivsi3.S
new file mode 100644
index 000000000..d721b6299
--- /dev/null
+++ b/lib/arm/udivsi3.S
@@ -0,0 +1,78 @@
+/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __udivsi3 (32-bit unsigned integer divide)
+ * function for the ARM architecture. A naive digit-by-digit computation is
+ * employed for simplicity.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+ push {r7, lr} ;\
+ mov r7, sp
+#define CLEAR_FRAME_AND_RETURN \
+ pop {r7, pc}
+
+#define a r0
+#define b r1
+#define r r2
+#define i r3
+#define q ip
+#define one lr
+
+.syntax unified
+.align 3
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+// We use a simple digit by digit algorithm; before we get into the actual
+// divide loop, we must calculate the left-shift amount necessary to align
+// the MSB of the divisor with that of the dividend (If this shift is
+// negative, then the result is zero, and we early out). We also conjure a
+// bit mask of 1 to use in constructing the quotient, and initialize the
+// quotient to zero.
+ ESTABLISH_FRAME
+ clz r2, a
+ tst b, b // detect divide-by-zero
+ clz r3, b
+ mov q, #0
+ beq L_return // return 0 if b is zero.
+ mov one, #1
+ subs i, r3, r2
+ blt L_return // return 0 if MSB(a) < MSB(b)
+
+L_mainLoop:
+// This loop basically implements the following:
+//
+// do {
+// if (a >= b << i) {
+// a -= b << i;
+// q |= 1 << i;
+// if (a == 0) break;
+// }
+// } while (--i)
+//
+// Note that this does not perform the final iteration (i == 0); by doing it
+// this way, we can merge the two branches which is a substantial win for
+// such a tight loop on current ARM architectures.
+ subs r, a, b, lsl i
+ orrhs q, q,one, lsl i
+ movhs a, r
+ subsne i, i, #1
+ bhi L_mainLoop
+
+// Do the final test subtraction and update of quotient (i == 0), as it is
+// not performed in the main loop.
+ subs r, a, b
+ orrhs q, #1
+
+L_return:
+// Move the quotient to r0 and return.
+ mov r0, q
+ CLEAR_FRAME_AND_RETURN
diff --git a/lib/arm/umodsi3.S b/lib/arm/umodsi3.S
new file mode 100644
index 000000000..e3e391baf
--- /dev/null
+++ b/lib/arm/umodsi3.S
@@ -0,0 +1,58 @@
+/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __umodsi3 (32-bit unsigned integer modulus)
+ * function for the ARM architecture. A naive digit-by-digit computation is
+ * employed for simplicity.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+
+#define a r0
+#define b r1
+#define r r2
+#define i r3
+
+.syntax unified
+.align 3
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+// We use a simple digit by digit algorithm; before we get into the actual
+// divide loop, we must calculate the left-shift amount necessary to align
+// the MSB of the divisor with that of the dividend.
+ clz r2, a
+ tst b, b // detect b == 0
+ clz r3, b
+ bxeq lr // return a if b == 0
+ subs i, r3, r2
+ bxlt lr // return a if MSB(a) < MSB(b)
+
+L_mainLoop:
+// This loop basically implements the following:
+//
+// do {
+// if (a >= b << i) {
+// a -= b << i;
+// if (a == 0) break;
+// }
+// } while (--i)
+//
+// Note that this does not perform the final iteration (i == 0); by doing it
+// this way, we can merge the two branches which is a substantial win for
+// such a tight loop on current ARM architectures.
+ subs r, a, b, lsl i
+ movhs a, r
+ subsne i, i, #1
+ bhi L_mainLoop
+
+// Do the final test subtraction and update of remainder (i == 0), as it is
+// not performed in the main loop.
+ subs r, a, b
+ movhs a, r
+ bx lr