summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: rearnsha <rearnsha@138bc75d-0d04-0410-961f-82ee72b054a4> 2003-09-30 10:30:32 +0000
committer: rearnsha <rearnsha@138bc75d-0d04-0410-961f-82ee72b054a4> 2003-09-30 10:30:32 +0000
commit: ff0763988e05559cbfd04b57597dcbfa69625c19 (patch)
tree: 93abdfe4366b19848ed2a38e00fe26f6f53933f7 /gcc
parent: c3bea1aec572ef4f4ec33b28cf759ff65613c214 (diff)
download: gcc-ff0763988e05559cbfd04b57597dcbfa69625c19.tar.gz
Nicolas Pitre <nico@cam.org>
* arm/lib1funcs.asm (ARM_DIV_MOD_BODY) split into ARM_DIV_BODY and ARM_MOD_BODY.
(ARM_MOD_BODY): rewritten. added clz insns for __ARM_ARCH__ >= 5.
(ARM_DIV_BODY): added clz insns for __ARM_ARCH__ >= 5, added better divisor alignment in the other case.
(ARM_DIV2_ORDER): added, finds the order of a single bit divisor.
(__divsi3, __udivsi3, __modsi3, __umodsi3): rewritten using the macros above, add fast exits for divisor >= dividend, etc.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@71940 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 11
-rw-r--r-- gcc/config/arm/lib1funcs.asm 345
2 files changed, 236 insertions, 120 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cb968684491..1a505c1652c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,16 @@
2003-09-30 Nicolas Pitre <nico@cam.org>
+ * arm/lib1funcs.asm (ARM_DIV_MOD_BODY) split into ARM_DIV_BODY
+ and ARM_MOD_BODY.
+ (ARM_MOD_BODY): rewritten. added clz insns for __ARM_ARCH__ >= 5.
+ (ARM_DIV_BODY): added clz insns for __ARM_ARCH__ >= 5,
+ added better divisor alignment in the other case.
+ (ARM_DIV2_ORDER): added, finds the order of a single bit divisor.
+ (__divsi3, __udivsi3, __modsi3, __umodsi3): rewritten using the
+ macros above, add fast exits for divisor >= dividend, etc.
+
+2003-09-30 Nicolas Pitre <nico@cam.org>
+
* arm/ieee754-df.S: split compilation of fixunsdfsi from
L_fixdfsi target.
* arm/t-arm-elf: add _fixunsdfsi target due to the above.
diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm
index 34cf986d03e..fe14070e812 100644
--- a/gcc/config/arm/lib1funcs.asm
+++ b/gcc/config/arm/lib1funcs.asm
@@ -208,6 +208,7 @@ _L__\name: /* A hook to tell gdb that we've switched to ARM */
.endm
#endif
+#ifdef __thumb__
/* Register aliases. */
work .req r4 @ XXXX is this safe ?
@@ -216,102 +217,180 @@ divisor .req r1
overdone .req r2
result .req r2
curbit .req r3
+#endif
#if 0
ip .req r12
sp .req r13
lr .req r14
pc .req r15
#endif
+
/* ------------------------------------------------------------------------ */
/* Bodies of the division and modulo routines. */
/* ------------------------------------------------------------------------ */
-.macro ARM_DIV_MOD_BODY modulo
-LSYM(Loop1):
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+ @ Unsigned shift-and-subtract division: the quotient is built in result; dividend, divisor and curbit are clobbered.
+#if __ARM_ARCH__ >= 5
+ @ Align divisor with dividend in one step from the difference of their leading zero counts.
+ clz \curbit, \divisor
+ clz \result, \dividend
+ sub \result, \curbit, \result @ shift = clz(divisor) - clz(dividend); the visible callers only get here with dividend > divisor
+ mov \curbit, #1
+ mov \divisor, \divisor, lsl \result @ top set bit of divisor now lines up with top set bit of dividend
+ mov \curbit, \curbit, lsl \result @ curbit = quotient bit that corresponds to the shifted divisor
+ mov \result, #0 @ quotient starts at zero
+
+#else
+
+ @ Initially shift the divisor left 3 bits if possible,
+ @ set curbit accordingly. This allows for curbit to be located
+ @ at the left end of each 4 bit nibbles in the division loop
+ @ to save one loop in most cases.
+ tst \divisor, #0xe0000000 @ Z is set when the top three bits of divisor are clear
+ moveq \divisor, \divisor, lsl #3
+ moveq \curbit, #8
+ movne \curbit, #1
+
@ Unless the divisor is very big, shift it up in multiples of
@ four bits, since this is the amount of unwinding in the main
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
- cmp divisor, #0x10000000
- cmplo divisor, dividend
- movlo divisor, divisor, lsl #4
- movlo curbit, curbit, lsl #4
- blo LSYM(Loop1)
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend @ compared with dividend only while divisor < 0x10000000
+ movlo \divisor, \divisor, lsl #4
+ movlo \curbit, \curbit, lsl #4 @ keep curbit in step with divisor
+ blo 1b
-LSYM(Lbignum):
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
- cmp divisor, #0x80000000
- cmplo divisor, dividend
- movlo divisor, divisor, lsl #1
- movlo curbit, curbit, lsl #1
- blo LSYM(Lbignum)
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend @ compared with dividend only while bit 31 of divisor is clear
+ movlo \divisor, \divisor, lsl #1
+ movlo \curbit, \curbit, lsl #1
+ blo 1b
-LSYM(Loop3):
- @ Test for possible subtractions. On the final pass, this may
- @ subtract too much from the dividend ...
+ mov \result, #0 @ quotient starts at zero
+
+#endif
+
+ @ Division loop: four trial subtractions per pass (divisor, divisor >> 1, >> 2, >> 3).
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ orrhs \result, \result, \curbit @ record the quotient bit when the subtraction was taken
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ orrhs \result, \result, \curbit, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ orrhs \result, \result, \curbit, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ orrhs \result, \result, \curbit, lsr #3 @ when curbit is below bit 3 the shifted value is zero and nothing is recorded
+ cmp \dividend, #0 @ Early termination?
+ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
+ movne \divisor, \divisor, lsr #4
+ bne 1b @ loop while dividend is non-zero and curbit has not been shifted out
+
+.endm
+/* ------------------------------------------------------------------------ */
+.macro ARM_DIV2_ORDER divisor, order
+ @ Sets order to the bit index (log2) of a divisor that has exactly one bit set.
+#if __ARM_ARCH__ >= 5
+
+ clz \order, \divisor @ order = number of leading zero bits
+ rsb \order, \order, #31 @ order = 31 - order, i.e. the index of the set bit
+
+#else
+ @ No clz in this branch: binary search for the bit position; divisor is shifted down (clobbered).
+ cmp \divisor, #(1 << 16)
+ movhs \divisor, \divisor, lsr #16 @ bit is in the upper half: drop the low 16 bits ...
+ movhs \order, #16 @ ... and start counting from 16
+ movlo \order, #0 @ otherwise start counting from 0
+
+ cmp \divisor, #(1 << 8)
+ movhs \divisor, \divisor, lsr #8
+ addhs \order, \order, #8
+
+ cmp \divisor, #(1 << 4)
+ movhs \divisor, \divisor, lsr #4
+ addhs \order, \order, #4
+
+ cmp \divisor, #(1 << 2) @ for a single-bit divisor the value is now 1, 2, 4 or 8
+ addhi \order, \order, #3 @ 8: the remaining order is 3
+ addls \order, \order, \divisor, lsr #1 @ 1, 2, 4: divisor >> 1 gives 0, 1, 2
+
+#endif
+
+.endm
+/* ------------------------------------------------------------------------ */
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+ @ Unsigned modulo: leaves the remainder in dividend; divisor and order are clobbered, spare is scratch on the clz path.
+#if __ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ clz \spare, \dividend
+ sub \order, \order, \spare @ order = clz(divisor) - clz(dividend) = number of bits divisor is shifted up
+ mov \divisor, \divisor, lsl \order @ top set bit of divisor now lines up with top set bit of dividend
- .if \modulo
- @ ... so keep track of which subtractions are done in OVERDONE.
- @ We can fix them up afterwards.
- mov overdone, #0
- cmp dividend, divisor
- subhs dividend, dividend, divisor
- cmp dividend, divisor, lsr #1
- subhs dividend, dividend, divisor, lsr #1
- orrhs overdone, overdone, curbit, ror #1
- cmp dividend, divisor, lsr #2
- subhs dividend, dividend, divisor, lsr #2
- orrhs overdone, overdone, curbit, ror #2
- cmp dividend, divisor, lsr #3
- subhs dividend, dividend, divisor, lsr #3
- orrhs overdone, overdone, curbit, ror #3
- mov ip, curbit
- .else
- @ ... so keep track of which subtractions are done in RESULT.
- @ The result will be ok, since the "bit" will have been
- @ shifted out at the bottom.
- cmp dividend, divisor
- subhs dividend, dividend, divisor
- orrhs result, result, curbit
- cmp dividend, divisor, lsr #1
- subhs dividend, dividend, divisor, lsr #1
- orrhs result, result, curbit, lsr #1
- cmp dividend, divisor, lsr #2
- subhs dividend, dividend, divisor, lsr #2
- orrhs result, result, curbit, lsr #2
- cmp dividend, divisor, lsr #3
- subhs dividend, dividend, divisor, lsr #3
- orrhs result, result, curbit, lsr #3
- .endif
+#else
- cmp dividend, #0 @ Early termination?
- movnes curbit, curbit, lsr #4 @ No, any more bits to do?
- movne divisor, divisor, lsr #4
- bne LSYM(Loop3)
+ mov \order, #0 @ order counts how many bits divisor gets shifted up below
- .if \modulo
-LSYM(Lfixup_dividend):
- @ Any subtractions that we should not have done will be recorded in
- @ the top three bits of OVERDONE. Exactly which were not needed
- @ are governed by the position of the bit, stored in IP.
- ands overdone, overdone, #0xe0000000
- @ If we terminated early, because dividend became zero, then the
- @ bit in ip will not be in the bottom nibble, and we should not
- @ perform the additions below. We must test for this though
- @ (rather relying upon the TSTs to prevent the additions) since
- @ the bit in ip could be in the top two bits which might then match
- @ with one of the smaller RORs.
- tstne ip, #0x7
- beq LSYM(Lgot_result)
- tst overdone, ip, ror #3
- addne dividend, dividend, divisor, lsr #3
- tst overdone, ip, ror #2
- addne dividend, dividend, divisor, lsr #2
- tst overdone, ip, ror #1
- addne dividend, dividend, divisor, lsr #1
- .endif
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ addlo \order, \order, #4
+ blo 1b
-LSYM(Lgot_result):
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ addlo \order, \order, #1
+ blo 1b
+
+#endif
+
+ @ Perform all needed subtractions to keep only the remainder.
+ @ Do comparisons in batches of 4 first.
+ subs \order, \order, #3 @ yes, 3 is intended here: order + 1 compares are needed, so a batch of 4 needs order >= 3
+ blt 2f
+
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ cmp \dividend, #1 @ ge fails here when dividend has reached zero
+ mov \divisor, \divisor, lsr #4 @ plain mov: the flags from the cmp above are kept
+ subges \order, \order, #4 @ one batch consumed; ge afterwards means another full batch is available
+ bge 1b
+
+ tst \order, #3 @ low two bits zero: no compare is left to do
+ teqne \dividend, #0 @ otherwise also stop when dividend is already zero
+ beq 5f
+
+ @ Either 1, 2 or 3 comparison/subtractions are left.
+2: cmn \order, #2 @ compares order with -2: -1 means three left, -2 two, -3 one
+ blt 4f
+ beq 3f
+ cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+3: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+4: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+5:
.endm
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
@@ -495,19 +574,29 @@ LSYM(Lgot_result):
RET
#else /* ARM version. */
+
+ subs r2, r1, #1
+ RETc(eq)
+ bcc LSYM(Ldiv0)
+ cmp r0, r1
+ bls 11f
+ tst r1, r2
+ beq 12f
- cmp divisor, #0
- beq LSYM(Ldiv0)
- mov curbit, #1
- mov result, #0
- cmp dividend, divisor
- blo LSYM(Lgot_result)
-
- ARM_DIV_MOD_BODY 0
+ ARM_DIV_BODY r0, r1, r2, r3
- mov r0, result
+ mov r0, r2
RET
+11: moveq r0, #1
+ movne r0, #0
+ RET
+
+12: ARM_DIV2_ORDER r1, r2
+
+ mov r0, r0, lsr r2
+ RET
+
#endif /* ARM version */
DIV_FUNC_END udivsi3
@@ -537,15 +626,15 @@ LSYM(Lover10):
#else /* ARM version. */
- cmp divisor, #0
- beq LSYM(Ldiv0)
- cmp divisor, #1
- cmpne dividend, divisor
- moveq dividend, #0
- RETc(lo)
- mov curbit, #1
+ subs r2, r1, #1 @ compare divisor with 1
+ bcc LSYM(Ldiv0)
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ RETc(ls)
- ARM_DIV_MOD_BODY 1
+ ARM_MOD_BODY r0, r1, r2, r3
RET
@@ -593,24 +682,41 @@ LSYM(Lover12):
#else /* ARM version. */
- eor ip, dividend, divisor @ Save the sign of the result.
- mov curbit, #1
- mov result, #0
- cmp divisor, #0
- rsbmi divisor, divisor, #0 @ Loops below use unsigned.
+ cmp r1, #0
+ eor ip, r0, r1 @ save the sign of the result.
beq LSYM(Ldiv0)
- cmp dividend, #0
- rsbmi dividend, dividend, #0
- cmp dividend, divisor
- blo LSYM(Lgot_result)
-
- ARM_DIV_MOD_BODY 0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ subs r2, r1, #1 @ division by 1 or -1 ?
+ beq 10f
+ movs r3, r0
+ rsbmi r3, r0, #0 @ positive dividend value
+ cmp r3, r1
+ bls 11f
+ tst r1, r2 @ divisor is power of 2 ?
+ beq 12f
+
+ ARM_DIV_BODY r3, r1, r0, r2
- mov r0, result
cmp ip, #0
rsbmi r0, r0, #0
RET
+10: teq ip, r0 @ same sign ?
+ rsbmi r0, r0, #0
+ RET
+
+11: movlo r0, #0
+ moveq r0, ip, asr #31
+ orreq r0, r0, #1
+ RET
+
+12: ARM_DIV2_ORDER r1, r2
+
+ cmp ip, #0
+ mov r0, r3, lsr r2
+ rsbmi r0, r0, #0
+ RET
+
#endif /* ARM version */
DIV_FUNC_END divsi3
@@ -653,23 +759,22 @@ LSYM(Lover12):
#else /* ARM version. */
- cmp divisor, #0
- rsbmi divisor, divisor, #0 @ Loops below use unsigned.
+ cmp r1, #0
beq LSYM(Ldiv0)
- @ Need to save the sign of the dividend, unfortunately, we need
- @ ip later on; this is faster than pushing lr and using that.
- str dividend, [sp, #-4]!
- cmp dividend, #0 @ Test dividend against zero
- rsbmi dividend, dividend, #0 @ If negative make positive
- cmp dividend, divisor @ else if zero return zero
- blo LSYM(Lgot_result) @ if smaller return dividend
- mov curbit, #1
-
- ARM_DIV_MOD_BODY 1
-
- ldr ip, [sp], #4
- cmp ip, #0
- rsbmi dividend, dividend, #0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ movs ip, r0 @ preserve sign of dividend
+ rsbmi r0, r0, #0 @ if negative make positive
+ subs r2, r1, #1 @ compare divisor with 1
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ bls 10f
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+10: cmp ip, #0
+ rsbmi r0, r0, #0
RET
#endif /* ARM version */