diff options
author | olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-09-10 20:35:25 +0000 |
---|---|---|
committer | olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-09-10 20:35:25 +0000 |
commit | 6e7c6395a6426d4d13cf90da2182652aeb7d5431 (patch) | |
tree | 438d32c35f3976d35bb70d589d71f65510a0510e | |
parent | 5e847dede0786fc0aa5302170abaa0cdccee16f8 (diff) | |
download | gcc-6e7c6395a6426d4d13cf90da2182652aeb7d5431.tar.gz |
PR target/54089
* config/sh/sh.h (SH_DYNAMIC_SHIFT_COST): Set always to 1 if
dynamic shifts are available.
(SHIFT_COUNT_TRUNCATED): Always define to 0. Correct comment.
* config/sh/sh.c (ashl_lshr_seq, ext_ashl_lshr_seq): Add comments.
* config/sh/predicates.md (shift_count_operand): Allow
arith_reg_operand even if TARGET_DYNSHIFT is false.
* config/sh/sh.md (ashlsi3, lshrsi3): Expand library call patterns
if needed.
(ashlsi3_d_call, lshrsi3_d_call): New insns.
PR target/54089
* config/sh/lib1funcs.S (ashlsi3): Reimplement as ashlsi3_r0.
(lshrsi3): Reimplement as lshrsi3_r0.
PR target/54089
* gcc.target/sh/pr54089-3.c: New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@191161 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 17 | ||||
-rw-r--r-- | gcc/config/sh/predicates.md | 5 | ||||
-rw-r--r-- | gcc/config/sh/sh.c | 32 | ||||
-rw-r--r-- | gcc/config/sh/sh.h | 34 | ||||
-rw-r--r-- | gcc/config/sh/sh.md | 55 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/sh/pr54089-3.c | 40 | ||||
-rw-r--r-- | libgcc/ChangeLog | 6 | ||||
-rw-r--r-- | libgcc/config/sh/lib1funcs.S | 451 |
9 files changed, 404 insertions, 243 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7c1030c5bf1..de27e83656e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,11 +1,24 @@ +2012-09-10 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/54089 + * config/sh/sh.h (SH_DYNAMIC_SHIFT_COST): Set always to 1 if + dynamic shifts are available. + (SHIFT_COUNT_TRUNCATED): Always define to 0. Correct comment. + * config/sh/sh.c (ashl_lshr_seq, ext_ashl_lshr_seq): Add comments. + * config/sh/predicates.md (shift_count_operand): Allow + arith_reg_operand even if TARGET_DYNSHIFT is false. + * config/sh/sh.md (ashlsi3, lshrsi3): Expand library call patterns + if needed. + (ashlsi3_d_call, lshrsi3_d_call): New insns. + 2012-09-10 Richard Sandiford <rdsandiford@googlemail.com> * ira.c (setup_pressure_classes): Handle synonymous classes. 2012-09-10 Marc Glisse <marc.glisse@inria.fr> - * tree-ssa-forwprop.c (simplify_bitfield_ref): New function. - (ssa_forward_propagate_and_combine): Call it. + * tree-ssa-forwprop.c (simplify_bitfield_ref): New function. + (ssa_forward_propagate_and_combine): Call it. 2012-09-10 Steve Ellcey <sellcey@mips.com> diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md index 92a7b689c84..3936ab2953f 100644 --- a/gcc/config/sh/predicates.md +++ b/gcc/config/sh/predicates.md @@ -791,9 +791,8 @@ /* Allow T_REG as shift count for dynamic shifts, although it is not really possible. It will then be copied to a general purpose reg. */ if (! TARGET_SHMEDIA) - return const_int_operand (op, mode) - || (TARGET_DYNSHIFT && (arith_reg_operand (op, mode) - || t_reg_operand (op, mode))); + return const_int_operand (op, mode) || arith_reg_operand (op, mode) + || (TARGET_DYNSHIFT && t_reg_operand (op, mode)); return (CONSTANT_P (op) ? 
(CONST_INT_P (op) diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 10dad62b8f7..0abf28facc2 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -2871,35 +2871,35 @@ struct ashl_lshr_sequence static const struct ashl_lshr_sequence ashl_lshr_seq[32] = { - { 0, { 0 }, 0 }, + { 0, { 0 }, 0 }, // 0 { 1, { 1 }, LSHR_CLOBBERS_T }, { 1, { 2 }, 0 }, { 2, { 2, 1 }, LSHR_CLOBBERS_T }, - { 2, { 2, 2 }, 0 }, + { 2, { 2, 2 }, 0 }, // 4 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 2, 2, 2 }, 0 }, { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T }, - { 1, { 8 }, 0 }, + { 1, { 8 }, 0 }, // 8 { 2, { 8, 1 }, LSHR_CLOBBERS_T }, { 2, { 8, 2 }, 0 }, { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T }, - { 3, { 8, 2, 2 }, 0 }, + { 3, { 8, 2, 2 }, 0 }, // 12 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 8, -2, 8 }, 0 }, { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T }, - { 1, { 16 }, 0 }, + { 1, { 16 }, 0 }, // 16 { 2, { 16, 1 }, LSHR_CLOBBERS_T }, { 2, { 16, 2 }, 0 }, { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T }, - { 3, { 16, 2, 2 }, 0 }, + { 3, { 16, 2, 2 }, 0 }, // 20 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 16, -2, 8 }, 0 }, { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T }, - { 2, { 16, 8 }, 0 }, + { 2, { 16, 8 }, 0 }, // 24 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T }, { 3, { 16, 8, 2 }, 0 }, { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T }, - { 4, { 16, 8, 2, 2 }, 0 }, + { 4, { 16, 8, 2, 2 }, 0 }, // 28 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T }, { 3, { 16, -2, 16 }, 0 }, @@ -2915,35 +2915,35 @@ static const struct ashl_lshr_sequence ashl_lshr_seq[32] = kind of sign or zero extension. 
*/ static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] = { - { 0, { 0 }, 0 }, + { 0, { 0 }, 0 }, // 0 { 1, { 1 }, LSHR_CLOBBERS_T }, { 1, { 2 }, 0 }, { 2, { 2, 1 }, LSHR_CLOBBERS_T }, - { 2, { 2, 2 }, 0 }, + { 2, { 2, 2 }, 0 }, // 4 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }, { 2, { 8, -2 }, 0 }, { 2, { 8, -1 }, ASHL_CLOBBERS_T }, - { 1, { 8 }, 0 }, + { 1, { 8 }, 0 }, // 8 { 2, { 8, 1 }, LSHR_CLOBBERS_T }, { 2, { 8, 2 }, 0 }, { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T }, - { 3, { 8, 2, 2 }, 0 }, + { 3, { 8, 2, 2 }, 0 }, // 12 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T }, { 2, { 16, -2 }, 0 }, { 2, { 16, -1 }, ASHL_CLOBBERS_T }, - { 1, { 16 }, 0 }, + { 1, { 16 }, 0 }, // 16 { 2, { 16, 1 }, LSHR_CLOBBERS_T }, { 2, { 16, 2 }, 0 }, { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T }, - { 3, { 16, 2, 2 }, 0 }, + { 3, { 16, 2, 2 }, 0 }, // 20 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 16, -2, 8 }, 0 }, { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T }, - { 2, { 16, 8 }, 0 }, + { 2, { 16, 8 }, 0 }, // 24 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T }, { 3, { 16, 8, 2 }, 0 }, { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T }, - { 4, { 16, 8, 2, 2 }, 0 }, + { 4, { 16, 8, 2, 2 }, 0 }, // 28 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T }, { 3, { 16, -2, 16 }, 0 }, { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T } diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index b36287276aa..d72379022bb 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -1932,19 +1932,27 @@ struct sh_args { like shad and shld. */ #define TARGET_DYNSHIFT (TARGET_SH3 || TARGET_SH2A) -#define SH_DYNAMIC_SHIFT_COST \ - (TARGET_HARD_SH4 ? 1 : TARGET_DYNSHIFT ? (optimize_size ? 1 : 2) : 20) - -/* Immediate shift counts are truncated by the output routines (or was it - the assembler?). Shift counts in a register are truncated by SH. Note - that the native compiler puts too large (> 32) immediate shift counts - into a register and shifts by the register, letting the SH decide what - to do instead of doing that itself. */ -/* ??? 
The library routines in lib1funcs.S truncate the shift count. - However, the SH3 has hardware shifts that do not truncate exactly as gcc - expects - the sign bit is significant - so it appears that we need to - leave this zero for correct SH3 code. */ -#define SHIFT_COUNT_TRUNCATED (! TARGET_SH3 && ! TARGET_SH2A) +/* The cost of using the dynamic shift insns (shad, shld) is the same + if they are available. If they are not available a library function will + be emitted instead, which is more expensive. */ +#define SH_DYNAMIC_SHIFT_COST (TARGET_DYNSHIFT ? 1 : 20) + +/* Defining SHIFT_COUNT_TRUNCATED tells the combine pass that code like + (X << (Y % 32)) for register X, Y is equivalent to (X << Y). + This is not generally true when hardware dynamic shifts (shad, shld) are + used, because they check the sign bit _before_ the modulo op. The sign + bit determines whether it is a left shift or a right shift: + if (Y >= 0) + return X << (Y & 31); + else + return X >> ((-Y) & 31); + + The dynamic shift library routines in lib1funcs.S do not use the sign bit + like the hardware dynamic shifts and truncate the shift count to 31. + We define SHIFT_COUNT_TRUNCATED to 0 and express the implied shift count + truncation in the library function call patterns, as this gives slightly + more compact code. */ +#define SHIFT_COUNT_TRUNCATED (0) /* CANONICALIZE_COMPARISON macro for the combine pass. */ #define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \ diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 8b44fbda496..c06a51011b7 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -4023,6 +4023,17 @@ label: operands[2])); DONE; } + + /* Expand a library call for the dynamic shift. 
*/ + if (!CONST_INT_P (operands[2]) && !TARGET_DYNSHIFT) + { + emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]); + rtx funcaddr = gen_reg_rtx (Pmode); + function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC); + emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr)); + + DONE; + } }) (define_insn "ashlsi3_k" @@ -4067,6 +4078,23 @@ label: } [(set_attr "type" "dyn_shift")]) +;; If dynamic shifts are not available use a library function. +;; By specifying the pattern we reduce the number of call clobbered regs. +;; In order to make combine understand the truncation of the shift amount +;; operand we have to allow it to use pseudo regs for the shift operands. +(define_insn "ashlsi3_d_call" + [(set (match_operand:SI 0 "arith_reg_dest" "=z") + (ashift:SI (reg:SI R4_REG) + (and:SI (match_operand:SI 1 "arith_reg_operand" "z") + (const_int 31)))) + (use (match_operand:SI 2 "arith_reg_operand" "r")) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH1 && !TARGET_DYNSHIFT" + "jsr @%2%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + (define_insn_and_split "ashlsi3_n" [(set (match_operand:SI 0 "arith_reg_dest" "=r") (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") @@ -4512,6 +4540,16 @@ label: operands[2])); DONE; } + + /* Expand a library call for the dynamic shift. */ + if (!CONST_INT_P (operands[2]) && !TARGET_DYNSHIFT) + { + emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]); + rtx funcaddr = gen_reg_rtx (Pmode); + function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC); + emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr)); + DONE; + } }) (define_insn "lshrsi3_k" @@ -4556,6 +4594,23 @@ label: } [(set_attr "type" "dyn_shift")]) +;; If dynamic shifts are not available use a library function. +;; By specifying the pattern we reduce the number of call clobbered regs. 
+;; In order to make combine understand the truncation of the shift amount +;; operand we have to allow it to use pseudo regs for the shift operands. +(define_insn "lshrsi3_d_call" + [(set (match_operand:SI 0 "arith_reg_dest" "=z") + (lshiftrt:SI (reg:SI R4_REG) + (and:SI (match_operand:SI 1 "arith_reg_operand" "z") + (const_int 31)))) + (use (match_operand:SI 2 "arith_reg_operand" "r")) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH1 && !TARGET_DYNSHIFT" + "jsr @%2%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + (define_insn_and_split "lshrsi3_n" [(set (match_operand:SI 0 "arith_reg_dest" "=r") (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index dc3577401e8..1f8edec5b6d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,6 +1,11 @@ +2012-09-10 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/54089 + * gcc.target/sh/pr54089-3.c: New. + 2012-09-10 Marc Glisse <marc.glisse@inria.fr> - * gcc.dg/tree-ssa/forwprop-21.c: New testcase. + * gcc.dg/tree-ssa/forwprop-21.c: New testcase. 2012-09-10 Aldy Hernandez <aldyh@redhat.com> diff --git a/gcc/testsuite/gcc.target/sh/pr54089-3.c b/gcc/testsuite/gcc.target/sh/pr54089-3.c new file mode 100644 index 00000000000..ffb976ba11b --- /dev/null +++ b/gcc/testsuite/gcc.target/sh/pr54089-3.c @@ -0,0 +1,40 @@ +/* The dynamic shift library functions truncate the shift count to 5 bits. + Verify that this is taken into account and no extra shift count + truncations are generated before the library call. 
*/ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "*" } { "-m1*" "-m2" "-m2e*" } } */ +/* { dg-final { scan-assembler-not "and" } } */ +/* { dg-final { scan-assembler-not "31" } } */ + +int +test00 (unsigned int a, int* b, int c, int* d, unsigned int e) +{ + int s = 0; + int i; + for (i = 0; i < c; ++i) + s += d[i] + b[i] + (e << (i & 31)); + return s; +} + +int +test01 (unsigned int a, int* b, int c, int* d, unsigned int e) +{ + int s = 0; + int i; + for (i = 0; i < c; ++i) + s += d[i] + b[i] + (e >> (i & 31)); + return s; +} + +int +test03 (unsigned int a, unsigned int b) +{ + return b << (a & 31); +} + +unsigned int +test04 (unsigned int a, int b) +{ + return a >> (b & 31); +} diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 36f11ea07dd..a225e5875e0 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,9 @@ +2012-09-10 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/54089 + * config/sh/lib1funcs.S (ashlsi3): Reimplement as ashlsi3_r0. + (lshrsi3): Reimplement as lshrsi3_r0. + 2012-09-10 Andreas Schwab <schwab@linux-m68k.org> PR target/46191 diff --git a/libgcc/config/sh/lib1funcs.S b/libgcc/config/sh/lib1funcs.S index 2f0ca16cd91..c5a00db9a20 100644 --- a/libgcc/config/sh/lib1funcs.S +++ b/libgcc/config/sh/lib1funcs.S @@ -1,5 +1,5 @@ /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2009 + 2004, 2005, 2006, 2009, 2012 Free Software Foundation, Inc. This file is free software; you can redistribute it and/or modify it @@ -241,7 +241,7 @@ GLOBAL(ashiftrt_r4_0): ! Entry: ! ! r4: Value to shift -! r5: Shifts +! r5: Shift count ! ! Exit: ! @@ -249,7 +249,7 @@ GLOBAL(ashiftrt_r4_0): ! ! Destroys: ! -! (none) +! T bit, r5 ! .global GLOBAL(ashrsi3) @@ -388,318 +388,353 @@ LOCAL(ashrsi3_0): ! ! GLOBAL(ashlsi3) +! (For compatibility with older binaries, not used by compiler) ! ! Entry: -! -! r4: Value to shift -! r5: Shifts +! r4: Value to shift +! 
r5: Shift count ! ! Exit: -! -! r0: Result +! r0: Result ! ! Destroys: +! T bit +! ! -! (none) +! GLOBAL(ashlsi3_r0) ! +! Entry: +! r4: Value to shift +! r0: Shift count +! +! Exit: +! r0: Result +! +! Destroys: +! T bit + .global GLOBAL(ashlsi3) + .global GLOBAL(ashlsi3_r0) HIDDEN_FUNC(GLOBAL(ashlsi3)) - .align 2 + HIDDEN_FUNC(GLOBAL(ashlsi3_r0)) GLOBAL(ashlsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(ashlsi3_table),r0 - mov.b @(r0,r5),r5 + mov r5,r0 + .align 2 +GLOBAL(ashlsi3_r0): + #ifdef __sh1__ - add r5,r0 + and #31,r0 + shll2 r0 + mov.l r4,@-r15 + mov r0,r4 + mova LOCAL(ashlsi3_table),r0 + add r4,r0 + mov.l @r15+,r4 jmp @r0 + mov r4,r0 + .align 2 #else - braf r5 -#endif + and #31,r0 + shll2 r0 + braf r0 mov r4,r0 +#endif - .align 2 LOCAL(ashlsi3_table): - .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table) - .byte 
LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table) - -LOCAL(ashlsi3_6): - shll2 r0 -LOCAL(ashlsi3_4): - shll2 r0 -LOCAL(ashlsi3_2): + rts // << 0 + nop +LOCAL(ashlsi_1): + rts // << 1 + shll r0 +LOCAL(ashlsi_2): // << 2 rts shll2 r0 - -LOCAL(ashlsi3_7): - shll2 r0 -LOCAL(ashlsi3_5): + bra LOCAL(ashlsi_1) // << 3 shll2 r0 -LOCAL(ashlsi3_3): + bra LOCAL(ashlsi_2) // << 4 shll2 r0 -LOCAL(ashlsi3_1): - rts + bra LOCAL(ashlsi_5) // << 5 shll r0 - -LOCAL(ashlsi3_14): - shll2 r0 -LOCAL(ashlsi3_12): + bra LOCAL(ashlsi_6) // << 6 shll2 r0 -LOCAL(ashlsi3_10): - shll2 r0 -LOCAL(ashlsi3_8): + bra LOCAL(ashlsi_7) // << 7 + shll r0 +LOCAL(ashlsi_8): // << 8 rts shll8 r0 - -LOCAL(ashlsi3_15): - shll2 r0 -LOCAL(ashlsi3_13): + bra LOCAL(ashlsi_8) // << 9 + shll r0 + bra LOCAL(ashlsi_8) // << 10 shll2 r0 -LOCAL(ashlsi3_11): + bra LOCAL(ashlsi_11) // << 11 + shll r0 + bra LOCAL(ashlsi_12) // << 12 shll2 r0 -LOCAL(ashlsi3_9): + bra LOCAL(ashlsi_13) // << 13 + shll r0 + bra LOCAL(ashlsi_14) // << 14 + shll8 r0 + bra LOCAL(ashlsi_15) // << 15 shll8 r0 +LOCAL(ashlsi_16): // << 16 rts + shll16 r0 + bra LOCAL(ashlsi_16) // << 17 shll r0 - -LOCAL(ashlsi3_22): - shll2 r0 -LOCAL(ashlsi3_20): + bra LOCAL(ashlsi_16) // << 18 shll2 r0 -LOCAL(ashlsi3_18): + bra LOCAL(ashlsi_19) // << 19 + shll r0 + bra LOCAL(ashlsi_20) // << 20 shll2 r0 -LOCAL(ashlsi3_16): - rts + bra LOCAL(ashlsi_21) // << 21 + shll r0 + bra LOCAL(ashlsi_22) // << 22 shll16 r0 - -LOCAL(ashlsi3_23): - shll2 r0 -LOCAL(ashlsi3_21): + bra LOCAL(ashlsi_23) // << 23 + shll16 r0 + bra LOCAL(ashlsi_16) // << 24 + shll8 r0 + bra LOCAL(ashlsi_25) // << 25 + shll r0 + bra LOCAL(ashlsi_26) // << 26 shll2 r0 -LOCAL(ashlsi3_19): + bra LOCAL(ashlsi_27) // << 27 + shll r0 + bra LOCAL(ashlsi_28) // << 28 shll2 r0 
-LOCAL(ashlsi3_17): + bra LOCAL(ashlsi_29) // << 29 + shll16 r0 + bra LOCAL(ashlsi_30) // << 30 shll16 r0 + and #1,r0 // << 31 rts - shll r0 + rotr r0 -LOCAL(ashlsi3_30): - shll2 r0 -LOCAL(ashlsi3_28): +LOCAL(ashlsi_7): shll2 r0 -LOCAL(ashlsi3_26): +LOCAL(ashlsi_5): +LOCAL(ashlsi_6): shll2 r0 -LOCAL(ashlsi3_24): - shll16 r0 rts - shll8 r0 - -LOCAL(ashlsi3_31): +LOCAL(ashlsi_13): shll2 r0 -LOCAL(ashlsi3_29): +LOCAL(ashlsi_12): +LOCAL(ashlsi_11): + shll8 r0 + rts +LOCAL(ashlsi_21): shll2 r0 -LOCAL(ashlsi3_27): +LOCAL(ashlsi_20): +LOCAL(ashlsi_19): + shll16 r0 + rts +LOCAL(ashlsi_28): +LOCAL(ashlsi_27): shll2 r0 -LOCAL(ashlsi3_25): +LOCAL(ashlsi_26): +LOCAL(ashlsi_25): shll16 r0 + rts + shll8 r0 + +LOCAL(ashlsi_22): +LOCAL(ashlsi_14): + shlr2 r0 + rts shll8 r0 + +LOCAL(ashlsi_23): +LOCAL(ashlsi_15): + shlr r0 rts - shll r0 + shll8 r0 -LOCAL(ashlsi3_0): +LOCAL(ashlsi_29): + shlr r0 +LOCAL(ashlsi_30): + shlr2 r0 rts - nop + shll16 r0 ENDFUNC(GLOBAL(ashlsi3)) + ENDFUNC(GLOBAL(ashlsi3_r0)) #endif #ifdef L_lshiftrt ! ! GLOBAL(lshrsi3) +! (For compatibility with older binaries, not used by compiler) ! ! Entry: -! -! r4: Value to shift -! r5: Shifts +! r4: Value to shift +! r5: Shift count ! ! Exit: -! -! r0: Result +! r0: Result ! ! Destroys: +! T bit ! -! (none) ! +! GLOBAL(lshrsi3_r0) +! +! Entry: +! r4: Value to shift +! r0: Shift count +! +! Exit: +! r0: Result +! +! Destroys: +! 
T bit + .global GLOBAL(lshrsi3) + .global GLOBAL(lshrsi3_r0) HIDDEN_FUNC(GLOBAL(lshrsi3)) - .align 2 + HIDDEN_FUNC(GLOBAL(lshrsi3_r0)) GLOBAL(lshrsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(lshrsi3_table),r0 - mov.b @(r0,r5),r5 + mov r5,r0 + .align 2 +GLOBAL(lshrsi3_r0): + #ifdef __sh1__ - add r5,r0 + and #31,r0 + shll2 r0 + mov.l r4,@-r15 + mov r0,r4 + mova LOCAL(lshrsi3_table),r0 + add r4,r0 + mov.l @r15+,r4 jmp @r0 + mov r4,r0 + .align 2 #else - braf r5 -#endif + and #31,r0 + shll2 r0 + braf r0 mov r4,r0 - - .align 2 +#endif LOCAL(lshrsi3_table): - .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table) - .byte 
LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table) - -LOCAL(lshrsi3_6): - shlr2 r0 -LOCAL(lshrsi3_4): - shlr2 r0 -LOCAL(lshrsi3_2): + rts // >> 0 + nop +LOCAL(lshrsi_1): // >> 1 + rts + shlr r0 +LOCAL(lshrsi_2): // >> 2 rts shlr2 r0 - -LOCAL(lshrsi3_7): - shlr2 r0 -LOCAL(lshrsi3_5): + bra LOCAL(lshrsi_1) // >> 3 shlr2 r0 -LOCAL(lshrsi3_3): + bra LOCAL(lshrsi_2) // >> 4 shlr2 r0 -LOCAL(lshrsi3_1): - rts + bra LOCAL(lshrsi_5) // >> 5 shlr r0 - -LOCAL(lshrsi3_14): - shlr2 r0 -LOCAL(lshrsi3_12): - shlr2 r0 -LOCAL(lshrsi3_10): + bra LOCAL(lshrsi_6) // >> 6 shlr2 r0 -LOCAL(lshrsi3_8): + bra LOCAL(lshrsi_7) // >> 7 + shlr r0 +LOCAL(lshrsi_8): // >> 8 rts shlr8 r0 - -LOCAL(lshrsi3_15): - shlr2 r0 -LOCAL(lshrsi3_13): + bra LOCAL(lshrsi_8) // >> 9 + shlr r0 + bra LOCAL(lshrsi_8) // >> 10 shlr2 r0 -LOCAL(lshrsi3_11): + bra LOCAL(lshrsi_11) // >> 11 + shlr r0 + bra LOCAL(lshrsi_12) // >> 12 shlr2 r0 -LOCAL(lshrsi3_9): + bra LOCAL(lshrsi_13) // >> 13 + shlr r0 + bra LOCAL(lshrsi_14) // >> 14 shlr8 r0 + bra LOCAL(lshrsi_15) // >> 15 + shlr8 r0 +LOCAL(lshrsi_16): // >> 16 rts + shlr16 r0 + bra LOCAL(lshrsi_16) // >> 17 shlr r0 - -LOCAL(lshrsi3_22): + bra LOCAL(lshrsi_16) // >> 18 shlr2 r0 -LOCAL(lshrsi3_20): - shlr2 r0 -LOCAL(lshrsi3_18): + bra LOCAL(lshrsi_19) // >> 19 + shlr r0 + bra LOCAL(lshrsi_20) // >> 20 shlr2 r0 -LOCAL(lshrsi3_16): - rts + bra LOCAL(lshrsi_21) // >> 21 + shlr r0 + bra LOCAL(lshrsi_22) // >> 22 shlr16 r0 - -LOCAL(lshrsi3_23): - shlr2 r0 -LOCAL(lshrsi3_21): + bra LOCAL(lshrsi_23) // >> 23 + shlr16 r0 + bra LOCAL(lshrsi_16) // >> 24 + shlr8 r0 + bra LOCAL(lshrsi_25) // >> 25 + shlr r0 + bra LOCAL(lshrsi_26) // >> 26 shlr2 r0 -LOCAL(lshrsi3_19): + bra LOCAL(lshrsi_27) // >> 27 + shlr r0 + bra LOCAL(lshrsi_28) // >> 28 shlr2 r0 -LOCAL(lshrsi3_17): + bra LOCAL(lshrsi_29) // >> 29 shlr16 r0 + bra LOCAL(lshrsi_30) // >> 30 + shlr16 r0 + shll r0 // >> 31 rts - shlr r0 + movt r0 -LOCAL(lshrsi3_30): +LOCAL(lshrsi_7): shlr2 r0 
-LOCAL(lshrsi3_28): +LOCAL(lshrsi_5): +LOCAL(lshrsi_6): shlr2 r0 -LOCAL(lshrsi3_26): - shlr2 r0 -LOCAL(lshrsi3_24): - shlr16 r0 rts - shlr8 r0 - -LOCAL(lshrsi3_31): +LOCAL(lshrsi_13): shlr2 r0 -LOCAL(lshrsi3_29): +LOCAL(lshrsi_12): +LOCAL(lshrsi_11): + shlr8 r0 + rts +LOCAL(lshrsi_21): shlr2 r0 -LOCAL(lshrsi3_27): +LOCAL(lshrsi_20): +LOCAL(lshrsi_19): + shlr16 r0 + rts +LOCAL(lshrsi_28): +LOCAL(lshrsi_27): shlr2 r0 -LOCAL(lshrsi3_25): +LOCAL(lshrsi_26): +LOCAL(lshrsi_25): shlr16 r0 + rts + shlr8 r0 + +LOCAL(lshrsi_22): +LOCAL(lshrsi_14): + shll2 r0 + rts shlr8 r0 + +LOCAL(lshrsi_23): +LOCAL(lshrsi_15): + shll r0 rts - shlr r0 + shlr8 r0 -LOCAL(lshrsi3_0): +LOCAL(lshrsi_29): + shll r0 +LOCAL(lshrsi_30): + shll2 r0 rts - nop + shlr16 r0 ENDFUNC(GLOBAL(lshrsi3)) + ENDFUNC(GLOBAL(lshrsi3_r0)) #endif #ifdef L_movmem |