diff options
author | bernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-04-12 13:39:35 +0000 |
---|---|---|
committer | bernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-04-12 13:39:35 +0000 |
commit | 6daa377bb42d13707b5e954f1f217a58a2c25e6b (patch) | |
tree | b13beefca85f131d4dfe6524d6ccf3324a44b684 /gcc/config/bfin | |
parent | c9ddd4139a990ca0028b995f79b63ca2f3b89441 (diff) | |
download | gcc-6daa377bb42d13707b5e954f1f217a58a2c25e6b.tar.gz |
* config/bfin/lib1funcs.asm (___umulsi3_highpart, ___smulsi3_highpart):
Use a more efficient implementation.
* config/bfin/bfin.md (umulsi3_highpart, smulsi3_highpart): Emit
inline sequences when not optimizing for size.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@123748 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/bfin')
-rw-r--r-- | gcc/config/bfin/bfin.md | 120 | ||||
-rw-r--r-- | gcc/config/bfin/lib1funcs.asm | 42 |
2 files changed, 102 insertions, 60 deletions
diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md index e1eeaa6b96f..ed0da5a3732 100644 --- a/gcc/config/bfin/bfin.md +++ b/gcc/config/bfin/bfin.md @@ -1451,42 +1451,102 @@ [(set_attr "type" "mult")]) (define_expand "umulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "") - (truncate:SI - (lshiftrt:DI - (mult:DI (zero_extend:DI - (match_operand:SI 1 "nonimmediate_operand" "")) - (zero_extend:DI - (match_operand:SI 2 "register_operand" ""))) - (const_int 32))))] - "" -{ - rtx umulsi3_highpart_libfunc - = init_one_libfunc ("__umulsi3_highpart"); + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (zero_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (reg:PDI REG_A0)) + (clobber (reg:PDI REG_A1))])] + "" +{ + if (!optimize_size) + { + rtx a1reg = gen_rtx_REG (PDImode, REG_A1); + rtx a0reg = gen_rtx_REG (PDImode, REG_A0); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, + gen_lowpart (V2HImode, operands[1]), + gen_lowpart (V2HImode, operands[2]), + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_FU), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_flag_machi_parts_acconly (a1reg, + gen_lowpart (V2HImode, operands[2]), + gen_lowpart (V2HImode, operands[1]), + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg)); + } + else + { + rtx umulsi3_highpart_libfunc + = init_one_libfunc ("__umulsi3_highpart"); - emit_library_call_value (umulsi3_highpart_libfunc, - operands[0], LCT_NORMAL, SImode, - 2, 
operands[1], SImode, operands[2], SImode); + emit_library_call_value (umulsi3_highpart_libfunc, + operands[0], LCT_NORMAL, SImode, + 2, operands[1], SImode, operands[2], SImode); + } DONE; }) (define_expand "smulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "") - (truncate:SI - (lshiftrt:DI - (mult:DI (sign_extend:DI - (match_operand:SI 1 "nonimmediate_operand" "")) - (sign_extend:DI - (match_operand:SI 2 "register_operand" ""))) - (const_int 32))))] - "" -{ - rtx smulsi3_highpart_libfunc - = init_one_libfunc ("__smulsi3_highpart"); + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (reg:PDI REG_A0)) + (clobber (reg:PDI REG_A1))])] + "" +{ + if (!optimize_size) + { + rtx a1reg = gen_rtx_REG (PDImode, REG_A1); + rtx a0reg = gen_rtx_REG (PDImode, REG_A0); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, + gen_lowpart (V2HImode, operands[1]), + gen_lowpart (V2HImode, operands[2]), + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_IS), + GEN_INT (MACFLAG_IS_M))); + emit_insn (gen_flag_machi_parts_acconly (a1reg, + gen_lowpart (V2HImode, operands[2]), + gen_lowpart (V2HImode, operands[1]), + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_IS_M))); + emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg)); + } + else + { + rtx smulsi3_highpart_libfunc + = init_one_libfunc ("__smulsi3_highpart"); - emit_library_call_value (smulsi3_highpart_libfunc, - operands[0], LCT_NORMAL, SImode, - 2, operands[1], SImode, 
operands[2], SImode); + emit_library_call_value (smulsi3_highpart_libfunc, + operands[0], LCT_NORMAL, SImode, + 2, operands[1], SImode, operands[2], SImode); + } DONE; }) diff --git a/gcc/config/bfin/lib1funcs.asm b/gcc/config/bfin/lib1funcs.asm index 1d2db9beb3e..fe4c3d53eb5 100644 --- a/gcc/config/bfin/lib1funcs.asm +++ b/gcc/config/bfin/lib1funcs.asm @@ -123,17 +123,12 @@ ___umodsi3: .type ___umulsi3_highpart, STT_FUNC; ___umulsi3_highpart: - R2 = R1.H * R0.H, R3 = R1.L * R0.H (FU); - R0 = R1.L * R0.L, R1 = R1.H * R0.L (FU); - R0 >>= 16; - /* Unsigned multiplication has the nice property that we can - ignore carry on this first addition. */ - R0 = R0 + R3; - R0 = R0 + R1; - cc = ac0; - R1 = cc; - R1 = PACK(R1.l,R0.h); - R0 = R1 + R2; + A1 = R1.L * R0.L (FU); + A1 = A1 >> 16; + A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU); + A1 += R0.L * R1.H (FU); + A1 = A1 >> 16; + R0 = (A0 += A1); RTS; #endif @@ -143,24 +138,11 @@ ___umulsi3_highpart: .type ___smulsi3_highpart, STT_FUNC; ___smulsi3_highpart: - R2 = R1.L * R0.L (FU); - R3 = R1.H * R0.L (IS,M); - R0 = R0.H * R1.H, R1 = R0.H * R1.L (IS,M); - - R1.L = R2.H + R1.L; - cc = ac0; - R2 = cc; - - R1.L = R1.L + R3.L; - cc = ac0; - R1 >>>= 16; - R3 >>>= 16; - R1 = R1 + R3; - R1 = R1 + R2; - R2 = cc; - R1 = R1 + R2; - - R0 = R0 + R1; + A1 = R1.L * R0.L (FU); + A1 = A1 >> 16; + A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M); + A1 += R1.H * R0.L (IS,M); + A1 = A1 >>> 16; + R0 = (A0 += A1); RTS; #endif - |