summary | refs | log | tree | commit | diff
path: root/gcc/config/bfin
diff options
context:
space:
mode:
author bernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-04-12 13:39:35 +0000
committer bernds <bernds@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-04-12 13:39:35 +0000
commit 6daa377bb42d13707b5e954f1f217a58a2c25e6b (patch)
tree b13beefca85f131d4dfe6524d6ccf3324a44b684 /gcc/config/bfin
parent c9ddd4139a990ca0028b995f79b63ca2f3b89441 (diff)
download gcc-6daa377bb42d13707b5e954f1f217a58a2c25e6b.tar.gz
* config/bfin/lib1funcs.asm (___umulsi3_highpart, ___smulsi3_highpart):
Use a more efficient implementation.
* config/bfin/bfin.md (umulsi3_highpart, smulsi3_highpart): Emit inline
sequences when not optimizing for size.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@123748 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/bfin')
-rw-r--r-- gcc/config/bfin/bfin.md 120
-rw-r--r-- gcc/config/bfin/lib1funcs.asm 42
2 files changed, 102 insertions, 60 deletions
diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md
index e1eeaa6b96f..ed0da5a3732 100644
--- a/gcc/config/bfin/bfin.md
+++ b/gcc/config/bfin/bfin.md
@@ -1451,42 +1451,102 @@
[(set_attr "type" "mult")])
(define_expand "umulsi3_highpart"
- [(set (match_operand:SI 0 "register_operand" "")
- (truncate:SI
- (lshiftrt:DI
- (mult:DI (zero_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" ""))
- (zero_extend:DI
- (match_operand:SI 2 "register_operand" "")))
- (const_int 32))))]
- ""
-{
- rtx umulsi3_highpart_libfunc
- = init_one_libfunc ("__umulsi3_highpart");
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))
+ (clobber (reg:PDI REG_A0))
+ (clobber (reg:PDI REG_A1))])]
+ ""
+{
+ if (!optimize_size)
+ {
+ rtx a1reg = gen_rtx_REG (PDImode, REG_A1);
+ rtx a0reg = gen_rtx_REG (PDImode, REG_A0);
+ emit_insn (gen_flag_macinit1hi (a1reg,
+ gen_lowpart (HImode, operands[1]),
+ gen_lowpart (HImode, operands[2]),
+ GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg,
+ gen_lowpart (V2HImode, operands[1]),
+ gen_lowpart (V2HImode, operands[2]),
+ const1_rtx, const1_rtx,
+ const1_rtx, const0_rtx, a1reg,
+ const0_rtx, GEN_INT (MACFLAG_FU),
+ GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_flag_machi_parts_acconly (a1reg,
+ gen_lowpart (V2HImode, operands[2]),
+ gen_lowpart (V2HImode, operands[1]),
+ const1_rtx, const0_rtx,
+ a1reg, const0_rtx, GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg));
+ }
+ else
+ {
+ rtx umulsi3_highpart_libfunc
+ = init_one_libfunc ("__umulsi3_highpart");
- emit_library_call_value (umulsi3_highpart_libfunc,
- operands[0], LCT_NORMAL, SImode,
- 2, operands[1], SImode, operands[2], SImode);
+ emit_library_call_value (umulsi3_highpart_libfunc,
+ operands[0], LCT_NORMAL, SImode,
+ 2, operands[1], SImode, operands[2], SImode);
+ }
DONE;
})
(define_expand "smulsi3_highpart"
- [(set (match_operand:SI 0 "register_operand" "")
- (truncate:SI
- (lshiftrt:DI
- (mult:DI (sign_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" ""))
- (sign_extend:DI
- (match_operand:SI 2 "register_operand" "")))
- (const_int 32))))]
- ""
-{
- rtx smulsi3_highpart_libfunc
- = init_one_libfunc ("__smulsi3_highpart");
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (sign_extend:DI
+ (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))
+ (clobber (reg:PDI REG_A0))
+ (clobber (reg:PDI REG_A1))])]
+ ""
+{
+ if (!optimize_size)
+ {
+ rtx a1reg = gen_rtx_REG (PDImode, REG_A1);
+ rtx a0reg = gen_rtx_REG (PDImode, REG_A0);
+ emit_insn (gen_flag_macinit1hi (a1reg,
+ gen_lowpart (HImode, operands[1]),
+ gen_lowpart (HImode, operands[2]),
+ GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg,
+ gen_lowpart (V2HImode, operands[1]),
+ gen_lowpart (V2HImode, operands[2]),
+ const1_rtx, const1_rtx,
+ const1_rtx, const0_rtx, a1reg,
+ const0_rtx, GEN_INT (MACFLAG_IS),
+ GEN_INT (MACFLAG_IS_M)));
+ emit_insn (gen_flag_machi_parts_acconly (a1reg,
+ gen_lowpart (V2HImode, operands[2]),
+ gen_lowpart (V2HImode, operands[1]),
+ const1_rtx, const0_rtx,
+ a1reg, const0_rtx, GEN_INT (MACFLAG_IS_M)));
+ emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg));
+ }
+ else
+ {
+ rtx smulsi3_highpart_libfunc
+ = init_one_libfunc ("__smulsi3_highpart");
- emit_library_call_value (smulsi3_highpart_libfunc,
- operands[0], LCT_NORMAL, SImode,
- 2, operands[1], SImode, operands[2], SImode);
+ emit_library_call_value (smulsi3_highpart_libfunc,
+ operands[0], LCT_NORMAL, SImode,
+ 2, operands[1], SImode, operands[2], SImode);
+ }
DONE;
})
diff --git a/gcc/config/bfin/lib1funcs.asm b/gcc/config/bfin/lib1funcs.asm
index 1d2db9beb3e..fe4c3d53eb5 100644
--- a/gcc/config/bfin/lib1funcs.asm
+++ b/gcc/config/bfin/lib1funcs.asm
@@ -123,17 +123,12 @@ ___umodsi3:
.type ___umulsi3_highpart, STT_FUNC;
___umulsi3_highpart:
- R2 = R1.H * R0.H, R3 = R1.L * R0.H (FU);
- R0 = R1.L * R0.L, R1 = R1.H * R0.L (FU);
- R0 >>= 16;
- /* Unsigned multiplication has the nice property that we can
- ignore carry on this first addition. */
- R0 = R0 + R3;
- R0 = R0 + R1;
- cc = ac0;
- R1 = cc;
- R1 = PACK(R1.l,R0.h);
- R0 = R1 + R2;
+ A1 = R1.L * R0.L (FU);
+ A1 = A1 >> 16;
+ A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU);
+ A1 += R0.L * R1.H (FU);
+ A1 = A1 >> 16;
+ R0 = (A0 += A1);
RTS;
#endif
@@ -143,24 +138,11 @@ ___umulsi3_highpart:
.type ___smulsi3_highpart, STT_FUNC;
___smulsi3_highpart:
- R2 = R1.L * R0.L (FU);
- R3 = R1.H * R0.L (IS,M);
- R0 = R0.H * R1.H, R1 = R0.H * R1.L (IS,M);
-
- R1.L = R2.H + R1.L;
- cc = ac0;
- R2 = cc;
-
- R1.L = R1.L + R3.L;
- cc = ac0;
- R1 >>>= 16;
- R3 >>>= 16;
- R1 = R1 + R3;
- R1 = R1 + R2;
- R2 = cc;
- R1 = R1 + R2;
-
- R0 = R0 + R1;
+ A1 = R1.L * R0.L (FU);
+ A1 = A1 >> 16;
+ A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M);
+ A1 += R1.H * R0.L (IS,M);
+ A1 = A1 >>> 16;
+ R0 = (A0 += A1);
RTS;
#endif
-