summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- gcc/ChangeLog 7
-rw-r--r-- gcc/config/i386/i386.md 55
-rw-r--r-- gcc/config/i386/x86-tune.def 2
3 files changed, 63 insertions, 1 deletion
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7ff5bd22948..e0b30a373eb 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2013-10-01 Wei Mi <wmi@google.com>
+
+ * config/i386/x86-tune.def (DEF_TUNE)
+ <X86_TUNE_USE_VECTOR_FP_CONVERTS>: Remove m_CORE_ALL.
+ * config/i386/i386.md: Add define_peephole2 to
+ break partial reg stall for cvtss2sd/cvtsd2ss.
+
2013-10-01 Joern Rennecke <joern.rennecke@embecosm.com>
* config/arc/arc.c (pass_arc_ifcvt::clone):
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 03b38426c4b..7368719d313 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5117,6 +5117,61 @@
emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
})
+;; Break partial reg stall for cvtsd2ss: zero the SSE destination, then convert via the V4SF vec_merge form.
+
+(define_peephole2
+ [(set (match_operand:SF 0 "register_operand") ; Match: SF register = float_truncate of a DF register-or-memory operand.
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand")))] ; Condition string follows: SSE2 math, partial-reg-dependency tuning, optimizing for speed, SSE destination, source not the same register.
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ && optimize_function_for_speed_p (cfun)
+ && SSE_REG_P (operands[0])
+ && (!SSE_REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))"
+ [(set (match_dup 0) ; Replacement insn; operands 0 and 1 are the vector-mode views built in the C block below.
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float_truncate:V2SF
+ (match_dup 1)))
+ (match_dup 0) ; Other elements are taken from the destination itself (zeroed just before).
+ (const_int 1)))] ; Mask 1: only element 0 is taken from the converted value.
+{
+ operands[0] = simplify_gen_subreg (V4SFmode, operands[0],
+ SFmode, 0); /* V4SF view of the SF destination.  */
+ operands[1] = simplify_gen_subreg (V2DFmode, operands[1],
+ DFmode, 0); /* V2DF view of the DF source.  NOTE(review): operand 1 may be memory -- confirm the wider view is intended.  */
+ emit_move_insn (operands[0], CONST0_RTX (V4SFmode)); /* Zero the whole destination ahead of the replacement insn.  */
+})
+
+;; Break partial reg stall for cvtss2sd: zero the SSE destination, then convert via the V2DF vec_merge form.
+
+(define_peephole2
+ [(set (match_operand:DF 0 "register_operand") ; Match: DF register = float_extend of an SF register-or-memory operand.
+ (float_extend:DF
+ (match_operand:SF 1 "nonimmediate_operand")))] ; Condition string follows: SSE2 math, partial-reg-dependency tuning, optimizing for speed, SSE destination, source not the same register.
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ && optimize_function_for_speed_p (cfun)
+ && SSE_REG_P (operands[0])
+ && (!SSE_REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))"
+ [(set (match_dup 0) ; Replacement insn; operands 0 and 1 are the vector-mode views built in the C block below.
+ (vec_merge:V2DF
+ (float_extend:V2DF
+ (vec_select:V2SF ; Select elements 0 and 1 of the V4SF view of the source.
+ (match_dup 1)
+ (parallel [(const_int 0) (const_int 1)])))
+ (match_dup 0) ; Other element is taken from the destination itself (zeroed just before).
+ (const_int 1)))] ; Mask 1: only element 0 is taken from the converted value.
+{
+ operands[0] = simplify_gen_subreg (V2DFmode, operands[0],
+ DFmode, 0); /* V2DF view of the DF destination.  */
+ operands[1] = simplify_gen_subreg (V4SFmode, operands[1],
+ SFmode, 0); /* V4SF view of the SF source.  NOTE(review): operand 1 may be memory -- confirm the wider view is intended.  */
+ emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); /* Zero the whole destination ahead of the replacement insn.  */
+})
+
;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers. */
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index c3cf00f53b4..6b0a593ddda 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -346,7 +346,7 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
from FP to FP. This form of instructions avoids partial write to the
destination. */
DEF_TUNE (X86_TUNE_USE_VECTOR_FP_CONVERTS, "use_vector_fp_converts",
- m_CORE_ALL | m_AMDFAM10 | m_GENERIC)
+ m_AMDFAM10 | m_GENERIC)
/* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
from integer to FP. */