summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authordorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>2005-06-18 13:18:52 +0000
committerdorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>2005-06-18 13:18:52 +0000
commitea8f3370b1692d96d1289a6ed7a757fd5b0685bd (patch)
tree76522a399271e0448291fb8b63bbbf3f898dc165 /gcc
parentb5a8f2016cff1d18bb6c8ffc411929756d628c0d (diff)
downloadgcc-ea8f3370b1692d96d1289a6ed7a757fd5b0685bd.tar.gz
* tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): New
tree-codes. * optabs.h (OTI_reduc_smax, OTI_reduc_umax, OTI_reduc_smin, OTI_reduc_umin, OTI_reduc_plus): New optabs for reduction. (reduc_smax_optab, reduc_umax_optab, reduc_smin_optab, reduc_umin_optab, reduc_plus_optab): New optabs for reduction. * expr.c (expand_expr_real_1): Handle new tree-codes. * tree-inline.c (estimate_num_insns_1): Handle new tree-codes. * tree-pretty-print.c (dump_generic_node, op_prio, op_symbol): Handle new tree-codes. * optabs.c (optab_for_tree_code): Handle new tree-codes. (init_optabs): Initialize new optabs. * genopinit.c (optabs): Define handlers for new optabs. * tree-vect-analyze.c (vect_analyze_operations): Fail vectorization in case of a phi that is marked as relevant. Call vectorizable_reduction. (vect_mark_relevant): Phis may be marked as relevant. (vect_mark_stmts_to_be_vectorized): The use corresponding to the reduction variable in a reduction stmt does not mark its defining phi as relevant. Update documentation accordingly. (vect_can_advance_ivs_p): Skip reduction phis. * tree-vect-transform.c (vect_get_vec_def_for_operand): Takes additional argument. Handle reduction. (vect_create_destination_var): Update call to vect_get_new_vect_var. Handle non-vector argument. (get_initial_def_for_reduction): New function. (vect_create_epilog_for_reduction): New function. (vectorizable_reduction): New function. (vect_get_new_vect_var): Handle new vect_var_kind. (vectorizable_assignment, vectorizable_operation, vectorizable_store, vectorizable_condition): Update call to vect_get_new_vect_var. (vect_transform_stmt): Call vectorizable_reduction. (vect_update_ivs_after_vectorizer): Skip reduction phis. (vect_transform_loop): Skip if stmt is both not relevant and not live. * tree-vectorizer.c (reduction_code_for_scalar_code): New function. (vect_is_simple_reduction): Was empty - added implementation. * tree-vectorizer.h (vect_scalar_var): New enum vect_var_kind value. (reduc_vec_info_type): New enum vect_def_type value. 
* config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf, reduc_umax_v4si, reduc_smin_v4si, reduc_umin_v4si, reduc_smin_v4sf, reduc_plus_v4si, reduc_plus_v4sf): New define_expands. * tree-vect-analyze.c (vect_determine_vectorization_factor): Remove ENABLE_CHECKING around gcc_assert. * tree-vect-transform.c (vect_do_peeling_for_loop_bound, vect_do_peeling_for_alignment, vect_transform_loop, vect_get_vec_def_for_operand): Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@101155 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog50
-rw-r--r--gcc/config/rs6000/altivec.md154
-rw-r--r--gcc/expr.c10
-rw-r--r--gcc/genopinit.c7
-rw-r--r--gcc/optabs.c15
-rw-r--r--gcc/optabs.h13
-rw-r--r--gcc/testsuite/ChangeLog8
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-1.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-2.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-3.c4
-rw-r--r--gcc/testsuite/lib/target-supports.exp17
-rw-r--r--gcc/tree-inline.c4
-rw-r--r--gcc/tree-pretty-print.c24
-rw-r--r--gcc/tree-vect-analyze.c126
-rw-r--r--gcc/tree-vect-transform.c566
-rw-r--r--gcc/tree-vectorizer.c225
-rw-r--r--gcc/tree-vectorizer.h9
-rw-r--r--gcc/tree.def10
18 files changed, 1149 insertions, 101 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8ebd8004845..73986314282 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,53 @@
+2005-06-19 Dorit Nuzman <dorit@il.ibm.com>
+
+ * tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): New
+ tree-codes.
+ * optabs.h (OTI_reduc_smax, OTI_reduc_umax, OTI_reduc_smin,
+ OTI_reduc_umin, OTI_reduc_plus): New optabs for reduction.
+ (reduc_smax_optab, reduc_umax_optab, reduc_smin_optab, reduc_umin_optab,
+ reduc_plus_optab): New optabs for reduction.
+ * expr.c (expand_expr_real_1): Handle new tree-codes.
+ * tree-inline.c (estimate_num_insns_1): Handle new tree-codes.
+ * tree-pretty-print.c (dump_generic_node, op_prio, op_symbol): Handle
+ new tree-codes.
+ * optabs.c (optab_for_tree_code): Handle new tree-codes.
+ (init_optabs): Initialize new optabs.
+ * genopinit.c (optabs): Define handlers for new optabs.
+
+ * tree-vect-analyze.c (vect_analyze_operations): Fail vectorization in
+ case of a phi that is marked as relevant. Call vectorizable_reduction.
+ (vect_mark_relevant): Phis may be marked as relevant.
+ (vect_mark_stmts_to_be_vectorized): The use corresponding to the
+ reduction variable in a reduction stmt does not mark its defining phi
+ as relevant. Update documentation accordingly.
+ (vect_can_advance_ivs_p): Skip reduction phis.
+ * tree-vect-transform.c (vect_get_vec_def_for_operand): Takes
+ additional argument. Handle reduction.
+ (vect_create_destination_var): Update call to vect_get_new_vect_var.
+ Handle non-vector argument.
+ (get_initial_def_for_reduction): New function.
+ (vect_create_epilog_for_reduction): New function.
+ (vectorizable_reduction): New function.
+ (vect_get_new_vect_var): Handle new vect_var_kind.
+ (vectorizable_assignment, vectorizable_operation, vectorizable_store,
+ vectorizable_condition): Update call to vect_get_new_vect_var.
+ (vect_transform_stmt): Call vectorizable_reduction.
+ (vect_update_ivs_after_vectorizer): Skip reduction phis.
+ (vect_transform_loop): Skip if stmt is both not relevant and not live.
+ * tree-vectorizer.c (reduction_code_for_scalar_code): New function.
+ (vect_is_simple_reduction): Was empty - added implementation.
+ * tree-vectorizer.h (vect_scalar_var): New enum vect_var_kind value.
+ (reduc_vec_info_type): New enum vect_def_type value.
+ * config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf,
+ reduc_umax_v4si, reduc_smin_v4si, reduc_umin_v4si, reduc_smin_v4sf,
+ reduc_plus_v4si, reduc_plus_v4sf): New define_expands.
+
+ * tree-vect-analyze.c (vect_determine_vectorization_factor): Remove
+ ENABLE_CHECKING around gcc_assert.
+ * tree-vect-transform.c (vect_do_peeling_for_loop_bound,
+ vect_do_peeling_for_alignment, vect_transform_loop,
+ vect_get_vec_def_for_operand): Likewise.
+
2005-06-18 Joseph S. Myers <joseph@codesourcery.com>
* config/ia64/ia64.c (ia64_function_arg): Set up a PARALLEL for a
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 847812dd4d6..3b2044783c2 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1825,6 +1825,160 @@
operands[3] = gen_reg_rtx (GET_MODE (operands[0]));
})
+;; Reduction
+
+(define_expand "reduc_smax_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vtmp1 = gen_reg_rtx (V4SImode);
+ rtx vtmp2 = gen_reg_rtx (V4SImode);
+ rtx vtmp3 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
+ gen_rtx_CONST_INT (SImode, 8)));
+ emit_insn (gen_smaxv4si3 (vtmp2, operands[1], vtmp1));
+ emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
+ gen_rtx_CONST_INT (SImode, 4)));
+ emit_insn (gen_smaxv4si3 (operands[0], vtmp2, vtmp3));
+ DONE;
+}")
+
+(define_expand "reduc_smax_v4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vtmp1 = gen_reg_rtx (V4SFmode);
+ rtx vtmp2 = gen_reg_rtx (V4SFmode);
+ rtx vtmp3 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
+ gen_rtx_CONST_INT (SImode, 8)));
+ emit_insn (gen_smaxv4sf3 (vtmp2, operands[1], vtmp1));
+ emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
+ gen_rtx_CONST_INT (SImode, 4)));
+ emit_insn (gen_smaxv4sf3 (operands[0], vtmp2, vtmp3));
+ DONE;
+}")
+
+(define_expand "reduc_umax_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vtmp1 = gen_reg_rtx (V4SImode);
+ rtx vtmp2 = gen_reg_rtx (V4SImode);
+ rtx vtmp3 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
+ gen_rtx_CONST_INT (SImode, 8)));
+ emit_insn (gen_umaxv4si3 (vtmp2, operands[1], vtmp1));
+ emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
+ gen_rtx_CONST_INT (SImode, 4)));
+ emit_insn (gen_umaxv4si3 (operands[0], vtmp2, vtmp3));
+ DONE;
+}")
+
+(define_expand "reduc_smin_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vtmp1 = gen_reg_rtx (V4SImode);
+ rtx vtmp2 = gen_reg_rtx (V4SImode);
+ rtx vtmp3 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
+ gen_rtx_CONST_INT (SImode, 8)));
+ emit_insn (gen_sminv4si3 (vtmp2, operands[1], vtmp1));
+ emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
+ gen_rtx_CONST_INT (SImode, 4)));
+ emit_insn (gen_sminv4si3 (operands[0], vtmp2, vtmp3));
+ DONE;
+}")
+
+(define_expand "reduc_smin_v4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vtmp1 = gen_reg_rtx (V4SFmode);
+ rtx vtmp2 = gen_reg_rtx (V4SFmode);
+ rtx vtmp3 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
+ gen_rtx_CONST_INT (SImode, 8)));
+ emit_insn (gen_sminv4sf3 (vtmp2, operands[1], vtmp1));
+ emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
+ gen_rtx_CONST_INT (SImode, 4)));
+ emit_insn (gen_sminv4sf3 (operands[0], vtmp2, vtmp3));
+ DONE;
+}")
+
+(define_expand "reduc_umin_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vtmp1 = gen_reg_rtx (V4SImode);
+ rtx vtmp2 = gen_reg_rtx (V4SImode);
+ rtx vtmp3 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
+ gen_rtx_CONST_INT (SImode, 8)));
+ emit_insn (gen_uminv4si3 (vtmp2, operands[1], vtmp1));
+ emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
+ gen_rtx_CONST_INT (SImode, 4)));
+ emit_insn (gen_uminv4si3 (operands[0], vtmp2, vtmp3));
+ DONE;
+}")
+
+(define_expand "reduc_plus_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vtmp1 = gen_reg_rtx (V4SImode);
+ rtx vtmp2 = gen_reg_rtx (V4SImode);
+ rtx vtmp3 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
+ gen_rtx_CONST_INT (SImode, 8)));
+ emit_insn (gen_addv4si3 (vtmp2, operands[1], vtmp1));
+ emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
+ gen_rtx_CONST_INT (SImode, 4)));
+ emit_insn (gen_addv4si3 (operands[0], vtmp2, vtmp3));
+ DONE;
+}")
+
+(define_expand "reduc_plus_v4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vtmp1 = gen_reg_rtx (V4SFmode);
+ rtx vtmp2 = gen_reg_rtx (V4SFmode);
+ rtx vtmp3 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
+ gen_rtx_CONST_INT (SImode, 8)));
+ emit_insn (gen_addv4sf3 (vtmp2, operands[1], vtmp1));
+ emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
+ gen_rtx_CONST_INT (SImode, 4)));
+ emit_insn (gen_addv4sf3 (operands[0], vtmp2, vtmp3));
+ DONE;
+}")
+
(define_insn "vec_realign_load_v4sf"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")
diff --git a/gcc/expr.c b/gcc/expr.c
index 8b75ed7c605..ac500b5d52c 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8356,6 +8356,16 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
return temp;
}
+ case REDUC_MAX_EXPR:
+ case REDUC_MIN_EXPR:
+ case REDUC_PLUS_EXPR:
+ {
+ op0 = expand_expr (TREE_OPERAND (exp, 0), NULL_RTX, VOIDmode, 0);
+ this_optab = optab_for_tree_code (code, type);
+ temp = expand_unop (mode, this_optab, op0, target, unsignedp);
+ gcc_assert (temp);
+ return temp;
+ }
default:
return lang_hooks.expand_expr (exp, original_target, tmode,
diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index d58f8811812..eea084d22d5 100644
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -198,7 +198,12 @@ static const char * const optabs[] =
"vec_init_optab->handlers[$A].insn_code = CODE_FOR_$(vec_init$a$)",
"vec_realign_load_optab->handlers[$A].insn_code = CODE_FOR_$(vec_realign_load_$a$)",
"vcond_gen_code[$A] = CODE_FOR_$(vcond$a$)",
- "vcondu_gen_code[$A] = CODE_FOR_$(vcondu$a$)"
+ "vcondu_gen_code[$A] = CODE_FOR_$(vcondu$a$)",
+ "reduc_smax_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_smax_$a$)",
+ "reduc_umax_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_umax_$a$)",
+ "reduc_smin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_smin_$a$)",
+ "reduc_umin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_umin_$a$)",
+ "reduc_plus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_plus_$a$)"
};
static void gen_insn (rtx);
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 2e84ac3a0b8..cd4f2cbe6d0 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -294,6 +294,15 @@ optab_for_tree_code (enum tree_code code, tree type)
case REALIGN_LOAD_EXPR:
return vec_realign_load_optab;
+ case REDUC_MAX_EXPR:
+ return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab;
+
+ case REDUC_MIN_EXPR:
+ return TYPE_UNSIGNED (type) ? reduc_umin_optab : reduc_smin_optab;
+
+ case REDUC_PLUS_EXPR:
+ return reduc_plus_optab;
+
default:
break;
}
@@ -5061,6 +5070,12 @@ init_optabs (void)
cstore_optab = init_optab (UNKNOWN);
push_optab = init_optab (UNKNOWN);
+ reduc_smax_optab = init_optab (UNKNOWN);
+ reduc_umax_optab = init_optab (UNKNOWN);
+ reduc_smin_optab = init_optab (UNKNOWN);
+ reduc_umin_optab = init_optab (UNKNOWN);
+ reduc_plus_optab = init_optab (UNKNOWN);
+
vec_extract_optab = init_optab (UNKNOWN);
vec_set_optab = init_optab (UNKNOWN);
vec_init_optab = init_optab (UNKNOWN);
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 1426e570fb9..2495fedef0b 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -231,6 +231,13 @@ enum optab_index
/* Conditional add instruction. */
OTI_addcc,
+ /* Reduction operations on a vector operand. */
+ OTI_reduc_smax,
+ OTI_reduc_umax,
+ OTI_reduc_smin,
+ OTI_reduc_umin,
+ OTI_reduc_plus,
+
/* Set specified field of vector operand. */
OTI_vec_set,
/* Extract specified field of vector operand. */
@@ -347,6 +354,12 @@ extern GTY(()) optab optab_table[OTI_MAX];
#define push_optab (optab_table[OTI_push])
#define addcc_optab (optab_table[OTI_addcc])
+#define reduc_smax_optab (optab_table[OTI_reduc_smax])
+#define reduc_umax_optab (optab_table[OTI_reduc_umax])
+#define reduc_smin_optab (optab_table[OTI_reduc_smin])
+#define reduc_umin_optab (optab_table[OTI_reduc_umin])
+#define reduc_plus_optab (optab_table[OTI_reduc_plus])
+
#define vec_set_optab (optab_table[OTI_vec_set])
#define vec_extract_optab (optab_table[OTI_vec_extract])
#define vec_init_optab (optab_table[OTI_vec_init])
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 992b1760e14..375bd449df3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2005-06-19 Dorit Nuzman <dorit@il.ibm.com>
+
+ * lib/target-supports.exp (check_effective_target_vect_reduction): New.
+ * gcc.dg/vect/vect-reduc-1.c: Now vectorizable for vect_reduction
+ targets.
+ * gcc.dg/vect/vect-reduc-2.c: Likewise.
+ * gcc.dg/vect/vect-reduc-3.c: Likewise.
+
2005-06-18 Joseph S. Myers <joseph@codesourcery.com>
* gcc.target/ia64/float80-varargs-1.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c
index 1c3d555f5d4..9fce8b3e1a3 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c
@@ -7,7 +7,6 @@
#define DIFF 242
/* Test vectorization of reduction of unsigned-int. */
-/* Not supported yet. */
int main1 (unsigned int x, unsigned int max_result)
{
@@ -52,5 +51,4 @@ int main (void)
return main1 (0, 15);
}
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "not vectorized: unsupported use in stmt." 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail {! vect_reduction} } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c
index fd5e94b5004..38693539f33 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c
@@ -8,7 +8,6 @@
#define DIFF 242
/* Test vectorization of reduction of signed-int. */
-/* Not supported yet. */
int main1 (int x, int max_result)
{
@@ -50,5 +49,4 @@ int main (void)
return main1 (0, 15);
}
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "not vectorized: unsupported use in stmt." 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail {! vect_reduction} } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c
index 86fbc4bdec1..398d9cdb34b 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c
@@ -8,7 +8,6 @@
/* Test vectorization of reduction of unsigned-int in the presence
of unknown-loop-bound. */
-/* Not supported yet. */
int main1 (int n)
{
@@ -37,5 +36,4 @@ int main (void)
return main1 (N-1);
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "not vectorized: unsupported use in stmt." 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail {! vect_reduction} } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 03781699775..4facec651e8 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -988,6 +988,23 @@ proc check_effective_target_vect_int_mult { } {
return $et_vect_int_mult_saved
}
+# Return 1 if the target supports vector reduction
+
+proc check_effective_target_vect_reduction { } {
+ global et_vect_reduction_saved
+
+ if [info exists et_vect_reduction_saved] {
+ verbose "check_effective_target_vect_reduction: using cached result" 2
+ } else {
+ set et_vect_reduction_saved 0
+ if { [istarget powerpc*-*-*] } {
+ set et_vect_reduction_saved 1
+ }
+ }
+ verbose "check_effective_target_vect_reduction: returning $et_vect_reduction_saved" 2
+ return $et_vect_reduction_saved
+}
+
# Return 1 if the target supports atomic operations on "int" and "long".
proc check_effective_target_sync_int_long { } {
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index db4b1e91306..7fa43507630 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -1736,6 +1736,10 @@ estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data)
case REALIGN_LOAD_EXPR:
+ case REDUC_MAX_EXPR:
+ case REDUC_MIN_EXPR:
+ case REDUC_PLUS_EXPR:
+
case RESX_EXPR:
*count += 1;
break;
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index a079f3efb2a..1922be7f4d3 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -1535,6 +1535,24 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
pp_string (buffer, " > ");
break;
+ case REDUC_MAX_EXPR:
+ pp_string (buffer, " REDUC_MAX_EXPR < ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (buffer, " > ");
+ break;
+
+ case REDUC_MIN_EXPR:
+ pp_string (buffer, " REDUC_MIN_EXPR < ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (buffer, " > ");
+ break;
+
+ case REDUC_PLUS_EXPR:
+ pp_string (buffer, " REDUC_PLUS_EXPR < ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (buffer, " > ");
+ break;
+
default:
NIY;
}
@@ -1817,6 +1835,9 @@ op_prio (tree op)
case ABS_EXPR:
case REALPART_EXPR:
case IMAGPART_EXPR:
+ case REDUC_MAX_EXPR:
+ case REDUC_MIN_EXPR:
+ case REDUC_PLUS_EXPR:
return 16;
case SAVE_EXPR:
@@ -1907,6 +1928,9 @@ op_symbol (tree op)
case PLUS_EXPR:
return "+";
+ case REDUC_PLUS_EXPR:
+ return "r+";
+
case NEGATE_EXPR:
case MINUS_EXPR:
return "-";
diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c
index da033c82874..1cca9e84c07 100644
--- a/gcc/tree-vect-analyze.c
+++ b/gcc/tree-vect-analyze.c
@@ -413,10 +413,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
else
vectorization_factor = nunits;
-#ifdef ENABLE_CHECKING
gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
* vectorization_factor == UNITS_PER_SIMD_WORD);
-#endif
}
}
@@ -483,8 +481,16 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
return false;
}
- gcc_assert (!STMT_VINFO_RELEVANT_P (stmt_info));
- }
+ if (STMT_VINFO_RELEVANT_P (stmt_info))
+ {
+ /* Most likely a reduction-like computation that is used
+ in the loop. */
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
+ LOOP_LOC (loop_vinfo)))
+ fprintf (vect_dump, "not vectorized: unsupported pattern.");
+ return false;
+ }
+ }
for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
{
@@ -541,7 +547,12 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
if (STMT_VINFO_LIVE_P (stmt_info))
{
- ok = vectorizable_live_operation (stmt, NULL, NULL);
+ ok = vectorizable_reduction (stmt, NULL, NULL);
+
+ if (ok)
+ need_to_vectorize = true;
+ else
+ ok = vectorizable_live_operation (stmt, NULL, NULL);
if (!ok)
{
@@ -2148,13 +2159,13 @@ vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
STMT_VINFO_LIVE_P (stmt_info) |= live_p;
+ STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
if (TREE_CODE (stmt) == PHI_NODE)
- /* Don't mark as relevant because it's not going to vectorized. */
+ /* Don't put phi-nodes in the worklist. Phis that are marked relevant
+ or live will fail vectorization later on. */
return;
- STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
-
if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
&& STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
{
@@ -2337,19 +2348,33 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
Exceptions:
- - if USE is used only for address computations (e.g. array indexing),
+ (case 1)
+ If USE is used only for address computations (e.g. array indexing),
which does not need to be directly vectorized, then the
liveness/relevance of the respective DEF_STMT is left unchanged.
- - if STMT has been identified as defining a reduction variable, then:
- STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
- STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
- because even though STMT is classified as live (since it defines a
- value that is used across loop iterations) and irrelevant (since it
- is not used inside the loop), it will be vectorized, and therefore
- the corresponding DEF_STMTs need to marked as relevant.
+ (case 2)
+ If STMT has been identified as defining a reduction variable, then
+ we have two cases:
+ (case 2.1)
+ The last use of STMT is the reduction-variable, which is defined
+ by a loop-header-phi. We don't want to mark the phi as live or
+ relevant (because it does not need to be vectorized, it is handled
+ as part of the vectorization of the reduction), so in this case we
+ skip the call to vect_mark_relevant.
+ (case 2.2)
+ The rest of the uses of STMT are defined in the loop body. For
+ the def_stmt of these uses we want to set liveness/relevance
+ as follows:
+ STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
+ STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
+ because even though STMT is classified as live (since it defines a
+ value that is used across loop iterations) and irrelevant (since it
+ is not used inside the loop), it will be vectorized, and therefore
+ the corresponding DEF_STMTs need to be marked as relevant.
*/
+ /* case 2.2: */
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
{
gcc_assert (!relevant_p && live_p);
@@ -2359,42 +2384,42 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
{
- /* We are only interested in uses that need to be vectorized. Uses
- that are used for address computation are not considered relevant.
+ /* case 1: we are only interested in uses that need to be vectorized.
+ Uses that are used for address computation are not considered
+ relevant.
*/
- if (exist_non_indexing_operands_for_use_p (use, stmt))
- {
- if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
- {
- if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
- LOOP_LOC (loop_vinfo)))
- fprintf (vect_dump,
- "not vectorized: unsupported use in stmt.");
- VEC_free (tree, heap, worklist);
- return false;
- }
+ if (!exist_non_indexing_operands_for_use_p (use, stmt))
+ continue;
- if (!def_stmt || IS_EMPTY_STMT (def_stmt))
- continue;
+ if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
+ LOOP_LOC (loop_vinfo)))
+ fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
+ VEC_free (tree, heap, worklist);
+ return false;
+ }
- if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
- {
- fprintf (vect_dump, "worklist: examine use %d: ", i);
- print_generic_expr (vect_dump, use, TDF_SLIM);
- }
+ if (!def_stmt || IS_EMPTY_STMT (def_stmt))
+ continue;
- bb = bb_for_stmt (def_stmt);
- if (!flow_bb_inside_loop_p (loop, bb))
- continue;
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "worklist: examine use %d: ", i);
+ print_generic_expr (vect_dump, use, TDF_SLIM);
+ }
- if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
- {
- fprintf (vect_dump, "def_stmt: ");
- print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
- }
+ bb = bb_for_stmt (def_stmt);
+ if (!flow_bb_inside_loop_p (loop, bb))
+ continue;
- vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
- }
+ /* case 2.1: the reduction-use does not mark the defining-phi
+ as relevant. */
+ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
+ && TREE_CODE (def_stmt) == PHI_NODE)
+ continue;
+
+ vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
}
} /* while worklist */
@@ -2445,6 +2470,15 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
continue;
}
+ /* Skip reduction phis. */
+
+ if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "reduc phi. skip.");
+ continue;
+ }
+
/* Analyze the evolution function. */
access_fn = instantiate_parameters
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 4695e54ead4..2b4d1d774af 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -42,6 +42,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "langhooks.h"
#include "tree-pass.h"
#include "toplev.h"
+#include "real.h"
/* Utility functions for the code transformation. */
static bool vect_transform_stmt (tree, block_stmt_iterator *);
@@ -52,12 +53,13 @@ static tree vect_create_data_ref_ptr
static tree vect_create_index_for_vector_ref (loop_vec_info);
static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
-static tree vect_get_vec_def_for_operand (tree, tree);
+static tree vect_get_vec_def_for_operand (tree, tree, tree *);
static tree vect_init_vector (tree, tree);
static void vect_finish_stmt_generation
(tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
static bool vect_is_simple_cond (tree, loop_vec_info);
static void update_vuses_to_preheader (tree, struct loop*);
+static tree get_initial_def_for_reduction (tree, tree, tree *);
/* Utility function dealing with loop peeling (not peeling itself). */
static void vect_generate_tmps_on_preheader
@@ -85,10 +87,20 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
const char *prefix;
tree new_vect_var;
- if (var_kind == vect_simple_var)
- prefix = "vect_";
- else
+ switch (var_kind)
+ {
+ case vect_simple_var:
+ prefix = "vect_";
+ break;
+ case vect_scalar_var:
+ prefix = "stmp_";
+ break;
+ case vect_pointer_var:
prefix = "vect_p";
+ break;
+ default:
+ gcc_unreachable ();
+ }
if (name)
new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
@@ -435,13 +447,18 @@ vect_create_destination_var (tree scalar_dest, tree vectype)
{
tree vec_dest;
const char *new_name;
+ tree type;
+ enum vect_var_kind kind;
+
+ kind = vectype ? vect_simple_var : vect_scalar_var;
+ type = vectype ? vectype : TREE_TYPE (scalar_dest);
gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
new_name = get_name (scalar_dest);
if (!new_name)
new_name = "var_";
- vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
+ vec_dest = vect_get_new_vect_var (type, kind, new_name);
add_referenced_tmp_var (vec_dest);
return vec_dest;
@@ -502,7 +519,7 @@ vect_init_vector (tree stmt, tree vector_var)
needs to be introduced. */
static tree
-vect_get_vec_def_for_operand (tree op, tree stmt)
+vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
tree vec_oprnd;
tree vec_stmt;
@@ -512,6 +529,7 @@ vect_get_vec_def_for_operand (tree op, tree stmt)
tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree vec_inv;
tree vec_cst;
tree t = NULL_TREE;
@@ -542,14 +560,14 @@ vect_get_vec_def_for_operand (tree op, tree stmt)
}
}
- /* FORNOW */
- gcc_assert (dt != vect_reduction_def);
-
switch (dt)
{
/* Case 1: operand is a constant. */
case vect_constant_def:
{
+ if (scalar_def)
+ *scalar_def = op;
+
/* Create 'vect_cst_ = {cst,cst,...,cst}' */
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
@@ -565,6 +583,9 @@ vect_get_vec_def_for_operand (tree op, tree stmt)
/* Case 2: operand is defined outside the loop - loop invariant. */
case vect_invariant_def:
{
+ if (scalar_def)
+ *scalar_def = def;
+
/* Create 'vec_inv = {inv,inv,..,inv}' */
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "Create vector_inv.");
@@ -581,6 +602,9 @@ vect_get_vec_def_for_operand (tree op, tree stmt)
/* Case 3: operand is defined inside the loop. */
case vect_loop_def:
{
+ if (scalar_def)
+ *scalar_def = def_stmt;
+
/* Get the def from the vectorized stmt. */
def_stmt_info = vinfo_for_stmt (def_stmt);
vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
@@ -589,7 +613,17 @@ vect_get_vec_def_for_operand (tree op, tree stmt)
return vec_oprnd;
}
- /* Case 4: operand is defined by loop-header phi - induction. */
+ /* Case 4: operand is defined by a loop header phi - reduction */
+ case vect_reduction_def:
+ {
+ gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
+
+ /* Get the def before the loop */
+ op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
+ return get_initial_def_for_reduction (stmt, op, scalar_def);
+ }
+
+ /* Case 5: operand is defined by loop-header phi - induction. */
case vect_induction_def:
{
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
@@ -618,10 +652,8 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
}
-#ifdef ENABLE_CHECKING
/* Make sure bsi points to the stmt that is being vectorized. */
gcc_assert (stmt == bsi_stmt (*bsi));
-#endif
#ifdef USE_MAPPED_LOCATION
SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
@@ -631,6 +663,458 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
}
+#define ADJUST_IN_EPILOG 1
+
+/* Function get_initial_def_for_reduction
+
+   Input:
+   STMT - a stmt that performs a reduction operation in the loop.
+   INIT_VAL - the initial value of the reduction variable
+
+   Output:
+   SCALAR_DEF - if non-NULL, receives a scalar that the caller is expected
+        to combine with the final result of the reduction, using the
+        operation that STMT performs (used for "ADJUST_IN_EPILOG" - see
+        below).  When no adjustment is needed, this is a value that leaves
+        the result unchanged under that operation: zero for add, INIT_VAL
+        for min/max (every element of the vector already starts at INIT_VAL,
+        so combining with it once more is harmless).
+   Return a vector variable, initialized according to the operation that STMT
+   performs. This vector will be used as the initial value of the
+   vector of partial results.
+
+   Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
+     add:         [0,0,...,0,0]
+     mult:        [1,1,...,1,1]
+     min/max:     [init_val,init_val,..,init_val,init_val]
+     bit and/or:  [init_val,init_val,..,init_val,init_val]
+   and when necessary (e.g. add/mult case) let the caller know
+   that it needs to adjust the result by init_val.
+
+   Option2: Initialize the vector as follows:
+     add:         [0,0,...,0,init_val]
+     mult:        [1,1,...,1,init_val]
+     min/max:     [init_val,init_val,...,init_val]
+     bit and/or:  [init_val,init_val,...,init_val]
+   and no adjustments are needed.
+
+   For example, for the following code:
+
+   s = init_val;
+   for (i=0;i<n;i++)
+     s = s + a[i];
+
+   STMT is 's = s + a[i]', and the reduction variable is 's'.
+   For a vector of 4 units, we want to return either [0,0,0,init_val],
+   or [0,0,0,0] and let the caller know that it needs to adjust
+   the result at the end by 'init_val'.
+
+   FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
+   FORNOW: Only PLUS_EXPR, MIN_EXPR and MAX_EXPR are handled; any other
+   operation reaches gcc_unreachable.
+   TODO: Use some cost-model to estimate which scheme is more profitable.
+*/
+
+static tree
+get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
+{
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  int nelements;
+  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
+  tree type = TREE_TYPE (init_val);
+  tree def;
+  tree epilog_def;
+  tree vec, t = NULL_TREE;
+  int i;
+
+  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
+
+  switch (code)
+    {
+    case PLUS_EXPR:
+      /* Build the zero in TYPE itself, so that the vector elements (and the
+         value handed back through SCALAR_DEF) have the type of the
+         reduction variable rather than plain 'int'.  */
+      def = INTEGRAL_TYPE_P (type) ? build_int_cst (type, 0)
+                                   : build_real (type, dconst0);
+#ifdef ADJUST_IN_EPILOG
+      /* All the 'nunits' elements are set to 0. The final result will be
+         adjusted by 'init_val' at the loop epilog.  */
+      nelements = nunits;
+      epilog_def = init_val;
+#else
+      /* 'nunits - 1' elements are set to 0; one more element is set to
+         'init_val'.  No further adjustments at the epilog are needed, so
+         the epilog value is the additive identity.  */
+      nelements = nunits - 1;
+      epilog_def = def;
+#endif
+      break;
+
+    case MIN_EXPR:
+    case MAX_EXPR:
+      def = init_val;
+      nelements = nunits;
+      /* No adjustment is needed.  Zero is NOT neutral for min/max (it would
+         clamp the result), whereas INIT_VAL is: it already takes part in
+         every element of the vector of partial results.  */
+      epilog_def = init_val;
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  for (i = nelements - 1; i >= 0; --i)
+    t = tree_cons (NULL_TREE, def, t);
+
+  if (nelements == nunits - 1)
+    {
+      /* Add the one remaining element, which carries 'init_val'.  */
+      t = tree_cons (NULL_TREE, init_val, t);
+      nelements += 1;
+    }
+  gcc_assert (nelements == nunits);
+
+  if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
+    vec = build_vector (vectype, t);
+  else
+    vec = build_constructor (vectype, t);
+
+  /* SCALAR_DEF is optional, as in vect_get_vec_def_for_operand.  */
+  if (scalar_def)
+    *scalar_def = epilog_def;
+
+  return vect_init_vector (stmt, vec);
+}
+
+
+/* Function vect_create_epilog_for_reduction:
+
+   Create code at the loop-epilog to finalize the result of a reduction
+   computation.
+
+   VECT_DEF is a vector of partial results. We need to "reduce" it
+   into a single result, by applying the operation REDUC_CODE on the
+   partial-results-vector. For this, we need to create a new phi node at the
+   loop exit to preserve loop-closed form, as illustrated below.
+
+   STMT is the original scalar reduction stmt that is being vectorized.
+   REDUCTION_OP is the scalar reduction-variable.
+   REDUC_CODE is the tree-code used to reduce the vector of partial results.
+   REDUCTION_PHI is the phi-node that carries the reduction computation.
+   This function also sets the arguments for the REDUCTION_PHI:
+   The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
+   The loop-latch argument is VECT_DEF - the vector of partial sums.
+
+   This function transforms this:
+
+     loop:
+       vec_def = phi <null, null>    # REDUCTION_PHI
+       ....
+       VECT_DEF = ...
+
+     loop_exit:
+       s_out0 = phi <s_loop>         # EXIT_PHI
+
+       use <s_out0>
+       use <s_out0>
+
+   Into:
+
+     loop:
+       vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
+       ....
+       VECT_DEF = ...
+
+     loop_exit:
+       s_out0 = phi <s_loop>         # EXIT_PHI
+       v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
+
+       v_out2 = reduc_expr <v_out1>
+       s_out3 = extract_field <v_out2, 0>
+
+       use <s_out3>
+       use <s_out3>
+*/
+
+static void
+vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
+                                  enum tree_code reduc_code, tree reduction_phi)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  basic_block exit_bb;
+  tree scalar_dest = TREE_OPERAND (stmt, 0);
+  tree scalar_type = TREE_TYPE (scalar_dest);
+  tree new_phi;
+  block_stmt_iterator exit_bsi;
+  tree vec_dest;
+  tree new_temp;
+  tree epilog_stmt;
+  tree new_scalar_dest, exit_phi;
+  tree bitsize, bitpos;
+  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
+  tree scalar_initial_def;
+  tree vec_initial_def;
+  tree orig_name;
+  imm_use_iterator imm_iter;
+  use_operand_p use_p;
+
+  /*** 1. Create the reduction def-use cycle  ***/
+
+  /* 1.1 set the loop-entry arg of the reduction-phi:  */
+  /* For the case of reduction, vect_get_vec_def_for_operand returns
+     the scalar def before the loop, that defines the initial value
+     of the reduction variable.  */
+  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
+                                                  &scalar_initial_def);
+  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
+
+
+  /* 1.2 set the loop-latch arg for the reduction-phi:  */
+  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
+
+  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+    {
+      fprintf (vect_dump, "transform reduction: created def-use cycle:");
+      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
+      fprintf (vect_dump, "\n");
+      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
+    }
+
+
+  /*** 2. Create epilog code ***/
+
+  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
+        v_out1 = phi <v_loop>  */
+
+  exit_bb = loop->single_exit->dest;
+  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
+  SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
+
+  exit_bsi = bsi_start (exit_bb);
+
+
+  /* 2.2 Create:
+        v_out2 = reduc_expr <v_out1>
+        s_out3 = extract_field <v_out2, 0>  */
+
+  vec_dest = vect_create_destination_var (scalar_dest, vectype);
+  epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+                        build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
+  new_temp = make_ssa_name (vec_dest, epilog_stmt);
+  TREE_OPERAND (epilog_stmt, 0) = new_temp;
+  bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+
+  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+    {
+      fprintf (vect_dump, "transform reduction: created epilog code:");
+      print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+    }
+
+  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
+  bitsize = TYPE_SIZE (scalar_type);
+
+  /* The result is in the low order bits.  */
+  if (BITS_BIG_ENDIAN)
+    bitpos = size_binop (MULT_EXPR,
+                         bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
+                         TYPE_SIZE (scalar_type));
+  else
+    bitpos = bitsize_zero_node;
+
+  epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+                        build3 (BIT_FIELD_REF, scalar_type,
+                                new_temp, bitsize, bitpos));
+  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+  TREE_OPERAND (epilog_stmt, 0) = new_temp;
+  bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+
+  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+    print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+
+
+  /* 2.3 Adjust the final result by the initial value of the reduction
+        variable.  The stmt is emitted unconditionally, so when no
+        adjustment is needed 'scalar_initial_def' must be a value that
+        leaves the result unchanged under CODE.
+
+        Create:
+        s_out = scalar_expr <s_out, scalar_initial_def>  */
+
+  epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+                        build2 (code, scalar_type, new_temp,
+                                scalar_initial_def));
+  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+  TREE_OPERAND (epilog_stmt, 0) = new_temp;
+  bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+
+  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+    print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+
+
+  /* 2.4 Replace uses of s_out0 with uses of s_out3  */
+
+  /* Find the loop-closed-use at the loop exit of the original
+     scalar result.  (The reduction result is expected to have
+     two immediate uses - one at the latch block, and one at the
+     loop exit).  */
+  exit_phi = NULL;
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
+    {
+      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
+        {
+          exit_phi = USE_STMT (use_p);
+          break;
+        }
+    }
+
+  /* The scan above may find no use outside the loop; fail with a clear
+     assertion instead of reading the result of a NULL phi.  */
+  gcc_assert (exit_phi);
+
+  orig_name = PHI_RESULT (exit_phi);
+
+  FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
+    SET_USE (use_p, new_temp);
+}
+
+
+/* Function vectorizable_reduction.
+
+   Check if STMT performs a reduction operation that can be vectorized.
+   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
+   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
+   Return FALSE if not a vectorizable STMT, TRUE otherwise.
+
+   STMT is expected to have the form 'a2 = operation (a3, a1)', where the
+   first operand is defined by a stmt inside the loop and the second operand
+   is defined by the reduction phi (this layout is asserted below).  */
+
+bool
+vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
+{
+  tree vec_dest;
+  tree scalar_dest;
+  tree op0, op1;
+  tree loop_vec_def;
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree operation;
+  enum tree_code code, reduc_code = 0;
+  enum machine_mode vec_mode;
+  int op_type;
+  optab optab, reduc_optab;
+  tree new_temp;
+  tree def0, def1, def_stmt0, def_stmt1;
+  enum vect_def_type dt0, dt1;
+  tree new_phi;
+  bool is_simple_use0;
+  bool is_simple_use1;
+
+  /* Is vectorizable reduction?  */
+
+  /* Not supportable if the reduction variable is used in the loop.  */
+  if (STMT_VINFO_RELEVANT_P (stmt_info))
+    return false;
+
+  if (!STMT_VINFO_LIVE_P (stmt_info))
+    return false;
+
+  /* Make sure it was already recognized as a reduction pattern.  */
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
+    return false;
+
+  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
+
+  operation = TREE_OPERAND (stmt, 1);
+  code = TREE_CODE (operation);
+  op_type = TREE_CODE_LENGTH (code);
+
+  if (op_type != binary_op)
+    return false;
+
+  op0 = TREE_OPERAND (operation, 0);
+  op1 = TREE_OPERAND (operation, 1);
+  scalar_dest = TREE_OPERAND (stmt, 0);
+
+  /* Check both operands.  The first is expected to be defined inside the
+     loop; the second is expected to be defined by the reduction phi.  */
+  is_simple_use0 =
+    vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
+  is_simple_use1 =
+    vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
+
+  gcc_assert (is_simple_use0);
+  gcc_assert (is_simple_use1);
+  gcc_assert (dt0 == vect_loop_def);
+  gcc_assert (dt1 == vect_reduction_def);
+  gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
+  gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
+
+  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
+    return false;
+
+  /* Supportable by target?  */
+
+  /* check support for the operation in the loop  */
+  optab = optab_for_tree_code (code, vectype);
+  if (!optab)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+        fprintf (vect_dump, "no optab.");
+      return false;
+    }
+  vec_mode = TYPE_MODE (vectype);
+  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+        fprintf (vect_dump, "op not supported by target.");
+      return false;
+    }
+
+  /* check support for the epilog operation  */
+  if (!reduction_code_for_scalar_code (code, &reduc_code))
+    return false;
+  reduc_optab = optab_for_tree_code (reduc_code, vectype);
+  if (!reduc_optab)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+        fprintf (vect_dump, "no optab for reduction.");
+      return false;
+    }
+  if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
+    {
+      /* Worded differently from the in-loop check above, so that the dump
+         tells which of the two operations the target is missing.  */
+      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+        fprintf (vect_dump, "reduc op not supported by target.");
+      return false;
+    }
+
+  if (!vec_stmt) /* transformation not required.  */
+    {
+      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+      return true;
+    }
+
+  /** Transform.  **/
+
+  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+    fprintf (vect_dump, "transform reduction.");
+
+  /* Create the destination vector  */
+  vec_dest = vect_create_destination_var (scalar_dest, vectype);
+
+
+  /* Create the reduction-phi that defines the reduction-operand.  */
+  new_phi = create_phi_node (vec_dest, loop->header);
+
+
+  /* Prepare the operand that is defined inside the loop body  */
+  loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
+  gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def))));
+
+
+  /* Create the vectorized operation that computes the partial results  */
+  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+                      build2 (code, vectype, loop_vec_def,
+                              PHI_RESULT (new_phi)));
+  new_temp = make_ssa_name (vec_dest, *vec_stmt);
+  TREE_OPERAND (*vec_stmt, 0) = new_temp;
+  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
+
+
+  /* Finalize the reduction-phi (set its arguments) and create the
+     epilog reduction code.  */
+  vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
+  return true;
+}
+
+
/* Function vectorizable_assignment.
Check if STMT performs an assignment (copy) that can be vectorized.
@@ -688,7 +1172,7 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
/* Handle use. */
op = TREE_OPERAND (stmt, 1);
- vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
+ vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
/* Arguments are ready. create the new vector stmt. */
*vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
@@ -846,12 +1330,12 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
/* Handle uses. */
op0 = TREE_OPERAND (operation, 0);
- vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
+ vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
if (op_type == binary_op)
{
op1 = TREE_OPERAND (operation, 1);
- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
+ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
}
/* Arguments are ready. create the new vector stmt. */
@@ -940,7 +1424,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
/* Handle use - get the vectorized def from the defining stmt. */
- vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
+ vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
/* Handle def. */
/* FORNOW: make sure the data reference is aligned. */
@@ -1387,11 +1871,11 @@ vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
/* Handle cond expr. */
vec_cond_lhs =
- vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt);
+ vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
vec_cond_rhs =
- vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt);
- vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt);
- vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt);
+ vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
+ vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
+ vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
/* Arguments are ready. create the new vector stmt. */
vec_compare = build2 (TREE_CODE (cond_expr), vectype,
@@ -1460,8 +1944,23 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
if (STMT_VINFO_LIVE_P (stmt_info))
{
- done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
- gcc_assert (done);
+ switch (STMT_VINFO_TYPE (stmt_info))
+ {
+ case reduc_vec_info_type:
+ done = vectorizable_reduction (stmt, bsi, &vec_stmt);
+ gcc_assert (done);
+ break;
+
+ default:
+ done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
+ gcc_assert (done);
+ }
+
+ if (vec_stmt)
+ {
+ gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
+ STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+ }
}
return is_store;
@@ -1717,6 +2216,14 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
continue;
}
+ /* Skip reduction phis. */
+ if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "reduc phi. skip.");
+ continue;
+ }
+
access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
gcc_assert (access_fn);
evolution_part =
@@ -1770,9 +2277,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
struct loop *new_loop;
edge update_e;
basic_block preheader;
-#ifdef ENABLE_CHECKING
int loop_num;
-#endif
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
@@ -1787,14 +2292,12 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
&ratio_mult_vf_name, ratio);
-#ifdef ENABLE_CHECKING
loop_num = loop->num;
-#endif
new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
ratio_mult_vf_name, ni_name, false);
-#ifdef ENABLE_CHECKING
gcc_assert (new_loop);
gcc_assert (loop_num == loop->num);
+#ifdef ENABLE_CHECKING
slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif
@@ -2010,8 +2513,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
new_loop =
slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
niters_of_prolog_loop, ni_name, true);
-#ifdef ENABLE_CHECKING
gcc_assert (new_loop);
+#ifdef ENABLE_CHECKING
slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif
@@ -2051,7 +2554,6 @@ vect_transform_loop (loop_vec_info loop_vinfo,
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "=== vec_transform_loop ===");
-
/* Peel the loop if there are data refs with unknown alignment.
Only one data ref with unknown store is allowed. */
@@ -2104,18 +2606,18 @@ vect_transform_loop (loop_vec_info loop_vinfo,
}
stmt_info = vinfo_for_stmt (stmt);
gcc_assert (stmt_info);
- if (!STMT_VINFO_RELEVANT_P (stmt_info))
+ if (!STMT_VINFO_RELEVANT_P (stmt_info)
+ && !STMT_VINFO_LIVE_P (stmt_info))
{
bsi_next (&si);
continue;
}
-#ifdef ENABLE_CHECKING
/* FORNOW: Verify that all stmts operate on the same number of
units and no inner unrolling is necessary. */
gcc_assert
(TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
== vectorization_factor);
-#endif
+
/* -------- vectorize statement ------------ */
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "transform statement.");
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 1f612c5444c..1967e537125 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -575,9 +575,7 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
if (!current_new_name)
continue;
}
-#ifdef ENABLE_CHECKING
gcc_assert (get_current_def (current_new_name) == NULL_TREE);
-#endif
set_current_def (current_new_name, PHI_RESULT (new_phi));
bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
@@ -761,9 +759,7 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
LOC loop_loc;
orig_cond = get_loop_exit_condition (loop);
-#ifdef ENABLE_CHECKING
gcc_assert (orig_cond);
-#endif
loop_cond_bsi = bsi_for_stmt (orig_cond);
standard_iv_increment_position (loop, &incr_bsi, &insert_after);
@@ -1354,6 +1350,7 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
STMT_VINFO_VECT_STEP (res) = NULL_TREE;
STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
+ STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
return res;
}
@@ -1744,9 +1741,44 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
}
+/* Function reduction_code_for_scalar_code
+
+   Map the tree-code of a scalar reduction operation to the tree-code that
+   reduces a whole vector of partial results.
+
+   Input:
+   CODE - tree_code of a reduction operation.
+
+   Output:
+   REDUC_CODE - the corresponding tree-code to be used to reduce the
+      vector of partial results into a single scalar result (which
+      will also reside in a vector).  Left untouched when no
+      corresponding code exists.
+
+   Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
+
+bool
+reduction_code_for_scalar_code (enum tree_code code,
+                                enum tree_code *reduc_code)
+{
+  if (code == MAX_EXPR)
+    *reduc_code = REDUC_MAX_EXPR;
+  else if (code == MIN_EXPR)
+    *reduc_code = REDUC_MIN_EXPR;
+  else if (code == PLUS_EXPR)
+    *reduc_code = REDUC_PLUS_EXPR;
+  else
+    /* No reduction counterpart for this operation.  */
+    return false;
+
+  return true;
+}
+
+
/* Function vect_is_simple_reduction
- TODO:
Detect a cross-iteration def-use cucle that represents a simple
reduction computation. We look for the following pattern:
@@ -1756,18 +1788,189 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
a2 = operation (a3, a1)
such that:
- 1. operation is...
- 2. no uses for a2 in the loop (elsewhere) */
+ 1. operation is commutative and associative and it is safe to
+ change the order of the computation.
+ 2. no uses for a2 in the loop (a2 is used out of the loop)
+ 3. no uses of a1 in the loop besides the reduction operation.
+
+ Condition 1 is tested here.
+ Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. */
tree
vect_is_simple_reduction (struct loop *loop ATTRIBUTE_UNUSED,
tree phi ATTRIBUTE_UNUSED)
{
- /* FORNOW */
- if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
- fprintf (vect_dump, "reduction: unknown pattern.");
+ edge latch_e = loop_latch_edge (loop);
+ tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
+ tree def_stmt, def1, def2;
+ enum tree_code code;
+ int op_type;
+ tree operation, op1, op2;
+ tree type;
+
+ if (TREE_CODE (loop_arg) != SSA_NAME)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "reduction: not ssa_name: ");
+ print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
+ }
+ return NULL_TREE;
+ }
- return NULL_TREE;
+ def_stmt = SSA_NAME_DEF_STMT (loop_arg);
+ if (!def_stmt)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "reduction: no def_stmt.");
+ return NULL_TREE;
+ }
+
+ if (TREE_CODE (def_stmt) != MODIFY_EXPR)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
+ }
+ return NULL_TREE;
+ }
+
+ operation = TREE_OPERAND (def_stmt, 1);
+ code = TREE_CODE (operation);
+ if (!commutative_tree_code (code) || !associative_tree_code (code))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "reduction: not commutative/associative: ");
+ print_generic_expr (vect_dump, operation, TDF_SLIM);
+ }
+ return NULL_TREE;
+ }
+
+ op_type = TREE_CODE_LENGTH (code);
+ if (op_type != binary_op)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "reduction: not binary operation: ");
+ print_generic_expr (vect_dump, operation, TDF_SLIM);
+ }
+ return NULL_TREE;
+ }
+
+ op1 = TREE_OPERAND (operation, 0);
+ op2 = TREE_OPERAND (operation, 1);
+ if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "reduction: uses not ssa_names: ");
+ print_generic_expr (vect_dump, operation, TDF_SLIM);
+ }
+ return NULL_TREE;
+ }
+
+ /* Check that it's ok to change the order of the computation */
+ type = TREE_TYPE (operation);
+ if (type != TREE_TYPE (op1) || type != TREE_TYPE (op2))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "reduction: multiple types: operation type: ");
+ print_generic_expr (vect_dump, type, TDF_SLIM);
+ fprintf (vect_dump, ", operands types: ");
+ print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
+ fprintf (vect_dump, ",");
+ print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
+ }
+ return NULL_TREE;
+ }
+
+ /* CHECKME: check for !flag_finite_math_only too? */
+ if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
+ {
+ /* Changing the order of operations changes the semantics. */
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
+ print_generic_expr (vect_dump, operation, TDF_SLIM);
+ }
+ return NULL_TREE;
+ }
+ else if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && flag_trapv)
+ {
+ /* Changing the order of operations changes the semantics. */
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "reduction: unsafe int math optimization: ");
+ print_generic_expr (vect_dump, operation, TDF_SLIM);
+ }
+ return NULL_TREE;
+ }
+
+ /* Reduction is safe. We're dealing with one of the following:
+ 1) integer arithmetic and no trapv
+ 2) floating point arithmetic, and special flags permit this optimization.
+ */
+ def1 = SSA_NAME_DEF_STMT (op1);
+ def2 = SSA_NAME_DEF_STMT (op2);
+ if (!def1 || !def2)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "reduction: no defs for operands: ");
+ print_generic_expr (vect_dump, operation, TDF_SLIM);
+ }
+ return NULL_TREE;
+ }
+
+ if (TREE_CODE (def1) == MODIFY_EXPR
+ && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
+ && def2 == phi)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "detected reduction:");
+ print_generic_expr (vect_dump, operation, TDF_SLIM);
+ }
+ return def_stmt;
+ }
+ else if (TREE_CODE (def2) == MODIFY_EXPR
+ && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
+ && def1 == phi)
+ {
+ use_operand_p use;
+ ssa_op_iter iter;
+
+ /* Swap operands (just for simplicity - so that the rest of the code
+ can assume that the reduction variable is always the last (second)
+ argument). */
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "detected reduction: need to swap operands:");
+ print_generic_expr (vect_dump, operation, TDF_SLIM);
+ }
+
+ /* CHECKME: the loop below rewrites the use operands of DEF_STMT in
+ place, exchanging OP1 and OP2. */
+ FOR_EACH_SSA_USE_OPERAND (use, def_stmt, iter, SSA_OP_USE)
+ {
+ tree tuse = USE_FROM_PTR (use);
+ if (tuse == op1)
+ SET_USE (use, op2);
+ else if (tuse == op2)
+ SET_USE (use, op1);
+ }
+ return def_stmt;
+ }
+ else
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ {
+ fprintf (vect_dump, "reduction: unknown pattern.");
+ print_generic_expr (vect_dump, operation, TDF_SLIM);
+ }
+ return NULL_TREE;
+ }
}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 332025b03e6..3c2712dbbad 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -39,7 +39,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
/* Used for naming of new temporaries. */
enum vect_var_kind {
vect_simple_var,
- vect_pointer_var
+ vect_pointer_var,
+ vect_scalar_var
};
/* Defines type of operation: unary or binary. */
@@ -155,7 +156,8 @@ enum stmt_vec_info_type {
store_vec_info_type,
op_vec_info_type,
assignment_vec_info_type,
- condition_vec_info_type
+ condition_vec_info_type,
+ reduc_vec_info_type
};
typedef struct data_reference *dr_p;
@@ -345,6 +347,8 @@ extern tree vect_is_simple_reduction (struct loop *, tree);
extern bool vect_can_force_dr_alignment_p (tree, unsigned int);
extern enum dr_alignment_support vect_supportable_dr_alignment
(struct data_reference *);
+extern bool reduction_code_for_scalar_code (enum tree_code, enum tree_code *);
+
/* Creation and deletion of loop and stmt info structs. */
extern loop_vec_info new_loop_vec_info (struct loop *loop);
extern void destroy_loop_vec_info (loop_vec_info);
@@ -363,6 +367,7 @@ extern bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *);
+extern bool vectorizable_reduction (tree, block_stmt_iterator *, tree *);
/* Driver for transformation stage. */
extern void vect_transform_loop (loop_vec_info, struct loops *);
diff --git a/gcc/tree.def b/gcc/tree.def
index b8d5a36daf7..2b8c2806f4a 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -947,6 +947,16 @@ DEFTREECODE (REALIGN_LOAD_EXPR, "realign_load", tcc_expression, 3)
DEFTREECODE (TARGET_MEM_REF, "target_mem_ref", tcc_reference, 7)
+/* Reduction operations.
+ Operations that take a vector of elements and "reduce" it to a scalar
+ result (e.g. summing the elements of the vector, finding the minimum over
+ the vector elements, etc).
+ Operand 0 is the input vector (these codes take a single operand).
+ The result is also a vector; the first element in it has the result. */
+DEFTREECODE (REDUC_MAX_EXPR, "reduc_max_expr", tcc_unary, 1)
+DEFTREECODE (REDUC_MIN_EXPR, "reduc_min_expr", tcc_unary, 1)
+DEFTREECODE (REDUC_PLUS_EXPR, "reduc_plus_expr", tcc_unary, 1)
+
/*
Local variables:
mode:c