author     dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>    2005-06-18 13:18:52 +0000
committer  dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>    2005-06-18 13:18:52 +0000
commit     ea8f3370b1692d96d1289a6ed7a757fd5b0685bd (patch)
tree       76522a399271e0448291fb8b63bbbf3f898dc165 /gcc
parent     b5a8f2016cff1d18bb6c8ffc411929756d628c0d (diff)
download   gcc-ea8f3370b1692d96d1289a6ed7a757fd5b0685bd.tar.gz
* tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): New
tree-codes.
* optabs.h (OTI_reduc_smax, OTI_reduc_umax, OTI_reduc_smin,
OTI_reduc_umin, OTI_reduc_plus): New optabs for reduction.
(reduc_smax_optab, reduc_umax_optab, reduc_smin_optab, reduc_umin_optab,
reduc_plus_optab): New optabs for reduction.
* expr.c (expand_expr_real_1): Handle new tree-codes.
* tree-inline.c (estimate_num_insns_1): Handle new tree-codes.
* tree-pretty-print.c (dump_generic_node, op_prio, op_symbol): Handle
new tree-codes.
* optabs.c (optab_for_tree_code): Handle new tree-codes.
(init_optabs): Initialize new optabs.
* genopinit.c (optabs): Define handlers for new optabs.
* tree-vect-analyze.c (vect_analyze_operations): Fail vectorization in
case of a phi that is marked as relevant. Call vectorizable_reduction.
(vect_mark_relevant): Phis may be marked as relevant.
(vect_mark_stmts_to_be_vectorized): The use corresponding to the
reduction variable in a reduction stmt does not mark its defining phi
as relevant. Update documentation accordingly.
(vect_can_advance_ivs_p): Skip reduction phis.
* tree-vect-transform.c (vect_get_vec_def_for_operand): Takes
additional argument. Handle reduction.
(vect_create_destination_var): Update call to vect_get_new_vect_var.
Handle non-vector argument.
(get_initial_def_for_reduction): New function.
(vect_create_epilog_for_reduction): New function.
(vectorizable_reduction): New function.
(vect_get_new_vect_var): Handle new vect_var_kind.
(vectorizable_assignment, vectorizable_operation, vectorizable_store,
vectorizable_condition): Update call to vect_get_new_vect_var.
(vect_transform_stmt): Call vectorizable_reduction.
(vect_update_ivs_after_vectorizer): Skip reduction phis.
(vect_transform_loop): Skip if stmt is both not relevant and not live.
* tree-vectorizer.c (reduction_code_for_scalar_code): New function.
(vect_is_simple_reduction): Was empty - added implementation.
* tree-vectorizer.h (vect_scalar_var): New enum vect_var_kind value.
(reduc_vec_info_type): New enum stmt_vec_info_type value.
* config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf,
reduc_umax_v4si, reduc_smin_v4si, reduc_umin_v4si, reduc_smin_v4sf,
reduc_plus_v4si, reduc_plus_v4sf): New define_expands.
* tree-vect-analyze.c (vect_determine_vectorization_factor): Remove
ENABLE_CHECKING around gcc_assert.
* tree-vect-transform.c (vect_do_peeling_for_loop_bound,
vect_do_peeling_for_alignment, vect_transform_loop,
vect_get_vec_def_for_operand): Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@101155 138bc75d-0d04-0410-961f-82ee72b054a4
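
For context, the loops this patch teaches the vectorizer to handle are plain summation/min/max reductions of the shape the vect-reduc-* testcases exercise. Below is an illustrative C sketch (the function and array names are made up, not taken from the testsuite) of the cross-iteration def-use cycle that vect_is_simple_reduction detects:

/* Illustrative only: a summation reduction of the shape that
   vect_is_simple_reduction detects.  The cross-iteration cycle is
   a1 = phi <init, a2>;  a2 = a1 + a[i], with the result used only
   after the loop.  */
unsigned int
sum_array (unsigned int *a, int n)
{
  unsigned int sum = 0;   /* init_val, carried into the loop-header phi */
  int i;

  for (i = 0; i < n; i++)
    sum = sum + a[i];     /* the reduction stmt: a2 = operation (a3, a1) */

  return sum;             /* only use of the result is outside the loop */
}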
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                             |  50
-rw-r--r--  gcc/config/rs6000/altivec.md              | 154
-rw-r--r--  gcc/expr.c                                |  10
-rw-r--r--  gcc/genopinit.c                           |   7
-rw-r--r--  gcc/optabs.c                              |  15
-rw-r--r--  gcc/optabs.h                              |  13
-rw-r--r--  gcc/testsuite/ChangeLog                   |   8
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-reduc-1.c  |   4
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-reduc-2.c  |   4
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-reduc-3.c  |   4
-rw-r--r--  gcc/testsuite/lib/target-supports.exp     |  17
-rw-r--r--  gcc/tree-inline.c                         |   4
-rw-r--r--  gcc/tree-pretty-print.c                   |  24
-rw-r--r--  gcc/tree-vect-analyze.c                   | 126
-rw-r--r--  gcc/tree-vect-transform.c                 | 566
-rw-r--r--  gcc/tree-vectorizer.c                     | 225
-rw-r--r--  gcc/tree-vectorizer.h                     |   9
-rw-r--r--  gcc/tree.def                              |  10
18 files changed, 1149 insertions, 101 deletions
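
Conceptually, the transformation implemented by vectorizable_reduction, get_initial_def_for_reduction, and vect_create_epilog_for_reduction in the diff below keeps a vector of partial results inside the loop and reduces it to a scalar after the loop, adjusting by the initial value when the "ADJUST_IN_EPILOG" scheme is used. The following sketch models that idea in plain C for a vectorization factor of 4; it is not the GIMPLE the vectorizer actually emits, and it assumes the trip count is a multiple of 4:

/* Conceptual model (not the generated GIMPLE) of a vectorized sum
   reduction with vectorization factor 4, using the ADJUST_IN_EPILOG
   scheme: the partial-result vector starts as {0,0,0,0} and the
   scalar initial value is folded back in after the loop.  Assumes
   n is a multiple of 4.  */
unsigned int
sum_array_vectorized (unsigned int *a, int n, unsigned int init_val)
{
  unsigned int vsum[4] = { 0, 0, 0, 0 };  /* vector of partial results */
  unsigned int result;
  int i, j;

  for (i = 0; i < n; i += 4)
    for (j = 0; j < 4; j++)
      vsum[j] += a[i + j];                /* one vector add per vector iteration */

  /* Loop epilog: reduce the partial-result vector to a scalar (the
     single REDUC_PLUS_EXPR / reduc_plus_optab step), then adjust by
     the initial value.  */
  result = vsum[0] + vsum[1] + vsum[2] + vsum[3];
  return result + init_val;
}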
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8ebd8004845..73986314282 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,53 @@ +2005-06-19 Dorit Nuzman <dorit@il.ibm.com> + + * tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): New + tree-codes. + * optabs.h (OTI_reduc_smax, OTI_reduc_umax, OTI_reduc_smin, + OTI_reduc_umin, OTI_reduc_plus): New optabs for reduction. + (reduc_smax_optab, reduc_umax_optab, reduc_smin_optab, reduc_umin_optab, + reduc_plus_optab): New optabs for reduction. + * expr.c (expand_expr_real_1): Handle new tree-codes. + * tree-inline.c (estimate_num_insns_1): Handle new tree-codes. + * tree-pretty-print.c (dump_generic_node, op_prio, op_symbol): Handle + new tree-codes. + * optabs.c (optab_for_tree_code): Handle new tree-codes. + (init_optabs): Initialize new optabs. + * genopinit.c (optabs): Define handlers for new optabs. + + * tree-vect-analyze.c (vect_analyze_operations): Fail vectorization in + case of a phi that is marked as relevant. Call vectorizable_reduction. + (vect_mark_relevant): Phis may be marked as relevant. + (vect_mark_stmts_to_be_vectorized): The use corresponding to the + reduction variable in a reduction stmt does not mark its defining phi + as relevant. Update documentation accordingly. + (vect_can_advance_ivs_p): Skip reduction phis. + * tree-vect-transform.c (vect_get_vec_def_for_operand): Takes + additional argument. Handle reduction. + (vect_create_destination_var): Update call to vect_get_new_vect_var. + Handle non-vector argument. + (get_initial_def_for_reduction): New function. + (vect_create_epilog_for_reduction): New function. + (vectorizable_reduction): New function. + (vect_get_new_vect_var): Handle new vect_var_kind. + (vectorizable_assignment, vectorizable_operation, vectorizable_store, + vectorizable_condition): Update call to vect_get_new_vect_var. + (vect_transform_stmt): Call vectorizable_reduction. + (vect_update_ivs_after_vectorizer): Skip reduction phis. + (vect_transform_loop): Skip if stmt is both not relevant and not live. + * tree-vectorizer.c (reduction_code_for_scalar_code): New function. + (vect_is_simple_reduction): Was empty - added implementation. + * tree-vectorizer.h (vect_scalar_var): New enum vect_var_kind value. + (reduc_vec_info_type): New enum vect_def_type value. + * config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf, + reduc_umax_v4si, reduc_smin_v4si, reduc_umin_v4sf, reduc_smin_v4sf, + reduc_plus_v4si, reduc_plus_v4sf): New define_expands. + + * tree-vect-analyze.c (vect_determine_vectorization_factor): Remove + ENABLE_CHECKING around gcc_assert. + * tree-vect-transform.c (vect_do_peeling_for_loop_bound, + (vect_do_peeling_for_alignment, vect_transform_loop, + vect_get_vec_def_for_operand): Likewise. + 2005-06-18 Joseph S. 
Myers <joseph@codesourcery.com> * config/ia64/ia64.c (ia64_function_arg): Set up a PARALLEL for a diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 847812dd4d6..3b2044783c2 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1825,6 +1825,160 @@ operands[3] = gen_reg_rtx (GET_MODE (operands[0])); }) +;; Reduction + +(define_expand "reduc_smax_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))] + "TARGET_ALTIVEC" + " +{ + rtx vtmp1 = gen_reg_rtx (V4SImode); + rtx vtmp2 = gen_reg_rtx (V4SImode); + rtx vtmp3 = gen_reg_rtx (V4SImode); + + emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1], + gen_rtx_CONST_INT (SImode, 8))); + emit_insn (gen_smaxv4si3 (vtmp2, operands[1], vtmp1)); + emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2, + gen_rtx_CONST_INT (SImode, 4))); + emit_insn (gen_smaxv4si3 (operands[0], vtmp2, vtmp3)); + DONE; +}") + +(define_expand "reduc_smax_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))] + "TARGET_ALTIVEC" + " +{ + rtx vtmp1 = gen_reg_rtx (V4SFmode); + rtx vtmp2 = gen_reg_rtx (V4SFmode); + rtx vtmp3 = gen_reg_rtx (V4SFmode); + + emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1], + gen_rtx_CONST_INT (SImode, 8))); + emit_insn (gen_smaxv4sf3 (vtmp2, operands[1], vtmp1)); + emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2, + gen_rtx_CONST_INT (SImode, 4))); + emit_insn (gen_smaxv4sf3 (operands[0], vtmp2, vtmp3)); + DONE; +}") + +(define_expand "reduc_umax_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))] + "TARGET_ALTIVEC" + " +{ + rtx vtmp1 = gen_reg_rtx (V4SImode); + rtx vtmp2 = gen_reg_rtx (V4SImode); + rtx vtmp3 = gen_reg_rtx (V4SImode); + + emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1], + gen_rtx_CONST_INT (SImode, 8))); + emit_insn (gen_umaxv4si3 (vtmp2, operands[1], vtmp1)); + emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2, + gen_rtx_CONST_INT (SImode, 4))); + emit_insn (gen_umaxv4si3 (operands[0], vtmp2, vtmp3)); + DONE; +}") + +(define_expand "reduc_smin_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))] + "TARGET_ALTIVEC" + " +{ + rtx vtmp1 = gen_reg_rtx (V4SImode); + rtx vtmp2 = gen_reg_rtx (V4SImode); + rtx vtmp3 = gen_reg_rtx (V4SImode); + + emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1], + gen_rtx_CONST_INT (SImode, 8))); + emit_insn (gen_sminv4si3 (vtmp2, operands[1], vtmp1)); + emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2, + gen_rtx_CONST_INT (SImode, 4))); + emit_insn (gen_sminv4si3 (operands[0], vtmp2, vtmp3)); + DONE; +}") + +(define_expand "reduc_smin_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))] + "TARGET_ALTIVEC" + " +{ + rtx vtmp1 = gen_reg_rtx (V4SFmode); + rtx vtmp2 = gen_reg_rtx (V4SFmode); + rtx vtmp3 = gen_reg_rtx (V4SFmode); + + emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1], + gen_rtx_CONST_INT (SImode, 8))); + emit_insn (gen_sminv4sf3 (vtmp2, operands[1], vtmp1)); + emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2, + gen_rtx_CONST_INT (SImode, 4))); + emit_insn (gen_sminv4sf3 (operands[0], vtmp2, vtmp3)); + DONE; +}") + +(define_expand 
"reduc_umin_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))] + "TARGET_ALTIVEC" + " +{ + rtx vtmp1 = gen_reg_rtx (V4SImode); + rtx vtmp2 = gen_reg_rtx (V4SImode); + rtx vtmp3 = gen_reg_rtx (V4SImode); + + emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1], + gen_rtx_CONST_INT (SImode, 8))); + emit_insn (gen_uminv4si3 (vtmp2, operands[1], vtmp1)); + emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2, + gen_rtx_CONST_INT (SImode, 4))); + emit_insn (gen_uminv4si3 (operands[0], vtmp2, vtmp3)); + DONE; +}") + +(define_expand "reduc_plus_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))] + "TARGET_ALTIVEC" + " +{ + rtx vtmp1 = gen_reg_rtx (V4SImode); + rtx vtmp2 = gen_reg_rtx (V4SImode); + rtx vtmp3 = gen_reg_rtx (V4SImode); + + emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1], + gen_rtx_CONST_INT (SImode, 8))); + emit_insn (gen_addv4si3 (vtmp2, operands[1], vtmp1)); + emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2, + gen_rtx_CONST_INT (SImode, 4))); + emit_insn (gen_addv4si3 (operands[0], vtmp2, vtmp3)); + DONE; +}") + +(define_expand "reduc_plus_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))] + "TARGET_ALTIVEC" + " +{ + rtx vtmp1 = gen_reg_rtx (V4SFmode); + rtx vtmp2 = gen_reg_rtx (V4SFmode); + rtx vtmp3 = gen_reg_rtx (V4SFmode); + + emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1], + gen_rtx_CONST_INT (SImode, 8))); + emit_insn (gen_addv4sf3 (vtmp2, operands[1], vtmp1)); + emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2, + gen_rtx_CONST_INT (SImode, 4))); + emit_insn (gen_addv4sf3 (operands[0], vtmp2, vtmp3)); + DONE; +}") + (define_insn "vec_realign_load_v4sf" [(set (match_operand:V4SF 0 "register_operand" "=v") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v") diff --git a/gcc/expr.c b/gcc/expr.c index 8b75ed7c605..ac500b5d52c 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -8356,6 +8356,16 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, return temp; } + case REDUC_MAX_EXPR: + case REDUC_MIN_EXPR: + case REDUC_PLUS_EXPR: + { + op0 = expand_expr (TREE_OPERAND (exp, 0), NULL_RTX, VOIDmode, 0); + this_optab = optab_for_tree_code (code, type); + temp = expand_unop (mode, this_optab, op0, target, unsignedp); + gcc_assert (temp); + return temp; + } default: return lang_hooks.expand_expr (exp, original_target, tmode, diff --git a/gcc/genopinit.c b/gcc/genopinit.c index d58f8811812..eea084d22d5 100644 --- a/gcc/genopinit.c +++ b/gcc/genopinit.c @@ -198,7 +198,12 @@ static const char * const optabs[] = "vec_init_optab->handlers[$A].insn_code = CODE_FOR_$(vec_init$a$)", "vec_realign_load_optab->handlers[$A].insn_code = CODE_FOR_$(vec_realign_load_$a$)", "vcond_gen_code[$A] = CODE_FOR_$(vcond$a$)", - "vcondu_gen_code[$A] = CODE_FOR_$(vcondu$a$)" + "vcondu_gen_code[$A] = CODE_FOR_$(vcondu$a$)", + "reduc_smax_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_smax_$a$)", + "reduc_umax_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_umax_$a$)", + "reduc_smin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_smin_$a$)", + "reduc_umin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_umin_$a$)", + "reduc_plus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_plus_$a$)" }; static void gen_insn (rtx); diff --git a/gcc/optabs.c b/gcc/optabs.c index 
2e84ac3a0b8..cd4f2cbe6d0 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -294,6 +294,15 @@ optab_for_tree_code (enum tree_code code, tree type) case REALIGN_LOAD_EXPR: return vec_realign_load_optab; + case REDUC_MAX_EXPR: + return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab; + + case REDUC_MIN_EXPR: + return TYPE_UNSIGNED (type) ? reduc_umin_optab : reduc_smin_optab; + + case REDUC_PLUS_EXPR: + return reduc_plus_optab; + default: break; } @@ -5061,6 +5070,12 @@ init_optabs (void) cstore_optab = init_optab (UNKNOWN); push_optab = init_optab (UNKNOWN); + reduc_smax_optab = init_optab (UNKNOWN); + reduc_umax_optab = init_optab (UNKNOWN); + reduc_smin_optab = init_optab (UNKNOWN); + reduc_umin_optab = init_optab (UNKNOWN); + reduc_plus_optab = init_optab (UNKNOWN); + vec_extract_optab = init_optab (UNKNOWN); vec_set_optab = init_optab (UNKNOWN); vec_init_optab = init_optab (UNKNOWN); diff --git a/gcc/optabs.h b/gcc/optabs.h index 1426e570fb9..2495fedef0b 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -231,6 +231,13 @@ enum optab_index /* Conditional add instruction. */ OTI_addcc, + /* Reduction operations on a vector operand. */ + OTI_reduc_smax, + OTI_reduc_umax, + OTI_reduc_smin, + OTI_reduc_umin, + OTI_reduc_plus, + /* Set specified field of vector operand. */ OTI_vec_set, /* Extract specified field of vector operand. */ @@ -347,6 +354,12 @@ extern GTY(()) optab optab_table[OTI_MAX]; #define push_optab (optab_table[OTI_push]) #define addcc_optab (optab_table[OTI_addcc]) +#define reduc_smax_optab (optab_table[OTI_reduc_smax]) +#define reduc_umax_optab (optab_table[OTI_reduc_umax]) +#define reduc_smin_optab (optab_table[OTI_reduc_smin]) +#define reduc_umin_optab (optab_table[OTI_reduc_umin]) +#define reduc_plus_optab (optab_table[OTI_reduc_plus]) + #define vec_set_optab (optab_table[OTI_vec_set]) #define vec_extract_optab (optab_table[OTI_vec_extract]) #define vec_init_optab (optab_table[OTI_vec_init]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 992b1760e14..375bd449df3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2005-06-19 Dorit Nuzman <dorit@il.ibm.com> + + * lib/target-supports.exp (check_effective_target_vect_reduction): New. + * gcc.dg/vect/vect-reduc-1.c: Now vectorizable for vect_reduction + targets. + * gcc.dg/vect/vect-reduc-2.c: Likewise. + * gcc.dg/vect/vect-reduc-3.c: Likewise. + 2005-06-18 Joseph S. Myers <joseph@codesourcery.com> * gcc.target/ia64/float80-varargs-1.c: New test. diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c index 1c3d555f5d4..9fce8b3e1a3 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c @@ -7,7 +7,6 @@ #define DIFF 242 /* Test vectorization of reduction of unsigned-int. */ -/* Not supported yet. */ int main1 (unsigned int x, unsigned int max_result) { @@ -52,5 +51,4 @@ int main (void) return main1 (0, 15); } -/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */ -/* { dg-final { scan-tree-dump-times "not vectorized: unsupported use in stmt." 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail {! 
vect_reduction} } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c index fd5e94b5004..38693539f33 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c @@ -8,7 +8,6 @@ #define DIFF 242 /* Test vectorization of reduction of signed-int. */ -/* Not supported yet. */ int main1 (int x, int max_result) { @@ -50,5 +49,4 @@ int main (void) return main1 (0, 15); } -/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */ -/* { dg-final { scan-tree-dump-times "not vectorized: unsupported use in stmt." 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail {! vect_reduction} } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c index 86fbc4bdec1..398d9cdb34b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c @@ -8,7 +8,6 @@ /* Test vectorization of reduction of unsigned-int in the presence of unknown-loop-bound. */ -/* Not supported yet. */ int main1 (int n) { @@ -37,5 +36,4 @@ int main (void) return main1 (N-1); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ -/* { dg-final { scan-tree-dump-times "not vectorized: unsupported use in stmt." 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail {! vect_reduction} } } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 03781699775..4facec651e8 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -988,6 +988,23 @@ proc check_effective_target_vect_int_mult { } { return $et_vect_int_mult_saved } +# Return 1 if the target supports vector reduction + +proc check_effective_target_vect_reduction { } { + global et_vect_reduction_saved + + if [info exists et_vect_reduction_saved] { + verbose "check_effective_target_vect_reduction: using cached result" 2 + } else { + set et_vect_reduction_saved 0 + if { [istarget powerpc*-*-*] } { + set et_vect_reduction_saved 1 + } + } + verbose "check_effective_target_vect_reduction: returning $et_vect_reduction_saved" 2 + return $et_vect_reduction_saved +} + # Return 1 if the target supports atomic operations on "int" and "long". 
proc check_effective_target_sync_int_long { } { diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index db4b1e91306..7fa43507630 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -1736,6 +1736,10 @@ estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data) case REALIGN_LOAD_EXPR: + case REDUC_MAX_EXPR: + case REDUC_MIN_EXPR: + case REDUC_PLUS_EXPR: + case RESX_EXPR: *count += 1; break; diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index a079f3efb2a..1922be7f4d3 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -1535,6 +1535,24 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags, pp_string (buffer, " > "); break; + case REDUC_MAX_EXPR: + pp_string (buffer, " REDUC_MAX_EXPR < "); + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); + pp_string (buffer, " > "); + break; + + case REDUC_MIN_EXPR: + pp_string (buffer, " REDUC_MIN_EXPR < "); + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); + pp_string (buffer, " > "); + break; + + case REDUC_PLUS_EXPR: + pp_string (buffer, " REDUC_PLUS_EXPR < "); + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); + pp_string (buffer, " > "); + break; + default: NIY; } @@ -1817,6 +1835,9 @@ op_prio (tree op) case ABS_EXPR: case REALPART_EXPR: case IMAGPART_EXPR: + case REDUC_MAX_EXPR: + case REDUC_MIN_EXPR: + case REDUC_PLUS_EXPR: return 16; case SAVE_EXPR: @@ -1907,6 +1928,9 @@ op_symbol (tree op) case PLUS_EXPR: return "+"; + case REDUC_PLUS_EXPR: + return "r+"; + case NEGATE_EXPR: case MINUS_EXPR: return "-"; diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c index da033c82874..1cca9e84c07 100644 --- a/gcc/tree-vect-analyze.c +++ b/gcc/tree-vect-analyze.c @@ -413,10 +413,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) else vectorization_factor = nunits; -#ifdef ENABLE_CHECKING gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type)) * vectorization_factor == UNITS_PER_SIMD_WORD); -#endif } } @@ -483,8 +481,16 @@ vect_analyze_operations (loop_vec_info loop_vinfo) return false; } - gcc_assert (!STMT_VINFO_RELEVANT_P (stmt_info)); - } + if (STMT_VINFO_RELEVANT_P (stmt_info)) + { + /* Most likely a reduction-like computation that is used + in the loop. */ + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, + LOOP_LOC (loop_vinfo))) + fprintf (vect_dump, "not vectorized: unsupported pattern."); + return false; + } + } for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) { @@ -541,7 +547,12 @@ vect_analyze_operations (loop_vec_info loop_vinfo) if (STMT_VINFO_LIVE_P (stmt_info)) { - ok = vectorizable_live_operation (stmt, NULL, NULL); + ok = vectorizable_reduction (stmt, NULL, NULL); + + if (ok) + need_to_vectorize = true; + else + ok = vectorizable_live_operation (stmt, NULL, NULL); if (!ok) { @@ -2148,13 +2159,13 @@ vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt, fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p); STMT_VINFO_LIVE_P (stmt_info) |= live_p; + STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p; if (TREE_CODE (stmt) == PHI_NODE) - /* Don't mark as relevant because it's not going to vectorized. */ + /* Don't put phi-nodes in the worklist. Phis that are marked relevant + or live will fail vectorization later on. 
*/ return; - STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p; - if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p && STMT_VINFO_LIVE_P (stmt_info) == save_live_p) { @@ -2337,19 +2348,33 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) Exceptions: - - if USE is used only for address computations (e.g. array indexing), + (case 1) + If USE is used only for address computations (e.g. array indexing), which does not need to be directly vectorized, then the liveness/relevance of the respective DEF_STMT is left unchanged. - - if STMT has been identified as defining a reduction variable, then: - STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false - STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true - because even though STMT is classified as live (since it defines a - value that is used across loop iterations) and irrelevant (since it - is not used inside the loop), it will be vectorized, and therefore - the corresponding DEF_STMTs need to marked as relevant. + (case 2) + If STMT has been identified as defining a reduction variable, then + we have two cases: + (case 2.1) + The last use of STMT is the reduction-variable, which is defined + by a loop-header-phi. We don't want to mark the phi as live or + relevant (because it does not need to be vectorized, it is handled + as part of the vectorization of the reduction), so in this case we + skip the call to vect_mark_relevant. + (case 2.2) + The rest of the uses of STMT are defined in the loop body. For + the def_stmt of these uses we want to set liveness/relevance + as follows: + STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false + STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true + because even though STMT is classified as live (since it defines a + value that is used across loop iterations) and irrelevant (since it + is not used inside the loop), it will be vectorized, and therefore + the corresponding DEF_STMTs need to marked as relevant. */ + /* case 2.2: */ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) { gcc_assert (!relevant_p && live_p); @@ -2359,42 +2384,42 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) { - /* We are only interested in uses that need to be vectorized. Uses - that are used for address computation are not considered relevant. + /* case 1: we are only interested in uses that need to be vectorized. + Uses that are used for address computation are not considered + relevant. 
*/ - if (exist_non_indexing_operands_for_use_p (use, stmt)) - { - if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt)) - { - if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, - LOOP_LOC (loop_vinfo))) - fprintf (vect_dump, - "not vectorized: unsupported use in stmt."); - VEC_free (tree, heap, worklist); - return false; - } + if (!exist_non_indexing_operands_for_use_p (use, stmt)) + continue; - if (!def_stmt || IS_EMPTY_STMT (def_stmt)) - continue; + if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt)) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, + LOOP_LOC (loop_vinfo))) + fprintf (vect_dump, "not vectorized: unsupported use in stmt."); + VEC_free (tree, heap, worklist); + return false; + } - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - { - fprintf (vect_dump, "worklist: examine use %d: ", i); - print_generic_expr (vect_dump, use, TDF_SLIM); - } + if (!def_stmt || IS_EMPTY_STMT (def_stmt)) + continue; - bb = bb_for_stmt (def_stmt); - if (!flow_bb_inside_loop_p (loop, bb)) - continue; + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "worklist: examine use %d: ", i); + print_generic_expr (vect_dump, use, TDF_SLIM); + } - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - { - fprintf (vect_dump, "def_stmt: "); - print_generic_expr (vect_dump, def_stmt, TDF_SLIM); - } + bb = bb_for_stmt (def_stmt); + if (!flow_bb_inside_loop_p (loop, bb)) + continue; - vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p); - } + /* case 2.1: the reduction-use does not mark the defining-phi + as relevant. */ + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def + && TREE_CODE (def_stmt) == PHI_NODE) + continue; + + vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p); } } /* while worklist */ @@ -2445,6 +2470,15 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) continue; } + /* Skip reduction phis. */ + + if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "reduc phi. skip."); + continue; + } + /* Analyze the evolution function. */ access_fn = instantiate_parameters diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 4695e54ead4..2b4d1d774af 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -42,6 +42,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "langhooks.h" #include "tree-pass.h" #include "toplev.h" +#include "real.h" /* Utility functions for the code transformation. */ static bool vect_transform_stmt (tree, block_stmt_iterator *); @@ -52,12 +53,13 @@ static tree vect_create_data_ref_ptr static tree vect_create_index_for_vector_ref (loop_vec_info); static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree); static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); -static tree vect_get_vec_def_for_operand (tree, tree); +static tree vect_get_vec_def_for_operand (tree, tree, tree *); static tree vect_init_vector (tree, tree); static void vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); static bool vect_is_simple_cond (tree, loop_vec_info); static void update_vuses_to_preheader (tree, struct loop*); +static tree get_initial_def_for_reduction (tree, tree, tree *); /* Utility function dealing with loop peeling (not peeling itself). 
*/ static void vect_generate_tmps_on_preheader @@ -85,10 +87,20 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) const char *prefix; tree new_vect_var; - if (var_kind == vect_simple_var) - prefix = "vect_"; - else + switch (var_kind) + { + case vect_simple_var: + prefix = "vect_"; + break; + case vect_scalar_var: + prefix = "stmp_"; + break; + case vect_pointer_var: prefix = "vect_p"; + break; + default: + gcc_unreachable (); + } if (name) new_vect_var = create_tmp_var (type, concat (prefix, name, NULL)); @@ -435,13 +447,18 @@ vect_create_destination_var (tree scalar_dest, tree vectype) { tree vec_dest; const char *new_name; + tree type; + enum vect_var_kind kind; + + kind = vectype ? vect_simple_var : vect_scalar_var; + type = vectype ? vectype : TREE_TYPE (scalar_dest); gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); new_name = get_name (scalar_dest); if (!new_name) new_name = "var_"; - vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name); + vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name); add_referenced_tmp_var (vec_dest); return vec_dest; @@ -502,7 +519,7 @@ vect_init_vector (tree stmt, tree vector_var) needs to be introduced. */ static tree -vect_get_vec_def_for_operand (tree op, tree stmt) +vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def) { tree vec_oprnd; tree vec_stmt; @@ -512,6 +529,7 @@ vect_get_vec_def_for_operand (tree op, tree stmt) tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); int nunits = TYPE_VECTOR_SUBPARTS (vectype); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree vec_inv; tree vec_cst; tree t = NULL_TREE; @@ -542,14 +560,14 @@ vect_get_vec_def_for_operand (tree op, tree stmt) } } - /* FORNOW */ - gcc_assert (dt != vect_reduction_def); - switch (dt) { /* Case 1: operand is a constant. */ case vect_constant_def: { + if (scalar_def) + *scalar_def = op; + /* Create 'vect_cst_ = {cst,cst,...,cst}' */ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits); @@ -565,6 +583,9 @@ vect_get_vec_def_for_operand (tree op, tree stmt) /* Case 2: operand is defined outside the loop - loop invariant. */ case vect_invariant_def: { + if (scalar_def) + *scalar_def = def; + /* Create 'vec_inv = {inv,inv,..,inv}' */ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "Create vector_inv."); @@ -581,6 +602,9 @@ vect_get_vec_def_for_operand (tree op, tree stmt) /* Case 3: operand is defined inside the loop. */ case vect_loop_def: { + if (scalar_def) + *scalar_def = def_stmt; + /* Get the def from the vectorized stmt. */ def_stmt_info = vinfo_for_stmt (def_stmt); vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); @@ -589,7 +613,17 @@ vect_get_vec_def_for_operand (tree op, tree stmt) return vec_oprnd; } - /* Case 4: operand is defined by loop-header phi - induction. */ + /* Case 4: operand is defined by a loop header phi - reduction */ + case vect_reduction_def: + { + gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); + + /* Get the def before the loop */ + op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); + return get_initial_def_for_reduction (stmt, op, scalar_def); + } + + /* Case 5: operand is defined by loop-header phi - induction. 
*/ case vect_induction_def: { if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) @@ -618,10 +652,8 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); } -#ifdef ENABLE_CHECKING /* Make sure bsi points to the stmt that is being vectorized. */ gcc_assert (stmt == bsi_stmt (*bsi)); -#endif #ifdef USE_MAPPED_LOCATION SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt)); @@ -631,6 +663,458 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) } +#define ADJUST_IN_EPILOG 1 + +/* Function get_initial_def_for_reduction + + Input: + STMT - a stmt that performs a reduction operation in the loop. + INIT_VAL - the initial value of the reduction variable + + Output: + SCALAR_DEF - a tree that holds a value to be added to the final result + of the reduction (used for "ADJUST_IN_EPILOG" - see below). + Return a vector variable, initialized according to the operation that STMT + performs. This vector will be used as the initial value of the + vector of partial results. + + Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows: + add: [0,0,...,0,0] + mult: [1,1,...,1,1] + min/max: [init_val,init_val,..,init_val,init_val] + bit and/or: [init_val,init_val,..,init_val,init_val] + and when necessary (e.g. add/mult case) let the caller know + that it needs to adjust the result by init_val. + + Option2: Initialize the vector as follows: + add: [0,0,...,0,init_val] + mult: [1,1,...,1,init_val] + min/max: [init_val,init_val,...,init_val] + bit and/or: [init_val,init_val,...,init_val] + and no adjustments are needed. + + For example, for the following code: + + s = init_val; + for (i=0;i<n;i++) + s = s + a[i]; + + STMT is 's = s + a[i]', and the reduction variable is 's'. + For a vector of 4 units, we want to return either [0,0,0,init_val], + or [0,0,0,0] and let the caller know that it needs to adjust + the result at the end by 'init_val'. + + FORNOW: We use the "ADJUST_IN_EPILOG" scheme. + TODO: Use some cost-model to estimate which scheme is more profitable. +*/ + +static tree +get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def) +{ + stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); + int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); + int nelements; + enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); + tree type = TREE_TYPE (init_val); + tree def; + tree vec, t = NULL_TREE; + bool need_epilog_adjust; + int i; + + gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)); + + switch (code) + { + case PLUS_EXPR: + def = INTEGRAL_TYPE_P (type) ? integer_zero_node : + build_real (type, dconst0); +#ifdef ADJUST_IN_EPILOG + /* All the 'nunits' elements are set to 0. The final result will be + adjusted by 'init_val' at the loop epilog. */ + nelements = nunits; + need_epilog_adjust = true; +#else + /* 'nunits - 1' elements are set to 0; The last element is set to + 'init_val'. No further adjustments at the epilog are needed. */ + nelements = nunits - 1; + need_epilog_adjust = false; +#endif + break; + + case MIN_EXPR: + case MAX_EXPR: + def = init_val; + nelements = nunits; + need_epilog_adjust = false; + break; + + default: + gcc_unreachable (); + } + + for (i = nelements - 1; i >= 0; --i) + { + t = tree_cons (NULL_TREE, def, t); + } + + if (nelements == nunits - 1) + { + /* Set the last element of the vector. 
*/ + t = tree_cons (NULL_TREE, init_val, t); + nelements += 1; + } + gcc_assert (nelements == nunits); + + if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST) + vec = build_vector (vectype, t); + else + vec = build_constructor (vectype, t); + + if (need_epilog_adjust) + *scalar_def = init_val; + else + *scalar_def = INTEGRAL_TYPE_P (type) ? integer_zero_node + : build_real (type, dconst0); + return vect_init_vector (stmt, vec); +} + + +/* Function vect_create_epilog_for_reduction: + + Create code at the loop-epilog to finalize the result of a reduction + computation. + + LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it + into a single result, by applying the operation REDUC_CODE on the + partial-results-vector. For this, we need to create a new phi node at the + loop exit to preserve loop-closed form, as illustrated below. + + STMT is the original scalar reduction stmt that is being vectorized. + REDUCTION_OP is the scalar reduction-variable. + REDUCTION_PHI is the phi-node that carries the reduction computation. + This function also sets the arguments for the REDUCTION_PHI: + The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP. + The loop-latch argument is VECT_DEF - the vector of partial sums. + + This function transforms this: + + loop: + vec_def = phi <null, null> # REDUCTION_PHI + .... + VECT_DEF = ... + + loop_exit: + s_out0 = phi <s_loop> # EXIT_PHI + + use <s_out0> + use <s_out0> + + Into: + + loop: + vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI + .... + VECT_DEF = ... + + loop_exit: + s_out0 = phi <s_loop> # EXIT_PHI + v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI + + v_out2 = reduc_expr <v_out1> + s_out3 = extract_field <v_out2, 0> + + use <s_out3> + use <s_out3> +*/ + +static void +vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op, + enum tree_code reduc_code, tree reduction_phi) +{ + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block exit_bb; + tree scalar_dest = TREE_OPERAND (stmt, 0); + tree scalar_type = TREE_TYPE (scalar_dest); + tree new_phi; + block_stmt_iterator exit_bsi; + tree vec_dest; + tree new_temp; + tree epilog_stmt; + tree new_scalar_dest, exit_phi; + tree bitsize, bitpos; + enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); + tree scalar_initial_def; + tree vec_initial_def; + tree orig_name; + imm_use_iterator imm_iter; + use_operand_p use_p; + + /*** 1. Create the reduction def-use cycle ***/ + + /* 1.1 set the loop-entry arg of the reduction-phi: */ + /* For the case of reduction, vect_get_vec_def_for_operand returns + the scalar def before the loop, that defines the initial value + of the reduction variable. */ + vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, + &scalar_initial_def); + add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop)); + + + /* 1.2 set the loop-latch arg for the reduction-phi: */ + add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop)); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "transform reduction: created def-use cycle:"); + print_generic_expr (vect_dump, reduction_phi, TDF_SLIM); + fprintf (vect_dump, "\n"); + print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM); + } + + + /*** 2. 
Create epilog code ***/ + + /* 2.1 Create new loop-exit-phi to preserve loop-closed form: + v_out1 = phi <v_loop> */ + + exit_bb = loop->single_exit->dest; + new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb); + SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def); + + exit_bsi = bsi_start (exit_bb); + + + /* 2.2 Create: + v_out2 = reduc_expr <v_out1> + s_out3 = extract_field <v_out2, 0> */ + + vec_dest = vect_create_destination_var (scalar_dest, vectype); + epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, + build1 (reduc_code, vectype, PHI_RESULT (new_phi))); + new_temp = make_ssa_name (vec_dest, epilog_stmt); + TREE_OPERAND (epilog_stmt, 0) = new_temp; + bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "transform reduction: created epilog code:"); + print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); + } + + new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); + bitsize = TYPE_SIZE (scalar_type); + + /* The result is in the low order bits. */ + if (BITS_BIG_ENDIAN) + bitpos = size_binop (MULT_EXPR, + bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1), + TYPE_SIZE (scalar_type)); + else + bitpos = bitsize_zero_node; + + epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, + build3 (BIT_FIELD_REF, scalar_type, + new_temp, bitsize, bitpos)); + new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); + TREE_OPERAND (epilog_stmt, 0) = new_temp; + bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); + + + /* 2.3 Adjust the final result by the initial value of the reduction + variable. (when such adjustment is not needed, then + 'scalar_initial_def' is zero). + + Create: + s_out = scalar_expr <s_out, scalar_initial_def> */ + + epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, + build2 (code, scalar_type, new_temp, scalar_initial_def)); + new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); + TREE_OPERAND (epilog_stmt, 0) = new_temp; + bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); + + + /* 2.4 Replace uses of s_out0 with uses of s_out3 */ + + /* Find the loop-closed-use at the loop exit of the original + scalar result. (The reduction result is expected to have + two immediate uses - one at the latch block, and one at the + loop exit). */ + exit_phi = NULL; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest) + { + if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p)))) + { + exit_phi = USE_STMT (use_p); + break; + } + } + + orig_name = PHI_RESULT (exit_phi); + + FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name) + SET_USE (use_p, new_temp); +} + + +/* Function vectorizable_reduction. + + Check if STMT performs a reduction operation that can be vectorized. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at BSI. + Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ + +bool +vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) +{ + tree vec_dest; + tree scalar_dest; + tree op0, op1; + tree loop_vec_def; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree operation; + enum tree_code code, reduc_code = 0; + enum machine_mode vec_mode; + int op_type; + optab optab, reduc_optab; + tree new_temp; + tree def0, def1, def_stmt0, def_stmt1; + enum vect_def_type dt0, dt1; + tree new_phi; + tree scalar_type; + bool is_simple_use0; + bool is_simple_use1; + + /* Is vectorizable reduction? */ + + /* Not supportable if the reduction variable is used in the loop. */ + if (STMT_VINFO_RELEVANT_P (stmt_info)) + return false; + + if (!STMT_VINFO_LIVE_P (stmt_info)) + return false; + + /* Make sure it was already recognized as a reduction pattern. */ + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def) + return false; + + gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR); + + operation = TREE_OPERAND (stmt, 1); + code = TREE_CODE (operation); + op_type = TREE_CODE_LENGTH (code); + + if (op_type != binary_op) + return false; + + op0 = TREE_OPERAND (operation, 0); + op1 = TREE_OPERAND (operation, 1); + scalar_dest = TREE_OPERAND (stmt, 0); + scalar_type = TREE_TYPE (scalar_dest); + + /* Check the first operand. It is expected to be defined inside the loop. */ + is_simple_use0 = + vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0); + is_simple_use1 = + vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1); + + gcc_assert (is_simple_use0); + gcc_assert (is_simple_use1); + gcc_assert (dt0 == vect_loop_def); + gcc_assert (dt1 == vect_reduction_def); + gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE); + gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1)); + + if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1))) + return false; + + /* Supportable by target? */ + + /* check support for the operation in the loop */ + optab = optab_for_tree_code (code, vectype); + if (!optab) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "no optab."); + return false; + } + vec_mode = TYPE_MODE (vectype); + if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "op not supported by target."); + return false; + } + + /* check support for the epilog operation */ + if (!reduction_code_for_scalar_code (code, &reduc_code)) + return false; + reduc_optab = optab_for_tree_code (reduc_code, vectype); + if (!reduc_optab) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "no optab for reduction."); + return false; + } + if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "op not supported by target."); + return false; + } + + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + return true; + } + + /** Transform. **/ + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "transform reduction."); + + /* Create the destination vector */ + vec_dest = vect_create_destination_var (scalar_dest, vectype); + + + /* Create the reduction-phi that defines the reduction-operand. 
*/ + new_phi = create_phi_node (vec_dest, loop->header); + + + /* Prepare the operand that is defined inside the loop body */ + loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL); + gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def)))); + + + /* Create the vectorized operation that computes the partial results */ + *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, + build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi))); + new_temp = make_ssa_name (vec_dest, *vec_stmt); + TREE_OPERAND (*vec_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, *vec_stmt, bsi); + + + /* Finalize the reduction-phi (set it's arguments) and create the + epilog reduction code. */ + vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi); + return true; +} + + /* Function vectorizable_assignment. Check if STMT performs an assignment (copy) that can be vectorized. @@ -688,7 +1172,7 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) /* Handle use. */ op = TREE_OPERAND (stmt, 1); - vec_oprnd = vect_get_vec_def_for_operand (op, stmt); + vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL); /* Arguments are ready. create the new vector stmt. */ *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd); @@ -846,12 +1330,12 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) /* Handle uses. */ op0 = TREE_OPERAND (operation, 0); - vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt); + vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); if (op_type == binary_op) { op1 = TREE_OPERAND (operation, 1); - vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt); + vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); } /* Arguments are ready. create the new vector stmt. */ @@ -940,7 +1424,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */ /* Handle use - get the vectorized def from the defining stmt. */ - vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt); + vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL); /* Handle def. */ /* FORNOW: make sure the data reference is aligned. */ @@ -1387,11 +1871,11 @@ vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) /* Handle cond expr. */ vec_cond_lhs = - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt); + vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL); vec_cond_rhs = - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt); - vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt); - vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt); + vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL); + vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL); + vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL); /* Arguments are ready. create the new vector stmt. 
*/ vec_compare = build2 (TREE_CODE (cond_expr), vectype, @@ -1460,8 +1944,23 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi) if (STMT_VINFO_LIVE_P (stmt_info)) { - done = vectorizable_live_operation (stmt, bsi, &vec_stmt); - gcc_assert (done); + switch (STMT_VINFO_TYPE (stmt_info)) + { + case reduc_vec_info_type: + done = vectorizable_reduction (stmt, bsi, &vec_stmt); + gcc_assert (done); + break; + + default: + done = vectorizable_live_operation (stmt, bsi, &vec_stmt); + gcc_assert (done); + } + + if (vec_stmt) + { + gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info)); + STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; + } } return is_store; @@ -1717,6 +2216,14 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters, continue; } + /* Skip reduction phis. */ + if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "reduc phi. skip."); + continue; + } + access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi)); gcc_assert (access_fn); evolution_part = @@ -1770,9 +2277,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, struct loop *new_loop; edge update_e; basic_block preheader; -#ifdef ENABLE_CHECKING int loop_num; -#endif if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ==="); @@ -1787,14 +2292,12 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, &ratio_mult_vf_name, ratio); -#ifdef ENABLE_CHECKING loop_num = loop->num; -#endif new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit, ratio_mult_vf_name, ni_name, false); -#ifdef ENABLE_CHECKING gcc_assert (new_loop); gcc_assert (loop_num == loop->num); +#ifdef ENABLE_CHECKING slpeel_verify_cfg_after_peeling (loop, new_loop); #endif @@ -2010,8 +2513,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop), niters_of_prolog_loop, ni_name, true); -#ifdef ENABLE_CHECKING gcc_assert (new_loop); +#ifdef ENABLE_CHECKING slpeel_verify_cfg_after_peeling (new_loop, loop); #endif @@ -2051,7 +2554,6 @@ vect_transform_loop (loop_vec_info loop_vinfo, if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "=== vec_transform_loop ==="); - /* Peel the loop if there are data refs with unknown alignment. Only one data ref with unknown store is allowed. */ @@ -2104,18 +2606,18 @@ vect_transform_loop (loop_vec_info loop_vinfo, } stmt_info = vinfo_for_stmt (stmt); gcc_assert (stmt_info); - if (!STMT_VINFO_RELEVANT_P (stmt_info)) + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) { bsi_next (&si); continue; } -#ifdef ENABLE_CHECKING /* FORNOW: Verify that all stmts operate on the same number of units and no inner unrolling is necessary. 
*/ gcc_assert (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)) == vectorization_factor); -#endif + /* -------- vectorize statement ------------ */ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "transform statement."); diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 1f612c5444c..1967e537125 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -575,9 +575,7 @@ slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, if (!current_new_name) continue; } -#ifdef ENABLE_CHECKING gcc_assert (get_current_def (current_new_name) == NULL_TREE); -#endif set_current_def (current_new_name, PHI_RESULT (new_phi)); bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name)); @@ -761,9 +759,7 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters) LOC loop_loc; orig_cond = get_loop_exit_condition (loop); -#ifdef ENABLE_CHECKING gcc_assert (orig_cond); -#endif loop_cond_bsi = bsi_for_stmt (orig_cond); standard_iv_increment_position (loop, &incr_bsi, &insert_after); @@ -1354,6 +1350,7 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo) STMT_VINFO_VECT_STEP (res) = NULL_TREE; STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false; STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE; + STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5); return res; } @@ -1744,9 +1741,44 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt, } +/* Function reduction_code_for_scalar_code + + Input: + CODE - tree_code of a reduction operations. + + Output: + REDUC_CODE - the correponding tree-code to be used to reduce the + vector of partial results into a single scalar result (which + will also reside in a vector). + + Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise. */ + +bool +reduction_code_for_scalar_code (enum tree_code code, + enum tree_code *reduc_code) +{ + switch (code) + { + case MAX_EXPR: + *reduc_code = REDUC_MAX_EXPR; + return true; + + case MIN_EXPR: + *reduc_code = REDUC_MIN_EXPR; + return true; + + case PLUS_EXPR: + *reduc_code = REDUC_PLUS_EXPR; + return true; + + default: + return false; + } +} + + /* Function vect_is_simple_reduction - TODO: Detect a cross-iteration def-use cucle that represents a simple reduction computation. We look for the following pattern: @@ -1756,18 +1788,189 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt, a2 = operation (a3, a1) such that: - 1. operation is... - 2. no uses for a2 in the loop (elsewhere) */ + 1. operation is commutative and associative and it is safe to + change the the order of the computation. + 2. no uses for a2 in the loop (a2 is used out of the loop) + 3. no uses of a1 in the loop besides the reduction operation. + + Condition 1 is tested here. + Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. 
*/ tree vect_is_simple_reduction (struct loop *loop ATTRIBUTE_UNUSED, tree phi ATTRIBUTE_UNUSED) { - /* FORNOW */ - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - fprintf (vect_dump, "reduction: unknown pattern."); + edge latch_e = loop_latch_edge (loop); + tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); + tree def_stmt, def1, def2; + enum tree_code code; + int op_type; + tree operation, op1, op2; + tree type; + + if (TREE_CODE (loop_arg) != SSA_NAME) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "reduction: not ssa_name: "); + print_generic_expr (vect_dump, loop_arg, TDF_SLIM); + } + return NULL_TREE; + } - return NULL_TREE; + def_stmt = SSA_NAME_DEF_STMT (loop_arg); + if (!def_stmt) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "reduction: no def_stmt."); + return NULL_TREE; + } + + if (TREE_CODE (def_stmt) != MODIFY_EXPR) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + print_generic_expr (vect_dump, def_stmt, TDF_SLIM); + } + return NULL_TREE; + } + + operation = TREE_OPERAND (def_stmt, 1); + code = TREE_CODE (operation); + if (!commutative_tree_code (code) || !associative_tree_code (code)) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "reduction: not commutative/associative: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + + op_type = TREE_CODE_LENGTH (code); + if (op_type != binary_op) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "reduction: not binary operation: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + + op1 = TREE_OPERAND (operation, 0); + op2 = TREE_OPERAND (operation, 1); + if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "reduction: uses not ssa_names: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + + /* Check that it's ok to change the order of the computation */ + type = TREE_TYPE (operation); + if (type != TREE_TYPE (op1) || type != TREE_TYPE (op2)) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "reduction: multiple types: operation type: "); + print_generic_expr (vect_dump, type, TDF_SLIM); + fprintf (vect_dump, ", operands types: "); + print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM); + fprintf (vect_dump, ","); + print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM); + } + return NULL_TREE; + } + + /* CHECKME: check for !flag_finite_math_only too? */ + if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations) + { + /* Changing the order of operations changes the sematics. */ + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "reduction: unsafe fp math optimization: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + else if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && flag_trapv) + { + /* Changing the order of operations changes the sematics. */ + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "reduction: unsafe int math optimization: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + + /* reduction is safe. 
we're dealing with one of the following: + 1) integer arithmetic and no trapv + 2) floating point arithmetic, and special flags permit this optimization. + */ + def1 = SSA_NAME_DEF_STMT (op1); + def2 = SSA_NAME_DEF_STMT (op2); + if (!def1 || !def2) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "reduction: no defs for operands: "); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } + + if (TREE_CODE (def1) == MODIFY_EXPR + && flow_bb_inside_loop_p (loop, bb_for_stmt (def1)) + && def2 == phi) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "detected reduction:"); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return def_stmt; + } + else if (TREE_CODE (def2) == MODIFY_EXPR + && flow_bb_inside_loop_p (loop, bb_for_stmt (def2)) + && def1 == phi) + { + use_operand_p use; + ssa_op_iter iter; + + /* Swap operands (just for simplicity - so that the rest of the code + can assume that the reduction variable is always the last (second) + argument). */ + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "detected reduction: need to swap operands:"); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + + /* CHECKME */ + FOR_EACH_SSA_USE_OPERAND (use, def_stmt, iter, SSA_OP_USE) + { + tree tuse = USE_FROM_PTR (use); + if (tuse == op1) + SET_USE (use, op2); + else if (tuse == op2) + SET_USE (use, op1); + } + return def_stmt; + } + else + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "reduction: unknown pattern."); + print_generic_expr (vect_dump, operation, TDF_SLIM); + } + return NULL_TREE; + } } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 332025b03e6..3c2712dbbad 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -39,7 +39,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA /* Used for naming of new temporaries. */ enum vect_var_kind { vect_simple_var, - vect_pointer_var + vect_pointer_var, + vect_scalar_var }; /* Defines type of operation: unary or binary. */ @@ -155,7 +156,8 @@ enum stmt_vec_info_type { store_vec_info_type, op_vec_info_type, assignment_vec_info_type, - condition_vec_info_type + condition_vec_info_type, + reduc_vec_info_type }; typedef struct data_reference *dr_p; @@ -345,6 +347,8 @@ extern tree vect_is_simple_reduction (struct loop *, tree); extern bool vect_can_force_dr_alignment_p (tree, unsigned int); extern enum dr_alignment_support vect_supportable_dr_alignment (struct data_reference *); +extern bool reduction_code_for_scalar_code (enum tree_code, enum tree_code *); + /* Creation and deletion of loop and stmt info structs. */ extern loop_vec_info new_loop_vec_info (struct loop *loop); extern void destroy_loop_vec_info (loop_vec_info); @@ -363,6 +367,7 @@ extern bool vectorizable_operation (tree, block_stmt_iterator *, tree *); extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *); extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *); +extern bool vectorizable_reduction (tree, block_stmt_iterator *, tree *); /* Driver for transformation stage. 
*/ extern void vect_transform_loop (loop_vec_info, struct loops *); diff --git a/gcc/tree.def b/gcc/tree.def index b8d5a36daf7..2b8c2806f4a 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -947,6 +947,16 @@ DEFTREECODE (REALIGN_LOAD_EXPR, "realign_load", tcc_expression, 3) DEFTREECODE (TARGET_MEM_REF, "target_mem_ref", tcc_reference, 7) +/* Reduction operations. + Operations that take a vector of elements and "reduce" it to a scalar + result (e.g. summing the elements of the vector, finding the minimum over + the vector elements, etc). + Operand 0 is a vector; the first element in the vector has the result. + Operand 1 is a vector. */ +DEFTREECODE (REDUC_MAX_EXPR, "reduc_max_expr", tcc_unary, 1) +DEFTREECODE (REDUC_MIN_EXPR, "reduc_min_expr", tcc_unary, 1) +DEFTREECODE (REDUC_PLUS_EXPR, "reduc_plus_expr", tcc_unary, 1) + /* Local variables: mode:c |
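
The AltiVec expanders added above implement the new reduc_* optabs with two vsldoi/operation steps, so reducing a four-element vector takes log2(4) = 2 operations and every lane ends up holding the result; the epilog then reads the scalar out of one lane via BIT_FIELD_REF. Below is a scalar C model of the reduc_smax_v4si pattern (illustration only; smax and the function name are invented helpers, and the rotation direction is immaterial to the result):

/* Scalar model of the two-step reduc_smax_v4si expander: vsldoi with
   the same vector in both operands acts as a byte rotation, so
   rotating by 8 and then 4 bytes and taking an element-wise max at
   each step leaves the maximum of all four lanes in every lane.  */
static int
smax (int a, int b)
{
  return a > b ? a : b;
}

void
reduc_smax_v4si_model (const int src[4], int dst[4])
{
  int t1[4], t2[4], t3[4];
  int i;

  for (i = 0; i < 4; i++)        /* vsldoi by 8 bytes: rotate by two lanes */
    t1[i] = src[(i + 2) % 4];
  for (i = 0; i < 4; i++)        /* first smax step */
    t2[i] = smax (src[i], t1[i]);

  for (i = 0; i < 4; i++)        /* vsldoi by 4 bytes: rotate by one lane */
    t3[i] = t2[(i + 1) % 4];
  for (i = 0; i < 4; i++)        /* second smax step: every lane holds the max */
    dst[i] = smax (t2[i], t3[i]);
}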