diff options
author | irar <irar@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-07-12 07:09:07 +0000 |
---|---|---|
committer | irar <irar@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-07-12 07:09:07 +0000 |
commit | 7aa0d35066596b3c0fefb385bbe1ee43f75f8ec1 (patch) | |
tree | 7547f479237ee79f26fd81c529e65dfa6d42573b /gcc/tree-vect-loop.c | |
parent | 24fb1d3ff6a2ce44db82978089367a2d1e77965e (diff) | |
download | gcc-7aa0d35066596b3c0fefb385bbe1ee43f75f8ec1.tar.gz |
* tree-parloops.c (loop_parallel_p): Call vect_is_simple_reduction
with additional argument.
* tree-vectorizer.h (enum vect_def_type): Add
vect_double_reduction_def.
(vect_is_simple_reduction): Add argument.
* tree-vect-loop.c (vect_determine_vectorization_factor): Fix
indentation.
(vect_analyze_scalar_cycles_1): Detect double reduction. Call
vect_is_simple_reduction with additional argument.
(vect_analyze_loop_operations): Handle exit phi nodes in case of
double reduction.
(reduction_code_for_scalar_code): Handle additional codes by
returning ERROR_MARK for them. Fix comment and indentation.
(vect_is_simple_reduction): Fix comment, add argument to specify
double reduction. Detect double reduction.
(get_initial_def_for_induction): Fix indentation.
(get_initial_def_for_reduction): Fix comment and indentation.
Handle double reduction. Create initial definitions that do not
require adjustment if ADJUSTMENT_DEF is NULL. Handle additional cases.
(vect_create_epilog_for_reduction): Fix comment, add argument to
handle double reduction. Use PLUS_EXPR in case of MINUS_EXPR in
epilogue result extraction. Create double reduction phi node and
replace relevant uses.
(vectorizable_reduction): Call vect_is_simple_reduction with
additional argument. Fix indentation. Update epilogue code treatment
according to the changes in reduction_code_for_scalar_code. Check
for double reduction. Call vect_create_epilog_for_reduction with
additional argument.
* tree-vect-stmts.c (process_use): Handle double reduction, update
documentation.
(vect_mark_stmts_to_be_vectorized): Handle double reduction.
(vect_get_vec_def_for_operand): Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@149526 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- | gcc/tree-vect-loop.c | 493 |
1 files changed, 381 insertions, 112 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index a37e3c00f72..c96fb04a814 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -291,8 +291,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) } else { - - gcc_assert (! STMT_VINFO_DATA_REF (stmt_info) + gcc_assert (!STMT_VINFO_DATA_REF (stmt_info) && !is_pattern_stmt_p (stmt_info)); scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, @@ -410,6 +409,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) tree dumy; VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64); gimple_stmt_iterator gsi; + bool double_reduc; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_analyze_scalar_cycles ==="); @@ -477,26 +477,39 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo)); - reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle); + reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle, + &double_reduc); if (reduc_stmt) { - if (nested_cycle) + if (double_reduc) { if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "Detected vectorizable nested cycle."); + fprintf (vect_dump, "Detected double reduction."); - STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle; + STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def; STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = - vect_nested_cycle; + vect_double_reduction_def; } - else + else { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "Detected reduction."); + if (nested_cycle) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Detected vectorizable nested cycle."); - STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; - STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = - vect_reduction_def; + STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle; + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = + vect_nested_cycle; + } + else + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Detected reduction."); + + STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = + vect_reduction_def; + } } } else @@ -1111,10 +1124,13 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) /* inner-loop loop-closed exit phi in outer-loop vectorization (i.e. a phi in the tail of the outer-loop). FORNOW: we currently don't support the case that these phis - are not used in the outerloop, cause this case requires - to actually do something here. */ - if (!STMT_VINFO_RELEVANT_P (stmt_info) - || STMT_VINFO_LIVE_P (stmt_info)) + are not used in the outerloop (unless it is double reduction, + i.e., this phi is vect_reduction_def), cause this case + requires to actually do something here. */ + if ((!STMT_VINFO_RELEVANT_P (stmt_info) + || STMT_VINFO_LIVE_P (stmt_info)) + && STMT_VINFO_DEF_TYPE (stmt_info) + != vect_double_reduction_def) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, @@ -1466,31 +1482,40 @@ vect_analyze_loop (struct loop *loop) Output: REDUC_CODE - the corresponding tree-code to be used to reduce the vector of partial results into a single scalar result (which - will also reside in a vector). + will also reside in a vector) or ERROR_MARK if the operation is + a supported reduction operation, but does not have such tree-code. - Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise. */ + Return FALSE if CODE currently cannot be vectorized as reduction. */ static bool reduction_code_for_scalar_code (enum tree_code code, enum tree_code *reduc_code) { switch (code) - { - case MAX_EXPR: - *reduc_code = REDUC_MAX_EXPR; - return true; - - case MIN_EXPR: - *reduc_code = REDUC_MIN_EXPR; - return true; - - case PLUS_EXPR: - *reduc_code = REDUC_PLUS_EXPR; - return true; - - default: - return false; - } + { + case MAX_EXPR: + *reduc_code = REDUC_MAX_EXPR; + return true; + + case MIN_EXPR: + *reduc_code = REDUC_MIN_EXPR; + return true; + + case PLUS_EXPR: + *reduc_code = REDUC_PLUS_EXPR; + return true; + + case MULT_EXPR: + case MINUS_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case BIT_AND_EXPR: + *reduc_code = ERROR_MARK; + return true; + + default: + return false; + } } @@ -1507,7 +1532,7 @@ report_vect_op (gimple stmt, const char *msg) /* Function vect_is_simple_reduction - Detect a cross-iteration def-use cycle that represents a simple + (1) Detect a cross-iteration def-use cycle that represents a simple reduction computation. We look for the following pattern: loop_header: @@ -1524,12 +1549,20 @@ report_vect_op (gimple stmt, const char *msg) Condition 1 is tested here. Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. - Also detect a cross-iteration def-use cycle in nested loops, i.e., nested - cycles, if CHECK_REDUCTION is false. */ + (2) Detect a cross-iteration def-use cycle in nested loops, i.e., + nested cycles, if CHECK_REDUCTION is false. + + (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double + reductions: + + a1 = phi < a0, a2 > + inner loop (def of a3) + a2 = phi < a3 > +*/ gimple vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, - bool check_reduction) + bool check_reduction, bool *double_reduc) { struct loop *loop = (gimple_bb (phi))->loop_father; struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); @@ -1543,6 +1576,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, tree name; imm_use_iterator imm_iter; use_operand_p use_p; + bool phi_def; + + *double_reduc = false; /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization, otherwise, we assume outer loop vectorization. */ @@ -1584,14 +1620,24 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, return NULL; } - if (!is_gimple_assign (def_stmt)) + if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI) { if (vect_print_dump_info (REPORT_DETAILS)) print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM); return NULL; } - name = gimple_assign_lhs (def_stmt); + if (is_gimple_assign (def_stmt)) + { + name = gimple_assign_lhs (def_stmt); + phi_def = false; + } + else + { + name = PHI_RESULT (def_stmt); + phi_def = true; + } + nloop_uses = 0; FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) { @@ -1608,6 +1654,37 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, } } + /* If DEF_STMT is a phi node itself, we expect it to have a single argument + defined in the inner loop. */ + if (phi_def) + { + op1 = PHI_ARG_DEF (def_stmt, 0); + + if (gimple_phi_num_args (def_stmt) != 1 + || TREE_CODE (op1) != SSA_NAME) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "unsupported phi node definition."); + + return NULL; + } + + def1 = SSA_NAME_DEF_STMT (op1); + if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) + && loop->inner + && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) + && is_gimple_assign (def1)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + report_vect_op (def_stmt, "detected double reduction: "); + + *double_reduc = true; + return def_stmt; + } + + return NULL; + } + code = gimple_assign_rhs_code (def_stmt); if (check_reduction @@ -1697,7 +1774,6 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, return NULL; } - /* Check that one def is the reduction def, defined by PHI, the other def is either defined in the loop ("vect_internal_def"), or it's an induction (defined by a loop-header phi-node). */ @@ -2296,7 +2372,7 @@ get_initial_def_for_induction (gimple iv_phi) access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi)); gcc_assert (access_fn); ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn, - &init_expr, &step_expr); + &init_expr, &step_expr); gcc_assert (ok); pe = loop_preheader_edge (iv_loop); @@ -2306,7 +2382,8 @@ get_initial_def_for_induction (gimple iv_phi) /* iv_loop is nested in the loop to be vectorized. init_expr had already been created during vectorization of previous stmts; We obtain it from the STMT_VINFO_VEC_STMT of the defining stmt. */ - tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop)); + tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, + loop_preheader_edge (iv_loop)); vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL); } else @@ -2507,18 +2584,16 @@ get_initial_def_for_induction (gimple iv_phi) vector of partial results. Option1 (adjust in epilog): Initialize the vector as follows: - add: [0,0,...,0,0] - mult: [1,1,...,1,1] - min/max: [init_val,init_val,..,init_val,init_val] - bit and/or: [init_val,init_val,..,init_val,init_val] + add/bit or/xor: [0,0,...,0,0] + mult/bit and: [1,1,...,1,1] + min/max: [init_val,init_val,..,init_val,init_val] and when necessary (e.g. add/mult case) let the caller know that it needs to adjust the result by init_val. Option2: Initialize the vector as follows: - add: [0,0,...,0,init_val] - mult: [1,1,...,1,init_val] - min/max: [init_val,init_val,...,init_val] - bit and/or: [init_val,init_val,...,init_val] + add/bit or/xor: [init_val,0,0,...,0] + mult/bit and: [init_val,1,1,...,1] + min/max: [init_val,init_val,...,init_val] and no adjustments are needed. For example, for the following code: @@ -2533,11 +2608,14 @@ get_initial_def_for_induction (gimple iv_phi) the result at the end by 'init_val'. FORNOW, we are using the 'adjust in epilog' scheme, because this way the - initialization vector is simpler (same element in all entries). + initialization vector is simpler (same element in all entries), if + ADJUSTMENT_DEF is not NULL, and Option2 otherwise. + A cost model should help decide between these two schemes. */ tree -get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def) +get_initial_def_for_reduction (gimple stmt, tree init_val, + tree *adjustment_def) { stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); @@ -2551,47 +2629,118 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def) tree t = NULL_TREE; int i; bool nested_in_vect_loop = false; + tree init_value; + REAL_VALUE_TYPE real_init_val = dconst0; + int int_init_val = 0; gcc_assert (vectype); nunits = TYPE_VECTOR_SUBPARTS (vectype); gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type) || SCALAR_FLOAT_TYPE_P (scalar_type)); + if (nested_in_vect_loop_p (loop, stmt)) nested_in_vect_loop = true; else gcc_assert (loop == (gimple_bb (stmt))->loop_father); - switch (code) - { - case WIDEN_SUM_EXPR: - case DOT_PROD_EXPR: - case PLUS_EXPR: - case MINUS_EXPR: - if (nested_in_vect_loop) - *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt, NULL); - else - *adjustment_def = init_val; - /* Create a vector of zeros for init_def. */ - if (SCALAR_FLOAT_TYPE_P (scalar_type)) - def_for_init = build_real (scalar_type, dconst0); - else - def_for_init = build_int_cst (scalar_type, 0); - - for (i = nunits - 1; i >= 0; --i) - t = tree_cons (NULL_TREE, def_for_init, t); - init_def = build_vector (vectype, t); - break; + /* In case of double reduction we only create a vector variable to be put + in the reduction phi node. The actual statement creation is done in + vect_create_epilog_for_reduction. */ + if (TREE_CODE (init_val) == SSA_NAME + && vinfo_for_stmt (SSA_NAME_DEF_STMT (init_val)) + && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SSA_NAME_DEF_STMT (init_val))) + == vect_double_reduction_def) + { + *adjustment_def = NULL; + return vect_create_destination_var (init_val, vectype); + } - case MIN_EXPR: - case MAX_EXPR: - *adjustment_def = NULL_TREE; - init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL); - break; + if (TREE_CONSTANT (init_val)) + { + if (SCALAR_FLOAT_TYPE_P (scalar_type)) + init_value = build_real (scalar_type, TREE_REAL_CST (init_val)); + else + init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val)); + } + else + init_value = init_val; - default: - gcc_unreachable (); - } + switch (code) + { + case WIDEN_SUM_EXPR: + case DOT_PROD_EXPR: + case PLUS_EXPR: + case MINUS_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case MULT_EXPR: + case BIT_AND_EXPR: + /* ADJUSMENT_DEF is NULL when called from + vect_create_epilog_for_reduction to vectorize double reduction. */ + if (adjustment_def) + { + if (nested_in_vect_loop) + *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt, + NULL); + else + *adjustment_def = init_val; + } + + if (code == MULT_EXPR || code == BIT_AND_EXPR) + { + real_init_val = dconst1; + int_init_val = 1; + } + + if (SCALAR_FLOAT_TYPE_P (scalar_type)) + def_for_init = build_real (scalar_type, real_init_val); + else + def_for_init = build_int_cst (scalar_type, int_init_val); + + /* Create a vector of '0' or '1' except the first element. */ + for (i = nunits - 2; i >= 0; --i) + t = tree_cons (NULL_TREE, def_for_init, t); + + /* Option1: the first element is '0' or '1' as well. */ + if (adjustment_def) + { + t = tree_cons (NULL_TREE, def_for_init, t); + init_def = build_vector (vectype, t); + break; + } + + /* Option2: the first element is INIT_VAL. */ + t = tree_cons (NULL_TREE, init_value, t); + if (TREE_CONSTANT (init_val)) + init_def = build_vector (vectype, t); + else + init_def = build_constructor_from_list (vectype, t); + + break; + + case MIN_EXPR: + case MAX_EXPR: + if (adjustment_def) + { + *adjustment_def = NULL_TREE; + init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL); + break; + } + + for (i = nunits - 1; i >= 0; --i) + t = tree_cons (NULL_TREE, init_value, t); + + if (TREE_CONSTANT (init_val)) + init_def = build_vector (vectype, t); + else + init_def = build_constructor_from_list (vectype, t); + + break; + + default: + gcc_unreachable (); + } return init_def; } @@ -2613,6 +2762,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def) REDUCTION_PHI is the phi-node that carries the reduction computation. REDUC_INDEX is the index of the operand in the right hand side of the statement that is defined by REDUCTION_PHI. + DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled. This function: 1. Creates the reduction def-use cycle: sets the arguments for @@ -2657,14 +2807,15 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, int ncopies, enum tree_code reduc_code, gimple reduction_phi, - int reduc_index) + int reduc_index, + bool double_reduc) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info prev_phi_info; tree vectype; enum machine_mode mode; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL; basic_block exit_bb; tree scalar_dest; tree scalar_type; @@ -2694,6 +2845,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, if (nested_in_vect_loop_p (loop, stmt)) { + outer_loop = loop; loop = loop->inner; nested_in_vect_loop = true; } @@ -2726,7 +2878,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, the scalar def before the loop, that defines the initial value of the reduction variable. */ vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, - &adjustment_def); + &adjustment_def); phi = reduction_phi; def = vect_def; @@ -2744,8 +2896,8 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, { fprintf (vect_dump, "transform reduction: created def-use cycle: "); print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM); - fprintf (vect_dump, "\n"); - print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM); + fprintf (vect_dump, "\n"); + print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM); } phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)); @@ -2831,15 +2983,25 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, bitsize = TYPE_SIZE (scalar_type); bytesize = TYPE_SIZE_UNIT (scalar_type); + /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore, + partial results are added and not subtracted. */ + if (code == MINUS_EXPR) + code = PLUS_EXPR; /* In case this is a reduction in an inner-loop while vectorizing an outer loop - we don't need to extract a single scalar result at the end of the - inner-loop. The final vector of partial results will be used in the - vectorized outer-loop, or reduced to a scalar result at the end of the - outer-loop. */ - if (nested_in_vect_loop) + inner-loop (unless it is double reduction, i.e., the use of reduction is + outside the outer-loop). The final vector of partial results will be used + in the vectorized outer-loop, or reduced to a scalar result at the end of + the outer-loop. */ + if (nested_in_vect_loop && !double_reduc) goto vect_finalize_reduction; + /* The epilogue is created for the outer-loop, i.e., for the loop being + vectorized. */ + if (double_reduc) + loop = outer_loop; + /* FORNOW */ gcc_assert (ncopies == 1); @@ -2914,6 +3076,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, bit_offset /= 2) { tree bitpos = size_int (bit_offset); + epilog_stmt = gimple_build_assign_with_ops (shift_code, vec_dest, new_temp, bitpos); new_name = make_ssa_name (vec_dest, epilog_stmt); @@ -2987,7 +3150,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, { tree rhs; - gcc_assert (!nested_in_vect_loop); + gcc_assert (!nested_in_vect_loop || double_reduc); if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "extract scalar result"); @@ -3007,6 +3170,9 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, vect_finalize_reduction: + if (double_reduc) + loop = loop->inner; + /* 2.5 Adjust the final result by the initial value of the reduction variable. (When such adjustment is not needed, then 'adjustment_def' is zero). For example, if code is PLUS we create: @@ -3016,11 +3182,6 @@ vect_finalize_reduction: { if (nested_in_vect_loop) { - /* For MINUS_EXPR we create new_temp = loop_exit_def + adjustment_def - since the initial value is [0,0,...,0]. */ - if (code == MINUS_EXPR) - code = PLUS_EXPR; - gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE); expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def); new_dest = vect_create_destination_var (scalar_dest, vectype); @@ -3055,6 +3216,7 @@ vect_finalize_reduction: VEC_quick_push (gimple, phis, exit_phi); } } + /* We expect to have found an exit_phi because of loop-closed-ssa form. */ gcc_assert (!VEC_empty (gimple, phis)); @@ -3063,12 +3225,13 @@ vect_finalize_reduction: if (nested_in_vect_loop) { stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi); + gimple vect_phi; /* FORNOW. Currently not supporting the case that an inner-loop reduction is not used in the outer-loop (but only outside the - outer-loop). */ - gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo) - && !STMT_VINFO_LIVE_P (stmt_vinfo)); + outer-loop), unless it is double reduction. */ + gcc_assert ((STMT_VINFO_RELEVANT_P (stmt_vinfo) + && !STMT_VINFO_LIVE_P (stmt_vinfo)) || double_reduc); epilog_stmt = adjustment_def ? epilog_stmt : new_phi; STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt; @@ -3078,7 +3241,88 @@ vect_finalize_reduction: if (adjustment_def) STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi)); - continue; + + if (!double_reduc + || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_double_reduction_def) + continue; + + /* Handle double reduction: + + stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop) + stmt2: s3 = phi <s1, s4> - (regular) reduction phi (inner loop) + stmt3: s4 = use (s3) - (regular) reduction stmt (inner loop) + stmt4: s2 = phi <s4> - double reduction stmt (outer loop) + + At that point the regular reduction (stmt2 and stmt3) is already + vectorized, as well as the exit phi node, stmt4. + Here we vectorize the phi node of double reduction, stmt1, and + update all relevant statements. */ + + /* Go through all the uses of s2 to find double reduction phi node, + i.e., stmt1 above. */ + orig_name = PHI_RESULT (exit_phi); + FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) + { + stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt); + stmt_vec_info new_phi_vinfo; + tree vect_phi_init, preheader_arg, vect_phi_res, init_def; + basic_block bb = gimple_bb (use_stmt); + gimple use; + + /* Check that USE_STMT is really double reduction phi node. */ + if (gimple_code (use_stmt) != GIMPLE_PHI + || gimple_phi_num_args (use_stmt) != 2 + || !use_stmt_vinfo + || STMT_VINFO_DEF_TYPE (use_stmt_vinfo) + != vect_double_reduction_def + || bb->loop_father != outer_loop) + continue; + + /* Create vector phi node for double reduction: + vs1 = phi <vs0, vs2> + vs1 was created previously in this function by a call to + vect_get_vec_def_for_operand and is stored in vec_initial_def; + vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI; + vs0 is created here. */ + + /* Create vector phi node. */ + vect_phi = create_phi_node (vec_initial_def, bb); + new_phi_vinfo = new_stmt_vec_info (vect_phi, + loop_vec_info_for_loop (outer_loop), NULL); + set_vinfo_for_stmt (vect_phi, new_phi_vinfo); + + /* Create vs0 - initial def of the double reduction phi. */ + preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt, + loop_preheader_edge (outer_loop)); + init_def = get_initial_def_for_reduction (stmt, preheader_arg, + NULL); + vect_phi_init = vect_init_vector (use_stmt, init_def, vectype, + NULL); + + /* Update phi node arguments with vs0 and vs2. */ + add_phi_arg (vect_phi, vect_phi_init, + loop_preheader_edge (outer_loop)); + add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt), + loop_latch_edge (outer_loop)); + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "created double reduction phi node: "); + print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM); + } + + vect_phi_res = PHI_RESULT (vect_phi); + + /* Replace the use, i.e., set the correct vs1 in the regular + reduction phi node. FORNOW, NCOPIES is always 1, so the loop + is redundant. */ + use = reduction_phi; + for (j = 0; j < ncopies; j++) + { + edge pr_edge = loop_preheader_edge (loop); + SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res); + use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use)); + } + } } /* Replace the uses: */ @@ -3087,6 +3331,7 @@ vect_finalize_reduction: FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) SET_USE (use_p, new_temp); } + VEC_free (gimple, heap, phis); } @@ -3171,6 +3416,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, gimple reduc_def_stmt = NULL; /* The default is that the reduction variable is the last in statement. */ int reduc_index = 2; + bool double_reduc = false, dummy; + basic_block def_bb; + struct loop * def_stmt_loop; + tree def_arg; if (nested_in_vect_loop_p (loop, stmt)) { @@ -3185,7 +3434,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, return false; /* 1. Is vectorizable reduction? */ - /* Not supportable if the reduction variable is used in the loop. */ if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer) return false; @@ -3300,10 +3548,11 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, if (orig_stmt) gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, reduc_def_stmt, - !nested_cycle)); + !nested_cycle, + &dummy)); else gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, reduc_def_stmt, - !nested_cycle)); + !nested_cycle, &dummy)); if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt))) return false; @@ -3400,25 +3649,43 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, orig_code = code; } - if (nested_cycle) - epilog_reduc_code = orig_code; - else - if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code)) - return false; + if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code)) + return false; - reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, optab_default); + reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, + optab_default); if (!reduc_optab) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "no optab for reduction."); epilog_reduc_code = ERROR_MARK; } - if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing) + + if (reduc_optab + && optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "reduc op not supported by target."); epilog_reduc_code = ERROR_MARK; } + + def_bb = gimple_bb (reduc_def_stmt); + def_stmt_loop = def_bb->loop_father; + def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt, + loop_preheader_edge (def_stmt_loop)); + if (TREE_CODE (def_arg) == SSA_NAME + && vinfo_for_stmt (SSA_NAME_DEF_STMT (def_arg)) + && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SSA_NAME_DEF_STMT (def_arg))) + == vect_double_reduction_def) + double_reduc = true; + + if (double_reduc && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in double reduction"); + + return false; + } if (!vec_stmt) /* transformation not required. */ { @@ -3560,8 +3827,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, epilog reduction code. */ if (!single_defuse_cycle) new_temp = gimple_assign_lhs (*vec_stmt); + vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies, - epilog_reduc_code, first_phi, reduc_index); + epilog_reduc_code, first_phi, reduc_index, + double_reduc); return true; } |