Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r--   gcc/tree-vect-transform.c   420
1 file changed, 295 insertions(+), 125 deletions(-)
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index e466594f0cf..eb370775cba 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -59,8 +59,8 @@ static tree vect_init_vector (gimple, tree, tree, gimple_stmt_iterator *);
 static void vect_finish_stmt_generation
   (gimple stmt, gimple vec_stmt, gimple_stmt_iterator *);
 static bool vect_is_simple_cond (tree, loop_vec_info);
-static void vect_create_epilog_for_reduction (tree, gimple, enum tree_code,
-                                              gimple);
+static void vect_create_epilog_for_reduction
+  (tree, gimple, int, enum tree_code, gimple);
 static tree get_initial_def_for_reduction (gimple, tree, tree *);
 
 /* Utility function dealing with loop peeling (not peeling itself).  */
@@ -580,6 +580,10 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
   int i;
   int inside_cost = 0, outside_cost = 0;
 
+  /* The SLP costs were already calculated during SLP tree build.  */
+  if (PURE_SLP_STMT (stmt_info))
+    return;
+
   inside_cost = ncopies * TARG_VEC_STMT_COST;
 
   /* FORNOW: Assuming maximum 2 args per stmts.  */
@@ -629,11 +633,15 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
   int group_size;
   int inside_cost = 0, outside_cost = 0;
 
+  /* The SLP costs were already calculated during SLP tree build.  */
+  if (PURE_SLP_STMT (stmt_info))
+    return;
+
   if (dt == vect_constant_def || dt == vect_invariant_def)
     outside_cost = TARG_SCALAR_TO_VEC_COST;
 
   /* Strided access?  */
-  if (DR_GROUP_FIRST_DR (stmt_info))
+  if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
     group_size = vect_cost_strided_group_size (stmt_info);
   /* Not a strided access.  */
   else
@@ -683,6 +691,10 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
   int inside_cost = 0, outside_cost = 0;
 
+  /* The SLP costs were already calculated during SLP tree build.  */
+  if (PURE_SLP_STMT (stmt_info))
+    return;
+
   /* Strided accesses?  */
   first_stmt = DR_GROUP_FIRST_DR (stmt_info);
   if (first_stmt && !slp_node)
@@ -1812,7 +1824,7 @@ get_initial_def_for_induction (gimple iv_phi)
   if (vect_print_dump_info (REPORT_DETAILS))
     {
-      fprintf (vect_dump, "transform induction: created def-use cycle:");
+      fprintf (vect_dump, "transform induction: created def-use cycle: ");
       print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
       fprintf (vect_dump, "\n");
       print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0,
                          TDF_SLIM);
@@ -2044,6 +2056,9 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
   vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
   gcc_assert (vec_stmt_for_operand);
-  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
+  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
+    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
+  else
+    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
 
   return vec_oprnd;
 }
@@ -2239,6 +2255,11 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
 
    VECT_DEF is a vector of partial results.
    REDUC_CODE is the tree-code for the epilog reduction.
+   NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
+   number of elements that we can fit in a vectype (nunits).  In this case
+   we have to generate more than one vector stmt - i.e - we need to "unroll"
+   the vector stmt by a factor VF/nunits.  For more details see documentation
+   in vectorizable_operation.
   STMT is the scalar reduction stmt that is being vectorized.
   REDUCTION_PHI is the phi-node that carries the reduction computation.
@@ -2282,10 +2303,12 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
 
 static void
 vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
+                                  int ncopies,
                                   enum tree_code reduc_code,
                                   gimple reduction_phi)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  stmt_vec_info prev_phi_info;
   tree vectype;
   enum machine_mode mode;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -2293,7 +2316,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
   basic_block exit_bb;
   tree scalar_dest;
   tree scalar_type;
-  gimple new_phi;
+  gimple new_phi = NULL, phi;
   gimple_stmt_iterator exit_gsi;
   tree vec_dest;
   tree new_temp = NULL_TREE;
@@ -2304,7 +2327,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
   tree bitsize, bitpos, bytesize;
   enum tree_code code = gimple_assign_rhs_code (stmt);
   tree adjustment_def;
-  tree vec_initial_def;
+  tree vec_initial_def, def;
   tree orig_name;
   imm_use_iterator imm_iter;
   use_operand_p use_p;
@@ -2314,7 +2337,8 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
   gimple use_stmt;
   bool nested_in_vect_loop = false;
   VEC(gimple,heap) *phis = NULL;
-  int i;
+  enum vect_def_type dt = vect_unknown_def_type;
+  int j, i;
 
   if (nested_in_vect_loop_p (loop, stmt))
     {
@@ -2344,25 +2368,34 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
 
   /*** 1. Create the reduction def-use cycle ***/
 
-  /* 1.1 set the loop-entry arg of the reduction-phi: */
   /* For the case of reduction, vect_get_vec_def_for_operand returns
      the scalar def before the loop, that defines the initial value
      of the reduction variable.  */
   vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                   &adjustment_def);
-  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
-
-  /* 1.2 set the loop-latch arg for the reduction-phi: */
-  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
-  if (vect_print_dump_info (REPORT_DETAILS))
+  phi = reduction_phi;
+  def = vect_def;
+  for (j = 0; j < ncopies; j++)
     {
-      fprintf (vect_dump, "transform reduction: created def-use cycle:");
-      print_gimple_stmt (vect_dump, reduction_phi, 0, TDF_SLIM);
-      fprintf (vect_dump, "\n");
-      print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vect_def), 0, TDF_SLIM);
-    }
+      /* 1.1 set the loop-entry arg of the reduction-phi: */
+      add_phi_arg (phi, vec_initial_def, loop_preheader_edge (loop));
+
+      /* 1.2 set the loop-latch arg for the reduction-phi: */
+      if (j > 0)
+        def = vect_get_vec_def_for_stmt_copy (dt, def);
+      add_phi_arg (phi, def, loop_latch_edge (loop));
+
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "transform reduction: created def-use cycle: ");
+          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
+          fprintf (vect_dump, "\n");
+          print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM);
+        }
+
+      phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
+    }
 
   /*** 2. Create epilog code
           The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
@@ -2386,7 +2419,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
         s_out3 = extract_field <v_out2, 0>              # step 2
         s_out4 = adjust_result <s_out3>                 # step 3
 
-          (step 3 is optional, and step2 1 and 2 may be combined).
+          (step 3 is optional, and steps 1 and 2 may be combined).
 
         Lastly, the uses of s_out0 are replaced by s_out4.
          ***/
@@ -2395,8 +2428,22 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
         v_out1 = phi <v_loop>  */
 
   exit_bb = single_exit (loop)->dest;
-  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
-  SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
+  def = vect_def;
+  prev_phi_info = NULL;
+  for (j = 0; j < ncopies; j++)
+    {
+      phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
+      set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
+      if (j == 0)
+        new_phi = phi;
+      else
+        {
+          def = vect_get_vec_def_for_stmt_copy (dt, def);
+          STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
+        }
+      SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
+      prev_phi_info = vinfo_for_stmt (phi);
+    }
   exit_gsi = gsi_after_labels (exit_bb);
 
   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
@@ -2437,6 +2484,9 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
 
   if (nested_in_vect_loop)
     goto vect_finalize_reduction;
 
+  /* FORNOW */
+  gcc_assert (ncopies == 1);
+
   /* 2.3 Create the reduction code, using one of the three schemes described
          above.  */
@@ -2652,15 +2702,19 @@ vect_finalize_reduction:
 
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
 
-         /* FORNOW. Currently not supporting the case that an inner-loop reduction
-            is not used in the outer-loop (but only outside the outer-loop). */
+         /* FORNOW. Currently not supporting the case that an inner-loop
+            reduction is not used in the outer-loop (but only outside the
+            outer-loop).  */
          gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
                      && !STMT_VINFO_LIVE_P (stmt_vinfo));
 
-         epilog_stmt = adjustment_def ? epilog_stmt : new_phi; 
+         epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
          STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
-         set_vinfo_for_stmt (epilog_stmt, 
+         set_vinfo_for_stmt (epilog_stmt,
                              new_stmt_vec_info (epilog_stmt, loop_vinfo));
+         if (adjustment_def)
+           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
+             STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
          continue;
        }
@@ -2733,7 +2787,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   tree def;
   gimple def_stmt;
   enum vect_def_type dt;
-  gimple new_phi;
+  gimple new_phi = NULL;
   tree scalar_type;
   bool is_simple_use;
   gimple orig_stmt;
@@ -2742,23 +2796,17 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   int i;
   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
-  stmt_vec_info prev_stmt_info;
+  int epilog_copies;
+  stmt_vec_info prev_stmt_info, prev_phi_info;
+  gimple first_phi = NULL;
+  bool single_defuse_cycle = false;
   tree reduc_def;
   gimple new_stmt = NULL;
   int j;
   tree ops[3];
 
   if (nested_in_vect_loop_p (loop, stmt))
-    {
-      loop = loop->inner;
-      /* FORNOW. This restriction should be relaxed.  */
-      if (ncopies > 1)
-        {
-          if (vect_print_dump_info (REPORT_DETAILS))
-            fprintf (vect_dump, "multiple types in nested loop.");
-          return false;
-        }
-    }
+    loop = loop->inner;
 
   gcc_assert (ncopies >= 1);
@@ -2992,18 +3040,52 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   /* Create the destination vector  */
   vec_dest = vect_create_destination_var (scalar_dest, vectype);
 
-  /* Create the reduction-phi that defines the reduction-operand.  */
-  new_phi = create_phi_node (vec_dest, loop->header);
-
   /* In case the vectorization factor (VF) is bigger than the
      number of elements that we can fit in a vectype (nunits), we have to
     generate more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */
 
+  /* If the reduction is used in an outer loop we need to generate
+     VF intermediate results, like so (e.g. for ncopies=2):
+        r0 = phi (init, r0)
+        r1 = phi (init, r1)
+        r0 = x0 + r0;
+        r1 = x1 + r1;
+    (i.e. we generate VF results in 2 registers).
+    In this case we have a separate def-use cycle for each copy, and therefore
+    for each copy we get the vector def for the reduction variable from the
+    respective phi node created for this copy.
+
+    Otherwise (the reduction is unused in the loop nest), we can combine
+    together intermediate results, like so (e.g. for ncopies=2):
+        r = phi (init, r)
+        r = x0 + r;
+        r = x1 + r;
+    (i.e. we generate VF/2 results in a single register).
+    In this case for each copy we get the vector def for the reduction variable
+    from the vectorized reduction operation generated in the previous iteration.
+  */
+
+  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop)
+    {
+      single_defuse_cycle = true;
+      epilog_copies = 1;
+    }
+  else
+    epilog_copies = ncopies;
+
   prev_stmt_info = NULL;
+  prev_phi_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
+      if (j == 0 || !single_defuse_cycle)
+        {
+          /* Create the reduction-phi that defines the reduction-operand.  */
+          new_phi = create_phi_node (vec_dest, loop->header);
+          set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo));
+        }
+
       /* Handle uses.  */
       if (j == 0)
        {
@@ -3015,6 +3097,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
 
          /* Get the vector def for the reduction variable from the phi node */
          reduc_def = PHI_RESULT (new_phi);
+         first_phi = new_phi;
        }
       else
        {
@@ -3023,9 +3106,12 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
          if (op_type == ternary_op)
            loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
 
-         /* Get the vector def for the reduction variable from the vectorized
-            reduction operation generated in the previous iteration (j-1)  */
-         reduc_def = gimple_assign_lhs (new_stmt);
+         if (single_defuse_cycle)
+           reduc_def = gimple_assign_lhs (new_stmt);
+         else
+           reduc_def = PHI_RESULT (new_phi);
+
+         STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
        }
 
       /* Arguments are ready. create the new vector stmt.  */
@@ -3044,11 +3130,15 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
       prev_stmt_info = vinfo_for_stmt (new_stmt);
+      prev_phi_info = vinfo_for_stmt (new_phi);
     }
 
   /* Finalize the reduction-phi (set its arguments) and create the
      epilog reduction code.  */
-  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
+  if (!single_defuse_cycle)
+    new_temp = gimple_assign_lhs (*vec_stmt);
+  vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies,
+                                    epilog_reduc_code, first_phi);
 
   return true;
 }
@@ -3096,7 +3186,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
   int nunits_in;
   int nunits_out;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   tree fndecl, new_temp, def, rhs_type, lhs_type;
   gimple def_stmt;
   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
@@ -3200,14 +3289,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
      needs to be generated.  */
   gcc_assert (ncopies >= 1);
 
-  /* FORNOW. This restriction should be relaxed.  */
-  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "multiple types in nested loop.");
-      return false;
-    }
-
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
@@ -3222,14 +3303,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "transform operation.");
 
-  /* FORNOW. This restriction should be relaxed.  */
-  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "multiple types in nested loop.");
-      return false;
-    }
-
   /* Handle def.  */
   scalar_dest = gimple_call_lhs (stmt);
   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
@@ -3424,7 +3497,6 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
   tree new_temp;
@@ -3444,6 +3516,9 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   int i;
   VEC(tree,heap) *vec_oprnds0 = NULL;
   tree vop0;
+  tree integral_type;
+  tree dummy;
+  bool dummy_bool;
 
   /* Is STMT a vectorizable conversion?  */
 
@@ -3496,6 +3571,8 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
       || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
     return false;
 
+  integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
+
   if (modifier == NARROW)
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
   else
@@ -3510,14 +3587,6 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
      needs to be generated.  */
   gcc_assert (ncopies >= 1);
 
-  /* FORNOW. This restriction should be relaxed.  */
-  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "multiple types in nested loop.");
-      return false;
-    }
-
   /* Check the operands of the operation.  */
   if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
     {
@@ -3528,17 +3597,18 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
 
   /* Supportable by target?  */
   if ((modifier == NONE
-       && !targetm.vectorize.builtin_conversion (code, vectype_in))
+       && !targetm.vectorize.builtin_conversion (code, integral_type))
       || (modifier == WIDEN
          && !supportable_widening_operation (code, stmt, vectype_in,
                                              &decl1, &decl2,
-                                             &code1, &code2))
+                                             &code1, &code2,
+                                             &dummy_bool, &dummy))
       || (modifier == NARROW
          && !supportable_narrowing_operation (code, stmt, vectype_in,
-                                              &code1)))
+                                              &code1, &dummy_bool, &dummy)))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "op not supported by target.");
+        fprintf (vect_dump, "conversion not supported by target.");
       return false;
     }
@@ -3581,7 +3651,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
 
          builtin_decl =
-           targetm.vectorize.builtin_conversion (code, vectype_in);
+           targetm.vectorize.builtin_conversion (code, integral_type);
         for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
           {
             /* Arguments are ready. create the new vector stmt.  */
@@ -3829,11 +3899,19 @@ vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
   tree vec_def;
 
   gcc_assert (ncopies >= 1);
+  /* FORNOW. This restriction should be relaxed.  */
+  if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "multiple types in nested loop.");
+      return false;
+    }
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
@@ -3885,7 +3963,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   enum tree_code code;
   enum machine_mode vec_mode;
   tree new_temp;
@@ -3914,13 +3991,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   if (slp_node)
     ncopies = 1;
   gcc_assert (ncopies >= 1);
-  /* FORNOW. This restriction should be relaxed.  */
-  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "multiple types in nested loop.");
-      return false;
-    }
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
@@ -4239,7 +4309,6 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
   tree vec_oprnd0=NULL, vec_oprnd1=NULL;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   enum tree_code code, code1 = ERROR_MARK;
   tree new_temp;
   tree def;
@@ -4253,6 +4322,10 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
   int ncopies;
   int j;
   tree vectype_in;
+  tree intermediate_type = NULL_TREE, narrow_type, double_vec_dest;
+  bool double_op = false;
+  tree first_vector, second_vector;
+  tree vec_oprnd2 = NULL_TREE, vec_oprnd3 = NULL_TREE, last_oprnd = NULL_TREE;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
@@ -4282,18 +4355,12 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
   if (!vectype_out)
     return false;
   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
-  if (nunits_in != nunits_out / 2) /* FORNOW */
+  if (nunits_in != nunits_out / 2
+      && nunits_in != nunits_out / 4)
     return false;
 
   ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
   gcc_assert (ncopies >= 1);
-  /* FORNOW. This restriction should be relaxed.  */
-  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "multiple types in nested loop.");
-      return false;
-    }
 
   if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
          && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
@@ -4311,7 +4378,8 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
   /* Supportable by target?  */
-  if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
+  if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
+                                        &double_op, &intermediate_type))
     return false;
 
   STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
@@ -4331,8 +4399,15 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
                        ncopies);
 
   /* Handle def.  */
-  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
-
+  /* In case of double demotion, we first generate demotion operation to the
+     intermediate type, and then from that type to the final one.  */
+  if (double_op)
+    narrow_type = intermediate_type;
+  else
+    narrow_type = vectype_out;
+  vec_dest = vect_create_destination_var (scalar_dest, narrow_type);
+  double_vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
+
   /* In case the vectorization factor (VF) is bigger than the
      number of elements that we can fit in a vectype (nunits), we have to
      generate more than one vector stmt - i.e - we need to "unroll" the
@@ -4343,22 +4418,59 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
       /* Handle uses.  */
       if (j == 0)
        {
-         vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
-         vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
+         vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
+         vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
+         if (double_op)
+           {
+             /* For double demotion we need four operands.  */
+             vec_oprnd2 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
+             vec_oprnd3 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd2);
+           }
        }
       else
       {
-         vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
-         vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
+         vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], last_oprnd);
+         vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
+         if (double_op)
+           {
+             /* For double demotion we need four operands.  */
+             vec_oprnd2 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
+             vec_oprnd3 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd2);
+           }
       }
 
-      /* Arguments are ready. Create the new vector stmt.  */
+      /* Arguments are ready. Create the new vector stmts.  */
       new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
-                                              vec_oprnd1);
-      new_temp = make_ssa_name (vec_dest, new_stmt);
-      gimple_assign_set_lhs (new_stmt, new_temp);
+                                              vec_oprnd1);
+      first_vector = make_ssa_name (vec_dest, new_stmt);
+      gimple_assign_set_lhs (new_stmt, first_vector);
       vect_finish_stmt_generation (stmt, new_stmt, gsi);
+      /* In the next iteration we will get copy for this operand.  */
+      last_oprnd = vec_oprnd1;
+
+      if (double_op)
+       {
+         /* For double demotion operation we first generate two demotion
+            operations from the source type to the intermediate type, and
+            then combine the results in one demotion to the destination
+            type.  */
+         new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd2,
+                                                  vec_oprnd3);
+         second_vector = make_ssa_name (vec_dest, new_stmt);
+         gimple_assign_set_lhs (new_stmt, second_vector);
+         vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+         new_stmt = gimple_build_assign_with_ops (code1, double_vec_dest,
+                                                  first_vector, second_vector);
+         new_temp = make_ssa_name (double_vec_dest, new_stmt);
+         gimple_assign_set_lhs (new_stmt, new_temp);
+         vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+         /* In the next iteration we will get copy for this operand.  */
+         last_oprnd = vec_oprnd3;
+       }
+
       if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
       else
@@ -4390,7 +4502,6 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
   tree vec_oprnd0=NULL, vec_oprnd1=NULL;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
   int op_type;
@@ -4405,6 +4516,9 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
   int ncopies;
   int j;
   tree vectype_in;
+  tree intermediate_type = NULL_TREE, first_vector, second_vector;
+  bool double_op;
+  tree wide_type, double_vec_dest;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
@@ -4435,18 +4549,11 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
   if (!vectype_out)
     return false;
   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
-  if (nunits_out != nunits_in / 2) /* FORNOW */
+  if (nunits_out != nunits_in / 2 && nunits_out != nunits_in / 4)
     return false;
 
   ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
   gcc_assert (ncopies >= 1);
-  /* FORNOW. This restriction should be relaxed.  */
-  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "multiple types in nested loop.");
-      return false;
-    }
 
   if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
         && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
@@ -4477,9 +4584,14 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
 
   /* Supportable by target?  */
   if (!supportable_widening_operation (code, stmt, vectype_in,
-                                      &decl1, &decl2, &code1, &code2))
+                                      &decl1, &decl2, &code1, &code2,
+                                      &double_op, &intermediate_type))
     return false;
 
+  /* Binary widening operations can only be supported directly by the
+     architecture.  */
+  gcc_assert (!(double_op && op_type == binary_op));
+
   STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
 
   if (!vec_stmt) /* transformation not required.  */
@@ -4498,7 +4610,13 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
                        ncopies);
 
   /* Handle def.  */
-  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
+  if (double_op)
+    wide_type = intermediate_type;
+  else
+    wide_type = vectype_out;
+
+  vec_dest = vect_create_destination_var (scalar_dest, wide_type);
+  double_vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
 
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
@@ -4525,22 +4643,75 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
       /* Arguments are ready.  Create the new vector stmt.  We are creating
         two vector defs because the widened result does not fit in one vector.
        The vectorized stmt can be expressed as a call to a target builtin,
-        or a using a tree-code.  */
+        or using a tree-code.  In case of double promotion (from char to int,
+        for example), the promotion is performed in two phases: first we
+        generate a promotion operation from the source type to the
+        intermediate type (short in case of char->int promotion), and then
+        for each of the created vectors we generate a promotion statement
+        from the intermediate type to the destination type.  */
       /* Generate first half of the widened result:  */
-      new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
+      new_stmt = vect_gen_widened_results_half (code1, wide_type, decl1,
                                vec_oprnd0, vec_oprnd1, op_type, vec_dest, gsi, stmt);
-      if (j == 0)
-       STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
+      if (is_gimple_call (new_stmt))
+       first_vector = gimple_call_lhs (new_stmt);
       else
-       STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
-      prev_stmt_info = vinfo_for_stmt (new_stmt);
+       first_vector = gimple_assign_lhs (new_stmt);
+
+      if (!double_op)
+       {
+         if (j == 0)
+           STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
+         else
+           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+         prev_stmt_info = vinfo_for_stmt (new_stmt);
+       }
 
       /* Generate second half of the widened result:  */
-      new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
+      new_stmt = vect_gen_widened_results_half (code2, wide_type, decl2,
                                vec_oprnd0, vec_oprnd1, op_type, vec_dest, gsi, stmt);
-      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
-      prev_stmt_info = vinfo_for_stmt (new_stmt);
+      if (is_gimple_call (new_stmt))
+       second_vector = gimple_call_lhs (new_stmt);
+      else
+       second_vector = gimple_assign_lhs (new_stmt);
+
+      if (!double_op)
+       {
+         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+         prev_stmt_info = vinfo_for_stmt (new_stmt);
+       }
+      else
+       {
+         /* FIRST_VECTOR and SECOND_VECTOR are the results of source type
+            to intermediate type promotion.  Now we generate promotions
+            for both of them to the destination type (i.e., four
+            statements).  */
+         new_stmt = vect_gen_widened_results_half (code1, vectype_out,
+                       decl1, first_vector, NULL_TREE, op_type,
+                       double_vec_dest, gsi, stmt);
+         if (j == 0)
+           STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
+         else
+           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+         prev_stmt_info = vinfo_for_stmt (new_stmt);
+
+         new_stmt = vect_gen_widened_results_half (code2, vectype_out,
+                       decl2, first_vector, NULL_TREE, op_type,
+                       double_vec_dest, gsi, stmt);
+         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+         prev_stmt_info = vinfo_for_stmt (new_stmt);
+
+         new_stmt = vect_gen_widened_results_half (code1, vectype_out,
+                       decl1, second_vector, NULL_TREE, op_type,
+                       double_vec_dest, gsi, stmt);
+         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+         prev_stmt_info = vinfo_for_stmt (new_stmt);
+
+         new_stmt = vect_gen_widened_results_half (code2, vectype_out,
+                       decl2, second_vector, NULL_TREE, op_type,
+                       double_vec_dest, gsi, stmt);
+         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+         prev_stmt_info = vinfo_for_stmt (new_stmt);
+       }
     }
 
   *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
@@ -4865,8 +5036,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!vec_stmt) /* transformation not required.  */
     {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-      if (!PURE_SLP_STMT (stmt_info))
-       vect_model_store_cost (stmt_info, ncopies, dt, NULL);
+      vect_model_store_cost (stmt_info, ncopies, dt, NULL);
       return true;
    }
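
What the relaxed nested-loop restriction enables, as a hedged illustration: the hunks above delete the "multiple types in nested loop" FORNOW bail-outs for reductions, calls, conversions, operations and type demotion/promotion, so a statement whose vectorization factor exceeds its vectype's nunits (ncopies > 1) can now be handled in an outer-loop context. The loop nest below is a sketch of that shape; it is not taken from this commit or its testsuite, and whether it actually vectorizes depends on the target's vector widths.

/* Illustrative only.  With 128-bit vectors, the unsigned short elements
   occupy 8 lanes per vector while the unsigned int accumulator occupies 4,
   so vectorizing the inner reduction needs ncopies == 2 copies of the
   reduction phi -- the case the deleted FORNOW checks used to reject.  */
unsigned int
sum_matrix (unsigned short a[64][64])
{
  unsigned int total = 0;
  for (int i = 0; i < 64; i++)
    for (int j = 0; j < 64; j++)
      total += a[i][j];
  return total;
}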
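The vectorizable_type_demotion change additionally accepts nunits_in == nunits_out / 4 and narrows in two steps through intermediate_type. A minimal sketch of a loop with that shape (int to signed char; the intermediate type, e.g. short, is whatever supportable_narrowing_operation reports — illustrative, not from the commit):

/* Illustrative only: four int vectors are packed pairwise into two
   intermediate (e.g. short) vectors, which are then packed into one
   char vector -- the first_vector/second_vector combination above.  */
void
demote (const int *src, signed char *dst, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = (signed char) src[i];
}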
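Symmetrically, vectorizable_type_promotion now handles nunits_out == nunits_in / 4 in two phases, char to short to int as the new comment in the promotion hunk describes. An illustrative mirror of the demotion sketch, under the same assumptions:

/* Illustrative only: one char vector is unpacked into two intermediate
   (short) vectors, each of which is unpacked again into two int
   vectors -- four vect_gen_widened_results_half calls per copy.  */
void
promote (const signed char *src, int *dst, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = src[i];
}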