diff options
author | irar <irar@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-08-28 11:11:14 +0000 |
---|---|---|
committer | irar <irar@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-08-28 11:11:14 +0000 |
commit | a0515226f8073e566a0af2e005e40d850b6a81d8 (patch) | |
tree | e0ca36b1b58d3d02e70b7343f52d0af07126a5b3 /gcc/tree-vect-analyze.c | |
parent | add2055dad476baa30b1969210de237cb0e159f8 (diff) | |
download | gcc-a0515226f8073e566a0af2e005e40d850b6a81d8.tar.gz |
* target.h (struct vectorize): Add new target builtin.
* tree-vectorizer.c (destroy_loop_vec_info): Call
vect_free_slp_instance instead of vect_free_slp_node.
* tree-vectorizer.h (enum slp_load_perm_type): New.
(struct _slp_instance): Add new fields.
(SLP_INSTANCE_LOAD_PERMUTATION): New.
(SLP_INSTANCE_LOADS): New.
(vect_free_slp_tree): Remove.
(vect_free_slp_instance): Declare.
(SLP_TREE_LOADS_PERM_TYPE, TARG_VEC_PERMUTE_COST): New.
(vectorizable_load): Add argument.
(vect_transform_slp_perm_load): New.
* tree-vect-analyze.c (vect_analyze_operations): Add an argument to
vectorizable_load.
(vect_get_place_in_interleaving_chain): New function.
(vect_free_slp_tree): Make static.
(vect_free_slp_instance): New function.
(vect_build_slp_tree): Add new arguments. Allow load permutations and
collect the load location in the interleaving chain.
(vect_supported_slp_permutation_p): New function.
(vect_supported_load_permutation_p): Likewise.
(vect_analyze_slp_instance): In case of loads permutation, call
vect_supported_load_permutation_p to check that the permutation is
supported.
* target-def.h (TARGET_VECTORIZE_BUILTIN_VEC_PERM): New.
* tree-vect-transform.c (vect_transform_stmt): Add new argument.
(vect_create_mask_and_perm): New function.
(vect_get_mask_element, vect_transform_slp_perm_load): Likewise.
(vectorizable_load): Add an argument. Don't keep the created vectors
statements in the node if permutation is required. Call
vect_transform_slp_perm_load to generate the permutation.
(vect_transform_stmt): Add new argument. Call vectorizable_load with
additional argument.
(vect_schedule_slp_instance): In case of loads permutation, allocate
vectorized statements structure for all the related SLP nodes. Call
vect_transform_stmt with addditional argument.
(vect_transform_loop): Call vect_transform_stmt with correct arguments.
* config/spu/spu.c (spu_builtin_vec_perm): New.
(TARGET_VECTORIZE_BUILTIN_VEC_PERM): Redefine.
* config/spu/spu.h (TARG_VEC_PERMUTE_COS): Define.
* config/rs6000/rs6000.c (rs6000_builtin_vec_perm): New.
(TARGET_VECTORIZE_BUILTIN_VEC_PERM): Redefine.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@139706 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vect-analyze.c')
-rw-r--r-- | gcc/tree-vect-analyze.c | 339 |
1 files changed, 259 insertions, 80 deletions
diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c index 305ba4c6603..c672d7affec 100644 --- a/gcc/tree-vect-analyze.c +++ b/gcc/tree-vect-analyze.c @@ -486,7 +486,7 @@ vect_analyze_operations (loop_vec_info loop_vinfo) || vectorizable_conversion (stmt, NULL, NULL, NULL) || vectorizable_operation (stmt, NULL, NULL, NULL) || vectorizable_assignment (stmt, NULL, NULL, NULL) - || vectorizable_load (stmt, NULL, NULL, NULL) + || vectorizable_load (stmt, NULL, NULL, NULL, NULL) || vectorizable_call (stmt, NULL, NULL) || vectorizable_store (stmt, NULL, NULL, NULL) || vectorizable_condition (stmt, NULL, NULL) @@ -846,6 +846,31 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) } +/* Find the place of the data-ref in STMT in the interleaving chain that starts + from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */ + +static int +vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt) +{ + gimple next_stmt = first_stmt; + int result = 0; + + if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt))) + return -1; + + while (next_stmt && next_stmt != stmt) + { + result++; + next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); + } + + if (next_stmt) + return result; + else + return -1; +} + + /* Function vect_insert_into_interleaving_chain. Insert DRA into the interleaving chain of DRB according to DRA's INIT. */ @@ -2482,7 +2507,7 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) /* Recursively free the memory allocated for the SLP tree rooted at NODE. */ -void +static void vect_free_slp_tree (slp_tree node) { if (!node) @@ -2503,6 +2528,17 @@ vect_free_slp_tree (slp_tree node) } +/* Free the memory allocated for the SLP instance. */ + +void +vect_free_slp_instance (slp_instance instance) +{ + vect_free_slp_tree (SLP_INSTANCE_TREE (instance)); + VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance)); + VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance)); +} + + /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that they are of a legal type and that they match the defs of the first stmt of the SLP group (stored in FIRST_STMT_...). */ @@ -2705,7 +2741,9 @@ static bool vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node, unsigned int group_size, int *inside_cost, int *outside_cost, - int ncopies_for_cost, unsigned int *max_nunits) + int ncopies_for_cost, unsigned int *max_nunits, + VEC (int, heap) **load_permutation, + VEC (slp_tree, heap) **loads) { VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size); VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size); @@ -2716,7 +2754,6 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node, enum tree_code first_stmt_code = 0, rhs_code; tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE; tree lhs; - gimple prev_stmt = NULL; bool stop_recursion = false, need_same_oprnds = false; tree vectype, scalar_type, first_op1 = NULL_TREE; unsigned int vectorization_factor = 0, ncopies; @@ -2728,6 +2765,9 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node, struct data_reference *first_dr; bool pattern0 = false, pattern1 = false; HOST_WIDE_INT dummy; + bool permutation = false; + unsigned int load_place; + gimple first_load; /* For every stmt in NODE find its def stmt/s. */ for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++) @@ -2813,8 +2853,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node, if (icode == CODE_FOR_nothing) { if (vect_print_dump_info (REPORT_SLP)) - fprintf (vect_dump, - "Build SLP failed: op not supported by target."); + fprintf (vect_dump, "Build SLP failed: " + "op not supported by target."); return false; } optab_op2_mode = insn_data[icode].operand[2].mode; @@ -2878,70 +2918,60 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node, else { /* Load. */ - if (i == 0) - { - /* First stmt of the SLP group should be the first load of - the interleaving loop if data permutation is not allowed. - Check that there is no gap between the loads. */ - if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt - || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0) - { - /* FORNOW: data permutations and gaps in loads are not - supported. */ - if (vect_print_dump_info (REPORT_SLP)) - { - fprintf (vect_dump, "Build SLP failed: strided " - " loads need permutation or have gaps "); - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } - - return false; - } - - first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); - if (vect_supportable_dr_alignment (first_dr) - == dr_unaligned_unsupported) - { - if (vect_print_dump_info (REPORT_SLP)) - { - fprintf (vect_dump, "Build SLP failed: unsupported " - " unaligned load "); - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } - - return false; - } - - /* Analyze costs (for the first stmt in the group). */ - vect_model_load_cost (vinfo_for_stmt (stmt), - ncopies_for_cost, *node); - } - else - { - /* Check that we have consecutive loads from interleaving - chain and that there is no gap between the loads. */ - if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt - || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1) - { - /* FORNOW: data permutations and gaps in loads are not - supported. */ - if (vect_print_dump_info (REPORT_SLP)) - { - fprintf (vect_dump, "Build SLP failed: strided " - " loads need permutation or have gaps "); - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } - return false; - } - } - - prev_stmt = stmt; - - /* We stop the tree when we reach a group of loads. */ - stop_recursion = true; - continue; - } - } /* Strided access. */ + /* FORNOW: Check that there is no gap between the loads. */ + if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt + && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0) + || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt + && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1)) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: strided " + "loads have gaps "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + + return false; + } + + first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)); + + if (first_load == stmt) + { + first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); + if (vect_supportable_dr_alignment (first_dr) + == dr_unaligned_unsupported) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: unsupported " + "unaligned load "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + + return false; + } + + /* Analyze costs (for the first stmt in the group). */ + vect_model_load_cost (vinfo_for_stmt (stmt), + ncopies_for_cost, *node); + } + + /* Store the place of this load in the interleaving chain. In + case that permutation is needed we later decide if a specific + permutation is supported. */ + load_place = vect_get_place_in_interleaving_chain (stmt, + first_load); + if (load_place != i) + permutation = true; + + VEC_safe_push (int, heap, *load_permutation, load_place); + + /* We stop the tree when we reach a group of loads. */ + stop_recursion = true; + continue; + } + } /* Strided access. */ else { if (TREE_CODE_CLASS (rhs_code) == tcc_reference) @@ -2990,7 +3020,15 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node, /* Strided loads were reached - stop the recursion. */ if (stop_recursion) - return true; + { + if (permutation) + { + VEC_safe_push (slp_tree, heap, *loads, *node); + *inside_cost += TARG_VEC_PERMUTE_COST * group_size; + } + + return true; + } /* Create SLP_TREE nodes for the definition node/s. */ if (first_stmt_dt0 == vect_loop_def) @@ -3003,8 +3041,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node, SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0; SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0; if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size, - inside_cost, outside_cost, - ncopies_for_cost, max_nunits)) + inside_cost, outside_cost, ncopies_for_cost, + max_nunits, load_permutation, loads)) return false; SLP_TREE_LEFT (*node) = left_node; @@ -3020,8 +3058,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node, SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0; SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0; if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size, - inside_cost, outside_cost, - ncopies_for_cost, max_nunits)) + inside_cost, outside_cost, ncopies_for_cost, + max_nunits, load_permutation, loads)) return false; SLP_TREE_RIGHT (*node) = right_node; @@ -3076,6 +3114,116 @@ vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j) } +/* Check if the permutation required by the SLP INSTANCE is supported. + Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */ + +static bool +vect_supported_slp_permutation_p (slp_instance instance) +{ + slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0); + gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); + gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)); + VEC (slp_tree, heap) *sorted_loads = NULL; + int index; + slp_tree *tmp_loads = NULL; + int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j; + slp_tree load; + + /* FORNOW: The only supported loads permutation is loads from the same + location in all the loads in the node, when the data-refs in + nodes of LOADS constitute an interleaving chain. + Sort the nodes according to the order of accesses in the chain. */ + tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size); + for (i = 0, j = 0; + VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index) + && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load); + i += group_size, j++) + { + gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0); + /* Check that the loads are all in the same interleaving chain. */ + if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "Build SLP failed: unsupported data " + "permutation "); + print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM); + } + + free (tmp_loads); + return false; + } + + tmp_loads[index] = load; + } + + sorted_loads = VEC_alloc (slp_tree, heap, group_size); + for (i = 0; i < group_size; i++) + VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]); + + VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance)); + SLP_INSTANCE_LOADS (instance) = sorted_loads; + free (tmp_loads); + + if (!vect_transform_slp_perm_load (stmt, NULL, NULL, + SLP_INSTANCE_UNROLLING_FACTOR (instance), + instance, true)) + return false; + + return true; +} + + +/* Check if the required load permutation is supported. + LOAD_PERMUTATION contains a list of indices of the loads. + In SLP this permutation is relative to the order of strided stores that are + the base of the SLP instance. */ + +static bool +vect_supported_load_permutation_p (slp_instance slp_instn, int group_size, + VEC (int, heap) *load_permutation) +{ + int i = 0, j, prev = -1, next, k; + bool supported; + + /* FORNOW: permutations are only supported for loop-aware SLP. */ + if (!slp_instn) + return false; + + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Load permutation "); + for (i = 0; VEC_iterate (int, load_permutation, i, next); i++) + fprintf (vect_dump, "%d ", next); + } + + /* FORNOW: the only supported permutation is 0..01..1.. of length equal to + GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as + well. */ + supported = true; + for (j = 0; j < group_size; j++) + { + for (i = j * group_size, k = 0; + VEC_iterate (int, load_permutation, i, next) && k < group_size; + i++, k++) + { + if (i != j * group_size && next != prev) + { + supported = false; + break; + } + + prev = next; + } + } + + if (supported && i == group_size * group_size + && vect_supported_slp_permutation_p (slp_instn)) + return true; + + return false; +} + /* Analyze an SLP instance starting from a group of strided stores. Call vect_build_slp_tree to build a tree of packed stmts if possible. Return FALSE if it's impossible to SLP any stmt in the loop. */ @@ -3093,8 +3241,11 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt) bool slp_impossible = false; int inside_cost = 0, outside_cost = 0, ncopies_for_cost; unsigned int max_nunits = 0; - - scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))); + VEC (int, heap) *load_permutation; + VEC (slp_tree, heap) *loads; + + scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF ( + vinfo_for_stmt (stmt)))); vectype = get_vectype_for_scalar_type (scalar_type); if (!vectype) { @@ -3134,16 +3285,21 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt) factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is GROUP_SIZE / NUNITS otherwise. */ ncopies_for_cost = unrolling_factor * group_size / nunits; + + load_permutation = VEC_alloc (int, heap, group_size * group_size); + loads = VEC_alloc (slp_tree, heap, group_size); /* Build the tree for the SLP instance. */ if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost, - &outside_cost, ncopies_for_cost, &max_nunits)) + &outside_cost, ncopies_for_cost, &max_nunits, + &load_permutation, &loads)) { /* Create a new SLP instance. */ new_instance = XNEW (struct _slp_instance); SLP_INSTANCE_TREE (new_instance) = node; SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; - /* Calculate the unrolling factor based on the smallest type. */ + /* Calculate the unrolling factor based on the smallest type in the + loop. */ if (max_nunits > nunits) unrolling_factor = least_common_multiple (max_nunits, group_size) / group_size; @@ -3151,6 +3307,27 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt) SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost; SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost; + SLP_INSTANCE_LOADS (new_instance) = loads; + SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; + if (VEC_length (slp_tree, loads)) + { + if (!vect_supported_load_permutation_p (new_instance, group_size, + load_permutation)) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: unsupported load " + "permutation "); + print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); + } + + vect_free_slp_instance (new_instance); + return false; + } + } + else + VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance)); + VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo), new_instance); if (vect_print_dump_info (REPORT_SLP)) @@ -3162,7 +3339,9 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt) /* Failed to SLP. */ /* Free the allocated memory. */ vect_free_slp_tree (node); - + VEC_free (int, heap, load_permutation); + VEC_free (slp_tree, heap, loads); + if (slp_impossible) return false; |