summaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop.c
diff options
context:
space:
mode:
authorirar <irar@138bc75d-0d04-0410-961f-82ee72b054a4>2010-07-04 08:54:52 +0000
committerirar <irar@138bc75d-0d04-0410-961f-82ee72b054a4>2010-07-04 08:54:52 +0000
commit0822b158e9f917b79d48c9866ca15e6eb6179ff1 (patch)
tree41c5db85c09ecda0974c98361603bea7b34ea3fc /gcc/tree-vect-loop.c
parentc4a298958260e54cdec63f8a9009cb5b291b3059 (diff)
downloadgcc-0822b158e9f917b79d48c9866ca15e6eb6179ff1.tar.gz
* doc/tm.texi.in (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST):
Document new arguments. * doc/tm.texi: Regenerate. * targhooks.c (default_builtin_vectorization_cost): Add new arguments. Handle unaligned store. * targhooks.h (default_builtin_vectorization_cost): Add new arguments. * target.def (builtin_vectorization_cost): Add new arguments. * target.h (enum vect_cost_for_stmt): Add unaligned_store. * tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Take number of iterations of prolog loop directly from LOOP_PEELING_FOR_ALIGNMENT. (vect_vfa_segment_size): Fix indentation. * tree-vectorizer.h (struct _vect_peel_info): New. (struct _vect_peel_extended_info): New. (struct _loop_vec_info): Add new field for peeling hash table and a macro for its access. (VECT_MAX_COST): Define. (vect_get_load_cost): Declare. (vect_get_store_cost, vect_get_known_peeling_cost, vect_get_single_scalar_iteraion_cost): Likewise. (vect_supportable_dr_alignment): Add new argument. * tree-vect-loop.c (new_loop_vec_info): Initialize peeling hash table field. (destroy_loop_vec_info): Free peeling hash table. (vect_analyze_loop_form): Update call to builtin_vectorization_cost. (vect_analyze_loop): Move vect_enhance_data_refs_alignment before vect_analyze_slp. Fix indentation. (vect_get_single_scalar_iteraion_cost): New function. (vect_get_known_peeling_cost): Likewise. (vect_estimate_min_profitable_iters): Rename byte_misalign to npeel. Call vect_get_single_scalar_iteraion_cost instead of cost_for_stmt per statement. Move outside cost calculation inside unknown peeling case. Call vect_get_known_peeling_cost for known amount of peeling. * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Add data reference to the print message of forced alignment. (vect_verify_datarefs_alignment): Update call to vect_supportable_dr_alignment. (vect_get_data_access_cost): New function. (vect_peeling_hash, vect_peeling_hash_eq, vect_peeling_hash_insert, vect_peeling_hash_get_most_frequent, vect_peeling_hash_get_lowest_cost, vect_peeling_hash_choose_best_peeling): Likewise. (vect_enhance_data_refs_alignment): Fix documentation. Use hash table to store all the accesses in the loop and find best possible access to align using peeling for known alignment case. For unknown alignment check if stores are preferred or if peeling is worthy. (vect_find_same_alignment_drs): Analyze pairs of loads too. (vect_supportable_dr_alignment): Add new argument and check aligned accesses according to it. * tree-vect-stmts.c (vect_get_stmt_cost): New function. (cost_for_stmt): Call vect_get_stmt_cost. (vect_model_simple_cost): Likewise. (vect_model_store_cost): Call vect_get_stmt_cost. Call vect_get_store_cost to calculate the cost of the statement. (vect_get_store_cost): New function. (vect_model_load_cost): Call vect_get_stmt_cost. Call vect_get_load_cost to calculate the cost of the statement. (vect_get_load_cost): New function. (vectorizable_store): Update call to vect_supportable_dr_alignment. (vectorizable_load): Likewise. * config/spu/spu.c (spu_builtin_vectorization_cost): Add new arguments. * config/i386/i386.c (ix86_builtin_vectorization_cost): Add new arguments. Handle unaligned store. * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): New. (rs6000_builtin_support_vector_misalignment): Return true for word and double word alignments for VSX. * tree-vect-slp.c (vect_build_slp_tree): Update calls to vect_supportable_dr_alignment and builtin_vectorization_cost. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@161797 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r--gcc/tree-vect-loop.c171
1 files changed, 120 insertions, 51 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 3b387169408..ef481735518 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -755,6 +755,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
+ LOOP_VINFO_PEELING_HTAB (res) = NULL;
return res;
}
@@ -845,6 +846,9 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
+ if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
+ htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
+
free (loop_vinfo);
loop->aux = NULL;
}
@@ -1122,7 +1126,11 @@ vect_analyze_loop_form (struct loop *loop)
static inline
int vect_get_cost (enum vect_cost_for_stmt type_of_cost)
{
- return targetm.vectorize.builtin_vectorization_cost (type_of_cost);
+ tree dummy_type = NULL;
+ int dummy = 0;
+
+ return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
+ dummy_type, dummy);
}
@@ -1498,17 +1506,6 @@ vect_analyze_loop (struct loop *loop)
return NULL;
}
- /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
- ok = vect_analyze_slp (loop_vinfo, NULL);
- if (ok)
- {
- /* Decide which possible SLP instances to SLP. */
- vect_make_slp_decision (loop_vinfo);
-
- /* Find stmts that need to be both vectorized and SLPed. */
- vect_detect_hybrid_slp (loop_vinfo);
- }
-
/* This pass will decide on using loop versioning and/or loop peeling in
order to enhance the alignment of data references in the loop. */
@@ -1516,11 +1513,22 @@ vect_analyze_loop (struct loop *loop)
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "bad data alignment.");
+ fprintf (vect_dump, "bad data alignment.");
destroy_loop_vec_info (loop_vinfo, true);
return NULL;
}
+ /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
+ ok = vect_analyze_slp (loop_vinfo, NULL);
+ if (ok)
+ {
+ /* Decide which possible SLP instances to SLP. */
+ vect_make_slp_decision (loop_vinfo);
+
+ /* Find stmts that need to be both vectorized and SLPed. */
+ vect_detect_hybrid_slp (loop_vinfo);
+ }
+
/* Scan all the operations in the loop and make sure they are
vectorizable. */
@@ -2004,6 +2012,94 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
double_reduc, true);
}
+/* Calculate the cost of one scalar iteration of the loop. */
+int
+vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+ int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
+ int innerloop_iters, i, stmt_cost;
+
+ /* Count statements in scalar loop. Using this as scalar cost for a single
+ iteration for now.
+
+ TODO: Add outer loop support.
+
+ TODO: Consider assigning different costs to different scalar
+ statements. */
+
+ /* FORNOW. */
+ if (loop->inner)
+ innerloop_iters = 50; /* FIXME */
+
+ for (i = 0; i < nbbs; i++)
+ {
+ gimple_stmt_iterator si;
+ basic_block bb = bbs[i];
+
+ if (bb->loop_father == loop->inner)
+ factor = innerloop_iters;
+ else
+ factor = 1;
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+
+ if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
+ continue;
+
+ if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
+ {
+ if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
+ stmt_cost = vect_get_cost (scalar_load);
+ else
+ stmt_cost = vect_get_cost (scalar_store);
+ }
+ else
+ stmt_cost = vect_get_cost (scalar_stmt);
+
+ scalar_single_iter_cost += stmt_cost * factor;
+ }
+ }
+ return scalar_single_iter_cost;
+}
+
+/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
+int
+vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
+ int *peel_iters_epilogue,
+ int scalar_single_iter_cost)
+{
+ int peel_guard_costs = 0;
+ int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+ *peel_iters_epilogue = vf/2;
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "cost model: "
+ "epilogue peel iters set to vf/2 because "
+ "loop iterations are unknown .");
+
+ /* If peeled iterations are known but number of scalar loop
+ iterations are unknown, count a taken branch per peeled loop. */
+ peel_guard_costs = 2 * vect_get_cost (cond_branch_taken);
+ }
+ else
+ {
+ int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
+ peel_iters_prologue = niters < peel_iters_prologue ?
+ niters : peel_iters_prologue;
+ *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
+ }
+
+ return (peel_iters_prologue * scalar_single_iter_cost)
+ + (*peel_iters_epilogue * scalar_single_iter_cost)
+ + peel_guard_costs;
+}
+
/* Function vect_estimate_min_profitable_iters
Return the number of iterations required for the vector version of the
@@ -2028,7 +2124,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
- int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
int peel_guard_costs = 0;
int innerloop_iters = 0, factor;
VEC (slp_instance, heap) *slp_instances;
@@ -2099,7 +2195,6 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
&& (!STMT_VINFO_LIVE_P (stmt_info)
|| STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
continue;
- scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
/* FIXME: for stmts in the inner-loop in outer-loop vectorization,
some of the "outside" costs are generated inside the outer-loop. */
@@ -2107,6 +2202,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
}
}
+ scalar_single_iter_cost = vect_get_single_scalar_iteraion_cost (loop_vinfo);
+
/* Add additional cost for the peeled instructions in prologue and epilogue
loop.
@@ -2116,7 +2213,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
TODO: Build an expression that represents peel_iters for prologue and
epilogue to be used in a run-time test. */
- if (byte_misalign < 0)
+ if (npeel < 0)
{
peel_iters_prologue = vf/2;
if (vect_print_dump_info (REPORT_COST))
@@ -2137,46 +2234,18 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
not known. Hence guards remain the same. */
peel_guard_costs += 2 * (vect_get_cost (cond_branch_taken)
+ vect_get_cost (cond_branch_not_taken));
+ vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
+ + (peel_iters_epilogue * scalar_single_iter_cost)
+ + peel_guard_costs;
}
else
{
- if (byte_misalign)
- {
- struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
- int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
- tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
- int nelements = TYPE_VECTOR_SUBPARTS (vectype);
-
- peel_iters_prologue = nelements - (byte_misalign / element_size);
- }
- else
- peel_iters_prologue = 0;
-
- if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
- {
- peel_iters_epilogue = vf/2;
- if (vect_print_dump_info (REPORT_COST))
- fprintf (vect_dump, "cost model: "
- "epilogue peel iters set to vf/2 because "
- "loop iterations are unknown .");
-
- /* If peeled iterations are known but number of scalar loop
- iterations are unknown, count a taken branch per peeled loop. */
- peel_guard_costs += 2 * vect_get_cost (cond_branch_taken);
- }
- else
- {
- int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
- peel_iters_prologue = niters < peel_iters_prologue ?
- niters : peel_iters_prologue;
- peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
- }
+ peel_iters_prologue = npeel;
+ vec_outside_cost += vect_get_known_peeling_cost (loop_vinfo,
+ peel_iters_prologue, &peel_iters_epilogue,
+ scalar_single_iter_cost);
}
- vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
- + (peel_iters_epilogue * scalar_single_iter_cost)
- + peel_guard_costs;
-
/* FORNOW: The scalar outside cost is incremented in one of the
following ways: