diff options
author | irar <irar@138bc75d-0d04-0410-961f-82ee72b054a4> | 2010-07-04 08:54:52 +0000 |
---|---|---|
committer | irar <irar@138bc75d-0d04-0410-961f-82ee72b054a4> | 2010-07-04 08:54:52 +0000 |
commit | 0822b158e9f917b79d48c9866ca15e6eb6179ff1 (patch) | |
tree | 41c5db85c09ecda0974c98361603bea7b34ea3fc /gcc/tree-vect-loop.c | |
parent | c4a298958260e54cdec63f8a9009cb5b291b3059 (diff) | |
download | gcc-0822b158e9f917b79d48c9866ca15e6eb6179ff1.tar.gz |
* doc/tm.texi.in (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST):
Document new arguments.
* doc/tm.texi: Regenerate.
* targhooks.c (default_builtin_vectorization_cost): Add new arguments.
Handle unaligned store.
* targhooks.h (default_builtin_vectorization_cost): Add new arguments.
* target.def (builtin_vectorization_cost): Add new arguments.
* target.h (enum vect_cost_for_stmt): Add unaligned_store.
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Take number
of iterations of prolog loop directly from LOOP_PEELING_FOR_ALIGNMENT.
(vect_vfa_segment_size): Fix indentation.
* tree-vectorizer.h (struct _vect_peel_info): New.
(struct _vect_peel_extended_info): New.
(struct _loop_vec_info): Add new field for peeling hash table and a
macro for its access.
(VECT_MAX_COST): Define.
(vect_get_load_cost): Declare.
(vect_get_store_cost, vect_get_known_peeling_cost,
vect_get_single_scalar_iteraion_cost): Likewise.
(vect_supportable_dr_alignment): Add new argument.
* tree-vect-loop.c (new_loop_vec_info): Initialize peeling hash table
field.
(destroy_loop_vec_info): Free peeling hash table.
(vect_get_cost): Update call to builtin_vectorization_cost.
(vect_analyze_loop): Move vect_enhance_data_refs_alignment before
vect_analyze_slp. Fix indentation.
(vect_get_single_scalar_iteraion_cost): New function.
(vect_get_known_peeling_cost): Likewise.
(vect_estimate_min_profitable_iters): Rename byte_misalign to npeel.
Call vect_get_single_scalar_iteraion_cost instead of cost_for_stmt per
statement. Move the outside-cost calculation into the unknown-peeling case.
Call vect_get_known_peeling_cost for known amount of peeling.
* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Add data
reference to the print message of forced alignment.
(vect_verify_datarefs_alignment): Update call to
vect_supportable_dr_alignment.
(vect_get_data_access_cost): New function.
(vect_peeling_hash, vect_peeling_hash_eq, vect_peeling_hash_insert,
vect_peeling_hash_get_most_frequent, vect_peeling_hash_get_lowest_cost,
vect_peeling_hash_choose_best_peeling): Likewise.
(vect_enhance_data_refs_alignment): Fix documentation. Use hash table
to store all the accesses in the loop and find best possible access to
align using peeling for known alignment case. For unknown alignment
check if stores are preferred or if peeling is worthwhile.
(vect_find_same_alignment_drs): Analyze pairs of loads too.
(vect_supportable_dr_alignment): Add new argument and check aligned
accesses according to it.
* tree-vect-stmts.c (vect_get_stmt_cost): New function.
(cost_for_stmt): Call vect_get_stmt_cost.
(vect_model_simple_cost): Likewise.
(vect_model_store_cost): Call vect_get_stmt_cost. Call
vect_get_store_cost to calculate the cost of the statement.
(vect_get_store_cost): New function.
(vect_model_load_cost): Call vect_get_stmt_cost. Call
vect_get_load_cost to calculate the cost of the statement.
(vect_get_load_cost): New function.
(vectorizable_store): Update call to vect_supportable_dr_alignment.
(vectorizable_load): Likewise.
* config/spu/spu.c (spu_builtin_vectorization_cost): Add new
arguments.
* config/i386/i386.c (ix86_builtin_vectorization_cost): Add new
arguments. Handle unaligned store.
* config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): New.
(rs6000_builtin_support_vector_misalignment): Return true for word and
double word alignments for VSX.
* tree-vect-slp.c (vect_build_slp_tree): Update calls to
vect_supportable_dr_alignment and builtin_vectorization_cost.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@161797 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- | gcc/tree-vect-loop.c | 171 |
1 files changed, 120 insertions, 51 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 3b387169408..ef481735518 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -755,6 +755,7 @@ new_loop_vec_info (struct loop *loop) LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10); LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10); LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1; + LOOP_VINFO_PEELING_HTAB (res) = NULL; return res; } @@ -845,6 +846,9 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts) VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo)); VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo)); + if (LOOP_VINFO_PEELING_HTAB (loop_vinfo)) + htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo)); + free (loop_vinfo); loop->aux = NULL; } @@ -1122,7 +1126,11 @@ vect_analyze_loop_form (struct loop *loop) static inline int vect_get_cost (enum vect_cost_for_stmt type_of_cost) { - return targetm.vectorize.builtin_vectorization_cost (type_of_cost); + tree dummy_type = NULL; + int dummy = 0; + + return targetm.vectorize.builtin_vectorization_cost (type_of_cost, + dummy_type, dummy); } @@ -1498,17 +1506,6 @@ vect_analyze_loop (struct loop *loop) return NULL; } - /* Check the SLP opportunities in the loop, analyze and build SLP trees. */ - ok = vect_analyze_slp (loop_vinfo, NULL); - if (ok) - { - /* Decide which possible SLP instances to SLP. */ - vect_make_slp_decision (loop_vinfo); - - /* Find stmts that need to be both vectorized and SLPed. */ - vect_detect_hybrid_slp (loop_vinfo); - } - /* This pass will decide on using loop versioning and/or loop peeling in order to enhance the alignment of data references in the loop. 
*/ @@ -1516,11 +1513,22 @@ vect_analyze_loop (struct loop *loop) if (!ok) { if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "bad data alignment."); + fprintf (vect_dump, "bad data alignment."); destroy_loop_vec_info (loop_vinfo, true); return NULL; } + /* Check the SLP opportunities in the loop, analyze and build SLP trees. */ + ok = vect_analyze_slp (loop_vinfo, NULL); + if (ok) + { + /* Decide which possible SLP instances to SLP. */ + vect_make_slp_decision (loop_vinfo); + + /* Find stmts that need to be both vectorized and SLPed. */ + vect_detect_hybrid_slp (loop_vinfo); + } + /* Scan all the operations in the loop and make sure they are vectorizable. */ @@ -2004,6 +2012,94 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple phi, double_reduc, true); } +/* Calculate the cost of one scalar iteration of the loop. */ +int +vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo) +{ + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); + int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0; + int innerloop_iters, i, stmt_cost; + + /* Count statements in scalar loop. Using this as scalar cost for a single + iteration for now. + + TODO: Add outer loop support. + + TODO: Consider assigning different costs to different scalar + statements. */ + + /* FORNOW. 
*/ + if (loop->inner) + innerloop_iters = 50; /* FIXME */ + + for (i = 0; i < nbbs; i++) + { + gimple_stmt_iterator si; + basic_block bb = bbs[i]; + + if (bb->loop_father == loop->inner) + factor = innerloop_iters; + else + factor = 1; + + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { + gimple stmt = gsi_stmt (si); + + if (!is_gimple_assign (stmt) && !is_gimple_call (stmt)) + continue; + + if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))) + { + if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))) + stmt_cost = vect_get_cost (scalar_load); + else + stmt_cost = vect_get_cost (scalar_store); + } + else + stmt_cost = vect_get_cost (scalar_stmt); + + scalar_single_iter_cost += stmt_cost * factor; + } + } + return scalar_single_iter_cost; +} + +/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ +int +vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, + int *peel_iters_epilogue, + int scalar_single_iter_cost) +{ + int peel_guard_costs = 0; + int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + { + *peel_iters_epilogue = vf/2; + if (vect_print_dump_info (REPORT_COST)) + fprintf (vect_dump, "cost model: " + "epilogue peel iters set to vf/2 because " + "loop iterations are unknown ."); + + /* If peeled iterations are known but number of scalar loop + iterations are unknown, count a taken branch per peeled loop. */ + peel_guard_costs = 2 * vect_get_cost (cond_branch_taken); + } + else + { + int niters = LOOP_VINFO_INT_NITERS (loop_vinfo); + peel_iters_prologue = niters < peel_iters_prologue ? 
+ niters : peel_iters_prologue; + *peel_iters_epilogue = (niters - peel_iters_prologue) % vf; + } + + return (peel_iters_prologue * scalar_single_iter_cost) + + (*peel_iters_epilogue * scalar_single_iter_cost) + + peel_guard_costs; +} + /* Function vect_estimate_min_profitable_iters Return the number of iterations required for the vector version of the @@ -2028,7 +2124,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes; - int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); + int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); int peel_guard_costs = 0; int innerloop_iters = 0, factor; VEC (slp_instance, heap) *slp_instances; @@ -2099,7 +2195,6 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) && (!STMT_VINFO_LIVE_P (stmt_info) || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)) continue; - scalar_single_iter_cost += cost_for_stmt (stmt) * factor; vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor; /* FIXME: for stmts in the inner-loop in outer-loop vectorization, some of the "outside" costs are generated inside the outer-loop. */ @@ -2107,6 +2202,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) } } + scalar_single_iter_cost = vect_get_single_scalar_iteraion_cost (loop_vinfo); + /* Add additional cost for the peeled instructions in prologue and epilogue loop. @@ -2116,7 +2213,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) TODO: Build an expression that represents peel_iters for prologue and epilogue to be used in a run-time test. */ - if (byte_misalign < 0) + if (npeel < 0) { peel_iters_prologue = vf/2; if (vect_print_dump_info (REPORT_COST)) @@ -2137,46 +2234,18 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) not known. Hence guards remain the same. 
*/ peel_guard_costs += 2 * (vect_get_cost (cond_branch_taken) + vect_get_cost (cond_branch_not_taken)); + vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost) + + (peel_iters_epilogue * scalar_single_iter_cost) + + peel_guard_costs; } else { - if (byte_misalign) - { - struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); - int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); - tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))); - int nelements = TYPE_VECTOR_SUBPARTS (vectype); - - peel_iters_prologue = nelements - (byte_misalign / element_size); - } - else - peel_iters_prologue = 0; - - if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) - { - peel_iters_epilogue = vf/2; - if (vect_print_dump_info (REPORT_COST)) - fprintf (vect_dump, "cost model: " - "epilogue peel iters set to vf/2 because " - "loop iterations are unknown ."); - - /* If peeled iterations are known but number of scalar loop - iterations are unknown, count a taken branch per peeled loop. */ - peel_guard_costs += 2 * vect_get_cost (cond_branch_taken); - } - else - { - int niters = LOOP_VINFO_INT_NITERS (loop_vinfo); - peel_iters_prologue = niters < peel_iters_prologue ? - niters : peel_iters_prologue; - peel_iters_epilogue = (niters - peel_iters_prologue) % vf; - } + peel_iters_prologue = npeel; + vec_outside_cost += vect_get_known_peeling_cost (loop_vinfo, + peel_iters_prologue, &peel_iters_epilogue, + scalar_single_iter_cost); } - vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost) - + (peel_iters_epilogue * scalar_single_iter_cost) - + peel_guard_costs; - /* FORNOW: The scalar outside cost is incremented in one of the following ways: |