diff options
author | hjagasia <hjagasia@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-09-11 00:13:47 +0000 |
---|---|---|
committer | hjagasia <hjagasia@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-09-11 00:13:47 +0000 |
commit | 6202d4dbe64fab3147f67e6c836249f7e31ddd6c (patch) | |
tree | ac4783aa576a9af7b40d78f139e042b91e781996 /gcc/tree-vect-transform.c | |
parent | c8ac5d9a0464767d7091606c4d55aaaf8edc511a (diff) | |
download | gcc-6202d4dbe64fab3147f67e6c836249f7e31ddd6c.tar.gz |
2007-09-10 Harsha Jagasia <harsha.jagasia@amd.com>
Jan Sjodin <jan.sjodin@amd.com>
* tree-vect-analyze.c (vect_analyze_operations): Change
comparison of loop iterations with threshold to less than
or equal to instead of less than. Reduce
min_scalar_loop_bound by one.
* tree-vect-transform.c (vect_estimate_min_profitable_iters):
Change prologue and epilogue iterations estimate to vf/2,
when unknown at compile-time. Change versioning guard
cost to taken_branch_cost. If peeling for alignment is
unknown at compile-time, change peel guard costs to one
taken branch and one not-taken branch per peeled loop.
If peeling for alignment is known but number of scalar loop
iterations is unknown at compile-time, change peel guard
costs to one taken branch per peeled loop. Change the cost
model equation to consider vector iterations as the loop
iterations less the prologue and epilogue iterations.
Change outside vector cost check to less than or equal to
zero instead of equal to zero.
(vect_do_peeling_for_loop_bound): Reduce
min_scalar_loop_bound by one.
* tree-vectorizer.h: Add TARG_COND_TAKEN_BRANCH_COST and
TARG_COND_NOT_TAKEN_BRANCH_COST.
* config/i386/i386.h (processor_costs): Add
scalar_stmt_cost, scalar_load_cost, scalar_store_cost,
vec_stmt_cost, vec_to_scalar_cost, scalar_to_vec_cost,
vec_align_load_cost, vect_unalign_load_cost,
vec_store_cost, cond_taken_branch_cost,
cond_not_taken_branch_cost.
Define macros for x86 costs.
* config/i386/i386.c:
(size_cost): Set scalar_stmt_cost, scalar_load_cost,
scalar_store_cost, vec_stmt_cost, vec_to_scalar_cost,
scalar_to_vec_cost, vec_align_load_cost,
vect_unalign_load_cost, vec_store_cost,
cond_taken_branch_cost, cond_not_taken_branch_cost to one.
(i386_cost, i486_cost, pentium_cost, pentiumpro_cost,
geode_cost, k6_cost, athlon_cost, pentium4_cost, nocona_cost,
core2_cost, generic64_cost, generic32_cost): Set to default
untuned costs.
(k8_cost, amdfam10_cost): Costs for vectorization tuned.
(x86_builtin_vectorization_cost): New.
2007-09-10 Harsha Jagasia <harsha.jagasia@amd.com>
* gcc.dg/vect/costmodel/i386/costmodel-vect-31.c:
Change dg-final to expect 1 non-profitable loop and
3 profitable loops.
* gcc.dg/vect/costmodel/x86-64/costmodel-vect-31.c:
Change dg-final to expect 1 non-profitable loop and
3 profitable loops.
* gcc.dg/vect/costmodel/x86-64/costmodel-fast-math-vect-pr29925.c:
Change dg-final to expect 1 profitable loop.
* gcc.dg/vect/costmodel/i386/costmodel-fast-math-vect-pr29925.c:
Change dg-final to expect 1 profitable loop.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@128353 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r-- | gcc/tree-vect-transform.c | 79 |
1 files changed, 37 insertions, 42 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 30dbf712e55..e2ee92b0d0e 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -124,6 +124,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes; int byte_misalign; + int peel_guard_costs = 0; int innerloop_iters = 0, factor; VEC (slp_instance, heap) *slp_instances; slp_instance instance; @@ -141,7 +142,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))) { - vec_outside_cost += TARG_COND_BRANCH_COST; + vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "cost model: Adding cost of checks for loop " "versioning.\n"); @@ -188,7 +189,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) loop. FORNOW: If we dont know the value of peel_iters for prologue or epilogue - at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1). + at compile-time - we assume it's vf/2 (the worst would be vf-1). TODO: Build an expression that represents peel_iters for prologue and epilogue to be used in a run-time test. */ @@ -197,18 +198,26 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) if (byte_misalign < 0) { - peel_iters_prologue = (vf - 1)/2; + peel_iters_prologue = vf/2; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "cost model: " - "prologue peel iters set to (vf-1)/2."); + "prologue peel iters set to vf/2."); /* If peeling for alignment is unknown, loop bound of main loop becomes unknown. 
*/ - peel_iters_epilogue = (vf - 1)/2; + peel_iters_epilogue = vf/2; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "cost model: " - "epilogue peel iters set to (vf-1)/2 because " + "epilogue peel iters set to vf/2 because " "peeling for alignment is unknown ."); + + /* If peeled iterations are unknown, count a taken branch and a not taken + branch per peeled loop. Even if scalar loop iterations are known, + vector iterations are not known since peeled prologue iterations are + not known. Hence guards remain the same. */ + peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST + + TARG_COND_NOT_TAKEN_BRANCH_COST); + } else { @@ -226,11 +235,16 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) { - peel_iters_epilogue = (vf - 1)/2; + peel_iters_epilogue = vf/2; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "cost model: " - "epilogue peel iters set to (vf-1)/2 because " + "epilogue peel iters set to vf/2 because " "loop iterations are unknown ."); + + /* If peeled iterations are known but number of scalar loop + iterations are unknown, count a taken branch per peeled loop. */ + peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST; + } else { @@ -241,33 +255,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) } } - /* Requires a prologue loop when peeling to handle misalignment. Add cost of - two guards, one for the peeled loop and one for the vector loop. */ - - if (peel_iters_prologue) - { - vec_outside_cost += 2 * TARG_COND_BRANCH_COST; - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "cost model: Adding cost of checks for " - "prologue.\n"); - } - - /* Requires an epilogue loop to finish up remaining iterations after vector - loop. Add cost of two guards, one for the peeled loop and one for the - vector loop. 
*/ - - if (peel_iters_epilogue - || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf) - { - vec_outside_cost += 2 * TARG_COND_BRANCH_COST; - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "cost model : Adding cost of checks for " - "epilogue.\n"); - } - vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost) - + (peel_iters_epilogue * scalar_single_iter_cost); + + (peel_iters_epilogue * scalar_single_iter_cost) + + peel_guard_costs; /* Allow targets add additional (outside-of-loop) costs. FORNOW, the only information we provide for the target is whether testing against the @@ -305,11 +295,13 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) if ((scalar_single_iter_cost * vf) > vec_inside_cost) { - if (vec_outside_cost == 0) + if (vec_outside_cost <= 0) min_profitable_iters = 1; else { - min_profitable_iters = (vec_outside_cost * vf) + min_profitable_iters = (vec_outside_cost * vf + - vec_inside_cost * peel_iters_prologue + - vec_inside_cost * peel_iters_epilogue) / ((scalar_single_iter_cost * vf) - vec_inside_cost); @@ -344,8 +336,6 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) peel_iters_epilogue); fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n", min_profitable_iters); - fprintf (vect_dump, " Actual minimum iters for profitability: %d\n", - min_profitable_iters < vf ? vf : min_profitable_iters); } min_profitable_iters = @@ -355,6 +345,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) if (niters <= min_profitable_iters) then skip the vectorized loop. */ min_profitable_iters--; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, " Profitability threshold = %d\n", + min_profitable_iters); + return min_profitable_iters; } @@ -6452,8 +6447,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio) /* Analyze cost to set threshhold for vectorized loop. 
*/ min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo); - min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) - * LOOP_VINFO_VECT_FACTOR (loop_vinfo); + min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND) + * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1); /* Use the cost model only if it is more conservative than user specified threshold. */ @@ -6464,8 +6459,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio) || min_profitable_iters > min_scalar_loop_bound)) th = (unsigned) min_profitable_iters; - if (min_profitable_iters - && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + if (((LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) + || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) && vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vectorization may not be profitable."); |