author:    rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4>  2006-11-12 20:05:49 +0000
committer: rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4>  2006-11-12 20:05:49 +0000
commit:    53d4d5ccad6817ab0c190f407b57fa14b9ec6db8 (patch)
tree:      872aef0e893c395cc6fdd9d9fe625854bf2281f4 /gcc/tree-ssa-loop-prefetch.c
parent:    faa56cf9fb2c67f8b284c4d9fcdc641a7728a4a9 (diff)
* tree-ssa-loop-prefetch.c (schedule_prefetches): Cleanup and improve
comments.
(issue_prefetch_ref): Move assignment to write_p out of loop.
(determine_unroll_factor): Do not take PARAM_MAX_UNROLL_TIMES and
SIMULTANEOUS_PREFETCHES into account.
(loop_prefetch_arrays): Do not pass ahead to determine_unroll_factor.
* lambda-code.c (lcm): Renamed to ...
(least_common_multiple): ... and exported.
* tree-flow.h (least_common_multiple): Declare.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@118730 138bc75d-0d04-0410-961f-82ee72b054a4
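
The lcm rename in lambda-code.c makes the helper usable from this pass for
the new unroll-factor computation. A minimal sketch of such a helper, built
on Euclid's gcd; the unsigned types here are an assumption, and the real
declaration lives in tree-flow.h:

/* Sketch of a least-common-multiple helper in the spirit of the one
   exported from lambda-code.c; not the exact GCC implementation.  */
static unsigned
gcd (unsigned a, unsigned b)
{
  while (b != 0)
    {
      unsigned t = a % b;
      a = b;
      b = t;
    }
  return a;
}

unsigned
least_common_multiple (unsigned a, unsigned b)
{
  /* Divide before multiplying to reduce the risk of overflow.  */
  return a / gcd (a, b) * b;
}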
Diffstat (limited to 'gcc/tree-ssa-loop-prefetch.c')
-rw-r--r--  gcc/tree-ssa-loop-prefetch.c | 103
1 file changed, 50 insertions(+), 53 deletions(-)
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 41ada264a04..b40fc0f6617 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -744,19 +744,21 @@ static bool
schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor,
unsigned ahead)
{
- unsigned max_prefetches, n_prefetches;
+ unsigned remaining_prefetch_slots, n_prefetches, prefetch_slots;
+ unsigned slots_per_prefetch;
struct mem_ref *ref;
bool any = false;
- max_prefetches = (SIMULTANEOUS_PREFETCHES * unroll_factor) / ahead;
- if (max_prefetches > (unsigned) SIMULTANEOUS_PREFETCHES)
- max_prefetches = SIMULTANEOUS_PREFETCHES;
+ /* At most SIMULTANEOUS_PREFETCHES should be running at the same time. */
+ remaining_prefetch_slots = SIMULTANEOUS_PREFETCHES;
+ /* The prefetch will run for AHEAD iterations of the original loop, i.e.,
+ AHEAD / UNROLL_FACTOR iterations of the unrolled loop. In each iteration,
+ it will need a prefetch slot. */
+ slots_per_prefetch = (ahead + unroll_factor / 2) / unroll_factor;
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Max prefetches to issue: %d.\n", max_prefetches);
-
- if (!max_prefetches)
- return false;
+ fprintf (dump_file, "Each prefetch instruction takes %u prefetch slots.\n",
+ slots_per_prefetch);
/* For now we just take memory references one by one and issue
prefetches for as many as possible. The groups are sorted
@@ -769,16 +771,24 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor,
if (!should_issue_prefetch_p (ref))
continue;
- ref->issue_prefetch_p = true;
-
- /* If prefetch_mod is less then unroll_factor, we need to insert
- several prefetches for the reference. */
+ /* If we need to prefetch the reference each PREFETCH_MOD iterations,
+ and we unroll the loop UNROLL_FACTOR times, we need to insert
+ ceil (UNROLL_FACTOR / PREFETCH_MOD) instructions in each
+ iteration. */
n_prefetches = ((unroll_factor + ref->prefetch_mod - 1)
/ ref->prefetch_mod);
- if (max_prefetches <= n_prefetches)
- return true;
+ prefetch_slots = n_prefetches * slots_per_prefetch;
+
+ /* If more than half of the prefetches would be lost anyway, do not
+ issue the prefetch. */
+ if (2 * remaining_prefetch_slots < prefetch_slots)
+ continue;
+
+ ref->issue_prefetch_p = true;
- max_prefetches -= n_prefetches;
+ if (remaining_prefetch_slots <= prefetch_slots)
+ return true;
+ remaining_prefetch_slots -= prefetch_slots;
any = true;
}
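
The effect of the new slot accounting above is easier to see outside the
pass. The following standalone sketch uses hypothetical values for ahead,
unroll_factor and the per-reference prefetch_mod fields; only the arithmetic
mirrors the patch:

/* Standalone sketch of the slot-accounting heuristic; the constants and
   the simplified ref list are hypothetical, not GCC code.  */
#include <stdio.h>

#define SIMULTANEOUS_PREFETCHES 4	/* assumed target parameter */

int
main (void)
{
  unsigned ahead = 12, unroll_factor = 4;
  unsigned prefetch_mods[] = { 1, 2, 8 };	/* hypothetical refs */
  unsigned remaining = SIMULTANEOUS_PREFETCHES;
  unsigned i;

  /* Each prefetch stays live for AHEAD original iterations, i.e.
     AHEAD / UNROLL_FACTOR unrolled iterations, rounded to nearest.  */
  unsigned slots_per_prefetch = (ahead + unroll_factor / 2) / unroll_factor;

  for (i = 0; i < 3; i++)
    {
      unsigned n_prefetches
	= (unroll_factor + prefetch_mods[i] - 1) / prefetch_mods[i];
      unsigned slots = n_prefetches * slots_per_prefetch;

      /* Skip the ref if more than half of its prefetches would be lost.  */
      if (2 * remaining < slots)
	continue;
      printf ("ref %u: issue %u prefetches (%u slots)\n",
	      i, n_prefetches, slots);
      if (remaining <= slots)
	break;
      remaining -= slots;
    }
  return 0;
}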
@@ -822,6 +832,7 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead)
/ ref->prefetch_mod);
addr_base = build_fold_addr_expr_with_type (ref->mem, ptr_type_node);
addr_base = force_gimple_operand_bsi (&bsi, unshare_expr (addr_base), true, NULL);
+ write_p = ref->write_p ? integer_one_node : integer_zero_node;
for (ap = 0; ap < n_prefetches; ap++)
{
@@ -832,10 +843,9 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead)
addr = force_gimple_operand_bsi (&bsi, unshare_expr (addr), true, NULL);
/* Create the prefetch instruction. */
- write_p = ref->write_p ? integer_one_node : integer_zero_node;
params = tree_cons (NULL_TREE, addr,
tree_cons (NULL_TREE, write_p, NULL_TREE));
-
+
prefetch = build_function_call_expr (built_in_decls[BUILT_IN_PREFETCH],
params);
bsi_insert_before (&bsi, prefetch, BSI_SAME_STMT);
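
At the source level, the call built from BUILT_IN_PREFETCH corresponds to
something like the following; the array, trip count and prefetch distance
are hypothetical, and the second argument is the write_p value that the
patch now computes once instead of once per emitted prefetch:

void
example (double *a, long n)
{
  long i;

  for (i = 0; i < n; i++)
    {
      /* Second argument is write_p: 1 because this reference stores.
	 The distance of 64 elements is an assumed AHEAD * step.  */
      __builtin_prefetch (&a[i + 64], 1);
      a[i] *= 2.0;
    }
}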
@@ -888,48 +898,36 @@ should_unroll_loop_p (struct loop *loop, struct tree_niter_desc *desc,
static unsigned
determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs,
- unsigned ahead, unsigned ninsns,
- struct tree_niter_desc *desc)
+ unsigned ninsns, struct tree_niter_desc *desc)
{
- unsigned upper_bound, size_factor, constraint_factor;
- unsigned factor, max_mod_constraint, ahead_factor;
+ unsigned upper_bound;
+ unsigned nfactor, factor, mod_constraint;
struct mem_ref_group *agp;
struct mem_ref *ref;
- upper_bound = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
-
- /* First check whether the loop is not too large to unroll. */
- size_factor = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns;
- if (size_factor <= 1)
+ /* First check whether the loop is not too large to unroll. We ignore
+ PARAM_MAX_UNROLL_TIMES, because for small loops it prevented us
+ from unrolling them enough for each iteration to cover exactly one
+ cache line. Also, the goal of PARAM_MAX_UNROLL_TIMES is to prevent
+ us from unrolling loops too many times in cases where we only expect
+ gains from better scheduling and decreased loop overhead, which is not
+ the case here. */
+ upper_bound = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns;
+ if (upper_bound <= 1)
return 1;
- if (size_factor < upper_bound)
- upper_bound = size_factor;
-
- max_mod_constraint = 1;
+ /* Choose the factor so that we may prefetch each cache line just once,
+ but bound the unrolling by UPPER_BOUND. */
+ factor = 1;
for (agp = refs; agp; agp = agp->next)
for (ref = agp->refs; ref; ref = ref->next)
- if (should_issue_prefetch_p (ref)
- && ref->prefetch_mod > max_mod_constraint)
- max_mod_constraint = ref->prefetch_mod;
-
- /* Set constraint_factor as large as needed to be able to satisfy the
- largest modulo constraint. */
- constraint_factor = max_mod_constraint;
-
- /* If ahead is too large in comparison with the number of available
- prefetches, unroll the loop as much as needed to be able to prefetch
- at least partially some of the references in the loop. */
- ahead_factor = ((ahead + SIMULTANEOUS_PREFETCHES - 1)
- / SIMULTANEOUS_PREFETCHES);
-
- /* Unroll as much as useful, but bound the code size growth. */
- if (constraint_factor < ahead_factor)
- factor = ahead_factor;
- else
- factor = constraint_factor;
- if (factor > upper_bound)
- factor = upper_bound;
+ if (should_issue_prefetch_p (ref))
+ {
+ mod_constraint = ref->prefetch_mod;
+ nfactor = least_common_multiple (mod_constraint, factor);
+ if (nfactor <= upper_bound)
+ factor = nfactor;
+ }
if (!should_unroll_loop_p (loop, desc, factor))
return 1;
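
Extracted from the pass, the new selection loop amounts to growing the
factor by least common multiples while the result stays within the size
bound. A sketch, where MODS stands in for the prefetch_mod values of the
references and least_common_multiple is as outlined after the commit
message:

extern unsigned least_common_multiple (unsigned, unsigned);

static unsigned
choose_factor (const unsigned *mods, unsigned n, unsigned upper_bound)
{
  unsigned factor = 1, nfactor, i;

  for (i = 0; i < n; i++)
    {
      /* Only grow the factor if the LCM still fits the size bound;
	 otherwise keep the previous (still valid) factor.  */
      nfactor = least_common_multiple (mods[i], factor);
      if (nfactor <= upper_bound)
	factor = nfactor;
    }
  return factor;
}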
@@ -964,8 +962,7 @@ loop_prefetch_arrays (struct loops *loops, struct loop *loop)
instructions executed per iteration of the loop. */
ninsns = tree_num_loop_insns (loop);
ahead = (PREFETCH_LATENCY + ninsns - 1) / ninsns;
- unroll_factor = determine_unroll_factor (loop, refs, ahead, ninsns,
- &desc);
+ unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);
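
For reference, ahead is the prefetch latency converted into loop iterations
with the usual round-up division; a tiny check with assumed values
PREFETCH_LATENCY = 200 and ninsns = 30:

/* AHEAD = ceil (PREFETCH_LATENCY / ninsns); the values are hypothetical.  */
#include <assert.h>

int
main (void)
{
  unsigned prefetch_latency = 200, ninsns = 30;
  unsigned ahead = (prefetch_latency + ninsns - 1) / ninsns;

  assert (ahead == 7);	/* 200 / 30 == 6.67, rounded up.  */
  return 0;
}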