diff options
Diffstat (limited to 'gcc/tree-ssa-loop-prefetch.c')
-rw-r--r-- | gcc/tree-ssa-loop-prefetch.c | 45 |
1 files changed, 28 insertions, 17 deletions
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index e0612b9a56e..53977d8bddd 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -885,13 +885,14 @@ should_unroll_loop_p (struct loop *loop, struct tree_niter_desc *desc,
 
 /* Determine the coefficient by that unroll LOOP, from the information
    contained in the list of memory references REFS. Description of
-   umber of iterations of LOOP is stored to DESC. AHEAD is the number
-   of iterations ahead that we need to prefetch. NINSNS is number of
-   insns of the LOOP. */
+   umber of iterations of LOOP is stored to DESC. NINSNS is the number of
+   insns of the LOOP. EST_NITER is the estimated number of iterations of
+   the loop, or -1 if no estimate is available. */
 
 static unsigned
 determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs,
-                         unsigned ninsns, struct tree_niter_desc *desc)
+                         unsigned ninsns, struct tree_niter_desc *desc,
+                         HOST_WIDE_INT est_niter)
 {
   unsigned upper_bound;
   unsigned nfactor, factor, mod_constraint;
@@ -906,6 +907,12 @@ determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs,
      gains from better scheduling and decreasing loop overhead, which is not
      the case here. */
   upper_bound = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns;
+
+  /* If we unrolled the loop more times than it iterates, the unrolled version
+     of the loop would be never entered. */
+  if (est_niter >= 0 && est_niter < (HOST_WIDE_INT) upper_bound)
+    upper_bound = est_niter;
+
   if (upper_bound <= 1)
     return 1;
 
@@ -935,7 +942,8 @@ static bool
 loop_prefetch_arrays (struct loop *loop)
 {
   struct mem_ref_group *refs;
-  unsigned ahead, ninsns, unroll_factor;
+  unsigned ahead, ninsns, time, unroll_factor;
+  HOST_WIDE_INT est_niter;
   struct tree_niter_desc desc;
   bool unrolled = false;
 
@@ -950,21 +958,24 @@ loop_prefetch_arrays (struct loop *loop)
 
   /* Step 3: determine the ahead and unroll factor. */
 
-  /* FIXME: We should use not size of the loop, but the average number of
-     instructions executed per iteration of the loop. */
-  ninsns = tree_num_loop_insns (loop, &eni_time_weights);
-  ahead = (PREFETCH_LATENCY + ninsns - 1) / ninsns;
-  unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc);
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);
+  /* FIXME: the time should be weighted by the probabilities of the blocks in
+     the loop body. */
+  time = tree_num_loop_insns (loop, &eni_time_weights);
+  ahead = (PREFETCH_LATENCY + time - 1) / time;
+  est_niter = estimated_loop_iterations_int (loop, false);
 
-  /* If the loop rolls less than the required unroll factor, prefetching
-     is useless. */
-  if (unroll_factor > 1
-      && cst_and_fits_in_hwi (desc.niter)
-      && (unsigned HOST_WIDE_INT) int_cst_value (desc.niter) < unroll_factor)
+  /* The prefetches will run for AHEAD iterations of the original loop. Unless
+     the loop rolls at least AHEAD times, prefetching the references does not
+     make sense. */
+  if (est_niter >= 0 && est_niter <= (HOST_WIDE_INT) ahead)
     goto fail;
 
+  ninsns = tree_num_loop_insns (loop, &eni_size_weights);
+  unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
+                                           est_niter);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);
+
   /* Step 4: what to prefetch? */
   if (!schedule_prefetches (refs, unroll_factor, ahead))
     goto fail;