summaryrefslogtreecommitdiff
path: root/gcc/tree-loop-linear.c
diff options
context:
space:
mode:
authorspop <spop@138bc75d-0d04-0410-961f-82ee72b054a4>2008-02-29 12:41:14 +0000
committerspop <spop@138bc75d-0d04-0410-961f-82ee72b054a4>2008-02-29 12:41:14 +0000
commit2fcf1fbb13e889c2503d7311f591622288376138 (patch)
treef9a535bf4c6727dddb73dd954029131e101008f7 /gcc/tree-loop-linear.c
parent583c0935578ea3b6ca9dcacc1f8390edc30edd67 (diff)
downloadgcc-2fcf1fbb13e889c2503d7311f591622288376138.tar.gz
* tree-loop-linear.c (try_interchange_loops): Compare memory access
strides against cache sizes. * testsuite/gcc.dg/tree-ssa/ltrans-8.c: Increase the size of strides to make the interchange profitable. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@132765 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-loop-linear.c')
-rw-r--r--gcc/tree-loop-linear.c33
1 files changed, 30 insertions, 3 deletions
diff --git a/gcc/tree-loop-linear.c b/gcc/tree-loop-linear.c
index 88a77dd228c..806d9e6d1cb 100644
--- a/gcc/tree-loop-linear.c
+++ b/gcc/tree-loop-linear.c
@@ -179,10 +179,14 @@ try_interchange_loops (lambda_trans_matrix trans,
VEC (data_reference_p, heap) *datarefs,
struct loop *first_loop)
{
+ bool res;
struct loop *loop_i;
struct loop *loop_j;
unsigned int dependence_steps_i, dependence_steps_j;
double_int access_strides_i, access_strides_j;
+ double_int small, large, nb_iter;
+ double_int l1_cache_size, l2_cache_size;
+ int cmp;
unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
struct data_dependence_relation *ddr;
@@ -194,7 +198,10 @@ try_interchange_loops (lambda_trans_matrix trans,
ddr = VEC_index (ddr_p, dependence_relations, 0);
if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
return trans;
-
+
+ l1_cache_size = uhwi_to_double_int (L1_CACHE_SIZE * 1024);
+ l2_cache_size = uhwi_to_double_int (L2_CACHE_SIZE * 1024);
+
/* LOOP_I is always the outer loop. */
for (loop_j = first_loop->inner;
loop_j;
@@ -216,18 +223,38 @@ try_interchange_loops (lambda_trans_matrix trans,
/* Heuristics for loop interchange profitability:
+ 0. Don't transform if the smallest stride is larger than
+ the L2 cache, or if the largest stride multiplied by the
+ number of iterations is smaller than the L1 cache.
+
1. (spatial locality) Inner loops should have smallest
dependence steps.
2. (spatial locality) Inner loops should contain more
dependence relations not carried by the loop.
- 3. (temporal locality) Inner loops should have smallest
+ 3. (temporal locality) Inner loops should have smallest
array access strides.
*/
+
+ cmp = double_int_ucmp (access_strides_i, access_strides_j);
+ small = cmp < 0 ? access_strides_i : access_strides_j;
+ large = cmp < 0 ? access_strides_j : access_strides_i;
+
+ if (double_int_ucmp (small, l2_cache_size) > 0)
+ continue;
+
+ res = cmp < 0 ?
+ estimated_loop_iterations (loop_j, false, &nb_iter):
+ estimated_loop_iterations (loop_i, false, &nb_iter);
+ large = double_int_mul (large, nb_iter);
+
+ if (res && double_int_ucmp (large, l1_cache_size) < 0)
+ continue;
+
if (dependence_steps_i < dependence_steps_j
|| nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
- || double_int_ucmp (access_strides_i, access_strides_j) < 0)
+ || cmp < 0)
{
lambda_matrix_row_exchange (LTM_MATRIX (trans),
loop_depth (loop_i) - loop_depth (first_loop),