diff options
author | dberlin <dberlin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2004-09-08 15:28:56 +0000 |
---|---|---|
committer | dberlin <dberlin@138bc75d-0d04-0410-961f-82ee72b054a4> | 2004-09-08 15:28:56 +0000 |
commit | 60cfcb79540b5415f1dd4860aa0c9cb3d006e429 (patch) | |
tree | 23362687866b29177d4928c3a31cb6b30a4946d9 /gcc | |
parent | 22d678e818e4cd9b7fe186fa0725a900df3f4db3 (diff) | |
download | gcc-60cfcb79540b5415f1dd4860aa0c9cb3d006e429.tar.gz |
2004-09-08 Daniel Berlin <dberlin@dberlin.org>
* Makefile.in (tree-loop-linear.o): Added.
(OBJS-common): Add tree-loop-linear.o
* common.opt: New flag, ftree-loop-linear.
* timevar.def: New timevar, TV_TREE_LOOP_LINEAR.
* tree-flow.h: Add prototype for linear_transform_loops.
* tree-optimize.c: Add linear transform after vectorization.
* tree-pass.h: Add struct pass_linear_transform.
* tree-ssa-loop.c: Add pass_linear_transform.
* tree-loop-linear.c: New file.
* lambda-code.c: gcc_assertify.
(gcc_loop_to_lambda_loop): Handle all exit tests.
Handle case where we have (invariant >= induction var).
(find_induction_var_from_exit_cond): Ditto.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@87190 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 16 | ||||
-rw-r--r-- | gcc/Makefile.in | 6 | ||||
-rw-r--r-- | gcc/common.opt | 4 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 6 | ||||
-rw-r--r-- | gcc/lambda-code.c | 104 | ||||
-rw-r--r-- | gcc/timevar.def | 1 | ||||
-rw-r--r-- | gcc/tree-flow.h | 3 | ||||
-rw-r--r-- | gcc/tree-loop-linear.c | 282 | ||||
-rw-r--r-- | gcc/tree-optimize.c | 1 | ||||
-rw-r--r-- | gcc/tree-pass.h | 2 | ||||
-rw-r--r-- | gcc/tree-ssa-loop.c | 35 |
11 files changed, 413 insertions, 47 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4dc3df11110..7ef63e06757 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2004-09-08 Daniel Berlin <dberlin@dberlin.org> + + * Makefile.in (tree-loop-linear.o): Added. + (OBJS-common): Add tree-loop-linear.o + * common.opt: New flag, ftree-loop-linear. + * timevar.def: New timevar, TV_TREE_LOOP_LINEAR. + * tree-flow.h: Add prototype for linear_transform_loops. + * tree-optimize.c: Add linear transform after vectorization. + * tree-pass.h: Add struct pass_linear_transform. + * tree-ssa-loop.c: Add pass_linear_transform. + * tree-loop-linear.c: New file. + * lambda-code.c: gcc_assertify. + (gcc_loop_to_lambda_loop): Handle all exit tests. + Handle case where we have (invariant >= induction var). + (find_induction_var_from_exit_cond): Ditto. + 2004-09-08 Jie Zhang <zhangjie@magima.com.cn> * tree-ssa-alias.c (compute_flow_insensitive_aliasing): If type diff --git a/gcc/Makefile.in b/gcc/Makefile.in index b073532c50a..e76aa11c2f7 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -927,7 +927,7 @@ OBJS-common = \ varasm.o varray.o vec.o version.o vmsdbgout.o xcoffout.o alloc-pool.o \ et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o passes.o \ rtl-profile.o tree-profile.o rtlhooks.o cfgexpand.o lambda-mat.o \ - lambda-trans.o lambda-code.o + lambda-trans.o lambda-code.o tree-loop-linear.o OBJS-md = $(out_object_file) OBJS-archive = $(EXTRA_OBJS) $(host_hook_obj) tree-inline.o \ @@ -1774,6 +1774,10 @@ tree-vectorizer.o: tree-vectorizer.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \ $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-pass.h \ tree-vectorizer.h tree-data-ref.h $(SCEV_H) +tree-loop-linear.o: tree-loop-linear.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \ + $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-pass.h \ + $(TREE_DATA_REF_H) $(SCEV_H) tree-gimple.o : tree-gimple.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(EXPR_H) \ $(RTL_H) $(TREE_GIMPLE_H) $(TM_H) coretypes.h bitmap.h $(GGC_H) tree-mudflap.o : $(CONFIG_H) errors.h $(SYSTEM_H) $(TREE_H) tree-inline.h \ diff --git a/gcc/common.opt b/gcc/common.opt index 00c0ca30007..77862f07e34 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -864,6 +864,10 @@ ftree-loop-im Common Report Var(flag_tree_loop_im) Init(1) Enable loop invariant motion on trees +ftree-loop-linear +Common Report Var(flag_tree_loop_linear) +Enable linear loop transforms on trees + ftree-loop-ivcanon Common Report Var(flag_tree_loop_ivcanon) Create canonical induction variables in loops diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index b757e269c27..f1fdc306526 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -316,7 +316,7 @@ Objective-C and Objective-C++ Dialects}. -funroll-all-loops -funroll-loops -fpeel-loops @gol -funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol -ftree-pre -ftree-ccp -ftree-dce -ftree-loop-optimize @gol --ftree-loop-im -ftree-loop-ivcanon -fivopts @gol +-ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol -ftree-dominator-opts -ftree-dse -ftree-copyrename @gol -ftree-ch -ftree-sra -ftree-ter -ftree-lrs -ftree-fre -ftree-vectorize @gol --param @var{name}=@var{value} @@ -4610,6 +4610,10 @@ usually increases code size. Perform loop optimizations on trees. This flag is enabled by default at -O and higher. +@item -ftree-loop-linear +Perform linear loop transformations on tree. This flag can improve cache +performance and allow further loop optimizations to take place. + @item -ftree-lim Perform loop invariant motion on trees. This pass moves only invartiants that would be hard to handle on rtl level (function calls, operations that expand to diff --git a/gcc/lambda-code.c b/gcc/lambda-code.c index be1195048cd..6ae4cb2228e 100644 --- a/gcc/lambda-code.c +++ b/gcc/lambda-code.c @@ -169,8 +169,7 @@ lambda_body_vector_compute_new (lambda_trans_matrix transform, int depth; /* Make sure the matrix is square. */ - if (LTM_ROWSIZE (transform) != LTM_COLSIZE (transform)) - abort (); + gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform)); depth = LTM_ROWSIZE (transform); @@ -297,8 +296,7 @@ print_lambda_loop (FILE * outfile, lambda_loop loop, int depth, int step; lambda_linear_expression expr; - if (!loop) - abort (); + gcc_assert (loop); expr = LL_LINEAR_OFFSET (loop); step = LL_STEP (loop); @@ -393,8 +391,7 @@ lambda_lattice_compute_base (lambda_loopnest nest) for (i = 0; i < depth; i++) { loop = LN_LOOPS (nest)[i]; - if (!loop) - abort (); + gcc_assert (loop); step = LL_STEP (loop); /* If we have a step of 1, then the base is one, and the origin and invariant coefficients are 0. */ @@ -412,9 +409,8 @@ lambda_lattice_compute_base (lambda_loopnest nest) /* Otherwise, we need the lower bound expression (which must be an affine function) to determine the base. */ expression = LL_LOWER_BOUND (loop); - if (!expression - || LLE_NEXT (expression) || LLE_DENOMINATOR (expression) != 1) - abort (); + gcc_assert (expression && LLE_NEXT (expression) + && LLE_DENOMINATOR (expression) == 1); /* The lower triangular portion of the base is going to be the coefficient times the step */ @@ -556,8 +552,8 @@ lambda_compute_auxillary_space (lambda_loopnest nest, size++; /* Need to increase matrix sizes above. */ - if (size > 127) - abort (); + gcc_assert (size <= 127); + } /* Then do the exact same thing for the upper bounds. */ @@ -585,8 +581,8 @@ lambda_compute_auxillary_space (lambda_loopnest nest, A[size][i] = LLE_DENOMINATOR (expression); size++; /* Need to increase matrix sizes above. */ - if (size > 127) - abort (); + gcc_assert (size <= 127); + } } @@ -1205,7 +1201,7 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth, tree test; int stepint; int extra = 0; - + tree uboundvar; use_optype uses; /* Find out induction var and set the pointer so that the caller can @@ -1225,19 +1221,7 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth, } test = TREE_OPERAND (exit_cond, 0); - if (TREE_CODE (test) != LE_EXPR - && TREE_CODE (test) != LT_EXPR && TREE_CODE (test) != NE_EXPR) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, - "Unable to convert loop: Loop exit test uses unhandled test condition:"); - print_generic_stmt (dump_file, test, 0); - fprintf (dump_file, "\n"); - } - return NULL; - } if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE) { @@ -1349,25 +1333,39 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth, return NULL; } - if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME) - if (invariant_in_loop (loop, TREE_OPERAND (test, 1))) - VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1)); + /* One part of the test may be a loop invariant tree. */ + if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME + && invariant_in_loop (loop, TREE_OPERAND (test, 1))) + VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1)); + else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME + && invariant_in_loop (loop, TREE_OPERAND (test, 0))) + VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 0)); + + /* The non-induction variable part of the test is the upper bound variable. + */ + if (TREE_OPERAND (test, 0) == inductionvar) + uboundvar = TREE_OPERAND (test, 1); + else + uboundvar = TREE_OPERAND (test, 0); + /* We only size the vectors assuming we have, at max, 2 times as many invariants as we do loops (one for each bound). This is just an arbitrary number, but it has to be matched against the code below. */ - if (VEC_length (tree, *invariants) > (unsigned int) (2 * depth)) - abort (); + gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth)); + /* We might have some leftover. */ if (TREE_CODE (test) == LT_EXPR) extra = -1 * stepint; else if (TREE_CODE (test) == NE_EXPR) extra = -1 * stepint; + else if (TREE_CODE (test) == GT_EXPR) + extra = -1 * stepint; ubound = gcc_tree_to_linear_expression (depth, - TREE_OPERAND (test, 1), + uboundvar, outerinductionvars, *invariants, extra); if (!ubound) @@ -1393,6 +1391,7 @@ static tree find_induction_var_from_exit_cond (struct loop *loop) { tree expr = get_loop_exit_condition (loop); + tree ivarop; tree test; if (expr == NULL_TREE) return NULL_TREE; @@ -1401,9 +1400,28 @@ find_induction_var_from_exit_cond (struct loop *loop) test = TREE_OPERAND (expr, 0); if (TREE_CODE_CLASS (TREE_CODE (test)) != '<') return NULL_TREE; - if (TREE_CODE (TREE_OPERAND (test, 0)) != SSA_NAME) + /* This is a guess. We say that for a <,!=,<= b, a is the induction + variable. + For >, >=, we guess b is the induction variable. + If we are wrong, it'll fail the rest of the induction variable tests, and + everything will be fine anyway. */ + switch (TREE_CODE (test)) + { + case LT_EXPR: + case LE_EXPR: + case NE_EXPR: + ivarop = TREE_OPERAND (test, 0); + break; + case GT_EXPR: + case GE_EXPR: + ivarop = TREE_OPERAND (test, 1); + break; + default: + gcc_unreachable(); + } + if (TREE_CODE (ivarop) != SSA_NAME) return NULL_TREE; - return TREE_OPERAND (test, 0); + return ivarop; } DEF_VEC_GC_P(lambda_loop); @@ -1693,7 +1711,7 @@ lle_to_gcc_expression (lambda_linear_expression lle, name, build_int_cst (integer_type_node, LLE_DENOMINATOR (lle)))); else - abort (); + gcc_unreachable(); /* name = {ceil, floor}(name/denominator) */ name = make_ssa_name (resvar, stmt); @@ -1706,9 +1724,8 @@ lle_to_gcc_expression (lambda_linear_expression lle, /* Again, out of laziness, we don't handle this case yet. It's not hard, it just hasn't occurred. */ - if (VEC_length (tree, results) > 2) - abort (); - + gcc_assert (VEC_length (tree, results) <= 2); + /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */ if (VEC_length (tree, results) > 1) { @@ -1788,10 +1805,9 @@ lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest, cases for now. */ offset = LL_LINEAR_OFFSET (newloop); - if (LLE_DENOMINATOR (offset) != 1 - || !lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth)) - abort (); - + gcc_assert (LLE_DENOMINATOR (offset) == 1 && + lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth)); + /* Now build the new lower bounds, and insert the statements necessary to generate it on the loop preheader. */ newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop), @@ -1929,8 +1945,8 @@ lambda_transform_legal_p (lambda_trans_matrix trans, struct data_dependence_relation *ddr; #if defined ENABLE_CHECKING - if (LTM_COLSIZE (trans) != nb_loops || LTM_ROWSIZE (trans) != nb_loops) - abort (); + gcc_assert (LTM_COLSIZE (trans) == nb_loops + && LTM_ROWSIZE (trans) == nb_loops); #endif /* When there is an unknown relation in the dependence_relations, we diff --git a/gcc/timevar.def b/gcc/timevar.def index 4dbaa6af62c..87108b803d8 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -86,6 +86,7 @@ DEFTIMEVAR (TV_LIM , "loop invariant motion") DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv creation") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree loop vectorization") +DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear transforms") DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization") DEFTIMEVAR (TV_TREE_CH , "tree copy headers") DEFTIMEVAR (TV_TREE_SSA_TO_NORMAL , "tree SSA to normal") diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h index 1efbbbd930f..837aaf9420a 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -719,6 +719,9 @@ void insert_edge_copies (tree stmt, basic_block bb); extern void build_ssa_operands (tree, stmt_ann_t, stmt_operands_p, stmt_operands_p); +/* In tree-loop-linear.c */ +extern void linear_transform_loops (struct loops *); + /* In gimplify.c */ tree force_gimple_operand (tree, tree *, bool, tree); diff --git a/gcc/tree-loop-linear.c b/gcc/tree-loop-linear.c new file mode 100644 index 00000000000..856867e9b66 --- /dev/null +++ b/gcc/tree-loop-linear.c @@ -0,0 +1,282 @@ +/* Linear Loop transforms + Copyright (C) 2003, 2004 Free Software Foundation, Inc. + Contributed by Daniel Berlin <dberlin@dberlin.org>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "errors.h" +#include "ggc.h" +#include "tree.h" +#include "target.h" + +#include "rtl.h" +#include "basic-block.h" +#include "diagnostic.h" +#include "tree-flow.h" +#include "tree-dump.h" +#include "timevar.h" +#include "cfgloop.h" +#include "expr.h" +#include "optabs.h" +#include "tree-chrec.h" +#include "tree-data-ref.h" +#include "tree-scalar-evolution.h" +#include "tree-pass.h" +#include "varray.h" +#include "lambda.h" + +/* Linear loop transforms include any composition of interchange, + scaling, skewing, and reversal. They are used to change the + iteration order of loop nests in order to optimize data locality of + traversals, or remove dependences that prevent + parallelization/vectorization/etc. + + TODO: Determine reuse vectors/matrix and use it to determine optimal + transform matrix for locality purposes. + TODO: Completion of partial transforms. */ + +/* Gather statistics for loop interchange. Initializes SUM the sum of + all the data dependence distances carried by loop LOOP_NUMBER. + NB_DEPS_NOT_CARRIED_BY_LOOP is initialized to the number of + dependence relations for which the loop LOOP_NUMBER is not carrying + any dependence. */ + +static void +gather_interchange_stats (varray_type dependence_relations, + unsigned int loop_number, + unsigned int *sum, + unsigned int *nb_deps_not_carried_by_loop) +{ + unsigned int i; + + *sum = 0; + *nb_deps_not_carried_by_loop = 0; + for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++) + { + int dist; + struct data_dependence_relation *ddr = + (struct data_dependence_relation *) + VARRAY_GENERIC_PTR (dependence_relations, i); + + if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) + { + /* Some constants will need tweaking, but not something that should + be user-accessible. Thus, no --param. */ + *sum += 100; + continue; + } + + /* When we know that there is no dependence, we know that there + is no reuse of the data. */ + if (DDR_ARE_DEPENDENT (ddr) == chrec_known) + { + /* Ditto on the no --param here */ + *sum += 1000; + continue; + } + + dist = DDR_DIST_VECT (ddr)[loop_number]; + if (dist == 0) + *nb_deps_not_carried_by_loop++; + else if (dist < 0) + *sum += -dist; + else + *sum += dist; + } +} + +/* Apply to TRANS any loop interchange that minimize inner loop steps. + DEPTH is the depth of the loop nest, and DEPENDENCE_RELATIONS is an array + of dependence relations. + Returns the new transform matrix. The smaller the reuse vector + distances in the inner loops, the fewer the cache misses. */ + +static lambda_trans_matrix +try_interchange_loops (lambda_trans_matrix trans, + unsigned int depth, + varray_type dependence_relations) +{ + unsigned int loop_i, loop_j; + unsigned int steps_i, steps_j; + unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j; + struct data_dependence_relation *ddr; + + /* When there is an unknown relation in the dependence_relations, we + know that it is no worth looking at this loop nest: give up. */ + ddr = (struct data_dependence_relation *) + VARRAY_GENERIC_PTR (dependence_relations, 0); + if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) + return trans; + + /* LOOP_I is always the outer loop. */ + for (loop_j = 1; loop_j < depth; loop_j++) + for (loop_i = 0; loop_i < loop_j; loop_i++) + { + gather_interchange_stats (dependence_relations, loop_i, &steps_i, + &nb_deps_not_carried_by_i); + gather_interchange_stats (dependence_relations, loop_j, &steps_j, + &nb_deps_not_carried_by_j); + + /* Heuristics for loop interchange profitability: + 1. Inner loops should have smallest steps. + 2. Inner loops should contain more dependence relations not + carried by the loop. + */ + if (steps_i < steps_j + || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j) + { + lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j); + + /* Validate the resulting matrix. When the transformation + is not valid, reverse to the previous matrix. + + FIXME: In this case of transformation it could be + faster to verify the validity of the interchange + without applying the transform to the matrix. But for + the moment do it cleanly: this is just a prototype. */ + if (!lambda_transform_legal_p (trans, depth, dependence_relations)) + lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j); + } + } + + return trans; +} + +/* Perform a set of linear transforms on LOOPS. */ + +void +linear_transform_loops (struct loops *loops) +{ + unsigned int i; + + for (i = 1; i < loops->num; i++) + { + unsigned int depth = 0; + varray_type datarefs; + varray_type dependence_relations; + struct loop *loop_nest = loops->parray[i]; + struct loop *temp; + VEC (tree) *oldivs; + VEC (tree) *invariants; + lambda_loopnest before, after; + lambda_trans_matrix trans; + bool problem = false; + /* If it's not a loop nest, we don't want it. + We also don't handle sibling loops properly, + which are loops of the following form: + for (i = 0; i < 50; i++) + { + for (j = 0; j < 50; j++) + { + ... + } + for (j = 0; j < 50; j++) + { + ... + } + } */ + if (!loop_nest->inner) + continue; + for (temp = loop_nest; temp; temp = temp->inner) + { + flow_loop_scan (temp, LOOP_ALL); + /* If we have a sibling loop or multiple exit edges, jump ship. */ + if (temp->next || temp->num_exits != 1) + { + problem = true; + break; + } + depth ++; + } + if (problem) + continue; + + /* Analyze data references and dependence relations using scev. */ + + VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs"); + VARRAY_GENERIC_PTR_INIT (dependence_relations, 10, + "dependence_relations"); + + + compute_data_dependences_for_loop (depth, loop_nest, + &datarefs, &dependence_relations); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + unsigned int j; + for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++) + { + struct data_dependence_relation *ddr = + (struct data_dependence_relation *) + VARRAY_GENERIC_PTR (dependence_relations, j); + + if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) + { + fprintf (dump_file, "DISTANCE_V ("); + print_lambda_vector (dump_file, DDR_DIST_VECT (ddr), + loops->num); + fprintf (dump_file, ")\n"); + fprintf (dump_file, "DIRECTION_V ("); + print_lambda_vector (dump_file, DDR_DIR_VECT (ddr), + loops->num); + fprintf (dump_file, ")\n"); + } + } + fprintf (dump_file, "\n\n"); + } + /* Build the transformation matrix. */ + trans = lambda_trans_matrix_new (depth, depth); + lambda_matrix_id (LTM_MATRIX (trans), depth); + trans = try_interchange_loops (trans, depth, dependence_relations); + + /* Check whether the transformation is legal. */ + if (!lambda_transform_legal_p (trans, depth, dependence_relations)) + { + if (dump_file) + fprintf (dump_file, "Can't transform loop, transform is illegal:\n"); + continue; + } + before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs, + &invariants); + if (!before) + continue; + + if (dump_file) + { + fprintf (dump_file, "Before:\n"); + print_lambda_loopnest (dump_file, before, 'i'); + } + + after = lambda_loopnest_transform (before, trans); + if (dump_file) + { + fprintf (dump_file, "After:\n"); + print_lambda_loopnest (dump_file, after, 'u'); + } + lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants, + after, trans); + oldivs = NULL; + invariants = NULL; + free_dependence_relations (dependence_relations); + free_data_refs (datarefs); + } +} diff --git a/gcc/tree-optimize.c b/gcc/tree-optimize.c index d081fe0590f..5ad189139c9 100644 --- a/gcc/tree-optimize.c +++ b/gcc/tree-optimize.c @@ -397,6 +397,7 @@ init_tree_optimization_passes (void) NEXT_PASS (pass_iv_canon); NEXT_PASS (pass_if_conversion); NEXT_PASS (pass_vectorize); + NEXT_PASS (pass_linear_transform); NEXT_PASS (pass_complete_unroll); NEXT_PASS (pass_iv_optimize); NEXT_PASS (pass_loop_done); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 29877180b7d..a7445cddda6 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -160,6 +160,6 @@ extern struct tree_opt_pass pass_rename_ssa_copies; extern struct tree_opt_pass pass_expand; extern struct tree_opt_pass pass_rest_of_compilation; extern struct tree_opt_pass pass_fre; - +extern struct tree_opt_pass pass_linear_transform; #endif /* GCC_TREE_PASS_H */ diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c index 01dc7675159..2f87d87bacd 100644 --- a/gcc/tree-ssa-loop.c +++ b/gcc/tree-ssa-loop.c @@ -194,6 +194,41 @@ struct tree_opt_pass pass_vectorize = 0 /* letter */ }; + +/* Loop nest optimizations. */ + +static void +tree_linear_transform (void) +{ + if (!current_loops) + return; + + linear_transform_loops (current_loops); +} + +static bool +gate_tree_linear_transform (void) +{ + return flag_tree_loop_linear != 0; +} + +struct tree_opt_pass pass_linear_transform = +{ + "ltrans", /* name */ + gate_tree_linear_transform, /* gate */ + tree_linear_transform, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_TREE_LINEAR_TRANSFORM, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func, /* todo_flags_finish */ + 0 /* letter */ +}; + /* Canonical induction variable creation pass. */ static void |