 gcc/ChangeLog              |  25
 gcc/Makefile.in            |   9
 gcc/tree-affine.c          | 414
 gcc/tree-affine.h          |  71
 gcc/tree-flow.h            |  28
 gcc/tree-ssa-address.c     |  94
 gcc/tree-ssa-loop-ivopts.c | 596
 7 files changed, 623 insertions(+), 614 deletions(-)
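The heart of this patch is the aff_tree representation introduced in tree-affine.h: a value is kept as offset + sum of coef_i * elt_i (+ rest), with the coefficients stored as double_int values that are always sign-extended from the precision of the combination's type, so arithmetic on them is effectively performed modulo 2^precision. The sketch below is a standalone illustration, not GCC code: uint64_t/int64_t stand in for double_int, and the hypothetical sext_for_prec helper plays the role of double_int_sext. It demonstrates the two consequences the new code relies on: a coefficient such as 0xff in an 8-bit type reads back as -1 (which is why aff_combination_to_tree can emit "x - 1" rather than "x + 0xff" for an unsigned x), and scaling can wrap a coefficient to zero, which is why aff_combination_scale drops such elements.

    #include <assert.h>
    #include <stdint.h>

    /* Sign-extend VAL from PREC bits, in the spirit of double_int_sext
       (illustration only; GCC's double_int is a 2*HOST_WIDE_INT type).  */
    static int64_t
    sext_for_prec (int64_t val, unsigned prec)
    {
      uint64_t mask = prec < 64 ? ((uint64_t) 1 << prec) - 1 : ~(uint64_t) 0;
      uint64_t sign = (uint64_t) 1 << (prec - 1);
      uint64_t v = (uint64_t) val & mask;
      return (int64_t) ((v ^ sign) - sign);
    }

    int
    main (void)
    {
      /* 0xff sign-extended from 8 bits is -1, so the combination can be
         rendered as "x - 1" instead of "x + 0xff".  */
      assert (sext_for_prec (0xff, 8) == -1);

      /* Scaling modulo 2^prec can wipe out a coefficient: 64 * 4 = 256 = 0
         (mod 2^8).  aff_combination_scale removes such zero elements.  */
      assert (sext_for_prec (64 * 4, 8) == 0);
      return 0;
    }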
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index daf8eb7a516..d7b85b7e72c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2006-12-13 Zdenek Dvorak <dvorakz@suse.cz> + + * tree-ssa-loop-ivopts.c: Include tree-affine.h. + (divide): Removed. + (constant_multiple_of): Fix order of operators for division. + (aff_combination_const, aff_combination_elt, aff_combination_scale, + aff_combination_add_elt, aff_combination_add, aff_combination_convert, + tree_to_aff_combination, add_elt_to_tree, unshare_aff_combination, + aff_combination_to_tree): Moved to tree-affine.c and made to work with + double_int coefficients. + (get_computation_aff, get_computation_at): Work with double_int + coefficients. + (get_computation_cost_at): Do not use divide. + (rewrite_use_nonlinear_expr, rewrite_use_address, rewrite_use_compare): + Assert that expressing the computation did not fail. + * tree-ssa-address.c: Include tree-affine.h. + (add_to_parts, most_expensive_mult_to_index, addr_to_parts, + create_mem_ref): Work with double_int coefficients. + * tree-affine.c: New file. + * tree-affine.h: New file. + * tree-flow.h (struct affine_tree_combination): Removed. + * Makefile.in (tree-affine.o): Add. + (tree-ssa-address.o, tree-ssa-loop-ivopts.o): Add tree-affine.h + dependency. + 2006-12-13 Peter Bergner <bergner@vnet.ibm.com> PR middle-end/30191 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index b2916097214..7ff6a4086cb 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -988,7 +988,7 @@ OBJS-common = \ tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \ tree-vect-patterns.o tree-ssa-loop-prefetch.o tree-ssa-coalesce.o \ tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \ - tree-ssa-math-opts.o \ + tree-ssa-math-opts.o tree-affine.o \ tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \ alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o \ cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \ @@ -1992,7 +1992,7 @@ tree-ssa-address.o : tree-ssa-address.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ tree-pass.h $(FLAGS_H) $(TREE_INLINE_H) $(RECOG_H) insn-config.h $(EXPR_H) \ - gt-tree-ssa-address.h $(GGC_H) + gt-tree-ssa-address.h $(GGC_H) tree-affine.h tree-ssa-loop-niter.o : tree-ssa-loop-niter.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(PARAMS_H) \ $(TREE_INLINE_H) output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ @@ -2018,7 +2018,10 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \ $(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \ - tree-chrec.h $(VARRAY_H) + tree-chrec.h $(VARRAY_H) tree-affine.h +tree-affine.o : tree-affine.c tree-affine.h $(CONFIG_H) \ + $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) \ + output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H) tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c new file mode 100644 index 00000000000..762e82e0b4a --- /dev/null +++ b/gcc/tree-affine.c @@ -0,0 +1,414 @@ +/* Operations with affine combinations of trees. 
+ Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "tm_p.h" +#include "hard-reg-set.h" +#include "output.h" +#include "diagnostic.h" +#include "tree-dump.h" +#include "tree-affine.h" + +/* Extends CST as appropriate for the affine combinations COMB. */ + +double_int +double_int_ext_for_comb (double_int cst, aff_tree *comb) +{ + return double_int_sext (cst, TYPE_PRECISION (comb->type)); +} + +/* Initializes affine combination COMB so that its value is zero in TYPE. */ + +static void +aff_combination_zero (aff_tree *comb, tree type) +{ + comb->type = type; + comb->offset = double_int_zero; + comb->n = 0; + comb->rest = NULL_TREE; +} + +/* Sets COMB to CST. */ + +void +aff_combination_const (aff_tree *comb, tree type, double_int cst) +{ + aff_combination_zero (comb, type); + comb->offset = double_int_ext_for_comb (cst, comb); +} + +/* Sets COMB to single element ELT. */ + +void +aff_combination_elt (aff_tree *comb, tree type, tree elt) +{ + aff_combination_zero (comb, type); + + comb->n = 1; + comb->elts[0].val = elt; + comb->elts[0].coef = double_int_one; +} + +/* Scales COMB by SCALE. */ + +void +aff_combination_scale (aff_tree *comb, double_int scale) +{ + unsigned i, j; + + scale = double_int_ext_for_comb (scale, comb); + if (double_int_one_p (scale)) + return; + + if (double_int_zero_p (scale)) + { + aff_combination_zero (comb, comb->type); + return; + } + + comb->offset + = double_int_ext_for_comb (double_int_mul (scale, comb->offset), comb); + for (i = 0, j = 0; i < comb->n; i++) + { + double_int new_coef; + + new_coef + = double_int_ext_for_comb (double_int_mul (scale, comb->elts[i].coef), + comb); + /* A coefficient may become zero due to overflow. Remove the zero + elements. */ + if (double_int_zero_p (new_coef)) + continue; + comb->elts[j].coef = new_coef; + comb->elts[j].val = comb->elts[i].val; + j++; + } + comb->n = j; + + if (comb->rest) + { + if (comb->n < MAX_AFF_ELTS) + { + comb->elts[comb->n].coef = scale; + comb->elts[comb->n].val = comb->rest; + comb->rest = NULL_TREE; + comb->n++; + } + else + comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest, + double_int_to_tree (comb->type, scale)); + } +} + +/* Adds ELT * SCALE to COMB. 
*/ + +void +aff_combination_add_elt (aff_tree *comb, tree elt, double_int scale) +{ + unsigned i; + + scale = double_int_ext_for_comb (scale, comb); + if (double_int_zero_p (scale)) + return; + + for (i = 0; i < comb->n; i++) + if (operand_equal_p (comb->elts[i].val, elt, 0)) + { + double_int new_coef; + + new_coef = double_int_add (comb->elts[i].coef, scale); + new_coef = double_int_ext_for_comb (new_coef, comb); + if (!double_int_zero_p (new_coef)) + { + comb->elts[i].coef = new_coef; + return; + } + + comb->n--; + comb->elts[i] = comb->elts[comb->n]; + + if (comb->rest) + { + gcc_assert (comb->n == MAX_AFF_ELTS - 1); + comb->elts[comb->n].coef = double_int_one; + comb->elts[comb->n].val = comb->rest; + comb->rest = NULL_TREE; + comb->n++; + } + return; + } + if (comb->n < MAX_AFF_ELTS) + { + comb->elts[comb->n].coef = scale; + comb->elts[comb->n].val = elt; + comb->n++; + return; + } + + if (double_int_one_p (scale)) + elt = fold_convert (comb->type, elt); + else + elt = fold_build2 (MULT_EXPR, comb->type, + fold_convert (comb->type, elt), + double_int_to_tree (comb->type, scale)); + + if (comb->rest) + comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt); + else + comb->rest = elt; +} + +/* Adds COMB2 to COMB1. */ + +void +aff_combination_add (aff_tree *comb1, aff_tree *comb2) +{ + unsigned i; + + comb1->offset + = double_int_ext_for_comb (double_int_add (comb1->offset, comb2->offset), + comb1); + for (i = 0; i < comb2->n; i++) + aff_combination_add_elt (comb1, comb2->elts[i].val, comb2->elts[i].coef); + if (comb2->rest) + aff_combination_add_elt (comb1, comb2->rest, double_int_one); +} + +/* Converts affine combination COMB to TYPE. */ + +void +aff_combination_convert (aff_tree *comb, tree type) +{ + unsigned i, j; + tree comb_type = comb->type; + + gcc_assert (TYPE_PRECISION (type) <= TYPE_PRECISION (comb_type)); + comb->type = type; + if (comb->rest) + comb->rest = fold_convert (type, comb->rest); + + if (TYPE_PRECISION (type) == TYPE_PRECISION (comb_type)) + return; + + comb->offset = double_int_ext_for_comb (comb->offset, comb); + for (i = j = 0; i < comb->n; i++) + { + double_int new_coef = double_int_ext_for_comb (comb->elts[i].coef, comb); + if (double_int_zero_p (new_coef)) + continue; + comb->elts[j].coef = new_coef; + comb->elts[j].val = fold_convert (type, comb->elts[i].val); + j++; + } + + comb->n = j; + if (comb->n < MAX_AFF_ELTS && comb->rest) + { + comb->elts[comb->n].coef = double_int_one; + comb->elts[comb->n].val = comb->rest; + comb->rest = NULL_TREE; + comb->n++; + } +} + +/* Splits EXPR into an affine combination of parts. 
*/ + +void +tree_to_aff_combination (tree expr, tree type, aff_tree *comb) +{ + aff_tree tmp; + enum tree_code code; + tree cst, core, toffset; + HOST_WIDE_INT bitpos, bitsize; + enum machine_mode mode; + int unsignedp, volatilep; + + STRIP_NOPS (expr); + + code = TREE_CODE (expr); + switch (code) + { + case INTEGER_CST: + aff_combination_const (comb, type, tree_to_double_int (expr)); + return; + + case PLUS_EXPR: + case MINUS_EXPR: + tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); + tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp); + if (code == MINUS_EXPR) + aff_combination_scale (&tmp, double_int_minus_one); + aff_combination_add (comb, &tmp); + return; + + case MULT_EXPR: + cst = TREE_OPERAND (expr, 1); + if (TREE_CODE (cst) != INTEGER_CST) + break; + tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); + aff_combination_scale (comb, tree_to_double_int (cst)); + return; + + case NEGATE_EXPR: + tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); + aff_combination_scale (comb, double_int_minus_one); + return; + + case ADDR_EXPR: + core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos, + &toffset, &mode, &unsignedp, &volatilep, + false); + if (bitpos % BITS_PER_UNIT != 0) + break; + aff_combination_const (comb, type, + uhwi_to_double_int (bitpos / BITS_PER_UNIT)); + core = build_fold_addr_expr (core); + if (TREE_CODE (core) == ADDR_EXPR) + aff_combination_add_elt (comb, core, double_int_one); + else + { + tree_to_aff_combination (core, type, &tmp); + aff_combination_add (comb, &tmp); + } + if (toffset) + { + tree_to_aff_combination (toffset, type, &tmp); + aff_combination_add (comb, &tmp); + } + return; + + default: + break; + } + + aff_combination_elt (comb, type, expr); +} + +/* Creates EXPR + ELT * SCALE in TYPE. EXPR is taken from affine + combination COMB. */ + +static tree +add_elt_to_tree (tree expr, tree type, tree elt, double_int scale, + aff_tree *comb) +{ + enum tree_code code; + + scale = double_int_ext_for_comb (scale, comb); + elt = fold_convert (type, elt); + + if (double_int_one_p (scale)) + { + if (!expr) + return elt; + + return fold_build2 (PLUS_EXPR, type, expr, elt); + } + + if (double_int_minus_one_p (scale)) + { + if (!expr) + return fold_build1 (NEGATE_EXPR, type, elt); + + return fold_build2 (MINUS_EXPR, type, expr, elt); + } + + if (!expr) + return fold_build2 (MULT_EXPR, type, elt, + double_int_to_tree (type, scale)); + + if (double_int_negative_p (scale)) + { + code = MINUS_EXPR; + scale = double_int_neg (scale); + } + else + code = PLUS_EXPR; + + elt = fold_build2 (MULT_EXPR, type, elt, + double_int_to_tree (type, scale)); + return fold_build2 (code, type, expr, elt); +} + +/* Makes tree from the affine combination COMB. */ + +tree +aff_combination_to_tree (aff_tree *comb) +{ + tree type = comb->type; + tree expr = comb->rest; + unsigned i; + double_int off, sgn; + + gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE); + + for (i = 0; i < comb->n; i++) + expr = add_elt_to_tree (expr, type, comb->elts[i].val, comb->elts[i].coef, + comb); + + /* Ensure that we get x - 1, not x + (-1) or x + 0xff..f if x is + unsigned. */ + if (double_int_negative_p (comb->offset)) + { + off = double_int_neg (comb->offset); + sgn = double_int_minus_one; + } + else + { + off = comb->offset; + sgn = double_int_one; + } + return add_elt_to_tree (expr, type, double_int_to_tree (type, off), sgn, + comb); +} + +/* Copies the tree elements of COMB to ensure that they are not shared. 
*/ + +void +unshare_aff_combination (aff_tree *comb) +{ + unsigned i; + + for (i = 0; i < comb->n; i++) + comb->elts[i].val = unshare_expr (comb->elts[i].val); + if (comb->rest) + comb->rest = unshare_expr (comb->rest); +} + +/* Remove M-th element from COMB. */ + +void +aff_combination_remove_elt (aff_tree *comb, unsigned m) +{ + comb->n--; + if (m <= comb->n) + comb->elts[m] = comb->elts[comb->n]; + if (comb->rest) + { + comb->elts[comb->n].coef = double_int_one; + comb->elts[comb->n].val = comb->rest; + comb->rest = NULL_TREE; + comb->n++; + } +} diff --git a/gcc/tree-affine.h b/gcc/tree-affine.h new file mode 100644 index 00000000000..010f4a76d9b --- /dev/null +++ b/gcc/tree-affine.h @@ -0,0 +1,71 @@ +/* Operations with affine combinations of trees. + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA. */ + +/* Affine combination of trees. We keep track of at most MAX_AFF_ELTS elements + to make things simpler; this is sufficient in most cases. */ + +#define MAX_AFF_ELTS 8 + +/* Element of an affine combination. */ + +struct aff_comb_elt +{ + /* The value of the element. */ + tree val; + + /* Its coefficient in the combination. */ + double_int coef; +}; + +typedef struct affine_tree_combination +{ + /* Type of the result of the combination. */ + tree type; + + /* Constant offset. */ + double_int offset; + + /* Number of elements of the combination. */ + unsigned n; + + /* Elements and their coefficients. Type of elements may be different from + TYPE, but their sizes must be the same (STRIP_NOPS is applied to the + elements). + + The coefficients are always sign extened from the precision of TYPE + (regardless of signedness of TYPE). */ + struct aff_comb_elt elts[MAX_AFF_ELTS]; + + /* Remainder of the expression. Usually NULL, used only if there are more + than MAX_AFF_ELTS elements. Type of REST must be TYPE. */ + tree rest; +} aff_tree; + +double_int double_int_ext_for_comb (double_int, aff_tree *); +void aff_combination_const (aff_tree *, tree, double_int); +void aff_combination_elt (aff_tree *, tree, tree); +void aff_combination_scale (aff_tree *, double_int); +void aff_combination_add (aff_tree *, aff_tree *); +void aff_combination_add_elt (aff_tree *, tree, double_int); +void aff_combination_remove_elt (aff_tree *, unsigned); +void aff_combination_convert (aff_tree *, tree); +void tree_to_aff_combination (tree, tree, aff_tree *); +tree aff_combination_to_tree (aff_tree *); +void unshare_aff_combination (aff_tree *); diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h index e0ef809bd08..f4337d2a1e0 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -1002,33 +1002,6 @@ extern void remove_unused_locals (void); /* In tree-ssa-address.c */ -/* Affine combination of trees. We keep track of at most MAX_AFF_ELTS elements - to make things simpler; this is sufficient in most cases. 
*/ - -#define MAX_AFF_ELTS 8 - -struct affine_tree_combination -{ - /* Type of the result of the combination. */ - tree type; - - /* Mask modulo that the operations are performed. */ - unsigned HOST_WIDE_INT mask; - - /* Constant offset. */ - unsigned HOST_WIDE_INT offset; - - /* Number of elements of the combination. */ - unsigned n; - - /* Elements and their coefficients. */ - tree elts[MAX_AFF_ELTS]; - unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS]; - - /* Remainder of the expression. */ - tree rest; -}; - /* Description of a memory address. */ struct mem_address @@ -1036,6 +1009,7 @@ struct mem_address tree symbol, base, index, step, offset; }; +struct affine_tree_combination; tree create_mem_ref (block_stmt_iterator *, tree, struct affine_tree_combination *); rtx addr_for_mem_ref (struct mem_address *, bool); diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c index c3394498964..eb39370f0d8 100644 --- a/gcc/tree-ssa-address.c +++ b/gcc/tree-ssa-address.c @@ -42,6 +42,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "recog.h" #include "expr.h" #include "ggc.h" +#include "tree-affine.h" /* TODO -- handling of symbols (according to Richard Hendersons comments, http://gcc.gnu.org/ml/gcc-patches/2005-04/msg00949.html): @@ -346,25 +347,20 @@ fixed_address_object_p (tree obj) construct. */ static void -add_to_parts (struct mem_address *parts, tree type, tree elt, - unsigned HOST_WIDE_INT coef) +add_to_parts (struct mem_address *parts, tree type, tree elt) { + tree elt_core = elt; + STRIP_NOPS (elt_core); + /* Check if this is a symbol. */ if (!parts->symbol - && coef == 1 - && TREE_CODE (elt) == ADDR_EXPR - && fixed_address_object_p (TREE_OPERAND (elt, 0))) + && TREE_CODE (elt_core) == ADDR_EXPR + && fixed_address_object_p (TREE_OPERAND (elt_core, 0))) { - parts->symbol = TREE_OPERAND (elt, 0); + parts->symbol = TREE_OPERAND (elt_core, 0); return; } - if (coef != 1) - elt = fold_build2 (MULT_EXPR, type, fold_convert (type, elt), - build_int_cst_type (type, coef)); - else - elt = fold_convert (type, elt); - if (!parts->base) { parts->base = elt; @@ -388,52 +384,69 @@ add_to_parts (struct mem_address *parts, tree type, tree elt, static void most_expensive_mult_to_index (struct mem_address *parts, tree type, - struct affine_tree_combination *addr) + aff_tree *addr) { - unsigned HOST_WIDE_INT best_mult = 0; + HOST_WIDE_INT coef; + double_int best_mult, amult, amult_neg; unsigned best_mult_cost = 0, acost; tree mult_elt = NULL_TREE, elt; unsigned i, j; + enum tree_code op_code; + best_mult = double_int_zero; for (i = 0; i < addr->n; i++) { + if (!double_int_fits_in_shwi_p (addr->elts[i].coef)) + continue; + /* FIXME: Should use the correct memory mode rather than Pmode. */ - if (addr->coefs[i] == 1 - || !multiplier_allowed_in_address_p (addr->coefs[i], Pmode)) + + coef = double_int_to_shwi (addr->elts[i].coef); + if (coef == 1 + || !multiplier_allowed_in_address_p (coef, Pmode)) continue; - - acost = multiply_by_cost (addr->coefs[i], Pmode); + + acost = multiply_by_cost (coef, Pmode); if (acost > best_mult_cost) { best_mult_cost = acost; - best_mult = addr->coefs[i]; + best_mult = addr->elts[i].coef; } } - if (!best_mult) + if (!best_mult_cost) return; + /* Collect elements multiplied by best_mult. 
*/ for (i = j = 0; i < addr->n; i++) { - if (addr->coefs[i] != best_mult) + amult = addr->elts[i].coef; + amult_neg = double_int_ext_for_comb (double_int_neg (amult), addr); + + if (double_int_equal_p (amult, best_mult)) + op_code = PLUS_EXPR; + else if (double_int_equal_p (amult_neg, best_mult)) + op_code = MINUS_EXPR; + else { - addr->coefs[j] = addr->coefs[i]; addr->elts[j] = addr->elts[i]; j++; continue; } - - elt = fold_convert (type, addr->elts[i]); - if (!mult_elt) + + elt = fold_convert (type, addr->elts[i].val); + if (mult_elt) + mult_elt = fold_build2 (op_code, type, mult_elt, elt); + else if (op_code == PLUS_EXPR) mult_elt = elt; else - mult_elt = fold_build2 (PLUS_EXPR, type, mult_elt, elt); + mult_elt = fold_build1 (NEGATE_EXPR, type, elt); } addr->n = j; - + parts->index = mult_elt; - parts->step = build_int_cst_type (type, best_mult); + parts->step = double_int_to_tree (type, best_mult); } /* Splits address ADDR into PARTS. @@ -442,13 +455,13 @@ most_expensive_mult_to_index (struct mem_address *parts, tree type, to PARTS. Some architectures do not support anything but single register in address, possibly with a small integer offset; while create_mem_ref will simplify the address to an acceptable shape - later, it would be a small bit more efficient to know that asking - for complicated addressing modes is useless. */ + later, it would be more efficient to know that asking for complicated + addressing modes is useless. */ static void -addr_to_parts (struct affine_tree_combination *addr, tree type, - struct mem_address *parts) +addr_to_parts (aff_tree *addr, tree type, struct mem_address *parts) { + tree part; unsigned i; parts->symbol = NULL_TREE; @@ -456,8 +469,8 @@ addr_to_parts (struct affine_tree_combination *addr, tree type, parts->index = NULL_TREE; parts->step = NULL_TREE; - if (addr->offset) - parts->offset = build_int_cst_type (type, addr->offset); + if (!double_int_zero_p (addr->offset)) + parts->offset = double_int_to_tree (type, addr->offset); else parts->offset = NULL_TREE; @@ -467,9 +480,15 @@ addr_to_parts (struct affine_tree_combination *addr, tree type, /* Then try to process the remaining elements. */ for (i = 0; i < addr->n; i++) - add_to_parts (parts, type, addr->elts[i], addr->coefs[i]); + { + part = fold_convert (type, addr->elts[i].val); + if (!double_int_one_p (addr->elts[i].coef)) + part = fold_build2 (MULT_EXPR, type, part, + double_int_to_tree (type, addr->elts[i].coef)); + add_to_parts (parts, type, part); + } if (addr->rest) - add_to_parts (parts, type, addr->rest, 1); + add_to_parts (parts, type, addr->rest); } /* Force the PARTS to register. */ @@ -490,8 +509,7 @@ gimplify_mem_ref_parts (block_stmt_iterator *bsi, struct mem_address *parts) of created memory reference. */ tree -create_mem_ref (block_stmt_iterator *bsi, tree type, - struct affine_tree_combination *addr) +create_mem_ref (block_stmt_iterator *bsi, tree type, aff_tree *addr) { tree mem_ref, tmp; tree addr_type = build_pointer_type (type); diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index 454374010aa..1efaf994bab 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -89,6 +89,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "cfgloop.h" #include "params.h" #include "langhooks.h" +#include "tree-affine.h" /* The infinite cost. 
*/ #define INFTY 10000000 @@ -524,57 +525,6 @@ name_info (struct ivopts_data *data, tree name) return ver_info (data, SSA_NAME_VERSION (name)); } -/* Checks whether there exists number X such that X * B = A, counting modulo - 2^BITS. */ - -static bool -divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b, - HOST_WIDE_INT *x) -{ - unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1); - unsigned HOST_WIDE_INT inv, ex, val; - unsigned i; - - a &= mask; - b &= mask; - - /* First divide the whole equation by 2 as long as possible. */ - while (!(a & 1) && !(b & 1)) - { - a >>= 1; - b >>= 1; - bits--; - mask >>= 1; - } - - if (!(b & 1)) - { - /* If b is still even, a is odd and there is no such x. */ - return false; - } - - /* Find the inverse of b. We compute it as - b^(2^(bits - 1) - 1) (mod 2^bits). */ - inv = 1; - ex = b; - for (i = 0; i < bits - 1; i++) - { - inv = (inv * ex) & mask; - ex = (ex * ex) & mask; - } - - val = (a * inv) & mask; - - gcc_assert (((val * b) & mask) == a); - - if ((val >> (bits - 1)) & 1) - val |= ~mask; - - *x = val; - - return true; -} - /* Returns true if STMT is after the place where the IP_NORMAL ivs will be emitted in LOOP. */ @@ -2613,8 +2563,8 @@ constant_multiple_of (tree top, tree bot, double_int *mul) if (TREE_CODE (bot) != INTEGER_CST) return false; - p0 = double_int_sext (tree_to_double_int (bot), precision); - p1 = double_int_sext (tree_to_double_int (top), precision); + p0 = double_int_sext (tree_to_double_int (top), precision); + p1 = double_int_sext (tree_to_double_int (bot), precision); if (double_int_zero_p (p1)) return false; *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res), @@ -2626,354 +2576,6 @@ constant_multiple_of (tree top, tree bot, double_int *mul) } } -/* Sets COMB to CST. */ - -static void -aff_combination_const (struct affine_tree_combination *comb, tree type, - unsigned HOST_WIDE_INT cst) -{ - unsigned prec = TYPE_PRECISION (type); - - comb->type = type; - comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1); - - comb->n = 0; - comb->rest = NULL_TREE; - comb->offset = cst & comb->mask; -} - -/* Sets COMB to single element ELT. */ - -static void -aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt) -{ - unsigned prec = TYPE_PRECISION (type); - - comb->type = type; - comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1); - - comb->n = 1; - comb->elts[0] = elt; - comb->coefs[0] = 1; - comb->rest = NULL_TREE; - comb->offset = 0; -} - -/* Scales COMB by SCALE. */ - -static void -aff_combination_scale (struct affine_tree_combination *comb, - unsigned HOST_WIDE_INT scale) -{ - unsigned i, j; - - if (scale == 1) - return; - - if (scale == 0) - { - aff_combination_const (comb, comb->type, 0); - return; - } - - comb->offset = (scale * comb->offset) & comb->mask; - for (i = 0, j = 0; i < comb->n; i++) - { - comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask; - comb->elts[j] = comb->elts[i]; - if (comb->coefs[j] != 0) - j++; - } - comb->n = j; - - if (comb->rest) - { - if (comb->n < MAX_AFF_ELTS) - { - comb->coefs[comb->n] = scale; - comb->elts[comb->n] = comb->rest; - comb->rest = NULL_TREE; - comb->n++; - } - else - comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest, - build_int_cst_type (comb->type, scale)); - } -} - -/* Adds ELT * SCALE to COMB. 
*/ - -static void -aff_combination_add_elt (struct affine_tree_combination *comb, tree elt, - unsigned HOST_WIDE_INT scale) -{ - unsigned i; - - if (scale == 0) - return; - - for (i = 0; i < comb->n; i++) - if (operand_equal_p (comb->elts[i], elt, 0)) - { - comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask; - if (comb->coefs[i]) - return; - - comb->n--; - comb->coefs[i] = comb->coefs[comb->n]; - comb->elts[i] = comb->elts[comb->n]; - - if (comb->rest) - { - gcc_assert (comb->n == MAX_AFF_ELTS - 1); - comb->coefs[comb->n] = 1; - comb->elts[comb->n] = comb->rest; - comb->rest = NULL_TREE; - comb->n++; - } - return; - } - if (comb->n < MAX_AFF_ELTS) - { - comb->coefs[comb->n] = scale; - comb->elts[comb->n] = elt; - comb->n++; - return; - } - - if (scale == 1) - elt = fold_convert (comb->type, elt); - else - elt = fold_build2 (MULT_EXPR, comb->type, - fold_convert (comb->type, elt), - build_int_cst_type (comb->type, scale)); - - if (comb->rest) - comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt); - else - comb->rest = elt; -} - -/* Adds COMB2 to COMB1. */ - -static void -aff_combination_add (struct affine_tree_combination *comb1, - struct affine_tree_combination *comb2) -{ - unsigned i; - - comb1->offset = (comb1->offset + comb2->offset) & comb1->mask; - for (i = 0; i < comb2->n; i++) - aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]); - if (comb2->rest) - aff_combination_add_elt (comb1, comb2->rest, 1); -} - -/* Convert COMB to TYPE. */ - -static void -aff_combination_convert (tree type, struct affine_tree_combination *comb) -{ - unsigned prec = TYPE_PRECISION (type); - unsigned i; - - /* If the precision of both types is the same, it suffices to change the type - of the whole combination -- the elements are allowed to have another type - equivalent wrto STRIP_NOPS. */ - if (prec == TYPE_PRECISION (comb->type)) - { - comb->type = type; - return; - } - - comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1); - comb->offset = comb->offset & comb->mask; - - /* The type of the elements can be different from comb->type only as - much as what STRIP_NOPS would remove. We can just directly cast - to TYPE. */ - for (i = 0; i < comb->n; i++) - comb->elts[i] = fold_convert (type, comb->elts[i]); - if (comb->rest) - comb->rest = fold_convert (type, comb->rest); - - comb->type = type; -} - -/* Splits EXPR into an affine combination of parts. 
*/ - -static void -tree_to_aff_combination (tree expr, tree type, - struct affine_tree_combination *comb) -{ - struct affine_tree_combination tmp; - enum tree_code code; - tree cst, core, toffset; - HOST_WIDE_INT bitpos, bitsize; - enum machine_mode mode; - int unsignedp, volatilep; - - STRIP_NOPS (expr); - - code = TREE_CODE (expr); - switch (code) - { - case INTEGER_CST: - aff_combination_const (comb, type, int_cst_value (expr)); - return; - - case PLUS_EXPR: - case MINUS_EXPR: - tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); - tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp); - if (code == MINUS_EXPR) - aff_combination_scale (&tmp, -1); - aff_combination_add (comb, &tmp); - return; - - case MULT_EXPR: - cst = TREE_OPERAND (expr, 1); - if (TREE_CODE (cst) != INTEGER_CST) - break; - tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); - aff_combination_scale (comb, int_cst_value (cst)); - return; - - case NEGATE_EXPR: - tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); - aff_combination_scale (comb, -1); - return; - - case ADDR_EXPR: - core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos, - &toffset, &mode, &unsignedp, &volatilep, - false); - if (bitpos % BITS_PER_UNIT != 0) - break; - aff_combination_const (comb, type, bitpos / BITS_PER_UNIT); - core = build_fold_addr_expr (core); - if (TREE_CODE (core) == ADDR_EXPR) - aff_combination_add_elt (comb, core, 1); - else - { - tree_to_aff_combination (core, type, &tmp); - aff_combination_add (comb, &tmp); - } - if (toffset) - { - tree_to_aff_combination (toffset, type, &tmp); - aff_combination_add (comb, &tmp); - } - return; - - default: - break; - } - - aff_combination_elt (comb, type, expr); -} - -/* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */ - -static tree -add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale, - unsigned HOST_WIDE_INT mask) -{ - enum tree_code code; - - scale &= mask; - elt = fold_convert (type, elt); - - if (scale == 1) - { - if (!expr) - return elt; - - return fold_build2 (PLUS_EXPR, type, expr, elt); - } - - if (scale == mask) - { - if (!expr) - return fold_build1 (NEGATE_EXPR, type, elt); - - return fold_build2 (MINUS_EXPR, type, expr, elt); - } - - if (!expr) - return fold_build2 (MULT_EXPR, type, elt, - build_int_cst_type (type, scale)); - - if ((scale | (mask >> 1)) == mask) - { - /* Scale is negative. */ - code = MINUS_EXPR; - scale = (-scale) & mask; - } - else - code = PLUS_EXPR; - - elt = fold_build2 (MULT_EXPR, type, elt, - build_int_cst_type (type, scale)); - return fold_build2 (code, type, expr, elt); -} - -/* Copies the tree elements of COMB to ensure that they are not shared. */ - -static void -unshare_aff_combination (struct affine_tree_combination *comb) -{ - unsigned i; - - for (i = 0; i < comb->n; i++) - comb->elts[i] = unshare_expr (comb->elts[i]); - if (comb->rest) - comb->rest = unshare_expr (comb->rest); -} - -/* Makes tree from the affine combination COMB. */ - -static tree -aff_combination_to_tree (struct affine_tree_combination *comb) -{ - tree type = comb->type; - tree expr = comb->rest; - unsigned i; - unsigned HOST_WIDE_INT off, sgn; - - if (comb->n == 0 && comb->offset == 0) - { - if (expr) - { - /* Handle the special case produced by get_computation_aff when - the type does not fit in HOST_WIDE_INT. 
*/ - return fold_convert (type, expr); - } - else - return build_int_cst (type, 0); - } - - gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE); - - for (i = 0; i < comb->n; i++) - expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i], - comb->mask); - - if ((comb->offset | (comb->mask >> 1)) == comb->mask) - { - /* Offset is negative. */ - off = (-comb->offset) & comb->mask; - sgn = comb->mask; - } - else - { - off = comb->offset; - sgn = 1; - } - return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn, - comb->mask); -} - /* Folds EXPR using the affine expressions framework. */ static tree @@ -3039,16 +2641,11 @@ get_computation_aff (struct loop *loop, tree ubase = use->iv->base; tree ustep = use->iv->step; tree cbase = cand->iv->base; - tree cstep = cand->iv->step; + tree cstep = cand->iv->step, cstep_common; tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); - tree common_type; + tree common_type, var; tree uutype; - tree expr, delta; - tree ratio; - unsigned HOST_WIDE_INT ustepi, cstepi; - HOST_WIDE_INT ratioi; - struct affine_tree_combination cbase_aff, expr_aff; - tree cstep_orig = cstep, ustep_orig = ustep; + aff_tree cbase_aff, var_aff; double_int rat; if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype)) @@ -3057,65 +2654,19 @@ get_computation_aff (struct loop *loop, return false; } - expr = var_at_stmt (loop, cand, at); - - if (TREE_TYPE (expr) != ctype) - { - /* This may happen with the original ivs. */ - expr = fold_convert (ctype, expr); - } - - if (TYPE_UNSIGNED (utype)) - uutype = utype; - else - { - uutype = unsigned_type_for (utype); - ubase = fold_convert (uutype, ubase); - ustep = fold_convert (uutype, ustep); - } + var = var_at_stmt (loop, cand, at); + uutype = unsigned_type_for (utype); - if (uutype != ctype) + /* If the conversion is not noop, perform it. */ + if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) { - expr = fold_convert (uutype, expr); - cbase = fold_convert (uutype, cbase); cstep = fold_convert (uutype, cstep); - - /* If the conversion is not noop, we must take it into account when - considering the value of the step. */ - if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype)) - cstep_orig = cstep; - } - - if (cst_and_fits_in_hwi (cstep_orig) - && cst_and_fits_in_hwi (ustep_orig)) - { - ustepi = int_cst_value (ustep_orig); - cstepi = int_cst_value (cstep_orig); - - if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi)) - { - /* TODO maybe consider case when ustep divides cstep and the ratio is - a power of 2 (so that the division is fast to execute)? We would - need to be much more careful with overflows etc. then. */ - return false; - } - - ratio = build_int_cst_type (uutype, ratioi); + cbase = fold_convert (uutype, cbase); + var = fold_convert (uutype, var); } - else - { - if (!constant_multiple_of (ustep_orig, cstep_orig, &rat)) - return false; - ratio = double_int_to_tree (uutype, rat); - /* Ratioi is only used to detect special cases when the multiplicative - factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may - set it to 0. */ - if (double_int_fits_in_shwi_p (rat)) - ratioi = double_int_to_shwi (rat); - else - ratioi = 0; - } + if (!constant_multiple_of (ustep, cstep, &rat)) + return false; /* In case both UBASE and CBASE are shortened to UUTYPE from some common type, we achieve better folding by computing their difference in this @@ -3124,73 +2675,32 @@ get_computation_aff (struct loop *loop, anyway. 
*/ common_type = determine_common_wider_type (&ubase, &cbase); - /* We may need to shift the value if we are after the increment. */ - if (stmt_after_increment (loop, cand, at)) - { - if (uutype != common_type) - cstep = fold_convert (common_type, cstep); - cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep); - } - - /* use = ubase - ratio * cbase + ratio * var. - - In general case ubase + ratio * (var - cbase) could be better (one less - multiplication), but often it is possible to eliminate redundant parts - of computations from (ubase - ratio * cbase) term, and if it does not - happen, fold is able to apply the distributive law to obtain this form - anyway. */ + /* use = ubase - ratio * cbase + ratio * var. */ + tree_to_aff_combination (ubase, common_type, aff); + tree_to_aff_combination (cbase, common_type, &cbase_aff); + tree_to_aff_combination (var, uutype, &var_aff); - if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT) + /* We need to shift the value if we are after the increment. */ + if (stmt_after_increment (loop, cand, at)) { - /* Let's compute in trees and just return the result in AFF. This case - should not be very common, and fold itself is not that bad either, - so making the aff. functions more complicated to handle this case - is not that urgent. */ - if (ratioi == 1) - { - delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase); - if (uutype != common_type) - delta = fold_convert (uutype, delta); - expr = fold_build2 (PLUS_EXPR, uutype, expr, delta); - } - else if (ratioi == -1) - { - delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase); - if (uutype != common_type) - delta = fold_convert (uutype, delta); - expr = fold_build2 (MINUS_EXPR, uutype, delta, expr); - } + aff_tree cstep_aff; + + if (common_type != uutype) + cstep_common = fold_convert (common_type, cstep); else - { - delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio); - delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta); - if (uutype != common_type) - delta = fold_convert (uutype, delta); - expr = fold_build2 (MULT_EXPR, uutype, ratio, expr); - expr = fold_build2 (PLUS_EXPR, uutype, delta, expr); - } + cstep_common = cstep; - aff->type = uutype; - aff->n = 0; - aff->offset = 0; - aff->mask = 0; - aff->rest = expr; - return true; + tree_to_aff_combination (cstep_common, common_type, &cstep_aff); + aff_combination_add (&cbase_aff, &cstep_aff); } - /* If we got here, the types fits in HOST_WIDE_INT, thus it must be - possible to compute ratioi. 
*/ - gcc_assert (ratioi); - - tree_to_aff_combination (ubase, common_type, aff); - tree_to_aff_combination (cbase, common_type, &cbase_aff); - tree_to_aff_combination (expr, uutype, &expr_aff); - aff_combination_scale (&cbase_aff, -ratioi); - aff_combination_scale (&expr_aff, ratioi); + aff_combination_scale (&cbase_aff, double_int_neg (rat)); aff_combination_add (aff, &cbase_aff); if (common_type != uutype) - aff_combination_convert (uutype, aff); - aff_combination_add (aff, &expr_aff); + aff_combination_convert (aff, uutype); + + aff_combination_scale (&var_aff, rat); + aff_combination_add (aff, &var_aff); return true; } @@ -3202,7 +2712,7 @@ static tree get_computation_at (struct loop *loop, struct iv_use *use, struct iv_cand *cand, tree at) { - struct affine_tree_combination aff; + aff_tree aff; tree type = TREE_TYPE (use->iv->base); if (!get_computation_aff (loop, use, cand, at, &aff)) @@ -3862,10 +3372,11 @@ get_computation_cost_at (struct ivopts_data *data, tree ubase = use->iv->base, ustep = use->iv->step; tree cbase, cstep; tree utype = TREE_TYPE (ubase), ctype; - unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0; + unsigned HOST_WIDE_INT cstepi, offset = 0; HOST_WIDE_INT ratio, aratio; bool var_present, symbol_present; unsigned cost = 0, n_sums; + double_int rat; *depends_on = NULL; @@ -3913,26 +3424,13 @@ get_computation_cost_at (struct ivopts_data *data, else cstepi = 0; - if (cst_and_fits_in_hwi (ustep) - && cst_and_fits_in_hwi (cstep)) - { - ustepi = int_cst_value (ustep); - - if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio)) - return INFTY; - } - else - { - double_int rat; - - if (!constant_multiple_of (ustep, cstep, &rat)) - return INFTY; + if (!constant_multiple_of (ustep, cstep, &rat)) + return INFTY; - if (double_int_fits_in_shwi_p (rat)) - ratio = double_int_to_shwi (rat); - else - return INFTY; - } + if (double_int_fits_in_shwi_p (rat)) + ratio = double_int_to_shwi (rat); + else + return INFTY; /* use = ubase + ratio * (var - cbase). If either cbase is a constant or ratio == 1, it is better to handle this like @@ -5427,7 +4925,10 @@ rewrite_use_nonlinear_expr (struct ivopts_data *data, unshare_expr (step))); } else - comp = get_computation (data->current_loop, use, cand); + { + comp = get_computation (data->current_loop, use, cand); + gcc_assert (comp != NULL_TREE); + } switch (TREE_CODE (use->stmt)) { @@ -5593,11 +5094,13 @@ static void rewrite_use_address (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand) { - struct affine_tree_combination aff; + aff_tree aff; block_stmt_iterator bsi = bsi_for_stmt (use->stmt); tree ref; + bool ok; - get_computation_aff (data->current_loop, use, cand, use->stmt, &aff); + ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff); + gcc_assert (ok); unshare_aff_combination (&aff); ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff); @@ -5640,6 +5143,7 @@ rewrite_use_compare (struct ivopts_data *data, /* The induction variable elimination failed; just express the original giv. */ comp = get_computation (data->current_loop, use, cand); + gcc_assert (comp != NULL_TREE); cond = *use->op_p; op_p = &TREE_OPERAND (cond, 0); |
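The divide () function removed above deserves a note: it solved x * b = a modulo 2^bits by computing the inverse of an odd b as b^(2^(bits-1) - 1) mod 2^bits, which works because the group of odd residues modulo 2^bits has order 2^(bits-1), so that power is b^-1 by Lagrange's theorem. The standalone sketch below ports it to uint64_t in place of unsigned HOST_WIDE_INT. The patch replaces its uses with constant_multiple_of, which instead performs an exact signed double_int division (note the ChangeLog entry fixing the operand order: TOP is divided by BOT, not the reverse) and gives up when the division leaves a remainder, rather than solving the congruence.

    #include <assert.h>
    #include <stdint.h>

    /* Solve x * b = a (mod 2^bits); return 0 when no solution exists.
       Standalone port of the removed divide ().  */
    static int
    divide_mod_2k (unsigned bits, uint64_t a, uint64_t b, uint64_t *x)
    {
      uint64_t mask = ~(~(uint64_t) 0 << (bits - 1) << 1);
      uint64_t inv, ex;
      unsigned i;

      a &= mask;
      b &= mask;

      /* First divide the whole equation by 2 as long as possible.  */
      while (!(a & 1) && !(b & 1))
        {
          a >>= 1;
          b >>= 1;
          bits--;
          mask >>= 1;
        }

      /* If B is still even, A is odd and there is no solution.  */
      if (!(b & 1))
        return 0;

      /* Inverse of B is B^(2^(bits-1) - 1) mod 2^bits; accumulate
         B^(2^i) for i = 0 .. bits-2 by repeated squaring.  */
      inv = 1;
      ex = b;
      for (i = 0; i < bits - 1; i++)
        {
          inv = (inv * ex) & mask;
          ex = (ex * ex) & mask;
        }

      /* A solution of the (possibly reduced) equation; the removed GCC
         version additionally sign-extended it from the remaining BITS.  */
      *x = (a * inv) & mask;
      return 1;
    }

    int
    main (void)
    {
      uint64_t x;

      /* 5 * x = 3 (mod 2^8) has the solution x = 103:
         5 * 103 = 515 = 3 + 2 * 256.  */
      assert (divide_mod_2k (8, 3, 5, &x) && x == 103);

      /* 2 * x = 1 (mod 2^8) has no solution: an even number times
         anything stays even.  */
      assert (!divide_mod_2k (8, 1, 2, &x));
      return 0;
    }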