diff options
author | rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4> | 2006-10-06 19:32:04 +0000 |
---|---|---|
committer | rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4> | 2006-10-06 19:32:04 +0000 |
commit | 9b6564b5c181e334447604b1250f1044e10d357c (patch) | |
tree | 8db53ae568c90ff0d6e7b963b4f8f907540d3826 | |
parent | 627611830da31e38930469b6e5d577043abc9022 (diff) | |
download | gcc-9b6564b5c181e334447604b1250f1044e10d357c.tar.gz |
PR middle-end/29256
* tree-ssa-loop-ivopts.c (determine_base_object): Handle pointers
casted to integer type.
(get_address_cost): Decrease cost of [symbol + index] addressing modes
if they are significantly more expensive than [reg + index] ones.
* gcc.dg/tree-ssa/loop-19.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@117513 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 8 |
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 |
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/loop-19.c | 27 |
-rw-r--r-- | gcc/tree-ssa-loop-ivopts.c | 174 |
4 files changed, 160 insertions, 54 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1e4b928d682..e52ef562553 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2006-10-06 Zdenek Dvorak <dvorakz@suse.cz> + + PR middle-end/29256 + * tree-ssa-loop-ivopts.c (determine_base_object): Handle pointers + casted to integer type. + (get_address_cost): Decrease cost of [symbol + index] addressing modes + if they are significantly more expensive than [reg + index] ones. + 2006-10-06 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/29330 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index dad477eeee3..9beac3fc86d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2006-10-06 Zdenek Dvorak <dvorakz@suse.cz> + + PR middle-end/29256 + * gcc.dg/tree-ssa/loop-19.c: New test. + 2006-10-06 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/29330 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-19.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-19.c new file mode 100644 index 00000000000..276913a2c82 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-19.c @@ -0,0 +1,27 @@ +/* This tests strength reduction and choice of induction variables. The targets + for this testcase are quite limited, as with different set of available + addressing modes, the results may be quite different. + + The testcase comes from PR 29256 (and originally, the stream benchmark). */ + +/* { dg-do compile { target i?86-*-* x86_64-*-* powerpc*-*-*} } */ +/* { dg-options "-O3 -fdump-tree-final_cleanup" } */ + +# define N 2000000 +static double a[N],c[N]; +void tuned_STREAM_Copy() +{ + int j; + for (j=0; j<N; j++) + c[j] = a[j]; +} + +/* Check that the memory references are based on &a and &c, with appropriate + offsets. Ideally, we would want each of them to appear once in the output. + However, due to a bug in jump threading, we end up peeling one iteration from + the loop, which creates an additional occurence. 
*/ + +/* { dg-final { scan-tree-dump-times "MEM.(base: &|symbol: )a," 2 "final_cleanup" } } */ +/* { dg-final { scan-tree-dump-times "MEM.(base: &|symbol: )c," 2 "final_cleanup" } } */ + +/* { dg-final { cleanup-tree-dump "final_cleanup" } } */ diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index 6029923ae26..08746200f9e 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -835,6 +835,13 @@ determine_base_object (tree expr) enum tree_code code = TREE_CODE (expr); tree base, obj, op0, op1; + /* If this is a pointer casted to any type, we need to determine + the base object for the pointer; so handle conversions before + throwing away non-pointer expressions. */ + if (TREE_CODE (expr) == NOP_EXPR + || TREE_CODE (expr) == CONVERT_EXPR) + return determine_base_object (TREE_OPERAND (expr, 0)); + if (!POINTER_TYPE_P (TREE_TYPE (expr))) return NULL_TREE; @@ -871,10 +878,6 @@ determine_base_object (tree expr) return fold_build2 (code, ptr_type_node, op0, op1); - case NOP_EXPR: - case CONVERT_EXPR: - return determine_base_object (TREE_OPERAND (expr, 0)); - default: return fold_convert (ptr_type_node, expr); } @@ -3371,9 +3374,7 @@ get_address_cost (bool symbol_present, bool var_present, static HOST_WIDE_INT min_offset, max_offset; static unsigned costs[2][2][2][2]; unsigned cost, acost; - rtx seq, addr, base; bool offset_p, ratio_p; - rtx reg1; HOST_WIDE_INT s_offset; unsigned HOST_WIDE_INT mask; unsigned bits; @@ -3381,6 +3382,11 @@ get_address_cost (bool symbol_present, bool var_present, if (!initialized) { HOST_WIDE_INT i; + int old_cse_not_expected; + unsigned sym_p, var_p, off_p, rat_p, add_c; + rtx seq, addr, base; + rtx reg0, reg1; + initialized = true; reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1); @@ -3417,6 +3423,114 @@ get_address_cost (bool symbol_present, bool var_present, rat = i; break; } + + /* Compute the cost of various addressing modes. 
*/ + acost = 0; + reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1); + reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2); + + for (i = 0; i < 16; i++) + { + sym_p = i & 1; + var_p = (i >> 1) & 1; + off_p = (i >> 2) & 1; + rat_p = (i >> 3) & 1; + + addr = reg0; + if (rat_p) + addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode)); + + if (var_p) + addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1); + + if (sym_p) + { + base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup ("")); + if (off_p) + base = gen_rtx_fmt_e (CONST, Pmode, + gen_rtx_fmt_ee (PLUS, Pmode, + base, + gen_int_mode (off, Pmode))); + } + else if (off_p) + base = gen_int_mode (off, Pmode); + else + base = NULL_RTX; + + if (base) + addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base); + + start_sequence (); + /* To avoid splitting addressing modes, pretend that no cse will + follow. */ + old_cse_not_expected = cse_not_expected; + cse_not_expected = true; + addr = memory_address (Pmode, addr); + cse_not_expected = old_cse_not_expected; + seq = get_insns (); + end_sequence (); + + acost = seq_cost (seq); + acost += address_cost (addr, Pmode); + + if (!acost) + acost = 1; + costs[sym_p][var_p][off_p][rat_p] = acost; + } + + /* On some targets, it is quite expensive to load symbol to a register, + which makes addresses that contain symbols look much more expensive. + However, the symbol will have to be loaded in any case before the + loop (and quite likely we have it in register already), so it does not + make much sense to penalize them too heavily. So make some final + tweaks for the SYMBOL_PRESENT modes: + + If VAR_PRESENT is false, and the mode obtained by changing symbol to + var is cheaper, use this mode with small penalty. + If VAR_PRESENT is true, try whether the mode with + SYMBOL_PRESENT = false is cheaper even with cost of addition, and + if this is the case, use it. 
*/ + add_c = add_cost (Pmode); + for (i = 0; i < 8; i++) + { + var_p = i & 1; + off_p = (i >> 1) & 1; + rat_p = (i >> 2) & 1; + + acost = costs[0][1][off_p][rat_p] + 1; + if (var_p) + acost += add_c; + + if (acost < costs[1][var_p][off_p][rat_p]) + costs[1][var_p][off_p][rat_p] = acost; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Address costs:\n"); + + for (i = 0; i < 16; i++) + { + sym_p = i & 1; + var_p = (i >> 1) & 1; + off_p = (i >> 2) & 1; + rat_p = (i >> 3) & 1; + + fprintf (dump_file, " "); + if (sym_p) + fprintf (dump_file, "sym + "); + if (var_p) + fprintf (dump_file, "var + "); + if (off_p) + fprintf (dump_file, "cst + "); + if (rat_p) + fprintf (dump_file, "rat * "); + + acost = costs[sym_p][var_p][off_p][rat_p]; + fprintf (dump_file, "index costs %d\n", acost); + } + fprintf (dump_file, "\n"); + } } bits = GET_MODE_BITSIZE (Pmode); @@ -3442,54 +3556,6 @@ get_address_cost (bool symbol_present, bool var_present, } acost = costs[symbol_present][var_present][offset_p][ratio_p]; - if (!acost) - { - int old_cse_not_expected; - acost = 0; - - addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1); - reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2); - if (ratio_p) - addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode)); - - if (var_present) - addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1); - - if (symbol_present) - { - base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup ("")); - if (offset_p) - base = gen_rtx_fmt_e (CONST, Pmode, - gen_rtx_fmt_ee (PLUS, Pmode, - base, - gen_int_mode (off, Pmode))); - } - else if (offset_p) - base = gen_int_mode (off, Pmode); - else - base = NULL_RTX; - - if (base) - addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base); - - start_sequence (); - /* To avoid splitting addressing modes, pretend that no cse will - follow. 
*/ - old_cse_not_expected = cse_not_expected; - cse_not_expected = true; - addr = memory_address (Pmode, addr); - cse_not_expected = old_cse_not_expected; - seq = get_insns (); - end_sequence (); - - acost = seq_cost (seq); - acost += address_cost (addr, Pmode); - - if (!acost) - acost = 1; - costs[symbol_present][var_present][offset_p][ratio_p] = acost; - } - return cost + acost; } |