summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4>2006-10-06 19:32:04 +0000
committerrakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4>2006-10-06 19:32:04 +0000
commit9b6564b5c181e334447604b1250f1044e10d357c (patch)
tree8db53ae568c90ff0d6e7b963b4f8f907540d3826
parent627611830da31e38930469b6e5d577043abc9022 (diff)
downloadgcc-9b6564b5c181e334447604b1250f1044e10d357c.tar.gz
PR middle-end/29256
* tree-ssa-loop-ivopts.c (determine_base_object): Handle pointers casted to integer type. (get_address_cost): Decrease cost of [symbol + index] addressing modes if they are significantly more expensive than [reg + index] ones. * gcc.dg/tree-ssa/loop-19.c: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@117513 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog8
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/loop-19.c27
-rw-r--r--gcc/tree-ssa-loop-ivopts.c174
4 files changed, 160 insertions, 54 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1e4b928d682..e52ef562553 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2006-10-06 Zdenek Dvorak <dvorakz@suse.cz>
+
+ PR middle-end/29256
+ * tree-ssa-loop-ivopts.c (determine_base_object): Handle pointers
+ casted to integer type.
+ (get_address_cost): Decrease cost of [symbol + index] addressing modes
+ if they are significantly more expensive than [reg + index] ones.
+
2006-10-06 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/29330
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index dad477eeee3..9beac3fc86d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2006-10-06 Zdenek Dvorak <dvorakz@suse.cz>
+
+ PR middle-end/29256
+ * gcc.dg/tree-ssa/loop-19.c: New test.
+
2006-10-06 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/29330
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-19.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-19.c
new file mode 100644
index 00000000000..276913a2c82
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-19.c
@@ -0,0 +1,27 @@
+/* This tests strength reduction and choice of induction variables. The targets
+ for this testcase are quite limited, as with a different set of available
+ addressing modes, the results may be quite different.
+
+ The testcase comes from PR 29256 (and originally, the stream benchmark). */
+
+/* { dg-do compile { target i?86-*-* x86_64-*-* powerpc*-*-*} } */
+/* { dg-options "-O3 -fdump-tree-final_cleanup" } */
+
+# define N 2000000
+static double a[N],c[N];
+void tuned_STREAM_Copy()
+{
+ int j;
+ for (j=0; j<N; j++)
+ c[j] = a[j];
+}
+
+/* Check that the memory references are based on &a and &c, with appropriate
+ offsets. Ideally, we would want each of them to appear once in the output.
+ However, due to a bug in jump threading, we end up peeling one iteration from
+ the loop, which creates an additional occurrence. */
+
+/* { dg-final { scan-tree-dump-times "MEM.(base: &|symbol: )a," 2 "final_cleanup" } } */
+/* { dg-final { scan-tree-dump-times "MEM.(base: &|symbol: )c," 2 "final_cleanup" } } */
+
+/* { dg-final { cleanup-tree-dump "final_cleanup" } } */
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 6029923ae26..08746200f9e 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -835,6 +835,13 @@ determine_base_object (tree expr)
enum tree_code code = TREE_CODE (expr);
tree base, obj, op0, op1;
+ /* If this is a pointer cast to any type, we need to determine
+ the base object for the pointer; so handle conversions before
+ throwing away non-pointer expressions. */
+ if (TREE_CODE (expr) == NOP_EXPR
+ || TREE_CODE (expr) == CONVERT_EXPR)
+ return determine_base_object (TREE_OPERAND (expr, 0));
+
if (!POINTER_TYPE_P (TREE_TYPE (expr)))
return NULL_TREE;
@@ -871,10 +878,6 @@ determine_base_object (tree expr)
return fold_build2 (code, ptr_type_node, op0, op1);
- case NOP_EXPR:
- case CONVERT_EXPR:
- return determine_base_object (TREE_OPERAND (expr, 0));
-
default:
return fold_convert (ptr_type_node, expr);
}
@@ -3371,9 +3374,7 @@ get_address_cost (bool symbol_present, bool var_present,
static HOST_WIDE_INT min_offset, max_offset;
static unsigned costs[2][2][2][2];
unsigned cost, acost;
- rtx seq, addr, base;
bool offset_p, ratio_p;
- rtx reg1;
HOST_WIDE_INT s_offset;
unsigned HOST_WIDE_INT mask;
unsigned bits;
@@ -3381,6 +3382,11 @@ get_address_cost (bool symbol_present, bool var_present,
if (!initialized)
{
HOST_WIDE_INT i;
+ int old_cse_not_expected;
+ unsigned sym_p, var_p, off_p, rat_p, add_c;
+ rtx seq, addr, base;
+ rtx reg0, reg1;
+
initialized = true;
reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
@@ -3417,6 +3423,114 @@ get_address_cost (bool symbol_present, bool var_present,
rat = i;
break;
}
+
+ /* Compute the cost of various addressing modes. */
+ acost = 0;
+ reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
+ reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
+
+ for (i = 0; i < 16; i++)
+ {
+ sym_p = i & 1;
+ var_p = (i >> 1) & 1;
+ off_p = (i >> 2) & 1;
+ rat_p = (i >> 3) & 1;
+
+ addr = reg0;
+ if (rat_p)
+ addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
+
+ if (var_p)
+ addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
+
+ if (sym_p)
+ {
+ base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
+ if (off_p)
+ base = gen_rtx_fmt_e (CONST, Pmode,
+ gen_rtx_fmt_ee (PLUS, Pmode,
+ base,
+ gen_int_mode (off, Pmode)));
+ }
+ else if (off_p)
+ base = gen_int_mode (off, Pmode);
+ else
+ base = NULL_RTX;
+
+ if (base)
+ addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
+
+ start_sequence ();
+ /* To avoid splitting addressing modes, pretend that no cse will
+ follow. */
+ old_cse_not_expected = cse_not_expected;
+ cse_not_expected = true;
+ addr = memory_address (Pmode, addr);
+ cse_not_expected = old_cse_not_expected;
+ seq = get_insns ();
+ end_sequence ();
+
+ acost = seq_cost (seq);
+ acost += address_cost (addr, Pmode);
+
+ if (!acost)
+ acost = 1;
+ costs[sym_p][var_p][off_p][rat_p] = acost;
+ }
+
+ /* On some targets, it is quite expensive to load a symbol into a register,
+ which makes addresses that contain symbols look much more expensive.
+ However, the symbol will have to be loaded in any case before the
+ loop (and quite likely we have it in register already), so it does not
+ make much sense to penalize them too heavily. So make some final
+ tweaks for the SYMBOL_PRESENT modes:
+
+ If VAR_PRESENT is false, and the mode obtained by changing symbol to
+ var is cheaper, use this mode with small penalty.
+ If VAR_PRESENT is true, try whether the mode with
+ SYMBOL_PRESENT = false is cheaper even with cost of addition, and
+ if this is the case, use it. */
+ add_c = add_cost (Pmode);
+ for (i = 0; i < 8; i++)
+ {
+ var_p = i & 1;
+ off_p = (i >> 1) & 1;
+ rat_p = (i >> 2) & 1;
+
+ acost = costs[0][1][off_p][rat_p] + 1;
+ if (var_p)
+ acost += add_c;
+
+ if (acost < costs[1][var_p][off_p][rat_p])
+ costs[1][var_p][off_p][rat_p] = acost;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Address costs:\n");
+
+ for (i = 0; i < 16; i++)
+ {
+ sym_p = i & 1;
+ var_p = (i >> 1) & 1;
+ off_p = (i >> 2) & 1;
+ rat_p = (i >> 3) & 1;
+
+ fprintf (dump_file, " ");
+ if (sym_p)
+ fprintf (dump_file, "sym + ");
+ if (var_p)
+ fprintf (dump_file, "var + ");
+ if (off_p)
+ fprintf (dump_file, "cst + ");
+ if (rat_p)
+ fprintf (dump_file, "rat * ");
+
+ acost = costs[sym_p][var_p][off_p][rat_p];
+ fprintf (dump_file, "index costs %d\n", acost);
+ }
+ fprintf (dump_file, "\n");
+ }
}
bits = GET_MODE_BITSIZE (Pmode);
@@ -3442,54 +3556,6 @@ get_address_cost (bool symbol_present, bool var_present,
}
acost = costs[symbol_present][var_present][offset_p][ratio_p];
- if (!acost)
- {
- int old_cse_not_expected;
- acost = 0;
-
- addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
- reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
- if (ratio_p)
- addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
-
- if (var_present)
- addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
-
- if (symbol_present)
- {
- base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
- if (offset_p)
- base = gen_rtx_fmt_e (CONST, Pmode,
- gen_rtx_fmt_ee (PLUS, Pmode,
- base,
- gen_int_mode (off, Pmode)));
- }
- else if (offset_p)
- base = gen_int_mode (off, Pmode);
- else
- base = NULL_RTX;
-
- if (base)
- addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
-
- start_sequence ();
- /* To avoid splitting addressing modes, pretend that no cse will
- follow. */
- old_cse_not_expected = cse_not_expected;
- cse_not_expected = true;
- addr = memory_address (Pmode, addr);
- cse_not_expected = old_cse_not_expected;
- seq = get_insns ();
- end_sequence ();
-
- acost = seq_cost (seq);
- acost += address_cost (addr, Pmode);
-
- if (!acost)
- acost = 1;
- costs[symbol_present][var_present][offset_p][ratio_p] = acost;
- }
-
return cost + acost;
}