summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog13
-rw-r--r--gcc/Makefile.in8
-rw-r--r--gcc/doc/invoke.texi13
-rw-r--r--gcc/params.def30
-rw-r--r--gcc/params.h6
-rw-r--r--gcc/testsuite/ChangeLog15
-rw-r--r--gcc/testsuite/gcc.c-torture/compile/sra-1.c75
-rw-r--r--gcc/testsuite/gcc.dg/memcpy-1.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/sra-1.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/sra-2.c4
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/sra-3.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/sra-4.c4
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/sra-5.c74
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/sra-6.c40
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-7.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-8.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-9.c2
-rw-r--r--gcc/testsuite/gfortran.dg/pr25923.f902
-rw-r--r--gcc/testsuite/gnat.dg/pack9.adb2
-rw-r--r--gcc/tree-sra.c4811
20 files changed, 1954 insertions, 3155 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f864337629d..ebe3141b88c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2009-05-29 Martin Jambor <mjambor@suse.cz>
+
+ * tree-sra.c: New implementation of SRA.
+
+ * params.def (PARAM_SRA_MAX_STRUCTURE_SIZE): Removed.
+ (PARAM_SRA_MAX_STRUCTURE_COUNT): Removed.
+ (PARAM_SRA_FIELD_STRUCTURE_RATIO): Removed.
+ * params.h (SRA_MAX_STRUCTURE_SIZE): Removed.
+ (SRA_MAX_STRUCTURE_COUNT): Removed.
+ (SRA_FIELD_STRUCTURE_RATIO): Removed.
+ * doc/invoke.texi (sra-max-structure-size): Removed.
+ (sra-field-structure-ratio): Removed.
+
2009-05-29 Jakub Jelinek <jakub@redhat.com>
PR middle-end/40291
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index a5ef9938026..810699d52e6 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2769,11 +2769,9 @@ tree-ssa-ccp.o : tree-ssa-ccp.c $(TREE_FLOW_H) $(CONFIG_H) \
$(DIAGNOSTIC_H) $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
$(TREE_DUMP_H) $(BASIC_BLOCK_H) $(TREE_PASS_H) langhooks.h \
tree-ssa-propagate.h value-prof.h $(FLAGS_H) $(TARGET_H) $(TOPLEV_H)
-tree-sra.o : tree-sra.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(RTL_H) \
- $(TM_P_H) $(TREE_FLOW_H) $(DIAGNOSTIC_H) $(TREE_INLINE_H) \
- $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) $(GIMPLE_H) \
- langhooks.h $(TREE_PASS_H) $(FLAGS_H) $(EXPR_H) $(BASIC_BLOCK_H) \
- $(BITMAP_H) $(GGC_H) hard-reg-set.h $(OBSTACK_H) $(PARAMS_H) $(TARGET_H)
+tree-sra.o : tree-sra.c $(CONFIG_H) $(SYSTEM_H) coretypes.h alloc-pool.h \
+ $(TM_H) $(TREE_H) $(GIMPLE_H) $(TREE_FLOW_H) $(DIAGNOSTIC_H) $(TREE_DUMP_H) \
+ $(TIMEVAR_H) $(PARAMS_H) $(TARGET_H) $(FLAGS_H)
tree-switch-conversion.o : tree-switch-conversion.c $(CONFIG_H) $(SYSTEM_H) \
$(TREE_H) $(TM_P_H) $(TREE_FLOW_H) $(DIAGNOSTIC_H) $(TREE_INLINE_H) \
$(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) $(GIMPLE_H) \
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ca059df9445..ebd91dbee1c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -7312,19 +7312,6 @@ In each case, the @var{value} is an integer. The allowable choices for
@var{name} are given in the following table:
@table @gcctabopt
-@item sra-max-structure-size
-The maximum structure size, in bytes, at which the scalar replacement
-of aggregates (SRA) optimization will perform block copies. The
-default value, 0, implies that GCC will select the most appropriate
-size itself.
-
-@item sra-field-structure-ratio
-The threshold ratio (as a percentage) between instantiated fields and
-the complete structure size. We say that if the ratio of the number
-of bytes in instantiated fields to the number of bytes in the complete
-structure exceeds this parameter, then block copies are not used. The
-default is 75.
-
@item struct-reorg-cold-struct-ratio
The threshold ratio (as a percentage) between a structure frequency
and the frequency of the hottest structure in the program. This parameter
diff --git a/gcc/params.def b/gcc/params.def
index ccbc305a627..370d0948da9 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -38,36 +38,6 @@ along with GCC; see the file COPYING3. If not see
Be sure to add an entry to invoke.texi summarizing the parameter. */
-/* The maximum structure size at which the scalar replacement of
- aggregates (SRA) pass will perform block copies. The default
- value, 0, implies that GCC will select the most appropriate size
- itself. */
-DEFPARAM (PARAM_SRA_MAX_STRUCTURE_SIZE,
- "sra-max-structure-size",
- "The maximum structure size (in bytes) for which GCC will "
- "use by-element copies",
- 0, 0, 0)
-
-/* The maximum number of structure fields which the SRA pass will
- instantiate to avoid block copies. The default value, 0, implies
- that GCC will select the appropriate value itself. */
-DEFPARAM (PARAM_SRA_MAX_STRUCTURE_COUNT,
- "sra-max-structure-count",
- "The maximum number of structure fields for which GCC will "
- "use by-element copies",
- 0, 0, 0)
-
-/* The ratio between instantiated fields and the complete structure
- size. We say that if the ratio of the number of bytes in
- instantiated fields to the number of bytes in the complete
- structure exceeds this parameter, or if the number of instantiated
- fields to the total number of fields exceeds this parameter, then
- block copies are not used. The default is 75%. */
-DEFPARAM (PARAM_SRA_FIELD_STRUCTURE_RATIO,
- "sra-field-structure-ratio",
- "The threshold ratio between instantiated fields and the total structure size",
- 75, 0, 100)
-
/* The threshold ratio between current and hottest structure counts.
We say that if the ratio of the current structure count,
calculated by profiling, to the hottest structure count
diff --git a/gcc/params.h b/gcc/params.h
index 16ed29234ca..828aa7d43cc 100644
--- a/gcc/params.h
+++ b/gcc/params.h
@@ -94,12 +94,6 @@ typedef enum compiler_param
(compiler_params[(int) ENUM].set)
/* Macros for the various parameters. */
-#define SRA_MAX_STRUCTURE_SIZE \
- PARAM_VALUE (PARAM_SRA_MAX_STRUCTURE_SIZE)
-#define SRA_MAX_STRUCTURE_COUNT \
- PARAM_VALUE (PARAM_SRA_MAX_STRUCTURE_COUNT)
-#define SRA_FIELD_STRUCTURE_RATIO \
- PARAM_VALUE (PARAM_SRA_FIELD_STRUCTURE_RATIO)
#define STRUCT_REORG_COLD_STRUCT_RATIO \
PARAM_VALUE (PARAM_STRUCT_REORG_COLD_STRUCT_RATIO)
#define MAX_INLINE_INSNS_SINGLE \
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e8577143ff8..7479c1a74ea 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,18 @@
+2009-05-29 Martin Jambor <mjambor@suse.cz>
+
+ * gfortran.dg/pr25923.f90: XFAIL warning expectation.
+ * gcc.dg/tree-ssa/ssa-fre-7.c: Compile with -fno-tree-sra.
+ * gcc.dg/tree-ssa/ssa-fre-8.c: Likewise.
+ * gcc.dg/tree-ssa/ssa-fre-9.c: Likewise.
+ * gcc.dg/memcpy-1.c: Removed param sra-max-structure-size.
+ * gcc.dg/tree-ssa/sra-2.c: Likewise.
+ * gcc.dg/tree-ssa/sra-3.c: Likewise.
+ * gcc.dg/tree-ssa/sra-1.c: Likewise.
+ * gcc.dg/tree-ssa/sra-4.c: Changed comment.
+ * gcc.dg/tree-ssa/sra-5.c: New file.
+ * gcc.dg/tree-ssa/sra-6.c: New file.
+ * gcc.c-torture/compile/sra-1.c: New file.
+
2009-05-29 Jakub Jelinek <jakub@redhat.com>
PR middle-end/40291
diff --git a/gcc/testsuite/gcc.c-torture/compile/sra-1.c b/gcc/testsuite/gcc.c-torture/compile/sra-1.c
new file mode 100644
index 00000000000..06dcf1002be
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/sra-1.c
@@ -0,0 +1,75 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+/* Let gimple verifier check what SRA does to unions and single-field
+ strucutres . */
+
+struct sim_struct
+{
+ int x;
+};
+
+extern struct sim_struct get_x(void);
+
+struct sim_struct foo (void)
+{
+ struct sim_struct simple;
+
+ simple = get_x ();
+ if (simple.x % 2)
+ simple.x = 39;
+ else
+ simple.x -=8;
+
+ return simple;
+}
+
+struct sim_cmplx
+{
+ _Complex double c;
+};
+
+extern struct sim_cmplx get_sc (void);
+
+_Complex double foo_c (void)
+{
+ struct sim_cmplx simple;
+
+ simple = get_sc ();
+ if (__real__ simple.c > 200.3)
+ __imag__ simple.c -= 2.4;
+
+ return simple.c;
+}
+
+
+union sim_union
+{
+ int i;
+ float d;
+};
+
+extern union sim_union get_y (void);
+
+union sim_union bar (void)
+{
+ union sim_union simple;
+
+ simple = get_y ();
+ if (simple.d > 8.2)
+ simple.i = 300;
+
+ return simple;
+}
+
+extern int get_int (void);
+
+int bar_i (void)
+{
+ union sim_union simple;
+
+ simple = get_y ();
+ if (simple.d > 8.2)
+ simple.i = get_int ();
+
+ return simple.i;
+}
diff --git a/gcc/testsuite/gcc.dg/memcpy-1.c b/gcc/testsuite/gcc.dg/memcpy-1.c
index cc602423793..2b11098b286 100644
--- a/gcc/testsuite/gcc.dg/memcpy-1.c
+++ b/gcc/testsuite/gcc.dg/memcpy-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized --param sra-max-structure-size=32" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
/* PR36598 AVR fail maybe due to cost metrics */
/* { dg-final { scan-tree-dump-times "nasty_local" 0 "optimized" { xfail { "avr-*-*" } } } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-1.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-1.c
index c2e45eb1f84..e5af2475115 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/sra-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized --param sra-max-structure-size=32" } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
/* Tests for SRA. */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-2.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-2.c
index 177c4bcace4..5682b8afbcf 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/sra-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-2.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O1 -fno-tree-fre -fdump-tree-optimized --param sra-max-structure-size=32" } */
+/* { dg-do compile } */
+/* { dg-options "-O1 -fno-tree-fre -fdump-tree-optimized" } */
/* Test for SRA. */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-3.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-3.c
index 661dc58ff09..d8908152384 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/sra-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-3.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized --param sra-max-structure-size=32" } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
/* Test for SRA. */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-4.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-4.c
index 6fdf37ffb34..73a68f90043 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/sra-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-4.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O1 -fdump-tree-optimized -w" } */
-/* Check that SRA does non block copies for structs that just contain vectors. */
+/* Check that SRA replaces strucutres containing vectors. */
#define vector __attribute__((vector_size(16)))
@@ -20,7 +20,5 @@ vector int f(vector int t1, vector int t2)
return st3.t;
}
-/* There should be no references to st as SRA should not have done block copy. */
/* { dg-final { scan-tree-dump-times "st" 0 "optimized" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
-
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-5.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-5.c
new file mode 100644
index 00000000000..869d2f55f95
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-5.c
@@ -0,0 +1,74 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
+
+/* Tests for SRA of unions. */
+
+
+typedef union testunion
+{
+ double d;
+ char f1;
+} testunion;
+
+void
+copyunion1 (testunion param)
+{
+ testunion local;
+ param.f1 = 0;
+ local = param;
+ if (local.f1 != 0)
+ link_error ();
+}
+
+void
+copyunion11 (testunion *param)
+{
+ testunion local;
+ param->f1 = 0;
+ local = *param;
+ if (local.f1 != 0)
+ link_error ();
+}
+
+void
+copyunion111 (testunion param)
+{
+ testunion *local = &param;
+ param.f1 = 0;
+ if (local->f1 != 0)
+ link_error ();
+}
+
+testunion globuf;
+void
+copyunion1111 (void)
+{
+ testunion local;
+ globuf.f1 = 0;
+ local = globuf;
+ if (local.f1 != 0)
+ link_error ();
+}
+
+void
+copyunion11111 (void)
+{
+ testunion *local = &globuf;
+ globuf.f1 = 0;
+ if (local->f1 != 0)
+ link_error ();
+}
+
+void
+copyunion111111 (testunion param)
+{
+ static testunion local;
+ param.f1 = 0;
+ local = param;
+ if (local.f1 != 0)
+ link_error ();
+}
+
+/* There should be no reference to link_error. */
+/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-6.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-6.c
new file mode 100644
index 00000000000..e59b536c12d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-6.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized -fdump-tree-esra-details" } */
+
+typedef struct teststruct
+{
+ double d;
+ int i1;
+ char c1;
+ float z;
+ char c2;
+ int i2;
+} teststruct;
+
+
+void cow (int i)
+{
+ teststruct a, b, c, d;
+
+ a.d = 3.2;
+ a.i1 = i;
+
+ b = a;
+ c = b;
+ d = c;
+
+ if (d.i1 != i)
+ link_error ();
+}
+
+
+/* Suaccesses of b and c should have been created. */
+/* { dg-final { scan-tree-dump "expr = b.d" "esra"} } */
+/* { dg-final { scan-tree-dump "expr = b.i1" "esra"} } */
+/* { dg-final { scan-tree-dump "expr = c.d" "esra"} } */
+/* { dg-final { scan-tree-dump "expr = c.i1" "esra"} } */
+/* { dg-final { cleanup-tree-dump "esra" } } */
+
+/* There should be no reference to link_error. */
+/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-7.c
index bd81831eba8..895c05fdf91 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-7.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-7.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-fre-details -fdump-tree-optimized" } */
+/* { dg-options "-O -fno-tree-sra -fdump-tree-fre-details -fdump-tree-optimized" } */
#if (__SIZEOF_INT__ == __SIZEOF_FLOAT__)
typedef int intflt;
#elif (__SIZEOF_LONG__ == __SIZEOF_FLOAT__)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-8.c
index 6e17bd531b3..bc9f8e3992e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-8.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-8.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-fre-details" } */
+/* { dg-options "-O -fno-tree-sra -fdump-tree-fre-details" } */
#if (__SIZEOF_INT__ == __SIZEOF_FLOAT__)
typedef int intflt;
#elif (__SIZEOF_LONG__ == __SIZEOF_FLOAT__)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-9.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-9.c
index 18595ed6fe5..c8a434a2bba 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-9.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-9.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-fre-stats" } */
+/* { dg-options "-O -fno-tree-sra -fdump-tree-fre-stats" } */
union loc {
unsigned reg;
diff --git a/gcc/testsuite/gfortran.dg/pr25923.f90 b/gcc/testsuite/gfortran.dg/pr25923.f90
index f075944b92b..b6979ec8896 100644
--- a/gcc/testsuite/gfortran.dg/pr25923.f90
+++ b/gcc/testsuite/gfortran.dg/pr25923.f90
@@ -10,7 +10,7 @@ implicit none
contains
- function baz(arg) result(res) ! { dg-warning "res.yr' may be" }
+ function baz(arg) result(res) ! { dg-warning "res.yr' may be" "" { xfail *-*-* } }
type(bar), intent(in) :: arg
type(bar) :: res
logical, external:: some_func
diff --git a/gcc/testsuite/gnat.dg/pack9.adb b/gcc/testsuite/gnat.dg/pack9.adb
index 64d71b11cc9..232904ac1e1 100644
--- a/gcc/testsuite/gnat.dg/pack9.adb
+++ b/gcc/testsuite/gnat.dg/pack9.adb
@@ -1,5 +1,5 @@
-- { dg-do compile }
--- { dg-options "-O2 -gnatp -cargs --param sra-max-structure-size=24 --param sra-max-structure-count=6 -fdump-tree-optimized" }
+-- { dg-options "-O2 -gnatp -cargs -fdump-tree-optimized" }
package body Pack9 is
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 0f689417163..825d6e80ae7 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -1,19 +1,18 @@
/* Scalar Replacement of Aggregates (SRA) converts some structure
references into scalar references, exposing them to the scalar
optimizers.
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
- Free Software Foundation, Inc.
- Contributed by Diego Novillo <dnovillo@redhat.com>
+ Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Martin Jambor <mjambor@suse.cz>
This file is part of GCC.
-GCC is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
-GCC is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
@@ -21,3660 +20,2295 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+/* This file implements Scalar Reduction of Aggregates (SRA). SRA is run
+ twice, once in the early stages of compilation (early SRA) and once in the
+ late stages (late SRA). The aim of both is to turn references to scalar
+ parts of aggregates into uses of independent scalar variables.
+
+ The two passes are nearly identical, the only difference is that early SRA
+ does not scalarize unions which are used as the result in a GIMPLE_RETURN
+ statement because together with inlining this can lead to weird type
+ conversions.
+
+ Both passes operate in four stages:
+
+ 1. The declarations that have properties which make them candidates for
+ scalarization are identified in function find_var_candidates(). The
+ candidates are stored in candidate_bitmap.
+
+ 2. The function body is scanned. In the process, declarations which are
+ used in a manner that prevent their scalarization are removed from the
+ candidate bitmap. More importantly, for every access into an aggregate,
+ an access structure (struct access) is created by create_access() and
+ stored in a vector associated with the aggregate. Among other
+ information, the aggregate declaration, the offset and size of the access
+ and its type are stored in the structure.
+
+ On a related note, assign_link structures are created for every assign
+ statement between candidate aggregates and attached to the related
+ accesses.
+
+ 3. The vectors of accesses are analyzed. They are first sorted according to
+ their offset and size and then scanned for partially overlapping accesses
+ (i.e. those which overlap but one is not entirely within another). Such
+ an access disqualifies the whole aggregate from being scalarized.
+
+ If there is no such inhibiting overlap, a representative access structure
+ is chosen for every unique combination of offset and size. Afterwards,
+ the pass builds a set of trees from these structures, in which children
+ of an access are within their parent (in terms of offset and size).
+
+ Then accesses are propagated whenever possible (i.e. in cases when it
+ does not create a partially overlapping access) across assign_links from
+ the right hand side to the left hand side.
+
+ Then the set of trees for each declaration is traversed again and those
+ accesses which should be replaced by a scalar are identified.
+
+ 4. The function is traversed again, and for every reference into an
+ aggregate that has some component which is about to be scalarized,
+ statements are amended and new statements are created as necessary.
+ Finally, if a parameter got scalarized, the scalar replacements are
+ initialized with values from respective parameter aggregates. */
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
+#include "alloc-pool.h"
#include "tm.h"
-#include "ggc.h"
#include "tree.h"
-
-/* These RTL headers are needed for basic-block.h. */
-#include "rtl.h"
-#include "tm_p.h"
-#include "hard-reg-set.h"
-#include "basic-block.h"
-#include "diagnostic.h"
-#include "langhooks.h"
-#include "tree-inline.h"
-#include "tree-flow.h"
#include "gimple.h"
+#include "tree-flow.h"
+#include "diagnostic.h"
#include "tree-dump.h"
-#include "tree-pass.h"
#include "timevar.h"
-#include "flags.h"
-#include "bitmap.h"
-#include "obstack.h"
-#include "target.h"
-/* expr.h is needed for MOVE_RATIO. */
-#include "expr.h"
#include "params.h"
+#include "target.h"
+#include "flags.h"
+/* Enumeration of all aggregate reductions we can do. */
+enum sra_mode { SRA_MODE_EARLY_INTRA, /* early intraprocedural SRA */
+ SRA_MODE_INTRA }; /* late intraprocedural SRA */
-/* This object of this pass is to replace a non-addressable aggregate with a
- set of independent variables. Most of the time, all of these variables
- will be scalars. But a secondary objective is to break up larger
- aggregates into smaller aggregates. In the process we may find that some
- bits of the larger aggregate can be deleted as unreferenced.
-
- This substitution is done globally. More localized substitutions would
- be the purvey of a load-store motion pass.
-
- The optimization proceeds in phases:
-
- (1) Identify variables that have types that are candidates for
- decomposition.
-
- (2) Scan the function looking for the ways these variables are used.
- In particular we're interested in the number of times a variable
- (or member) is needed as a complete unit, and the number of times
- a variable (or member) is copied.
-
- (3) Based on the usage profile, instantiate substitution variables.
-
- (4) Scan the function making replacements.
-*/
-
+/* Global variable describing which aggregate reduction we are performing at
+ the moment. */
+static enum sra_mode sra_mode;
-/* True if this is the "early" pass, before inlining. */
-static bool early_sra;
+struct assign_link;
-/* The set of aggregate variables that are candidates for scalarization. */
-static bitmap sra_candidates;
+/* ACCESS represents each access to an aggregate variable (as a whole or a
+ part). It can also represent a group of accesses that refer to exactly the
+ same fragment of an aggregate (i.e. those that have exactly the same offset
+ and size). Such representatives for a single aggregate, once determined,
+ are linked in a linked list and have the group fields set.
-/* Set of scalarizable PARM_DECLs that need copy-in operations at the
- beginning of the function. */
-static bitmap needs_copy_in;
+ Moreover, when doing intraprocedural SRA, a tree is built from those
+ representatives (by the means of first_child and next_sibling pointers), in
+ which all items in a subtree are "within" the root, i.e. their offset is
+ greater or equal to offset of the root and offset+size is smaller or equal
+ to offset+size of the root. Children of an access are sorted by offset.
-/* Sets of bit pairs that cache type decomposition and instantiation. */
-static bitmap sra_type_decomp_cache;
-static bitmap sra_type_inst_cache;
+ Note that accesses to parts of vector and complex number types always
+ represented by an access to the whole complex number or a vector. It is a
+ duty of the modifying functions to replace them appropriately. */
-/* One of these structures is created for each candidate aggregate and
- each (accessed) member or group of members of such an aggregate. */
-struct sra_elt
+struct access
{
- /* A tree of the elements. Used when we want to traverse everything. */
- struct sra_elt *parent;
- struct sra_elt *groups;
- struct sra_elt *children;
- struct sra_elt *sibling;
-
- /* If this element is a root, then this is the VAR_DECL. If this is
- a sub-element, this is some token used to identify the reference.
- In the case of COMPONENT_REF, this is the FIELD_DECL. In the case
- of an ARRAY_REF, this is the (constant) index. In the case of an
- ARRAY_RANGE_REF, this is the (constant) RANGE_EXPR. In the case
- of a complex number, this is a zero or one. */
- tree element;
-
- /* The type of the element. */
- tree type;
+ /* Values returned by `get_ref_base_and_extent' for each component reference
+ If EXPR isn't a component reference just set `BASE = EXPR', `OFFSET = 0',
+ `SIZE = TREE_SIZE (TREE_TYPE (expr))'. */
+ HOST_WIDE_INT offset;
+ HOST_WIDE_INT size;
+ tree base;
- /* A VAR_DECL, for any sub-element we've decided to replace. */
- tree replacement;
+ /* Expression. */
+ tree expr;
+ /* Type. */
+ tree type;
- /* The number of times the element is referenced as a whole. I.e.
- given "a.b.c", this would be incremented for C, but not for A or B. */
- unsigned int n_uses;
+ /* Next group representative for this aggregate. */
+ struct access *next_grp;
+
+ /* Pointer to the group representative. Pointer to itself if the struct is
+ the representative. */
+ struct access *group_representative;
+
+ /* If this access has any children (in terms of the definition above), this
+ points to the first one. */
+ struct access *first_child;
+
+ /* Pointer to the next sibling in the access tree as described above. */
+ struct access *next_sibling;
+
+ /* Pointers to the first and last element in the linked list of assign
+ links. */
+ struct assign_link *first_link, *last_link;
+
+ /* Pointer to the next access in the work queue. */
+ struct access *next_queued;
+
+ /* Replacement variable for this access "region." Never to be accessed
+ directly, always only by the means of get_access_replacement() and only
+ when grp_to_be_replaced flag is set. */
+ tree replacement_decl;
+
+ /* Is this particular access write access? */
+ unsigned write : 1;
+
+ /* Is this access currently in the work queue? */
+ unsigned grp_queued : 1;
+ /* Does this group contain a write access? This flag is propagated down the
+ access tree. */
+ unsigned grp_write : 1;
+ /* Does this group contain a read access? This flag is propagated down the
+ access tree. */
+ unsigned grp_read : 1;
+ /* Is the subtree rooted in this access fully covered by scalar
+ replacements? */
+ unsigned grp_covered : 1;
+ /* If set to true, this access and all below it in an access tree must not be
+ scalarized. */
+ unsigned grp_unscalarizable_region : 1;
+ /* Whether data have been written to parts of the aggregate covered by this
+ access which is not to be scalarized. This flag is propagated up in the
+ access tree. */
+ unsigned grp_unscalarized_data : 1;
+ /* Does this access and/or group contain a write access through a
+ BIT_FIELD_REF? */
+ unsigned grp_partial_lhs : 1;
+
+ /* Set when a scalar replacement should be created for this variable. We do
+ the decision and creation at different places because create_tmp_var
+ cannot be called from within FOR_EACH_REFERENCED_VAR. */
+ unsigned grp_to_be_replaced : 1;
+};
- /* The number of times the element is copied to or from another
- scalarizable element. */
- unsigned int n_copies;
+typedef struct access *access_p;
- /* True if TYPE is scalar. */
- bool is_scalar;
+DEF_VEC_P (access_p);
+DEF_VEC_ALLOC_P (access_p, heap);
- /* True if this element is a group of members of its parent. */
- bool is_group;
+/* Alloc pool for allocating access structures. */
+static alloc_pool access_pool;
- /* True if we saw something about this element that prevents scalarization,
- such as non-constant indexing. */
- bool cannot_scalarize;
+/* A structure linking lhs and rhs accesses from an aggregate assignment. They
+ are used to propagate subaccesses from rhs to lhs as long as they don't
+ conflict with what is already there. */
+struct assign_link
+{
+ struct access *lacc, *racc;
+ struct assign_link *next;
+};
- /* True if we've decided that structure-to-structure assignment
- should happen via memcpy and not per-element. */
- bool use_block_copy;
+/* Alloc pool for allocating assign link structures. */
+static alloc_pool link_pool;
- /* True if everything under this element has been marked TREE_NO_WARNING. */
- bool all_no_warning;
+/* Base (tree) -> Vector (VEC(access_p,heap) *) map. */
+static struct pointer_map_t *base_access_vec;
- /* A flag for use with/after random access traversals. */
- bool visited;
+/* Bitmap of bases (candidates). */
+static bitmap candidate_bitmap;
+/* Obstack for creation of fancy names. */
+static struct obstack name_obstack;
- /* True if there is BIT_FIELD_REF on the lhs with a vector. */
- bool is_vector_lhs;
+/* Head of a linked list of accesses that need to have its subaccesses
+ propagated to their assignment counterparts. */
+static struct access *work_queue_head;
- /* 1 if the element is a field that is part of a block, 2 if the field
- is the block itself, 0 if it's neither. */
- char in_bitfld_block;
-};
+/* Dump contents of ACCESS to file F in a human friendly way. If GRP is true,
+ representative fields are dumped, otherwise those which only describe the
+ individual access are. */
-#define IS_ELEMENT_FOR_GROUP(ELEMENT) (TREE_CODE (ELEMENT) == RANGE_EXPR)
+static void
+dump_access (FILE *f, struct access *access, bool grp)
+{
+ fprintf (f, "access { ");
+ fprintf (f, "base = (%d)'", DECL_UID (access->base));
+ print_generic_expr (f, access->base, 0);
+ fprintf (f, "', offset = " HOST_WIDE_INT_PRINT_DEC, access->offset);
+ fprintf (f, ", size = " HOST_WIDE_INT_PRINT_DEC, access->size);
+ fprintf (f, ", expr = ");
+ print_generic_expr (f, access->expr, 0);
+ fprintf (f, ", type = ");
+ print_generic_expr (f, access->type, 0);
+ if (grp)
+ fprintf (f, ", grp_write = %d, grp_read = %d, grp_covered = %d, "
+ "grp_unscalarizable_region = %d, grp_unscalarized_data = %d, "
+ "grp_partial_lhs = %d, grp_to_be_replaced = %d\n",
+ access->grp_write, access->grp_read, access->grp_covered,
+ access->grp_unscalarizable_region, access->grp_unscalarized_data,
+ access->grp_partial_lhs, access->grp_to_be_replaced);
+ else
+ fprintf (f, ", write = %d, grp_partial_lhs = %d\n", access->write,
+ access->grp_partial_lhs);
+}
-#define FOR_EACH_ACTUAL_CHILD(CHILD, ELT) \
- for ((CHILD) = (ELT)->is_group \
- ? next_child_for_group (NULL, (ELT)) \
- : (ELT)->children; \
- (CHILD); \
- (CHILD) = (ELT)->is_group \
- ? next_child_for_group ((CHILD), (ELT)) \
- : (CHILD)->sibling)
+/* Dump a subtree rooted in ACCESS to file F, indent by LEVEL. */
-/* Helper function for above macro. Return next child in group. */
-static struct sra_elt *
-next_child_for_group (struct sra_elt *child, struct sra_elt *group)
+static void
+dump_access_tree_1 (FILE *f, struct access *access, int level)
{
- gcc_assert (group->is_group);
-
- /* Find the next child in the parent. */
- if (child)
- child = child->sibling;
- else
- child = group->parent->children;
-
- /* Skip siblings that do not belong to the group. */
- while (child)
+ do
{
- tree g_elt = group->element;
- if (TREE_CODE (g_elt) == RANGE_EXPR)
- {
- if (!tree_int_cst_lt (child->element, TREE_OPERAND (g_elt, 0))
- && !tree_int_cst_lt (TREE_OPERAND (g_elt, 1), child->element))
- break;
- }
- else
- gcc_unreachable ();
-
- child = child->sibling;
- }
-
- return child;
-}
+ int i;
-/* Random access to the child of a parent is performed by hashing.
- This prevents quadratic behavior, and allows SRA to function
- reasonably on larger records. */
-static htab_t sra_map;
+ for (i = 0; i < level; i++)
+ fputs ("* ", dump_file);
-/* All structures are allocated out of the following obstack. */
-static struct obstack sra_obstack;
+ dump_access (f, access, true);
-/* Debugging functions. */
-static void dump_sra_elt_name (FILE *, struct sra_elt *);
-extern void debug_sra_elt_name (struct sra_elt *);
+ if (access->first_child)
+ dump_access_tree_1 (f, access->first_child, level + 1);
-/* Forward declarations. */
-static tree generate_element_ref (struct sra_elt *);
-static gimple_seq sra_build_assignment (tree dst, tree src);
-static void mark_all_v_defs_seq (gimple_seq);
+ access = access->next_sibling;
+ }
+ while (access);
+}
-
-/* Return true if DECL is an SRA candidate. */
+/* Dump all access trees for a variable, given the pointer to the first root in
+ ACCESS. */
-static bool
-is_sra_candidate_decl (tree decl)
+static void
+dump_access_tree (FILE *f, struct access *access)
{
- return DECL_P (decl) && bitmap_bit_p (sra_candidates, DECL_UID (decl));
+ for (; access; access = access->next_grp)
+ dump_access_tree_1 (f, access, 0);
}
-/* Return true if TYPE is a scalar type. */
+/* Return true iff ACC is non-NULL and has subaccesses. */
-static bool
-is_sra_scalar_type (tree type)
+static inline bool
+access_has_children_p (struct access *acc)
{
- enum tree_code code = TREE_CODE (type);
- return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE
- || code == FIXED_POINT_TYPE
- || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE
- || code == POINTER_TYPE || code == OFFSET_TYPE
- || code == REFERENCE_TYPE);
+ return acc && acc->first_child;
}
-/* Return true if TYPE can be decomposed into a set of independent variables.
-
- Note that this doesn't imply that all elements of TYPE can be
- instantiated, just that if we decide to break up the type into
- separate pieces that it can be done. */
+/* Return a vector of pointers to accesses for the variable given in BASE or
+ NULL if there is none. */
-static bool
-sra_type_can_be_decomposed_p (tree type)
+static VEC (access_p, heap) *
+get_base_access_vector (tree base)
{
- unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
- tree t;
+ void **slot;
- /* Avoid searching the same type twice. */
- if (bitmap_bit_p (sra_type_decomp_cache, cache+0))
- return true;
- if (bitmap_bit_p (sra_type_decomp_cache, cache+1))
- return false;
+ slot = pointer_map_contains (base_access_vec, base);
+ if (!slot)
+ return NULL;
+ else
+ return *(VEC (access_p, heap) **) slot;
+}
- /* The type must have a definite nonzero size. */
- if (TYPE_SIZE (type) == NULL || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
- || integer_zerop (TYPE_SIZE (type)))
- goto fail;
+/* Find an access with required OFFSET and SIZE in a subtree of accesses rooted
+ in ACCESS. Return NULL if it cannot be found. */
- /* The type must be a non-union aggregate. */
- switch (TREE_CODE (type))
+static struct access *
+find_access_in_subtree (struct access *access, HOST_WIDE_INT offset,
+ HOST_WIDE_INT size)
+{
+ while (access && (access->offset != offset || access->size != size))
{
- case RECORD_TYPE:
- {
- bool saw_one_field = false;
+ struct access *child = access->first_child;
- for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t))
- if (TREE_CODE (t) == FIELD_DECL)
- {
- /* Reject incorrectly represented bit fields. */
- if (DECL_BIT_FIELD (t)
- && INTEGRAL_TYPE_P (TREE_TYPE (t))
- && (tree_low_cst (DECL_SIZE (t), 1)
- != TYPE_PRECISION (TREE_TYPE (t))))
- goto fail;
-
- /* And volatile fields. */
- if (TREE_THIS_VOLATILE (t))
- goto fail;
-
- saw_one_field = true;
- }
-
- /* Record types must have at least one field. */
- if (!saw_one_field)
- goto fail;
- }
- break;
+ while (child && (child->offset + child->size <= offset))
+ child = child->next_sibling;
+ access = child;
+ }
- case ARRAY_TYPE:
- /* Array types must have a fixed lower and upper bound. */
- t = TYPE_DOMAIN (type);
- if (t == NULL)
- goto fail;
- if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t)))
- goto fail;
- if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t)))
- goto fail;
- break;
+ return access;
+}
- case COMPLEX_TYPE:
- break;
+/* Return the first group representative for DECL or NULL if none exists. */
- default:
- goto fail;
- }
+static struct access *
+get_first_repr_for_decl (tree base)
+{
+ VEC (access_p, heap) *access_vec;
- bitmap_set_bit (sra_type_decomp_cache, cache+0);
- return true;
+ access_vec = get_base_access_vector (base);
+ if (!access_vec)
+ return NULL;
- fail:
- bitmap_set_bit (sra_type_decomp_cache, cache+1);
- return false;
+ return VEC_index (access_p, access_vec, 0);
}
-/* Returns true if the TYPE is one of the available va_list types.
- Otherwise it returns false.
- Note, that for multiple calling conventions there can be more
- than just one va_list type present. */
+/* Find an access representative for the variable BASE and given OFFSET and
+ SIZE. Requires that access trees have already been built. Return NULL if
+ it cannot be found. */
-static bool
-is_va_list_type (tree type)
+static struct access *
+get_var_base_offset_size_access (tree base, HOST_WIDE_INT offset,
+ HOST_WIDE_INT size)
{
- tree h;
+ struct access *access;
- if (type == NULL_TREE)
- return false;
- h = targetm.canonical_va_list_type (type);
- if (h == NULL_TREE)
- return false;
- if (TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (h))
- return true;
- return false;
-}
+ access = get_first_repr_for_decl (base);
+ while (access && (access->offset + access->size <= offset))
+ access = access->next_grp;
+ if (!access)
+ return NULL;
-/* Return true if DECL can be decomposed into a set of independent
- (though not necessarily scalar) variables. */
+ return find_access_in_subtree (access, offset, size);
+}
-static bool
-decl_can_be_decomposed_p (tree var)
+/* Add LINK to the linked list of assign links of RACC. */
+static void
+add_link_to_rhs (struct access *racc, struct assign_link *link)
{
- /* Early out for scalars. */
- if (is_sra_scalar_type (TREE_TYPE (var)))
- return false;
+ gcc_assert (link->racc == racc);
- /* The variable must not be aliased. */
- if (!is_gimple_non_addressable (var))
+ if (!racc->first_link)
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Cannot scalarize variable ");
- print_generic_expr (dump_file, var, dump_flags);
- fprintf (dump_file, " because it must live in memory\n");
- }
- return false;
+ gcc_assert (!racc->last_link);
+ racc->first_link = link;
}
+ else
+ racc->last_link->next = link;
+
+ racc->last_link = link;
+ link->next = NULL;
+}
- /* The variable must not be volatile. */
- if (TREE_THIS_VOLATILE (var))
+/* Move all link structures in their linked list in OLD_RACC to the linked list
+ in NEW_RACC. */
+static void
+relink_to_new_repr (struct access *new_racc, struct access *old_racc)
+{
+ if (!old_racc->first_link)
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Cannot scalarize variable ");
- print_generic_expr (dump_file, var, dump_flags);
- fprintf (dump_file, " because it is declared volatile\n");
- }
- return false;
+ gcc_assert (!old_racc->last_link);
+ return;
}
- /* We must be able to decompose the variable's type. */
- if (!sra_type_can_be_decomposed_p (TREE_TYPE (var)))
+ if (new_racc->first_link)
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Cannot scalarize variable ");
- print_generic_expr (dump_file, var, dump_flags);
- fprintf (dump_file, " because its type cannot be decomposed\n");
- }
- return false;
- }
+ gcc_assert (!new_racc->last_link->next);
+ gcc_assert (!old_racc->last_link || !old_racc->last_link->next);
- /* HACK: if we decompose a va_list_type_node before inlining, then we'll
- confuse tree-stdarg.c, and we won't be able to figure out which and
- how many arguments are accessed. This really should be improved in
- tree-stdarg.c, as the decomposition is truly a win. This could also
- be fixed if the stdarg pass ran early, but this can't be done until
- we've aliasing information early too. See PR 30791. */
- if (early_sra && is_va_list_type (TREE_TYPE (var)))
- return false;
+ new_racc->last_link->next = old_racc->first_link;
+ new_racc->last_link = old_racc->last_link;
+ }
+ else
+ {
+ gcc_assert (!new_racc->last_link);
- return true;
+ new_racc->first_link = old_racc->first_link;
+ new_racc->last_link = old_racc->last_link;
+ }
+ old_racc->first_link = old_racc->last_link = NULL;
}
-/* Return true if TYPE can be *completely* decomposed into scalars. */
+/* Add ACCESS to the work queue (which is actually a stack). */
-static bool
-type_can_instantiate_all_elements (tree type)
+static void
+add_access_to_work_queue (struct access *access)
{
- if (is_sra_scalar_type (type))
- return true;
- if (!sra_type_can_be_decomposed_p (type))
- return false;
-
- switch (TREE_CODE (type))
+ if (!access->grp_queued)
{
- case RECORD_TYPE:
- {
- unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
- tree f;
+ gcc_assert (!access->next_queued);
+ access->next_queued = work_queue_head;
+ access->grp_queued = 1;
+ work_queue_head = access;
+ }
+}
- if (bitmap_bit_p (sra_type_inst_cache, cache+0))
- return true;
- if (bitmap_bit_p (sra_type_inst_cache, cache+1))
- return false;
+/* Pop an access from the work queue, and return it, assuming there is one. */
- for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
- if (TREE_CODE (f) == FIELD_DECL)
- {
- if (!type_can_instantiate_all_elements (TREE_TYPE (f)))
- {
- bitmap_set_bit (sra_type_inst_cache, cache+1);
- return false;
- }
- }
+static struct access *
+pop_access_from_work_queue (void)
+{
+ struct access *access = work_queue_head;
- bitmap_set_bit (sra_type_inst_cache, cache+0);
- return true;
- }
+ work_queue_head = access->next_queued;
+ access->next_queued = NULL;
+ access->grp_queued = 0;
+ return access;
+}
- case ARRAY_TYPE:
- return type_can_instantiate_all_elements (TREE_TYPE (type));
- case COMPLEX_TYPE:
- return true;
+/* Allocate necessary structures. */
- default:
- gcc_unreachable ();
- }
+static void
+sra_initialize (void)
+{
+ candidate_bitmap = BITMAP_ALLOC (NULL);
+ gcc_obstack_init (&name_obstack);
+ access_pool = create_alloc_pool ("SRA accesses", sizeof (struct access), 16);
+ link_pool = create_alloc_pool ("SRA links", sizeof (struct assign_link), 16);
+ base_access_vec = pointer_map_create ();
}
-/* Test whether ELT or some sub-element cannot be scalarized. */
+/* Hook fed to pointer_map_traverse, deallocate stored vectors. */
static bool
-can_completely_scalarize_p (struct sra_elt *elt)
+delete_base_accesses (const void *key ATTRIBUTE_UNUSED, void **value,
+ void *data ATTRIBUTE_UNUSED)
{
- struct sra_elt *c;
-
- if (elt->cannot_scalarize)
- return false;
-
- for (c = elt->children; c; c = c->sibling)
- if (!can_completely_scalarize_p (c))
- return false;
-
- for (c = elt->groups; c; c = c->sibling)
- if (!can_completely_scalarize_p (c))
- return false;
+ VEC (access_p, heap) *access_vec;
+ access_vec = (VEC (access_p, heap) *) *value;
+ VEC_free (access_p, heap, access_vec);
return true;
}
-
-/* A simplified tree hashing algorithm that only handles the types of
- trees we expect to find in sra_elt->element. */
+/* Deallocate all general structures. */
-static hashval_t
-sra_hash_tree (tree t)
+static void
+sra_deinitialize (void)
{
- hashval_t h;
-
- switch (TREE_CODE (t))
- {
- case VAR_DECL:
- case PARM_DECL:
- case RESULT_DECL:
- h = DECL_UID (t);
- break;
-
- case INTEGER_CST:
- h = TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t);
- break;
-
- case RANGE_EXPR:
- h = iterative_hash_expr (TREE_OPERAND (t, 0), 0);
- h = iterative_hash_expr (TREE_OPERAND (t, 1), h);
- break;
+ BITMAP_FREE (candidate_bitmap);
+ free_alloc_pool (access_pool);
+ free_alloc_pool (link_pool);
+ obstack_free (&name_obstack, NULL);
- case FIELD_DECL:
- /* We can have types that are compatible, but have different member
- lists, so we can't hash fields by ID. Use offsets instead. */
- h = iterative_hash_expr (DECL_FIELD_OFFSET (t), 0);
- h = iterative_hash_expr (DECL_FIELD_BIT_OFFSET (t), h);
- break;
+ pointer_map_traverse (base_access_vec, delete_base_accesses, NULL);
+ pointer_map_destroy (base_access_vec);
+}
- case BIT_FIELD_REF:
- /* Don't take operand 0 into account, that's our parent. */
- h = iterative_hash_expr (TREE_OPERAND (t, 1), 0);
- h = iterative_hash_expr (TREE_OPERAND (t, 2), h);
- break;
+/* Remove DECL from candidates for SRA and write REASON to the dump file if
+ there is one. */
+static void
+disqualify_candidate (tree decl, const char *reason)
+{
+ bitmap_clear_bit (candidate_bitmap, DECL_UID (decl));
- default:
- gcc_unreachable ();
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "! Disqualifying ");
+ print_generic_expr (dump_file, decl, 0);
+ fprintf (dump_file, " - %s\n", reason);
}
-
- return h;
}
-/* Hash function for type SRA_PAIR. */
+/* Return true iff the type contains a field or an element which does not allow
+ scalarization. */
-static hashval_t
-sra_elt_hash (const void *x)
+static bool
+type_internals_preclude_sra_p (tree type)
{
- const struct sra_elt *const e = (const struct sra_elt *) x;
- const struct sra_elt *p;
- hashval_t h;
-
- h = sra_hash_tree (e->element);
-
- /* Take into account everything except bitfield blocks back up the
- chain. Given that chain lengths are rarely very long, this
- should be acceptable. If we truly identify this as a performance
- problem, it should work to hash the pointer value
- "e->parent". */
- for (p = e->parent; p ; p = p->parent)
- if (!p->in_bitfld_block)
- h = (h * 65521) ^ sra_hash_tree (p->element);
-
- return h;
-}
+ tree fld;
+ tree et;
-/* Equality function for type SRA_PAIR. */
-
-static int
-sra_elt_eq (const void *x, const void *y)
-{
- const struct sra_elt *const a = (const struct sra_elt *) x;
- const struct sra_elt *const b = (const struct sra_elt *) y;
- tree ae, be;
- const struct sra_elt *ap = a->parent;
- const struct sra_elt *bp = b->parent;
-
- if (ap)
- while (ap->in_bitfld_block)
- ap = ap->parent;
- if (bp)
- while (bp->in_bitfld_block)
- bp = bp->parent;
-
- if (ap != bp)
- return false;
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ for (fld = TYPE_FIELDS (type); fld; fld = TREE_CHAIN (fld))
+ if (TREE_CODE (fld) == FIELD_DECL)
+ {
+ tree ft = TREE_TYPE (fld);
- ae = a->element;
- be = b->element;
+ if (TREE_THIS_VOLATILE (fld)
+ || !DECL_FIELD_OFFSET (fld) || !DECL_SIZE (fld)
+ || !host_integerp (DECL_FIELD_OFFSET (fld), 1)
+ || !host_integerp (DECL_SIZE (fld), 1))
+ return true;
- if (ae == be)
- return true;
- if (TREE_CODE (ae) != TREE_CODE (be))
- return false;
+ if (AGGREGATE_TYPE_P (ft)
+ && type_internals_preclude_sra_p (ft))
+ return true;
+ }
- switch (TREE_CODE (ae))
- {
- case VAR_DECL:
- case PARM_DECL:
- case RESULT_DECL:
- /* These are all pointer unique. */
return false;
- case INTEGER_CST:
- /* Integers are not pointer unique, so compare their values. */
- return tree_int_cst_equal (ae, be);
-
- case RANGE_EXPR:
- return
- tree_int_cst_equal (TREE_OPERAND (ae, 0), TREE_OPERAND (be, 0))
- && tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1));
+ case ARRAY_TYPE:
+ et = TREE_TYPE (type);
- case FIELD_DECL:
- /* Fields are unique within a record, but not between
- compatible records. */
- if (DECL_FIELD_CONTEXT (ae) == DECL_FIELD_CONTEXT (be))
+ if (AGGREGATE_TYPE_P (et))
+ return type_internals_preclude_sra_p (et);
+ else
return false;
- return fields_compatible_p (ae, be);
-
- case BIT_FIELD_REF:
- return
- tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1))
- && tree_int_cst_equal (TREE_OPERAND (ae, 2), TREE_OPERAND (be, 2));
default:
- gcc_unreachable ();
+ return false;
}
}
-/* Create or return the SRA_ELT structure for CHILD in PARENT. PARENT
- may be null, in which case CHILD must be a DECL. */
+/* Create and insert access for EXPR. Return created access, or NULL if it is
+ not possible. */
-static struct sra_elt *
-lookup_element (struct sra_elt *parent, tree child, tree type,
- enum insert_option insert)
+static struct access *
+create_access (tree expr, bool write)
{
- struct sra_elt dummy;
- struct sra_elt **slot;
- struct sra_elt *elt;
+ struct access *access;
+ void **slot;
+ VEC (access_p,heap) *vec;
+ HOST_WIDE_INT offset, size, max_size;
+ tree base = expr;
+ bool unscalarizable_region = false;
- if (parent)
- dummy.parent = parent->is_group ? parent->parent : parent;
- else
- dummy.parent = NULL;
- dummy.element = child;
+ base = get_ref_base_and_extent (expr, &offset, &size, &max_size);
- slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert);
- if (!slot && insert == NO_INSERT)
+ if (!DECL_P (base) || !bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
return NULL;
- elt = *slot;
- if (!elt && insert == INSERT)
+ if (size != max_size)
{
- *slot = elt = XOBNEW (&sra_obstack, struct sra_elt);
- memset (elt, 0, sizeof (*elt));
-
- elt->parent = parent;
- elt->element = child;
- elt->type = type;
- elt->is_scalar = is_sra_scalar_type (type);
-
- if (parent)
- {
- if (IS_ELEMENT_FOR_GROUP (elt->element))
- {
- elt->is_group = true;
- elt->sibling = parent->groups;
- parent->groups = elt;
- }
- else
- {
- elt->sibling = parent->children;
- parent->children = elt;
- }
- }
-
- /* If this is a parameter, then if we want to scalarize, we have
- one copy from the true function parameter. Count it now. */
- if (TREE_CODE (child) == PARM_DECL)
- {
- elt->n_copies = 1;
- bitmap_set_bit (needs_copy_in, DECL_UID (child));
- }
+ size = max_size;
+ unscalarizable_region = true;
}
- return elt;
-}
-
-/* Create or return the SRA_ELT structure for EXPR if the expression
- refers to a scalarizable variable. */
-
-static struct sra_elt *
-maybe_lookup_element_for_expr (tree expr)
-{
- struct sra_elt *elt;
- tree child;
-
- switch (TREE_CODE (expr))
+ if (size < 0)
{
- case VAR_DECL:
- case PARM_DECL:
- case RESULT_DECL:
- if (is_sra_candidate_decl (expr))
- return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT);
- return NULL;
-
- case ARRAY_REF:
- /* We can't scalarize variable array indices. */
- if (in_array_bounds_p (expr))
- child = TREE_OPERAND (expr, 1);
- else
- return NULL;
- break;
-
- case ARRAY_RANGE_REF:
- /* We can't scalarize variable array indices. */
- if (range_in_array_bounds_p (expr))
- {
- tree domain = TYPE_DOMAIN (TREE_TYPE (expr));
- child = build2 (RANGE_EXPR, integer_type_node,
- TYPE_MIN_VALUE (domain), TYPE_MAX_VALUE (domain));
- }
- else
- return NULL;
- break;
-
- case COMPONENT_REF:
- {
- tree type = TREE_TYPE (TREE_OPERAND (expr, 0));
- /* Don't look through unions. */
- if (TREE_CODE (type) != RECORD_TYPE)
- return NULL;
- /* Neither through variable-sized records. */
- if (TYPE_SIZE (type) == NULL_TREE
- || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
- return NULL;
- child = TREE_OPERAND (expr, 1);
- }
- break;
-
- case REALPART_EXPR:
- child = integer_zero_node;
- break;
- case IMAGPART_EXPR:
- child = integer_one_node;
- break;
-
- default:
+ disqualify_candidate (base, "Encountered an unconstrained access.");
return NULL;
}
- elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0));
- if (elt)
- return lookup_element (elt, child, TREE_TYPE (expr), INSERT);
- return NULL;
-}
+ access = (struct access *) pool_alloc (access_pool);
+ memset (access, 0, sizeof (struct access));
-
-/* Functions to walk just enough of the tree to see all scalarizable
- references, and categorize them. */
+ access->base = base;
+ access->offset = offset;
+ access->size = size;
+ access->expr = expr;
+ access->type = TREE_TYPE (expr);
+ access->write = write;
+ access->grp_unscalarizable_region = unscalarizable_region;
-/* A set of callbacks for phases 2 and 4. They'll be invoked for the
- various kinds of references seen. In all cases, *GSI is an iterator
- pointing to the statement being processed. */
-struct sra_walk_fns
-{
- /* Invoked when ELT is required as a unit. Note that ELT might refer to
- a leaf node, in which case this is a simple scalar reference. *EXPR_P
- points to the location of the expression. IS_OUTPUT is true if this
- is a left-hand-side reference. USE_ALL is true if we saw something we
- couldn't quite identify and had to force the use of the entire object. */
- void (*use) (struct sra_elt *elt, tree *expr_p,
- gimple_stmt_iterator *gsi, bool is_output, bool use_all);
-
- /* Invoked when we have a copy between two scalarizable references. */
- void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
- gimple_stmt_iterator *gsi);
-
- /* Invoked when ELT is initialized from a constant. VALUE may be NULL,
- in which case it should be treated as an empty CONSTRUCTOR. */
- void (*init) (struct sra_elt *elt, tree value, gimple_stmt_iterator *gsi);
-
- /* Invoked when we have a copy between one scalarizable reference ELT
- and one non-scalarizable reference OTHER without side-effects.
- IS_OUTPUT is true if ELT is on the left-hand side. */
- void (*ldst) (struct sra_elt *elt, tree other,
- gimple_stmt_iterator *gsi, bool is_output);
-
- /* True during phase 2, false during phase 4. */
- /* ??? This is a hack. */
- bool initial_scan;
-};
-
-#ifdef ENABLE_CHECKING
-/* Invoked via walk_tree, if *TP contains a candidate decl, return it. */
+ slot = pointer_map_contains (base_access_vec, base);
+ if (slot)
+ vec = (VEC (access_p, heap) *) *slot;
+ else
+ vec = VEC_alloc (access_p, heap, 32);
-static tree
-sra_find_candidate_decl (tree *tp, int *walk_subtrees,
- void *data ATTRIBUTE_UNUSED)
-{
- tree t = *tp;
- enum tree_code code = TREE_CODE (t);
+ VEC_safe_push (access_p, heap, vec, access);
- if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL)
- {
- *walk_subtrees = 0;
- if (is_sra_candidate_decl (t))
- return t;
- }
- else if (TYPE_P (t))
- *walk_subtrees = 0;
+ *((struct VEC (access_p,heap) **)
+ pointer_map_insert (base_access_vec, base)) = vec;
- return NULL;
+ return access;
}
-#endif
-
-/* Walk most expressions looking for a scalarizable aggregate.
- If we find one, invoke FNS->USE. */
-
-static void
-sra_walk_expr (tree *expr_p, gimple_stmt_iterator *gsi, bool is_output,
- const struct sra_walk_fns *fns)
-{
- tree expr = *expr_p;
- tree inner = expr;
- bool disable_scalarization = false;
- bool use_all_p = false;
-
- /* We're looking to collect a reference expression between EXPR and INNER,
- such that INNER is a scalarizable decl and all other nodes through EXPR
- are references that we can scalarize. If we come across something that
- we can't scalarize, we reset EXPR. This has the effect of making it
- appear that we're referring to the larger expression as a whole. */
-
- while (1)
- switch (TREE_CODE (inner))
- {
- case VAR_DECL:
- case PARM_DECL:
- case RESULT_DECL:
- /* If there is a scalarizable decl at the bottom, then process it. */
- if (is_sra_candidate_decl (inner))
- {
- struct sra_elt *elt = maybe_lookup_element_for_expr (expr);
- if (disable_scalarization)
- elt->cannot_scalarize = true;
- else
- fns->use (elt, expr_p, gsi, is_output, use_all_p);
- }
- return;
-
- case ARRAY_REF:
- /* Non-constant index means any member may be accessed. Prevent the
- expression from being scalarized. If we were to treat this as a
- reference to the whole array, we can wind up with a single dynamic
- index reference inside a loop being overridden by several constant
- index references during loop setup. It's possible that this could
- be avoided by using dynamic usage counts based on BB trip counts
- (based on loop analysis or profiling), but that hardly seems worth
- the effort. */
- /* ??? Hack. Figure out how to push this into the scan routines
- without duplicating too much code. */
- if (!in_array_bounds_p (inner))
- {
- disable_scalarization = true;
- goto use_all;
- }
- /* ??? Are we assured that non-constant bounds and stride will have
- the same value everywhere? I don't think Fortran will... */
- if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
- goto use_all;
- inner = TREE_OPERAND (inner, 0);
- break;
-
- case ARRAY_RANGE_REF:
- if (!range_in_array_bounds_p (inner))
- {
- disable_scalarization = true;
- goto use_all;
- }
- /* ??? See above non-constant bounds and stride . */
- if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
- goto use_all;
- inner = TREE_OPERAND (inner, 0);
- break;
- case COMPONENT_REF:
- {
- tree type = TREE_TYPE (TREE_OPERAND (inner, 0));
- /* Don't look through unions. */
- if (TREE_CODE (type) != RECORD_TYPE)
- goto use_all;
- /* Neither through variable-sized records. */
- if (TYPE_SIZE (type) == NULL_TREE
- || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
- goto use_all;
- inner = TREE_OPERAND (inner, 0);
- }
- break;
- case REALPART_EXPR:
- case IMAGPART_EXPR:
- inner = TREE_OPERAND (inner, 0);
- break;
-
- case BIT_FIELD_REF:
- /* A bit field reference to a specific vector is scalarized but for
- ones for inputs need to be marked as used on the left hand size so
- when we scalarize it, we can mark that variable as non renamable. */
- if (is_output
- && TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) == VECTOR_TYPE)
- {
- struct sra_elt *elt
- = maybe_lookup_element_for_expr (TREE_OPERAND (inner, 0));
- if (elt)
- elt->is_vector_lhs = true;
- }
-
- /* A bit field reference (access to *multiple* fields simultaneously)
- is not currently scalarized. Consider this an access to the full
- outer element, to which walk_tree will bring us next. */
- goto use_all;
-
- CASE_CONVERT:
- /* Similarly, a nop explicitly wants to look at an object in a
- type other than the one we've scalarized. */
- goto use_all;
-
- case VIEW_CONVERT_EXPR:
- /* Likewise for a view conversion, but with an additional twist:
- it can be on the LHS and, in this case, an access to the full
- outer element would mean a killing def. So we need to punt
- if we haven't already a full access to the current element,
- because we cannot pretend to have a killing def if we only
- have a partial access at some level. */
- if (is_output && !use_all_p && inner != expr)
- disable_scalarization = true;
- goto use_all;
-
- case WITH_SIZE_EXPR:
- /* This is a transparent wrapper. The entire inner expression really
- is being used. */
- goto use_all;
-
- use_all:
- expr_p = &TREE_OPERAND (inner, 0);
- inner = expr = *expr_p;
- use_all_p = true;
- break;
-
- default:
-#ifdef ENABLE_CHECKING
- /* Validate that we're not missing any references. */
- gcc_assert (!walk_tree (&inner, sra_find_candidate_decl, NULL, NULL));
-#endif
- return;
- }
-}
-
-/* Walk the arguments of a GIMPLE_CALL looking for scalarizable aggregates.
- If we find one, invoke FNS->USE. */
+/* Search the given tree for a declaration by skipping handled components and
+ exclude it from the candidates. */
static void
-sra_walk_gimple_call (gimple stmt, gimple_stmt_iterator *gsi,
- const struct sra_walk_fns *fns)
+disqualify_base_of_expr (tree t, const char *reason)
{
- int i;
- int nargs = gimple_call_num_args (stmt);
+ while (handled_component_p (t))
+ t = TREE_OPERAND (t, 0);
- for (i = 0; i < nargs; i++)
- sra_walk_expr (gimple_call_arg_ptr (stmt, i), gsi, false, fns);
-
- if (gimple_call_lhs (stmt))
- sra_walk_expr (gimple_call_lhs_ptr (stmt), gsi, true, fns);
-}
-
-/* Walk the inputs and outputs of a GIMPLE_ASM looking for scalarizable
- aggregates. If we find one, invoke FNS->USE. */
-
-static void
-sra_walk_gimple_asm (gimple stmt, gimple_stmt_iterator *gsi,
- const struct sra_walk_fns *fns)
-{
- size_t i;
- for (i = 0; i < gimple_asm_ninputs (stmt); i++)
- sra_walk_expr (&TREE_VALUE (gimple_asm_input_op (stmt, i)), gsi, false, fns);
- for (i = 0; i < gimple_asm_noutputs (stmt); i++)
- sra_walk_expr (&TREE_VALUE (gimple_asm_output_op (stmt, i)), gsi, true, fns);
+ if (DECL_P (t))
+ disqualify_candidate (t, reason);
}
-/* Walk a GIMPLE_ASSIGN and categorize the assignment appropriately. */
+/* Scan expression EXPR and create access structures for all accesses to
+ candidates for scalarization. Return the created access or NULL if none is
+ created. */
-static void
-sra_walk_gimple_assign (gimple stmt, gimple_stmt_iterator *gsi,
- const struct sra_walk_fns *fns)
+static struct access *
+build_access_from_expr_1 (tree *expr_ptr, bool write)
{
- struct sra_elt *lhs_elt = NULL, *rhs_elt = NULL;
- tree lhs, rhs;
+ struct access *ret = NULL;
+ tree expr = *expr_ptr;
+ bool partial_ref;
- /* If there is more than 1 element on the RHS, only walk the lhs. */
- if (!gimple_assign_single_p (stmt))
+ if (TREE_CODE (expr) == BIT_FIELD_REF
+ || TREE_CODE (expr) == IMAGPART_EXPR
+ || TREE_CODE (expr) == REALPART_EXPR)
{
- sra_walk_expr (gimple_assign_lhs_ptr (stmt), gsi, true, fns);
- return;
+ expr = TREE_OPERAND (expr, 0);
+ partial_ref = true;
}
+ else
+ partial_ref = false;
- lhs = gimple_assign_lhs (stmt);
- rhs = gimple_assign_rhs1 (stmt);
- lhs_elt = maybe_lookup_element_for_expr (lhs);
- rhs_elt = maybe_lookup_element_for_expr (rhs);
+ /* We need to dive through V_C_Es in order to get the size of its parameter
+ and not the result type. Ada produces such statements. We are also
+ capable of handling the topmost V_C_E but not any of those buried in other
+ handled components. */
+ if (TREE_CODE (expr) == VIEW_CONVERT_EXPR)
+ expr = TREE_OPERAND (expr, 0);
- /* If both sides are scalarizable, this is a COPY operation. */
- if (lhs_elt && rhs_elt)
+ if (contains_view_convert_expr_p (expr))
{
- fns->copy (lhs_elt, rhs_elt, gsi);
- return;
+ disqualify_base_of_expr (expr, "V_C_E under a different handled "
+ "component.");
+ return NULL;
}
- /* If the RHS is scalarizable, handle it. There are only two cases. */
- if (rhs_elt)
+ switch (TREE_CODE (expr))
{
- if (!rhs_elt->is_scalar && !TREE_SIDE_EFFECTS (lhs))
- fns->ldst (rhs_elt, lhs, gsi, false);
- else
- fns->use (rhs_elt, gimple_assign_rhs1_ptr (stmt), gsi, false, false);
- }
-
- /* If it isn't scalarizable, there may be scalarizable variables within, so
- check for a call or else walk the RHS to see if we need to do any
- copy-in operations. We need to do it before the LHS is scalarized so
- that the statements get inserted in the proper place, before any
- copy-out operations. */
- else
- sra_walk_expr (gimple_assign_rhs1_ptr (stmt), gsi, false, fns);
+ case VAR_DECL:
+ case PARM_DECL:
+ case RESULT_DECL:
+ case COMPONENT_REF:
+ case ARRAY_REF:
+ case ARRAY_RANGE_REF:
+ ret = create_access (expr, write);
+ break;
- /* Likewise, handle the LHS being scalarizable. We have cases similar
- to those above, but also want to handle RHS being constant. */
- if (lhs_elt)
- {
- /* If this is an assignment from a constant, or constructor, then
- we have access to all of the elements individually. Invoke INIT. */
- if (TREE_CODE (rhs) == COMPLEX_EXPR
- || TREE_CODE (rhs) == COMPLEX_CST
- || TREE_CODE (rhs) == CONSTRUCTOR)
- fns->init (lhs_elt, rhs, gsi);
-
- /* If this is an assignment from read-only memory, treat this as if
- we'd been passed the constructor directly. Invoke INIT. */
- else if (TREE_CODE (rhs) == VAR_DECL
- && TREE_STATIC (rhs)
- && !DECL_EXTERNAL (rhs)
- && TREE_READONLY (rhs)
- && targetm.binds_local_p (rhs))
- fns->init (lhs_elt, DECL_INITIAL (rhs), gsi);
-
- /* If this is a copy from a non-scalarizable lvalue, invoke LDST.
- The lvalue requirement prevents us from trying to directly scalarize
- the result of a function call. Which would result in trying to call
- the function multiple times, and other evil things. */
- else if (!lhs_elt->is_scalar
- && !TREE_SIDE_EFFECTS (rhs) && is_gimple_addressable (rhs))
- fns->ldst (lhs_elt, rhs, gsi, true);
-
- /* Otherwise we're being used in some context that requires the
- aggregate to be seen as a whole. Invoke USE. */
- else
- fns->use (lhs_elt, gimple_assign_lhs_ptr (stmt), gsi, true, false);
+ default:
+ break;
}
- /* Similarly to above, LHS_ELT being null only means that the LHS as a
- whole is not a scalarizable reference. There may be occurrences of
- scalarizable variables within, which implies a USE. */
- else
- sra_walk_expr (gimple_assign_lhs_ptr (stmt), gsi, true, fns);
+ if (write && partial_ref && ret)
+ ret->grp_partial_lhs = 1;
+
+ return ret;
}
-/* Entry point to the walk functions. Search the entire function,
- invoking the callbacks in FNS on each of the references to
- scalarizable variables. */
+/* Callback of scan_function. Scan expression EXPR and create access
+ structures for all accesses to candidates for scalarization. Return true if
+ any access has been inserted. */
-static void
-sra_walk_function (const struct sra_walk_fns *fns)
+static bool
+build_access_from_expr (tree *expr_ptr,
+ gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, bool write,
+ void *data ATTRIBUTE_UNUSED)
{
- basic_block bb;
- gimple_stmt_iterator si, ni;
-
- /* ??? Phase 4 could derive some benefit to walking the function in
- dominator tree order. */
-
- FOR_EACH_BB (bb)
- for (si = gsi_start_bb (bb); !gsi_end_p (si); si = ni)
- {
- gimple stmt;
-
- stmt = gsi_stmt (si);
-
- ni = si;
- gsi_next (&ni);
-
- /* If the statement does not reference memory, then it doesn't
- make any structure references that we care about. */
- if (!gimple_references_memory_p (stmt))
- continue;
-
- switch (gimple_code (stmt))
- {
- case GIMPLE_RETURN:
- /* If we have "return <retval>" then the return value is
- already exposed for our pleasure. Walk it as a USE to
- force all the components back in place for the return.
- */
- if (gimple_return_retval (stmt) == NULL_TREE)
- ;
- else
- sra_walk_expr (gimple_return_retval_ptr (stmt), &si, false,
- fns);
- break;
-
- case GIMPLE_ASSIGN:
- sra_walk_gimple_assign (stmt, &si, fns);
- break;
- case GIMPLE_CALL:
- sra_walk_gimple_call (stmt, &si, fns);
- break;
- case GIMPLE_ASM:
- sra_walk_gimple_asm (stmt, &si, fns);
- break;
-
- default:
- break;
- }
- }
+ return build_access_from_expr_1 (expr_ptr, write) != NULL;
}
-
-/* Phase One: Scan all referenced variables in the program looking for
- structures that could be decomposed. */
+/* Disqualify LHS and RHS for scalarization if STMT must end its basic block in
+ modes in which it matters, return true iff they have been disqualified. RHS
+ may be NULL, in that case ignore it. If we scalarize an aggregate in
+ intra-SRA we may need to add statements after each statement. This is not
+ possible if a statement unconditionally has to end the basic block. */
static bool
-find_candidates_for_sra (void)
+disqualify_ops_if_throwing_stmt (gimple stmt, tree lhs, tree rhs)
{
- bool any_set = false;
- tree var;
- referenced_var_iterator rvi;
-
- FOR_EACH_REFERENCED_VAR (var, rvi)
+ if (stmt_can_throw_internal (stmt) || stmt_ends_bb_p (stmt))
{
- if (decl_can_be_decomposed_p (var))
- {
- bitmap_set_bit (sra_candidates, DECL_UID (var));
- any_set = true;
- }
+ disqualify_base_of_expr (lhs, "LHS of a throwing stmt.");
+ if (rhs)
+ disqualify_base_of_expr (rhs, "RHS of a throwing stmt.");
+ return true;
}
-
- return any_set;
+ return false;
}
-
-/* Phase Two: Scan all references to scalarizable variables. Count the
- number of times they are used or copied respectively. */
-/* Callbacks to fill in SRA_WALK_FNS. Everything but USE is
- considered a copy, because we can decompose the reference such that
- the sub-elements needn't be contiguous. */
+/* Result code for scan_assign callback for scan_function. */
+enum scan_assign_result { SRA_SA_NONE, /* nothing done for the stmt */
+ SRA_SA_PROCESSED, /* stmt analyzed/changed */
+ SRA_SA_REMOVED }; /* stmt redundant and eliminated */
-static void
-scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED,
- gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
- bool is_output ATTRIBUTE_UNUSED, bool use_all ATTRIBUTE_UNUSED)
-{
- elt->n_uses += 1;
-}
-static void
-scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
- gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
-{
- lhs_elt->n_copies += 1;
- rhs_elt->n_copies += 1;
-}
-
-static void
-scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED,
- gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
-{
- lhs_elt->n_copies += 1;
-}
+/* Callback of scan_function. Scan expressions occuring in the statement
+ pointed to by STMT_EXPR, create access structures for all accesses to
+ candidates for scalarization and remove those candidates which occur in
+ statements or expressions that prevent them from being split apart. Return
+ true if any access has been inserted. */
-static void
-scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED,
- gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
- bool is_output ATTRIBUTE_UNUSED)
+static enum scan_assign_result
+build_accesses_from_assign (gimple *stmt_ptr,
+ gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
+ void *data ATTRIBUTE_UNUSED)
{
- elt->n_copies += 1;
-}
+ gimple stmt = *stmt_ptr;
+ tree *lhs_ptr, *rhs_ptr;
+ struct access *lacc, *racc;
-/* Dump the values we collected during the scanning phase. */
-
-static void
-scan_dump (struct sra_elt *elt)
-{
- struct sra_elt *c;
-
- dump_sra_elt_name (dump_file, elt);
- fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies);
-
- for (c = elt->children; c ; c = c->sibling)
- scan_dump (c);
-
- for (c = elt->groups; c ; c = c->sibling)
- scan_dump (c);
-}
+ if (!gimple_assign_single_p (stmt))
+ return SRA_SA_NONE;
-/* Entry point to phase 2. Scan the entire function, building up
- scalarization data structures, recording copies and uses. */
+ lhs_ptr = gimple_assign_lhs_ptr (stmt);
+ rhs_ptr = gimple_assign_rhs1_ptr (stmt);
-static void
-scan_function (void)
-{
- static const struct sra_walk_fns fns = {
- scan_use, scan_copy, scan_init, scan_ldst, true
- };
- bitmap_iterator bi;
+ if (disqualify_ops_if_throwing_stmt (stmt, *lhs_ptr, *rhs_ptr))
+ return SRA_SA_NONE;
- sra_walk_function (&fns);
+ racc = build_access_from_expr_1 (rhs_ptr, false);
+ lacc = build_access_from_expr_1 (lhs_ptr, true);
- if (dump_file && (dump_flags & TDF_DETAILS))
+ if (lacc && racc
+ && !lacc->grp_unscalarizable_region
+ && !racc->grp_unscalarizable_region
+ && AGGREGATE_TYPE_P (TREE_TYPE (*lhs_ptr))
+ /* FIXME: Turn the following line into an assert after PR 40058 is
+ fixed. */
+ && lacc->size == racc->size
+ && useless_type_conversion_p (lacc->type, racc->type))
{
- unsigned i;
+ struct assign_link *link;
- fputs ("\nScan results:\n", dump_file);
- EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
- {
- tree var = referenced_var (i);
- struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
- if (elt)
- scan_dump (elt);
- }
- fputc ('\n', dump_file);
- }
-}
-
-/* Phase Three: Make decisions about which variables to scalarize, if any.
- All elements to be scalarized have replacement variables made for them. */
+ link = (struct assign_link *) pool_alloc (link_pool);
+ memset (link, 0, sizeof (struct assign_link));
-/* A subroutine of build_element_name. Recursively build the element
- name on the obstack. */
+ link->lacc = lacc;
+ link->racc = racc;
-static void
-build_element_name_1 (struct sra_elt *elt)
-{
- tree t;
- char buffer[32];
-
- if (elt->parent)
- {
- build_element_name_1 (elt->parent);
- obstack_1grow (&sra_obstack, '$');
-
- if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
- {
- if (elt->element == integer_zero_node)
- obstack_grow (&sra_obstack, "real", 4);
- else
- obstack_grow (&sra_obstack, "imag", 4);
- return;
- }
+ add_link_to_rhs (racc, link);
}
- t = elt->element;
- if (TREE_CODE (t) == INTEGER_CST)
- {
- /* ??? Eh. Don't bother doing double-wide printing. */
- sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t));
- obstack_grow (&sra_obstack, buffer, strlen (buffer));
- }
- else if (TREE_CODE (t) == BIT_FIELD_REF)
- {
- sprintf (buffer, "B" HOST_WIDE_INT_PRINT_DEC,
- tree_low_cst (TREE_OPERAND (t, 2), 1));
- obstack_grow (&sra_obstack, buffer, strlen (buffer));
- sprintf (buffer, "F" HOST_WIDE_INT_PRINT_DEC,
- tree_low_cst (TREE_OPERAND (t, 1), 1));
- obstack_grow (&sra_obstack, buffer, strlen (buffer));
- }
- else
- {
- tree name = DECL_NAME (t);
- if (name)
- obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name),
- IDENTIFIER_LENGTH (name));
- else
- {
- sprintf (buffer, "D%u", DECL_UID (t));
- obstack_grow (&sra_obstack, buffer, strlen (buffer));
- }
- }
+ return (lacc || racc) ? SRA_SA_PROCESSED : SRA_SA_NONE;
}
-/* Construct a pretty variable name for an element's replacement variable.
- The name is built on the obstack. */
+/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
+ GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
-static char *
-build_element_name (struct sra_elt *elt)
+static bool
+asm_visit_addr (gimple stmt ATTRIBUTE_UNUSED, tree op,
+ void *data ATTRIBUTE_UNUSED)
{
- build_element_name_1 (elt);
- obstack_1grow (&sra_obstack, '\0');
- return XOBFINISH (&sra_obstack, char *);
+ if (DECL_P (op))
+ disqualify_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");
+
+ return false;
}
-/* Insert a gimple_seq SEQ on all the outgoing edges out of BB. Note that
- if BB has more than one edge, STMT will be replicated for each edge.
- Also, abnormal edges will be ignored. */
-static void
-insert_edge_copies_seq (gimple_seq seq, basic_block bb)
-{
- edge e;
- edge_iterator ei;
- unsigned n_copies = -1;
+/* Scan function and look for interesting statements. Return true if any has
+ been found or processed, as indicated by callbacks. SCAN_EXPR is a callback
+ called on all expressions within statements except assign statements and
+ those deemed entirely unsuitable for some reason (all operands in such
+ statements and expression are removed from candidate_bitmap). SCAN_ASSIGN
+ is a callback called on all assign statements, HANDLE_SSA_DEFS is a callback
+ called on assign statements and those call statements which have a lhs and
+ it is the only callback which can be NULL. ANALYSIS_STAGE is true when
+ running in the analysis stage of a pass and thus no statement is being
+ modified. DATA is a pointer passed to all callbacks. If any single
+ callback returns true, this function also returns true, otherwise it returns
+ false. */
- FOR_EACH_EDGE (e, ei, bb->succs)
- if (!(e->flags & EDGE_ABNORMAL))
- n_copies++;
+static bool
+scan_function (bool (*scan_expr) (tree *, gimple_stmt_iterator *, bool, void *),
+ enum scan_assign_result (*scan_assign) (gimple *,
+ gimple_stmt_iterator *,
+ void *),
+ bool (*handle_ssa_defs)(gimple, void *),
+ bool analysis_stage, void *data)
+{
+ gimple_stmt_iterator gsi;
+ basic_block bb;
+ unsigned i;
+ tree *t;
+ bool ret = false;
- FOR_EACH_EDGE (e, ei, bb->succs)
- if (!(e->flags & EDGE_ABNORMAL))
- gsi_insert_seq_on_edge (e, n_copies-- > 0 ? gimple_seq_copy (seq) : seq);
-}
+ FOR_EACH_BB (bb)
+ {
+ bool bb_changed = false;
-/* Instantiate an element as an independent variable. */
+ gsi = gsi_start_bb (bb);
+ while (!gsi_end_p (gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ enum scan_assign_result assign_result;
+ bool any = false, deleted = false;
-static void
-instantiate_element (struct sra_elt *elt)
-{
- struct sra_elt *base_elt;
- tree var, base;
- bool nowarn = TREE_NO_WARNING (elt->element);
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_RETURN:
+ t = gimple_return_retval_ptr (stmt);
+ if (*t != NULL_TREE)
+ any |= scan_expr (t, &gsi, false, data);
+ break;
- for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent)
- if (!nowarn)
- nowarn = TREE_NO_WARNING (base_elt->parent->element);
- base = base_elt->element;
+ case GIMPLE_ASSIGN:
+ assign_result = scan_assign (&stmt, &gsi, data);
+ any |= assign_result == SRA_SA_PROCESSED;
+ deleted = assign_result == SRA_SA_REMOVED;
+ if (handle_ssa_defs && assign_result != SRA_SA_REMOVED)
+ any |= handle_ssa_defs (stmt, data);
+ break;
- elt->replacement = var = make_rename_temp (elt->type, "SR");
+ case GIMPLE_CALL:
+ /* Operands must be processed before the lhs. */
+ for (i = 0; i < gimple_call_num_args (stmt); i++)
+ {
+ tree *argp = gimple_call_arg_ptr (stmt, i);
+ any |= scan_expr (argp, &gsi, false, data);
+ }
- if (DECL_P (elt->element)
- && !tree_int_cst_equal (DECL_SIZE (var), DECL_SIZE (elt->element)))
- {
- DECL_SIZE (var) = DECL_SIZE (elt->element);
- DECL_SIZE_UNIT (var) = DECL_SIZE_UNIT (elt->element);
-
- elt->in_bitfld_block = 1;
- elt->replacement = fold_build3 (BIT_FIELD_REF, elt->type, var,
- DECL_SIZE (var),
- BYTES_BIG_ENDIAN
- ? size_binop (MINUS_EXPR,
- TYPE_SIZE (elt->type),
- DECL_SIZE (var))
- : bitsize_int (0));
- }
+ if (gimple_call_lhs (stmt))
+ {
+ tree *lhs_ptr = gimple_call_lhs_ptr (stmt);
+ if (!analysis_stage
+ || !disqualify_ops_if_throwing_stmt (stmt,
+ *lhs_ptr, NULL))
+ {
+ any |= scan_expr (lhs_ptr, &gsi, true, data);
+ if (handle_ssa_defs)
+ any |= handle_ssa_defs (stmt, data);
+ }
+ }
+ break;
- /* For vectors, if used on the left hand side with BIT_FIELD_REF,
- they are not a gimple register. */
- if (TREE_CODE (TREE_TYPE (var)) == VECTOR_TYPE && elt->is_vector_lhs)
- DECL_GIMPLE_REG_P (var) = 0;
+ case GIMPLE_ASM:
- DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base);
- DECL_ARTIFICIAL (var) = 1;
+ if (analysis_stage)
+ walk_stmt_load_store_addr_ops (stmt, NULL, NULL, NULL,
+ asm_visit_addr);
+ for (i = 0; i < gimple_asm_ninputs (stmt); i++)
+ {
+ tree *op = &TREE_VALUE (gimple_asm_input_op (stmt, i));
+ any |= scan_expr (op, &gsi, false, data);
+ }
+ for (i = 0; i < gimple_asm_noutputs (stmt); i++)
+ {
+ tree *op = &TREE_VALUE (gimple_asm_output_op (stmt, i));
+ any |= scan_expr (op, &gsi, true, data);
+ }
- if (TREE_THIS_VOLATILE (elt->type))
- {
- TREE_THIS_VOLATILE (var) = 1;
- TREE_SIDE_EFFECTS (var) = 1;
- }
+ default:
+ break;
+ }
- if (DECL_NAME (base) && !DECL_IGNORED_P (base))
- {
- char *pretty_name = build_element_name (elt);
- DECL_NAME (var) = get_identifier (pretty_name);
- obstack_free (&sra_obstack, pretty_name);
-
- SET_DECL_DEBUG_EXPR (var, generate_element_ref (elt));
- DECL_DEBUG_EXPR_IS_FROM (var) = 1;
-
- DECL_IGNORED_P (var) = 0;
- TREE_NO_WARNING (var) = nowarn;
- }
- else
- {
- DECL_IGNORED_P (var) = 1;
- /* ??? We can't generate any warning that would be meaningful. */
- TREE_NO_WARNING (var) = 1;
- }
+ if (any)
+ {
+ ret = true;
+ bb_changed = true;
- /* Zero-initialize bit-field scalarization variables, to avoid
- triggering undefined behavior. */
- if (TREE_CODE (elt->element) == BIT_FIELD_REF
- || (var != elt->replacement
- && TREE_CODE (elt->replacement) == BIT_FIELD_REF))
- {
- gimple_seq init = sra_build_assignment (var,
- fold_convert (TREE_TYPE (var),
- integer_zero_node)
- );
- insert_edge_copies_seq (init, ENTRY_BLOCK_PTR);
- mark_all_v_defs_seq (init);
+ if (!analysis_stage)
+ {
+ update_stmt (stmt);
+ if (!stmt_could_throw_p (stmt))
+ remove_stmt_from_eh_region (stmt);
+ }
+ }
+ if (deleted)
+ bb_changed = true;
+ else
+ {
+ gsi_next (&gsi);
+ ret = true;
+ }
+ }
+ if (!analysis_stage && bb_changed)
+ gimple_purge_dead_eh_edges (bb);
}
- if (dump_file)
- {
- fputs (" ", dump_file);
- dump_sra_elt_name (dump_file, elt);
- fputs (" -> ", dump_file);
- print_generic_expr (dump_file, var, dump_flags);
- fputc ('\n', dump_file);
- }
+ return ret;
}
-/* Make one pass across an element tree deciding whether or not it's
- profitable to instantiate individual leaf scalars.
+/* Helper of QSORT function. There are pointers to accesses in the array. An
+ access is considered smaller than another if it has smaller offset or if the
+ offsets are the same but is size is bigger. */
- PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES
- fields all the way up the tree. */
+static int
+compare_access_positions (const void *a, const void *b)
+{
+ const access_p *fp1 = (const access_p *) a;
+ const access_p *fp2 = (const access_p *) b;
+ const access_p f1 = *fp1;
+ const access_p f2 = *fp2;
+
+ if (f1->offset != f2->offset)
+ return f1->offset < f2->offset ? -1 : 1;
+
+ if (f1->size == f2->size)
+ {
+ /* Put any non-aggregate type before any aggregate type. */
+ if (!is_gimple_reg_type (f1->type)
+ && is_gimple_reg_type (f2->type))
+ return 1;
+ else if (is_gimple_reg_type (f1->type)
+ && !is_gimple_reg_type (f2->type))
+ return -1;
+ /* Put the integral type with the bigger precision first. */
+ else if (INTEGRAL_TYPE_P (f1->type)
+ && INTEGRAL_TYPE_P (f2->type))
+ return TYPE_PRECISION (f1->type) > TYPE_PRECISION (f2->type) ? -1 : 1;
+ /* Put any integral type with non-full precision last. */
+ else if (INTEGRAL_TYPE_P (f1->type)
+ && (TREE_INT_CST_LOW (TYPE_SIZE (f1->type))
+ != TYPE_PRECISION (f1->type)))
+ return 1;
+ else if (INTEGRAL_TYPE_P (f2->type)
+ && (TREE_INT_CST_LOW (TYPE_SIZE (f2->type))
+ != TYPE_PRECISION (f2->type)))
+ return -1;
+ /* Stabilize the sort. */
+ return TYPE_UID (f1->type) - TYPE_UID (f2->type);
+ }
+
+ /* We want the bigger accesses first, thus the opposite operator in the next
+ line: */
+ return f1->size > f2->size ? -1 : 1;
+}
+
+
+/* Append a name of the declaration to the name obstack. A helper function for
+ make_fancy_name. */
static void
-decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses,
- unsigned int parent_copies)
+make_fancy_decl_name (tree decl)
{
- if (dump_file && !elt->parent)
- {
- fputs ("Initial instantiation for ", dump_file);
- dump_sra_elt_name (dump_file, elt);
- fputc ('\n', dump_file);
- }
-
- if (elt->cannot_scalarize)
- return;
+ char buffer[32];
- if (elt->is_scalar)
- {
- /* The decision is simple: instantiate if we're used more frequently
- than the parent needs to be seen as a complete unit. */
- if (elt->n_uses + elt->n_copies + parent_copies > parent_uses)
- instantiate_element (elt);
- }
+ tree name = DECL_NAME (decl);
+ if (name)
+ obstack_grow (&name_obstack, IDENTIFIER_POINTER (name),
+ IDENTIFIER_LENGTH (name));
else
{
- struct sra_elt *c, *group;
- unsigned int this_uses = elt->n_uses + parent_uses;
- unsigned int this_copies = elt->n_copies + parent_copies;
-
- /* Consider groups of sub-elements as weighing in favour of
- instantiation whatever their size. */
- for (group = elt->groups; group ; group = group->sibling)
- FOR_EACH_ACTUAL_CHILD (c, group)
- {
- c->n_uses += group->n_uses;
- c->n_copies += group->n_copies;
- }
-
- for (c = elt->children; c ; c = c->sibling)
- decide_instantiation_1 (c, this_uses, this_copies);
+ sprintf (buffer, "D%u", DECL_UID (decl));
+ obstack_grow (&name_obstack, buffer, strlen (buffer));
}
}
-/* Compute the size and number of all instantiated elements below ELT.
- We will only care about this if the size of the complete structure
- fits in a HOST_WIDE_INT, so we don't have to worry about overflow. */
+/* Helper for make_fancy_name. */
-static unsigned int
-sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep)
+static void
+make_fancy_name_1 (tree expr)
{
- if (elt->replacement)
+ char buffer[32];
+ tree index;
+
+ if (DECL_P (expr))
{
- *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type));
- return 1;
+ make_fancy_decl_name (expr);
+ return;
}
- else
- {
- struct sra_elt *c;
- unsigned int count = 0;
-
- for (c = elt->children; c ; c = c->sibling)
- count += sum_instantiated_sizes (c, sizep);
- return count;
- }
-}
+ switch (TREE_CODE (expr))
+ {
+ case COMPONENT_REF:
+ make_fancy_name_1 (TREE_OPERAND (expr, 0));
+ obstack_1grow (&name_obstack, '$');
+ make_fancy_decl_name (TREE_OPERAND (expr, 1));
+ break;
-/* Instantiate fields in ELT->TYPE that are not currently present as
- children of ELT. */
+ case ARRAY_REF:
+ make_fancy_name_1 (TREE_OPERAND (expr, 0));
+ obstack_1grow (&name_obstack, '$');
+ /* Arrays with only one element may not have a constant as their
+ index. */
+ index = TREE_OPERAND (expr, 1);
+ if (TREE_CODE (index) != INTEGER_CST)
+ break;
+ sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (index));
+ obstack_grow (&name_obstack, buffer, strlen (buffer));
-static void instantiate_missing_elements (struct sra_elt *elt);
+ break;
-static struct sra_elt *
-instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type)
-{
- struct sra_elt *sub = lookup_element (elt, child, type, INSERT);
- if (sub->is_scalar)
- {
- if (sub->replacement == NULL)
- instantiate_element (sub);
+ case BIT_FIELD_REF:
+ case REALPART_EXPR:
+ case IMAGPART_EXPR:
+ gcc_unreachable (); /* we treat these as scalars. */
+ break;
+ default:
+ break;
}
- else
- instantiate_missing_elements (sub);
- return sub;
}
-/* Obtain the canonical type for field F of ELEMENT. */
+/* Create a human readable name for replacement variable of ACCESS. */
-static tree
-canon_type_for_field (tree f, tree element)
+static char *
+make_fancy_name (tree expr)
{
- tree field_type = TREE_TYPE (f);
-
- /* canonicalize_component_ref() unwidens some bit-field types (not
- marked as DECL_BIT_FIELD in C++), so we must do the same, lest we
- may introduce type mismatches. */
- if (INTEGRAL_TYPE_P (field_type)
- && DECL_MODE (f) != TYPE_MODE (field_type))
- field_type = TREE_TYPE (get_unwidened (build3 (COMPONENT_REF,
- field_type,
- element,
- f, NULL_TREE),
- NULL_TREE));
-
- return field_type;
+ make_fancy_name_1 (expr);
+ obstack_1grow (&name_obstack, '\0');
+ return XOBFINISH (&name_obstack, char *);
}
-/* Look for adjacent fields of ELT starting at F that we'd like to
- scalarize as a single variable. Return the last field of the
- group. */
+/* Helper function for build_ref_for_offset. */
-static tree
-try_instantiate_multiple_fields (struct sra_elt *elt, tree f)
+static bool
+build_ref_for_offset_1 (tree *res, tree type, HOST_WIDE_INT offset,
+ tree exp_type)
{
- int count;
- unsigned HOST_WIDE_INT align, bit, size, alchk;
- enum machine_mode mode;
- tree first = f, prev;
- tree type, var;
- struct sra_elt *block;
-
- /* Point fields are typically best handled as standalone entities. */
- if (POINTER_TYPE_P (TREE_TYPE (f)))
- return f;
-
- if (!is_sra_scalar_type (TREE_TYPE (f))
- || !host_integerp (DECL_FIELD_OFFSET (f), 1)
- || !host_integerp (DECL_FIELD_BIT_OFFSET (f), 1)
- || !host_integerp (DECL_SIZE (f), 1)
- || lookup_element (elt, f, NULL, NO_INSERT))
- return f;
-
- block = elt;
-
- /* For complex and array objects, there are going to be integer
- literals as child elements. In this case, we can't just take the
- alignment and mode of the decl, so we instead rely on the element
- type.
-
- ??? We could try to infer additional alignment from the full
- object declaration and the location of the sub-elements we're
- accessing. */
- for (count = 0; !DECL_P (block->element); count++)
- block = block->parent;
-
- align = DECL_ALIGN (block->element);
- alchk = GET_MODE_BITSIZE (DECL_MODE (block->element));
-
- if (count)
- {
- type = TREE_TYPE (block->element);
- while (count--)
- type = TREE_TYPE (type);
-
- align = TYPE_ALIGN (type);
- alchk = GET_MODE_BITSIZE (TYPE_MODE (type));
- }
-
- if (align < alchk)
- align = alchk;
-
- /* Coalescing wider fields is probably pointless and
- inefficient. */
- if (align > BITS_PER_WORD)
- align = BITS_PER_WORD;
-
- bit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT
- + tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1);
- size = tree_low_cst (DECL_SIZE (f), 1);
-
- alchk = align - 1;
- alchk = ~alchk;
-
- if ((bit & alchk) != ((bit + size - 1) & alchk))
- return f;
-
- /* Find adjacent fields in the same alignment word. */
-
- for (prev = f, f = TREE_CHAIN (f);
- f && TREE_CODE (f) == FIELD_DECL
- && is_sra_scalar_type (TREE_TYPE (f))
- && host_integerp (DECL_FIELD_OFFSET (f), 1)
- && host_integerp (DECL_FIELD_BIT_OFFSET (f), 1)
- && host_integerp (DECL_SIZE (f), 1)
- && !lookup_element (elt, f, NULL, NO_INSERT);
- prev = f, f = TREE_CHAIN (f))
+ while (1)
{
- unsigned HOST_WIDE_INT nbit, nsize;
-
- nbit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT
- + tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1);
- nsize = tree_low_cst (DECL_SIZE (f), 1);
-
- if (bit + size == nbit)
- {
- if ((bit & alchk) != ((nbit + nsize - 1) & alchk))
- {
- /* If we're at an alignment boundary, don't bother
- growing alignment such that we can include this next
- field. */
- if ((nbit & alchk)
- || GET_MODE_BITSIZE (DECL_MODE (f)) <= align)
- break;
-
- align = GET_MODE_BITSIZE (DECL_MODE (f));
- alchk = align - 1;
- alchk = ~alchk;
-
- if ((bit & alchk) != ((nbit + nsize - 1) & alchk))
- break;
- }
- size += nsize;
- }
- else if (nbit + nsize == bit)
- {
- if ((nbit & alchk) != ((bit + size - 1) & alchk))
- {
- if ((bit & alchk)
- || GET_MODE_BITSIZE (DECL_MODE (f)) <= align)
- break;
+ tree fld;
+ tree tr_size, index;
+ HOST_WIDE_INT el_size;
- align = GET_MODE_BITSIZE (DECL_MODE (f));
- alchk = align - 1;
- alchk = ~alchk;
-
- if ((nbit & alchk) != ((bit + size - 1) & alchk))
- break;
- }
- bit = nbit;
- size += nsize;
- }
- else
- break;
- }
-
- f = prev;
-
- if (f == first)
- return f;
-
- gcc_assert ((bit & alchk) == ((bit + size - 1) & alchk));
-
- /* Try to widen the bit range so as to cover padding bits as well. */
-
- if ((bit & ~alchk) || size != align)
- {
- unsigned HOST_WIDE_INT mbit = bit & alchk;
- unsigned HOST_WIDE_INT msize = align;
+ if (offset == 0 && exp_type
+ && useless_type_conversion_p (exp_type, type))
+ return true;
- for (f = TYPE_FIELDS (elt->type);
- f; f = TREE_CHAIN (f))
+ switch (TREE_CODE (type))
{
- unsigned HOST_WIDE_INT fbit, fsize;
-
- /* Skip the fields from first to prev. */
- if (f == first)
- {
- f = prev;
- continue;
- }
-
- if (!(TREE_CODE (f) == FIELD_DECL
- && host_integerp (DECL_FIELD_OFFSET (f), 1)
- && host_integerp (DECL_FIELD_BIT_OFFSET (f), 1)))
- continue;
-
- fbit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT
- + tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1);
-
- /* If we're past the selected word, we're fine. */
- if ((bit & alchk) < (fbit & alchk))
- continue;
-
- if (host_integerp (DECL_SIZE (f), 1))
- fsize = tree_low_cst (DECL_SIZE (f), 1);
- else
- /* Assume a variable-sized field takes up all space till
- the end of the word. ??? Endianness issues? */
- fsize = align - (fbit & alchk);
-
- if ((fbit & alchk) < (bit & alchk))
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ case RECORD_TYPE:
+ /* Some ADA records are half-unions, treat all of them the same. */
+ for (fld = TYPE_FIELDS (type); fld; fld = TREE_CHAIN (fld))
{
- /* A large field might start at a previous word and
- extend into the selected word. Exclude those
- bits. ??? Endianness issues? */
- HOST_WIDE_INT diff = fbit + fsize - mbit;
+ HOST_WIDE_INT pos, size;
+ tree expr, *expr_ptr;
- if (diff <= 0)
+ if (TREE_CODE (fld) != FIELD_DECL)
continue;
- mbit += diff;
- msize -= diff;
- }
- else
- {
- /* Non-overlapping, great. */
- if (fbit + fsize <= mbit
- || mbit + msize <= fbit)
+ pos = int_bit_position (fld);
+ gcc_assert (TREE_CODE (type) == RECORD_TYPE || pos == 0);
+ size = tree_low_cst (DECL_SIZE (fld), 1);
+ if (pos > offset || (pos + size) <= offset)
continue;
- if (fbit <= mbit)
+ if (res)
{
- unsigned HOST_WIDE_INT diff = fbit + fsize - mbit;
- mbit += diff;
- msize -= diff;
+ expr = build3 (COMPONENT_REF, TREE_TYPE (fld), *res, fld,
+ NULL_TREE);
+ expr_ptr = &expr;
}
- else if (fbit > mbit)
- msize -= (mbit + msize - fbit);
else
- gcc_unreachable ();
+ expr_ptr = NULL;
+ if (build_ref_for_offset_1 (expr_ptr, TREE_TYPE (fld),
+ offset - pos, exp_type))
+ {
+ if (res)
+ *res = expr;
+ return true;
+ }
}
- }
-
- bit = mbit;
- size = msize;
- }
+ return false;
- /* Now we know the bit range we're interested in. Find the smallest
- machine mode we can use to access it. */
+ case ARRAY_TYPE:
+ tr_size = TYPE_SIZE (TREE_TYPE (type));
+ if (!tr_size || !host_integerp (tr_size, 1))
+ return false;
+ el_size = tree_low_cst (tr_size, 1);
- for (mode = smallest_mode_for_size (size, MODE_INT);
- ;
- mode = GET_MODE_WIDER_MODE (mode))
- {
- gcc_assert (mode != VOIDmode);
+ if (res)
+ {
+ index = build_int_cst (TYPE_DOMAIN (type), offset / el_size);
+ if (!integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (type))))
+ index = int_const_binop (PLUS_EXPR, index,
+ TYPE_MIN_VALUE (TYPE_DOMAIN (type)),
+ 0);
+ *res = build4 (ARRAY_REF, TREE_TYPE (type), *res, index,
+ NULL_TREE, NULL_TREE);
+ }
+ offset = offset % el_size;
+ type = TREE_TYPE (type);
+ break;
- alchk = GET_MODE_PRECISION (mode) - 1;
- alchk = ~alchk;
+ default:
+ if (offset != 0)
+ return false;
- if ((bit & alchk) == ((bit + size - 1) & alchk))
- break;
+ if (exp_type)
+ return false;
+ else
+ return true;
+ }
}
+}
- gcc_assert (~alchk < align);
-
- /* Create the field group as a single variable. */
-
- /* We used to create a type for the mode above, but size turns
- to be out not of mode-size. As we need a matching type
- to build a BIT_FIELD_REF, use a nonstandard integer type as
- fallback. */
- type = lang_hooks.types.type_for_size (size, 1);
- if (!type || TYPE_PRECISION (type) != size)
- type = build_nonstandard_integer_type (size, 1);
- gcc_assert (type);
- var = build3 (BIT_FIELD_REF, type, NULL_TREE,
- bitsize_int (size), bitsize_int (bit));
-
- block = instantiate_missing_elements_1 (elt, var, type);
- gcc_assert (block && block->is_scalar);
-
- var = block->replacement;
- block->in_bitfld_block = 2;
-
- /* Add the member fields to the group, such that they access
- portions of the group variable. */
-
- for (f = first; f != TREE_CHAIN (prev); f = TREE_CHAIN (f))
- {
- tree field_type = canon_type_for_field (f, elt->element);
- struct sra_elt *fld = lookup_element (block, f, field_type, INSERT);
-
- gcc_assert (fld && fld->is_scalar && !fld->replacement);
-
- fld->replacement = fold_build3 (BIT_FIELD_REF, field_type, var,
- bitsize_int (TYPE_PRECISION (field_type)),
- bitsize_int
- ((TREE_INT_CST_LOW (DECL_FIELD_OFFSET (f))
- * BITS_PER_UNIT
- + (TREE_INT_CST_LOW
- (DECL_FIELD_BIT_OFFSET (f)))
- - (TREE_INT_CST_LOW
- (TREE_OPERAND (block->element, 2))))
- & ~alchk));
- fld->in_bitfld_block = 1;
- }
+/* Construct an expression that would reference a part of aggregate *EXPR of
+ type TYPE at the given OFFSET of the type EXP_TYPE. If EXPR is NULL, the
+ function only determines whether it can build such a reference without
+ actually doing it.
- return prev;
-}
+ FIXME: Eventually this should be replaced with
+ maybe_fold_offset_to_reference() from tree-ssa-ccp.c but that requires a
+ minor rewrite of fold_stmt.
+ */
-static void
-instantiate_missing_elements (struct sra_elt *elt)
+static bool
+build_ref_for_offset (tree *expr, tree type, HOST_WIDE_INT offset,
+ tree exp_type, bool allow_ptr)
{
- tree type = elt->type;
-
- switch (TREE_CODE (type))
+ if (allow_ptr && POINTER_TYPE_P (type))
{
- case RECORD_TYPE:
- {
- tree f;
- for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
- if (TREE_CODE (f) == FIELD_DECL)
- {
- tree last = try_instantiate_multiple_fields (elt, f);
-
- if (last != f)
- {
- f = last;
- continue;
- }
-
- instantiate_missing_elements_1 (elt, f,
- canon_type_for_field
- (f, elt->element));
- }
- break;
- }
-
- case ARRAY_TYPE:
- {
- tree i, max, subtype;
-
- i = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
- max = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
- subtype = TREE_TYPE (type);
-
- while (1)
- {
- instantiate_missing_elements_1 (elt, i, subtype);
- if (tree_int_cst_equal (i, max))
- break;
- i = int_const_binop (PLUS_EXPR, i, integer_one_node, true);
- }
-
- break;
- }
-
- case COMPLEX_TYPE:
type = TREE_TYPE (type);
- instantiate_missing_elements_1 (elt, integer_zero_node, type);
- instantiate_missing_elements_1 (elt, integer_one_node, type);
- break;
-
- default:
- gcc_unreachable ();
+ if (expr)
+ *expr = fold_build1 (INDIRECT_REF, type, *expr);
}
-}
-/* Return true if there is only one non aggregate field in the record, TYPE.
- Return false otherwise. */
-
-static bool
-single_scalar_field_in_record_p (tree type)
-{
- int num_fields = 0;
- tree field;
- if (TREE_CODE (type) != RECORD_TYPE)
- return false;
-
- for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
- if (TREE_CODE (field) == FIELD_DECL)
- {
- num_fields++;
-
- if (num_fields == 2)
- return false;
-
- if (AGGREGATE_TYPE_P (TREE_TYPE (field)))
- return false;
- }
-
- return true;
+ return build_ref_for_offset_1 (expr, type, offset, exp_type);
}
-/* Make one pass across an element tree deciding whether to perform block
- or element copies. If we decide on element copies, instantiate all
- elements. Return true if there are any instantiated sub-elements. */
+/* The very first phase of intraprocedural SRA. It marks in candidate_bitmap
+ those with type which is suitable for scalarization. */
static bool
-decide_block_copy (struct sra_elt *elt)
+find_var_candidates (void)
{
- struct sra_elt *c;
- bool any_inst;
-
- /* We shouldn't be invoked on groups of sub-elements as they must
- behave like their parent as far as block copy is concerned. */
- gcc_assert (!elt->is_group);
+ tree var, type;
+ referenced_var_iterator rvi;
+ bool ret = false;
- /* If scalarization is disabled, respect it. */
- if (elt->cannot_scalarize)
+ FOR_EACH_REFERENCED_VAR (var, rvi)
{
- elt->use_block_copy = 1;
+ if (TREE_CODE (var) != VAR_DECL && TREE_CODE (var) != PARM_DECL)
+ continue;
+ type = TREE_TYPE (var);
+
+ if (!AGGREGATE_TYPE_P (type)
+ || needs_to_live_in_memory (var)
+ || TREE_THIS_VOLATILE (var)
+ || !COMPLETE_TYPE_P (type)
+ || !host_integerp (TYPE_SIZE (type), 1)
+ || tree_low_cst (TYPE_SIZE (type), 1) == 0
+ || type_internals_preclude_sra_p (type))
+ continue;
- if (dump_file)
- {
- fputs ("Scalarization disabled for ", dump_file);
- dump_sra_elt_name (dump_file, elt);
- fputc ('\n', dump_file);
- }
+ bitmap_set_bit (candidate_bitmap, DECL_UID (var));
- /* Disable scalarization of sub-elements */
- for (c = elt->children; c; c = c->sibling)
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- c->cannot_scalarize = 1;
- decide_block_copy (c);
+ fprintf (dump_file, "Candidate (%d): ", DECL_UID (var));
+ print_generic_expr (dump_file, var, 0);
+ fprintf (dump_file, "\n");
}
+ ret = true;
+ }
- /* Groups behave like their parent. */
- for (c = elt->groups; c; c = c->sibling)
- {
- c->cannot_scalarize = 1;
- c->use_block_copy = 1;
- }
+ return ret;
+}
- return false;
- }
+/* Sort all accesses for the given variable, check for partial overlaps and
+ return NULL if there are any. If there are none, pick a representative for
+ each combination of offset and size and create a linked list out of them.
+ Return the pointer to the first representative and make sure it is the first
+ one in the vector of accesses. */
- /* Don't decide if we've no uses and no groups. */
- if (elt->n_uses == 0 && elt->n_copies == 0 && elt->groups == NULL)
- ;
+static struct access *
+sort_and_splice_var_accesses (tree var)
+{
+ int i, j, access_count;
+ struct access *res, **prev_acc_ptr = &res;
+ VEC (access_p, heap) *access_vec;
+ bool first = true;
+ HOST_WIDE_INT low = -1, high = 0;
- else if (!elt->is_scalar)
- {
- tree size_tree = TYPE_SIZE_UNIT (elt->type);
- bool use_block_copy = true;
-
- /* Tradeoffs for COMPLEX types pretty much always make it better
- to go ahead and split the components. */
- if (TREE_CODE (elt->type) == COMPLEX_TYPE)
- use_block_copy = false;
-
- /* Don't bother trying to figure out the rest if the structure is
- so large we can't do easy arithmetic. This also forces block
- copies for variable sized structures. */
- else if (host_integerp (size_tree, 1))
- {
- unsigned HOST_WIDE_INT full_size, inst_size = 0;
- unsigned int max_size, max_count, inst_count, full_count;
-
- /* If the sra-max-structure-size parameter is 0, then the
- user has not overridden the parameter and we can choose a
- sensible default. */
- max_size = SRA_MAX_STRUCTURE_SIZE
- ? SRA_MAX_STRUCTURE_SIZE
- : MOVE_RATIO (optimize_function_for_speed_p (cfun)) * UNITS_PER_WORD;
- max_count = SRA_MAX_STRUCTURE_COUNT
- ? SRA_MAX_STRUCTURE_COUNT
- : MOVE_RATIO (optimize_function_for_speed_p (cfun));
-
- full_size = tree_low_cst (size_tree, 1);
- full_count = count_type_elements (elt->type, false);
- inst_count = sum_instantiated_sizes (elt, &inst_size);
-
- /* If there is only one scalar field in the record, don't block copy. */
- if (single_scalar_field_in_record_p (elt->type))
- use_block_copy = false;
-
- /* ??? What to do here. If there are two fields, and we've only
- instantiated one, then instantiating the other is clearly a win.
- If there are a large number of fields then the size of the copy
- is much more of a factor. */
-
- /* If the structure is small, and we've made copies, go ahead
- and instantiate, hoping that the copies will go away. */
- if (full_size <= max_size
- && (full_count - inst_count) <= max_count
- && elt->n_copies > elt->n_uses)
- use_block_copy = false;
- else if (inst_count * 100 >= full_count * SRA_FIELD_STRUCTURE_RATIO
- && inst_size * 100 >= full_size * SRA_FIELD_STRUCTURE_RATIO)
- use_block_copy = false;
-
- /* In order to avoid block copy, we have to be able to instantiate
- all elements of the type. See if this is possible. */
- if (!use_block_copy
- && (!can_completely_scalarize_p (elt)
- || !type_can_instantiate_all_elements (elt->type)))
- use_block_copy = true;
- }
+ access_vec = get_base_access_vector (var);
+ if (!access_vec)
+ return NULL;
+ access_count = VEC_length (access_p, access_vec);
- elt->use_block_copy = use_block_copy;
+ /* Sort by <OFFSET, SIZE>. */
+ qsort (VEC_address (access_p, access_vec), access_count, sizeof (access_p),
+ compare_access_positions);
- /* Groups behave like their parent. */
- for (c = elt->groups; c; c = c->sibling)
- c->use_block_copy = use_block_copy;
+ i = 0;
+ while (i < access_count)
+ {
+ struct access *access = VEC_index (access_p, access_vec, i);
+ bool modification = access->write;
+ bool grp_read = !access->write;
+ bool grp_partial_lhs = access->grp_partial_lhs;
+ bool first_scalar = is_gimple_reg_type (access->type);
+ bool unscalarizable_region = access->grp_unscalarizable_region;
- if (dump_file)
+ if (first || access->offset >= high)
{
- fprintf (dump_file, "Using %s for ",
- use_block_copy ? "block-copy" : "element-copy");
- dump_sra_elt_name (dump_file, elt);
- fputc ('\n', dump_file);
+ first = false;
+ low = access->offset;
+ high = access->offset + access->size;
}
+ else if (access->offset > low && access->offset + access->size > high)
+ return NULL;
+ else
+ gcc_assert (access->offset >= low
+ && access->offset + access->size <= high);
- if (!use_block_copy)
+ j = i + 1;
+ while (j < access_count)
{
- instantiate_missing_elements (elt);
- return true;
+ struct access *ac2 = VEC_index (access_p, access_vec, j);
+ if (ac2->offset != access->offset || ac2->size != access->size)
+ break;
+ modification |= ac2->write;
+ grp_read |= !ac2->write;
+ grp_partial_lhs |= ac2->grp_partial_lhs;
+ unscalarizable_region |= ac2->grp_unscalarizable_region;
+ relink_to_new_repr (access, ac2);
+
+ /* If there are both aggregate-type and scalar-type accesses with
+ this combination of size and offset, the comparison function
+ should have put the scalars first. */
+ gcc_assert (first_scalar || !is_gimple_reg_type (ac2->type));
+ ac2->group_representative = access;
+ j++;
}
- }
- any_inst = elt->replacement != NULL;
+ i = j;
+
+ access->group_representative = access;
+ access->grp_write = modification;
+ access->grp_read = grp_read;
+ access->grp_partial_lhs = grp_partial_lhs;
+ access->grp_unscalarizable_region = unscalarizable_region;
+ if (access->first_link)
+ add_access_to_work_queue (access);
- for (c = elt->children; c ; c = c->sibling)
- any_inst |= decide_block_copy (c);
+ *prev_acc_ptr = access;
+ prev_acc_ptr = &access->next_grp;
+ }
- return any_inst;
+ gcc_assert (res == VEC_index (access_p, access_vec, 0));
+ return res;
}
-/* Entry point to phase 3. Instantiate scalar replacement variables. */
+/* Create a variable for the given ACCESS which determines the type, name and a
+ few other properties. Return the variable declaration and store it also to
+ ACCESS->replacement. */
-static void
-decide_instantiations (void)
+static tree
+create_access_replacement (struct access *access)
{
- unsigned int i;
- bool cleared_any;
- bitmap_head done_head;
- bitmap_iterator bi;
+ tree repl;
+
+ repl = create_tmp_var (access->type, "SR");
+ get_var_ann (repl);
+ add_referenced_var (repl);
+ mark_sym_for_renaming (repl);
- /* We cannot clear bits from a bitmap we're iterating over,
- so save up all the bits to clear until the end. */
- bitmap_initialize (&done_head, &bitmap_default_obstack);
- cleared_any = false;
+ if (!access->grp_partial_lhs
+ && (TREE_CODE (access->type) == COMPLEX_TYPE
+ || TREE_CODE (access->type) == VECTOR_TYPE))
+ DECL_GIMPLE_REG_P (repl) = 1;
- EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi)
+ DECL_SOURCE_LOCATION (repl) = DECL_SOURCE_LOCATION (access->base);
+ DECL_ARTIFICIAL (repl) = 1;
+
+ if (DECL_NAME (access->base)
+ && !DECL_IGNORED_P (access->base)
+ && !DECL_ARTIFICIAL (access->base))
{
- tree var = referenced_var (i);
- struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
- if (elt)
- {
- decide_instantiation_1 (elt, 0, 0);
- if (!decide_block_copy (elt))
- elt = NULL;
- }
- if (!elt)
- {
- bitmap_set_bit (&done_head, i);
- cleared_any = true;
- }
+ char *pretty_name = make_fancy_name (access->expr);
+
+ DECL_NAME (repl) = get_identifier (pretty_name);
+ obstack_free (&name_obstack, pretty_name);
+
+ SET_DECL_DEBUG_EXPR (repl, access->expr);
+ DECL_DEBUG_EXPR_IS_FROM (repl) = 1;
+ DECL_IGNORED_P (repl) = 0;
}
- if (cleared_any)
+ DECL_IGNORED_P (repl) = DECL_IGNORED_P (access->base);
+ TREE_NO_WARNING (repl) = TREE_NO_WARNING (access->base);
+
+ if (dump_file)
{
- bitmap_and_compl_into (sra_candidates, &done_head);
- bitmap_and_compl_into (needs_copy_in, &done_head);
+ fprintf (dump_file, "Created a replacement for ");
+ print_generic_expr (dump_file, access->base, 0);
+ fprintf (dump_file, " offset: %u, size: %u: ",
+ (unsigned) access->offset, (unsigned) access->size);
+ print_generic_expr (dump_file, repl, 0);
+ fprintf (dump_file, "\n");
}
- bitmap_clear (&done_head);
-
- mark_set_for_renaming (sra_candidates);
- if (dump_file)
- fputc ('\n', dump_file);
+ return repl;
}
-
-/* Phase Four: Update the function to match the replacements created. */
+/* Return ACCESS scalar replacement, create it if it does not exist yet. */
-/* Mark all the variables in virtual operands in all the statements in
- LIST for renaming. */
-
-static void
-mark_all_v_defs_seq (gimple_seq seq)
+static inline tree
+get_access_replacement (struct access *access)
{
- gimple_stmt_iterator gsi;
+ gcc_assert (access->grp_to_be_replaced);
+
+ if (access->replacement_decl)
+ return access->replacement_decl;
- for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
- update_stmt_if_modified (gsi_stmt (gsi));
+ access->replacement_decl = create_access_replacement (access);
+ return access->replacement_decl;
}
-/* Mark every replacement under ELT with TREE_NO_WARNING. */
+/* Build a subtree of accesses rooted in *ACCESS, and move the pointer in the
+ linked list along the way. Stop when *ACCESS is NULL or the access pointed
+ to it is not "within" the root. */
static void
-mark_no_warning (struct sra_elt *elt)
+build_access_subtree (struct access **access)
{
- if (!elt->all_no_warning)
+ struct access *root = *access, *last_child = NULL;
+ HOST_WIDE_INT limit = root->offset + root->size;
+
+ *access = (*access)->next_grp;
+ while (*access && (*access)->offset + (*access)->size <= limit)
{
- if (elt->replacement)
- TREE_NO_WARNING (elt->replacement) = 1;
+ if (!last_child)
+ root->first_child = *access;
else
- {
- struct sra_elt *c;
- FOR_EACH_ACTUAL_CHILD (c, elt)
- mark_no_warning (c);
- }
- elt->all_no_warning = true;
+ last_child->next_sibling = *access;
+ last_child = *access;
+
+ build_access_subtree (access);
}
}
-/* Build a single level component reference to ELT rooted at BASE. */
+/* Build a tree of access representatives, ACCESS is the pointer to the first
+ one, others are linked in a list by the next_grp field. Decide about scalar
+ replacements on the way, return true iff any are to be created. */
-static tree
-generate_one_element_ref (struct sra_elt *elt, tree base)
+static void
+build_access_trees (struct access *access)
{
- switch (TREE_CODE (TREE_TYPE (base)))
+ while (access)
{
- case RECORD_TYPE:
- {
- tree field = elt->element;
-
- /* We can't test elt->in_bitfld_block here because, when this is
- called from instantiate_element, we haven't set this field
- yet. */
- if (TREE_CODE (field) == BIT_FIELD_REF)
- {
- tree ret = unshare_expr (field);
- TREE_OPERAND (ret, 0) = base;
- return ret;
- }
-
- /* Watch out for compatible records with differing field lists. */
- if (DECL_FIELD_CONTEXT (field) != TYPE_MAIN_VARIANT (TREE_TYPE (base)))
- field = find_compatible_field (TREE_TYPE (base), field);
+ struct access *root = access;
- return build3 (COMPONENT_REF, elt->type, base, field, NULL);
- }
-
- case ARRAY_TYPE:
- if (TREE_CODE (elt->element) == RANGE_EXPR)
- return build4 (ARRAY_RANGE_REF, elt->type, base,
- TREE_OPERAND (elt->element, 0), NULL, NULL);
- else
- return build4 (ARRAY_REF, elt->type, base, elt->element, NULL, NULL);
-
- case COMPLEX_TYPE:
- if (elt->element == integer_zero_node)
- return build1 (REALPART_EXPR, elt->type, base);
- else
- return build1 (IMAGPART_EXPR, elt->type, base);
-
- default:
- gcc_unreachable ();
+ build_access_subtree (&access);
+ root->next_grp = access;
}
}
-/* Build a full component reference to ELT rooted at its native variable. */
+/* Analyze the subtree of accesses rooted in ROOT, scheduling replacements when
+ both seeming beneficial and when ALLOW_REPLACEMENTS allows it. Also set
+ all sorts of access flags appropriately along the way, notably always ser
+ grp_read when MARK_READ is true and grp_write when MARK_WRITE is true. */
-static tree
-generate_element_ref (struct sra_elt *elt)
+static bool
+analyze_access_subtree (struct access *root, bool allow_replacements,
+ bool mark_read, bool mark_write)
{
- if (elt->parent)
- return generate_one_element_ref (elt, generate_element_ref (elt->parent));
- else
- return elt->element;
-}
+ struct access *child;
+ HOST_WIDE_INT limit = root->offset + root->size;
+ HOST_WIDE_INT covered_to = root->offset;
+ bool scalar = is_gimple_reg_type (root->type);
+ bool hole = false, sth_created = false;
-/* Return true if BF is a bit-field that we can handle like a scalar. */
+ if (mark_read)
+ root->grp_read = true;
+ else if (root->grp_read)
+ mark_read = true;
-static bool
-scalar_bitfield_p (tree bf)
-{
- return (TREE_CODE (bf) == BIT_FIELD_REF
- && (is_gimple_reg (TREE_OPERAND (bf, 0))
- || (TYPE_MODE (TREE_TYPE (TREE_OPERAND (bf, 0))) != BLKmode
- && (!TREE_SIDE_EFFECTS (TREE_OPERAND (bf, 0))
- || (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE
- (TREE_OPERAND (bf, 0))))
- <= BITS_PER_WORD)))));
-}
+ if (mark_write)
+ root->grp_write = true;
+ else if (root->grp_write)
+ mark_write = true;
-/* Create an assignment statement from SRC to DST. */
+ if (root->grp_unscalarizable_region)
+ allow_replacements = false;
-static gimple_seq
-sra_build_assignment (tree dst, tree src)
-{
- gimple stmt;
- gimple_seq seq = NULL, seq2 = NULL;
- /* Turning BIT_FIELD_REFs into bit operations enables other passes
- to do a much better job at optimizing the code.
- From dst = BIT_FIELD_REF <var, sz, off> we produce
-
- SR.1 = (scalar type) var;
- SR.2 = SR.1 >> off;
- SR.3 = SR.2 & ((1 << sz) - 1);
- ... possible sign extension of SR.3 ...
- dst = (destination type) SR.3;
- */
- if (scalar_bitfield_p (src))
+ for (child = root->first_child; child; child = child->next_sibling)
{
- tree var, shift, width;
- tree utype, stype;
- bool unsignedp = (INTEGRAL_TYPE_P (TREE_TYPE (src))
- ? TYPE_UNSIGNED (TREE_TYPE (src)) : true);
- struct gimplify_ctx gctx;
-
- var = TREE_OPERAND (src, 0);
- width = TREE_OPERAND (src, 1);
- /* The offset needs to be adjusted to a right shift quantity
- depending on the endianness. */
- if (BYTES_BIG_ENDIAN)
- {
- tree tmp = size_binop (PLUS_EXPR, width, TREE_OPERAND (src, 2));
- shift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), tmp);
- }
+ if (!hole && child->offset < covered_to)
+ hole = true;
else
- shift = TREE_OPERAND (src, 2);
-
- /* In weird cases we have non-integral types for the source or
- destination object.
- ??? For unknown reasons we also want an unsigned scalar type. */
- stype = TREE_TYPE (var);
- if (!INTEGRAL_TYPE_P (stype))
- stype = lang_hooks.types.type_for_size (TREE_INT_CST_LOW
- (TYPE_SIZE (stype)), 1);
- else if (!TYPE_UNSIGNED (stype))
- stype = unsigned_type_for (stype);
-
- utype = TREE_TYPE (dst);
- if (!INTEGRAL_TYPE_P (utype))
- utype = lang_hooks.types.type_for_size (TREE_INT_CST_LOW
- (TYPE_SIZE (utype)), 1);
- else if (!TYPE_UNSIGNED (utype))
- utype = unsigned_type_for (utype);
-
- /* Convert the base var of the BIT_FIELD_REF to the scalar type
- we use for computation if we cannot use it directly. */
- if (INTEGRAL_TYPE_P (TREE_TYPE (var)))
- var = fold_convert (stype, var);
- else
- var = fold_build1 (VIEW_CONVERT_EXPR, stype, var);
-
- if (!integer_zerop (shift))
- var = fold_build2 (RSHIFT_EXPR, stype, var, shift);
+ covered_to += child->size;
- /* If we need a masking operation, produce one. */
- if (TREE_INT_CST_LOW (width) == TYPE_PRECISION (stype))
- unsignedp = true;
- else
- {
- tree one = build_int_cst_wide (stype, 1, 0);
- tree mask = int_const_binop (LSHIFT_EXPR, one, width, 0);
- mask = int_const_binop (MINUS_EXPR, mask, one, 0);
- var = fold_build2 (BIT_AND_EXPR, stype, var, mask);
- }
+ sth_created |= analyze_access_subtree (child, allow_replacements,
+ mark_read, mark_write);
- /* After shifting and masking, convert to the target type. */
- var = fold_convert (utype, var);
+ root->grp_unscalarized_data |= child->grp_unscalarized_data;
+ hole |= !child->grp_covered;
+ }
- /* Perform sign extension, if required.
- ??? This should never be necessary. */
- if (!unsignedp)
+ if (allow_replacements && scalar && !root->first_child)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- tree signbit = int_const_binop (LSHIFT_EXPR,
- build_int_cst_wide (utype, 1, 0),
- size_binop (MINUS_EXPR, width,
- bitsize_int (1)), 0);
-
- var = fold_build2 (BIT_XOR_EXPR, utype, var, signbit);
- var = fold_build2 (MINUS_EXPR, utype, var, signbit);
+ fprintf (dump_file, "Marking ");
+ print_generic_expr (dump_file, root->base, 0);
+ fprintf (dump_file, " offset: %u, size: %u: ",
+ (unsigned) root->offset, (unsigned) root->size);
+ fprintf (dump_file, " to be replaced.\n");
}
- /* fold_build3 (BIT_FIELD_REF, ...) sometimes returns a cast. */
- STRIP_NOPS (dst);
-
- /* Finally, move and convert to the destination. */
- if (INTEGRAL_TYPE_P (TREE_TYPE (dst)))
- var = fold_convert (TREE_TYPE (dst), var);
- else
- var = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (dst), var);
-
- push_gimplify_context (&gctx);
- gctx.allow_rhs_cond_expr = true;
-
- gimplify_assign (dst, var, &seq);
-
- if (gimple_referenced_vars (cfun))
- for (var = gctx.temps; var; var = TREE_CHAIN (var))
- {
- add_referenced_var (var);
- mark_sym_for_renaming (var);
- }
- pop_gimplify_context (NULL);
-
- return seq;
+ root->grp_to_be_replaced = 1;
+ sth_created = true;
+ hole = false;
}
+ else if (covered_to < limit)
+ hole = true;
- /* fold_build3 (BIT_FIELD_REF, ...) sometimes returns a cast. */
- if (CONVERT_EXPR_P (dst))
+ if (sth_created && !hole)
{
- STRIP_NOPS (dst);
- src = fold_convert (TREE_TYPE (dst), src);
+ root->grp_covered = 1;
+ return true;
}
- /* It was hoped that we could perform some type sanity checking
- here, but since front-ends can emit accesses of fields in types
- different from their nominal types and copy structures containing
- them as a whole, we'd have to handle such differences here.
- Since such accesses under different types require compatibility
- anyway, there's little point in making tests and/or adding
- conversions to ensure the types of src and dst are the same.
- So we just assume type differences at this point are ok.
- The only exception we make here are pointer types, which can be different
- in e.g. structurally equal, but non-identical RECORD_TYPEs. */
- else if (POINTER_TYPE_P (TREE_TYPE (dst))
- && !useless_type_conversion_p (TREE_TYPE (dst), TREE_TYPE (src)))
- src = fold_convert (TREE_TYPE (dst), src);
-
- /* ??? Only call the gimplifier if we need to. Otherwise we may
- end up substituting with DECL_VALUE_EXPR - see PR37380. */
- if (!handled_component_p (src)
- && !SSA_VAR_P (src))
+ if (root->grp_write || TREE_CODE (root->base) == PARM_DECL)
+ root->grp_unscalarized_data = 1; /* not covered and written to */
+ if (sth_created)
+ return true;
+ return false;
+}
+
+/* Analyze all access trees linked by next_grp by the means of
+ analyze_access_subtree. */
+static bool
+analyze_access_trees (struct access *access)
+{
+ bool ret = false;
+
+ while (access)
{
- src = force_gimple_operand (src, &seq2, false, NULL_TREE);
- gimple_seq_add_seq (&seq, seq2);
+ if (analyze_access_subtree (access, true, false, false))
+ ret = true;
+ access = access->next_grp;
}
- stmt = gimple_build_assign (dst, src);
- gimple_seq_add_stmt (&seq, stmt);
- return seq;
-}
-/* BIT_FIELD_REFs must not be shared. sra_build_elt_assignment()
- takes care of assignments, but we must create copies for uses. */
-#define REPLDUP(t) (TREE_CODE (t) != BIT_FIELD_REF ? (t) : unshare_expr (t))
+ return ret;
+}
-/* Emit an assignment from SRC to DST, but if DST is a scalarizable
- BIT_FIELD_REF, turn it into bit operations. */
+/* Return true iff a potential new child of LACC at offset OFFSET and with size
+ SIZE would conflict with an already existing one. If exactly such a child
+ already exists in LACC, store a pointer to it in EXACT_MATCH. */
-static gimple_seq
-sra_build_bf_assignment (tree dst, tree src)
+static bool
+child_would_conflict_in_lacc (struct access *lacc, HOST_WIDE_INT norm_offset,
+ HOST_WIDE_INT size, struct access **exact_match)
{
- tree var, type, utype, tmp, tmp2, tmp3;
- gimple_seq seq;
- gimple stmt;
- tree cst, cst2, mask;
- tree minshift, maxshift;
+ struct access *child;
- if (TREE_CODE (dst) != BIT_FIELD_REF)
- return sra_build_assignment (dst, src);
+ for (child = lacc->first_child; child; child = child->next_sibling)
+ {
+ if (child->offset == norm_offset && child->size == size)
+ {
+ *exact_match = child;
+ return true;
+ }
- var = TREE_OPERAND (dst, 0);
+ if (child->offset < norm_offset + size
+ && child->offset + child->size > norm_offset)
+ return true;
+ }
- if (!scalar_bitfield_p (dst))
- return sra_build_assignment (REPLDUP (dst), src);
+ return false;
+}
- seq = NULL;
+/* Set the expr of TARGET to one just like MODEL but with is own base at the
+ bottom of the handled components. */
- cst = fold_convert (bitsizetype, TREE_OPERAND (dst, 2));
- cst2 = size_binop (PLUS_EXPR,
- fold_convert (bitsizetype, TREE_OPERAND (dst, 1)),
- cst);
+static void
+duplicate_expr_for_different_base (struct access *target,
+ struct access *model)
+{
+ tree t, expr = unshare_expr (model->expr);
- if (BYTES_BIG_ENDIAN)
- {
- maxshift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), cst);
- minshift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), cst2);
- }
- else
- {
- maxshift = cst2;
- minshift = cst;
- }
+ gcc_assert (handled_component_p (expr));
+ t = expr;
+ while (handled_component_p (TREE_OPERAND (t, 0)))
+ t = TREE_OPERAND (t, 0);
+ gcc_assert (TREE_OPERAND (t, 0) == model->base);
+ TREE_OPERAND (t, 0) = target->base;
- type = TREE_TYPE (var);
- if (!INTEGRAL_TYPE_P (type))
- type = lang_hooks.types.type_for_size
- (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (var))), 1);
- if (TYPE_UNSIGNED (type))
- utype = type;
- else
- utype = unsigned_type_for (type);
+ target->expr = expr;
+}
- mask = build_int_cst_wide (utype, 1, 0);
- if (TREE_INT_CST_LOW (maxshift) == TYPE_PRECISION (utype))
- cst = build_int_cst_wide (utype, 0, 0);
- else
- cst = int_const_binop (LSHIFT_EXPR, mask, maxshift, true);
- if (integer_zerop (minshift))
- cst2 = mask;
- else
- cst2 = int_const_binop (LSHIFT_EXPR, mask, minshift, true);
- mask = int_const_binop (MINUS_EXPR, cst, cst2, true);
- mask = fold_build1 (BIT_NOT_EXPR, utype, mask);
- if (TYPE_MAIN_VARIANT (utype) != TYPE_MAIN_VARIANT (TREE_TYPE (var))
- && !integer_zerop (mask))
- {
- tmp = var;
- if (!is_gimple_variable (tmp))
- tmp = unshare_expr (var);
- else
- TREE_NO_WARNING (var) = true;
+/* Create a new child access of PARENT, with all properties just like MODEL
+ except for its offset and with its grp_write false and grp_read true.
+ Return the new access. Note that this access is created long after all
+ splicing and sorting, it's not located in any access vector and is
+ automatically a representative of its group. */
- tmp2 = make_rename_temp (utype, "SR");
+static struct access *
+create_artificial_child_access (struct access *parent, struct access *model,
+ HOST_WIDE_INT new_offset)
+{
+ struct access *access;
+ struct access **child;
- if (INTEGRAL_TYPE_P (TREE_TYPE (var)))
- tmp = fold_convert (utype, tmp);
- else
- tmp = fold_build1 (VIEW_CONVERT_EXPR, utype, tmp);
+ gcc_assert (!model->grp_unscalarizable_region);
- stmt = gimple_build_assign (tmp2, tmp);
- gimple_seq_add_stmt (&seq, stmt);
- }
- else
- tmp2 = var;
+ access = (struct access *) pool_alloc (access_pool);
+ memset (access, 0, sizeof (struct access));
+ access->base = parent->base;
+ access->offset = new_offset;
+ access->size = model->size;
+ duplicate_expr_for_different_base (access, model);
+ access->type = model->type;
+ access->grp_write = true;
+ access->grp_read = false;
- if (!integer_zerop (mask))
- {
- tmp = make_rename_temp (utype, "SR");
- stmt = gimple_build_assign (tmp, fold_build2 (BIT_AND_EXPR, utype,
- tmp2, mask));
- gimple_seq_add_stmt (&seq, stmt);
- }
- else
- tmp = mask;
+ child = &parent->first_child;
+ while (*child && (*child)->offset < new_offset)
+ child = &(*child)->next_sibling;
- if (is_gimple_reg (src) && INTEGRAL_TYPE_P (TREE_TYPE (src)))
- tmp2 = src;
- else if (INTEGRAL_TYPE_P (TREE_TYPE (src)))
- {
- gimple_seq tmp_seq;
- tmp2 = make_rename_temp (TREE_TYPE (src), "SR");
- tmp_seq = sra_build_assignment (tmp2, src);
- gimple_seq_add_seq (&seq, tmp_seq);
- }
- else
- {
- gimple_seq tmp_seq;
- tmp2 = make_rename_temp
- (lang_hooks.types.type_for_size
- (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (src))),
- 1), "SR");
- tmp_seq = sra_build_assignment (tmp2, fold_build1 (VIEW_CONVERT_EXPR,
- TREE_TYPE (tmp2), src));
- gimple_seq_add_seq (&seq, tmp_seq);
- }
+ access->next_sibling = *child;
+ *child = access;
- if (!TYPE_UNSIGNED (TREE_TYPE (tmp2)))
- {
- gimple_seq tmp_seq;
- tree ut = unsigned_type_for (TREE_TYPE (tmp2));
- tmp3 = make_rename_temp (ut, "SR");
- tmp2 = fold_convert (ut, tmp2);
- tmp_seq = sra_build_assignment (tmp3, tmp2);
- gimple_seq_add_seq (&seq, tmp_seq);
-
- tmp2 = fold_build1 (BIT_NOT_EXPR, utype, mask);
- tmp2 = int_const_binop (RSHIFT_EXPR, tmp2, minshift, true);
- tmp2 = fold_convert (ut, tmp2);
- tmp2 = fold_build2 (BIT_AND_EXPR, ut, tmp3, tmp2);
-
- if (tmp3 != tmp2)
- {
- tmp3 = make_rename_temp (ut, "SR");
- tmp_seq = sra_build_assignment (tmp3, tmp2);
- gimple_seq_add_seq (&seq, tmp_seq);
- }
+ return access;
+}
- tmp2 = tmp3;
- }
- if (TYPE_MAIN_VARIANT (TREE_TYPE (tmp2)) != TYPE_MAIN_VARIANT (utype))
- {
- gimple_seq tmp_seq;
- tmp3 = make_rename_temp (utype, "SR");
- tmp2 = fold_convert (utype, tmp2);
- tmp_seq = sra_build_assignment (tmp3, tmp2);
- gimple_seq_add_seq (&seq, tmp_seq);
- tmp2 = tmp3;
- }
+/* Propagate all subaccesses of RACC across an assignment link to LACC. Return
+ true if any new subaccess was created. Additionally, if RACC is a scalar
+ access but LACC is not, change the type of the latter. */
- if (!integer_zerop (minshift))
- {
- tmp3 = make_rename_temp (utype, "SR");
- stmt = gimple_build_assign (tmp3, fold_build2 (LSHIFT_EXPR, utype,
- tmp2, minshift));
- gimple_seq_add_stmt (&seq, stmt);
- tmp2 = tmp3;
- }
+static bool
+propagate_subacesses_accross_link (struct access *lacc, struct access *racc)
+{
+ struct access *rchild;
+ HOST_WIDE_INT norm_delta = lacc->offset - racc->offset;
- if (utype != TREE_TYPE (var))
- tmp3 = make_rename_temp (utype, "SR");
- else
- tmp3 = var;
- stmt = gimple_build_assign (tmp3, fold_build2 (BIT_IOR_EXPR, utype,
- tmp, tmp2));
- gimple_seq_add_stmt (&seq, stmt);
+ bool ret = false;
+
+ if (is_gimple_reg_type (lacc->type)
+ || lacc->grp_unscalarizable_region
+ || racc->grp_unscalarizable_region)
+ return false;
- if (tmp3 != var)
+ if (!lacc->first_child && !racc->first_child
+ && is_gimple_reg_type (racc->type))
{
- if (TREE_TYPE (var) == type)
- stmt = gimple_build_assign (var, fold_convert (type, tmp3));
- else
- stmt = gimple_build_assign (var, fold_build1 (VIEW_CONVERT_EXPR,
- TREE_TYPE (var), tmp3));
- gimple_seq_add_stmt (&seq, stmt);
+ duplicate_expr_for_different_base (lacc, racc);
+ lacc->type = racc->type;
+ return false;
}
- return seq;
-}
-
-/* Expand an assignment of SRC to the scalarized representation of
- ELT. If it is a field group, try to widen the assignment to cover
- the full variable. */
-
-static gimple_seq
-sra_build_elt_assignment (struct sra_elt *elt, tree src)
-{
- tree dst = elt->replacement;
- tree var, tmp, cst, cst2;
- gimple stmt;
- gimple_seq seq;
-
- if (TREE_CODE (dst) != BIT_FIELD_REF
- || !elt->in_bitfld_block)
- return sra_build_assignment (REPLDUP (dst), src);
-
- var = TREE_OPERAND (dst, 0);
-
- /* Try to widen the assignment to the entire variable.
- We need the source to be a BIT_FIELD_REF as well, such that, for
- BIT_FIELD_REF<d,sz,dp> = BIT_FIELD_REF<s,sz,sp>,
- by design, conditions are met such that we can turn it into
- d = BIT_FIELD_REF<s,dw,sp-dp>. */
- if (elt->in_bitfld_block == 2
- && TREE_CODE (src) == BIT_FIELD_REF)
+ for (rchild = racc->first_child; rchild; rchild = rchild->next_sibling)
{
- tmp = src;
- cst = TYPE_SIZE (TREE_TYPE (var));
- cst2 = size_binop (MINUS_EXPR, TREE_OPERAND (src, 2),
- TREE_OPERAND (dst, 2));
+ struct access *new_acc = NULL;
+ HOST_WIDE_INT norm_offset = rchild->offset + norm_delta;
- src = TREE_OPERAND (src, 0);
+ if (rchild->grp_unscalarizable_region)
+ continue;
- /* Avoid full-width bit-fields. */
- if (integer_zerop (cst2)
- && tree_int_cst_equal (cst, TYPE_SIZE (TREE_TYPE (src))))
+ if (child_would_conflict_in_lacc (lacc, norm_offset, rchild->size,
+ &new_acc))
{
- if (INTEGRAL_TYPE_P (TREE_TYPE (src))
- && !TYPE_UNSIGNED (TREE_TYPE (src)))
- src = fold_convert (unsigned_type_for (TREE_TYPE (src)), src);
-
- /* If a single conversion won't do, we'll need a statement
- list. */
- if (TYPE_MAIN_VARIANT (TREE_TYPE (var))
- != TYPE_MAIN_VARIANT (TREE_TYPE (src)))
- {
- gimple_seq tmp_seq;
- seq = NULL;
-
- if (!INTEGRAL_TYPE_P (TREE_TYPE (src)))
- src = fold_build1 (VIEW_CONVERT_EXPR,
- lang_hooks.types.type_for_size
- (TREE_INT_CST_LOW
- (TYPE_SIZE (TREE_TYPE (src))),
- 1), src);
- gcc_assert (TYPE_UNSIGNED (TREE_TYPE (src)));
-
- tmp = make_rename_temp (TREE_TYPE (src), "SR");
- stmt = gimple_build_assign (tmp, src);
- gimple_seq_add_stmt (&seq, stmt);
-
- tmp_seq = sra_build_assignment (var,
- fold_convert (TREE_TYPE (var),
- tmp));
- gimple_seq_add_seq (&seq, tmp_seq);
-
- return seq;
- }
-
- src = fold_convert (TREE_TYPE (var), src);
- }
- else
- {
- src = fold_convert (TREE_TYPE (var), tmp);
+ if (new_acc && rchild->first_child)
+ ret |= propagate_subacesses_accross_link (new_acc, rchild);
+ continue;
}
- return sra_build_assignment (var, src);
+ new_acc = create_artificial_child_access (lacc, rchild, norm_offset);
+ if (racc->first_child)
+ propagate_subacesses_accross_link (new_acc, rchild);
+
+ ret = true;
}
- return sra_build_bf_assignment (dst, src);
+ return ret;
}
-/* Generate a set of assignment statements in *LIST_P to copy all
- instantiated elements under ELT to or from the equivalent structure
- rooted at EXPR. COPY_OUT controls the direction of the copy, with
- true meaning to copy out of EXPR into ELT. */
+/* Propagate all subaccesses across assignment links. */
static void
-generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr,
- gimple_seq *seq_p)
+propagate_all_subaccesses (void)
{
- struct sra_elt *c;
- gimple_seq tmp_seq;
- tree t;
-
- if (!copy_out && TREE_CODE (expr) == SSA_NAME
- && TREE_CODE (TREE_TYPE (expr)) == COMPLEX_TYPE)
+ while (work_queue_head)
{
- tree r, i;
+ struct access *racc = pop_access_from_work_queue ();
+ struct assign_link *link;
- c = lookup_element (elt, integer_zero_node, NULL, NO_INSERT);
- r = c->replacement;
- c = lookup_element (elt, integer_one_node, NULL, NO_INSERT);
- i = c->replacement;
+ gcc_assert (racc->first_link);
- t = build2 (COMPLEX_EXPR, elt->type, r, i);
- tmp_seq = sra_build_bf_assignment (expr, t);
- SSA_NAME_DEF_STMT (expr) = gimple_seq_last_stmt (tmp_seq);
- gimple_seq_add_seq (seq_p, tmp_seq);
- }
- else if (elt->replacement)
- {
- if (copy_out)
- tmp_seq = sra_build_elt_assignment (elt, expr);
- else
- tmp_seq = sra_build_bf_assignment (expr, REPLDUP (elt->replacement));
- gimple_seq_add_seq (seq_p, tmp_seq);
- }
- else
- {
- FOR_EACH_ACTUAL_CHILD (c, elt)
+ for (link = racc->first_link; link; link = link->next)
{
- t = generate_one_element_ref (c, unshare_expr (expr));
- generate_copy_inout (c, copy_out, t, seq_p);
+ struct access *lacc = link->lacc;
+
+ if (!bitmap_bit_p (candidate_bitmap, DECL_UID (lacc->base)))
+ continue;
+ lacc = lacc->group_representative;
+ if (propagate_subacesses_accross_link (lacc, racc)
+ && lacc->first_link)
+ add_access_to_work_queue (lacc);
}
}
}
-/* Generate a set of assignment statements in *LIST_P to copy all instantiated
- elements under SRC to their counterparts under DST. There must be a 1-1
- correspondence of instantiated elements. */
+/* Go through all accesses collected throughout the (intraprocedural) analysis
+ stage, exclude overlapping ones, identify representatives and build trees
+ out of them, making decisions about scalarization on the way. Return true
+ iff there are any to-be-scalarized variables after this stage. */
-static void
-generate_element_copy (struct sra_elt *dst, struct sra_elt *src, gimple_seq *seq_p)
+static bool
+analyze_all_variable_accesses (void)
{
- struct sra_elt *dc, *sc;
-
- FOR_EACH_ACTUAL_CHILD (dc, dst)
- {
- sc = lookup_element (src, dc->element, NULL, NO_INSERT);
- if (!sc && dc->in_bitfld_block == 2)
- {
- struct sra_elt *dcs;
-
- FOR_EACH_ACTUAL_CHILD (dcs, dc)
- {
- sc = lookup_element (src, dcs->element, NULL, NO_INSERT);
- gcc_assert (sc);
- generate_element_copy (dcs, sc, seq_p);
- }
+ tree var;
+ referenced_var_iterator rvi;
+ bool res = false;
- continue;
- }
+ FOR_EACH_REFERENCED_VAR (var, rvi)
+ if (bitmap_bit_p (candidate_bitmap, DECL_UID (var)))
+ {
+ struct access *access;
- /* If DST and SRC are structs with the same elements, but do not have
- the same TYPE_MAIN_VARIANT, then lookup of DST FIELD_DECL in SRC
- will fail. Try harder by finding the corresponding FIELD_DECL
- in SRC. */
- if (!sc)
- {
- tree f;
-
- gcc_assert (useless_type_conversion_p (dst->type, src->type));
- gcc_assert (TREE_CODE (dc->element) == FIELD_DECL);
- for (f = TYPE_FIELDS (src->type); f ; f = TREE_CHAIN (f))
- if (simple_cst_equal (DECL_FIELD_OFFSET (f),
- DECL_FIELD_OFFSET (dc->element)) > 0
- && simple_cst_equal (DECL_FIELD_BIT_OFFSET (f),
- DECL_FIELD_BIT_OFFSET (dc->element)) > 0
- && simple_cst_equal (DECL_SIZE (f),
- DECL_SIZE (dc->element)) > 0
- && (useless_type_conversion_p (TREE_TYPE (dc->element),
- TREE_TYPE (f))
- || (POINTER_TYPE_P (TREE_TYPE (dc->element))
- && POINTER_TYPE_P (TREE_TYPE (f)))))
- break;
- gcc_assert (f != NULL_TREE);
- sc = lookup_element (src, f, NULL, NO_INSERT);
- }
+ access = sort_and_splice_var_accesses (var);
+ if (access)
+ build_access_trees (access);
+ else
+ disqualify_candidate (var,
+ "No or inhibitingly overlapping accesses.");
+ }
- generate_element_copy (dc, sc, seq_p);
- }
+ propagate_all_subaccesses ();
- if (dst->replacement)
- {
- gimple_seq tmp_seq;
+ FOR_EACH_REFERENCED_VAR (var, rvi)
+ if (bitmap_bit_p (candidate_bitmap, DECL_UID (var)))
+ {
+ struct access *access = get_first_repr_for_decl (var);
- gcc_assert (src->replacement);
+ if (analyze_access_trees (access))
+ {
+ res = true;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nAccess trees for ");
+ print_generic_expr (dump_file, var, 0);
+ fprintf (dump_file, " (UID: %u): \n", DECL_UID (var));
+ dump_access_tree (dump_file, access);
+ fprintf (dump_file, "\n");
+ }
+ }
+ else
+ disqualify_candidate (var, "No scalar replacements to be created.");
+ }
- tmp_seq = sra_build_elt_assignment (dst, REPLDUP (src->replacement));
- gimple_seq_add_seq (seq_p, tmp_seq);
- }
+ return res;
}
-/* Generate a set of assignment statements in *LIST_P to zero all instantiated
- elements under ELT. In addition, do not assign to elements that have been
- marked VISITED but do reset the visited flag; this allows easy coordination
- with generate_element_init. */
+/* Return true iff a reference statement into aggregate AGG can be built for
+ every single to-be-replaced accesses that is a child of ACCESS, its sibling
+ or a child of its sibling. TOP_OFFSET is the offset from the processed
+ access subtree that has to be subtracted from offset of each access. */
-static void
-generate_element_zero (struct sra_elt *elt, gimple_seq *seq_p)
+static bool
+ref_expr_for_all_replacements_p (struct access *access, tree agg,
+ HOST_WIDE_INT top_offset)
{
- struct sra_elt *c;
-
- if (elt->visited)
- {
- elt->visited = false;
- return;
- }
-
- if (!elt->in_bitfld_block)
- FOR_EACH_ACTUAL_CHILD (c, elt)
- generate_element_zero (c, seq_p);
-
- if (elt->replacement)
+ do
{
- tree t;
- gimple_seq tmp_seq;
+ if (access->grp_to_be_replaced
+ && !build_ref_for_offset (NULL, TREE_TYPE (agg),
+ access->offset - top_offset,
+ access->type, false))
+ return false;
- gcc_assert (elt->is_scalar);
- t = fold_convert (elt->type, integer_zero_node);
+ if (access->first_child
+ && !ref_expr_for_all_replacements_p (access->first_child, agg,
+ top_offset))
+ return false;
- tmp_seq = sra_build_elt_assignment (elt, t);
- gimple_seq_add_seq (seq_p, tmp_seq);
+ access = access->next_sibling;
}
+ while (access);
+
+ return true;
}
-/* Generate an assignment VAR = INIT, where INIT may need gimplification.
- Add the result to *LIST_P. */
+/* Generate statements copying scalar replacements of accesses within a subtree
+ into or out of AGG. ACCESS is the first child of the root of the subtree to
+ be processed. AGG is an aggregate type expression (can be a declaration but
+ does not have to be, it can for example also be an indirect_ref).
+ TOP_OFFSET is the offset of the processed subtree which has to be subtracted
+ from offsets of individual accesses to get corresponding offsets for AGG.
+ If CHUNK_SIZE is non-null, copy only replacements in the interval
+ <start_offset, start_offset + chunk_size>, otherwise copy all. GSI is a
+ statement iterator used to place the new statements. WRITE should be true
+ when the statements should write from AGG to the replacement and false if
+ vice versa. if INSERT_AFTER is true, new statements will be added after the
+ current statement in GSI, they will be added before the statement
+ otherwise. */
static void
-generate_one_element_init (struct sra_elt *elt, tree init, gimple_seq *seq_p)
+generate_subtree_copies (struct access *access, tree agg,
+ HOST_WIDE_INT top_offset,
+ HOST_WIDE_INT start_offset, HOST_WIDE_INT chunk_size,
+ gimple_stmt_iterator *gsi, bool write,
+ bool insert_after)
{
- gimple_seq tmp_seq = sra_build_elt_assignment (elt, init);
- gimple_seq_add_seq (seq_p, tmp_seq);
-}
+ do
+ {
+ tree expr = unshare_expr (agg);
-/* Generate a set of assignment statements in *LIST_P to set all instantiated
- elements under ELT with the contents of the initializer INIT. In addition,
- mark all assigned elements VISITED; this allows easy coordination with
- generate_element_zero. Return false if we found a case we couldn't
- handle. */
+ if (chunk_size && access->offset >= start_offset + chunk_size)
+ return;
-static bool
-generate_element_init_1 (struct sra_elt *elt, tree init, gimple_seq *seq_p)
-{
- bool result = true;
- enum tree_code init_code;
- struct sra_elt *sub;
- tree t;
- unsigned HOST_WIDE_INT idx;
- tree value, purpose;
-
- /* We can be passed DECL_INITIAL of a static variable. It might have a
- conversion, which we strip off here. */
- STRIP_USELESS_TYPE_CONVERSION (init);
- init_code = TREE_CODE (init);
-
- if (elt->is_scalar)
- {
- if (elt->replacement)
+ if (access->grp_to_be_replaced
+ && (chunk_size == 0
+ || access->offset + access->size > start_offset))
{
- generate_one_element_init (elt, init, seq_p);
- elt->visited = true;
- }
- return result;
- }
+ tree repl = get_access_replacement (access);
+ bool ref_found;
+ gimple stmt;
- switch (init_code)
- {
- case COMPLEX_CST:
- case COMPLEX_EXPR:
- FOR_EACH_ACTUAL_CHILD (sub, elt)
- {
- if (sub->element == integer_zero_node)
- t = (init_code == COMPLEX_EXPR
- ? TREE_OPERAND (init, 0) : TREE_REALPART (init));
- else
- t = (init_code == COMPLEX_EXPR
- ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init));
- result &= generate_element_init_1 (sub, t, seq_p);
- }
- break;
+ ref_found = build_ref_for_offset (&expr, TREE_TYPE (agg),
+ access->offset - top_offset,
+ access->type, false);
+ gcc_assert (ref_found);
- case CONSTRUCTOR:
- FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, purpose, value)
- {
- /* Array constructors are routinely created with NULL indices. */
- if (purpose == NULL_TREE)
- {
- result = false;
- break;
- }
- if (TREE_CODE (purpose) == RANGE_EXPR)
+ if (write)
{
- tree lower = TREE_OPERAND (purpose, 0);
- tree upper = TREE_OPERAND (purpose, 1);
-
- while (1)
- {
- sub = lookup_element (elt, lower, NULL, NO_INSERT);
- if (sub != NULL)
- result &= generate_element_init_1 (sub, value, seq_p);
- if (tree_int_cst_equal (lower, upper))
- break;
- lower = int_const_binop (PLUS_EXPR, lower,
- integer_one_node, true);
- }
+ if (access->grp_partial_lhs)
+ expr = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
+ !insert_after,
+ insert_after ? GSI_NEW_STMT
+ : GSI_SAME_STMT);
+ stmt = gimple_build_assign (repl, expr);
}
else
{
- sub = lookup_element (elt, purpose, NULL, NO_INSERT);
- if (sub != NULL)
- result &= generate_element_init_1 (sub, value, seq_p);
+ TREE_NO_WARNING (repl) = 1;
+ if (access->grp_partial_lhs)
+ repl = force_gimple_operand_gsi (gsi, repl, true, NULL_TREE,
+ !insert_after,
+ insert_after ? GSI_NEW_STMT
+ : GSI_SAME_STMT);
+ stmt = gimple_build_assign (expr, repl);
}
+
+ if (insert_after)
+ gsi_insert_after (gsi, stmt, GSI_NEW_STMT);
+ else
+ gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+ update_stmt (stmt);
}
- break;
- default:
- elt->visited = true;
- result = false;
- }
+ if (access->first_child)
+ generate_subtree_copies (access->first_child, agg, top_offset,
+ start_offset, chunk_size, gsi,
+ write, insert_after);
- return result;
+ access = access->next_sibling;
+ }
+ while (access);
}
-/* A wrapper function for generate_element_init_1 that handles cleanup after
- gimplification. */
+/* Assign zero to all scalar replacements in an access subtree. ACCESS is the
+ the root of the subtree to be processed. GSI is the statement iterator used
+ for inserting statements which are added after the current statement if
+ INSERT_AFTER is true or before it otherwise. */
-static bool
-generate_element_init (struct sra_elt *elt, tree init, gimple_seq *seq_p)
-{
- bool ret;
- struct gimplify_ctx gctx;
+static void
+init_subtree_with_zero (struct access *access, gimple_stmt_iterator *gsi,
+ bool insert_after)
- push_gimplify_context (&gctx);
- ret = generate_element_init_1 (elt, init, seq_p);
- pop_gimplify_context (NULL);
+{
+ struct access *child;
- /* The replacement can expose previously unreferenced variables. */
- if (ret && *seq_p)
+ if (access->grp_to_be_replaced)
{
- gimple_stmt_iterator i;
+ gimple stmt;
- for (i = gsi_start (*seq_p); !gsi_end_p (i); gsi_next (&i))
- find_new_referenced_vars (gsi_stmt (i));
+ stmt = gimple_build_assign (get_access_replacement (access),
+ fold_convert (access->type,
+ integer_zero_node));
+ if (insert_after)
+ gsi_insert_after (gsi, stmt, GSI_NEW_STMT);
+ else
+ gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+ update_stmt (stmt);
}
- return ret;
-}
-
-/* Helper function to insert LIST before GSI, and set up line number info. */
-
-static void
-sra_insert_before (gimple_stmt_iterator *gsi, gimple_seq seq)
-{
- gimple stmt = gsi_stmt (*gsi);
-
- if (gimple_has_location (stmt))
- annotate_all_with_location (seq, gimple_location (stmt));
- gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+ for (child = access->first_child; child; child = child->next_sibling)
+ init_subtree_with_zero (child, gsi, insert_after);
}
-/* Similarly, but insert after GSI. Handles insertion onto edges as well. */
+/* Search for an access representative for the given expression EXPR and
+ return it or NULL if it cannot be found. */
-static void
-sra_insert_after (gimple_stmt_iterator *gsi, gimple_seq seq)
+static struct access *
+get_access_for_expr (tree expr)
{
- gimple stmt = gsi_stmt (*gsi);
+ HOST_WIDE_INT offset, size, max_size;
+ tree base;
- if (gimple_has_location (stmt))
- annotate_all_with_location (seq, gimple_location (stmt));
+ /* FIXME: This should not be necessary but Ada produces V_C_Es with a type of
+ a different size than the size of its argument and we need the latter
+ one. */
+ if (TREE_CODE (expr) == VIEW_CONVERT_EXPR)
+ expr = TREE_OPERAND (expr, 0);
- if (stmt_ends_bb_p (stmt))
- insert_edge_copies_seq (seq, gsi_bb (*gsi));
- else
- gsi_insert_seq_after (gsi, seq, GSI_SAME_STMT);
-}
+ base = get_ref_base_and_extent (expr, &offset, &size, &max_size);
+ if (max_size == -1 || !DECL_P (base))
+ return NULL;
-/* Similarly, but replace the statement at GSI. */
+ if (!bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
+ return NULL;
-static void
-sra_replace (gimple_stmt_iterator *gsi, gimple_seq seq)
-{
- sra_insert_before (gsi, seq);
- unlink_stmt_vdef (gsi_stmt (*gsi));
- gsi_remove (gsi, false);
- if (gsi_end_p (*gsi))
- *gsi = gsi_last (gsi_seq (*gsi));
- else
- gsi_prev (gsi);
+ return get_var_base_offset_size_access (base, offset, max_size);
}
-/* Data structure that bitfield_overlaps_p fills in with information
- about the element passed in and how much of it overlaps with the
- bit-range passed it to. */
-
-struct bitfield_overlap_info
-{
- /* The bit-length of an element. */
- tree field_len;
-
- /* The bit-position of the element in its parent. */
- tree field_pos;
-
- /* The number of bits of the element that overlap with the incoming
- bit range. */
- tree overlap_len;
-
- /* The first bit of the element that overlaps with the incoming bit
- range. */
- tree overlap_pos;
-};
-
-/* Return true if a BIT_FIELD_REF<(FLD->parent), BLEN, BPOS>
- expression (referenced as BF below) accesses any of the bits in FLD,
- false if it doesn't. If DATA is non-null, its field_len and
- field_pos are filled in such that BIT_FIELD_REF<(FLD->parent),
- field_len, field_pos> (referenced as BFLD below) represents the
- entire field FLD->element, and BIT_FIELD_REF<BFLD, overlap_len,
- overlap_pos> represents the portion of the entire field that
- overlaps with BF. */
+/* Callback for scan_function. Replace the expression EXPR with a scalar
+ replacement if there is one and generate other statements to do type
+ conversion or subtree copying if necessary. GSI is used to place newly
+ created statements, WRITE is true if the expression is being written to (it
+ is on a LHS of a statement or output in an assembly statement). */
static bool
-bitfield_overlaps_p (tree blen, tree bpos, struct sra_elt *fld,
- struct bitfield_overlap_info *data)
+sra_modify_expr (tree *expr, gimple_stmt_iterator *gsi, bool write,
+ void *data ATTRIBUTE_UNUSED)
{
- tree flen, fpos;
- bool ret;
+ struct access *access;
+ tree type, bfr;
- if (TREE_CODE (fld->element) == FIELD_DECL)
- {
- flen = fold_convert (bitsizetype, DECL_SIZE (fld->element));
- fpos = fold_convert (bitsizetype, DECL_FIELD_OFFSET (fld->element));
- fpos = size_binop (MULT_EXPR, fpos, bitsize_int (BITS_PER_UNIT));
- fpos = size_binop (PLUS_EXPR, fpos, DECL_FIELD_BIT_OFFSET (fld->element));
- }
- else if (TREE_CODE (fld->element) == BIT_FIELD_REF)
+ if (TREE_CODE (*expr) == BIT_FIELD_REF)
{
- flen = fold_convert (bitsizetype, TREE_OPERAND (fld->element, 1));
- fpos = fold_convert (bitsizetype, TREE_OPERAND (fld->element, 2));
- }
- else if (TREE_CODE (fld->element) == INTEGER_CST)
- {
- tree domain_type = TYPE_DOMAIN (TREE_TYPE (fld->parent->element));
- flen = fold_convert (bitsizetype, TYPE_SIZE (fld->type));
- fpos = fold_convert (bitsizetype, fld->element);
- if (domain_type && TYPE_MIN_VALUE (domain_type))
- fpos = size_binop (MINUS_EXPR, fpos,
- fold_convert (bitsizetype,
- TYPE_MIN_VALUE (domain_type)));
- fpos = size_binop (MULT_EXPR, flen, fpos);
+ bfr = *expr;
+ expr = &TREE_OPERAND (*expr, 0);
}
else
- gcc_unreachable ();
-
- gcc_assert (host_integerp (blen, 1)
- && host_integerp (bpos, 1)
- && host_integerp (flen, 1)
- && host_integerp (fpos, 1));
-
- ret = ((!tree_int_cst_lt (fpos, bpos)
- && tree_int_cst_lt (size_binop (MINUS_EXPR, fpos, bpos),
- blen))
- || (!tree_int_cst_lt (bpos, fpos)
- && tree_int_cst_lt (size_binop (MINUS_EXPR, bpos, fpos),
- flen)));
+ bfr = NULL_TREE;
- if (!ret)
- return ret;
-
- if (data)
- {
- tree bend, fend;
-
- data->field_len = flen;
- data->field_pos = fpos;
-
- fend = size_binop (PLUS_EXPR, fpos, flen);
- bend = size_binop (PLUS_EXPR, bpos, blen);
-
- if (tree_int_cst_lt (bend, fend))
- data->overlap_len = size_binop (MINUS_EXPR, bend, fpos);
- else
- data->overlap_len = NULL;
-
- if (tree_int_cst_lt (fpos, bpos))
- {
- data->overlap_pos = size_binop (MINUS_EXPR, bpos, fpos);
- data->overlap_len = size_binop (MINUS_EXPR,
- data->overlap_len
- ? data->overlap_len
- : data->field_len,
- data->overlap_pos);
- }
- else
- data->overlap_pos = NULL;
- }
-
- return ret;
-}
-
-/* Add to LISTP a sequence of statements that copies BLEN bits between
- VAR and the scalarized elements of ELT, starting a bit VPOS of VAR
- and at bit BPOS of ELT. The direction of the copy is given by
- TO_VAR. */
-
-static void
-sra_explode_bitfield_assignment (tree var, tree vpos, bool to_var,
- gimple_seq *seq_p, tree blen, tree bpos,
- struct sra_elt *elt)
-{
- struct sra_elt *fld;
- struct bitfield_overlap_info flp;
+ if (TREE_CODE (*expr) == REALPART_EXPR || TREE_CODE (*expr) == IMAGPART_EXPR)
+ expr = &TREE_OPERAND (*expr, 0);
+ access = get_access_for_expr (*expr);
+ if (!access)
+ return false;
+ type = TREE_TYPE (*expr);
- FOR_EACH_ACTUAL_CHILD (fld, elt)
+ if (access->grp_to_be_replaced)
{
- tree flen, fpos;
-
- if (!bitfield_overlaps_p (blen, bpos, fld, &flp))
- continue;
-
- flen = flp.overlap_len ? flp.overlap_len : flp.field_len;
- fpos = flp.overlap_pos ? flp.overlap_pos : bitsize_int (0);
-
- if (fld->replacement)
+ tree repl = get_access_replacement (access);
+ /* If we replace a non-register typed access simply use the original
+ access expression to extract the scalar component afterwards.
+ This happens if scalarizing a function return value or parameter
+ like in gcc.c-torture/execute/20041124-1.c, 20050316-1.c and
+ gcc.c-torture/compile/20011217-1.c. */
+ if (!is_gimple_reg_type (type))
{
- tree infld, invar, type;
- gimple_seq st;
-
- infld = fld->replacement;
-
- type = unsigned_type_for (TREE_TYPE (infld));
- if (TYPE_PRECISION (type) != TREE_INT_CST_LOW (flen))
- type = build_nonstandard_integer_type (TREE_INT_CST_LOW (flen), 1);
-
- if (TREE_CODE (infld) == BIT_FIELD_REF)
+ gimple stmt;
+ if (write)
{
- fpos = size_binop (PLUS_EXPR, fpos, TREE_OPERAND (infld, 2));
- infld = TREE_OPERAND (infld, 0);
+ tree ref = unshare_expr (access->expr);
+ if (access->grp_partial_lhs)
+ ref = force_gimple_operand_gsi (gsi, ref, true, NULL_TREE,
+ false, GSI_NEW_STMT);
+ stmt = gimple_build_assign (repl, ref);
+ gsi_insert_after (gsi, stmt, GSI_NEW_STMT);
}
- else if (BYTES_BIG_ENDIAN && DECL_P (fld->element)
- && !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (infld)),
- DECL_SIZE (fld->element)))
+ else
{
- fpos = size_binop (PLUS_EXPR, fpos,
- TYPE_SIZE (TREE_TYPE (infld)));
- fpos = size_binop (MINUS_EXPR, fpos,
- DECL_SIZE (fld->element));
+ if (access->grp_partial_lhs)
+ repl = force_gimple_operand_gsi (gsi, repl, true, NULL_TREE,
+ true, GSI_SAME_STMT);
+ stmt = gimple_build_assign (unshare_expr (access->expr), repl);
+ gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
}
-
- infld = fold_build3 (BIT_FIELD_REF, type, infld, flen, fpos);
-
- invar = size_binop (MINUS_EXPR, flp.field_pos, bpos);
- if (flp.overlap_pos)
- invar = size_binop (PLUS_EXPR, invar, flp.overlap_pos);
- invar = size_binop (PLUS_EXPR, invar, vpos);
-
- invar = fold_build3 (BIT_FIELD_REF, type, var, flen, invar);
-
- if (to_var)
- st = sra_build_bf_assignment (invar, infld);
- else
- st = sra_build_bf_assignment (infld, invar);
-
- gimple_seq_add_seq (seq_p, st);
}
else
{
- tree sub = size_binop (MINUS_EXPR, flp.field_pos, bpos);
- sub = size_binop (PLUS_EXPR, vpos, sub);
- if (flp.overlap_pos)
- sub = size_binop (PLUS_EXPR, sub, flp.overlap_pos);
+ gcc_assert (useless_type_conversion_p (type, access->type));
+ *expr = repl;
+ }
+ }
- sra_explode_bitfield_assignment (var, sub, to_var, seq_p,
- flen, fpos, fld);
+ if (access->first_child)
+ {
+ HOST_WIDE_INT start_offset, chunk_size;
+ if (bfr
+ && host_integerp (TREE_OPERAND (bfr, 1), 1)
+ && host_integerp (TREE_OPERAND (bfr, 2), 1))
+ {
+ start_offset = tree_low_cst (TREE_OPERAND (bfr, 1), 1);
+ chunk_size = tree_low_cst (TREE_OPERAND (bfr, 2), 1);
}
+ else
+ start_offset = chunk_size = 0;
+
+ generate_subtree_copies (access->first_child, access->base, 0,
+ start_offset, chunk_size, gsi, write, write);
}
+ return true;
}
-/* Add to LISTBEFOREP statements that copy scalarized members of ELT
- that overlap with BIT_FIELD_REF<(ELT->element), BLEN, BPOS> back
- into the full variable, and to LISTAFTERP, if non-NULL, statements
- that copy the (presumably modified) overlapping portions of the
- full variable back to the scalarized variables. */
+/* Store all replacements in the access tree rooted in TOP_RACC either to their
+ base aggregate if there are unscalarized data or directly to LHS
+ otherwise. */
static void
-sra_sync_for_bitfield_assignment (gimple_seq *seq_before_p,
- gimple_seq *seq_after_p,
- tree blen, tree bpos,
- struct sra_elt *elt)
+handle_unscalarized_data_in_subtree (struct access *top_racc, tree lhs,
+ gimple_stmt_iterator *gsi)
{
- struct sra_elt *fld;
- struct bitfield_overlap_info flp;
-
- FOR_EACH_ACTUAL_CHILD (fld, elt)
- if (bitfield_overlaps_p (blen, bpos, fld, &flp))
- {
- if (fld->replacement || (!flp.overlap_len && !flp.overlap_pos))
- {
- generate_copy_inout (fld, false, generate_element_ref (fld),
- seq_before_p);
- mark_no_warning (fld);
- if (seq_after_p)
- generate_copy_inout (fld, true, generate_element_ref (fld),
- seq_after_p);
- }
- else
- {
- tree flen = flp.overlap_len ? flp.overlap_len : flp.field_len;
- tree fpos = flp.overlap_pos ? flp.overlap_pos : bitsize_int (0);
-
- sra_sync_for_bitfield_assignment (seq_before_p, seq_after_p,
- flen, fpos, fld);
- }
- }
+ if (top_racc->grp_unscalarized_data)
+ generate_subtree_copies (top_racc->first_child, top_racc->base, 0, 0, 0,
+ gsi, false, false);
+ else
+ generate_subtree_copies (top_racc->first_child, lhs, top_racc->offset,
+ 0, 0, gsi, false, false);
}
-/* Scalarize a USE. To recap, this is either a simple reference to ELT,
- if elt is scalar, or some occurrence of ELT that requires a complete
- aggregate. IS_OUTPUT is true if ELT is being modified. */
+
+/* Try to generate statements to load all sub-replacements in an access
+ (sub)tree (LACC is the first child) from scalar replacements in the TOP_RACC
+ (sub)tree. If that is not possible, refresh the TOP_RACC base aggregate and
+ load the accesses from it. LEFT_OFFSET is the offset of the left whole
+ subtree being copied, RIGHT_OFFSET is the same thing for the right subtree.
+ GSI is stmt iterator used for statement insertions. *REFRESHED is true iff
+ the rhs top aggregate has already been refreshed by contents of its scalar
+ reductions and is set to true if this function has to do it. */
static void
-scalarize_use (struct sra_elt *elt, tree *expr_p, gimple_stmt_iterator *gsi,
- bool is_output, bool use_all)
+load_assign_lhs_subreplacements (struct access *lacc, struct access *top_racc,
+ HOST_WIDE_INT left_offset,
+ HOST_WIDE_INT right_offset,
+ gimple_stmt_iterator *old_gsi,
+ gimple_stmt_iterator *new_gsi,
+ bool *refreshed, tree lhs)
{
- gimple stmt = gsi_stmt (*gsi);
- tree bfexpr;
-
- if (elt->replacement)
- {
- tree replacement = elt->replacement;
-
- /* If we have a replacement, then updating the reference is as
- simple as modifying the existing statement in place. */
- if (is_output
- && TREE_CODE (elt->replacement) == BIT_FIELD_REF
- && is_gimple_reg (TREE_OPERAND (elt->replacement, 0))
- && is_gimple_assign (stmt)
- && gimple_assign_lhs_ptr (stmt) == expr_p)
- {
- gimple_seq newseq;
- /* RHS must be a single operand. */
- gcc_assert (gimple_assign_single_p (stmt));
- newseq = sra_build_elt_assignment (elt, gimple_assign_rhs1 (stmt));
- sra_replace (gsi, newseq);
- return;
- }
- else if (!is_output
- && TREE_CODE (elt->replacement) == BIT_FIELD_REF
- && is_gimple_assign (stmt)
- && gimple_assign_rhs1_ptr (stmt) == expr_p)
- {
- tree tmp = make_rename_temp
- (TREE_TYPE (gimple_assign_lhs (stmt)), "SR");
- gimple_seq newseq = sra_build_assignment (tmp, REPLDUP (elt->replacement));
-
- sra_insert_before (gsi, newseq);
- replacement = tmp;
- }
- if (is_output)
- update_stmt_if_modified (stmt);
- *expr_p = REPLDUP (replacement);
- update_stmt (stmt);
- }
- else if (use_all && is_output
- && is_gimple_assign (stmt)
- && TREE_CODE (bfexpr
- = gimple_assign_lhs (stmt)) == BIT_FIELD_REF
- && &TREE_OPERAND (bfexpr, 0) == expr_p
- && INTEGRAL_TYPE_P (TREE_TYPE (bfexpr))
- && TREE_CODE (TREE_TYPE (*expr_p)) == RECORD_TYPE)
+ do
{
- gimple_seq seq_before = NULL;
- gimple_seq seq_after = NULL;
- tree blen = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 1));
- tree bpos = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 2));
- bool update = false;
-
- if (!elt->use_block_copy)
+ if (lacc->grp_to_be_replaced)
{
- tree type = TREE_TYPE (bfexpr);
- tree var = make_rename_temp (type, "SR"), tmp, vpos;
- gimple st;
+ struct access *racc;
+ HOST_WIDE_INT offset = lacc->offset - left_offset + right_offset;
+ gimple stmt;
+ tree rhs;
- gimple_assign_set_lhs (stmt, var);
- update = true;
-
- if (!TYPE_UNSIGNED (type))
+ racc = find_access_in_subtree (top_racc, offset, lacc->size);
+ if (racc && racc->grp_to_be_replaced)
{
- type = unsigned_type_for (type);
- tmp = make_rename_temp (type, "SR");
- st = gimple_build_assign (tmp, fold_convert (type, var));
- gimple_seq_add_stmt (&seq_after, st);
- var = tmp;
+ rhs = get_access_replacement (racc);
+ if (!useless_type_conversion_p (lacc->type, racc->type))
+ rhs = fold_build1 (VIEW_CONVERT_EXPR, lacc->type, rhs);
}
-
- /* If VAR is wider than BLEN bits, it is padded at the
- most-significant end. We want to set VPOS such that
- <BIT_FIELD_REF VAR BLEN VPOS> would refer to the
- least-significant BLEN bits of VAR. */
- if (BYTES_BIG_ENDIAN)
- vpos = size_binop (MINUS_EXPR, TYPE_SIZE (type), blen);
else
- vpos = bitsize_int (0);
- sra_explode_bitfield_assignment
- (var, vpos, false, &seq_after, blen, bpos, elt);
- }
- else
- sra_sync_for_bitfield_assignment
- (&seq_before, &seq_after, blen, bpos, elt);
-
- if (seq_before)
- {
- mark_all_v_defs_seq (seq_before);
- sra_insert_before (gsi, seq_before);
- }
- if (seq_after)
- {
- mark_all_v_defs_seq (seq_after);
- sra_insert_after (gsi, seq_after);
- }
-
- if (update)
- update_stmt (stmt);
- }
- else if (use_all && !is_output
- && is_gimple_assign (stmt)
- && TREE_CODE (bfexpr
- = gimple_assign_rhs1 (stmt)) == BIT_FIELD_REF
- && &TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == expr_p
- && INTEGRAL_TYPE_P (TREE_TYPE (bfexpr))
- && TREE_CODE (TREE_TYPE (*expr_p)) == RECORD_TYPE)
- {
- gimple_seq seq = NULL;
- tree blen = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 1));
- tree bpos = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 2));
- bool update = false;
-
- if (!elt->use_block_copy)
- {
- tree type = TREE_TYPE (bfexpr);
- tree var = make_rename_temp (type, "SR"), tmp, vpos;
- gimple st = NULL;
-
- gimple_assign_set_rhs1 (stmt, var);
- update = true;
-
- if (!TYPE_UNSIGNED (type))
{
- type = unsigned_type_for (type);
- tmp = make_rename_temp (type, "SR");
- st = gimple_build_assign (var,
- fold_convert (TREE_TYPE (var), tmp));
- var = tmp;
- }
+ bool repl_found;
- gimple_seq_add_stmt (&seq,
- gimple_build_assign
- (var, build_int_cst_wide (type, 0, 0)));
+ /* No suitable access on the right hand side, need to load from
+ the aggregate. See if we have to update it first... */
+ if (!*refreshed)
+ {
+ gcc_assert (top_racc->first_child);
+ handle_unscalarized_data_in_subtree (top_racc, lhs, old_gsi);
+ *refreshed = true;
+ }
- /* If VAR is wider than BLEN bits, it is padded at the
- most-significant end. We want to set VPOS such that
- <BIT_FIELD_REF VAR BLEN VPOS> would refer to the
- least-significant BLEN bits of VAR. */
- if (BYTES_BIG_ENDIAN)
- vpos = size_binop (MINUS_EXPR, TYPE_SIZE (type), blen);
- else
- vpos = bitsize_int (0);
- sra_explode_bitfield_assignment
- (var, vpos, true, &seq, blen, bpos, elt);
+ rhs = unshare_expr (top_racc->base);
+ repl_found = build_ref_for_offset (&rhs,
+ TREE_TYPE (top_racc->base),
+ lacc->offset - left_offset,
+ lacc->type, false);
+ gcc_assert (repl_found);
+ }
- if (st)
- gimple_seq_add_stmt (&seq, st);
+ stmt = gimple_build_assign (get_access_replacement (lacc), rhs);
+ gsi_insert_after (new_gsi, stmt, GSI_NEW_STMT);
+ update_stmt (stmt);
}
- else
- sra_sync_for_bitfield_assignment
- (&seq, NULL, blen, bpos, elt);
-
- if (seq)
+ else if (lacc->grp_read && !lacc->grp_covered && !*refreshed)
{
- mark_all_v_defs_seq (seq);
- sra_insert_before (gsi, seq);
+ handle_unscalarized_data_in_subtree (top_racc, lhs, old_gsi);
+ *refreshed = true;
}
- if (update)
- update_stmt (stmt);
- }
- else
- {
- gimple_seq seq = NULL;
-
- /* Otherwise we need some copies. If ELT is being read, then we
- want to store all (modified) sub-elements back into the
- structure before the reference takes place. If ELT is being
- written, then we want to load the changed values back into
- our shadow variables. */
- /* ??? We don't check modified for reads, we just always write all of
- the values. We should be able to record the SSA number of the VOP
- for which the values were last read. If that number matches the
- SSA number of the VOP in the current statement, then we needn't
- emit an assignment. This would also eliminate double writes when
- a structure is passed as more than one argument to a function call.
- This optimization would be most effective if sra_walk_function
- processed the blocks in dominator order. */
-
- generate_copy_inout (elt, is_output, generate_element_ref (elt), &seq);
- if (seq == NULL)
- return;
- mark_all_v_defs_seq (seq);
- if (is_output)
- sra_insert_after (gsi, seq);
- else
- {
- sra_insert_before (gsi, seq);
- if (use_all)
- mark_no_warning (elt);
- }
+ if (lacc->first_child)
+ load_assign_lhs_subreplacements (lacc->first_child, top_racc,
+ left_offset, right_offset,
+ old_gsi, new_gsi, refreshed, lhs);
+ lacc = lacc->next_sibling;
}
+ while (lacc);
}
-/* Scalarize a COPY. To recap, this is an assignment statement between
- two scalarizable references, LHS_ELT and RHS_ELT. */
+/* Modify assignments with a CONSTRUCTOR on their RHS. STMT contains a pointer
+ to the assignment and GSI is the statement iterator pointing at it. Returns
+ the same values as sra_modify_assign. */
-static void
-scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
- gimple_stmt_iterator *gsi)
+static enum scan_assign_result
+sra_modify_constructor_assign (gimple *stmt, gimple_stmt_iterator *gsi)
{
- gimple_seq seq;
- gimple stmt;
-
- if (lhs_elt->replacement && rhs_elt->replacement)
- {
- /* If we have two scalar operands, modify the existing statement. */
- stmt = gsi_stmt (*gsi);
+ tree lhs = gimple_assign_lhs (*stmt);
+ struct access *acc;
- /* See the commentary in sra_walk_function concerning
- RETURN_EXPR, and why we should never see one here. */
- gcc_assert (is_gimple_assign (stmt));
- gcc_assert (gimple_assign_copy_p (stmt));
+ acc = get_access_for_expr (lhs);
+ if (!acc)
+ return SRA_SA_NONE;
-
- gimple_assign_set_lhs (stmt, lhs_elt->replacement);
- gimple_assign_set_rhs1 (stmt, REPLDUP (rhs_elt->replacement));
- update_stmt (stmt);
- }
- else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy)
+ if (VEC_length (constructor_elt,
+ CONSTRUCTOR_ELTS (gimple_assign_rhs1 (*stmt))) > 0)
{
- /* If either side requires a block copy, then sync the RHS back
- to the original structure, leave the original assignment
- statement (which will perform the block copy), then load the
- LHS values out of its now-updated original structure. */
- /* ??? Could perform a modified pair-wise element copy. That
- would at least allow those elements that are instantiated in
- both structures to be optimized well. */
-
- seq = NULL;
- generate_copy_inout (rhs_elt, false,
- generate_element_ref (rhs_elt), &seq);
- if (seq)
- {
- mark_all_v_defs_seq (seq);
- sra_insert_before (gsi, seq);
- }
+ /* I have never seen this code path trigger but if it can happen the
+ following should handle it gracefully. */
+ if (access_has_children_p (acc))
+ generate_subtree_copies (acc->first_child, acc->base, 0, 0, 0, gsi,
+ true, true);
+ return SRA_SA_PROCESSED;
+ }
- seq = NULL;
- generate_copy_inout (lhs_elt, true,
- generate_element_ref (lhs_elt), &seq);
- if (seq)
- {
- mark_all_v_defs_seq (seq);
- sra_insert_after (gsi, seq);
- }
+ if (acc->grp_covered)
+ {
+ init_subtree_with_zero (acc, gsi, false);
+ unlink_stmt_vdef (*stmt);
+ gsi_remove (gsi, true);
+ return SRA_SA_REMOVED;
}
else
{
- /* Otherwise both sides must be fully instantiated. In which
- case perform pair-wise element assignments and replace the
- original block copy statement. */
-
- stmt = gsi_stmt (*gsi);
- update_stmt_if_modified (stmt);
-
- seq = NULL;
- generate_element_copy (lhs_elt, rhs_elt, &seq);
- gcc_assert (seq);
- mark_all_v_defs_seq (seq);
- sra_replace (gsi, seq);
+ init_subtree_with_zero (acc, gsi, true);
+ return SRA_SA_PROCESSED;
}
}
-/* Scalarize an INIT. To recap, this is an assignment to a scalarizable
- reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or
- COMPLEX_EXPR. If RHS is NULL, it should be treated as an empty
- CONSTRUCTOR. */
-static void
-scalarize_init (struct sra_elt *lhs_elt, tree rhs, gimple_stmt_iterator *gsi)
+/* Callback of scan_function to process assign statements. It examines both
+ sides of the statement, replaces them with a scalare replacement if there is
+ one and generating copying of replacements if scalarized aggregates have been
+ used in the assignment. STMT is a pointer to the assign statement, GSI is
+ used to hold generated statements for type conversions and subtree
+ copying. */
+
+static enum scan_assign_result
+sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi,
+ void *data ATTRIBUTE_UNUSED)
{
- bool result = true;
- gimple_seq seq = NULL, init_seq = NULL;
+ struct access *lacc, *racc;
+ tree lhs, rhs;
+ bool modify_this_stmt = false;
+ bool force_gimple_rhs = false;
- /* Generate initialization statements for all members extant in the RHS. */
- if (rhs)
+ if (!gimple_assign_single_p (*stmt))
+ return SRA_SA_NONE;
+ lhs = gimple_assign_lhs (*stmt);
+ rhs = gimple_assign_rhs1 (*stmt);
+
+ if (TREE_CODE (rhs) == CONSTRUCTOR)
+ return sra_modify_constructor_assign (stmt, gsi);
+
+ if (TREE_CODE (rhs) == REALPART_EXPR || TREE_CODE (lhs) == REALPART_EXPR
+ || TREE_CODE (rhs) == IMAGPART_EXPR || TREE_CODE (lhs) == IMAGPART_EXPR
+ || TREE_CODE (rhs) == BIT_FIELD_REF || TREE_CODE (lhs) == BIT_FIELD_REF)
{
- /* Unshare the expression just in case this is from a decl's initial. */
- rhs = unshare_expr (rhs);
- result = generate_element_init (lhs_elt, rhs, &init_seq);
+ modify_this_stmt = sra_modify_expr (gimple_assign_rhs1_ptr (*stmt),
+ gsi, false, data);
+ modify_this_stmt |= sra_modify_expr (gimple_assign_lhs_ptr (*stmt),
+ gsi, true, data);
+ return modify_this_stmt ? SRA_SA_PROCESSED : SRA_SA_NONE;
}
- if (!result)
+ lacc = get_access_for_expr (lhs);
+ racc = get_access_for_expr (rhs);
+ if (!lacc && !racc)
+ return SRA_SA_NONE;
+
+ if (lacc && lacc->grp_to_be_replaced)
{
- /* If we failed to convert the entire initializer, then we must
- leave the structure assignment in place and must load values
- from the structure into the slots for which we did not find
- constants. The easiest way to do this is to generate a complete
- copy-out, and then follow that with the constant assignments
- that we were able to build. DCE will clean things up. */
- gimple_seq seq0 = NULL;
- generate_copy_inout (lhs_elt, true, generate_element_ref (lhs_elt),
- &seq0);
- gimple_seq_add_seq (&seq0, seq);
- seq = seq0;
+ lhs = get_access_replacement (lacc);
+ gimple_assign_set_lhs (*stmt, lhs);
+ modify_this_stmt = true;
+ if (lacc->grp_partial_lhs)
+ force_gimple_rhs = true;
}
- else
+
+ if (racc && racc->grp_to_be_replaced)
{
- /* CONSTRUCTOR is defined such that any member not mentioned is assigned
- a zero value. Initialize the rest of the instantiated elements. */
- generate_element_zero (lhs_elt, &seq);
- gimple_seq_add_seq (&seq, init_seq);
+ rhs = get_access_replacement (racc);
+ modify_this_stmt = true;
+ if (racc->grp_partial_lhs)
+ force_gimple_rhs = true;
}
- if (lhs_elt->use_block_copy || !result)
+ if (modify_this_stmt)
{
- /* Since LHS is not fully instantiated, we must leave the structure
- assignment in place. Treating this case differently from a USE
- exposes constants to later optimizations. */
- if (seq)
+ if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
{
- mark_all_v_defs_seq (seq);
- sra_insert_after (gsi, seq);
+ /* If we can avoid creating a VIEW_CONVERT_EXPR do so.
+ ??? This should move to fold_stmt which we simply should
+ call after building a VIEW_CONVERT_EXPR here. */
+ if (AGGREGATE_TYPE_P (TREE_TYPE (lhs))
+ && !access_has_children_p (lacc))
+ {
+ tree expr = unshare_expr (lhs);
+ if (build_ref_for_offset (&expr, TREE_TYPE (lhs), racc->offset,
+ TREE_TYPE (rhs), false))
+ {
+ lhs = expr;
+ gimple_assign_set_lhs (*stmt, expr);
+ }
+ }
+ else if (AGGREGATE_TYPE_P (TREE_TYPE (rhs))
+ && !access_has_children_p (racc))
+ {
+ tree expr = unshare_expr (rhs);
+ if (build_ref_for_offset (&expr, TREE_TYPE (rhs), lacc->offset,
+ TREE_TYPE (lhs), false))
+ rhs = expr;
+ }
+ if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
+ rhs = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), rhs);
}
- }
- else
- {
- /* The LHS is fully instantiated. The list of initializations
- replaces the original structure assignment. */
- gcc_assert (seq);
- update_stmt_if_modified (gsi_stmt (*gsi));
- mark_all_v_defs_seq (seq);
- sra_replace (gsi, seq);
- }
-}
-/* A subroutine of scalarize_ldst called via walk_tree. Set TREE_NO_TRAP
- on all INDIRECT_REFs. */
-
-static tree
-mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
-{
- tree t = *tp;
-
- if (TREE_CODE (t) == INDIRECT_REF)
- {
- TREE_THIS_NOTRAP (t) = 1;
- *walk_subtrees = 0;
+ if (force_gimple_rhs)
+ rhs = force_gimple_operand_gsi (gsi, rhs, true, NULL_TREE,
+ true, GSI_SAME_STMT);
+ if (gimple_assign_rhs1 (*stmt) != rhs)
+ {
+ gimple_assign_set_rhs_from_tree (gsi, rhs);
+ gcc_assert (*stmt == gsi_stmt (*gsi));
+ }
}
- else if (IS_TYPE_OR_DECL_P (t))
- *walk_subtrees = 0;
-
- return NULL;
-}
-
-/* Scalarize a LDST. To recap, this is an assignment between one scalarizable
- reference ELT and one non-scalarizable reference OTHER. IS_OUTPUT is true
- if ELT is on the left-hand side. */
-
-static void
-scalarize_ldst (struct sra_elt *elt, tree other,
- gimple_stmt_iterator *gsi, bool is_output)
-{
- /* Shouldn't have gotten called for a scalar. */
- gcc_assert (!elt->replacement);
- if (elt->use_block_copy)
- {
- /* Since ELT is not fully instantiated, we have to leave the
- block copy in place. Treat this as a USE. */
- scalarize_use (elt, NULL, gsi, is_output, false);
+ /* From this point on, the function deals with assignments in between
+ aggregates when at least one has scalar reductions of some of its
+ components. There are three possible scenarios: Both the LHS and RHS have
+ to-be-scalarized components, 2) only the RHS has or 3) only the LHS has.
+
+ In the first case, we would like to load the LHS components from RHS
+ components whenever possible. If that is not possible, we would like to
+ read it directly from the RHS (after updating it by storing in it its own
+ components). If there are some necessary unscalarized data in the LHS,
+ those will be loaded by the original assignment too. If neither of these
+ cases happen, the original statement can be removed. Most of this is done
+ by load_assign_lhs_subreplacements.
+
+ In the second case, we would like to store all RHS scalarized components
+ directly into LHS and if they cover the aggregate completely, remove the
+ statement too. In the third case, we want the LHS components to be loaded
+ directly from the RHS (DSE will remove the original statement if it
+ becomes redundant).
+
+ This is a bit complex but manageable when types match and when unions do
+ not cause confusion in a way that we cannot really load a component of LHS
+ from the RHS or vice versa (the access representing this level can have
+ subaccesses that are accessible only through a different union field at a
+ higher level - different from the one used in the examined expression).
+ Unions are fun.
+
+ Therefore, I specially handle a fourth case, happening when there is a
+ specific type cast or it is impossible to locate a scalarized subaccess on
+ the other side of the expression. If that happens, I simply "refresh" the
+ RHS by storing in it is scalarized components leave the original statement
+ there to do the copying and then load the scalar replacements of the LHS.
+ This is what the first branch does. */
+
+ if (contains_view_convert_expr_p (rhs) || contains_view_convert_expr_p (lhs)
+ || (access_has_children_p (racc)
+ && !ref_expr_for_all_replacements_p (racc, lhs, racc->offset))
+ || (access_has_children_p (lacc)
+ && !ref_expr_for_all_replacements_p (lacc, rhs, lacc->offset)))
+ {
+ if (access_has_children_p (racc))
+ generate_subtree_copies (racc->first_child, racc->base, 0, 0, 0,
+ gsi, false, false);
+ if (access_has_children_p (lacc))
+ generate_subtree_copies (lacc->first_child, lacc->base, 0, 0, 0,
+ gsi, true, true);
}
else
{
- /* The interesting case is when ELT is fully instantiated. In this
- case we can have each element stored/loaded directly to/from the
- corresponding slot in OTHER. This avoids a block copy. */
-
- gimple_seq seq = NULL;
- gimple stmt = gsi_stmt (*gsi);
-
- update_stmt_if_modified (stmt);
- generate_copy_inout (elt, is_output, other, &seq);
- gcc_assert (seq);
- mark_all_v_defs_seq (seq);
-
- /* Preserve EH semantics. */
- if (stmt_ends_bb_p (stmt))
+ if (access_has_children_p (lacc) && access_has_children_p (racc))
{
- gimple_stmt_iterator si;
- gimple first;
- gimple_seq blist = NULL;
- bool thr = stmt_could_throw_p (stmt);
-
- /* If the last statement of this BB created an EH edge
- before scalarization, we have to locate the first
- statement that can throw in the new statement list and
- use that as the last statement of this BB, such that EH
- semantics is preserved. All statements up to this one
- are added to the same BB. All other statements in the
- list will be added to normal outgoing edges of the same
- BB. If they access any memory, it's the same memory, so
- we can assume they won't throw. */
- si = gsi_start (seq);
- for (first = gsi_stmt (si);
- thr && !gsi_end_p (si) && !stmt_could_throw_p (first);
- first = gsi_stmt (si))
+ gimple_stmt_iterator orig_gsi = *gsi;
+ bool refreshed;
+
+ if (lacc->grp_read && !lacc->grp_covered)
{
- gsi_remove (&si, false);
- gimple_seq_add_stmt (&blist, first);
+ handle_unscalarized_data_in_subtree (racc, lhs, gsi);
+ refreshed = true;
}
+ else
+ refreshed = false;
- /* Extract the first remaining statement from LIST, this is
- the EH statement if there is one. */
- gsi_remove (&si, false);
-
- if (blist)
- sra_insert_before (gsi, blist);
-
- /* Replace the old statement with this new representative. */
- gsi_replace (gsi, first, true);
+ load_assign_lhs_subreplacements (lacc->first_child, racc,
+ lacc->offset, racc->offset,
+ &orig_gsi, gsi, &refreshed, lhs);
+ if (!refreshed || !racc->grp_unscalarized_data)
+ {
+ if (*stmt == gsi_stmt (*gsi))
+ gsi_next (gsi);
- if (!gsi_end_p (si))
+ unlink_stmt_vdef (*stmt);
+ gsi_remove (&orig_gsi, true);
+ return SRA_SA_REMOVED;
+ }
+ }
+ else
+ {
+ if (access_has_children_p (racc))
{
- /* If any reference would trap, then they all would. And more
- to the point, the first would. Therefore none of the rest
- will trap since the first didn't. Indicate this by
- iterating over the remaining statements and set
- TREE_THIS_NOTRAP in all INDIRECT_REFs. */
- do
+ if (!racc->grp_unscalarized_data)
{
- walk_gimple_stmt (&si, NULL, mark_notrap, NULL);
- gsi_next (&si);
+ generate_subtree_copies (racc->first_child, lhs,
+ racc->offset, 0, 0, gsi,
+ false, false);
+ gcc_assert (*stmt == gsi_stmt (*gsi));
+ unlink_stmt_vdef (*stmt);
+ gsi_remove (gsi, true);
+ return SRA_SA_REMOVED;
}
- while (!gsi_end_p (si));
-
- insert_edge_copies_seq (seq, gsi_bb (*gsi));
+ else
+ generate_subtree_copies (racc->first_child, lhs,
+ racc->offset, 0, 0, gsi, false, true);
}
+ else if (access_has_children_p (lacc))
+ generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
+ 0, 0, gsi, true, true);
}
- else
- sra_replace (gsi, seq);
}
+ return modify_this_stmt ? SRA_SA_PROCESSED : SRA_SA_NONE;
}
-/* Generate initializations for all scalarizable parameters. */
+/* Generate statements initializing scalar replacements of parts of function
+ parameters. */
static void
-scalarize_parms (void)
+initialize_parameter_reductions (void)
{
+ gimple_stmt_iterator gsi;
gimple_seq seq = NULL;
- unsigned i;
- bitmap_iterator bi;
-
- EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, bi)
- {
- tree var = referenced_var (i);
- struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
- generate_copy_inout (elt, true, var, &seq);
- }
+ tree parm;
- if (seq)
+ for (parm = DECL_ARGUMENTS (current_function_decl);
+ parm;
+ parm = TREE_CHAIN (parm))
{
- insert_edge_copies_seq (seq, ENTRY_BLOCK_PTR);
- mark_all_v_defs_seq (seq);
- }
-}
-
-/* Entry point to phase 4. Update the function to match replacements. */
-
-static void
-scalarize_function (void)
-{
- static const struct sra_walk_fns fns = {
- scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false
- };
+ VEC (access_p, heap) *access_vec;
+ struct access *access;
- sra_walk_function (&fns);
- scalarize_parms ();
- gsi_commit_edge_inserts ();
-}
-
-
-/* Debug helper function. Print ELT in a nice human-readable format. */
+ if (!bitmap_bit_p (candidate_bitmap, DECL_UID (parm)))
+ continue;
+ access_vec = get_base_access_vector (parm);
+ if (!access_vec)
+ continue;
-static void
-dump_sra_elt_name (FILE *f, struct sra_elt *elt)
-{
- if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
- {
- fputs (elt->element == integer_zero_node ? "__real__ " : "__imag__ ", f);
- dump_sra_elt_name (f, elt->parent);
- }
- else
- {
- if (elt->parent)
- dump_sra_elt_name (f, elt->parent);
- if (DECL_P (elt->element))
+ if (!seq)
{
- if (TREE_CODE (elt->element) == FIELD_DECL)
- fputc ('.', f);
- print_generic_expr (f, elt->element, dump_flags);
+ seq = gimple_seq_alloc ();
+ gsi = gsi_start (seq);
}
- else if (TREE_CODE (elt->element) == BIT_FIELD_REF)
- fprintf (f, "$B" HOST_WIDE_INT_PRINT_DEC "F" HOST_WIDE_INT_PRINT_DEC,
- tree_low_cst (TREE_OPERAND (elt->element, 2), 1),
- tree_low_cst (TREE_OPERAND (elt->element, 1), 1));
- else if (TREE_CODE (elt->element) == RANGE_EXPR)
- fprintf (f, "["HOST_WIDE_INT_PRINT_DEC".."HOST_WIDE_INT_PRINT_DEC"]",
- TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 0)),
- TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 1)));
- else
- fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]",
- TREE_INT_CST_LOW (elt->element));
- }
-}
-/* Likewise, but callable from the debugger. */
+ for (access = VEC_index (access_p, access_vec, 0);
+ access;
+ access = access->next_grp)
+ generate_subtree_copies (access, parm, 0, 0, 0, &gsi, true, true);
+ }
-void
-debug_sra_elt_name (struct sra_elt *elt)
-{
- dump_sra_elt_name (stderr, elt);
- fputc ('\n', stderr);
+ if (seq)
+ gsi_insert_seq_on_edge_immediate (single_succ_edge (ENTRY_BLOCK_PTR), seq);
}
-static void
-sra_init_cache (void)
+/* The "main" function of intraprocedural SRA passes. Runs the analysis and if
+ it reveals there are components of some aggregates to be scalarized, it runs
+ the required transformations. */
+static unsigned int
+perform_intra_sra (void)
{
- if (sra_type_decomp_cache)
- return;
+ int ret = 0;
+ sra_initialize ();
- sra_type_decomp_cache = BITMAP_ALLOC (NULL);
- sra_type_inst_cache = BITMAP_ALLOC (NULL);
-}
+ if (!find_var_candidates ())
+ goto out;
+ if (!scan_function (build_access_from_expr, build_accesses_from_assign, NULL,
+ true, NULL))
+ goto out;
-/* Main entry point. */
+ if (!analyze_all_variable_accesses ())
+ goto out;
-static unsigned int
-tree_sra (void)
-{
- /* Initialize local variables. */
- gcc_obstack_init (&sra_obstack);
- sra_candidates = BITMAP_ALLOC (NULL);
- needs_copy_in = BITMAP_ALLOC (NULL);
- sra_init_cache ();
- sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL);
-
- /* Scan. If we find anything, instantiate and scalarize. */
- if (find_candidates_for_sra ())
- {
- scan_function ();
- decide_instantiations ();
- scalarize_function ();
- }
+ scan_function (sra_modify_expr, sra_modify_assign, NULL,
+ false, NULL);
+ initialize_parameter_reductions ();
+ ret = TODO_update_ssa;
- /* Free allocated memory. */
- htab_delete (sra_map);
- sra_map = NULL;
- BITMAP_FREE (sra_candidates);
- BITMAP_FREE (needs_copy_in);
- BITMAP_FREE (sra_type_decomp_cache);
- BITMAP_FREE (sra_type_inst_cache);
- obstack_free (&sra_obstack, NULL);
- return 0;
+ out:
+ sra_deinitialize ();
+ return ret;
}
+/* Perform early intraprocedural SRA. */
static unsigned int
-tree_sra_early (void)
+early_intra_sra (void)
{
- unsigned int ret;
-
- early_sra = true;
- ret = tree_sra ();
- early_sra = false;
+ sra_mode = SRA_MODE_EARLY_INTRA;
+ return perform_intra_sra ();
+}
- return ret;
+/* Perform "late" intraprocedural SRA. */
+static unsigned int
+late_intra_sra (void)
+{
+ sra_mode = SRA_MODE_INTRA;
+ return perform_intra_sra ();
}
+
static bool
-gate_sra (void)
+gate_intra_sra (void)
{
return flag_tree_sra != 0;
}
+
struct gimple_opt_pass pass_sra_early =
{
{
GIMPLE_PASS,
- "esra", /* name */
- gate_sra, /* gate */
- tree_sra_early, /* execute */
+ "esra", /* name */
+ gate_intra_sra, /* gate */
+ early_intra_sra, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TREE_SRA, /* tv_id */
- PROP_cfg | PROP_ssa, /* properties_required */
+ PROP_cfg | PROP_ssa, /* properties_required */
0, /* properties_provided */
- 0, /* properties_destroyed */
+ 0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func
| TODO_update_ssa
@@ -3683,20 +2317,21 @@ struct gimple_opt_pass pass_sra_early =
}
};
+
struct gimple_opt_pass pass_sra =
{
{
GIMPLE_PASS,
- "sra", /* name */
- gate_sra, /* gate */
- tree_sra, /* execute */
+ "sra", /* name */
+ gate_intra_sra, /* gate */
+ late_intra_sra, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TREE_SRA, /* tv_id */
- PROP_cfg | PROP_ssa, /* properties_required */
+ PROP_cfg | PROP_ssa, /* properties_required */
0, /* properties_provided */
- 0, /* properties_destroyed */
+ 0, /* properties_destroyed */
TODO_update_address_taken, /* todo_flags_start */
TODO_dump_func
| TODO_update_ssa