diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 13 | ||||
-rw-r--r-- | gcc/Makefile.in | 8 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 13 | ||||
-rw-r--r-- | gcc/params.def | 30 | ||||
-rw-r--r-- | gcc/params.h | 6 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 15 | ||||
-rw-r--r-- | gcc/testsuite/gcc.c-torture/compile/sra-1.c | 75 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/memcpy-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/sra-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/sra-2.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/sra-3.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/sra-4.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/sra-5.c | 74 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/sra-6.c | 40 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-7.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-8.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-9.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gfortran.dg/pr25923.f90 | 2 | ||||
-rw-r--r-- | gcc/testsuite/gnat.dg/pack9.adb | 2 | ||||
-rw-r--r-- | gcc/tree-sra.c | 4811 |
20 files changed, 1954 insertions, 3155 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f864337629d..ebe3141b88c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2009-05-29 Martin Jambor <mjambor@suse.cz> + + * tree-sra.c: New implementation of SRA. + + * params.def (PARAM_SRA_MAX_STRUCTURE_SIZE): Removed. + (PARAM_SRA_MAX_STRUCTURE_COUNT): Removed. + (PARAM_SRA_FIELD_STRUCTURE_RATIO): Removed. + * params.h (SRA_MAX_STRUCTURE_SIZE): Removed. + (SRA_MAX_STRUCTURE_COUNT): Removed. + (SRA_FIELD_STRUCTURE_RATIO): Removed. + * doc/invoke.texi (sra-max-structure-size): Removed. + (sra-field-structure-ratio): Removed. + 2009-05-29 Jakub Jelinek <jakub@redhat.com> PR middle-end/40291 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index a5ef9938026..810699d52e6 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2769,11 +2769,9 @@ tree-ssa-ccp.o : tree-ssa-ccp.c $(TREE_FLOW_H) $(CONFIG_H) \ $(DIAGNOSTIC_H) $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h \ $(TREE_DUMP_H) $(BASIC_BLOCK_H) $(TREE_PASS_H) langhooks.h \ tree-ssa-propagate.h value-prof.h $(FLAGS_H) $(TARGET_H) $(TOPLEV_H) -tree-sra.o : tree-sra.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(RTL_H) \ - $(TM_P_H) $(TREE_FLOW_H) $(DIAGNOSTIC_H) $(TREE_INLINE_H) \ - $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) $(GIMPLE_H) \ - langhooks.h $(TREE_PASS_H) $(FLAGS_H) $(EXPR_H) $(BASIC_BLOCK_H) \ - $(BITMAP_H) $(GGC_H) hard-reg-set.h $(OBSTACK_H) $(PARAMS_H) $(TARGET_H) +tree-sra.o : tree-sra.c $(CONFIG_H) $(SYSTEM_H) coretypes.h alloc-pool.h \ + $(TM_H) $(TREE_H) $(GIMPLE_H) $(TREE_FLOW_H) $(DIAGNOSTIC_H) $(TREE_DUMP_H) \ + $(TIMEVAR_H) $(PARAMS_H) $(TARGET_H) $(FLAGS_H) tree-switch-conversion.o : tree-switch-conversion.c $(CONFIG_H) $(SYSTEM_H) \ $(TREE_H) $(TM_P_H) $(TREE_FLOW_H) $(DIAGNOSTIC_H) $(TREE_INLINE_H) \ $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) $(GIMPLE_H) \ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index ca059df9445..ebd91dbee1c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -7312,19 +7312,6 @@ In each case, the @var{value} is an integer. The allowable choices for @var{name} are given in the following table: @table @gcctabopt -@item sra-max-structure-size -The maximum structure size, in bytes, at which the scalar replacement -of aggregates (SRA) optimization will perform block copies. The -default value, 0, implies that GCC will select the most appropriate -size itself. - -@item sra-field-structure-ratio -The threshold ratio (as a percentage) between instantiated fields and -the complete structure size. We say that if the ratio of the number -of bytes in instantiated fields to the number of bytes in the complete -structure exceeds this parameter, then block copies are not used. The -default is 75. - @item struct-reorg-cold-struct-ratio The threshold ratio (as a percentage) between a structure frequency and the frequency of the hottest structure in the program. This parameter diff --git a/gcc/params.def b/gcc/params.def index ccbc305a627..370d0948da9 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -38,36 +38,6 @@ along with GCC; see the file COPYING3. If not see Be sure to add an entry to invoke.texi summarizing the parameter. */ -/* The maximum structure size at which the scalar replacement of - aggregates (SRA) pass will perform block copies. The default - value, 0, implies that GCC will select the most appropriate size - itself. */ -DEFPARAM (PARAM_SRA_MAX_STRUCTURE_SIZE, - "sra-max-structure-size", - "The maximum structure size (in bytes) for which GCC will " - "use by-element copies", - 0, 0, 0) - -/* The maximum number of structure fields which the SRA pass will - instantiate to avoid block copies. The default value, 0, implies - that GCC will select the appropriate value itself. */ -DEFPARAM (PARAM_SRA_MAX_STRUCTURE_COUNT, - "sra-max-structure-count", - "The maximum number of structure fields for which GCC will " - "use by-element copies", - 0, 0, 0) - -/* The ratio between instantiated fields and the complete structure - size. We say that if the ratio of the number of bytes in - instantiated fields to the number of bytes in the complete - structure exceeds this parameter, or if the number of instantiated - fields to the total number of fields exceeds this parameter, then - block copies are not used. The default is 75%. */ -DEFPARAM (PARAM_SRA_FIELD_STRUCTURE_RATIO, - "sra-field-structure-ratio", - "The threshold ratio between instantiated fields and the total structure size", - 75, 0, 100) - /* The threshold ratio between current and hottest structure counts. We say that if the ratio of the current structure count, calculated by profiling, to the hottest structure count diff --git a/gcc/params.h b/gcc/params.h index 16ed29234ca..828aa7d43cc 100644 --- a/gcc/params.h +++ b/gcc/params.h @@ -94,12 +94,6 @@ typedef enum compiler_param (compiler_params[(int) ENUM].set) /* Macros for the various parameters. */ -#define SRA_MAX_STRUCTURE_SIZE \ - PARAM_VALUE (PARAM_SRA_MAX_STRUCTURE_SIZE) -#define SRA_MAX_STRUCTURE_COUNT \ - PARAM_VALUE (PARAM_SRA_MAX_STRUCTURE_COUNT) -#define SRA_FIELD_STRUCTURE_RATIO \ - PARAM_VALUE (PARAM_SRA_FIELD_STRUCTURE_RATIO) #define STRUCT_REORG_COLD_STRUCT_RATIO \ PARAM_VALUE (PARAM_STRUCT_REORG_COLD_STRUCT_RATIO) #define MAX_INLINE_INSNS_SINGLE \ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e8577143ff8..7479c1a74ea 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,18 @@ +2009-05-29 Martin Jambor <mjambor@suse.cz> + + * gfortran.dg/pr25923.f90: XFAIL warning expectation. + * gcc.dg/tree-ssa/ssa-fre-7.c: Compile with -fno-tree-sra. + * gcc.dg/tree-ssa/ssa-fre-8.c: Likewise. + * gcc.dg/tree-ssa/ssa-fre-9.c: Likewise. + * gcc.dg/memcpy-1.c: Removed param sra-max-structure-size. + * gcc.dg/tree-ssa/sra-2.c: Likewise. + * gcc.dg/tree-ssa/sra-3.c: Likewise. + * gcc.dg/tree-ssa/sra-1.c: Likewise. + * gcc.dg/tree-ssa/sra-4.c: Changed comment. + * gcc.dg/tree-ssa/sra-5.c: New file. + * gcc.dg/tree-ssa/sra-6.c: New file. + * gcc.c-torture/compile/sra-1.c: New file. + 2009-05-29 Jakub Jelinek <jakub@redhat.com> PR middle-end/40291 diff --git a/gcc/testsuite/gcc.c-torture/compile/sra-1.c b/gcc/testsuite/gcc.c-torture/compile/sra-1.c new file mode 100644 index 00000000000..06dcf1002be --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/sra-1.c @@ -0,0 +1,75 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ +/* Let gimple verifier check what SRA does to unions and single-field + strucutres . */ + +struct sim_struct +{ + int x; +}; + +extern struct sim_struct get_x(void); + +struct sim_struct foo (void) +{ + struct sim_struct simple; + + simple = get_x (); + if (simple.x % 2) + simple.x = 39; + else + simple.x -=8; + + return simple; +} + +struct sim_cmplx +{ + _Complex double c; +}; + +extern struct sim_cmplx get_sc (void); + +_Complex double foo_c (void) +{ + struct sim_cmplx simple; + + simple = get_sc (); + if (__real__ simple.c > 200.3) + __imag__ simple.c -= 2.4; + + return simple.c; +} + + +union sim_union +{ + int i; + float d; +}; + +extern union sim_union get_y (void); + +union sim_union bar (void) +{ + union sim_union simple; + + simple = get_y (); + if (simple.d > 8.2) + simple.i = 300; + + return simple; +} + +extern int get_int (void); + +int bar_i (void) +{ + union sim_union simple; + + simple = get_y (); + if (simple.d > 8.2) + simple.i = get_int (); + + return simple.i; +} diff --git a/gcc/testsuite/gcc.dg/memcpy-1.c b/gcc/testsuite/gcc.dg/memcpy-1.c index cc602423793..2b11098b286 100644 --- a/gcc/testsuite/gcc.dg/memcpy-1.c +++ b/gcc/testsuite/gcc.dg/memcpy-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-optimized --param sra-max-structure-size=32" } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ /* PR36598 AVR fail maybe due to cost metrics */ /* { dg-final { scan-tree-dump-times "nasty_local" 0 "optimized" { xfail { "avr-*-*" } } } } */ /* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-1.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-1.c index c2e45eb1f84..e5af2475115 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/sra-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O1 -fdump-tree-optimized --param sra-max-structure-size=32" } */ +/* { dg-options "-O1 -fdump-tree-optimized" } */ /* Tests for SRA. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-2.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-2.c index 177c4bcace4..5682b8afbcf 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/sra-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-2.c @@ -1,5 +1,5 @@ -/* { dg-do compile } */ -/* { dg-options "-O1 -fno-tree-fre -fdump-tree-optimized --param sra-max-structure-size=32" } */ +/* { dg-do compile } */ +/* { dg-options "-O1 -fno-tree-fre -fdump-tree-optimized" } */ /* Test for SRA. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-3.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-3.c index 661dc58ff09..d8908152384 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/sra-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O1 -fdump-tree-optimized --param sra-max-structure-size=32" } */ +/* { dg-options "-O1 -fdump-tree-optimized" } */ /* Test for SRA. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-4.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-4.c index 6fdf37ffb34..73a68f90043 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/sra-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-4.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O1 -fdump-tree-optimized -w" } */ -/* Check that SRA does non block copies for structs that just contain vectors. */ +/* Check that SRA replaces strucutres containing vectors. */ #define vector __attribute__((vector_size(16))) @@ -20,7 +20,5 @@ vector int f(vector int t1, vector int t2) return st3.t; } -/* There should be no references to st as SRA should not have done block copy. */ /* { dg-final { scan-tree-dump-times "st" 0 "optimized" } } */ /* { dg-final { cleanup-tree-dump "optimized" } } */ - diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-5.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-5.c new file mode 100644 index 00000000000..869d2f55f95 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-5.c @@ -0,0 +1,74 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-optimized" } */ + +/* Tests for SRA of unions. */ + + +typedef union testunion +{ + double d; + char f1; +} testunion; + +void +copyunion1 (testunion param) +{ + testunion local; + param.f1 = 0; + local = param; + if (local.f1 != 0) + link_error (); +} + +void +copyunion11 (testunion *param) +{ + testunion local; + param->f1 = 0; + local = *param; + if (local.f1 != 0) + link_error (); +} + +void +copyunion111 (testunion param) +{ + testunion *local = ¶m; + param.f1 = 0; + if (local->f1 != 0) + link_error (); +} + +testunion globuf; +void +copyunion1111 (void) +{ + testunion local; + globuf.f1 = 0; + local = globuf; + if (local.f1 != 0) + link_error (); +} + +void +copyunion11111 (void) +{ + testunion *local = &globuf; + globuf.f1 = 0; + if (local->f1 != 0) + link_error (); +} + +void +copyunion111111 (testunion param) +{ + static testunion local; + param.f1 = 0; + local = param; + if (local.f1 != 0) + link_error (); +} + +/* There should be no reference to link_error. */ +/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized"} } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-6.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-6.c new file mode 100644 index 00000000000..e59b536c12d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-6.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-optimized -fdump-tree-esra-details" } */ + +typedef struct teststruct +{ + double d; + int i1; + char c1; + float z; + char c2; + int i2; +} teststruct; + + +void cow (int i) +{ + teststruct a, b, c, d; + + a.d = 3.2; + a.i1 = i; + + b = a; + c = b; + d = c; + + if (d.i1 != i) + link_error (); +} + + +/* Suaccesses of b and c should have been created. */ +/* { dg-final { scan-tree-dump "expr = b.d" "esra"} } */ +/* { dg-final { scan-tree-dump "expr = b.i1" "esra"} } */ +/* { dg-final { scan-tree-dump "expr = c.d" "esra"} } */ +/* { dg-final { scan-tree-dump "expr = c.i1" "esra"} } */ +/* { dg-final { cleanup-tree-dump "esra" } } */ + +/* There should be no reference to link_error. */ +/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized"} } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-7.c index bd81831eba8..895c05fdf91 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-7.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-fre-details -fdump-tree-optimized" } */ +/* { dg-options "-O -fno-tree-sra -fdump-tree-fre-details -fdump-tree-optimized" } */ #if (__SIZEOF_INT__ == __SIZEOF_FLOAT__) typedef int intflt; #elif (__SIZEOF_LONG__ == __SIZEOF_FLOAT__) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-8.c index 6e17bd531b3..bc9f8e3992e 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-8.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-8.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-fre-details" } */ +/* { dg-options "-O -fno-tree-sra -fdump-tree-fre-details" } */ #if (__SIZEOF_INT__ == __SIZEOF_FLOAT__) typedef int intflt; #elif (__SIZEOF_LONG__ == __SIZEOF_FLOAT__) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-9.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-9.c index 18595ed6fe5..c8a434a2bba 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-9.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-9.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fdump-tree-fre-stats" } */ +/* { dg-options "-O -fno-tree-sra -fdump-tree-fre-stats" } */ union loc { unsigned reg; diff --git a/gcc/testsuite/gfortran.dg/pr25923.f90 b/gcc/testsuite/gfortran.dg/pr25923.f90 index f075944b92b..b6979ec8896 100644 --- a/gcc/testsuite/gfortran.dg/pr25923.f90 +++ b/gcc/testsuite/gfortran.dg/pr25923.f90 @@ -10,7 +10,7 @@ implicit none contains - function baz(arg) result(res) ! { dg-warning "res.yr' may be" } + function baz(arg) result(res) ! { dg-warning "res.yr' may be" "" { xfail *-*-* } } type(bar), intent(in) :: arg type(bar) :: res logical, external:: some_func diff --git a/gcc/testsuite/gnat.dg/pack9.adb b/gcc/testsuite/gnat.dg/pack9.adb index 64d71b11cc9..232904ac1e1 100644 --- a/gcc/testsuite/gnat.dg/pack9.adb +++ b/gcc/testsuite/gnat.dg/pack9.adb @@ -1,5 +1,5 @@ -- { dg-do compile } --- { dg-options "-O2 -gnatp -cargs --param sra-max-structure-size=24 --param sra-max-structure-count=6 -fdump-tree-optimized" } +-- { dg-options "-O2 -gnatp -cargs -fdump-tree-optimized" } package body Pack9 is diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 0f689417163..825d6e80ae7 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -1,19 +1,18 @@ /* Scalar Replacement of Aggregates (SRA) converts some structure references into scalar references, exposing them to the scalar optimizers. - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 - Free Software Foundation, Inc. - Contributed by Diego Novillo <dnovillo@redhat.com> + Copyright (C) 2008, 2009 Free Software Foundation, Inc. + Contributed by Martin Jambor <mjambor@suse.cz> This file is part of GCC. -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. -GCC is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. @@ -21,3660 +20,2295 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +/* This file implements Scalar Reduction of Aggregates (SRA). SRA is run + twice, once in the early stages of compilation (early SRA) and once in the + late stages (late SRA). The aim of both is to turn references to scalar + parts of aggregates into uses of independent scalar variables. + + The two passes are nearly identical, the only difference is that early SRA + does not scalarize unions which are used as the result in a GIMPLE_RETURN + statement because together with inlining this can lead to weird type + conversions. + + Both passes operate in four stages: + + 1. The declarations that have properties which make them candidates for + scalarization are identified in function find_var_candidates(). The + candidates are stored in candidate_bitmap. + + 2. The function body is scanned. In the process, declarations which are + used in a manner that prevent their scalarization are removed from the + candidate bitmap. More importantly, for every access into an aggregate, + an access structure (struct access) is created by create_access() and + stored in a vector associated with the aggregate. Among other + information, the aggregate declaration, the offset and size of the access + and its type are stored in the structure. + + On a related note, assign_link structures are created for every assign + statement between candidate aggregates and attached to the related + accesses. + + 3. The vectors of accesses are analyzed. They are first sorted according to + their offset and size and then scanned for partially overlapping accesses + (i.e. those which overlap but one is not entirely within another). Such + an access disqualifies the whole aggregate from being scalarized. + + If there is no such inhibiting overlap, a representative access structure + is chosen for every unique combination of offset and size. Afterwards, + the pass builds a set of trees from these structures, in which children + of an access are within their parent (in terms of offset and size). + + Then accesses are propagated whenever possible (i.e. in cases when it + does not create a partially overlapping access) across assign_links from + the right hand side to the left hand side. + + Then the set of trees for each declaration is traversed again and those + accesses which should be replaced by a scalar are identified. + + 4. The function is traversed again, and for every reference into an + aggregate that has some component which is about to be scalarized, + statements are amended and new statements are created as necessary. + Finally, if a parameter got scalarized, the scalar replacements are + initialized with values from respective parameter aggregates. */ + #include "config.h" #include "system.h" #include "coretypes.h" +#include "alloc-pool.h" #include "tm.h" -#include "ggc.h" #include "tree.h" - -/* These RTL headers are needed for basic-block.h. */ -#include "rtl.h" -#include "tm_p.h" -#include "hard-reg-set.h" -#include "basic-block.h" -#include "diagnostic.h" -#include "langhooks.h" -#include "tree-inline.h" -#include "tree-flow.h" #include "gimple.h" +#include "tree-flow.h" +#include "diagnostic.h" #include "tree-dump.h" -#include "tree-pass.h" #include "timevar.h" -#include "flags.h" -#include "bitmap.h" -#include "obstack.h" -#include "target.h" -/* expr.h is needed for MOVE_RATIO. */ -#include "expr.h" #include "params.h" +#include "target.h" +#include "flags.h" +/* Enumeration of all aggregate reductions we can do. */ +enum sra_mode { SRA_MODE_EARLY_INTRA, /* early intraprocedural SRA */ + SRA_MODE_INTRA }; /* late intraprocedural SRA */ -/* This object of this pass is to replace a non-addressable aggregate with a - set of independent variables. Most of the time, all of these variables - will be scalars. But a secondary objective is to break up larger - aggregates into smaller aggregates. In the process we may find that some - bits of the larger aggregate can be deleted as unreferenced. - - This substitution is done globally. More localized substitutions would - be the purvey of a load-store motion pass. - - The optimization proceeds in phases: - - (1) Identify variables that have types that are candidates for - decomposition. - - (2) Scan the function looking for the ways these variables are used. - In particular we're interested in the number of times a variable - (or member) is needed as a complete unit, and the number of times - a variable (or member) is copied. - - (3) Based on the usage profile, instantiate substitution variables. - - (4) Scan the function making replacements. -*/ - +/* Global variable describing which aggregate reduction we are performing at + the moment. */ +static enum sra_mode sra_mode; -/* True if this is the "early" pass, before inlining. */ -static bool early_sra; +struct assign_link; -/* The set of aggregate variables that are candidates for scalarization. */ -static bitmap sra_candidates; +/* ACCESS represents each access to an aggregate variable (as a whole or a + part). It can also represent a group of accesses that refer to exactly the + same fragment of an aggregate (i.e. those that have exactly the same offset + and size). Such representatives for a single aggregate, once determined, + are linked in a linked list and have the group fields set. -/* Set of scalarizable PARM_DECLs that need copy-in operations at the - beginning of the function. */ -static bitmap needs_copy_in; + Moreover, when doing intraprocedural SRA, a tree is built from those + representatives (by the means of first_child and next_sibling pointers), in + which all items in a subtree are "within" the root, i.e. their offset is + greater or equal to offset of the root and offset+size is smaller or equal + to offset+size of the root. Children of an access are sorted by offset. -/* Sets of bit pairs that cache type decomposition and instantiation. */ -static bitmap sra_type_decomp_cache; -static bitmap sra_type_inst_cache; + Note that accesses to parts of vector and complex number types always + represented by an access to the whole complex number or a vector. It is a + duty of the modifying functions to replace them appropriately. */ -/* One of these structures is created for each candidate aggregate and - each (accessed) member or group of members of such an aggregate. */ -struct sra_elt +struct access { - /* A tree of the elements. Used when we want to traverse everything. */ - struct sra_elt *parent; - struct sra_elt *groups; - struct sra_elt *children; - struct sra_elt *sibling; - - /* If this element is a root, then this is the VAR_DECL. If this is - a sub-element, this is some token used to identify the reference. - In the case of COMPONENT_REF, this is the FIELD_DECL. In the case - of an ARRAY_REF, this is the (constant) index. In the case of an - ARRAY_RANGE_REF, this is the (constant) RANGE_EXPR. In the case - of a complex number, this is a zero or one. */ - tree element; - - /* The type of the element. */ - tree type; + /* Values returned by `get_ref_base_and_extent' for each component reference + If EXPR isn't a component reference just set `BASE = EXPR', `OFFSET = 0', + `SIZE = TREE_SIZE (TREE_TYPE (expr))'. */ + HOST_WIDE_INT offset; + HOST_WIDE_INT size; + tree base; - /* A VAR_DECL, for any sub-element we've decided to replace. */ - tree replacement; + /* Expression. */ + tree expr; + /* Type. */ + tree type; - /* The number of times the element is referenced as a whole. I.e. - given "a.b.c", this would be incremented for C, but not for A or B. */ - unsigned int n_uses; + /* Next group representative for this aggregate. */ + struct access *next_grp; + + /* Pointer to the group representative. Pointer to itself if the struct is + the representative. */ + struct access *group_representative; + + /* If this access has any children (in terms of the definition above), this + points to the first one. */ + struct access *first_child; + + /* Pointer to the next sibling in the access tree as described above. */ + struct access *next_sibling; + + /* Pointers to the first and last element in the linked list of assign + links. */ + struct assign_link *first_link, *last_link; + + /* Pointer to the next access in the work queue. */ + struct access *next_queued; + + /* Replacement variable for this access "region." Never to be accessed + directly, always only by the means of get_access_replacement() and only + when grp_to_be_replaced flag is set. */ + tree replacement_decl; + + /* Is this particular access write access? */ + unsigned write : 1; + + /* Is this access currently in the work queue? */ + unsigned grp_queued : 1; + /* Does this group contain a write access? This flag is propagated down the + access tree. */ + unsigned grp_write : 1; + /* Does this group contain a read access? This flag is propagated down the + access tree. */ + unsigned grp_read : 1; + /* Is the subtree rooted in this access fully covered by scalar + replacements? */ + unsigned grp_covered : 1; + /* If set to true, this access and all below it in an access tree must not be + scalarized. */ + unsigned grp_unscalarizable_region : 1; + /* Whether data have been written to parts of the aggregate covered by this + access which is not to be scalarized. This flag is propagated up in the + access tree. */ + unsigned grp_unscalarized_data : 1; + /* Does this access and/or group contain a write access through a + BIT_FIELD_REF? */ + unsigned grp_partial_lhs : 1; + + /* Set when a scalar replacement should be created for this variable. We do + the decision and creation at different places because create_tmp_var + cannot be called from within FOR_EACH_REFERENCED_VAR. */ + unsigned grp_to_be_replaced : 1; +}; - /* The number of times the element is copied to or from another - scalarizable element. */ - unsigned int n_copies; +typedef struct access *access_p; - /* True if TYPE is scalar. */ - bool is_scalar; +DEF_VEC_P (access_p); +DEF_VEC_ALLOC_P (access_p, heap); - /* True if this element is a group of members of its parent. */ - bool is_group; +/* Alloc pool for allocating access structures. */ +static alloc_pool access_pool; - /* True if we saw something about this element that prevents scalarization, - such as non-constant indexing. */ - bool cannot_scalarize; +/* A structure linking lhs and rhs accesses from an aggregate assignment. They + are used to propagate subaccesses from rhs to lhs as long as they don't + conflict with what is already there. */ +struct assign_link +{ + struct access *lacc, *racc; + struct assign_link *next; +}; - /* True if we've decided that structure-to-structure assignment - should happen via memcpy and not per-element. */ - bool use_block_copy; +/* Alloc pool for allocating assign link structures. */ +static alloc_pool link_pool; - /* True if everything under this element has been marked TREE_NO_WARNING. */ - bool all_no_warning; +/* Base (tree) -> Vector (VEC(access_p,heap) *) map. */ +static struct pointer_map_t *base_access_vec; - /* A flag for use with/after random access traversals. */ - bool visited; +/* Bitmap of bases (candidates). */ +static bitmap candidate_bitmap; +/* Obstack for creation of fancy names. */ +static struct obstack name_obstack; - /* True if there is BIT_FIELD_REF on the lhs with a vector. */ - bool is_vector_lhs; +/* Head of a linked list of accesses that need to have its subaccesses + propagated to their assignment counterparts. */ +static struct access *work_queue_head; - /* 1 if the element is a field that is part of a block, 2 if the field - is the block itself, 0 if it's neither. */ - char in_bitfld_block; -}; +/* Dump contents of ACCESS to file F in a human friendly way. If GRP is true, + representative fields are dumped, otherwise those which only describe the + individual access are. */ -#define IS_ELEMENT_FOR_GROUP(ELEMENT) (TREE_CODE (ELEMENT) == RANGE_EXPR) +static void +dump_access (FILE *f, struct access *access, bool grp) +{ + fprintf (f, "access { "); + fprintf (f, "base = (%d)'", DECL_UID (access->base)); + print_generic_expr (f, access->base, 0); + fprintf (f, "', offset = " HOST_WIDE_INT_PRINT_DEC, access->offset); + fprintf (f, ", size = " HOST_WIDE_INT_PRINT_DEC, access->size); + fprintf (f, ", expr = "); + print_generic_expr (f, access->expr, 0); + fprintf (f, ", type = "); + print_generic_expr (f, access->type, 0); + if (grp) + fprintf (f, ", grp_write = %d, grp_read = %d, grp_covered = %d, " + "grp_unscalarizable_region = %d, grp_unscalarized_data = %d, " + "grp_partial_lhs = %d, grp_to_be_replaced = %d\n", + access->grp_write, access->grp_read, access->grp_covered, + access->grp_unscalarizable_region, access->grp_unscalarized_data, + access->grp_partial_lhs, access->grp_to_be_replaced); + else + fprintf (f, ", write = %d, grp_partial_lhs = %d\n", access->write, + access->grp_partial_lhs); +} -#define FOR_EACH_ACTUAL_CHILD(CHILD, ELT) \ - for ((CHILD) = (ELT)->is_group \ - ? next_child_for_group (NULL, (ELT)) \ - : (ELT)->children; \ - (CHILD); \ - (CHILD) = (ELT)->is_group \ - ? next_child_for_group ((CHILD), (ELT)) \ - : (CHILD)->sibling) +/* Dump a subtree rooted in ACCESS to file F, indent by LEVEL. */ -/* Helper function for above macro. Return next child in group. */ -static struct sra_elt * -next_child_for_group (struct sra_elt *child, struct sra_elt *group) +static void +dump_access_tree_1 (FILE *f, struct access *access, int level) { - gcc_assert (group->is_group); - - /* Find the next child in the parent. */ - if (child) - child = child->sibling; - else - child = group->parent->children; - - /* Skip siblings that do not belong to the group. */ - while (child) + do { - tree g_elt = group->element; - if (TREE_CODE (g_elt) == RANGE_EXPR) - { - if (!tree_int_cst_lt (child->element, TREE_OPERAND (g_elt, 0)) - && !tree_int_cst_lt (TREE_OPERAND (g_elt, 1), child->element)) - break; - } - else - gcc_unreachable (); - - child = child->sibling; - } - - return child; -} + int i; -/* Random access to the child of a parent is performed by hashing. - This prevents quadratic behavior, and allows SRA to function - reasonably on larger records. */ -static htab_t sra_map; + for (i = 0; i < level; i++) + fputs ("* ", dump_file); -/* All structures are allocated out of the following obstack. */ -static struct obstack sra_obstack; + dump_access (f, access, true); -/* Debugging functions. */ -static void dump_sra_elt_name (FILE *, struct sra_elt *); -extern void debug_sra_elt_name (struct sra_elt *); + if (access->first_child) + dump_access_tree_1 (f, access->first_child, level + 1); -/* Forward declarations. */ -static tree generate_element_ref (struct sra_elt *); -static gimple_seq sra_build_assignment (tree dst, tree src); -static void mark_all_v_defs_seq (gimple_seq); + access = access->next_sibling; + } + while (access); +} - -/* Return true if DECL is an SRA candidate. */ +/* Dump all access trees for a variable, given the pointer to the first root in + ACCESS. */ -static bool -is_sra_candidate_decl (tree decl) +static void +dump_access_tree (FILE *f, struct access *access) { - return DECL_P (decl) && bitmap_bit_p (sra_candidates, DECL_UID (decl)); + for (; access; access = access->next_grp) + dump_access_tree_1 (f, access, 0); } -/* Return true if TYPE is a scalar type. */ +/* Return true iff ACC is non-NULL and has subaccesses. */ -static bool -is_sra_scalar_type (tree type) +static inline bool +access_has_children_p (struct access *acc) { - enum tree_code code = TREE_CODE (type); - return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE - || code == FIXED_POINT_TYPE - || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE - || code == POINTER_TYPE || code == OFFSET_TYPE - || code == REFERENCE_TYPE); + return acc && acc->first_child; } -/* Return true if TYPE can be decomposed into a set of independent variables. - - Note that this doesn't imply that all elements of TYPE can be - instantiated, just that if we decide to break up the type into - separate pieces that it can be done. */ +/* Return a vector of pointers to accesses for the variable given in BASE or + NULL if there is none. */ -static bool -sra_type_can_be_decomposed_p (tree type) +static VEC (access_p, heap) * +get_base_access_vector (tree base) { - unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2; - tree t; + void **slot; - /* Avoid searching the same type twice. */ - if (bitmap_bit_p (sra_type_decomp_cache, cache+0)) - return true; - if (bitmap_bit_p (sra_type_decomp_cache, cache+1)) - return false; + slot = pointer_map_contains (base_access_vec, base); + if (!slot) + return NULL; + else + return *(VEC (access_p, heap) **) slot; +} - /* The type must have a definite nonzero size. */ - if (TYPE_SIZE (type) == NULL || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST - || integer_zerop (TYPE_SIZE (type))) - goto fail; +/* Find an access with required OFFSET and SIZE in a subtree of accesses rooted + in ACCESS. Return NULL if it cannot be found. */ - /* The type must be a non-union aggregate. */ - switch (TREE_CODE (type)) +static struct access * +find_access_in_subtree (struct access *access, HOST_WIDE_INT offset, + HOST_WIDE_INT size) +{ + while (access && (access->offset != offset || access->size != size)) { - case RECORD_TYPE: - { - bool saw_one_field = false; + struct access *child = access->first_child; - for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t)) - if (TREE_CODE (t) == FIELD_DECL) - { - /* Reject incorrectly represented bit fields. */ - if (DECL_BIT_FIELD (t) - && INTEGRAL_TYPE_P (TREE_TYPE (t)) - && (tree_low_cst (DECL_SIZE (t), 1) - != TYPE_PRECISION (TREE_TYPE (t)))) - goto fail; - - /* And volatile fields. */ - if (TREE_THIS_VOLATILE (t)) - goto fail; - - saw_one_field = true; - } - - /* Record types must have at least one field. */ - if (!saw_one_field) - goto fail; - } - break; + while (child && (child->offset + child->size <= offset)) + child = child->next_sibling; + access = child; + } - case ARRAY_TYPE: - /* Array types must have a fixed lower and upper bound. */ - t = TYPE_DOMAIN (type); - if (t == NULL) - goto fail; - if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t))) - goto fail; - if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t))) - goto fail; - break; + return access; +} - case COMPLEX_TYPE: - break; +/* Return the first group representative for DECL or NULL if none exists. */ - default: - goto fail; - } +static struct access * +get_first_repr_for_decl (tree base) +{ + VEC (access_p, heap) *access_vec; - bitmap_set_bit (sra_type_decomp_cache, cache+0); - return true; + access_vec = get_base_access_vector (base); + if (!access_vec) + return NULL; - fail: - bitmap_set_bit (sra_type_decomp_cache, cache+1); - return false; + return VEC_index (access_p, access_vec, 0); } -/* Returns true if the TYPE is one of the available va_list types. - Otherwise it returns false. - Note, that for multiple calling conventions there can be more - than just one va_list type present. */ +/* Find an access representative for the variable BASE and given OFFSET and + SIZE. Requires that access trees have already been built. Return NULL if + it cannot be found. */ -static bool -is_va_list_type (tree type) +static struct access * +get_var_base_offset_size_access (tree base, HOST_WIDE_INT offset, + HOST_WIDE_INT size) { - tree h; + struct access *access; - if (type == NULL_TREE) - return false; - h = targetm.canonical_va_list_type (type); - if (h == NULL_TREE) - return false; - if (TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (h)) - return true; - return false; -} + access = get_first_repr_for_decl (base); + while (access && (access->offset + access->size <= offset)) + access = access->next_grp; + if (!access) + return NULL; -/* Return true if DECL can be decomposed into a set of independent - (though not necessarily scalar) variables. */ + return find_access_in_subtree (access, offset, size); +} -static bool -decl_can_be_decomposed_p (tree var) +/* Add LINK to the linked list of assign links of RACC. */ +static void +add_link_to_rhs (struct access *racc, struct assign_link *link) { - /* Early out for scalars. */ - if (is_sra_scalar_type (TREE_TYPE (var))) - return false; + gcc_assert (link->racc == racc); - /* The variable must not be aliased. */ - if (!is_gimple_non_addressable (var)) + if (!racc->first_link) { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Cannot scalarize variable "); - print_generic_expr (dump_file, var, dump_flags); - fprintf (dump_file, " because it must live in memory\n"); - } - return false; + gcc_assert (!racc->last_link); + racc->first_link = link; } + else + racc->last_link->next = link; + + racc->last_link = link; + link->next = NULL; +} - /* The variable must not be volatile. */ - if (TREE_THIS_VOLATILE (var)) +/* Move all link structures in their linked list in OLD_RACC to the linked list + in NEW_RACC. */ +static void +relink_to_new_repr (struct access *new_racc, struct access *old_racc) +{ + if (!old_racc->first_link) { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Cannot scalarize variable "); - print_generic_expr (dump_file, var, dump_flags); - fprintf (dump_file, " because it is declared volatile\n"); - } - return false; + gcc_assert (!old_racc->last_link); + return; } - /* We must be able to decompose the variable's type. */ - if (!sra_type_can_be_decomposed_p (TREE_TYPE (var))) + if (new_racc->first_link) { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Cannot scalarize variable "); - print_generic_expr (dump_file, var, dump_flags); - fprintf (dump_file, " because its type cannot be decomposed\n"); - } - return false; - } + gcc_assert (!new_racc->last_link->next); + gcc_assert (!old_racc->last_link || !old_racc->last_link->next); - /* HACK: if we decompose a va_list_type_node before inlining, then we'll - confuse tree-stdarg.c, and we won't be able to figure out which and - how many arguments are accessed. This really should be improved in - tree-stdarg.c, as the decomposition is truly a win. This could also - be fixed if the stdarg pass ran early, but this can't be done until - we've aliasing information early too. See PR 30791. */ - if (early_sra && is_va_list_type (TREE_TYPE (var))) - return false; + new_racc->last_link->next = old_racc->first_link; + new_racc->last_link = old_racc->last_link; + } + else + { + gcc_assert (!new_racc->last_link); - return true; + new_racc->first_link = old_racc->first_link; + new_racc->last_link = old_racc->last_link; + } + old_racc->first_link = old_racc->last_link = NULL; } -/* Return true if TYPE can be *completely* decomposed into scalars. */ +/* Add ACCESS to the work queue (which is actually a stack). */ -static bool -type_can_instantiate_all_elements (tree type) +static void +add_access_to_work_queue (struct access *access) { - if (is_sra_scalar_type (type)) - return true; - if (!sra_type_can_be_decomposed_p (type)) - return false; - - switch (TREE_CODE (type)) + if (!access->grp_queued) { - case RECORD_TYPE: - { - unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2; - tree f; + gcc_assert (!access->next_queued); + access->next_queued = work_queue_head; + access->grp_queued = 1; + work_queue_head = access; + } +} - if (bitmap_bit_p (sra_type_inst_cache, cache+0)) - return true; - if (bitmap_bit_p (sra_type_inst_cache, cache+1)) - return false; +/* Pop an access from the work queue, and return it, assuming there is one. */ - for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) - if (TREE_CODE (f) == FIELD_DECL) - { - if (!type_can_instantiate_all_elements (TREE_TYPE (f))) - { - bitmap_set_bit (sra_type_inst_cache, cache+1); - return false; - } - } +static struct access * +pop_access_from_work_queue (void) +{ + struct access *access = work_queue_head; - bitmap_set_bit (sra_type_inst_cache, cache+0); - return true; - } + work_queue_head = access->next_queued; + access->next_queued = NULL; + access->grp_queued = 0; + return access; +} - case ARRAY_TYPE: - return type_can_instantiate_all_elements (TREE_TYPE (type)); - case COMPLEX_TYPE: - return true; +/* Allocate necessary structures. */ - default: - gcc_unreachable (); - } +static void +sra_initialize (void) +{ + candidate_bitmap = BITMAP_ALLOC (NULL); + gcc_obstack_init (&name_obstack); + access_pool = create_alloc_pool ("SRA accesses", sizeof (struct access), 16); + link_pool = create_alloc_pool ("SRA links", sizeof (struct assign_link), 16); + base_access_vec = pointer_map_create (); } -/* Test whether ELT or some sub-element cannot be scalarized. */ +/* Hook fed to pointer_map_traverse, deallocate stored vectors. */ static bool -can_completely_scalarize_p (struct sra_elt *elt) +delete_base_accesses (const void *key ATTRIBUTE_UNUSED, void **value, + void *data ATTRIBUTE_UNUSED) { - struct sra_elt *c; - - if (elt->cannot_scalarize) - return false; - - for (c = elt->children; c; c = c->sibling) - if (!can_completely_scalarize_p (c)) - return false; - - for (c = elt->groups; c; c = c->sibling) - if (!can_completely_scalarize_p (c)) - return false; + VEC (access_p, heap) *access_vec; + access_vec = (VEC (access_p, heap) *) *value; + VEC_free (access_p, heap, access_vec); return true; } - -/* A simplified tree hashing algorithm that only handles the types of - trees we expect to find in sra_elt->element. */ +/* Deallocate all general structures. */ -static hashval_t -sra_hash_tree (tree t) +static void +sra_deinitialize (void) { - hashval_t h; - - switch (TREE_CODE (t)) - { - case VAR_DECL: - case PARM_DECL: - case RESULT_DECL: - h = DECL_UID (t); - break; - - case INTEGER_CST: - h = TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t); - break; - - case RANGE_EXPR: - h = iterative_hash_expr (TREE_OPERAND (t, 0), 0); - h = iterative_hash_expr (TREE_OPERAND (t, 1), h); - break; + BITMAP_FREE (candidate_bitmap); + free_alloc_pool (access_pool); + free_alloc_pool (link_pool); + obstack_free (&name_obstack, NULL); - case FIELD_DECL: - /* We can have types that are compatible, but have different member - lists, so we can't hash fields by ID. Use offsets instead. */ - h = iterative_hash_expr (DECL_FIELD_OFFSET (t), 0); - h = iterative_hash_expr (DECL_FIELD_BIT_OFFSET (t), h); - break; + pointer_map_traverse (base_access_vec, delete_base_accesses, NULL); + pointer_map_destroy (base_access_vec); +} - case BIT_FIELD_REF: - /* Don't take operand 0 into account, that's our parent. */ - h = iterative_hash_expr (TREE_OPERAND (t, 1), 0); - h = iterative_hash_expr (TREE_OPERAND (t, 2), h); - break; +/* Remove DECL from candidates for SRA and write REASON to the dump file if + there is one. */ +static void +disqualify_candidate (tree decl, const char *reason) +{ + bitmap_clear_bit (candidate_bitmap, DECL_UID (decl)); - default: - gcc_unreachable (); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "! Disqualifying "); + print_generic_expr (dump_file, decl, 0); + fprintf (dump_file, " - %s\n", reason); } - - return h; } -/* Hash function for type SRA_PAIR. */ +/* Return true iff the type contains a field or an element which does not allow + scalarization. */ -static hashval_t -sra_elt_hash (const void *x) +static bool +type_internals_preclude_sra_p (tree type) { - const struct sra_elt *const e = (const struct sra_elt *) x; - const struct sra_elt *p; - hashval_t h; - - h = sra_hash_tree (e->element); - - /* Take into account everything except bitfield blocks back up the - chain. Given that chain lengths are rarely very long, this - should be acceptable. If we truly identify this as a performance - problem, it should work to hash the pointer value - "e->parent". */ - for (p = e->parent; p ; p = p->parent) - if (!p->in_bitfld_block) - h = (h * 65521) ^ sra_hash_tree (p->element); - - return h; -} + tree fld; + tree et; -/* Equality function for type SRA_PAIR. */ - -static int -sra_elt_eq (const void *x, const void *y) -{ - const struct sra_elt *const a = (const struct sra_elt *) x; - const struct sra_elt *const b = (const struct sra_elt *) y; - tree ae, be; - const struct sra_elt *ap = a->parent; - const struct sra_elt *bp = b->parent; - - if (ap) - while (ap->in_bitfld_block) - ap = ap->parent; - if (bp) - while (bp->in_bitfld_block) - bp = bp->parent; - - if (ap != bp) - return false; + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: + for (fld = TYPE_FIELDS (type); fld; fld = TREE_CHAIN (fld)) + if (TREE_CODE (fld) == FIELD_DECL) + { + tree ft = TREE_TYPE (fld); - ae = a->element; - be = b->element; + if (TREE_THIS_VOLATILE (fld) + || !DECL_FIELD_OFFSET (fld) || !DECL_SIZE (fld) + || !host_integerp (DECL_FIELD_OFFSET (fld), 1) + || !host_integerp (DECL_SIZE (fld), 1)) + return true; - if (ae == be) - return true; - if (TREE_CODE (ae) != TREE_CODE (be)) - return false; + if (AGGREGATE_TYPE_P (ft) + && type_internals_preclude_sra_p (ft)) + return true; + } - switch (TREE_CODE (ae)) - { - case VAR_DECL: - case PARM_DECL: - case RESULT_DECL: - /* These are all pointer unique. */ return false; - case INTEGER_CST: - /* Integers are not pointer unique, so compare their values. */ - return tree_int_cst_equal (ae, be); - - case RANGE_EXPR: - return - tree_int_cst_equal (TREE_OPERAND (ae, 0), TREE_OPERAND (be, 0)) - && tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1)); + case ARRAY_TYPE: + et = TREE_TYPE (type); - case FIELD_DECL: - /* Fields are unique within a record, but not between - compatible records. */ - if (DECL_FIELD_CONTEXT (ae) == DECL_FIELD_CONTEXT (be)) + if (AGGREGATE_TYPE_P (et)) + return type_internals_preclude_sra_p (et); + else return false; - return fields_compatible_p (ae, be); - - case BIT_FIELD_REF: - return - tree_int_cst_equal (TREE_OPERAND (ae, 1), TREE_OPERAND (be, 1)) - && tree_int_cst_equal (TREE_OPERAND (ae, 2), TREE_OPERAND (be, 2)); default: - gcc_unreachable (); + return false; } } -/* Create or return the SRA_ELT structure for CHILD in PARENT. PARENT - may be null, in which case CHILD must be a DECL. */ +/* Create and insert access for EXPR. Return created access, or NULL if it is + not possible. */ -static struct sra_elt * -lookup_element (struct sra_elt *parent, tree child, tree type, - enum insert_option insert) +static struct access * +create_access (tree expr, bool write) { - struct sra_elt dummy; - struct sra_elt **slot; - struct sra_elt *elt; + struct access *access; + void **slot; + VEC (access_p,heap) *vec; + HOST_WIDE_INT offset, size, max_size; + tree base = expr; + bool unscalarizable_region = false; - if (parent) - dummy.parent = parent->is_group ? parent->parent : parent; - else - dummy.parent = NULL; - dummy.element = child; + base = get_ref_base_and_extent (expr, &offset, &size, &max_size); - slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert); - if (!slot && insert == NO_INSERT) + if (!DECL_P (base) || !bitmap_bit_p (candidate_bitmap, DECL_UID (base))) return NULL; - elt = *slot; - if (!elt && insert == INSERT) + if (size != max_size) { - *slot = elt = XOBNEW (&sra_obstack, struct sra_elt); - memset (elt, 0, sizeof (*elt)); - - elt->parent = parent; - elt->element = child; - elt->type = type; - elt->is_scalar = is_sra_scalar_type (type); - - if (parent) - { - if (IS_ELEMENT_FOR_GROUP (elt->element)) - { - elt->is_group = true; - elt->sibling = parent->groups; - parent->groups = elt; - } - else - { - elt->sibling = parent->children; - parent->children = elt; - } - } - - /* If this is a parameter, then if we want to scalarize, we have - one copy from the true function parameter. Count it now. */ - if (TREE_CODE (child) == PARM_DECL) - { - elt->n_copies = 1; - bitmap_set_bit (needs_copy_in, DECL_UID (child)); - } + size = max_size; + unscalarizable_region = true; } - return elt; -} - -/* Create or return the SRA_ELT structure for EXPR if the expression - refers to a scalarizable variable. */ - -static struct sra_elt * -maybe_lookup_element_for_expr (tree expr) -{ - struct sra_elt *elt; - tree child; - - switch (TREE_CODE (expr)) + if (size < 0) { - case VAR_DECL: - case PARM_DECL: - case RESULT_DECL: - if (is_sra_candidate_decl (expr)) - return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT); - return NULL; - - case ARRAY_REF: - /* We can't scalarize variable array indices. */ - if (in_array_bounds_p (expr)) - child = TREE_OPERAND (expr, 1); - else - return NULL; - break; - - case ARRAY_RANGE_REF: - /* We can't scalarize variable array indices. */ - if (range_in_array_bounds_p (expr)) - { - tree domain = TYPE_DOMAIN (TREE_TYPE (expr)); - child = build2 (RANGE_EXPR, integer_type_node, - TYPE_MIN_VALUE (domain), TYPE_MAX_VALUE (domain)); - } - else - return NULL; - break; - - case COMPONENT_REF: - { - tree type = TREE_TYPE (TREE_OPERAND (expr, 0)); - /* Don't look through unions. */ - if (TREE_CODE (type) != RECORD_TYPE) - return NULL; - /* Neither through variable-sized records. */ - if (TYPE_SIZE (type) == NULL_TREE - || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) - return NULL; - child = TREE_OPERAND (expr, 1); - } - break; - - case REALPART_EXPR: - child = integer_zero_node; - break; - case IMAGPART_EXPR: - child = integer_one_node; - break; - - default: + disqualify_candidate (base, "Encountered an unconstrained access."); return NULL; } - elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0)); - if (elt) - return lookup_element (elt, child, TREE_TYPE (expr), INSERT); - return NULL; -} + access = (struct access *) pool_alloc (access_pool); + memset (access, 0, sizeof (struct access)); - -/* Functions to walk just enough of the tree to see all scalarizable - references, and categorize them. */ + access->base = base; + access->offset = offset; + access->size = size; + access->expr = expr; + access->type = TREE_TYPE (expr); + access->write = write; + access->grp_unscalarizable_region = unscalarizable_region; -/* A set of callbacks for phases 2 and 4. They'll be invoked for the - various kinds of references seen. In all cases, *GSI is an iterator - pointing to the statement being processed. */ -struct sra_walk_fns -{ - /* Invoked when ELT is required as a unit. Note that ELT might refer to - a leaf node, in which case this is a simple scalar reference. *EXPR_P - points to the location of the expression. IS_OUTPUT is true if this - is a left-hand-side reference. USE_ALL is true if we saw something we - couldn't quite identify and had to force the use of the entire object. */ - void (*use) (struct sra_elt *elt, tree *expr_p, - gimple_stmt_iterator *gsi, bool is_output, bool use_all); - - /* Invoked when we have a copy between two scalarizable references. */ - void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, - gimple_stmt_iterator *gsi); - - /* Invoked when ELT is initialized from a constant. VALUE may be NULL, - in which case it should be treated as an empty CONSTRUCTOR. */ - void (*init) (struct sra_elt *elt, tree value, gimple_stmt_iterator *gsi); - - /* Invoked when we have a copy between one scalarizable reference ELT - and one non-scalarizable reference OTHER without side-effects. - IS_OUTPUT is true if ELT is on the left-hand side. */ - void (*ldst) (struct sra_elt *elt, tree other, - gimple_stmt_iterator *gsi, bool is_output); - - /* True during phase 2, false during phase 4. */ - /* ??? This is a hack. */ - bool initial_scan; -}; - -#ifdef ENABLE_CHECKING -/* Invoked via walk_tree, if *TP contains a candidate decl, return it. */ + slot = pointer_map_contains (base_access_vec, base); + if (slot) + vec = (VEC (access_p, heap) *) *slot; + else + vec = VEC_alloc (access_p, heap, 32); -static tree -sra_find_candidate_decl (tree *tp, int *walk_subtrees, - void *data ATTRIBUTE_UNUSED) -{ - tree t = *tp; - enum tree_code code = TREE_CODE (t); + VEC_safe_push (access_p, heap, vec, access); - if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL) - { - *walk_subtrees = 0; - if (is_sra_candidate_decl (t)) - return t; - } - else if (TYPE_P (t)) - *walk_subtrees = 0; + *((struct VEC (access_p,heap) **) + pointer_map_insert (base_access_vec, base)) = vec; - return NULL; + return access; } -#endif - -/* Walk most expressions looking for a scalarizable aggregate. - If we find one, invoke FNS->USE. */ - -static void -sra_walk_expr (tree *expr_p, gimple_stmt_iterator *gsi, bool is_output, - const struct sra_walk_fns *fns) -{ - tree expr = *expr_p; - tree inner = expr; - bool disable_scalarization = false; - bool use_all_p = false; - - /* We're looking to collect a reference expression between EXPR and INNER, - such that INNER is a scalarizable decl and all other nodes through EXPR - are references that we can scalarize. If we come across something that - we can't scalarize, we reset EXPR. This has the effect of making it - appear that we're referring to the larger expression as a whole. */ - - while (1) - switch (TREE_CODE (inner)) - { - case VAR_DECL: - case PARM_DECL: - case RESULT_DECL: - /* If there is a scalarizable decl at the bottom, then process it. */ - if (is_sra_candidate_decl (inner)) - { - struct sra_elt *elt = maybe_lookup_element_for_expr (expr); - if (disable_scalarization) - elt->cannot_scalarize = true; - else - fns->use (elt, expr_p, gsi, is_output, use_all_p); - } - return; - - case ARRAY_REF: - /* Non-constant index means any member may be accessed. Prevent the - expression from being scalarized. If we were to treat this as a - reference to the whole array, we can wind up with a single dynamic - index reference inside a loop being overridden by several constant - index references during loop setup. It's possible that this could - be avoided by using dynamic usage counts based on BB trip counts - (based on loop analysis or profiling), but that hardly seems worth - the effort. */ - /* ??? Hack. Figure out how to push this into the scan routines - without duplicating too much code. */ - if (!in_array_bounds_p (inner)) - { - disable_scalarization = true; - goto use_all; - } - /* ??? Are we assured that non-constant bounds and stride will have - the same value everywhere? I don't think Fortran will... */ - if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3)) - goto use_all; - inner = TREE_OPERAND (inner, 0); - break; - - case ARRAY_RANGE_REF: - if (!range_in_array_bounds_p (inner)) - { - disable_scalarization = true; - goto use_all; - } - /* ??? See above non-constant bounds and stride . */ - if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3)) - goto use_all; - inner = TREE_OPERAND (inner, 0); - break; - case COMPONENT_REF: - { - tree type = TREE_TYPE (TREE_OPERAND (inner, 0)); - /* Don't look through unions. */ - if (TREE_CODE (type) != RECORD_TYPE) - goto use_all; - /* Neither through variable-sized records. */ - if (TYPE_SIZE (type) == NULL_TREE - || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) - goto use_all; - inner = TREE_OPERAND (inner, 0); - } - break; - case REALPART_EXPR: - case IMAGPART_EXPR: - inner = TREE_OPERAND (inner, 0); - break; - - case BIT_FIELD_REF: - /* A bit field reference to a specific vector is scalarized but for - ones for inputs need to be marked as used on the left hand size so - when we scalarize it, we can mark that variable as non renamable. */ - if (is_output - && TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) == VECTOR_TYPE) - { - struct sra_elt *elt - = maybe_lookup_element_for_expr (TREE_OPERAND (inner, 0)); - if (elt) - elt->is_vector_lhs = true; - } - - /* A bit field reference (access to *multiple* fields simultaneously) - is not currently scalarized. Consider this an access to the full - outer element, to which walk_tree will bring us next. */ - goto use_all; - - CASE_CONVERT: - /* Similarly, a nop explicitly wants to look at an object in a - type other than the one we've scalarized. */ - goto use_all; - - case VIEW_CONVERT_EXPR: - /* Likewise for a view conversion, but with an additional twist: - it can be on the LHS and, in this case, an access to the full - outer element would mean a killing def. So we need to punt - if we haven't already a full access to the current element, - because we cannot pretend to have a killing def if we only - have a partial access at some level. */ - if (is_output && !use_all_p && inner != expr) - disable_scalarization = true; - goto use_all; - - case WITH_SIZE_EXPR: - /* This is a transparent wrapper. The entire inner expression really - is being used. */ - goto use_all; - - use_all: - expr_p = &TREE_OPERAND (inner, 0); - inner = expr = *expr_p; - use_all_p = true; - break; - - default: -#ifdef ENABLE_CHECKING - /* Validate that we're not missing any references. */ - gcc_assert (!walk_tree (&inner, sra_find_candidate_decl, NULL, NULL)); -#endif - return; - } -} - -/* Walk the arguments of a GIMPLE_CALL looking for scalarizable aggregates. - If we find one, invoke FNS->USE. */ +/* Search the given tree for a declaration by skipping handled components and + exclude it from the candidates. */ static void -sra_walk_gimple_call (gimple stmt, gimple_stmt_iterator *gsi, - const struct sra_walk_fns *fns) +disqualify_base_of_expr (tree t, const char *reason) { - int i; - int nargs = gimple_call_num_args (stmt); + while (handled_component_p (t)) + t = TREE_OPERAND (t, 0); - for (i = 0; i < nargs; i++) - sra_walk_expr (gimple_call_arg_ptr (stmt, i), gsi, false, fns); - - if (gimple_call_lhs (stmt)) - sra_walk_expr (gimple_call_lhs_ptr (stmt), gsi, true, fns); -} - -/* Walk the inputs and outputs of a GIMPLE_ASM looking for scalarizable - aggregates. If we find one, invoke FNS->USE. */ - -static void -sra_walk_gimple_asm (gimple stmt, gimple_stmt_iterator *gsi, - const struct sra_walk_fns *fns) -{ - size_t i; - for (i = 0; i < gimple_asm_ninputs (stmt); i++) - sra_walk_expr (&TREE_VALUE (gimple_asm_input_op (stmt, i)), gsi, false, fns); - for (i = 0; i < gimple_asm_noutputs (stmt); i++) - sra_walk_expr (&TREE_VALUE (gimple_asm_output_op (stmt, i)), gsi, true, fns); + if (DECL_P (t)) + disqualify_candidate (t, reason); } -/* Walk a GIMPLE_ASSIGN and categorize the assignment appropriately. */ +/* Scan expression EXPR and create access structures for all accesses to + candidates for scalarization. Return the created access or NULL if none is + created. */ -static void -sra_walk_gimple_assign (gimple stmt, gimple_stmt_iterator *gsi, - const struct sra_walk_fns *fns) +static struct access * +build_access_from_expr_1 (tree *expr_ptr, bool write) { - struct sra_elt *lhs_elt = NULL, *rhs_elt = NULL; - tree lhs, rhs; + struct access *ret = NULL; + tree expr = *expr_ptr; + bool partial_ref; - /* If there is more than 1 element on the RHS, only walk the lhs. */ - if (!gimple_assign_single_p (stmt)) + if (TREE_CODE (expr) == BIT_FIELD_REF + || TREE_CODE (expr) == IMAGPART_EXPR + || TREE_CODE (expr) == REALPART_EXPR) { - sra_walk_expr (gimple_assign_lhs_ptr (stmt), gsi, true, fns); - return; + expr = TREE_OPERAND (expr, 0); + partial_ref = true; } + else + partial_ref = false; - lhs = gimple_assign_lhs (stmt); - rhs = gimple_assign_rhs1 (stmt); - lhs_elt = maybe_lookup_element_for_expr (lhs); - rhs_elt = maybe_lookup_element_for_expr (rhs); + /* We need to dive through V_C_Es in order to get the size of its parameter + and not the result type. Ada produces such statements. We are also + capable of handling the topmost V_C_E but not any of those buried in other + handled components. */ + if (TREE_CODE (expr) == VIEW_CONVERT_EXPR) + expr = TREE_OPERAND (expr, 0); - /* If both sides are scalarizable, this is a COPY operation. */ - if (lhs_elt && rhs_elt) + if (contains_view_convert_expr_p (expr)) { - fns->copy (lhs_elt, rhs_elt, gsi); - return; + disqualify_base_of_expr (expr, "V_C_E under a different handled " + "component."); + return NULL; } - /* If the RHS is scalarizable, handle it. There are only two cases. */ - if (rhs_elt) + switch (TREE_CODE (expr)) { - if (!rhs_elt->is_scalar && !TREE_SIDE_EFFECTS (lhs)) - fns->ldst (rhs_elt, lhs, gsi, false); - else - fns->use (rhs_elt, gimple_assign_rhs1_ptr (stmt), gsi, false, false); - } - - /* If it isn't scalarizable, there may be scalarizable variables within, so - check for a call or else walk the RHS to see if we need to do any - copy-in operations. We need to do it before the LHS is scalarized so - that the statements get inserted in the proper place, before any - copy-out operations. */ - else - sra_walk_expr (gimple_assign_rhs1_ptr (stmt), gsi, false, fns); + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + case COMPONENT_REF: + case ARRAY_REF: + case ARRAY_RANGE_REF: + ret = create_access (expr, write); + break; - /* Likewise, handle the LHS being scalarizable. We have cases similar - to those above, but also want to handle RHS being constant. */ - if (lhs_elt) - { - /* If this is an assignment from a constant, or constructor, then - we have access to all of the elements individually. Invoke INIT. */ - if (TREE_CODE (rhs) == COMPLEX_EXPR - || TREE_CODE (rhs) == COMPLEX_CST - || TREE_CODE (rhs) == CONSTRUCTOR) - fns->init (lhs_elt, rhs, gsi); - - /* If this is an assignment from read-only memory, treat this as if - we'd been passed the constructor directly. Invoke INIT. */ - else if (TREE_CODE (rhs) == VAR_DECL - && TREE_STATIC (rhs) - && !DECL_EXTERNAL (rhs) - && TREE_READONLY (rhs) - && targetm.binds_local_p (rhs)) - fns->init (lhs_elt, DECL_INITIAL (rhs), gsi); - - /* If this is a copy from a non-scalarizable lvalue, invoke LDST. - The lvalue requirement prevents us from trying to directly scalarize - the result of a function call. Which would result in trying to call - the function multiple times, and other evil things. */ - else if (!lhs_elt->is_scalar - && !TREE_SIDE_EFFECTS (rhs) && is_gimple_addressable (rhs)) - fns->ldst (lhs_elt, rhs, gsi, true); - - /* Otherwise we're being used in some context that requires the - aggregate to be seen as a whole. Invoke USE. */ - else - fns->use (lhs_elt, gimple_assign_lhs_ptr (stmt), gsi, true, false); + default: + break; } - /* Similarly to above, LHS_ELT being null only means that the LHS as a - whole is not a scalarizable reference. There may be occurrences of - scalarizable variables within, which implies a USE. */ - else - sra_walk_expr (gimple_assign_lhs_ptr (stmt), gsi, true, fns); + if (write && partial_ref && ret) + ret->grp_partial_lhs = 1; + + return ret; } -/* Entry point to the walk functions. Search the entire function, - invoking the callbacks in FNS on each of the references to - scalarizable variables. */ +/* Callback of scan_function. Scan expression EXPR and create access + structures for all accesses to candidates for scalarization. Return true if + any access has been inserted. */ -static void -sra_walk_function (const struct sra_walk_fns *fns) +static bool +build_access_from_expr (tree *expr_ptr, + gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, bool write, + void *data ATTRIBUTE_UNUSED) { - basic_block bb; - gimple_stmt_iterator si, ni; - - /* ??? Phase 4 could derive some benefit to walking the function in - dominator tree order. */ - - FOR_EACH_BB (bb) - for (si = gsi_start_bb (bb); !gsi_end_p (si); si = ni) - { - gimple stmt; - - stmt = gsi_stmt (si); - - ni = si; - gsi_next (&ni); - - /* If the statement does not reference memory, then it doesn't - make any structure references that we care about. */ - if (!gimple_references_memory_p (stmt)) - continue; - - switch (gimple_code (stmt)) - { - case GIMPLE_RETURN: - /* If we have "return <retval>" then the return value is - already exposed for our pleasure. Walk it as a USE to - force all the components back in place for the return. - */ - if (gimple_return_retval (stmt) == NULL_TREE) - ; - else - sra_walk_expr (gimple_return_retval_ptr (stmt), &si, false, - fns); - break; - - case GIMPLE_ASSIGN: - sra_walk_gimple_assign (stmt, &si, fns); - break; - case GIMPLE_CALL: - sra_walk_gimple_call (stmt, &si, fns); - break; - case GIMPLE_ASM: - sra_walk_gimple_asm (stmt, &si, fns); - break; - - default: - break; - } - } + return build_access_from_expr_1 (expr_ptr, write) != NULL; } - -/* Phase One: Scan all referenced variables in the program looking for - structures that could be decomposed. */ +/* Disqualify LHS and RHS for scalarization if STMT must end its basic block in + modes in which it matters, return true iff they have been disqualified. RHS + may be NULL, in that case ignore it. If we scalarize an aggregate in + intra-SRA we may need to add statements after each statement. This is not + possible if a statement unconditionally has to end the basic block. */ static bool -find_candidates_for_sra (void) +disqualify_ops_if_throwing_stmt (gimple stmt, tree lhs, tree rhs) { - bool any_set = false; - tree var; - referenced_var_iterator rvi; - - FOR_EACH_REFERENCED_VAR (var, rvi) + if (stmt_can_throw_internal (stmt) || stmt_ends_bb_p (stmt)) { - if (decl_can_be_decomposed_p (var)) - { - bitmap_set_bit (sra_candidates, DECL_UID (var)); - any_set = true; - } + disqualify_base_of_expr (lhs, "LHS of a throwing stmt."); + if (rhs) + disqualify_base_of_expr (rhs, "RHS of a throwing stmt."); + return true; } - - return any_set; + return false; } - -/* Phase Two: Scan all references to scalarizable variables. Count the - number of times they are used or copied respectively. */ -/* Callbacks to fill in SRA_WALK_FNS. Everything but USE is - considered a copy, because we can decompose the reference such that - the sub-elements needn't be contiguous. */ +/* Result code for scan_assign callback for scan_function. */ +enum scan_assign_result { SRA_SA_NONE, /* nothing done for the stmt */ + SRA_SA_PROCESSED, /* stmt analyzed/changed */ + SRA_SA_REMOVED }; /* stmt redundant and eliminated */ -static void -scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED, - gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, - bool is_output ATTRIBUTE_UNUSED, bool use_all ATTRIBUTE_UNUSED) -{ - elt->n_uses += 1; -} -static void -scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, - gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED) -{ - lhs_elt->n_copies += 1; - rhs_elt->n_copies += 1; -} - -static void -scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED, - gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED) -{ - lhs_elt->n_copies += 1; -} +/* Callback of scan_function. Scan expressions occuring in the statement + pointed to by STMT_EXPR, create access structures for all accesses to + candidates for scalarization and remove those candidates which occur in + statements or expressions that prevent them from being split apart. Return + true if any access has been inserted. */ -static void -scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED, - gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, - bool is_output ATTRIBUTE_UNUSED) +static enum scan_assign_result +build_accesses_from_assign (gimple *stmt_ptr, + gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) { - elt->n_copies += 1; -} + gimple stmt = *stmt_ptr; + tree *lhs_ptr, *rhs_ptr; + struct access *lacc, *racc; -/* Dump the values we collected during the scanning phase. */ - -static void -scan_dump (struct sra_elt *elt) -{ - struct sra_elt *c; - - dump_sra_elt_name (dump_file, elt); - fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies); - - for (c = elt->children; c ; c = c->sibling) - scan_dump (c); - - for (c = elt->groups; c ; c = c->sibling) - scan_dump (c); -} + if (!gimple_assign_single_p (stmt)) + return SRA_SA_NONE; -/* Entry point to phase 2. Scan the entire function, building up - scalarization data structures, recording copies and uses. */ + lhs_ptr = gimple_assign_lhs_ptr (stmt); + rhs_ptr = gimple_assign_rhs1_ptr (stmt); -static void -scan_function (void) -{ - static const struct sra_walk_fns fns = { - scan_use, scan_copy, scan_init, scan_ldst, true - }; - bitmap_iterator bi; + if (disqualify_ops_if_throwing_stmt (stmt, *lhs_ptr, *rhs_ptr)) + return SRA_SA_NONE; - sra_walk_function (&fns); + racc = build_access_from_expr_1 (rhs_ptr, false); + lacc = build_access_from_expr_1 (lhs_ptr, true); - if (dump_file && (dump_flags & TDF_DETAILS)) + if (lacc && racc + && !lacc->grp_unscalarizable_region + && !racc->grp_unscalarizable_region + && AGGREGATE_TYPE_P (TREE_TYPE (*lhs_ptr)) + /* FIXME: Turn the following line into an assert after PR 40058 is + fixed. */ + && lacc->size == racc->size + && useless_type_conversion_p (lacc->type, racc->type)) { - unsigned i; + struct assign_link *link; - fputs ("\nScan results:\n", dump_file); - EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi) - { - tree var = referenced_var (i); - struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); - if (elt) - scan_dump (elt); - } - fputc ('\n', dump_file); - } -} - -/* Phase Three: Make decisions about which variables to scalarize, if any. - All elements to be scalarized have replacement variables made for them. */ + link = (struct assign_link *) pool_alloc (link_pool); + memset (link, 0, sizeof (struct assign_link)); -/* A subroutine of build_element_name. Recursively build the element - name on the obstack. */ + link->lacc = lacc; + link->racc = racc; -static void -build_element_name_1 (struct sra_elt *elt) -{ - tree t; - char buffer[32]; - - if (elt->parent) - { - build_element_name_1 (elt->parent); - obstack_1grow (&sra_obstack, '$'); - - if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE) - { - if (elt->element == integer_zero_node) - obstack_grow (&sra_obstack, "real", 4); - else - obstack_grow (&sra_obstack, "imag", 4); - return; - } + add_link_to_rhs (racc, link); } - t = elt->element; - if (TREE_CODE (t) == INTEGER_CST) - { - /* ??? Eh. Don't bother doing double-wide printing. */ - sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t)); - obstack_grow (&sra_obstack, buffer, strlen (buffer)); - } - else if (TREE_CODE (t) == BIT_FIELD_REF) - { - sprintf (buffer, "B" HOST_WIDE_INT_PRINT_DEC, - tree_low_cst (TREE_OPERAND (t, 2), 1)); - obstack_grow (&sra_obstack, buffer, strlen (buffer)); - sprintf (buffer, "F" HOST_WIDE_INT_PRINT_DEC, - tree_low_cst (TREE_OPERAND (t, 1), 1)); - obstack_grow (&sra_obstack, buffer, strlen (buffer)); - } - else - { - tree name = DECL_NAME (t); - if (name) - obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name), - IDENTIFIER_LENGTH (name)); - else - { - sprintf (buffer, "D%u", DECL_UID (t)); - obstack_grow (&sra_obstack, buffer, strlen (buffer)); - } - } + return (lacc || racc) ? SRA_SA_PROCESSED : SRA_SA_NONE; } -/* Construct a pretty variable name for an element's replacement variable. - The name is built on the obstack. */ +/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine + GIMPLE_ASM operands with memory constrains which cannot be scalarized. */ -static char * -build_element_name (struct sra_elt *elt) +static bool +asm_visit_addr (gimple stmt ATTRIBUTE_UNUSED, tree op, + void *data ATTRIBUTE_UNUSED) { - build_element_name_1 (elt); - obstack_1grow (&sra_obstack, '\0'); - return XOBFINISH (&sra_obstack, char *); + if (DECL_P (op)) + disqualify_candidate (op, "Non-scalarizable GIMPLE_ASM operand."); + + return false; } -/* Insert a gimple_seq SEQ on all the outgoing edges out of BB. Note that - if BB has more than one edge, STMT will be replicated for each edge. - Also, abnormal edges will be ignored. */ -static void -insert_edge_copies_seq (gimple_seq seq, basic_block bb) -{ - edge e; - edge_iterator ei; - unsigned n_copies = -1; +/* Scan function and look for interesting statements. Return true if any has + been found or processed, as indicated by callbacks. SCAN_EXPR is a callback + called on all expressions within statements except assign statements and + those deemed entirely unsuitable for some reason (all operands in such + statements and expression are removed from candidate_bitmap). SCAN_ASSIGN + is a callback called on all assign statements, HANDLE_SSA_DEFS is a callback + called on assign statements and those call statements which have a lhs and + it is the only callback which can be NULL. ANALYSIS_STAGE is true when + running in the analysis stage of a pass and thus no statement is being + modified. DATA is a pointer passed to all callbacks. If any single + callback returns true, this function also returns true, otherwise it returns + false. */ - FOR_EACH_EDGE (e, ei, bb->succs) - if (!(e->flags & EDGE_ABNORMAL)) - n_copies++; +static bool +scan_function (bool (*scan_expr) (tree *, gimple_stmt_iterator *, bool, void *), + enum scan_assign_result (*scan_assign) (gimple *, + gimple_stmt_iterator *, + void *), + bool (*handle_ssa_defs)(gimple, void *), + bool analysis_stage, void *data) +{ + gimple_stmt_iterator gsi; + basic_block bb; + unsigned i; + tree *t; + bool ret = false; - FOR_EACH_EDGE (e, ei, bb->succs) - if (!(e->flags & EDGE_ABNORMAL)) - gsi_insert_seq_on_edge (e, n_copies-- > 0 ? gimple_seq_copy (seq) : seq); -} + FOR_EACH_BB (bb) + { + bool bb_changed = false; -/* Instantiate an element as an independent variable. */ + gsi = gsi_start_bb (bb); + while (!gsi_end_p (gsi)) + { + gimple stmt = gsi_stmt (gsi); + enum scan_assign_result assign_result; + bool any = false, deleted = false; -static void -instantiate_element (struct sra_elt *elt) -{ - struct sra_elt *base_elt; - tree var, base; - bool nowarn = TREE_NO_WARNING (elt->element); + switch (gimple_code (stmt)) + { + case GIMPLE_RETURN: + t = gimple_return_retval_ptr (stmt); + if (*t != NULL_TREE) + any |= scan_expr (t, &gsi, false, data); + break; - for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent) - if (!nowarn) - nowarn = TREE_NO_WARNING (base_elt->parent->element); - base = base_elt->element; + case GIMPLE_ASSIGN: + assign_result = scan_assign (&stmt, &gsi, data); + any |= assign_result == SRA_SA_PROCESSED; + deleted = assign_result == SRA_SA_REMOVED; + if (handle_ssa_defs && assign_result != SRA_SA_REMOVED) + any |= handle_ssa_defs (stmt, data); + break; - elt->replacement = var = make_rename_temp (elt->type, "SR"); + case GIMPLE_CALL: + /* Operands must be processed before the lhs. */ + for (i = 0; i < gimple_call_num_args (stmt); i++) + { + tree *argp = gimple_call_arg_ptr (stmt, i); + any |= scan_expr (argp, &gsi, false, data); + } - if (DECL_P (elt->element) - && !tree_int_cst_equal (DECL_SIZE (var), DECL_SIZE (elt->element))) - { - DECL_SIZE (var) = DECL_SIZE (elt->element); - DECL_SIZE_UNIT (var) = DECL_SIZE_UNIT (elt->element); - - elt->in_bitfld_block = 1; - elt->replacement = fold_build3 (BIT_FIELD_REF, elt->type, var, - DECL_SIZE (var), - BYTES_BIG_ENDIAN - ? size_binop (MINUS_EXPR, - TYPE_SIZE (elt->type), - DECL_SIZE (var)) - : bitsize_int (0)); - } + if (gimple_call_lhs (stmt)) + { + tree *lhs_ptr = gimple_call_lhs_ptr (stmt); + if (!analysis_stage + || !disqualify_ops_if_throwing_stmt (stmt, + *lhs_ptr, NULL)) + { + any |= scan_expr (lhs_ptr, &gsi, true, data); + if (handle_ssa_defs) + any |= handle_ssa_defs (stmt, data); + } + } + break; - /* For vectors, if used on the left hand side with BIT_FIELD_REF, - they are not a gimple register. */ - if (TREE_CODE (TREE_TYPE (var)) == VECTOR_TYPE && elt->is_vector_lhs) - DECL_GIMPLE_REG_P (var) = 0; + case GIMPLE_ASM: - DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base); - DECL_ARTIFICIAL (var) = 1; + if (analysis_stage) + walk_stmt_load_store_addr_ops (stmt, NULL, NULL, NULL, + asm_visit_addr); + for (i = 0; i < gimple_asm_ninputs (stmt); i++) + { + tree *op = &TREE_VALUE (gimple_asm_input_op (stmt, i)); + any |= scan_expr (op, &gsi, false, data); + } + for (i = 0; i < gimple_asm_noutputs (stmt); i++) + { + tree *op = &TREE_VALUE (gimple_asm_output_op (stmt, i)); + any |= scan_expr (op, &gsi, true, data); + } - if (TREE_THIS_VOLATILE (elt->type)) - { - TREE_THIS_VOLATILE (var) = 1; - TREE_SIDE_EFFECTS (var) = 1; - } + default: + break; + } - if (DECL_NAME (base) && !DECL_IGNORED_P (base)) - { - char *pretty_name = build_element_name (elt); - DECL_NAME (var) = get_identifier (pretty_name); - obstack_free (&sra_obstack, pretty_name); - - SET_DECL_DEBUG_EXPR (var, generate_element_ref (elt)); - DECL_DEBUG_EXPR_IS_FROM (var) = 1; - - DECL_IGNORED_P (var) = 0; - TREE_NO_WARNING (var) = nowarn; - } - else - { - DECL_IGNORED_P (var) = 1; - /* ??? We can't generate any warning that would be meaningful. */ - TREE_NO_WARNING (var) = 1; - } + if (any) + { + ret = true; + bb_changed = true; - /* Zero-initialize bit-field scalarization variables, to avoid - triggering undefined behavior. */ - if (TREE_CODE (elt->element) == BIT_FIELD_REF - || (var != elt->replacement - && TREE_CODE (elt->replacement) == BIT_FIELD_REF)) - { - gimple_seq init = sra_build_assignment (var, - fold_convert (TREE_TYPE (var), - integer_zero_node) - ); - insert_edge_copies_seq (init, ENTRY_BLOCK_PTR); - mark_all_v_defs_seq (init); + if (!analysis_stage) + { + update_stmt (stmt); + if (!stmt_could_throw_p (stmt)) + remove_stmt_from_eh_region (stmt); + } + } + if (deleted) + bb_changed = true; + else + { + gsi_next (&gsi); + ret = true; + } + } + if (!analysis_stage && bb_changed) + gimple_purge_dead_eh_edges (bb); } - if (dump_file) - { - fputs (" ", dump_file); - dump_sra_elt_name (dump_file, elt); - fputs (" -> ", dump_file); - print_generic_expr (dump_file, var, dump_flags); - fputc ('\n', dump_file); - } + return ret; } -/* Make one pass across an element tree deciding whether or not it's - profitable to instantiate individual leaf scalars. +/* Helper of QSORT function. There are pointers to accesses in the array. An + access is considered smaller than another if it has smaller offset or if the + offsets are the same but is size is bigger. */ - PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES - fields all the way up the tree. */ +static int +compare_access_positions (const void *a, const void *b) +{ + const access_p *fp1 = (const access_p *) a; + const access_p *fp2 = (const access_p *) b; + const access_p f1 = *fp1; + const access_p f2 = *fp2; + + if (f1->offset != f2->offset) + return f1->offset < f2->offset ? -1 : 1; + + if (f1->size == f2->size) + { + /* Put any non-aggregate type before any aggregate type. */ + if (!is_gimple_reg_type (f1->type) + && is_gimple_reg_type (f2->type)) + return 1; + else if (is_gimple_reg_type (f1->type) + && !is_gimple_reg_type (f2->type)) + return -1; + /* Put the integral type with the bigger precision first. */ + else if (INTEGRAL_TYPE_P (f1->type) + && INTEGRAL_TYPE_P (f2->type)) + return TYPE_PRECISION (f1->type) > TYPE_PRECISION (f2->type) ? -1 : 1; + /* Put any integral type with non-full precision last. */ + else if (INTEGRAL_TYPE_P (f1->type) + && (TREE_INT_CST_LOW (TYPE_SIZE (f1->type)) + != TYPE_PRECISION (f1->type))) + return 1; + else if (INTEGRAL_TYPE_P (f2->type) + && (TREE_INT_CST_LOW (TYPE_SIZE (f2->type)) + != TYPE_PRECISION (f2->type))) + return -1; + /* Stabilize the sort. */ + return TYPE_UID (f1->type) - TYPE_UID (f2->type); + } + + /* We want the bigger accesses first, thus the opposite operator in the next + line: */ + return f1->size > f2->size ? -1 : 1; +} + + +/* Append a name of the declaration to the name obstack. A helper function for + make_fancy_name. */ static void -decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses, - unsigned int parent_copies) +make_fancy_decl_name (tree decl) { - if (dump_file && !elt->parent) - { - fputs ("Initial instantiation for ", dump_file); - dump_sra_elt_name (dump_file, elt); - fputc ('\n', dump_file); - } - - if (elt->cannot_scalarize) - return; + char buffer[32]; - if (elt->is_scalar) - { - /* The decision is simple: instantiate if we're used more frequently - than the parent needs to be seen as a complete unit. */ - if (elt->n_uses + elt->n_copies + parent_copies > parent_uses) - instantiate_element (elt); - } + tree name = DECL_NAME (decl); + if (name) + obstack_grow (&name_obstack, IDENTIFIER_POINTER (name), + IDENTIFIER_LENGTH (name)); else { - struct sra_elt *c, *group; - unsigned int this_uses = elt->n_uses + parent_uses; - unsigned int this_copies = elt->n_copies + parent_copies; - - /* Consider groups of sub-elements as weighing in favour of - instantiation whatever their size. */ - for (group = elt->groups; group ; group = group->sibling) - FOR_EACH_ACTUAL_CHILD (c, group) - { - c->n_uses += group->n_uses; - c->n_copies += group->n_copies; - } - - for (c = elt->children; c ; c = c->sibling) - decide_instantiation_1 (c, this_uses, this_copies); + sprintf (buffer, "D%u", DECL_UID (decl)); + obstack_grow (&name_obstack, buffer, strlen (buffer)); } } -/* Compute the size and number of all instantiated elements below ELT. - We will only care about this if the size of the complete structure - fits in a HOST_WIDE_INT, so we don't have to worry about overflow. */ +/* Helper for make_fancy_name. */ -static unsigned int -sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep) +static void +make_fancy_name_1 (tree expr) { - if (elt->replacement) + char buffer[32]; + tree index; + + if (DECL_P (expr)) { - *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type)); - return 1; + make_fancy_decl_name (expr); + return; } - else - { - struct sra_elt *c; - unsigned int count = 0; - - for (c = elt->children; c ; c = c->sibling) - count += sum_instantiated_sizes (c, sizep); - return count; - } -} + switch (TREE_CODE (expr)) + { + case COMPONENT_REF: + make_fancy_name_1 (TREE_OPERAND (expr, 0)); + obstack_1grow (&name_obstack, '$'); + make_fancy_decl_name (TREE_OPERAND (expr, 1)); + break; -/* Instantiate fields in ELT->TYPE that are not currently present as - children of ELT. */ + case ARRAY_REF: + make_fancy_name_1 (TREE_OPERAND (expr, 0)); + obstack_1grow (&name_obstack, '$'); + /* Arrays with only one element may not have a constant as their + index. */ + index = TREE_OPERAND (expr, 1); + if (TREE_CODE (index) != INTEGER_CST) + break; + sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (index)); + obstack_grow (&name_obstack, buffer, strlen (buffer)); -static void instantiate_missing_elements (struct sra_elt *elt); + break; -static struct sra_elt * -instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type) -{ - struct sra_elt *sub = lookup_element (elt, child, type, INSERT); - if (sub->is_scalar) - { - if (sub->replacement == NULL) - instantiate_element (sub); + case BIT_FIELD_REF: + case REALPART_EXPR: + case IMAGPART_EXPR: + gcc_unreachable (); /* we treat these as scalars. */ + break; + default: + break; } - else - instantiate_missing_elements (sub); - return sub; } -/* Obtain the canonical type for field F of ELEMENT. */ +/* Create a human readable name for replacement variable of ACCESS. */ -static tree -canon_type_for_field (tree f, tree element) +static char * +make_fancy_name (tree expr) { - tree field_type = TREE_TYPE (f); - - /* canonicalize_component_ref() unwidens some bit-field types (not - marked as DECL_BIT_FIELD in C++), so we must do the same, lest we - may introduce type mismatches. */ - if (INTEGRAL_TYPE_P (field_type) - && DECL_MODE (f) != TYPE_MODE (field_type)) - field_type = TREE_TYPE (get_unwidened (build3 (COMPONENT_REF, - field_type, - element, - f, NULL_TREE), - NULL_TREE)); - - return field_type; + make_fancy_name_1 (expr); + obstack_1grow (&name_obstack, '\0'); + return XOBFINISH (&name_obstack, char *); } -/* Look for adjacent fields of ELT starting at F that we'd like to - scalarize as a single variable. Return the last field of the - group. */ +/* Helper function for build_ref_for_offset. */ -static tree -try_instantiate_multiple_fields (struct sra_elt *elt, tree f) +static bool +build_ref_for_offset_1 (tree *res, tree type, HOST_WIDE_INT offset, + tree exp_type) { - int count; - unsigned HOST_WIDE_INT align, bit, size, alchk; - enum machine_mode mode; - tree first = f, prev; - tree type, var; - struct sra_elt *block; - - /* Point fields are typically best handled as standalone entities. */ - if (POINTER_TYPE_P (TREE_TYPE (f))) - return f; - - if (!is_sra_scalar_type (TREE_TYPE (f)) - || !host_integerp (DECL_FIELD_OFFSET (f), 1) - || !host_integerp (DECL_FIELD_BIT_OFFSET (f), 1) - || !host_integerp (DECL_SIZE (f), 1) - || lookup_element (elt, f, NULL, NO_INSERT)) - return f; - - block = elt; - - /* For complex and array objects, there are going to be integer - literals as child elements. In this case, we can't just take the - alignment and mode of the decl, so we instead rely on the element - type. - - ??? We could try to infer additional alignment from the full - object declaration and the location of the sub-elements we're - accessing. */ - for (count = 0; !DECL_P (block->element); count++) - block = block->parent; - - align = DECL_ALIGN (block->element); - alchk = GET_MODE_BITSIZE (DECL_MODE (block->element)); - - if (count) - { - type = TREE_TYPE (block->element); - while (count--) - type = TREE_TYPE (type); - - align = TYPE_ALIGN (type); - alchk = GET_MODE_BITSIZE (TYPE_MODE (type)); - } - - if (align < alchk) - align = alchk; - - /* Coalescing wider fields is probably pointless and - inefficient. */ - if (align > BITS_PER_WORD) - align = BITS_PER_WORD; - - bit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT - + tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1); - size = tree_low_cst (DECL_SIZE (f), 1); - - alchk = align - 1; - alchk = ~alchk; - - if ((bit & alchk) != ((bit + size - 1) & alchk)) - return f; - - /* Find adjacent fields in the same alignment word. */ - - for (prev = f, f = TREE_CHAIN (f); - f && TREE_CODE (f) == FIELD_DECL - && is_sra_scalar_type (TREE_TYPE (f)) - && host_integerp (DECL_FIELD_OFFSET (f), 1) - && host_integerp (DECL_FIELD_BIT_OFFSET (f), 1) - && host_integerp (DECL_SIZE (f), 1) - && !lookup_element (elt, f, NULL, NO_INSERT); - prev = f, f = TREE_CHAIN (f)) + while (1) { - unsigned HOST_WIDE_INT nbit, nsize; - - nbit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT - + tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1); - nsize = tree_low_cst (DECL_SIZE (f), 1); - - if (bit + size == nbit) - { - if ((bit & alchk) != ((nbit + nsize - 1) & alchk)) - { - /* If we're at an alignment boundary, don't bother - growing alignment such that we can include this next - field. */ - if ((nbit & alchk) - || GET_MODE_BITSIZE (DECL_MODE (f)) <= align) - break; - - align = GET_MODE_BITSIZE (DECL_MODE (f)); - alchk = align - 1; - alchk = ~alchk; - - if ((bit & alchk) != ((nbit + nsize - 1) & alchk)) - break; - } - size += nsize; - } - else if (nbit + nsize == bit) - { - if ((nbit & alchk) != ((bit + size - 1) & alchk)) - { - if ((bit & alchk) - || GET_MODE_BITSIZE (DECL_MODE (f)) <= align) - break; + tree fld; + tree tr_size, index; + HOST_WIDE_INT el_size; - align = GET_MODE_BITSIZE (DECL_MODE (f)); - alchk = align - 1; - alchk = ~alchk; - - if ((nbit & alchk) != ((bit + size - 1) & alchk)) - break; - } - bit = nbit; - size += nsize; - } - else - break; - } - - f = prev; - - if (f == first) - return f; - - gcc_assert ((bit & alchk) == ((bit + size - 1) & alchk)); - - /* Try to widen the bit range so as to cover padding bits as well. */ - - if ((bit & ~alchk) || size != align) - { - unsigned HOST_WIDE_INT mbit = bit & alchk; - unsigned HOST_WIDE_INT msize = align; + if (offset == 0 && exp_type + && useless_type_conversion_p (exp_type, type)) + return true; - for (f = TYPE_FIELDS (elt->type); - f; f = TREE_CHAIN (f)) + switch (TREE_CODE (type)) { - unsigned HOST_WIDE_INT fbit, fsize; - - /* Skip the fields from first to prev. */ - if (f == first) - { - f = prev; - continue; - } - - if (!(TREE_CODE (f) == FIELD_DECL - && host_integerp (DECL_FIELD_OFFSET (f), 1) - && host_integerp (DECL_FIELD_BIT_OFFSET (f), 1))) - continue; - - fbit = tree_low_cst (DECL_FIELD_OFFSET (f), 1) * BITS_PER_UNIT - + tree_low_cst (DECL_FIELD_BIT_OFFSET (f), 1); - - /* If we're past the selected word, we're fine. */ - if ((bit & alchk) < (fbit & alchk)) - continue; - - if (host_integerp (DECL_SIZE (f), 1)) - fsize = tree_low_cst (DECL_SIZE (f), 1); - else - /* Assume a variable-sized field takes up all space till - the end of the word. ??? Endianness issues? */ - fsize = align - (fbit & alchk); - - if ((fbit & alchk) < (bit & alchk)) + case UNION_TYPE: + case QUAL_UNION_TYPE: + case RECORD_TYPE: + /* Some ADA records are half-unions, treat all of them the same. */ + for (fld = TYPE_FIELDS (type); fld; fld = TREE_CHAIN (fld)) { - /* A large field might start at a previous word and - extend into the selected word. Exclude those - bits. ??? Endianness issues? */ - HOST_WIDE_INT diff = fbit + fsize - mbit; + HOST_WIDE_INT pos, size; + tree expr, *expr_ptr; - if (diff <= 0) + if (TREE_CODE (fld) != FIELD_DECL) continue; - mbit += diff; - msize -= diff; - } - else - { - /* Non-overlapping, great. */ - if (fbit + fsize <= mbit - || mbit + msize <= fbit) + pos = int_bit_position (fld); + gcc_assert (TREE_CODE (type) == RECORD_TYPE || pos == 0); + size = tree_low_cst (DECL_SIZE (fld), 1); + if (pos > offset || (pos + size) <= offset) continue; - if (fbit <= mbit) + if (res) { - unsigned HOST_WIDE_INT diff = fbit + fsize - mbit; - mbit += diff; - msize -= diff; + expr = build3 (COMPONENT_REF, TREE_TYPE (fld), *res, fld, + NULL_TREE); + expr_ptr = &expr; } - else if (fbit > mbit) - msize -= (mbit + msize - fbit); else - gcc_unreachable (); + expr_ptr = NULL; + if (build_ref_for_offset_1 (expr_ptr, TREE_TYPE (fld), + offset - pos, exp_type)) + { + if (res) + *res = expr; + return true; + } } - } - - bit = mbit; - size = msize; - } + return false; - /* Now we know the bit range we're interested in. Find the smallest - machine mode we can use to access it. */ + case ARRAY_TYPE: + tr_size = TYPE_SIZE (TREE_TYPE (type)); + if (!tr_size || !host_integerp (tr_size, 1)) + return false; + el_size = tree_low_cst (tr_size, 1); - for (mode = smallest_mode_for_size (size, MODE_INT); - ; - mode = GET_MODE_WIDER_MODE (mode)) - { - gcc_assert (mode != VOIDmode); + if (res) + { + index = build_int_cst (TYPE_DOMAIN (type), offset / el_size); + if (!integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (type)))) + index = int_const_binop (PLUS_EXPR, index, + TYPE_MIN_VALUE (TYPE_DOMAIN (type)), + 0); + *res = build4 (ARRAY_REF, TREE_TYPE (type), *res, index, + NULL_TREE, NULL_TREE); + } + offset = offset % el_size; + type = TREE_TYPE (type); + break; - alchk = GET_MODE_PRECISION (mode) - 1; - alchk = ~alchk; + default: + if (offset != 0) + return false; - if ((bit & alchk) == ((bit + size - 1) & alchk)) - break; + if (exp_type) + return false; + else + return true; + } } +} - gcc_assert (~alchk < align); - - /* Create the field group as a single variable. */ - - /* We used to create a type for the mode above, but size turns - to be out not of mode-size. As we need a matching type - to build a BIT_FIELD_REF, use a nonstandard integer type as - fallback. */ - type = lang_hooks.types.type_for_size (size, 1); - if (!type || TYPE_PRECISION (type) != size) - type = build_nonstandard_integer_type (size, 1); - gcc_assert (type); - var = build3 (BIT_FIELD_REF, type, NULL_TREE, - bitsize_int (size), bitsize_int (bit)); - - block = instantiate_missing_elements_1 (elt, var, type); - gcc_assert (block && block->is_scalar); - - var = block->replacement; - block->in_bitfld_block = 2; - - /* Add the member fields to the group, such that they access - portions of the group variable. */ - - for (f = first; f != TREE_CHAIN (prev); f = TREE_CHAIN (f)) - { - tree field_type = canon_type_for_field (f, elt->element); - struct sra_elt *fld = lookup_element (block, f, field_type, INSERT); - - gcc_assert (fld && fld->is_scalar && !fld->replacement); - - fld->replacement = fold_build3 (BIT_FIELD_REF, field_type, var, - bitsize_int (TYPE_PRECISION (field_type)), - bitsize_int - ((TREE_INT_CST_LOW (DECL_FIELD_OFFSET (f)) - * BITS_PER_UNIT - + (TREE_INT_CST_LOW - (DECL_FIELD_BIT_OFFSET (f))) - - (TREE_INT_CST_LOW - (TREE_OPERAND (block->element, 2)))) - & ~alchk)); - fld->in_bitfld_block = 1; - } +/* Construct an expression that would reference a part of aggregate *EXPR of + type TYPE at the given OFFSET of the type EXP_TYPE. If EXPR is NULL, the + function only determines whether it can build such a reference without + actually doing it. - return prev; -} + FIXME: Eventually this should be replaced with + maybe_fold_offset_to_reference() from tree-ssa-ccp.c but that requires a + minor rewrite of fold_stmt. + */ -static void -instantiate_missing_elements (struct sra_elt *elt) +static bool +build_ref_for_offset (tree *expr, tree type, HOST_WIDE_INT offset, + tree exp_type, bool allow_ptr) { - tree type = elt->type; - - switch (TREE_CODE (type)) + if (allow_ptr && POINTER_TYPE_P (type)) { - case RECORD_TYPE: - { - tree f; - for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) - if (TREE_CODE (f) == FIELD_DECL) - { - tree last = try_instantiate_multiple_fields (elt, f); - - if (last != f) - { - f = last; - continue; - } - - instantiate_missing_elements_1 (elt, f, - canon_type_for_field - (f, elt->element)); - } - break; - } - - case ARRAY_TYPE: - { - tree i, max, subtype; - - i = TYPE_MIN_VALUE (TYPE_DOMAIN (type)); - max = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); - subtype = TREE_TYPE (type); - - while (1) - { - instantiate_missing_elements_1 (elt, i, subtype); - if (tree_int_cst_equal (i, max)) - break; - i = int_const_binop (PLUS_EXPR, i, integer_one_node, true); - } - - break; - } - - case COMPLEX_TYPE: type = TREE_TYPE (type); - instantiate_missing_elements_1 (elt, integer_zero_node, type); - instantiate_missing_elements_1 (elt, integer_one_node, type); - break; - - default: - gcc_unreachable (); + if (expr) + *expr = fold_build1 (INDIRECT_REF, type, *expr); } -} -/* Return true if there is only one non aggregate field in the record, TYPE. - Return false otherwise. */ - -static bool -single_scalar_field_in_record_p (tree type) -{ - int num_fields = 0; - tree field; - if (TREE_CODE (type) != RECORD_TYPE) - return false; - - for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) - if (TREE_CODE (field) == FIELD_DECL) - { - num_fields++; - - if (num_fields == 2) - return false; - - if (AGGREGATE_TYPE_P (TREE_TYPE (field))) - return false; - } - - return true; + return build_ref_for_offset_1 (expr, type, offset, exp_type); } -/* Make one pass across an element tree deciding whether to perform block - or element copies. If we decide on element copies, instantiate all - elements. Return true if there are any instantiated sub-elements. */ +/* The very first phase of intraprocedural SRA. It marks in candidate_bitmap + those with type which is suitable for scalarization. */ static bool -decide_block_copy (struct sra_elt *elt) +find_var_candidates (void) { - struct sra_elt *c; - bool any_inst; - - /* We shouldn't be invoked on groups of sub-elements as they must - behave like their parent as far as block copy is concerned. */ - gcc_assert (!elt->is_group); + tree var, type; + referenced_var_iterator rvi; + bool ret = false; - /* If scalarization is disabled, respect it. */ - if (elt->cannot_scalarize) + FOR_EACH_REFERENCED_VAR (var, rvi) { - elt->use_block_copy = 1; + if (TREE_CODE (var) != VAR_DECL && TREE_CODE (var) != PARM_DECL) + continue; + type = TREE_TYPE (var); + + if (!AGGREGATE_TYPE_P (type) + || needs_to_live_in_memory (var) + || TREE_THIS_VOLATILE (var) + || !COMPLETE_TYPE_P (type) + || !host_integerp (TYPE_SIZE (type), 1) + || tree_low_cst (TYPE_SIZE (type), 1) == 0 + || type_internals_preclude_sra_p (type)) + continue; - if (dump_file) - { - fputs ("Scalarization disabled for ", dump_file); - dump_sra_elt_name (dump_file, elt); - fputc ('\n', dump_file); - } + bitmap_set_bit (candidate_bitmap, DECL_UID (var)); - /* Disable scalarization of sub-elements */ - for (c = elt->children; c; c = c->sibling) + if (dump_file && (dump_flags & TDF_DETAILS)) { - c->cannot_scalarize = 1; - decide_block_copy (c); + fprintf (dump_file, "Candidate (%d): ", DECL_UID (var)); + print_generic_expr (dump_file, var, 0); + fprintf (dump_file, "\n"); } + ret = true; + } - /* Groups behave like their parent. */ - for (c = elt->groups; c; c = c->sibling) - { - c->cannot_scalarize = 1; - c->use_block_copy = 1; - } + return ret; +} - return false; - } +/* Sort all accesses for the given variable, check for partial overlaps and + return NULL if there are any. If there are none, pick a representative for + each combination of offset and size and create a linked list out of them. + Return the pointer to the first representative and make sure it is the first + one in the vector of accesses. */ - /* Don't decide if we've no uses and no groups. */ - if (elt->n_uses == 0 && elt->n_copies == 0 && elt->groups == NULL) - ; +static struct access * +sort_and_splice_var_accesses (tree var) +{ + int i, j, access_count; + struct access *res, **prev_acc_ptr = &res; + VEC (access_p, heap) *access_vec; + bool first = true; + HOST_WIDE_INT low = -1, high = 0; - else if (!elt->is_scalar) - { - tree size_tree = TYPE_SIZE_UNIT (elt->type); - bool use_block_copy = true; - - /* Tradeoffs for COMPLEX types pretty much always make it better - to go ahead and split the components. */ - if (TREE_CODE (elt->type) == COMPLEX_TYPE) - use_block_copy = false; - - /* Don't bother trying to figure out the rest if the structure is - so large we can't do easy arithmetic. This also forces block - copies for variable sized structures. */ - else if (host_integerp (size_tree, 1)) - { - unsigned HOST_WIDE_INT full_size, inst_size = 0; - unsigned int max_size, max_count, inst_count, full_count; - - /* If the sra-max-structure-size parameter is 0, then the - user has not overridden the parameter and we can choose a - sensible default. */ - max_size = SRA_MAX_STRUCTURE_SIZE - ? SRA_MAX_STRUCTURE_SIZE - : MOVE_RATIO (optimize_function_for_speed_p (cfun)) * UNITS_PER_WORD; - max_count = SRA_MAX_STRUCTURE_COUNT - ? SRA_MAX_STRUCTURE_COUNT - : MOVE_RATIO (optimize_function_for_speed_p (cfun)); - - full_size = tree_low_cst (size_tree, 1); - full_count = count_type_elements (elt->type, false); - inst_count = sum_instantiated_sizes (elt, &inst_size); - - /* If there is only one scalar field in the record, don't block copy. */ - if (single_scalar_field_in_record_p (elt->type)) - use_block_copy = false; - - /* ??? What to do here. If there are two fields, and we've only - instantiated one, then instantiating the other is clearly a win. - If there are a large number of fields then the size of the copy - is much more of a factor. */ - - /* If the structure is small, and we've made copies, go ahead - and instantiate, hoping that the copies will go away. */ - if (full_size <= max_size - && (full_count - inst_count) <= max_count - && elt->n_copies > elt->n_uses) - use_block_copy = false; - else if (inst_count * 100 >= full_count * SRA_FIELD_STRUCTURE_RATIO - && inst_size * 100 >= full_size * SRA_FIELD_STRUCTURE_RATIO) - use_block_copy = false; - - /* In order to avoid block copy, we have to be able to instantiate - all elements of the type. See if this is possible. */ - if (!use_block_copy - && (!can_completely_scalarize_p (elt) - || !type_can_instantiate_all_elements (elt->type))) - use_block_copy = true; - } + access_vec = get_base_access_vector (var); + if (!access_vec) + return NULL; + access_count = VEC_length (access_p, access_vec); - elt->use_block_copy = use_block_copy; + /* Sort by <OFFSET, SIZE>. */ + qsort (VEC_address (access_p, access_vec), access_count, sizeof (access_p), + compare_access_positions); - /* Groups behave like their parent. */ - for (c = elt->groups; c; c = c->sibling) - c->use_block_copy = use_block_copy; + i = 0; + while (i < access_count) + { + struct access *access = VEC_index (access_p, access_vec, i); + bool modification = access->write; + bool grp_read = !access->write; + bool grp_partial_lhs = access->grp_partial_lhs; + bool first_scalar = is_gimple_reg_type (access->type); + bool unscalarizable_region = access->grp_unscalarizable_region; - if (dump_file) + if (first || access->offset >= high) { - fprintf (dump_file, "Using %s for ", - use_block_copy ? "block-copy" : "element-copy"); - dump_sra_elt_name (dump_file, elt); - fputc ('\n', dump_file); + first = false; + low = access->offset; + high = access->offset + access->size; } + else if (access->offset > low && access->offset + access->size > high) + return NULL; + else + gcc_assert (access->offset >= low + && access->offset + access->size <= high); - if (!use_block_copy) + j = i + 1; + while (j < access_count) { - instantiate_missing_elements (elt); - return true; + struct access *ac2 = VEC_index (access_p, access_vec, j); + if (ac2->offset != access->offset || ac2->size != access->size) + break; + modification |= ac2->write; + grp_read |= !ac2->write; + grp_partial_lhs |= ac2->grp_partial_lhs; + unscalarizable_region |= ac2->grp_unscalarizable_region; + relink_to_new_repr (access, ac2); + + /* If there are both aggregate-type and scalar-type accesses with + this combination of size and offset, the comparison function + should have put the scalars first. */ + gcc_assert (first_scalar || !is_gimple_reg_type (ac2->type)); + ac2->group_representative = access; + j++; } - } - any_inst = elt->replacement != NULL; + i = j; + + access->group_representative = access; + access->grp_write = modification; + access->grp_read = grp_read; + access->grp_partial_lhs = grp_partial_lhs; + access->grp_unscalarizable_region = unscalarizable_region; + if (access->first_link) + add_access_to_work_queue (access); - for (c = elt->children; c ; c = c->sibling) - any_inst |= decide_block_copy (c); + *prev_acc_ptr = access; + prev_acc_ptr = &access->next_grp; + } - return any_inst; + gcc_assert (res == VEC_index (access_p, access_vec, 0)); + return res; } -/* Entry point to phase 3. Instantiate scalar replacement variables. */ +/* Create a variable for the given ACCESS which determines the type, name and a + few other properties. Return the variable declaration and store it also to + ACCESS->replacement. */ -static void -decide_instantiations (void) +static tree +create_access_replacement (struct access *access) { - unsigned int i; - bool cleared_any; - bitmap_head done_head; - bitmap_iterator bi; + tree repl; + + repl = create_tmp_var (access->type, "SR"); + get_var_ann (repl); + add_referenced_var (repl); + mark_sym_for_renaming (repl); - /* We cannot clear bits from a bitmap we're iterating over, - so save up all the bits to clear until the end. */ - bitmap_initialize (&done_head, &bitmap_default_obstack); - cleared_any = false; + if (!access->grp_partial_lhs + && (TREE_CODE (access->type) == COMPLEX_TYPE + || TREE_CODE (access->type) == VECTOR_TYPE)) + DECL_GIMPLE_REG_P (repl) = 1; - EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, bi) + DECL_SOURCE_LOCATION (repl) = DECL_SOURCE_LOCATION (access->base); + DECL_ARTIFICIAL (repl) = 1; + + if (DECL_NAME (access->base) + && !DECL_IGNORED_P (access->base) + && !DECL_ARTIFICIAL (access->base)) { - tree var = referenced_var (i); - struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); - if (elt) - { - decide_instantiation_1 (elt, 0, 0); - if (!decide_block_copy (elt)) - elt = NULL; - } - if (!elt) - { - bitmap_set_bit (&done_head, i); - cleared_any = true; - } + char *pretty_name = make_fancy_name (access->expr); + + DECL_NAME (repl) = get_identifier (pretty_name); + obstack_free (&name_obstack, pretty_name); + + SET_DECL_DEBUG_EXPR (repl, access->expr); + DECL_DEBUG_EXPR_IS_FROM (repl) = 1; + DECL_IGNORED_P (repl) = 0; } - if (cleared_any) + DECL_IGNORED_P (repl) = DECL_IGNORED_P (access->base); + TREE_NO_WARNING (repl) = TREE_NO_WARNING (access->base); + + if (dump_file) { - bitmap_and_compl_into (sra_candidates, &done_head); - bitmap_and_compl_into (needs_copy_in, &done_head); + fprintf (dump_file, "Created a replacement for "); + print_generic_expr (dump_file, access->base, 0); + fprintf (dump_file, " offset: %u, size: %u: ", + (unsigned) access->offset, (unsigned) access->size); + print_generic_expr (dump_file, repl, 0); + fprintf (dump_file, "\n"); } - bitmap_clear (&done_head); - - mark_set_for_renaming (sra_candidates); - if (dump_file) - fputc ('\n', dump_file); + return repl; } - -/* Phase Four: Update the function to match the replacements created. */ +/* Return ACCESS scalar replacement, create it if it does not exist yet. */ -/* Mark all the variables in virtual operands in all the statements in - LIST for renaming. */ - -static void -mark_all_v_defs_seq (gimple_seq seq) +static inline tree +get_access_replacement (struct access *access) { - gimple_stmt_iterator gsi; + gcc_assert (access->grp_to_be_replaced); + + if (access->replacement_decl) + return access->replacement_decl; - for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) - update_stmt_if_modified (gsi_stmt (gsi)); + access->replacement_decl = create_access_replacement (access); + return access->replacement_decl; } -/* Mark every replacement under ELT with TREE_NO_WARNING. */ +/* Build a subtree of accesses rooted in *ACCESS, and move the pointer in the + linked list along the way. Stop when *ACCESS is NULL or the access pointed + to it is not "within" the root. */ static void -mark_no_warning (struct sra_elt *elt) +build_access_subtree (struct access **access) { - if (!elt->all_no_warning) + struct access *root = *access, *last_child = NULL; + HOST_WIDE_INT limit = root->offset + root->size; + + *access = (*access)->next_grp; + while (*access && (*access)->offset + (*access)->size <= limit) { - if (elt->replacement) - TREE_NO_WARNING (elt->replacement) = 1; + if (!last_child) + root->first_child = *access; else - { - struct sra_elt *c; - FOR_EACH_ACTUAL_CHILD (c, elt) - mark_no_warning (c); - } - elt->all_no_warning = true; + last_child->next_sibling = *access; + last_child = *access; + + build_access_subtree (access); } } -/* Build a single level component reference to ELT rooted at BASE. */ +/* Build a tree of access representatives, ACCESS is the pointer to the first + one, others are linked in a list by the next_grp field. Decide about scalar + replacements on the way, return true iff any are to be created. */ -static tree -generate_one_element_ref (struct sra_elt *elt, tree base) +static void +build_access_trees (struct access *access) { - switch (TREE_CODE (TREE_TYPE (base))) + while (access) { - case RECORD_TYPE: - { - tree field = elt->element; - - /* We can't test elt->in_bitfld_block here because, when this is - called from instantiate_element, we haven't set this field - yet. */ - if (TREE_CODE (field) == BIT_FIELD_REF) - { - tree ret = unshare_expr (field); - TREE_OPERAND (ret, 0) = base; - return ret; - } - - /* Watch out for compatible records with differing field lists. */ - if (DECL_FIELD_CONTEXT (field) != TYPE_MAIN_VARIANT (TREE_TYPE (base))) - field = find_compatible_field (TREE_TYPE (base), field); + struct access *root = access; - return build3 (COMPONENT_REF, elt->type, base, field, NULL); - } - - case ARRAY_TYPE: - if (TREE_CODE (elt->element) == RANGE_EXPR) - return build4 (ARRAY_RANGE_REF, elt->type, base, - TREE_OPERAND (elt->element, 0), NULL, NULL); - else - return build4 (ARRAY_REF, elt->type, base, elt->element, NULL, NULL); - - case COMPLEX_TYPE: - if (elt->element == integer_zero_node) - return build1 (REALPART_EXPR, elt->type, base); - else - return build1 (IMAGPART_EXPR, elt->type, base); - - default: - gcc_unreachable (); + build_access_subtree (&access); + root->next_grp = access; } } -/* Build a full component reference to ELT rooted at its native variable. */ +/* Analyze the subtree of accesses rooted in ROOT, scheduling replacements when + both seeming beneficial and when ALLOW_REPLACEMENTS allows it. Also set + all sorts of access flags appropriately along the way, notably always ser + grp_read when MARK_READ is true and grp_write when MARK_WRITE is true. */ -static tree -generate_element_ref (struct sra_elt *elt) +static bool +analyze_access_subtree (struct access *root, bool allow_replacements, + bool mark_read, bool mark_write) { - if (elt->parent) - return generate_one_element_ref (elt, generate_element_ref (elt->parent)); - else - return elt->element; -} + struct access *child; + HOST_WIDE_INT limit = root->offset + root->size; + HOST_WIDE_INT covered_to = root->offset; + bool scalar = is_gimple_reg_type (root->type); + bool hole = false, sth_created = false; -/* Return true if BF is a bit-field that we can handle like a scalar. */ + if (mark_read) + root->grp_read = true; + else if (root->grp_read) + mark_read = true; -static bool -scalar_bitfield_p (tree bf) -{ - return (TREE_CODE (bf) == BIT_FIELD_REF - && (is_gimple_reg (TREE_OPERAND (bf, 0)) - || (TYPE_MODE (TREE_TYPE (TREE_OPERAND (bf, 0))) != BLKmode - && (!TREE_SIDE_EFFECTS (TREE_OPERAND (bf, 0)) - || (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE - (TREE_OPERAND (bf, 0)))) - <= BITS_PER_WORD))))); -} + if (mark_write) + root->grp_write = true; + else if (root->grp_write) + mark_write = true; -/* Create an assignment statement from SRC to DST. */ + if (root->grp_unscalarizable_region) + allow_replacements = false; -static gimple_seq -sra_build_assignment (tree dst, tree src) -{ - gimple stmt; - gimple_seq seq = NULL, seq2 = NULL; - /* Turning BIT_FIELD_REFs into bit operations enables other passes - to do a much better job at optimizing the code. - From dst = BIT_FIELD_REF <var, sz, off> we produce - - SR.1 = (scalar type) var; - SR.2 = SR.1 >> off; - SR.3 = SR.2 & ((1 << sz) - 1); - ... possible sign extension of SR.3 ... - dst = (destination type) SR.3; - */ - if (scalar_bitfield_p (src)) + for (child = root->first_child; child; child = child->next_sibling) { - tree var, shift, width; - tree utype, stype; - bool unsignedp = (INTEGRAL_TYPE_P (TREE_TYPE (src)) - ? TYPE_UNSIGNED (TREE_TYPE (src)) : true); - struct gimplify_ctx gctx; - - var = TREE_OPERAND (src, 0); - width = TREE_OPERAND (src, 1); - /* The offset needs to be adjusted to a right shift quantity - depending on the endianness. */ - if (BYTES_BIG_ENDIAN) - { - tree tmp = size_binop (PLUS_EXPR, width, TREE_OPERAND (src, 2)); - shift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), tmp); - } + if (!hole && child->offset < covered_to) + hole = true; else - shift = TREE_OPERAND (src, 2); - - /* In weird cases we have non-integral types for the source or - destination object. - ??? For unknown reasons we also want an unsigned scalar type. */ - stype = TREE_TYPE (var); - if (!INTEGRAL_TYPE_P (stype)) - stype = lang_hooks.types.type_for_size (TREE_INT_CST_LOW - (TYPE_SIZE (stype)), 1); - else if (!TYPE_UNSIGNED (stype)) - stype = unsigned_type_for (stype); - - utype = TREE_TYPE (dst); - if (!INTEGRAL_TYPE_P (utype)) - utype = lang_hooks.types.type_for_size (TREE_INT_CST_LOW - (TYPE_SIZE (utype)), 1); - else if (!TYPE_UNSIGNED (utype)) - utype = unsigned_type_for (utype); - - /* Convert the base var of the BIT_FIELD_REF to the scalar type - we use for computation if we cannot use it directly. */ - if (INTEGRAL_TYPE_P (TREE_TYPE (var))) - var = fold_convert (stype, var); - else - var = fold_build1 (VIEW_CONVERT_EXPR, stype, var); - - if (!integer_zerop (shift)) - var = fold_build2 (RSHIFT_EXPR, stype, var, shift); + covered_to += child->size; - /* If we need a masking operation, produce one. */ - if (TREE_INT_CST_LOW (width) == TYPE_PRECISION (stype)) - unsignedp = true; - else - { - tree one = build_int_cst_wide (stype, 1, 0); - tree mask = int_const_binop (LSHIFT_EXPR, one, width, 0); - mask = int_const_binop (MINUS_EXPR, mask, one, 0); - var = fold_build2 (BIT_AND_EXPR, stype, var, mask); - } + sth_created |= analyze_access_subtree (child, allow_replacements, + mark_read, mark_write); - /* After shifting and masking, convert to the target type. */ - var = fold_convert (utype, var); + root->grp_unscalarized_data |= child->grp_unscalarized_data; + hole |= !child->grp_covered; + } - /* Perform sign extension, if required. - ??? This should never be necessary. */ - if (!unsignedp) + if (allow_replacements && scalar && !root->first_child) + { + if (dump_file && (dump_flags & TDF_DETAILS)) { - tree signbit = int_const_binop (LSHIFT_EXPR, - build_int_cst_wide (utype, 1, 0), - size_binop (MINUS_EXPR, width, - bitsize_int (1)), 0); - - var = fold_build2 (BIT_XOR_EXPR, utype, var, signbit); - var = fold_build2 (MINUS_EXPR, utype, var, signbit); + fprintf (dump_file, "Marking "); + print_generic_expr (dump_file, root->base, 0); + fprintf (dump_file, " offset: %u, size: %u: ", + (unsigned) root->offset, (unsigned) root->size); + fprintf (dump_file, " to be replaced.\n"); } - /* fold_build3 (BIT_FIELD_REF, ...) sometimes returns a cast. */ - STRIP_NOPS (dst); - - /* Finally, move and convert to the destination. */ - if (INTEGRAL_TYPE_P (TREE_TYPE (dst))) - var = fold_convert (TREE_TYPE (dst), var); - else - var = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (dst), var); - - push_gimplify_context (&gctx); - gctx.allow_rhs_cond_expr = true; - - gimplify_assign (dst, var, &seq); - - if (gimple_referenced_vars (cfun)) - for (var = gctx.temps; var; var = TREE_CHAIN (var)) - { - add_referenced_var (var); - mark_sym_for_renaming (var); - } - pop_gimplify_context (NULL); - - return seq; + root->grp_to_be_replaced = 1; + sth_created = true; + hole = false; } + else if (covered_to < limit) + hole = true; - /* fold_build3 (BIT_FIELD_REF, ...) sometimes returns a cast. */ - if (CONVERT_EXPR_P (dst)) + if (sth_created && !hole) { - STRIP_NOPS (dst); - src = fold_convert (TREE_TYPE (dst), src); + root->grp_covered = 1; + return true; } - /* It was hoped that we could perform some type sanity checking - here, but since front-ends can emit accesses of fields in types - different from their nominal types and copy structures containing - them as a whole, we'd have to handle such differences here. - Since such accesses under different types require compatibility - anyway, there's little point in making tests and/or adding - conversions to ensure the types of src and dst are the same. - So we just assume type differences at this point are ok. - The only exception we make here are pointer types, which can be different - in e.g. structurally equal, but non-identical RECORD_TYPEs. */ - else if (POINTER_TYPE_P (TREE_TYPE (dst)) - && !useless_type_conversion_p (TREE_TYPE (dst), TREE_TYPE (src))) - src = fold_convert (TREE_TYPE (dst), src); - - /* ??? Only call the gimplifier if we need to. Otherwise we may - end up substituting with DECL_VALUE_EXPR - see PR37380. */ - if (!handled_component_p (src) - && !SSA_VAR_P (src)) + if (root->grp_write || TREE_CODE (root->base) == PARM_DECL) + root->grp_unscalarized_data = 1; /* not covered and written to */ + if (sth_created) + return true; + return false; +} + +/* Analyze all access trees linked by next_grp by the means of + analyze_access_subtree. */ +static bool +analyze_access_trees (struct access *access) +{ + bool ret = false; + + while (access) { - src = force_gimple_operand (src, &seq2, false, NULL_TREE); - gimple_seq_add_seq (&seq, seq2); + if (analyze_access_subtree (access, true, false, false)) + ret = true; + access = access->next_grp; } - stmt = gimple_build_assign (dst, src); - gimple_seq_add_stmt (&seq, stmt); - return seq; -} -/* BIT_FIELD_REFs must not be shared. sra_build_elt_assignment() - takes care of assignments, but we must create copies for uses. */ -#define REPLDUP(t) (TREE_CODE (t) != BIT_FIELD_REF ? (t) : unshare_expr (t)) + return ret; +} -/* Emit an assignment from SRC to DST, but if DST is a scalarizable - BIT_FIELD_REF, turn it into bit operations. */ +/* Return true iff a potential new child of LACC at offset OFFSET and with size + SIZE would conflict with an already existing one. If exactly such a child + already exists in LACC, store a pointer to it in EXACT_MATCH. */ -static gimple_seq -sra_build_bf_assignment (tree dst, tree src) +static bool +child_would_conflict_in_lacc (struct access *lacc, HOST_WIDE_INT norm_offset, + HOST_WIDE_INT size, struct access **exact_match) { - tree var, type, utype, tmp, tmp2, tmp3; - gimple_seq seq; - gimple stmt; - tree cst, cst2, mask; - tree minshift, maxshift; + struct access *child; - if (TREE_CODE (dst) != BIT_FIELD_REF) - return sra_build_assignment (dst, src); + for (child = lacc->first_child; child; child = child->next_sibling) + { + if (child->offset == norm_offset && child->size == size) + { + *exact_match = child; + return true; + } - var = TREE_OPERAND (dst, 0); + if (child->offset < norm_offset + size + && child->offset + child->size > norm_offset) + return true; + } - if (!scalar_bitfield_p (dst)) - return sra_build_assignment (REPLDUP (dst), src); + return false; +} - seq = NULL; +/* Set the expr of TARGET to one just like MODEL but with is own base at the + bottom of the handled components. */ - cst = fold_convert (bitsizetype, TREE_OPERAND (dst, 2)); - cst2 = size_binop (PLUS_EXPR, - fold_convert (bitsizetype, TREE_OPERAND (dst, 1)), - cst); +static void +duplicate_expr_for_different_base (struct access *target, + struct access *model) +{ + tree t, expr = unshare_expr (model->expr); - if (BYTES_BIG_ENDIAN) - { - maxshift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), cst); - minshift = size_binop (MINUS_EXPR, TYPE_SIZE (TREE_TYPE (var)), cst2); - } - else - { - maxshift = cst2; - minshift = cst; - } + gcc_assert (handled_component_p (expr)); + t = expr; + while (handled_component_p (TREE_OPERAND (t, 0))) + t = TREE_OPERAND (t, 0); + gcc_assert (TREE_OPERAND (t, 0) == model->base); + TREE_OPERAND (t, 0) = target->base; - type = TREE_TYPE (var); - if (!INTEGRAL_TYPE_P (type)) - type = lang_hooks.types.type_for_size - (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (var))), 1); - if (TYPE_UNSIGNED (type)) - utype = type; - else - utype = unsigned_type_for (type); + target->expr = expr; +} - mask = build_int_cst_wide (utype, 1, 0); - if (TREE_INT_CST_LOW (maxshift) == TYPE_PRECISION (utype)) - cst = build_int_cst_wide (utype, 0, 0); - else - cst = int_const_binop (LSHIFT_EXPR, mask, maxshift, true); - if (integer_zerop (minshift)) - cst2 = mask; - else - cst2 = int_const_binop (LSHIFT_EXPR, mask, minshift, true); - mask = int_const_binop (MINUS_EXPR, cst, cst2, true); - mask = fold_build1 (BIT_NOT_EXPR, utype, mask); - if (TYPE_MAIN_VARIANT (utype) != TYPE_MAIN_VARIANT (TREE_TYPE (var)) - && !integer_zerop (mask)) - { - tmp = var; - if (!is_gimple_variable (tmp)) - tmp = unshare_expr (var); - else - TREE_NO_WARNING (var) = true; +/* Create a new child access of PARENT, with all properties just like MODEL + except for its offset and with its grp_write false and grp_read true. + Return the new access. Note that this access is created long after all + splicing and sorting, it's not located in any access vector and is + automatically a representative of its group. */ - tmp2 = make_rename_temp (utype, "SR"); +static struct access * +create_artificial_child_access (struct access *parent, struct access *model, + HOST_WIDE_INT new_offset) +{ + struct access *access; + struct access **child; - if (INTEGRAL_TYPE_P (TREE_TYPE (var))) - tmp = fold_convert (utype, tmp); - else - tmp = fold_build1 (VIEW_CONVERT_EXPR, utype, tmp); + gcc_assert (!model->grp_unscalarizable_region); - stmt = gimple_build_assign (tmp2, tmp); - gimple_seq_add_stmt (&seq, stmt); - } - else - tmp2 = var; + access = (struct access *) pool_alloc (access_pool); + memset (access, 0, sizeof (struct access)); + access->base = parent->base; + access->offset = new_offset; + access->size = model->size; + duplicate_expr_for_different_base (access, model); + access->type = model->type; + access->grp_write = true; + access->grp_read = false; - if (!integer_zerop (mask)) - { - tmp = make_rename_temp (utype, "SR"); - stmt = gimple_build_assign (tmp, fold_build2 (BIT_AND_EXPR, utype, - tmp2, mask)); - gimple_seq_add_stmt (&seq, stmt); - } - else - tmp = mask; + child = &parent->first_child; + while (*child && (*child)->offset < new_offset) + child = &(*child)->next_sibling; - if (is_gimple_reg (src) && INTEGRAL_TYPE_P (TREE_TYPE (src))) - tmp2 = src; - else if (INTEGRAL_TYPE_P (TREE_TYPE (src))) - { - gimple_seq tmp_seq; - tmp2 = make_rename_temp (TREE_TYPE (src), "SR"); - tmp_seq = sra_build_assignment (tmp2, src); - gimple_seq_add_seq (&seq, tmp_seq); - } - else - { - gimple_seq tmp_seq; - tmp2 = make_rename_temp - (lang_hooks.types.type_for_size - (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (src))), - 1), "SR"); - tmp_seq = sra_build_assignment (tmp2, fold_build1 (VIEW_CONVERT_EXPR, - TREE_TYPE (tmp2), src)); - gimple_seq_add_seq (&seq, tmp_seq); - } + access->next_sibling = *child; + *child = access; - if (!TYPE_UNSIGNED (TREE_TYPE (tmp2))) - { - gimple_seq tmp_seq; - tree ut = unsigned_type_for (TREE_TYPE (tmp2)); - tmp3 = make_rename_temp (ut, "SR"); - tmp2 = fold_convert (ut, tmp2); - tmp_seq = sra_build_assignment (tmp3, tmp2); - gimple_seq_add_seq (&seq, tmp_seq); - - tmp2 = fold_build1 (BIT_NOT_EXPR, utype, mask); - tmp2 = int_const_binop (RSHIFT_EXPR, tmp2, minshift, true); - tmp2 = fold_convert (ut, tmp2); - tmp2 = fold_build2 (BIT_AND_EXPR, ut, tmp3, tmp2); - - if (tmp3 != tmp2) - { - tmp3 = make_rename_temp (ut, "SR"); - tmp_seq = sra_build_assignment (tmp3, tmp2); - gimple_seq_add_seq (&seq, tmp_seq); - } + return access; +} - tmp2 = tmp3; - } - if (TYPE_MAIN_VARIANT (TREE_TYPE (tmp2)) != TYPE_MAIN_VARIANT (utype)) - { - gimple_seq tmp_seq; - tmp3 = make_rename_temp (utype, "SR"); - tmp2 = fold_convert (utype, tmp2); - tmp_seq = sra_build_assignment (tmp3, tmp2); - gimple_seq_add_seq (&seq, tmp_seq); - tmp2 = tmp3; - } +/* Propagate all subaccesses of RACC across an assignment link to LACC. Return + true if any new subaccess was created. Additionally, if RACC is a scalar + access but LACC is not, change the type of the latter. */ - if (!integer_zerop (minshift)) - { - tmp3 = make_rename_temp (utype, "SR"); - stmt = gimple_build_assign (tmp3, fold_build2 (LSHIFT_EXPR, utype, - tmp2, minshift)); - gimple_seq_add_stmt (&seq, stmt); - tmp2 = tmp3; - } +static bool +propagate_subacesses_accross_link (struct access *lacc, struct access *racc) +{ + struct access *rchild; + HOST_WIDE_INT norm_delta = lacc->offset - racc->offset; - if (utype != TREE_TYPE (var)) - tmp3 = make_rename_temp (utype, "SR"); - else - tmp3 = var; - stmt = gimple_build_assign (tmp3, fold_build2 (BIT_IOR_EXPR, utype, - tmp, tmp2)); - gimple_seq_add_stmt (&seq, stmt); + bool ret = false; + + if (is_gimple_reg_type (lacc->type) + || lacc->grp_unscalarizable_region + || racc->grp_unscalarizable_region) + return false; - if (tmp3 != var) + if (!lacc->first_child && !racc->first_child + && is_gimple_reg_type (racc->type)) { - if (TREE_TYPE (var) == type) - stmt = gimple_build_assign (var, fold_convert (type, tmp3)); - else - stmt = gimple_build_assign (var, fold_build1 (VIEW_CONVERT_EXPR, - TREE_TYPE (var), tmp3)); - gimple_seq_add_stmt (&seq, stmt); + duplicate_expr_for_different_base (lacc, racc); + lacc->type = racc->type; + return false; } - return seq; -} - -/* Expand an assignment of SRC to the scalarized representation of - ELT. If it is a field group, try to widen the assignment to cover - the full variable. */ - -static gimple_seq -sra_build_elt_assignment (struct sra_elt *elt, tree src) -{ - tree dst = elt->replacement; - tree var, tmp, cst, cst2; - gimple stmt; - gimple_seq seq; - - if (TREE_CODE (dst) != BIT_FIELD_REF - || !elt->in_bitfld_block) - return sra_build_assignment (REPLDUP (dst), src); - - var = TREE_OPERAND (dst, 0); - - /* Try to widen the assignment to the entire variable. - We need the source to be a BIT_FIELD_REF as well, such that, for - BIT_FIELD_REF<d,sz,dp> = BIT_FIELD_REF<s,sz,sp>, - by design, conditions are met such that we can turn it into - d = BIT_FIELD_REF<s,dw,sp-dp>. */ - if (elt->in_bitfld_block == 2 - && TREE_CODE (src) == BIT_FIELD_REF) + for (rchild = racc->first_child; rchild; rchild = rchild->next_sibling) { - tmp = src; - cst = TYPE_SIZE (TREE_TYPE (var)); - cst2 = size_binop (MINUS_EXPR, TREE_OPERAND (src, 2), - TREE_OPERAND (dst, 2)); + struct access *new_acc = NULL; + HOST_WIDE_INT norm_offset = rchild->offset + norm_delta; - src = TREE_OPERAND (src, 0); + if (rchild->grp_unscalarizable_region) + continue; - /* Avoid full-width bit-fields. */ - if (integer_zerop (cst2) - && tree_int_cst_equal (cst, TYPE_SIZE (TREE_TYPE (src)))) + if (child_would_conflict_in_lacc (lacc, norm_offset, rchild->size, + &new_acc)) { - if (INTEGRAL_TYPE_P (TREE_TYPE (src)) - && !TYPE_UNSIGNED (TREE_TYPE (src))) - src = fold_convert (unsigned_type_for (TREE_TYPE (src)), src); - - /* If a single conversion won't do, we'll need a statement - list. */ - if (TYPE_MAIN_VARIANT (TREE_TYPE (var)) - != TYPE_MAIN_VARIANT (TREE_TYPE (src))) - { - gimple_seq tmp_seq; - seq = NULL; - - if (!INTEGRAL_TYPE_P (TREE_TYPE (src))) - src = fold_build1 (VIEW_CONVERT_EXPR, - lang_hooks.types.type_for_size - (TREE_INT_CST_LOW - (TYPE_SIZE (TREE_TYPE (src))), - 1), src); - gcc_assert (TYPE_UNSIGNED (TREE_TYPE (src))); - - tmp = make_rename_temp (TREE_TYPE (src), "SR"); - stmt = gimple_build_assign (tmp, src); - gimple_seq_add_stmt (&seq, stmt); - - tmp_seq = sra_build_assignment (var, - fold_convert (TREE_TYPE (var), - tmp)); - gimple_seq_add_seq (&seq, tmp_seq); - - return seq; - } - - src = fold_convert (TREE_TYPE (var), src); - } - else - { - src = fold_convert (TREE_TYPE (var), tmp); + if (new_acc && rchild->first_child) + ret |= propagate_subacesses_accross_link (new_acc, rchild); + continue; } - return sra_build_assignment (var, src); + new_acc = create_artificial_child_access (lacc, rchild, norm_offset); + if (racc->first_child) + propagate_subacesses_accross_link (new_acc, rchild); + + ret = true; } - return sra_build_bf_assignment (dst, src); + return ret; } -/* Generate a set of assignment statements in *LIST_P to copy all - instantiated elements under ELT to or from the equivalent structure - rooted at EXPR. COPY_OUT controls the direction of the copy, with - true meaning to copy out of EXPR into ELT. */ +/* Propagate all subaccesses across assignment links. */ static void -generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr, - gimple_seq *seq_p) +propagate_all_subaccesses (void) { - struct sra_elt *c; - gimple_seq tmp_seq; - tree t; - - if (!copy_out && TREE_CODE (expr) == SSA_NAME - && TREE_CODE (TREE_TYPE (expr)) == COMPLEX_TYPE) + while (work_queue_head) { - tree r, i; + struct access *racc = pop_access_from_work_queue (); + struct assign_link *link; - c = lookup_element (elt, integer_zero_node, NULL, NO_INSERT); - r = c->replacement; - c = lookup_element (elt, integer_one_node, NULL, NO_INSERT); - i = c->replacement; + gcc_assert (racc->first_link); - t = build2 (COMPLEX_EXPR, elt->type, r, i); - tmp_seq = sra_build_bf_assignment (expr, t); - SSA_NAME_DEF_STMT (expr) = gimple_seq_last_stmt (tmp_seq); - gimple_seq_add_seq (seq_p, tmp_seq); - } - else if (elt->replacement) - { - if (copy_out) - tmp_seq = sra_build_elt_assignment (elt, expr); - else - tmp_seq = sra_build_bf_assignment (expr, REPLDUP (elt->replacement)); - gimple_seq_add_seq (seq_p, tmp_seq); - } - else - { - FOR_EACH_ACTUAL_CHILD (c, elt) + for (link = racc->first_link; link; link = link->next) { - t = generate_one_element_ref (c, unshare_expr (expr)); - generate_copy_inout (c, copy_out, t, seq_p); + struct access *lacc = link->lacc; + + if (!bitmap_bit_p (candidate_bitmap, DECL_UID (lacc->base))) + continue; + lacc = lacc->group_representative; + if (propagate_subacesses_accross_link (lacc, racc) + && lacc->first_link) + add_access_to_work_queue (lacc); } } } -/* Generate a set of assignment statements in *LIST_P to copy all instantiated - elements under SRC to their counterparts under DST. There must be a 1-1 - correspondence of instantiated elements. */ +/* Go through all accesses collected throughout the (intraprocedural) analysis + stage, exclude overlapping ones, identify representatives and build trees + out of them, making decisions about scalarization on the way. Return true + iff there are any to-be-scalarized variables after this stage. */ -static void -generate_element_copy (struct sra_elt *dst, struct sra_elt *src, gimple_seq *seq_p) +static bool +analyze_all_variable_accesses (void) { - struct sra_elt *dc, *sc; - - FOR_EACH_ACTUAL_CHILD (dc, dst) - { - sc = lookup_element (src, dc->element, NULL, NO_INSERT); - if (!sc && dc->in_bitfld_block == 2) - { - struct sra_elt *dcs; - - FOR_EACH_ACTUAL_CHILD (dcs, dc) - { - sc = lookup_element (src, dcs->element, NULL, NO_INSERT); - gcc_assert (sc); - generate_element_copy (dcs, sc, seq_p); - } + tree var; + referenced_var_iterator rvi; + bool res = false; - continue; - } + FOR_EACH_REFERENCED_VAR (var, rvi) + if (bitmap_bit_p (candidate_bitmap, DECL_UID (var))) + { + struct access *access; - /* If DST and SRC are structs with the same elements, but do not have - the same TYPE_MAIN_VARIANT, then lookup of DST FIELD_DECL in SRC - will fail. Try harder by finding the corresponding FIELD_DECL - in SRC. */ - if (!sc) - { - tree f; - - gcc_assert (useless_type_conversion_p (dst->type, src->type)); - gcc_assert (TREE_CODE (dc->element) == FIELD_DECL); - for (f = TYPE_FIELDS (src->type); f ; f = TREE_CHAIN (f)) - if (simple_cst_equal (DECL_FIELD_OFFSET (f), - DECL_FIELD_OFFSET (dc->element)) > 0 - && simple_cst_equal (DECL_FIELD_BIT_OFFSET (f), - DECL_FIELD_BIT_OFFSET (dc->element)) > 0 - && simple_cst_equal (DECL_SIZE (f), - DECL_SIZE (dc->element)) > 0 - && (useless_type_conversion_p (TREE_TYPE (dc->element), - TREE_TYPE (f)) - || (POINTER_TYPE_P (TREE_TYPE (dc->element)) - && POINTER_TYPE_P (TREE_TYPE (f))))) - break; - gcc_assert (f != NULL_TREE); - sc = lookup_element (src, f, NULL, NO_INSERT); - } + access = sort_and_splice_var_accesses (var); + if (access) + build_access_trees (access); + else + disqualify_candidate (var, + "No or inhibitingly overlapping accesses."); + } - generate_element_copy (dc, sc, seq_p); - } + propagate_all_subaccesses (); - if (dst->replacement) - { - gimple_seq tmp_seq; + FOR_EACH_REFERENCED_VAR (var, rvi) + if (bitmap_bit_p (candidate_bitmap, DECL_UID (var))) + { + struct access *access = get_first_repr_for_decl (var); - gcc_assert (src->replacement); + if (analyze_access_trees (access)) + { + res = true; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nAccess trees for "); + print_generic_expr (dump_file, var, 0); + fprintf (dump_file, " (UID: %u): \n", DECL_UID (var)); + dump_access_tree (dump_file, access); + fprintf (dump_file, "\n"); + } + } + else + disqualify_candidate (var, "No scalar replacements to be created."); + } - tmp_seq = sra_build_elt_assignment (dst, REPLDUP (src->replacement)); - gimple_seq_add_seq (seq_p, tmp_seq); - } + return res; } -/* Generate a set of assignment statements in *LIST_P to zero all instantiated - elements under ELT. In addition, do not assign to elements that have been - marked VISITED but do reset the visited flag; this allows easy coordination - with generate_element_init. */ +/* Return true iff a reference statement into aggregate AGG can be built for + every single to-be-replaced accesses that is a child of ACCESS, its sibling + or a child of its sibling. TOP_OFFSET is the offset from the processed + access subtree that has to be subtracted from offset of each access. */ -static void -generate_element_zero (struct sra_elt *elt, gimple_seq *seq_p) +static bool +ref_expr_for_all_replacements_p (struct access *access, tree agg, + HOST_WIDE_INT top_offset) { - struct sra_elt *c; - - if (elt->visited) - { - elt->visited = false; - return; - } - - if (!elt->in_bitfld_block) - FOR_EACH_ACTUAL_CHILD (c, elt) - generate_element_zero (c, seq_p); - - if (elt->replacement) + do { - tree t; - gimple_seq tmp_seq; + if (access->grp_to_be_replaced + && !build_ref_for_offset (NULL, TREE_TYPE (agg), + access->offset - top_offset, + access->type, false)) + return false; - gcc_assert (elt->is_scalar); - t = fold_convert (elt->type, integer_zero_node); + if (access->first_child + && !ref_expr_for_all_replacements_p (access->first_child, agg, + top_offset)) + return false; - tmp_seq = sra_build_elt_assignment (elt, t); - gimple_seq_add_seq (seq_p, tmp_seq); + access = access->next_sibling; } + while (access); + + return true; } -/* Generate an assignment VAR = INIT, where INIT may need gimplification. - Add the result to *LIST_P. */ +/* Generate statements copying scalar replacements of accesses within a subtree + into or out of AGG. ACCESS is the first child of the root of the subtree to + be processed. AGG is an aggregate type expression (can be a declaration but + does not have to be, it can for example also be an indirect_ref). + TOP_OFFSET is the offset of the processed subtree which has to be subtracted + from offsets of individual accesses to get corresponding offsets for AGG. + If CHUNK_SIZE is non-null, copy only replacements in the interval + <start_offset, start_offset + chunk_size>, otherwise copy all. GSI is a + statement iterator used to place the new statements. WRITE should be true + when the statements should write from AGG to the replacement and false if + vice versa. if INSERT_AFTER is true, new statements will be added after the + current statement in GSI, they will be added before the statement + otherwise. */ static void -generate_one_element_init (struct sra_elt *elt, tree init, gimple_seq *seq_p) +generate_subtree_copies (struct access *access, tree agg, + HOST_WIDE_INT top_offset, + HOST_WIDE_INT start_offset, HOST_WIDE_INT chunk_size, + gimple_stmt_iterator *gsi, bool write, + bool insert_after) { - gimple_seq tmp_seq = sra_build_elt_assignment (elt, init); - gimple_seq_add_seq (seq_p, tmp_seq); -} + do + { + tree expr = unshare_expr (agg); -/* Generate a set of assignment statements in *LIST_P to set all instantiated - elements under ELT with the contents of the initializer INIT. In addition, - mark all assigned elements VISITED; this allows easy coordination with - generate_element_zero. Return false if we found a case we couldn't - handle. */ + if (chunk_size && access->offset >= start_offset + chunk_size) + return; -static bool -generate_element_init_1 (struct sra_elt *elt, tree init, gimple_seq *seq_p) -{ - bool result = true; - enum tree_code init_code; - struct sra_elt *sub; - tree t; - unsigned HOST_WIDE_INT idx; - tree value, purpose; - - /* We can be passed DECL_INITIAL of a static variable. It might have a - conversion, which we strip off here. */ - STRIP_USELESS_TYPE_CONVERSION (init); - init_code = TREE_CODE (init); - - if (elt->is_scalar) - { - if (elt->replacement) + if (access->grp_to_be_replaced + && (chunk_size == 0 + || access->offset + access->size > start_offset)) { - generate_one_element_init (elt, init, seq_p); - elt->visited = true; - } - return result; - } + tree repl = get_access_replacement (access); + bool ref_found; + gimple stmt; - switch (init_code) - { - case COMPLEX_CST: - case COMPLEX_EXPR: - FOR_EACH_ACTUAL_CHILD (sub, elt) - { - if (sub->element == integer_zero_node) - t = (init_code == COMPLEX_EXPR - ? TREE_OPERAND (init, 0) : TREE_REALPART (init)); - else - t = (init_code == COMPLEX_EXPR - ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init)); - result &= generate_element_init_1 (sub, t, seq_p); - } - break; + ref_found = build_ref_for_offset (&expr, TREE_TYPE (agg), + access->offset - top_offset, + access->type, false); + gcc_assert (ref_found); - case CONSTRUCTOR: - FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx, purpose, value) - { - /* Array constructors are routinely created with NULL indices. */ - if (purpose == NULL_TREE) - { - result = false; - break; - } - if (TREE_CODE (purpose) == RANGE_EXPR) + if (write) { - tree lower = TREE_OPERAND (purpose, 0); - tree upper = TREE_OPERAND (purpose, 1); - - while (1) - { - sub = lookup_element (elt, lower, NULL, NO_INSERT); - if (sub != NULL) - result &= generate_element_init_1 (sub, value, seq_p); - if (tree_int_cst_equal (lower, upper)) - break; - lower = int_const_binop (PLUS_EXPR, lower, - integer_one_node, true); - } + if (access->grp_partial_lhs) + expr = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, + !insert_after, + insert_after ? GSI_NEW_STMT + : GSI_SAME_STMT); + stmt = gimple_build_assign (repl, expr); } else { - sub = lookup_element (elt, purpose, NULL, NO_INSERT); - if (sub != NULL) - result &= generate_element_init_1 (sub, value, seq_p); + TREE_NO_WARNING (repl) = 1; + if (access->grp_partial_lhs) + repl = force_gimple_operand_gsi (gsi, repl, true, NULL_TREE, + !insert_after, + insert_after ? GSI_NEW_STMT + : GSI_SAME_STMT); + stmt = gimple_build_assign (expr, repl); } + + if (insert_after) + gsi_insert_after (gsi, stmt, GSI_NEW_STMT); + else + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); + update_stmt (stmt); } - break; - default: - elt->visited = true; - result = false; - } + if (access->first_child) + generate_subtree_copies (access->first_child, agg, top_offset, + start_offset, chunk_size, gsi, + write, insert_after); - return result; + access = access->next_sibling; + } + while (access); } -/* A wrapper function for generate_element_init_1 that handles cleanup after - gimplification. */ +/* Assign zero to all scalar replacements in an access subtree. ACCESS is the + the root of the subtree to be processed. GSI is the statement iterator used + for inserting statements which are added after the current statement if + INSERT_AFTER is true or before it otherwise. */ -static bool -generate_element_init (struct sra_elt *elt, tree init, gimple_seq *seq_p) -{ - bool ret; - struct gimplify_ctx gctx; +static void +init_subtree_with_zero (struct access *access, gimple_stmt_iterator *gsi, + bool insert_after) - push_gimplify_context (&gctx); - ret = generate_element_init_1 (elt, init, seq_p); - pop_gimplify_context (NULL); +{ + struct access *child; - /* The replacement can expose previously unreferenced variables. */ - if (ret && *seq_p) + if (access->grp_to_be_replaced) { - gimple_stmt_iterator i; + gimple stmt; - for (i = gsi_start (*seq_p); !gsi_end_p (i); gsi_next (&i)) - find_new_referenced_vars (gsi_stmt (i)); + stmt = gimple_build_assign (get_access_replacement (access), + fold_convert (access->type, + integer_zero_node)); + if (insert_after) + gsi_insert_after (gsi, stmt, GSI_NEW_STMT); + else + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); + update_stmt (stmt); } - return ret; -} - -/* Helper function to insert LIST before GSI, and set up line number info. */ - -static void -sra_insert_before (gimple_stmt_iterator *gsi, gimple_seq seq) -{ - gimple stmt = gsi_stmt (*gsi); - - if (gimple_has_location (stmt)) - annotate_all_with_location (seq, gimple_location (stmt)); - gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); + for (child = access->first_child; child; child = child->next_sibling) + init_subtree_with_zero (child, gsi, insert_after); } -/* Similarly, but insert after GSI. Handles insertion onto edges as well. */ +/* Search for an access representative for the given expression EXPR and + return it or NULL if it cannot be found. */ -static void -sra_insert_after (gimple_stmt_iterator *gsi, gimple_seq seq) +static struct access * +get_access_for_expr (tree expr) { - gimple stmt = gsi_stmt (*gsi); + HOST_WIDE_INT offset, size, max_size; + tree base; - if (gimple_has_location (stmt)) - annotate_all_with_location (seq, gimple_location (stmt)); + /* FIXME: This should not be necessary but Ada produces V_C_Es with a type of + a different size than the size of its argument and we need the latter + one. */ + if (TREE_CODE (expr) == VIEW_CONVERT_EXPR) + expr = TREE_OPERAND (expr, 0); - if (stmt_ends_bb_p (stmt)) - insert_edge_copies_seq (seq, gsi_bb (*gsi)); - else - gsi_insert_seq_after (gsi, seq, GSI_SAME_STMT); -} + base = get_ref_base_and_extent (expr, &offset, &size, &max_size); + if (max_size == -1 || !DECL_P (base)) + return NULL; -/* Similarly, but replace the statement at GSI. */ + if (!bitmap_bit_p (candidate_bitmap, DECL_UID (base))) + return NULL; -static void -sra_replace (gimple_stmt_iterator *gsi, gimple_seq seq) -{ - sra_insert_before (gsi, seq); - unlink_stmt_vdef (gsi_stmt (*gsi)); - gsi_remove (gsi, false); - if (gsi_end_p (*gsi)) - *gsi = gsi_last (gsi_seq (*gsi)); - else - gsi_prev (gsi); + return get_var_base_offset_size_access (base, offset, max_size); } -/* Data structure that bitfield_overlaps_p fills in with information - about the element passed in and how much of it overlaps with the - bit-range passed it to. */ - -struct bitfield_overlap_info -{ - /* The bit-length of an element. */ - tree field_len; - - /* The bit-position of the element in its parent. */ - tree field_pos; - - /* The number of bits of the element that overlap with the incoming - bit range. */ - tree overlap_len; - - /* The first bit of the element that overlaps with the incoming bit - range. */ - tree overlap_pos; -}; - -/* Return true if a BIT_FIELD_REF<(FLD->parent), BLEN, BPOS> - expression (referenced as BF below) accesses any of the bits in FLD, - false if it doesn't. If DATA is non-null, its field_len and - field_pos are filled in such that BIT_FIELD_REF<(FLD->parent), - field_len, field_pos> (referenced as BFLD below) represents the - entire field FLD->element, and BIT_FIELD_REF<BFLD, overlap_len, - overlap_pos> represents the portion of the entire field that - overlaps with BF. */ +/* Callback for scan_function. Replace the expression EXPR with a scalar + replacement if there is one and generate other statements to do type + conversion or subtree copying if necessary. GSI is used to place newly + created statements, WRITE is true if the expression is being written to (it + is on a LHS of a statement or output in an assembly statement). */ static bool -bitfield_overlaps_p (tree blen, tree bpos, struct sra_elt *fld, - struct bitfield_overlap_info *data) +sra_modify_expr (tree *expr, gimple_stmt_iterator *gsi, bool write, + void *data ATTRIBUTE_UNUSED) { - tree flen, fpos; - bool ret; + struct access *access; + tree type, bfr; - if (TREE_CODE (fld->element) == FIELD_DECL) - { - flen = fold_convert (bitsizetype, DECL_SIZE (fld->element)); - fpos = fold_convert (bitsizetype, DECL_FIELD_OFFSET (fld->element)); - fpos = size_binop (MULT_EXPR, fpos, bitsize_int (BITS_PER_UNIT)); - fpos = size_binop (PLUS_EXPR, fpos, DECL_FIELD_BIT_OFFSET (fld->element)); - } - else if (TREE_CODE (fld->element) == BIT_FIELD_REF) + if (TREE_CODE (*expr) == BIT_FIELD_REF) { - flen = fold_convert (bitsizetype, TREE_OPERAND (fld->element, 1)); - fpos = fold_convert (bitsizetype, TREE_OPERAND (fld->element, 2)); - } - else if (TREE_CODE (fld->element) == INTEGER_CST) - { - tree domain_type = TYPE_DOMAIN (TREE_TYPE (fld->parent->element)); - flen = fold_convert (bitsizetype, TYPE_SIZE (fld->type)); - fpos = fold_convert (bitsizetype, fld->element); - if (domain_type && TYPE_MIN_VALUE (domain_type)) - fpos = size_binop (MINUS_EXPR, fpos, - fold_convert (bitsizetype, - TYPE_MIN_VALUE (domain_type))); - fpos = size_binop (MULT_EXPR, flen, fpos); + bfr = *expr; + expr = &TREE_OPERAND (*expr, 0); } else - gcc_unreachable (); - - gcc_assert (host_integerp (blen, 1) - && host_integerp (bpos, 1) - && host_integerp (flen, 1) - && host_integerp (fpos, 1)); - - ret = ((!tree_int_cst_lt (fpos, bpos) - && tree_int_cst_lt (size_binop (MINUS_EXPR, fpos, bpos), - blen)) - || (!tree_int_cst_lt (bpos, fpos) - && tree_int_cst_lt (size_binop (MINUS_EXPR, bpos, fpos), - flen))); + bfr = NULL_TREE; - if (!ret) - return ret; - - if (data) - { - tree bend, fend; - - data->field_len = flen; - data->field_pos = fpos; - - fend = size_binop (PLUS_EXPR, fpos, flen); - bend = size_binop (PLUS_EXPR, bpos, blen); - - if (tree_int_cst_lt (bend, fend)) - data->overlap_len = size_binop (MINUS_EXPR, bend, fpos); - else - data->overlap_len = NULL; - - if (tree_int_cst_lt (fpos, bpos)) - { - data->overlap_pos = size_binop (MINUS_EXPR, bpos, fpos); - data->overlap_len = size_binop (MINUS_EXPR, - data->overlap_len - ? data->overlap_len - : data->field_len, - data->overlap_pos); - } - else - data->overlap_pos = NULL; - } - - return ret; -} - -/* Add to LISTP a sequence of statements that copies BLEN bits between - VAR and the scalarized elements of ELT, starting a bit VPOS of VAR - and at bit BPOS of ELT. The direction of the copy is given by - TO_VAR. */ - -static void -sra_explode_bitfield_assignment (tree var, tree vpos, bool to_var, - gimple_seq *seq_p, tree blen, tree bpos, - struct sra_elt *elt) -{ - struct sra_elt *fld; - struct bitfield_overlap_info flp; + if (TREE_CODE (*expr) == REALPART_EXPR || TREE_CODE (*expr) == IMAGPART_EXPR) + expr = &TREE_OPERAND (*expr, 0); + access = get_access_for_expr (*expr); + if (!access) + return false; + type = TREE_TYPE (*expr); - FOR_EACH_ACTUAL_CHILD (fld, elt) + if (access->grp_to_be_replaced) { - tree flen, fpos; - - if (!bitfield_overlaps_p (blen, bpos, fld, &flp)) - continue; - - flen = flp.overlap_len ? flp.overlap_len : flp.field_len; - fpos = flp.overlap_pos ? flp.overlap_pos : bitsize_int (0); - - if (fld->replacement) + tree repl = get_access_replacement (access); + /* If we replace a non-register typed access simply use the original + access expression to extract the scalar component afterwards. + This happens if scalarizing a function return value or parameter + like in gcc.c-torture/execute/20041124-1.c, 20050316-1.c and + gcc.c-torture/compile/20011217-1.c. */ + if (!is_gimple_reg_type (type)) { - tree infld, invar, type; - gimple_seq st; - - infld = fld->replacement; - - type = unsigned_type_for (TREE_TYPE (infld)); - if (TYPE_PRECISION (type) != TREE_INT_CST_LOW (flen)) - type = build_nonstandard_integer_type (TREE_INT_CST_LOW (flen), 1); - - if (TREE_CODE (infld) == BIT_FIELD_REF) + gimple stmt; + if (write) { - fpos = size_binop (PLUS_EXPR, fpos, TREE_OPERAND (infld, 2)); - infld = TREE_OPERAND (infld, 0); + tree ref = unshare_expr (access->expr); + if (access->grp_partial_lhs) + ref = force_gimple_operand_gsi (gsi, ref, true, NULL_TREE, + false, GSI_NEW_STMT); + stmt = gimple_build_assign (repl, ref); + gsi_insert_after (gsi, stmt, GSI_NEW_STMT); } - else if (BYTES_BIG_ENDIAN && DECL_P (fld->element) - && !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (infld)), - DECL_SIZE (fld->element))) + else { - fpos = size_binop (PLUS_EXPR, fpos, - TYPE_SIZE (TREE_TYPE (infld))); - fpos = size_binop (MINUS_EXPR, fpos, - DECL_SIZE (fld->element)); + if (access->grp_partial_lhs) + repl = force_gimple_operand_gsi (gsi, repl, true, NULL_TREE, + true, GSI_SAME_STMT); + stmt = gimple_build_assign (unshare_expr (access->expr), repl); + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); } - - infld = fold_build3 (BIT_FIELD_REF, type, infld, flen, fpos); - - invar = size_binop (MINUS_EXPR, flp.field_pos, bpos); - if (flp.overlap_pos) - invar = size_binop (PLUS_EXPR, invar, flp.overlap_pos); - invar = size_binop (PLUS_EXPR, invar, vpos); - - invar = fold_build3 (BIT_FIELD_REF, type, var, flen, invar); - - if (to_var) - st = sra_build_bf_assignment (invar, infld); - else - st = sra_build_bf_assignment (infld, invar); - - gimple_seq_add_seq (seq_p, st); } else { - tree sub = size_binop (MINUS_EXPR, flp.field_pos, bpos); - sub = size_binop (PLUS_EXPR, vpos, sub); - if (flp.overlap_pos) - sub = size_binop (PLUS_EXPR, sub, flp.overlap_pos); + gcc_assert (useless_type_conversion_p (type, access->type)); + *expr = repl; + } + } - sra_explode_bitfield_assignment (var, sub, to_var, seq_p, - flen, fpos, fld); + if (access->first_child) + { + HOST_WIDE_INT start_offset, chunk_size; + if (bfr + && host_integerp (TREE_OPERAND (bfr, 1), 1) + && host_integerp (TREE_OPERAND (bfr, 2), 1)) + { + start_offset = tree_low_cst (TREE_OPERAND (bfr, 1), 1); + chunk_size = tree_low_cst (TREE_OPERAND (bfr, 2), 1); } + else + start_offset = chunk_size = 0; + + generate_subtree_copies (access->first_child, access->base, 0, + start_offset, chunk_size, gsi, write, write); } + return true; } -/* Add to LISTBEFOREP statements that copy scalarized members of ELT - that overlap with BIT_FIELD_REF<(ELT->element), BLEN, BPOS> back - into the full variable, and to LISTAFTERP, if non-NULL, statements - that copy the (presumably modified) overlapping portions of the - full variable back to the scalarized variables. */ +/* Store all replacements in the access tree rooted in TOP_RACC either to their + base aggregate if there are unscalarized data or directly to LHS + otherwise. */ static void -sra_sync_for_bitfield_assignment (gimple_seq *seq_before_p, - gimple_seq *seq_after_p, - tree blen, tree bpos, - struct sra_elt *elt) +handle_unscalarized_data_in_subtree (struct access *top_racc, tree lhs, + gimple_stmt_iterator *gsi) { - struct sra_elt *fld; - struct bitfield_overlap_info flp; - - FOR_EACH_ACTUAL_CHILD (fld, elt) - if (bitfield_overlaps_p (blen, bpos, fld, &flp)) - { - if (fld->replacement || (!flp.overlap_len && !flp.overlap_pos)) - { - generate_copy_inout (fld, false, generate_element_ref (fld), - seq_before_p); - mark_no_warning (fld); - if (seq_after_p) - generate_copy_inout (fld, true, generate_element_ref (fld), - seq_after_p); - } - else - { - tree flen = flp.overlap_len ? flp.overlap_len : flp.field_len; - tree fpos = flp.overlap_pos ? flp.overlap_pos : bitsize_int (0); - - sra_sync_for_bitfield_assignment (seq_before_p, seq_after_p, - flen, fpos, fld); - } - } + if (top_racc->grp_unscalarized_data) + generate_subtree_copies (top_racc->first_child, top_racc->base, 0, 0, 0, + gsi, false, false); + else + generate_subtree_copies (top_racc->first_child, lhs, top_racc->offset, + 0, 0, gsi, false, false); } -/* Scalarize a USE. To recap, this is either a simple reference to ELT, - if elt is scalar, or some occurrence of ELT that requires a complete - aggregate. IS_OUTPUT is true if ELT is being modified. */ + +/* Try to generate statements to load all sub-replacements in an access + (sub)tree (LACC is the first child) from scalar replacements in the TOP_RACC + (sub)tree. If that is not possible, refresh the TOP_RACC base aggregate and + load the accesses from it. LEFT_OFFSET is the offset of the left whole + subtree being copied, RIGHT_OFFSET is the same thing for the right subtree. + GSI is stmt iterator used for statement insertions. *REFRESHED is true iff + the rhs top aggregate has already been refreshed by contents of its scalar + reductions and is set to true if this function has to do it. */ static void -scalarize_use (struct sra_elt *elt, tree *expr_p, gimple_stmt_iterator *gsi, - bool is_output, bool use_all) +load_assign_lhs_subreplacements (struct access *lacc, struct access *top_racc, + HOST_WIDE_INT left_offset, + HOST_WIDE_INT right_offset, + gimple_stmt_iterator *old_gsi, + gimple_stmt_iterator *new_gsi, + bool *refreshed, tree lhs) { - gimple stmt = gsi_stmt (*gsi); - tree bfexpr; - - if (elt->replacement) - { - tree replacement = elt->replacement; - - /* If we have a replacement, then updating the reference is as - simple as modifying the existing statement in place. */ - if (is_output - && TREE_CODE (elt->replacement) == BIT_FIELD_REF - && is_gimple_reg (TREE_OPERAND (elt->replacement, 0)) - && is_gimple_assign (stmt) - && gimple_assign_lhs_ptr (stmt) == expr_p) - { - gimple_seq newseq; - /* RHS must be a single operand. */ - gcc_assert (gimple_assign_single_p (stmt)); - newseq = sra_build_elt_assignment (elt, gimple_assign_rhs1 (stmt)); - sra_replace (gsi, newseq); - return; - } - else if (!is_output - && TREE_CODE (elt->replacement) == BIT_FIELD_REF - && is_gimple_assign (stmt) - && gimple_assign_rhs1_ptr (stmt) == expr_p) - { - tree tmp = make_rename_temp - (TREE_TYPE (gimple_assign_lhs (stmt)), "SR"); - gimple_seq newseq = sra_build_assignment (tmp, REPLDUP (elt->replacement)); - - sra_insert_before (gsi, newseq); - replacement = tmp; - } - if (is_output) - update_stmt_if_modified (stmt); - *expr_p = REPLDUP (replacement); - update_stmt (stmt); - } - else if (use_all && is_output - && is_gimple_assign (stmt) - && TREE_CODE (bfexpr - = gimple_assign_lhs (stmt)) == BIT_FIELD_REF - && &TREE_OPERAND (bfexpr, 0) == expr_p - && INTEGRAL_TYPE_P (TREE_TYPE (bfexpr)) - && TREE_CODE (TREE_TYPE (*expr_p)) == RECORD_TYPE) + do { - gimple_seq seq_before = NULL; - gimple_seq seq_after = NULL; - tree blen = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 1)); - tree bpos = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 2)); - bool update = false; - - if (!elt->use_block_copy) + if (lacc->grp_to_be_replaced) { - tree type = TREE_TYPE (bfexpr); - tree var = make_rename_temp (type, "SR"), tmp, vpos; - gimple st; + struct access *racc; + HOST_WIDE_INT offset = lacc->offset - left_offset + right_offset; + gimple stmt; + tree rhs; - gimple_assign_set_lhs (stmt, var); - update = true; - - if (!TYPE_UNSIGNED (type)) + racc = find_access_in_subtree (top_racc, offset, lacc->size); + if (racc && racc->grp_to_be_replaced) { - type = unsigned_type_for (type); - tmp = make_rename_temp (type, "SR"); - st = gimple_build_assign (tmp, fold_convert (type, var)); - gimple_seq_add_stmt (&seq_after, st); - var = tmp; + rhs = get_access_replacement (racc); + if (!useless_type_conversion_p (lacc->type, racc->type)) + rhs = fold_build1 (VIEW_CONVERT_EXPR, lacc->type, rhs); } - - /* If VAR is wider than BLEN bits, it is padded at the - most-significant end. We want to set VPOS such that - <BIT_FIELD_REF VAR BLEN VPOS> would refer to the - least-significant BLEN bits of VAR. */ - if (BYTES_BIG_ENDIAN) - vpos = size_binop (MINUS_EXPR, TYPE_SIZE (type), blen); else - vpos = bitsize_int (0); - sra_explode_bitfield_assignment - (var, vpos, false, &seq_after, blen, bpos, elt); - } - else - sra_sync_for_bitfield_assignment - (&seq_before, &seq_after, blen, bpos, elt); - - if (seq_before) - { - mark_all_v_defs_seq (seq_before); - sra_insert_before (gsi, seq_before); - } - if (seq_after) - { - mark_all_v_defs_seq (seq_after); - sra_insert_after (gsi, seq_after); - } - - if (update) - update_stmt (stmt); - } - else if (use_all && !is_output - && is_gimple_assign (stmt) - && TREE_CODE (bfexpr - = gimple_assign_rhs1 (stmt)) == BIT_FIELD_REF - && &TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == expr_p - && INTEGRAL_TYPE_P (TREE_TYPE (bfexpr)) - && TREE_CODE (TREE_TYPE (*expr_p)) == RECORD_TYPE) - { - gimple_seq seq = NULL; - tree blen = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 1)); - tree bpos = fold_convert (bitsizetype, TREE_OPERAND (bfexpr, 2)); - bool update = false; - - if (!elt->use_block_copy) - { - tree type = TREE_TYPE (bfexpr); - tree var = make_rename_temp (type, "SR"), tmp, vpos; - gimple st = NULL; - - gimple_assign_set_rhs1 (stmt, var); - update = true; - - if (!TYPE_UNSIGNED (type)) { - type = unsigned_type_for (type); - tmp = make_rename_temp (type, "SR"); - st = gimple_build_assign (var, - fold_convert (TREE_TYPE (var), tmp)); - var = tmp; - } + bool repl_found; - gimple_seq_add_stmt (&seq, - gimple_build_assign - (var, build_int_cst_wide (type, 0, 0))); + /* No suitable access on the right hand side, need to load from + the aggregate. See if we have to update it first... */ + if (!*refreshed) + { + gcc_assert (top_racc->first_child); + handle_unscalarized_data_in_subtree (top_racc, lhs, old_gsi); + *refreshed = true; + } - /* If VAR is wider than BLEN bits, it is padded at the - most-significant end. We want to set VPOS such that - <BIT_FIELD_REF VAR BLEN VPOS> would refer to the - least-significant BLEN bits of VAR. */ - if (BYTES_BIG_ENDIAN) - vpos = size_binop (MINUS_EXPR, TYPE_SIZE (type), blen); - else - vpos = bitsize_int (0); - sra_explode_bitfield_assignment - (var, vpos, true, &seq, blen, bpos, elt); + rhs = unshare_expr (top_racc->base); + repl_found = build_ref_for_offset (&rhs, + TREE_TYPE (top_racc->base), + lacc->offset - left_offset, + lacc->type, false); + gcc_assert (repl_found); + } - if (st) - gimple_seq_add_stmt (&seq, st); + stmt = gimple_build_assign (get_access_replacement (lacc), rhs); + gsi_insert_after (new_gsi, stmt, GSI_NEW_STMT); + update_stmt (stmt); } - else - sra_sync_for_bitfield_assignment - (&seq, NULL, blen, bpos, elt); - - if (seq) + else if (lacc->grp_read && !lacc->grp_covered && !*refreshed) { - mark_all_v_defs_seq (seq); - sra_insert_before (gsi, seq); + handle_unscalarized_data_in_subtree (top_racc, lhs, old_gsi); + *refreshed = true; } - if (update) - update_stmt (stmt); - } - else - { - gimple_seq seq = NULL; - - /* Otherwise we need some copies. If ELT is being read, then we - want to store all (modified) sub-elements back into the - structure before the reference takes place. If ELT is being - written, then we want to load the changed values back into - our shadow variables. */ - /* ??? We don't check modified for reads, we just always write all of - the values. We should be able to record the SSA number of the VOP - for which the values were last read. If that number matches the - SSA number of the VOP in the current statement, then we needn't - emit an assignment. This would also eliminate double writes when - a structure is passed as more than one argument to a function call. - This optimization would be most effective if sra_walk_function - processed the blocks in dominator order. */ - - generate_copy_inout (elt, is_output, generate_element_ref (elt), &seq); - if (seq == NULL) - return; - mark_all_v_defs_seq (seq); - if (is_output) - sra_insert_after (gsi, seq); - else - { - sra_insert_before (gsi, seq); - if (use_all) - mark_no_warning (elt); - } + if (lacc->first_child) + load_assign_lhs_subreplacements (lacc->first_child, top_racc, + left_offset, right_offset, + old_gsi, new_gsi, refreshed, lhs); + lacc = lacc->next_sibling; } + while (lacc); } -/* Scalarize a COPY. To recap, this is an assignment statement between - two scalarizable references, LHS_ELT and RHS_ELT. */ +/* Modify assignments with a CONSTRUCTOR on their RHS. STMT contains a pointer + to the assignment and GSI is the statement iterator pointing at it. Returns + the same values as sra_modify_assign. */ -static void -scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, - gimple_stmt_iterator *gsi) +static enum scan_assign_result +sra_modify_constructor_assign (gimple *stmt, gimple_stmt_iterator *gsi) { - gimple_seq seq; - gimple stmt; - - if (lhs_elt->replacement && rhs_elt->replacement) - { - /* If we have two scalar operands, modify the existing statement. */ - stmt = gsi_stmt (*gsi); + tree lhs = gimple_assign_lhs (*stmt); + struct access *acc; - /* See the commentary in sra_walk_function concerning - RETURN_EXPR, and why we should never see one here. */ - gcc_assert (is_gimple_assign (stmt)); - gcc_assert (gimple_assign_copy_p (stmt)); + acc = get_access_for_expr (lhs); + if (!acc) + return SRA_SA_NONE; - - gimple_assign_set_lhs (stmt, lhs_elt->replacement); - gimple_assign_set_rhs1 (stmt, REPLDUP (rhs_elt->replacement)); - update_stmt (stmt); - } - else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy) + if (VEC_length (constructor_elt, + CONSTRUCTOR_ELTS (gimple_assign_rhs1 (*stmt))) > 0) { - /* If either side requires a block copy, then sync the RHS back - to the original structure, leave the original assignment - statement (which will perform the block copy), then load the - LHS values out of its now-updated original structure. */ - /* ??? Could perform a modified pair-wise element copy. That - would at least allow those elements that are instantiated in - both structures to be optimized well. */ - - seq = NULL; - generate_copy_inout (rhs_elt, false, - generate_element_ref (rhs_elt), &seq); - if (seq) - { - mark_all_v_defs_seq (seq); - sra_insert_before (gsi, seq); - } + /* I have never seen this code path trigger but if it can happen the + following should handle it gracefully. */ + if (access_has_children_p (acc)) + generate_subtree_copies (acc->first_child, acc->base, 0, 0, 0, gsi, + true, true); + return SRA_SA_PROCESSED; + } - seq = NULL; - generate_copy_inout (lhs_elt, true, - generate_element_ref (lhs_elt), &seq); - if (seq) - { - mark_all_v_defs_seq (seq); - sra_insert_after (gsi, seq); - } + if (acc->grp_covered) + { + init_subtree_with_zero (acc, gsi, false); + unlink_stmt_vdef (*stmt); + gsi_remove (gsi, true); + return SRA_SA_REMOVED; } else { - /* Otherwise both sides must be fully instantiated. In which - case perform pair-wise element assignments and replace the - original block copy statement. */ - - stmt = gsi_stmt (*gsi); - update_stmt_if_modified (stmt); - - seq = NULL; - generate_element_copy (lhs_elt, rhs_elt, &seq); - gcc_assert (seq); - mark_all_v_defs_seq (seq); - sra_replace (gsi, seq); + init_subtree_with_zero (acc, gsi, true); + return SRA_SA_PROCESSED; } } -/* Scalarize an INIT. To recap, this is an assignment to a scalarizable - reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or - COMPLEX_EXPR. If RHS is NULL, it should be treated as an empty - CONSTRUCTOR. */ -static void -scalarize_init (struct sra_elt *lhs_elt, tree rhs, gimple_stmt_iterator *gsi) +/* Callback of scan_function to process assign statements. It examines both + sides of the statement, replaces them with a scalare replacement if there is + one and generating copying of replacements if scalarized aggregates have been + used in the assignment. STMT is a pointer to the assign statement, GSI is + used to hold generated statements for type conversions and subtree + copying. */ + +static enum scan_assign_result +sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi, + void *data ATTRIBUTE_UNUSED) { - bool result = true; - gimple_seq seq = NULL, init_seq = NULL; + struct access *lacc, *racc; + tree lhs, rhs; + bool modify_this_stmt = false; + bool force_gimple_rhs = false; - /* Generate initialization statements for all members extant in the RHS. */ - if (rhs) + if (!gimple_assign_single_p (*stmt)) + return SRA_SA_NONE; + lhs = gimple_assign_lhs (*stmt); + rhs = gimple_assign_rhs1 (*stmt); + + if (TREE_CODE (rhs) == CONSTRUCTOR) + return sra_modify_constructor_assign (stmt, gsi); + + if (TREE_CODE (rhs) == REALPART_EXPR || TREE_CODE (lhs) == REALPART_EXPR + || TREE_CODE (rhs) == IMAGPART_EXPR || TREE_CODE (lhs) == IMAGPART_EXPR + || TREE_CODE (rhs) == BIT_FIELD_REF || TREE_CODE (lhs) == BIT_FIELD_REF) { - /* Unshare the expression just in case this is from a decl's initial. */ - rhs = unshare_expr (rhs); - result = generate_element_init (lhs_elt, rhs, &init_seq); + modify_this_stmt = sra_modify_expr (gimple_assign_rhs1_ptr (*stmt), + gsi, false, data); + modify_this_stmt |= sra_modify_expr (gimple_assign_lhs_ptr (*stmt), + gsi, true, data); + return modify_this_stmt ? SRA_SA_PROCESSED : SRA_SA_NONE; } - if (!result) + lacc = get_access_for_expr (lhs); + racc = get_access_for_expr (rhs); + if (!lacc && !racc) + return SRA_SA_NONE; + + if (lacc && lacc->grp_to_be_replaced) { - /* If we failed to convert the entire initializer, then we must - leave the structure assignment in place and must load values - from the structure into the slots for which we did not find - constants. The easiest way to do this is to generate a complete - copy-out, and then follow that with the constant assignments - that we were able to build. DCE will clean things up. */ - gimple_seq seq0 = NULL; - generate_copy_inout (lhs_elt, true, generate_element_ref (lhs_elt), - &seq0); - gimple_seq_add_seq (&seq0, seq); - seq = seq0; + lhs = get_access_replacement (lacc); + gimple_assign_set_lhs (*stmt, lhs); + modify_this_stmt = true; + if (lacc->grp_partial_lhs) + force_gimple_rhs = true; } - else + + if (racc && racc->grp_to_be_replaced) { - /* CONSTRUCTOR is defined such that any member not mentioned is assigned - a zero value. Initialize the rest of the instantiated elements. */ - generate_element_zero (lhs_elt, &seq); - gimple_seq_add_seq (&seq, init_seq); + rhs = get_access_replacement (racc); + modify_this_stmt = true; + if (racc->grp_partial_lhs) + force_gimple_rhs = true; } - if (lhs_elt->use_block_copy || !result) + if (modify_this_stmt) { - /* Since LHS is not fully instantiated, we must leave the structure - assignment in place. Treating this case differently from a USE - exposes constants to later optimizations. */ - if (seq) + if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) { - mark_all_v_defs_seq (seq); - sra_insert_after (gsi, seq); + /* If we can avoid creating a VIEW_CONVERT_EXPR do so. + ??? This should move to fold_stmt which we simply should + call after building a VIEW_CONVERT_EXPR here. */ + if (AGGREGATE_TYPE_P (TREE_TYPE (lhs)) + && !access_has_children_p (lacc)) + { + tree expr = unshare_expr (lhs); + if (build_ref_for_offset (&expr, TREE_TYPE (lhs), racc->offset, + TREE_TYPE (rhs), false)) + { + lhs = expr; + gimple_assign_set_lhs (*stmt, expr); + } + } + else if (AGGREGATE_TYPE_P (TREE_TYPE (rhs)) + && !access_has_children_p (racc)) + { + tree expr = unshare_expr (rhs); + if (build_ref_for_offset (&expr, TREE_TYPE (rhs), lacc->offset, + TREE_TYPE (lhs), false)) + rhs = expr; + } + if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) + rhs = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), rhs); } - } - else - { - /* The LHS is fully instantiated. The list of initializations - replaces the original structure assignment. */ - gcc_assert (seq); - update_stmt_if_modified (gsi_stmt (*gsi)); - mark_all_v_defs_seq (seq); - sra_replace (gsi, seq); - } -} -/* A subroutine of scalarize_ldst called via walk_tree. Set TREE_NO_TRAP - on all INDIRECT_REFs. */ - -static tree -mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) -{ - tree t = *tp; - - if (TREE_CODE (t) == INDIRECT_REF) - { - TREE_THIS_NOTRAP (t) = 1; - *walk_subtrees = 0; + if (force_gimple_rhs) + rhs = force_gimple_operand_gsi (gsi, rhs, true, NULL_TREE, + true, GSI_SAME_STMT); + if (gimple_assign_rhs1 (*stmt) != rhs) + { + gimple_assign_set_rhs_from_tree (gsi, rhs); + gcc_assert (*stmt == gsi_stmt (*gsi)); + } } - else if (IS_TYPE_OR_DECL_P (t)) - *walk_subtrees = 0; - - return NULL; -} - -/* Scalarize a LDST. To recap, this is an assignment between one scalarizable - reference ELT and one non-scalarizable reference OTHER. IS_OUTPUT is true - if ELT is on the left-hand side. */ - -static void -scalarize_ldst (struct sra_elt *elt, tree other, - gimple_stmt_iterator *gsi, bool is_output) -{ - /* Shouldn't have gotten called for a scalar. */ - gcc_assert (!elt->replacement); - if (elt->use_block_copy) - { - /* Since ELT is not fully instantiated, we have to leave the - block copy in place. Treat this as a USE. */ - scalarize_use (elt, NULL, gsi, is_output, false); + /* From this point on, the function deals with assignments in between + aggregates when at least one has scalar reductions of some of its + components. There are three possible scenarios: Both the LHS and RHS have + to-be-scalarized components, 2) only the RHS has or 3) only the LHS has. + + In the first case, we would like to load the LHS components from RHS + components whenever possible. If that is not possible, we would like to + read it directly from the RHS (after updating it by storing in it its own + components). If there are some necessary unscalarized data in the LHS, + those will be loaded by the original assignment too. If neither of these + cases happen, the original statement can be removed. Most of this is done + by load_assign_lhs_subreplacements. + + In the second case, we would like to store all RHS scalarized components + directly into LHS and if they cover the aggregate completely, remove the + statement too. In the third case, we want the LHS components to be loaded + directly from the RHS (DSE will remove the original statement if it + becomes redundant). + + This is a bit complex but manageable when types match and when unions do + not cause confusion in a way that we cannot really load a component of LHS + from the RHS or vice versa (the access representing this level can have + subaccesses that are accessible only through a different union field at a + higher level - different from the one used in the examined expression). + Unions are fun. + + Therefore, I specially handle a fourth case, happening when there is a + specific type cast or it is impossible to locate a scalarized subaccess on + the other side of the expression. If that happens, I simply "refresh" the + RHS by storing in it is scalarized components leave the original statement + there to do the copying and then load the scalar replacements of the LHS. + This is what the first branch does. */ + + if (contains_view_convert_expr_p (rhs) || contains_view_convert_expr_p (lhs) + || (access_has_children_p (racc) + && !ref_expr_for_all_replacements_p (racc, lhs, racc->offset)) + || (access_has_children_p (lacc) + && !ref_expr_for_all_replacements_p (lacc, rhs, lacc->offset))) + { + if (access_has_children_p (racc)) + generate_subtree_copies (racc->first_child, racc->base, 0, 0, 0, + gsi, false, false); + if (access_has_children_p (lacc)) + generate_subtree_copies (lacc->first_child, lacc->base, 0, 0, 0, + gsi, true, true); } else { - /* The interesting case is when ELT is fully instantiated. In this - case we can have each element stored/loaded directly to/from the - corresponding slot in OTHER. This avoids a block copy. */ - - gimple_seq seq = NULL; - gimple stmt = gsi_stmt (*gsi); - - update_stmt_if_modified (stmt); - generate_copy_inout (elt, is_output, other, &seq); - gcc_assert (seq); - mark_all_v_defs_seq (seq); - - /* Preserve EH semantics. */ - if (stmt_ends_bb_p (stmt)) + if (access_has_children_p (lacc) && access_has_children_p (racc)) { - gimple_stmt_iterator si; - gimple first; - gimple_seq blist = NULL; - bool thr = stmt_could_throw_p (stmt); - - /* If the last statement of this BB created an EH edge - before scalarization, we have to locate the first - statement that can throw in the new statement list and - use that as the last statement of this BB, such that EH - semantics is preserved. All statements up to this one - are added to the same BB. All other statements in the - list will be added to normal outgoing edges of the same - BB. If they access any memory, it's the same memory, so - we can assume they won't throw. */ - si = gsi_start (seq); - for (first = gsi_stmt (si); - thr && !gsi_end_p (si) && !stmt_could_throw_p (first); - first = gsi_stmt (si)) + gimple_stmt_iterator orig_gsi = *gsi; + bool refreshed; + + if (lacc->grp_read && !lacc->grp_covered) { - gsi_remove (&si, false); - gimple_seq_add_stmt (&blist, first); + handle_unscalarized_data_in_subtree (racc, lhs, gsi); + refreshed = true; } + else + refreshed = false; - /* Extract the first remaining statement from LIST, this is - the EH statement if there is one. */ - gsi_remove (&si, false); - - if (blist) - sra_insert_before (gsi, blist); - - /* Replace the old statement with this new representative. */ - gsi_replace (gsi, first, true); + load_assign_lhs_subreplacements (lacc->first_child, racc, + lacc->offset, racc->offset, + &orig_gsi, gsi, &refreshed, lhs); + if (!refreshed || !racc->grp_unscalarized_data) + { + if (*stmt == gsi_stmt (*gsi)) + gsi_next (gsi); - if (!gsi_end_p (si)) + unlink_stmt_vdef (*stmt); + gsi_remove (&orig_gsi, true); + return SRA_SA_REMOVED; + } + } + else + { + if (access_has_children_p (racc)) { - /* If any reference would trap, then they all would. And more - to the point, the first would. Therefore none of the rest - will trap since the first didn't. Indicate this by - iterating over the remaining statements and set - TREE_THIS_NOTRAP in all INDIRECT_REFs. */ - do + if (!racc->grp_unscalarized_data) { - walk_gimple_stmt (&si, NULL, mark_notrap, NULL); - gsi_next (&si); + generate_subtree_copies (racc->first_child, lhs, + racc->offset, 0, 0, gsi, + false, false); + gcc_assert (*stmt == gsi_stmt (*gsi)); + unlink_stmt_vdef (*stmt); + gsi_remove (gsi, true); + return SRA_SA_REMOVED; } - while (!gsi_end_p (si)); - - insert_edge_copies_seq (seq, gsi_bb (*gsi)); + else + generate_subtree_copies (racc->first_child, lhs, + racc->offset, 0, 0, gsi, false, true); } + else if (access_has_children_p (lacc)) + generate_subtree_copies (lacc->first_child, rhs, lacc->offset, + 0, 0, gsi, true, true); } - else - sra_replace (gsi, seq); } + return modify_this_stmt ? SRA_SA_PROCESSED : SRA_SA_NONE; } -/* Generate initializations for all scalarizable parameters. */ +/* Generate statements initializing scalar replacements of parts of function + parameters. */ static void -scalarize_parms (void) +initialize_parameter_reductions (void) { + gimple_stmt_iterator gsi; gimple_seq seq = NULL; - unsigned i; - bitmap_iterator bi; - - EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, bi) - { - tree var = referenced_var (i); - struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); - generate_copy_inout (elt, true, var, &seq); - } + tree parm; - if (seq) + for (parm = DECL_ARGUMENTS (current_function_decl); + parm; + parm = TREE_CHAIN (parm)) { - insert_edge_copies_seq (seq, ENTRY_BLOCK_PTR); - mark_all_v_defs_seq (seq); - } -} - -/* Entry point to phase 4. Update the function to match replacements. */ - -static void -scalarize_function (void) -{ - static const struct sra_walk_fns fns = { - scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false - }; + VEC (access_p, heap) *access_vec; + struct access *access; - sra_walk_function (&fns); - scalarize_parms (); - gsi_commit_edge_inserts (); -} - - -/* Debug helper function. Print ELT in a nice human-readable format. */ + if (!bitmap_bit_p (candidate_bitmap, DECL_UID (parm))) + continue; + access_vec = get_base_access_vector (parm); + if (!access_vec) + continue; -static void -dump_sra_elt_name (FILE *f, struct sra_elt *elt) -{ - if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE) - { - fputs (elt->element == integer_zero_node ? "__real__ " : "__imag__ ", f); - dump_sra_elt_name (f, elt->parent); - } - else - { - if (elt->parent) - dump_sra_elt_name (f, elt->parent); - if (DECL_P (elt->element)) + if (!seq) { - if (TREE_CODE (elt->element) == FIELD_DECL) - fputc ('.', f); - print_generic_expr (f, elt->element, dump_flags); + seq = gimple_seq_alloc (); + gsi = gsi_start (seq); } - else if (TREE_CODE (elt->element) == BIT_FIELD_REF) - fprintf (f, "$B" HOST_WIDE_INT_PRINT_DEC "F" HOST_WIDE_INT_PRINT_DEC, - tree_low_cst (TREE_OPERAND (elt->element, 2), 1), - tree_low_cst (TREE_OPERAND (elt->element, 1), 1)); - else if (TREE_CODE (elt->element) == RANGE_EXPR) - fprintf (f, "["HOST_WIDE_INT_PRINT_DEC".."HOST_WIDE_INT_PRINT_DEC"]", - TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 0)), - TREE_INT_CST_LOW (TREE_OPERAND (elt->element, 1))); - else - fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]", - TREE_INT_CST_LOW (elt->element)); - } -} -/* Likewise, but callable from the debugger. */ + for (access = VEC_index (access_p, access_vec, 0); + access; + access = access->next_grp) + generate_subtree_copies (access, parm, 0, 0, 0, &gsi, true, true); + } -void -debug_sra_elt_name (struct sra_elt *elt) -{ - dump_sra_elt_name (stderr, elt); - fputc ('\n', stderr); + if (seq) + gsi_insert_seq_on_edge_immediate (single_succ_edge (ENTRY_BLOCK_PTR), seq); } -static void -sra_init_cache (void) +/* The "main" function of intraprocedural SRA passes. Runs the analysis and if + it reveals there are components of some aggregates to be scalarized, it runs + the required transformations. */ +static unsigned int +perform_intra_sra (void) { - if (sra_type_decomp_cache) - return; + int ret = 0; + sra_initialize (); - sra_type_decomp_cache = BITMAP_ALLOC (NULL); - sra_type_inst_cache = BITMAP_ALLOC (NULL); -} + if (!find_var_candidates ()) + goto out; + if (!scan_function (build_access_from_expr, build_accesses_from_assign, NULL, + true, NULL)) + goto out; -/* Main entry point. */ + if (!analyze_all_variable_accesses ()) + goto out; -static unsigned int -tree_sra (void) -{ - /* Initialize local variables. */ - gcc_obstack_init (&sra_obstack); - sra_candidates = BITMAP_ALLOC (NULL); - needs_copy_in = BITMAP_ALLOC (NULL); - sra_init_cache (); - sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL); - - /* Scan. If we find anything, instantiate and scalarize. */ - if (find_candidates_for_sra ()) - { - scan_function (); - decide_instantiations (); - scalarize_function (); - } + scan_function (sra_modify_expr, sra_modify_assign, NULL, + false, NULL); + initialize_parameter_reductions (); + ret = TODO_update_ssa; - /* Free allocated memory. */ - htab_delete (sra_map); - sra_map = NULL; - BITMAP_FREE (sra_candidates); - BITMAP_FREE (needs_copy_in); - BITMAP_FREE (sra_type_decomp_cache); - BITMAP_FREE (sra_type_inst_cache); - obstack_free (&sra_obstack, NULL); - return 0; + out: + sra_deinitialize (); + return ret; } +/* Perform early intraprocedural SRA. */ static unsigned int -tree_sra_early (void) +early_intra_sra (void) { - unsigned int ret; - - early_sra = true; - ret = tree_sra (); - early_sra = false; + sra_mode = SRA_MODE_EARLY_INTRA; + return perform_intra_sra (); +} - return ret; +/* Perform "late" intraprocedural SRA. */ +static unsigned int +late_intra_sra (void) +{ + sra_mode = SRA_MODE_INTRA; + return perform_intra_sra (); } + static bool -gate_sra (void) +gate_intra_sra (void) { return flag_tree_sra != 0; } + struct gimple_opt_pass pass_sra_early = { { GIMPLE_PASS, - "esra", /* name */ - gate_sra, /* gate */ - tree_sra_early, /* execute */ + "esra", /* name */ + gate_intra_sra, /* gate */ + early_intra_sra, /* execute */ NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ TV_TREE_SRA, /* tv_id */ - PROP_cfg | PROP_ssa, /* properties_required */ + PROP_cfg | PROP_ssa, /* properties_required */ 0, /* properties_provided */ - 0, /* properties_destroyed */ + 0, /* properties_destroyed */ 0, /* todo_flags_start */ TODO_dump_func | TODO_update_ssa @@ -3683,20 +2317,21 @@ struct gimple_opt_pass pass_sra_early = } }; + struct gimple_opt_pass pass_sra = { { GIMPLE_PASS, - "sra", /* name */ - gate_sra, /* gate */ - tree_sra, /* execute */ + "sra", /* name */ + gate_intra_sra, /* gate */ + late_intra_sra, /* execute */ NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ TV_TREE_SRA, /* tv_id */ - PROP_cfg | PROP_ssa, /* properties_required */ + PROP_cfg | PROP_ssa, /* properties_required */ 0, /* properties_provided */ - 0, /* properties_destroyed */ + 0, /* properties_destroyed */ TODO_update_address_taken, /* todo_flags_start */ TODO_dump_func | TODO_update_ssa |