diff options
author | bonzini <bonzini@138bc75d-0d04-0410-961f-82ee72b054a4> | 2006-11-04 08:36:45 +0000 |
---|---|---|
committer | bonzini <bonzini@138bc75d-0d04-0410-961f-82ee72b054a4> | 2006-11-04 08:36:45 +0000 |
commit | 42a3a38baf5b4db458e3034643c30598706be2fd (patch) | |
tree | ba024c11cf4d0fba9de80471ac1eedc16e891dca | |
parent | b8dcd8a1e4168f06399318ef24d561455003f62d (diff) | |
download | gcc-42a3a38baf5b4db458e3034643c30598706be2fd.tar.gz |
2006-11-03 Paolo Bonzini <bonzini@gnu.org>
Steven Bosscher <stevenb.gcc@gmail.com>
* fwprop.c: New file.
* Makefile.in: Add fwprop.o.
* tree-pass.h (pass_rtl_fwprop, pass_rtl_fwprop_with_addr): New.
* passes.c (init_optimization_passes): Schedule forward propagation.
* rtlanal.c (loc_mentioned_in_p): Support NULL value of the second
parameter.
* timevar.def (TV_FWPROP): New.
* common.opt (-fforward-propagate): New.
* opts.c (decode_options): Enable forward propagation at -O2.
* gcse.c (one_cprop_pass): Do not run local cprop unless touching jumps.
* cse.c (fold_rtx_subreg, fold_rtx_mem, fold_rtx_mem_1, find_best_addr,
canon_for_address, table_size): Remove.
(new_basic_block, insert, remove_from_table): Remove references to
table_size.
(fold_rtx): Process SUBREGs and MEMs with equiv_constant, make
simplification loop more straightforward by not calling fold_rtx
recursively.
(equiv_constant): Move here a small part of fold_rtx_subreg,
do not call fold_rtx. Call avoid_constant_pool_reference
to process MEMs.
* recog.c (canonicalize_change_group): New.
* recog.h (canonicalize_change_group): New.
* doc/invoke.texi (Optimization Options): Document fwprop.
* doc/passes.texi (RTL passes): Document fwprop.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@118475 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 30 | ||||
-rw-r--r-- | gcc/Makefile.in | 5 | ||||
-rw-r--r-- | gcc/common.opt | 4 | ||||
-rw-r--r-- | gcc/cse.c | 969 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 12 | ||||
-rw-r--r-- | gcc/doc/passes.texi | 9 | ||||
-rw-r--r-- | gcc/fwprop.c | 1034 | ||||
-rw-r--r-- | gcc/gcse.c | 3 | ||||
-rw-r--r-- | gcc/opts.c | 1 | ||||
-rw-r--r-- | gcc/passes.c | 2 | ||||
-rw-r--r-- | gcc/recog.c | 22 | ||||
-rw-r--r-- | gcc/recog.h | 1 | ||||
-rw-r--r-- | gcc/rtlanal.c | 9 | ||||
-rw-r--r-- | gcc/timevar.def | 1 | ||||
-rw-r--r-- | gcc/tree-pass.h | 2 |
15 files changed, 1212 insertions, 892 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 67734b04b2c..029b1c9d66a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,32 @@ +2006-11-03 Paolo Bonzini <bonzini@gnu.org> + Steven Bosscher <stevenb.gcc@gmail.com> + + * fwprop.c: New file. + * Makefile.in: Add fwprop.o. + * tree-pass.h (pass_rtl_fwprop, pass_rtl_fwprop_with_addr): New. + * passes.c (init_optimization_passes): Schedule forward propagation. + * rtlanal.c (loc_mentioned_in_p): Support NULL value of the second + parameter. + * timevar.def (TV_FWPROP): New. + * common.opt (-fforward-propagate): New. + * opts.c (decode_options): Enable forward propagation at -O2. + * gcse.c (one_cprop_pass): Do not run local cprop unless touching jumps. + * cse.c (fold_rtx_subreg, fold_rtx_mem, fold_rtx_mem_1, find_best_addr, + canon_for_address, table_size): Remove. + (new_basic_block, insert, remove_from_table): Remove references to + table_size. + (fold_rtx): Process SUBREGs and MEMs with equiv_constant, make + simplification loop more straightforward by not calling fold_rtx + recursively. + (equiv_constant): Move here a small part of fold_rtx_subreg, + do not call fold_rtx. Call avoid_constant_pool_reference + to process MEMs. + * recog.c (canonicalize_change_group): New. + * recog.h (canonicalize_change_group): New. + + * doc/invoke.texi (Optimization Options): Document fwprop. + * doc/passes.texi (RTL passes): Document fwprop. + 2006-11-03 Geoffrey Keating <geoffk@apple.com> * c-decl.c (WANT_C99_INLINE_SEMANTICS): New, set to 1. @@ -23,7 +52,6 @@ 2006-11-03 Paul Brook <paul@codesourcery.com> - gcc/ * config/arm/arm.c (arm_file_start): New function. (TARGET_ASM_FILE_START): Define. (arm_default_cpu): New variable. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 55682b5e0fd..59be2fe09c6 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -997,7 +997,7 @@ OBJS-common = \ debug.o df-core.o df-problems.o df-scan.o dfp.o diagnostic.o dojump.o \ dominance.o loop-doloop.o \ dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \ - expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \ + expmed.o expr.o final.o flow.o fold-const.o function.o fwprop.o gcse.o \ genrtl.o ggc-common.o global.o graph.o gtype-desc.o \ haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o \ insn-extract.o insn-opinit.o insn-output.o insn-peep.o insn-recog.o \ @@ -2336,6 +2336,9 @@ cse.o : cse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \ hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) $(EXPR_H) toplev.h \ output.h $(FUNCTION_H) $(BASIC_BLOCK_H) $(GGC_H) $(TM_P_H) $(TIMEVAR_H) \ except.h $(TARGET_H) $(PARAMS_H) rtlhooks-def.h tree-pass.h $(REAL_H) +fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ + toplev.h insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H) $(BASIC_BLOCK_H) \ + output.h $(DF_H) alloc-pool.h $(TIMEVAR_H) tree-pass.h web.o : web.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ hard-reg-set.h $(FLAGS_H) $(BASIC_BLOCK_H) $(FUNCTION_H) output.h toplev.h \ $(DF_H) $(OBSTACK_H) $(TIMEVAR_H) tree-pass.h diff --git a/gcc/common.opt b/gcc/common.opt index 204560f5fba..4aaa4d20f18 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -444,6 +444,10 @@ fforce-mem Common Report Var(flag_force_mem) Copy memory operands into registers before use +fforward-propagate +Common Report Var(flag_forward_propagate) +Perform a forward propagation pass on RTL + ; Nonzero means don't put addresses of constant functions in registers. ; Used for compiling the Unix kernel, where strange substitutions are ; done on the assembly output. diff --git a/gcc/cse.c b/gcc/cse.c index 0304d6f763f..198837774ff 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -528,10 +528,6 @@ struct table_elt static struct table_elt *table[HASH_SIZE]; -/* Number of elements in the hash table. */ - -static unsigned int table_size; - /* Chain of `struct table_elt's made so far for this function but currently removed from the table. */ @@ -604,7 +600,6 @@ static inline unsigned safe_hash (rtx, enum machine_mode); static unsigned hash_rtx_string (const char *); static rtx canon_reg (rtx, rtx); -static void find_best_addr (rtx, rtx *, enum machine_mode); static enum rtx_code find_comparison_args (enum rtx_code, rtx *, rtx *, enum machine_mode *, enum machine_mode *); @@ -735,57 +730,6 @@ approx_reg_cost (rtx x) return cost; } -/* Returns a canonical version of X for the address, from the point of view, - that all multiplications are represented as MULT instead of the multiply - by a power of 2 being represented as ASHIFT. */ - -static rtx -canon_for_address (rtx x) -{ - enum rtx_code code; - enum machine_mode mode; - rtx new = 0; - int i; - const char *fmt; - - if (!x) - return x; - - code = GET_CODE (x); - mode = GET_MODE (x); - - switch (code) - { - case ASHIFT: - if (GET_CODE (XEXP (x, 1)) == CONST_INT - && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (mode) - && INTVAL (XEXP (x, 1)) >= 0) - { - new = canon_for_address (XEXP (x, 0)); - new = gen_rtx_MULT (mode, new, - gen_int_mode ((HOST_WIDE_INT) 1 - << INTVAL (XEXP (x, 1)), - mode)); - } - break; - default: - break; - - } - if (new) - return new; - - /* Now recursively process each operand of this operation. */ - fmt = GET_RTX_FORMAT (code); - for (i = 0; i < GET_RTX_LENGTH (code); i++) - if (fmt[i] == 'e') - { - new = canon_for_address (XEXP (x, i)); - XEXP (x, i) = new; - } - return x; -} - /* Return a negative value if an rtx A, whose costs are given by COST_A and REGCOST_A, is more desirable than an rtx B. Return a positive value if A is less desirable, or 0 if the two are @@ -965,8 +909,6 @@ new_basic_block (void) } } - table_size = 0; - #ifdef HAVE_cc0 prev_insn = 0; prev_insn_cc0 = 0; @@ -1377,8 +1319,6 @@ remove_from_table (struct table_elt *elt, unsigned int hash) /* Now add it to the free element chain. */ elt->next_same_hash = free_element_chain; free_element_chain = elt; - - table_size--; } /* Look up X in the hash table and return its table element, @@ -1656,8 +1596,6 @@ insert (rtx x, struct table_elt *classp, unsigned int hash, enum machine_mode mo } } - table_size++; - return elt; } @@ -2824,231 +2762,6 @@ canon_reg (rtx x, rtx insn) return x; } -/* LOC is a location within INSN that is an operand address (the contents of - a MEM). Find the best equivalent address to use that is valid for this - insn. - - On most CISC machines, complicated address modes are costly, and rtx_cost - is a good approximation for that cost. However, most RISC machines have - only a few (usually only one) memory reference formats. If an address is - valid at all, it is often just as cheap as any other address. Hence, for - RISC machines, we use `address_cost' to compare the costs of various - addresses. For two addresses of equal cost, choose the one with the - highest `rtx_cost' value as that has the potential of eliminating the - most insns. For equal costs, we choose the first in the equivalence - class. Note that we ignore the fact that pseudo registers are cheaper than - hard registers here because we would also prefer the pseudo registers. */ - -static void -find_best_addr (rtx insn, rtx *loc, enum machine_mode mode) -{ - struct table_elt *elt; - rtx addr = *loc; - struct table_elt *p; - int found_better = 1; - int save_do_not_record = do_not_record; - int save_hash_arg_in_memory = hash_arg_in_memory; - int addr_volatile; - int regno; - unsigned hash; - - /* Do not try to replace constant addresses or addresses of local and - argument slots. These MEM expressions are made only once and inserted - in many instructions, as well as being used to control symbol table - output. It is not safe to clobber them. - - There are some uncommon cases where the address is already in a register - for some reason, but we cannot take advantage of that because we have - no easy way to unshare the MEM. In addition, looking up all stack - addresses is costly. */ - if ((GET_CODE (addr) == PLUS - && REG_P (XEXP (addr, 0)) - && GET_CODE (XEXP (addr, 1)) == CONST_INT - && (regno = REGNO (XEXP (addr, 0)), - regno == FRAME_POINTER_REGNUM || regno == HARD_FRAME_POINTER_REGNUM - || regno == ARG_POINTER_REGNUM)) - || (REG_P (addr) - && (regno = REGNO (addr), regno == FRAME_POINTER_REGNUM - || regno == HARD_FRAME_POINTER_REGNUM - || regno == ARG_POINTER_REGNUM)) - || CONSTANT_ADDRESS_P (addr)) - return; - - /* If this address is not simply a register, try to fold it. This will - sometimes simplify the expression. Many simplifications - will not be valid, but some, usually applying the associative rule, will - be valid and produce better code. */ - if (!REG_P (addr)) - { - rtx folded = canon_for_address (fold_rtx (addr, NULL_RTX)); - - if (folded != addr) - { - int addr_folded_cost = address_cost (folded, mode); - int addr_cost = address_cost (addr, mode); - - if ((addr_folded_cost < addr_cost - || (addr_folded_cost == addr_cost - /* ??? The rtx_cost comparison is left over from an older - version of this code. It is probably no longer helpful.*/ - && (rtx_cost (folded, MEM) > rtx_cost (addr, MEM) - || approx_reg_cost (folded) < approx_reg_cost (addr)))) - && validate_change (insn, loc, folded, 0)) - addr = folded; - } - } - - /* If this address is not in the hash table, we can't look for equivalences - of the whole address. Also, ignore if volatile. */ - - do_not_record = 0; - hash = HASH (addr, Pmode); - addr_volatile = do_not_record; - do_not_record = save_do_not_record; - hash_arg_in_memory = save_hash_arg_in_memory; - - if (addr_volatile) - return; - - elt = lookup (addr, hash, Pmode); - - if (elt) - { - /* We need to find the best (under the criteria documented above) entry - in the class that is valid. We use the `flag' field to indicate - choices that were invalid and iterate until we can't find a better - one that hasn't already been tried. */ - - for (p = elt->first_same_value; p; p = p->next_same_value) - p->flag = 0; - - while (found_better) - { - int best_addr_cost = address_cost (*loc, mode); - int best_rtx_cost = (elt->cost + 1) >> 1; - int exp_cost; - struct table_elt *best_elt = elt; - - found_better = 0; - for (p = elt->first_same_value; p; p = p->next_same_value) - if (! p->flag) - { - if ((REG_P (p->exp) - || exp_equiv_p (p->exp, p->exp, 1, false)) - && ((exp_cost = address_cost (p->exp, mode)) < best_addr_cost - || (exp_cost == best_addr_cost - && ((p->cost + 1) >> 1) > best_rtx_cost))) - { - found_better = 1; - best_addr_cost = exp_cost; - best_rtx_cost = (p->cost + 1) >> 1; - best_elt = p; - } - } - - if (found_better) - { - if (validate_change (insn, loc, - canon_reg (copy_rtx (best_elt->exp), - NULL_RTX), 0)) - return; - else - best_elt->flag = 1; - } - } - } - - /* If the address is a binary operation with the first operand a register - and the second a constant, do the same as above, but looking for - equivalences of the register. Then try to simplify before checking for - the best address to use. This catches a few cases: First is when we - have REG+const and the register is another REG+const. We can often merge - the constants and eliminate one insn and one register. It may also be - that a machine has a cheap REG+REG+const. Finally, this improves the - code on the Alpha for unaligned byte stores. */ - - if (flag_expensive_optimizations - && ARITHMETIC_P (*loc) - && REG_P (XEXP (*loc, 0))) - { - rtx op1 = XEXP (*loc, 1); - - do_not_record = 0; - hash = HASH (XEXP (*loc, 0), Pmode); - do_not_record = save_do_not_record; - hash_arg_in_memory = save_hash_arg_in_memory; - - elt = lookup (XEXP (*loc, 0), hash, Pmode); - if (elt == 0) - return; - - /* We need to find the best (under the criteria documented above) entry - in the class that is valid. We use the `flag' field to indicate - choices that were invalid and iterate until we can't find a better - one that hasn't already been tried. */ - - for (p = elt->first_same_value; p; p = p->next_same_value) - p->flag = 0; - - while (found_better) - { - int best_addr_cost = address_cost (*loc, mode); - int best_rtx_cost = (COST (*loc) + 1) >> 1; - struct table_elt *best_elt = elt; - rtx best_rtx = *loc; - int count; - - /* This is at worst case an O(n^2) algorithm, so limit our search - to the first 32 elements on the list. This avoids trouble - compiling code with very long basic blocks that can easily - call simplify_gen_binary so many times that we run out of - memory. */ - - found_better = 0; - for (p = elt->first_same_value, count = 0; - p && count < 32; - p = p->next_same_value, count++) - if (! p->flag - && (REG_P (p->exp) - || (GET_CODE (p->exp) != EXPR_LIST - && exp_equiv_p (p->exp, p->exp, 1, false)))) - - { - rtx new = simplify_gen_binary (GET_CODE (*loc), Pmode, - p->exp, op1); - int new_cost; - - /* Get the canonical version of the address so we can accept - more. */ - new = canon_for_address (new); - - new_cost = address_cost (new, mode); - - if (new_cost < best_addr_cost - || (new_cost == best_addr_cost - && (COST (new) + 1) >> 1 > best_rtx_cost)) - { - found_better = 1; - best_addr_cost = new_cost; - best_rtx_cost = (COST (new) + 1) >> 1; - best_elt = p; - best_rtx = new; - } - } - - if (found_better) - { - if (validate_change (insn, loc, - canon_reg (copy_rtx (best_rtx), - NULL_RTX), 0)) - return; - else - best_elt->flag = 1; - } - } - } -} - /* Given an operation (CODE, *PARG1, *PARG2), where code is a comparison operation (EQ, NE, GT, etc.), follow it back through the hash table and what values are being compared. @@ -3243,425 +2956,14 @@ find_comparison_args (enum rtx_code code, rtx *parg1, rtx *parg2, return code; } -/* Fold SUBREG. */ - -static rtx -fold_rtx_subreg (rtx x, rtx insn) -{ - enum machine_mode mode = GET_MODE (x); - rtx folded_arg0; - rtx const_arg0; - rtx new; - - /* See if we previously assigned a constant value to this SUBREG. */ - if ((new = lookup_as_function (x, CONST_INT)) != 0 - || (new = lookup_as_function (x, CONST_DOUBLE)) != 0) - return new; - - /* If this is a paradoxical SUBREG, we have no idea what value the - extra bits would have. However, if the operand is equivalent to - a SUBREG whose operand is the same as our mode, and all the modes - are within a word, we can just use the inner operand because - these SUBREGs just say how to treat the register. - - Similarly if we find an integer constant. */ - - if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))) - { - enum machine_mode imode = GET_MODE (SUBREG_REG (x)); - struct table_elt *elt; - - if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD - && GET_MODE_SIZE (imode) <= UNITS_PER_WORD - && (elt = lookup (SUBREG_REG (x), HASH (SUBREG_REG (x), imode), - imode)) != 0) - for (elt = elt->first_same_value; elt; elt = elt->next_same_value) - { - if (CONSTANT_P (elt->exp) - && GET_MODE (elt->exp) == VOIDmode) - return elt->exp; - - if (GET_CODE (elt->exp) == SUBREG - && GET_MODE (SUBREG_REG (elt->exp)) == mode - && exp_equiv_p (elt->exp, elt->exp, 1, false)) - return copy_rtx (SUBREG_REG (elt->exp)); - } - - return x; - } - - /* Fold SUBREG_REG. If it changed, see if we can simplify the - SUBREG. We might be able to if the SUBREG is extracting a single - word in an integral mode or extracting the low part. */ - - folded_arg0 = fold_rtx (SUBREG_REG (x), insn); - const_arg0 = equiv_constant (folded_arg0); - if (const_arg0) - folded_arg0 = const_arg0; - - if (folded_arg0 != SUBREG_REG (x)) - { - new = simplify_subreg (mode, folded_arg0, - GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x)); - if (new) - return new; - } - - if (REG_P (folded_arg0) - && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (folded_arg0))) - { - struct table_elt *elt; - - elt = lookup (folded_arg0, - HASH (folded_arg0, GET_MODE (folded_arg0)), - GET_MODE (folded_arg0)); - - if (elt) - elt = elt->first_same_value; - - if (subreg_lowpart_p (x)) - /* If this is a narrowing SUBREG and our operand is a REG, see - if we can find an equivalence for REG that is an arithmetic - operation in a wider mode where both operands are - paradoxical SUBREGs from objects of our result mode. In - that case, we couldn-t report an equivalent value for that - operation, since we don't know what the extra bits will be. - But we can find an equivalence for this SUBREG by folding - that operation in the narrow mode. This allows us to fold - arithmetic in narrow modes when the machine only supports - word-sized arithmetic. - - Also look for a case where we have a SUBREG whose operand - is the same as our result. If both modes are smaller than - a word, we are simply interpreting a register in different - modes and we can use the inner value. */ - - for (; elt; elt = elt->next_same_value) - { - enum rtx_code eltcode = GET_CODE (elt->exp); - - /* Just check for unary and binary operations. */ - if (UNARY_P (elt->exp) - && eltcode != SIGN_EXTEND - && eltcode != ZERO_EXTEND - && GET_CODE (XEXP (elt->exp, 0)) == SUBREG - && GET_MODE (SUBREG_REG (XEXP (elt->exp, 0))) == mode - && (GET_MODE_CLASS (mode) - == GET_MODE_CLASS (GET_MODE (XEXP (elt->exp, 0))))) - { - rtx op0 = SUBREG_REG (XEXP (elt->exp, 0)); - - if (!REG_P (op0) && ! CONSTANT_P (op0)) - op0 = fold_rtx (op0, NULL_RTX); - - op0 = equiv_constant (op0); - if (op0) - new = simplify_unary_operation (GET_CODE (elt->exp), mode, - op0, mode); - } - else if (ARITHMETIC_P (elt->exp) - && eltcode != DIV && eltcode != MOD - && eltcode != UDIV && eltcode != UMOD - && eltcode != ASHIFTRT && eltcode != LSHIFTRT - && eltcode != ROTATE && eltcode != ROTATERT - && ((GET_CODE (XEXP (elt->exp, 0)) == SUBREG - && (GET_MODE (SUBREG_REG (XEXP (elt->exp, 0))) - == mode)) - || CONSTANT_P (XEXP (elt->exp, 0))) - && ((GET_CODE (XEXP (elt->exp, 1)) == SUBREG - && (GET_MODE (SUBREG_REG (XEXP (elt->exp, 1))) - == mode)) - || CONSTANT_P (XEXP (elt->exp, 1)))) - { - rtx op0 = gen_lowpart_common (mode, XEXP (elt->exp, 0)); - rtx op1 = gen_lowpart_common (mode, XEXP (elt->exp, 1)); - - if (op0 && !REG_P (op0) && ! CONSTANT_P (op0)) - op0 = fold_rtx (op0, NULL_RTX); - - if (op0) - op0 = equiv_constant (op0); - - if (op1 && !REG_P (op1) && ! CONSTANT_P (op1)) - op1 = fold_rtx (op1, NULL_RTX); - - if (op1) - op1 = equiv_constant (op1); - - /* If we are looking for the low SImode part of - (ashift:DI c (const_int 32)), it doesn't work to - compute that in SImode, because a 32-bit shift in - SImode is unpredictable. We know the value is - 0. */ - if (op0 && op1 - && GET_CODE (elt->exp) == ASHIFT - && GET_CODE (op1) == CONST_INT - && INTVAL (op1) >= GET_MODE_BITSIZE (mode)) - { - if (INTVAL (op1) - < GET_MODE_BITSIZE (GET_MODE (elt->exp))) - /* If the count fits in the inner mode's width, - but exceeds the outer mode's width, the value - will get truncated to 0 by the subreg. */ - new = CONST0_RTX (mode); - else - /* If the count exceeds even the inner mode's width, - don't fold this expression. */ - new = 0; - } - else if (op0 && op1) - new = simplify_binary_operation (GET_CODE (elt->exp), - mode, op0, op1); - } - - else if (GET_CODE (elt->exp) == SUBREG - && GET_MODE (SUBREG_REG (elt->exp)) == mode - && (GET_MODE_SIZE (GET_MODE (folded_arg0)) - <= UNITS_PER_WORD) - && exp_equiv_p (elt->exp, elt->exp, 1, false)) - new = copy_rtx (SUBREG_REG (elt->exp)); - - if (new) - return new; - } - else - /* A SUBREG resulting from a zero extension may fold to zero - if it extracts higher bits than the ZERO_EXTEND's source - bits. FIXME: if combine tried to, er, combine these - instructions, this transformation may be moved to - simplify_subreg. */ - for (; elt; elt = elt->next_same_value) - { - if (GET_CODE (elt->exp) == ZERO_EXTEND - && subreg_lsb (x) - >= GET_MODE_BITSIZE (GET_MODE (XEXP (elt->exp, 0)))) - return CONST0_RTX (mode); - } - } - - return x; -} - -/* Fold MEM. Not to be called directly, see fold_rtx_mem instead. */ - -static rtx -fold_rtx_mem_1 (rtx x, rtx insn) -{ - enum machine_mode mode = GET_MODE (x); - rtx new; - - /* If we are not actually processing an insn, don't try to find the - best address. Not only don't we care, but we could modify the - MEM in an invalid way since we have no insn to validate - against. */ - if (insn != 0) - find_best_addr (insn, &XEXP (x, 0), mode); - - { - /* Even if we don't fold in the insn itself, we can safely do so - here, in hopes of getting a constant. */ - rtx addr = fold_rtx (XEXP (x, 0), NULL_RTX); - rtx base = 0; - HOST_WIDE_INT offset = 0; - - if (REG_P (addr) - && REGNO_QTY_VALID_P (REGNO (addr))) - { - int addr_q = REG_QTY (REGNO (addr)); - struct qty_table_elem *addr_ent = &qty_table[addr_q]; - - if (GET_MODE (addr) == addr_ent->mode - && addr_ent->const_rtx != NULL_RTX) - addr = addr_ent->const_rtx; - } - - /* Call target hook to avoid the effects of -fpic etc.... */ - addr = targetm.delegitimize_address (addr); - - /* If address is constant, split it into a base and integer - offset. */ - if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF) - base = addr; - else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS - && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) - { - base = XEXP (XEXP (addr, 0), 0); - offset = INTVAL (XEXP (XEXP (addr, 0), 1)); - } - else if (GET_CODE (addr) == LO_SUM - && GET_CODE (XEXP (addr, 1)) == SYMBOL_REF) - base = XEXP (addr, 1); - - /* If this is a constant pool reference, we can fold it into its - constant to allow better value tracking. */ - if (base && GET_CODE (base) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (base)) - { - rtx constant = get_pool_constant (base); - enum machine_mode const_mode = get_pool_mode (base); - rtx new; - - if (CONSTANT_P (constant) && GET_CODE (constant) != CONST_INT) - { - constant_pool_entries_cost = COST (constant); - constant_pool_entries_regcost = approx_reg_cost (constant); - } - - /* If we are loading the full constant, we have an - equivalence. */ - if (offset == 0 && mode == const_mode) - return constant; - - /* If this actually isn't a constant (weird!), we can't do - anything. Otherwise, handle the two most common cases: - extracting a word from a multi-word constant, and - extracting the low-order bits. Other cases don't seem - common enough to worry about. */ - if (! CONSTANT_P (constant)) - return x; - - if (GET_MODE_CLASS (mode) == MODE_INT - && GET_MODE_SIZE (mode) == UNITS_PER_WORD - && offset % UNITS_PER_WORD == 0 - && (new = operand_subword (constant, - offset / UNITS_PER_WORD, - 0, const_mode)) != 0) - return new; - - if (((BYTES_BIG_ENDIAN - && offset == GET_MODE_SIZE (GET_MODE (constant)) - 1) - || (! BYTES_BIG_ENDIAN && offset == 0)) - && (new = gen_lowpart (mode, constant)) != 0) - return new; - } - - /* If this is a reference to a label at a known position in a jump - table, we also know its value. */ - if (base && GET_CODE (base) == LABEL_REF) - { - rtx label = XEXP (base, 0); - rtx table_insn = NEXT_INSN (label); - - if (table_insn && JUMP_P (table_insn) - && GET_CODE (PATTERN (table_insn)) == ADDR_VEC) - { - rtx table = PATTERN (table_insn); - - if (offset >= 0 - && (offset / GET_MODE_SIZE (GET_MODE (table)) - < XVECLEN (table, 0))) - { - rtx label = XVECEXP - (table, 0, offset / GET_MODE_SIZE (GET_MODE (table))); - rtx set; - - /* If we have an insn that loads the label from the - jumptable into a reg, we don't want to set the reg - to the label, because this may cause a reference to - the label to remain after the label is removed in - some very obscure cases (PR middle-end/18628). */ - if (!insn) - return label; - - set = single_set (insn); +/* If X is a nontrivial arithmetic operation on an argument for which + a constant value can be determined, return the result of operating + on that value, as a constant. Otherwise, return X, possibly with + one or more operands changed to a forward-propagated constant. - if (! set || SET_SRC (set) != x) - return x; - - /* If it's a jump, it's safe to reference the label. */ - if (SET_DEST (set) == pc_rtx) - return label; - - return x; - } - } - if (table_insn && JUMP_P (table_insn) - && GET_CODE (PATTERN (table_insn)) == ADDR_DIFF_VEC) - { - rtx table = PATTERN (table_insn); - - if (offset >= 0 - && (offset / GET_MODE_SIZE (GET_MODE (table)) - < XVECLEN (table, 1))) - { - offset /= GET_MODE_SIZE (GET_MODE (table)); - new = gen_rtx_MINUS (Pmode, XVECEXP (table, 1, offset), - XEXP (table, 0)); - - if (GET_MODE (table) != Pmode) - new = gen_rtx_TRUNCATE (GET_MODE (table), new); - - /* Indicate this is a constant. This isn't a valid - form of CONST, but it will only be used to fold the - next insns and then discarded, so it should be - safe. - - Note this expression must be explicitly discarded, - by cse_insn, else it may end up in a REG_EQUAL note - and "escape" to cause problems elsewhere. */ - return gen_rtx_CONST (GET_MODE (new), new); - } - } - } - - return x; - } -} - -/* Fold MEM. */ - -static rtx -fold_rtx_mem (rtx x, rtx insn) -{ - /* To avoid infinite oscillations between fold_rtx and fold_rtx_mem, - refuse to allow recursion of the latter past n levels. This can - happen because fold_rtx_mem will try to fold the address of the - memory reference it is passed, i.e. conceptually throwing away - the MEM and reinjecting the bare address into fold_rtx. As a - result, patterns like - - set (reg1) - (plus (reg) - (mem (plus (reg2) (const_int)))) - - set (reg2) - (plus (reg) - (mem (plus (reg1) (const_int)))) - - will defeat any "first-order" short-circuit put in either - function to prevent these infinite oscillations. - - The heuristics for determining n is as follows: since each time - it is invoked fold_rtx_mem throws away a MEM, and since MEMs - are generically not nested, we assume that each invocation of - fold_rtx_mem corresponds to a new "top-level" operand, i.e. - the source or the destination of a SET. So fold_rtx_mem is - bound to stop or cycle before n recursions, n being the number - of expressions recorded in the hash table. We also leave some - play to account for the initial steps. */ - - static unsigned int depth; - rtx ret; - - if (depth > 3 + table_size) - return x; - - depth++; - ret = fold_rtx_mem_1 (x, insn); - depth--; - - return ret; -} - -/* If X is a nontrivial arithmetic operation on an argument - for which a constant value can be determined, return - the result of operating on that value, as a constant. - Otherwise, return X, possibly with one or more operands - modified by recursive calls to this function. - - If X is a register whose contents are known, we do NOT - return those contents here. equiv_constant is called to - perform that task. + If X is a register whose contents are known, we do NOT return + those contents here; equiv_constant is called to perform that task. + For SUBREGs and MEMs, we do that both here and in equiv_constant. INSN is the insn that we may be modifying. If it is 0, make a copy of X before modifying it. */ @@ -3674,10 +2976,9 @@ fold_rtx (rtx x, rtx insn) const char *fmt; int i; rtx new = 0; - int copied = 0; - int must_swap = 0; + int changed = 0; - /* Folded equivalents of first two operands of X. */ + /* Operands of X. */ rtx folded_arg0; rtx folded_arg1; @@ -3694,10 +2995,16 @@ fold_rtx (rtx x, rtx insn) if (x == 0) return x; - mode = GET_MODE (x); + /* Try to perform some initial simplifications on X. */ code = GET_CODE (x); switch (code) { + case MEM: + case SUBREG: + if ((new = equiv_constant (x)) != NULL_RTX) + return new; + return x; + case CONST: case CONST_INT: case CONST_DOUBLE: @@ -3717,28 +3024,6 @@ fold_rtx (rtx x, rtx insn) return prev_insn_cc0; #endif - case SUBREG: - return fold_rtx_subreg (x, insn); - - case NOT: - case NEG: - /* If we have (NOT Y), see if Y is known to be (NOT Z). - If so, (NOT Y) simplifies to Z. Similarly for NEG. */ - new = lookup_as_function (XEXP (x, 0), code); - if (new) - return fold_rtx (copy_rtx (XEXP (new, 0)), insn); - break; - - case MEM: - return fold_rtx_mem (x, insn); - -#ifdef NO_FUNCTION_CSE - case CALL: - if (CONSTANT_P (XEXP (XEXP (x, 0), 0))) - return x; - break; -#endif - case ASM_OPERANDS: if (insn) { @@ -3746,12 +3031,21 @@ fold_rtx (rtx x, rtx insn) validate_change (insn, &ASM_OPERANDS_INPUT (x, i), fold_rtx (ASM_OPERANDS_INPUT (x, i), insn), 0); } + return x; + +#ifdef NO_FUNCTION_CSE + case CALL: + if (CONSTANT_P (XEXP (XEXP (x, 0), 0))) + return x; break; +#endif + /* Anything else goes through the loop below. */ default: break; } + mode = GET_MODE (x); const_arg0 = 0; const_arg1 = 0; const_arg2 = 0; @@ -3764,55 +3058,13 @@ fold_rtx (rtx x, rtx insn) for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) if (fmt[i] == 'e') { - rtx arg = XEXP (x, i); - rtx folded_arg = arg, const_arg = 0; - enum machine_mode mode_arg = GET_MODE (arg); - rtx cheap_arg, expensive_arg; - rtx replacements[2]; - int j; - int old_cost = COST_IN (XEXP (x, i), code); - - /* Most arguments are cheap, so handle them specially. */ - switch (GET_CODE (arg)) - { - case REG: - /* This is the same as calling equiv_constant; it is duplicated - here for speed. */ - if (REGNO_QTY_VALID_P (REGNO (arg))) - { - int arg_q = REG_QTY (REGNO (arg)); - struct qty_table_elem *arg_ent = &qty_table[arg_q]; - - if (arg_ent->const_rtx != NULL_RTX - && !REG_P (arg_ent->const_rtx) - && GET_CODE (arg_ent->const_rtx) != PLUS) - const_arg - = gen_lowpart (GET_MODE (arg), - arg_ent->const_rtx); - } - break; - - case CONST: - case CONST_INT: - case SYMBOL_REF: - case LABEL_REF: - case CONST_DOUBLE: - case CONST_VECTOR: - const_arg = arg; - break; - + rtx folded_arg = XEXP (x, i), const_arg; + enum machine_mode mode_arg = GET_MODE (folded_arg); #ifdef HAVE_cc0 - case CC0: - folded_arg = prev_insn_cc0; - mode_arg = prev_insn_cc0_mode; - const_arg = equiv_constant (folded_arg); - break; + if (CC0_P (folded_arg)) + folded_arg = prev_insn_cc0, mode_arg = prev_insn_cc0_mode; #endif - - default: - folded_arg = fold_rtx (arg, insn); - const_arg = equiv_constant (folded_arg); - } + const_arg = equiv_constant (folded_arg); /* For the first three operands, see if the operand is constant or equivalent to a constant. */ @@ -3832,120 +3084,50 @@ fold_rtx (rtx x, rtx insn) break; } - /* Pick the least expensive of the folded argument and an - equivalent constant argument. */ - if (const_arg == 0 || const_arg == folded_arg - || COST_IN (const_arg, code) > COST_IN (folded_arg, code)) - cheap_arg = folded_arg, expensive_arg = const_arg; - else - cheap_arg = const_arg, expensive_arg = folded_arg; - - /* Try to replace the operand with the cheapest of the two - possibilities. If it doesn't work and this is either of the first - two operands of a commutative operation, try swapping them. - If THAT fails, try the more expensive, provided it is cheaper - than what is already there. */ - - if (cheap_arg == XEXP (x, i)) - continue; - - if (insn == 0 && ! copied) - { - x = copy_rtx (x); - copied = 1; - } - - /* Order the replacements from cheapest to most expensive. */ - replacements[0] = cheap_arg; - replacements[1] = expensive_arg; - - for (j = 0; j < 2 && replacements[j]; j++) - { - int new_cost = COST_IN (replacements[j], code); - - /* Stop if what existed before was cheaper. Prefer constants - in the case of a tie. */ - if (new_cost > old_cost - || (new_cost == old_cost && CONSTANT_P (XEXP (x, i)))) - break; + /* Pick the least expensive of the argument and an equivalent constant + argument. */ + if (const_arg != 0 + && const_arg != folded_arg + && COST_IN (const_arg, code) <= COST_IN (folded_arg, code) /* It's not safe to substitute the operand of a conversion operator with a constant, as the conversion's identity depends upon the mode of its operand. This optimization is handled by the call to simplify_unary_operation. */ - if (GET_RTX_CLASS (code) == RTX_UNARY - && GET_MODE (replacements[j]) != mode_arg0 - && (code == ZERO_EXTEND - || code == SIGN_EXTEND - || code == TRUNCATE - || code == FLOAT_TRUNCATE - || code == FLOAT_EXTEND - || code == FLOAT - || code == FIX - || code == UNSIGNED_FLOAT - || code == UNSIGNED_FIX)) - continue; - - if (validate_change (insn, &XEXP (x, i), replacements[j], 0)) - break; - - if (GET_RTX_CLASS (code) == RTX_COMM_COMPARE - || GET_RTX_CLASS (code) == RTX_COMM_ARITH) - { - validate_change (insn, &XEXP (x, i), XEXP (x, 1 - i), 1); - validate_change (insn, &XEXP (x, 1 - i), replacements[j], 1); - - if (apply_change_group ()) - { - /* Swap them back to be invalid so that this loop can - continue and flag them to be swapped back later. */ - rtx tem; - - tem = XEXP (x, 0); XEXP (x, 0) = XEXP (x, 1); - XEXP (x, 1) = tem; - must_swap = 1; - break; - } - } - } - } + && (GET_RTX_CLASS (code) != RTX_UNARY + || GET_MODE (const_arg) == mode_arg0 + || (code != ZERO_EXTEND + && code != SIGN_EXTEND + && code != TRUNCATE + && code != FLOAT_TRUNCATE + && code != FLOAT_EXTEND + && code != FLOAT + && code != FIX + && code != UNSIGNED_FLOAT + && code != UNSIGNED_FIX))) + folded_arg = const_arg; + + if (folded_arg == XEXP (x, i)) + continue; - else - { - if (fmt[i] == 'E') - /* Don't try to fold inside of a vector of expressions. - Doing nothing is harmless. */ - {;} + if (insn == NULL_RTX && !changed) + x = copy_rtx (x); + changed = 1; + validate_change (insn, &XEXP (x, i), folded_arg, 1); } - /* If a commutative operation, place a constant integer as the second - operand unless the first operand is also a constant integer. Otherwise, - place any constant second unless the first operand is also a constant. */ - - if (COMMUTATIVE_P (x)) + if (changed) { - if (must_swap - || swap_commutative_operands_p (const_arg0 ? const_arg0 - : XEXP (x, 0), - const_arg1 ? const_arg1 - : XEXP (x, 1))) + /* Canonicalize X if necessary, and keep const_argN and folded_argN + consistent with the order in X. */ + if (canonicalize_change_group (insn, x)) { - rtx tem = XEXP (x, 0); - - if (insn == 0 && ! copied) - { - x = copy_rtx (x); - copied = 1; - } - - validate_change (insn, &XEXP (x, 0), XEXP (x, 1), 1); - validate_change (insn, &XEXP (x, 1), tem, 1); - if (apply_change_group ()) - { - tem = const_arg0, const_arg0 = const_arg1, const_arg1 = tem; - tem = folded_arg0, folded_arg0 = folded_arg1, folded_arg1 = tem; - } + rtx tem; + tem = const_arg0, const_arg0 = const_arg1, const_arg1 = tem; + tem = folded_arg0, folded_arg0 = folded_arg1, folded_arg1 = tem; } + + apply_change_group (); } /* If X is an arithmetic operation, see if we can simplify it. */ @@ -4477,16 +3659,31 @@ equiv_constant (rtx x) if (x == 0 || CONSTANT_P (x)) return x; - /* If X is a MEM, try to fold it outside the context of any insn to see if - it might be equivalent to a constant. That handles the case where it - is a constant-pool reference. Then try to look it up in the hash table - in case it is something whose value we have seen before. */ + if (GET_CODE (x) == SUBREG) + { + rtx new; + + /* See if we previously assigned a constant value to this SUBREG. */ + if ((new = lookup_as_function (x, CONST_INT)) != 0 + || (new = lookup_as_function (x, CONST_DOUBLE)) != 0) + return new; + + if (REG_P (SUBREG_REG (x)) + && (new = equiv_constant (SUBREG_REG (x))) != 0) + return simplify_subreg (GET_MODE (x), SUBREG_REG (x), + GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x)); + + return 0; + } + + /* If X is a MEM, see if it is a constant-pool reference, or look it up in + the hash table in case its value was seen before. */ if (MEM_P (x)) { struct table_elt *elt; - x = fold_rtx (x, NULL_RTX); + x = avoid_constant_pool_reference (x); if (CONSTANT_P (x)) return x; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 5bbbfc53664..2e0de418bc4 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -310,7 +310,7 @@ Objective-C and Objective-C++ Dialects}. -fcse-skip-blocks -fcx-limited-range -fdata-sections @gol -fdelayed-branch -fdelete-null-pointer-checks -fearly-inlining @gol -fexpensive-optimizations -ffast-math -ffloat-store @gol --fforce-addr -ffunction-sections @gol +-fforce-addr -fforward-propagate -ffunction-sections @gol -fgcse -fgcse-lm -fgcse-sm -fgcse-las -fgcse-after-reload @gol -fcrossjumping -fif-conversion -fif-conversion2 @gol -finline-functions -finline-functions-called-once @gol @@ -4621,6 +4621,16 @@ register-load. This option is now a nop and will be removed in 4.2. Force memory address constants to be copied into registers before doing arithmetic on them. +@item -fforward-propagate +@opindex fforward-propagate +Perform a forward propagation pass on RTL. The pass tries to combine two +instructions and checks if the result can be simplified. If loop unrolling +is active, two passes are performed and the second is scheduled after +loop unrolling. + +This option is enabled by default at optimization levels @option{-O2}, +@option{-O3}, @option{-Os}. + @item -fomit-frame-pointer @opindex fomit-frame-pointer Don't keep the frame pointer in a register for functions that diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi index 9da919152b7..fc6aa2696a4 100644 --- a/gcc/doc/passes.texi +++ b/gcc/doc/passes.texi @@ -685,6 +685,15 @@ optimization pass''. The bulk of the code for this pass is in @file{cfgcleanup.c}, and there are support routines in @file{cfgrtl.c} and @file{jump.c}. +@item Forward propagation of single-def values + +This pass attempts to remove redundant computation by substituting +variables that come from a single definition, and +seeing if the result can be simplified. It performs copy propagation +and addressing mode selection. The pass is run twice, with values +being propagated into loops only on the second run. It is located in +@file{fwprop.c}. + @item Common subexpression elimination This pass removes redundant computation within basic blocks, and diff --git a/gcc/fwprop.c b/gcc/fwprop.c new file mode 100644 index 00000000000..1e4f749eb12 --- /dev/null +++ b/gcc/fwprop.c @@ -0,0 +1,1034 @@ +/* RTL-based forward propagation pass for GNU compiler. + Copyright (C) 2005, 2006 Free Software Foundation, Inc. + Contributed by Paolo Bonzini and Steven Bosscher. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "toplev.h" + +#include "timevar.h" +#include "rtl.h" +#include "tm_p.h" +#include "emit-rtl.h" +#include "insn-config.h" +#include "recog.h" +#include "flags.h" +#include "obstack.h" +#include "basic-block.h" +#include "output.h" +#include "df.h" +#include "target.h" +#include "cfgloop.h" +#include "tree-pass.h" + + +/* This pass does simple forward propagation and simplification when an + operand of an insn can only come from a single def. This pass uses + df.c, so it is global. However, we only do limited analysis of + available expressions. + + 1) The pass tries to propagate the source of the def into the use, + and checks if the result is independent of the substituted value. + For example, the high word of a (zero_extend:DI (reg:SI M)) is always + zero, independent of the source register. + + In particular, we propagate constants into the use site. Sometimes + RTL expansion did not put the constant in the same insn on purpose, + to satisfy a predicate, and the result will fail to be recognized; + but this happens rarely and in this case we can still create a + REG_EQUAL note. For multi-word operations, this + + (set (subreg:SI (reg:DI 120) 0) (const_int 0)) + (set (subreg:SI (reg:DI 120) 4) (const_int -1)) + (set (subreg:SI (reg:DI 122) 0) + (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0))) + (set (subreg:SI (reg:DI 122) 4) + (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4))) + + can be simplified to the much simpler + + (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119))) + (set (subreg:SI (reg:DI 122) 4) (const_int -1)) + + This particular propagation is also effective at putting together + complex addressing modes. We are more aggressive inside MEMs, in + that all definitions are propagated if the use is in a MEM; if the + result is a valid memory address we check address_cost to decide + whether the substitution is worthwhile. + + 2) The pass propagates register copies. This is not as effective as + the copy propagation done by CSE's canon_reg, which works by walking + the instruction chain, it can help the other transformations. + + We should consider removing this optimization, and instead reorder the + RTL passes, because GCSE does this transformation too. With some luck, + the CSE pass at the end of rest_of_handle_gcse could also go away. + + 3) The pass looks for paradoxical subregs that are actually unnecessary. + Things like this: + + (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) + (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) + (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0) + (subreg:SI (reg:QI 121) 0))) + + are very common on machines that can only do word-sized operations. + For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0), + if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0), + we can replace the paradoxical subreg with simply (reg:WIDE M). The + above will simplify this to + + (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) + (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) + (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119))) + + where the first two insns are now dead. */ + + +static struct loops loops; +static struct df *df; +static int num_changes; + + +/* Do not try to replace constant addresses or addresses of local and + argument slots. These MEM expressions are made only once and inserted + in many instructions, as well as being used to control symbol table + output. It is not safe to clobber them. + + There are some uncommon cases where the address is already in a register + for some reason, but we cannot take advantage of that because we have + no easy way to unshare the MEM. In addition, looking up all stack + addresses is costly. */ + +static bool +can_simplify_addr (rtx addr) +{ + rtx reg; + + if (CONSTANT_ADDRESS_P (addr)) + return false; + + if (GET_CODE (addr) == PLUS) + reg = XEXP (addr, 0); + else + reg = addr; + + return (!REG_P (reg) + || (REGNO (reg) != FRAME_POINTER_REGNUM + && REGNO (reg) != HARD_FRAME_POINTER_REGNUM + && REGNO (reg) != ARG_POINTER_REGNUM)); +} + +/* Returns a canonical version of X for the address, from the point of view, + that all multiplications are represented as MULT instead of the multiply + by a power of 2 being represented as ASHIFT. + + Every ASHIFT we find has been made by simplify_gen_binary and was not + there before, so it is not shared. So we can do this in place. */ + +static void +canonicalize_address (rtx x) +{ + for (;;) + switch (GET_CODE (x)) + { + case ASHIFT: + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x)) + && INTVAL (XEXP (x, 1)) >= 0) + { + HOST_WIDE_INT shift = INTVAL (XEXP (x, 1)); + PUT_CODE (x, MULT); + XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift, + GET_MODE (x)); + } + + x = XEXP (x, 0); + break; + + case PLUS: + if (GET_CODE (XEXP (x, 0)) == PLUS + || GET_CODE (XEXP (x, 0)) == ASHIFT + || GET_CODE (XEXP (x, 0)) == CONST) + canonicalize_address (XEXP (x, 0)); + + x = XEXP (x, 1); + break; + + case CONST: + x = XEXP (x, 0); + break; + + default: + return; + } +} + +/* OLD is a memory address. Return whether it is good to use NEW instead, + for a memory access in the given MODE. */ + +static bool +should_replace_address (rtx old, rtx new, enum machine_mode mode) +{ + int gain; + + if (rtx_equal_p (old, new) || !memory_address_p (mode, new)) + return false; + + /* Copy propagation is always ok. */ + if (REG_P (old) && REG_P (new)) + return true; + + /* Prefer the new address if it is less expensive. */ + gain = address_cost (old, mode) - address_cost (new, mode); + + /* If the addresses have equivalent cost, prefer the new address + if it has the highest `rtx_cost'. That has the potential of + eliminating the most insns without additional costs, and it + is the same that cse.c used to do. */ + if (gain == 0) + gain = rtx_cost (new, SET) - rtx_cost (old, SET); + + return (gain > 0); +} + +/* Replace all occurrences of OLD in *PX with NEW and try to simplify the + resulting expression. Replace *PX with a new RTL expression if an + occurrence of OLD was found. + + If CAN_APPEAR is true, we always return true; if it is false, we + can return false if, for at least one occurrence OLD, we failed to + collapse the result to a constant. For example, (mult:M (reg:M A) + (minus:M (reg:M B) (reg:M A))) may collapse to zero if replacing + (reg:M B) with (reg:M A). + + CAN_APPEAR is disregarded inside MEMs: in that case, we always return + true if the simplification is a cheaper and valid memory address. + + This is only a wrapper around simplify-rtx.c: do not add any pattern + matching code here. (The sole exception is the handling of LO_SUM, but + that is because there is no simplify_gen_* function for LO_SUM). */ + +static bool +propagate_rtx_1 (rtx *px, rtx old, rtx new, bool can_appear) +{ + rtx x = *px, tem = NULL_RTX, op0, op1, op2; + enum rtx_code code = GET_CODE (x); + enum machine_mode mode = GET_MODE (x); + enum machine_mode op_mode; + bool valid_ops = true; + + /* If X is OLD_RTX, return NEW_RTX. Otherwise, if this is an expression, + try to build a new expression from recursive substitution. */ + + if (x == old) + { + *px = new; + return can_appear; + } + + switch (GET_RTX_CLASS (code)) + { + case RTX_UNARY: + op0 = XEXP (x, 0); + op_mode = GET_MODE (op0); + valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear); + if (op0 == XEXP (x, 0)) + return true; + tem = simplify_gen_unary (code, mode, op0, op_mode); + break; + + case RTX_BIN_ARITH: + case RTX_COMM_ARITH: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear); + valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear); + if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) + return true; + tem = simplify_gen_binary (code, mode, op0, op1); + break; + + case RTX_COMPARE: + case RTX_COMM_COMPARE: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); + valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear); + valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear); + if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) + return true; + tem = simplify_gen_relational (code, mode, op_mode, op0, op1); + break; + + case RTX_TERNARY: + case RTX_BITFIELD_OPS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + op2 = XEXP (x, 2); + op_mode = GET_MODE (op0); + valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear); + valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear); + valid_ops &= propagate_rtx_1 (&op2, old, new, can_appear); + if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2)) + return true; + if (op_mode == VOIDmode) + op_mode = GET_MODE (op0); + tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2); + break; + + case RTX_EXTRA: + /* The only case we try to handle is a SUBREG. */ + if (code == SUBREG) + { + op0 = XEXP (x, 0); + valid_ops &= propagate_rtx_1 (&op0, old, new, can_appear); + if (op0 == XEXP (x, 0)) + return true; + tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)), + SUBREG_BYTE (x)); + } + break; + + case RTX_OBJ: + if (code == MEM && x != new) + { + rtx new_op0; + op0 = XEXP (x, 0); + + /* There are some addresses that we cannot work on. */ + if (!can_simplify_addr (op0)) + return true; + + op0 = new_op0 = targetm.delegitimize_address (op0); + valid_ops &= propagate_rtx_1 (&new_op0, old, new, true); + + /* Dismiss transformation that we do not want to carry on. */ + if (!valid_ops + || new_op0 == op0 + || GET_MODE (new_op0) != GET_MODE (op0)) + return true; + + canonicalize_address (new_op0); + + /* Copy propagations are always ok. Otherwise check the costs. */ + if (!(REG_P (old) && REG_P (new)) + && !should_replace_address (op0, new_op0, GET_MODE (x))) + return true; + + tem = replace_equiv_address_nv (x, new_op0); + } + + else if (code == LO_SUM) + { + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + /* The only simplification we do attempts to remove references to op0 + or make it constant -- in both cases, op0's invalidity will not + make the result invalid. */ + propagate_rtx_1 (&op0, old, new, true); + valid_ops &= propagate_rtx_1 (&op1, old, new, can_appear); + if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) + return true; + + /* (lo_sum (high x) x) -> x */ + if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1)) + tem = op1; + else + tem = gen_rtx_LO_SUM (mode, op0, op1); + + /* OP1 is likely not a legitimate address, otherwise there would have + been no LO_SUM. We want it to disappear if it is invalid, return + false in that case. */ + return memory_address_p (mode, tem); + } + + else if (code == REG) + { + if (rtx_equal_p (x, old)) + { + *px = new; + return can_appear; + } + } + break; + + default: + break; + } + + /* No change, no trouble. */ + if (tem == NULL_RTX) + return true; + + *px = tem; + + /* The replacement we made so far is valid, if all of the recursive + replacements were valid, or we could simplify everything to + a constant. */ + return valid_ops || can_appear || CONSTANT_P (tem); +} + +/* Replace all occurrences of OLD in X with NEW and try to simplify the + resulting expression (in mode MODE). Return a new expresion if it is + a constant, otherwise X. + + Simplifications where occurrences of NEW collapse to a constant are always + accepted. All simplifications are accepted if NEW is a pseudo too. + Otherwise, we accept simplifications that have a lower or equal cost. */ + +static rtx +propagate_rtx (rtx x, enum machine_mode mode, rtx old, rtx new) +{ + rtx tem; + bool collapsed; + + if (REG_P (new) && REGNO (new) < FIRST_PSEUDO_REGISTER) + return NULL_RTX; + + new = copy_rtx (new); + + tem = x; + collapsed = propagate_rtx_1 (&tem, old, new, REG_P (new) || CONSTANT_P (new)); + if (tem == x || !collapsed) + return NULL_RTX; + + /* gen_lowpart_common will not be able to process VOIDmode entities other + than CONST_INTs. */ + if (GET_MODE (tem) == VOIDmode && GET_CODE (tem) != CONST_INT) + return NULL_RTX; + + if (GET_MODE (tem) == VOIDmode) + tem = rtl_hooks.gen_lowpart_no_emit (mode, tem); + else + gcc_assert (GET_MODE (tem) == mode); + + return tem; +} + + + + +/* Return true if the register from reference REF is killed + between FROM to (but not including) TO. */ + +static bool +local_ref_killed_between_p (struct df_ref * ref, rtx from, rtx to) +{ + rtx insn; + struct df_ref *def; + + for (insn = from; insn != to; insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn)) + continue; + + def = DF_INSN_DEFS (df, insn); + while (def) + { + if (DF_REF_REGNO (ref) == DF_REF_REGNO (def)) + return true; + def = def->next_ref; + } + } + return false; +} + + +/* Check if the given DEF is available in INSN. This would require full + computation of available expressions; we check only restricted conditions: + - if DEF is the sole definition of its register, go ahead; + - in the same basic block, we check for no definitions killing the + definition of DEF_INSN; + - if USE's basic block has DEF's basic block as the sole predecessor, + we check if the definition is killed after DEF_INSN or before + TARGET_INSN insn, in their respective basic blocks. */ +static bool +use_killed_between (struct df_ref *use, rtx def_insn, rtx target_insn) +{ + basic_block def_bb, target_bb; + int regno; + struct df_ref * def; + + /* Check if the reg in USE has only one definition. We already + know that this definition reaches use, or we wouldn't be here. */ + regno = DF_REF_REGNO (use); + def = DF_REG_DEF_GET (df, regno)->reg_chain; + if (def && (def->next_reg == NULL)) + return false; + + /* Check if we are in the same basic block. */ + def_bb = BLOCK_FOR_INSN (def_insn); + target_bb = BLOCK_FOR_INSN (target_insn); + if (def_bb == target_bb) + { + /* In some obscure situations we can have a def reaching a use + that is _before_ the def. In other words the def does not + dominate the use even though the use and def are in the same + basic block. This can happen when a register may be used + uninitialized in a loop. In such cases, we must assume that + DEF is not available. */ + if (DF_INSN_LUID (df, def_insn) >= DF_INSN_LUID (df, target_insn)) + return true; + + return local_ref_killed_between_p (use, def_insn, target_insn); + } + + /* Finally, if DEF_BB is the sole predecessor of TARGET_BB. */ + if (single_pred_p (target_bb) + && single_pred (target_bb) == def_bb) + { + struct df_ref *x; + + /* See if USE is killed between DEF_INSN and the last insn in the + basic block containing DEF_INSN. */ + x = df_bb_regno_last_def_find (df, def_bb, regno); + if (x && DF_INSN_LUID (df, x->insn) >= DF_INSN_LUID (df, def_insn)) + return true; + + /* See if USE is killed between TARGET_INSN and the first insn in the + basic block containing TARGET_INSN. */ + x = df_bb_regno_first_def_find (df, target_bb, regno); + if (x && DF_INSN_LUID (df, x->insn) < DF_INSN_LUID (df, target_insn)) + return true; + + return false; + } + + /* Otherwise assume the worst case. */ + return true; +} + + +/* for_each_rtx traversal function that returns 1 if BODY points to + a non-constant mem. */ + +static int +varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *body; + return MEM_P (x) && !MEM_READONLY_P (x); +} + +/* Check if all uses in DEF_INSN can be used in TARGET_INSN. This + would require full computation of available expressions; + we check only restricted conditions, see use_killed_between. */ +static bool +all_uses_available_at (rtx def_insn, rtx target_insn) +{ + struct df_ref * use; + rtx def_set = single_set (def_insn); + + gcc_assert (def_set); + + /* If target_insn comes right after def_insn, which is very common + for addresses, we can use a quicker test. */ + if (NEXT_INSN (def_insn) == target_insn + && REG_P (SET_DEST (def_set))) + { + rtx def_reg = SET_DEST (def_set); + + /* If the insn uses the reg that it defines, the substitution is + invalid. */ + for (use = DF_INSN_USES (df, def_insn); use; use = use->next_ref) + if (rtx_equal_p (use->reg, def_reg)) + return false; + } + else + { + /* Look at all the uses of DEF_INSN, and see if they are not + killed between DEF_INSN and TARGET_INSN. */ + for (use = DF_INSN_USES (df, def_insn); use; use = use->next_ref) + if (use_killed_between (use, def_insn, target_insn)) + return false; + } + + /* We don't do any analysis of memories or aliasing. Reject any + instruction that involves references to non-constant memory. */ + return !for_each_rtx (&SET_SRC (def_set), varying_mem_p, NULL); +} + + +struct find_occurrence_data +{ + rtx find; + rtx *retval; +}; + +/* Callback for for_each_rtx, used in find_occurrence. + See if PX is the rtx we have to find. Return 1 to stop for_each_rtx + if successful, or 0 to continue traversing otherwise. */ + +static int +find_occurrence_callback (rtx *px, void *data) +{ + struct find_occurrence_data *fod = (struct find_occurrence_data *) data; + rtx x = *px; + rtx find = fod->find; + + if (x == find) + { + fod->retval = px; + return 1; + } + + return 0; +} + +/* Return a pointer to one of the occurrences of register FIND in *PX. */ + +static rtx * +find_occurrence (rtx *px, rtx find) +{ + struct find_occurrence_data data; + + gcc_assert (REG_P (find) + || (GET_CODE (find) == SUBREG + && REG_P (SUBREG_REG (find)))); + + data.find = find; + data.retval = NULL; + for_each_rtx (px, find_occurrence_callback, &data); + return data.retval; +} + + +/* Inside INSN, the expression rooted at *LOC has been changed, moving some + uses from ORIG_USES. Find those that are present, and create new items + in the data flow object of the pass. Mark any new uses as having the + given TYPE. */ +static void +update_df (rtx insn, rtx *loc, struct df_ref *orig_uses, enum df_ref_type type, + int new_flags) +{ + struct df_ref *use; + + /* Add a use for the registers that were propagated. */ + for (use = orig_uses; use; use = use->next_ref) + { + struct df_ref *orig_use = use, *new_use; + rtx *new_loc = find_occurrence (loc, DF_REF_REG (orig_use)); + + if (!new_loc) + continue; + + /* Add a new insn use. Use the original type, because it says if the + use was within a MEM. */ + new_use = df_ref_create (df, DF_REF_REG (orig_use), new_loc, + insn, BLOCK_FOR_INSN (insn), + type, DF_REF_FLAGS (orig_use) | new_flags); + + /* Set up the use-def chain. */ + df_chain_copy (df->problems_by_index[DF_CHAIN], + new_use, DF_REF_CHAIN (orig_use)); + } +} + + +/* Try substituting NEW into LOC, which originated from forward propagation + of USE's value from DEF_INSN. SET_REG_EQUAL says whether we are + substituting the whole SET_SRC, so we can set a REG_EQUAL note if the + new insn is not recognized. Return whether the substitution was + performed. */ + +static bool +try_fwprop_subst (struct df_ref *use, rtx *loc, rtx new, rtx def_insn, bool set_reg_equal) +{ + rtx insn = DF_REF_INSN (use); + enum df_ref_type type = DF_REF_TYPE (use); + int flags = DF_REF_FLAGS (use); + + if (dump_file) + { + fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn)); + print_inline_rtx (dump_file, *loc, 2); + fprintf (dump_file, "\n with "); + print_inline_rtx (dump_file, new, 2); + fprintf (dump_file, "\n"); + } + + if (validate_change (insn, loc, new, false)) + { + num_changes++; + if (dump_file) + fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn)); + + /* Unlink the use that we changed. */ + df_ref_remove (df, use); + if (!CONSTANT_P (new)) + update_df (insn, loc, DF_INSN_USES (df, def_insn), type, flags); + + return true; + } + else + { + if (dump_file) + fprintf (dump_file, "Changes to insn %d not recognized\n", + INSN_UID (insn)); + + /* Can also record a simplified value in a REG_EQUAL note, making a + new one if one does not already exist. */ + if (set_reg_equal) + { + if (dump_file) + fprintf (dump_file, " Setting REG_EQUAL note\n"); + + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, copy_rtx (new), + REG_NOTES (insn)); + + if (!CONSTANT_P (new)) + update_df (insn, loc, DF_INSN_USES (df, def_insn), + type, DF_REF_IN_NOTE); + } + + return false; + } +} + + +/* If USE is a paradoxical subreg, see if it can be replaced by a pseudo. */ + +static bool +forward_propagate_subreg (struct df_ref *use, rtx def_insn, rtx def_set) +{ + rtx use_reg = DF_REF_REG (use); + rtx use_insn, src; + + /* Only consider paradoxical subregs... */ + enum machine_mode use_mode = GET_MODE (use_reg); + if (GET_CODE (use_reg) != SUBREG + || !REG_P (SET_DEST (def_set)) + || GET_MODE_SIZE (use_mode) + <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg)))) + return false; + + /* If this is a paradoxical SUBREG, we have no idea what value the + extra bits would have. However, if the operand is equivalent to + a SUBREG whose operand is the same as our mode, and all the modes + are within a word, we can just use the inner operand because + these SUBREGs just say how to treat the register. */ + use_insn = DF_REF_INSN (use); + src = SET_SRC (def_set); + if (GET_CODE (src) == SUBREG + && REG_P (SUBREG_REG (src)) + && GET_MODE (SUBREG_REG (src)) == use_mode + && subreg_lowpart_p (src) + && all_uses_available_at (def_insn, use_insn)) + return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src), + def_insn, false); + else + return false; +} + +/* Try to replace USE with SRC (defined in DEF_INSN) and simplify the + result. */ + +static bool +forward_propagate_and_simplify (struct df_ref *use, rtx def_insn, rtx def_set) +{ + rtx use_insn = DF_REF_INSN (use); + rtx use_set = single_set (use_insn); + rtx src, reg, new, *loc; + bool set_reg_equal; + enum machine_mode mode; + + if (!use_set) + return false; + + /* Do not propagate into PC, CC0, etc. */ + if (GET_MODE (SET_DEST (use_set)) == VOIDmode) + return false; + + /* If def and use are subreg, check if they match. */ + reg = DF_REF_REG (use); + if (GET_CODE (reg) == SUBREG + && GET_CODE (SET_DEST (def_set)) == SUBREG + && (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg) + || GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))) + return false; + + /* Check if the def had a subreg, but the use has the whole reg. */ + if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG) + return false; + + /* Check if the use has a subreg, but the def had the whole reg. Unlike the + previous case, the optimization is possible and often useful indeed. */ + if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set))) + reg = SUBREG_REG (reg); + + /* Check if the substitution is valid (last, because it's the most + expensive check!). */ + src = SET_SRC (def_set); + if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn)) + return false; + + /* Check if the def is loading something from the constant pool; in this + case we would undo optimization such as compress_float_constant. + Still, we can set a REG_EQUAL note. */ + if (MEM_P (src) && MEM_READONLY_P (src)) + { + rtx x = avoid_constant_pool_reference (src); + if (x != src) + { + rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); + rtx old = note ? XEXP (note, 0) : SET_SRC (use_set); + rtx new = simplify_replace_rtx (old, src, x); + if (old != new) + set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new)); + } + return false; + } + + /* Else try simplifying. */ + + if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE) + { + loc = &SET_DEST (use_set); + set_reg_equal = false; + } + else + { + rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); + if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE) + loc = &XEXP (note, 0); + else + loc = &SET_SRC (use_set); + + /* Do not replace an existing REG_EQUAL note if the insn is not + recognized. Either we're already replacing in the note, or + we'll separately try plugging the definition in the note and + simplifying. */ + set_reg_equal = (note == NULL_RTX); + } + + if (GET_MODE (*loc) == VOIDmode) + mode = GET_MODE (SET_DEST (use_set)); + else + mode = GET_MODE (*loc); + + new = propagate_rtx (*loc, mode, reg, src); + + if (!new) + return false; + + return try_fwprop_subst (use, loc, new, def_insn, set_reg_equal); +} + + +/* Given a use USE of an insn, if it has a single reaching + definition, try to forward propagate it into that insn. */ + +static void +forward_propagate_into (struct df_ref *use) +{ + struct df_link *defs; + struct df_ref *def; + rtx def_insn, def_set, use_insn; + rtx parent; + + if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE) + return; + + /* Only consider uses that have a single definition. */ + defs = DF_REF_CHAIN (use); + if (!defs || defs->next) + return; + + def = defs->ref; + if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE) + return; + + /* Do not propagate loop invariant definitions inside the loop if + we are going to unroll. */ + if (loops.num > 0 + && DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father) + return; + + /* Check if the use is still present in the insn! */ + use_insn = DF_REF_INSN (use); + if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE) + parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); + else + parent = PATTERN (use_insn); + + if (!loc_mentioned_in_p (DF_REF_LOC (use), parent)) + return; + + def_insn = DF_REF_INSN (def); + def_set = single_set (def_insn); + if (!def_set) + return; + + /* Only try one kind of propagation. If two are possible, we'll + do it on the following iterations. */ + if (!forward_propagate_and_simplify (use, def_insn, def_set)) + forward_propagate_subreg (use, def_insn, def_set); +} + + +static void +fwprop_init (void) +{ + num_changes = 0; + + /* We do not always want to propagate into loops, so we have to find + loops and be careful about them. But we have to call flow_loops_find + before df_analyze, because flow_loops_find may introduce new jump + insns (sadly) if we are not working in cfglayout mode. */ + if (flag_rerun_cse_after_loop && (flag_unroll_loops || flag_peel_loops)) + { + calculate_dominance_info (CDI_DOMINATORS); + flow_loops_find (&loops); + } + + /* Now set up the dataflow problem (we only want use-def chains) and + put the dataflow solver to work. */ + df = df_init (DF_SUBREGS | DF_EQUIV_NOTES); + df_chain_add_problem (df, DF_UD_CHAIN); + df_analyze (df); + df_dump (df, dump_file); +} + +static void +fwprop_done (void) +{ + df_finish (df); + + if (flag_rerun_cse_after_loop && (flag_unroll_loops || flag_peel_loops)) + { + flow_loops_free (&loops); + free_dominance_info (CDI_DOMINATORS); + loops.num = 0; + } + + cleanup_cfg (0); + delete_trivially_dead_insns (get_insns (), max_reg_num ()); + + if (dump_file) + fprintf (dump_file, + "\nNumber of successful forward propagations: %d\n\n", + num_changes); +} + + + +/* Main entry point. */ + +static bool +gate_fwprop (void) +{ + return optimize > 0 && flag_forward_propagate; +} + +static unsigned int +fwprop (void) +{ + unsigned i; + + fwprop_init (); + + /* Go through all the uses. update_df will create new ones at the + end, and we'll go through them as well. + + Do not forward propagate addresses into loops until after unrolling. + CSE did so because it was able to fix its own mess, but we are not. */ + + df_reorganize_refs (&df->use_info); + for (i = 0; i < DF_USES_SIZE (df); i++) + { + struct df_ref *use = DF_USES_GET (df, i); + if (use) + if (loops.num == 0 + || DF_REF_TYPE (use) == DF_REF_REG_USE + || DF_REF_BB (use)->loop_father == NULL) + forward_propagate_into (use); + } + + fwprop_done (); + + return 0; +} + +struct tree_opt_pass pass_rtl_fwprop = +{ + "fwprop1", /* name */ + gate_fwprop, /* gate */ + fwprop, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_FWPROP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func, /* todo_flags_finish */ + 0 /* letter */ +}; + +static bool +gate_fwprop_addr (void) +{ + return optimize > 0 && flag_forward_propagate && flag_rerun_cse_after_loop + && (flag_unroll_loops || flag_peel_loops); +} + +static unsigned int +fwprop_addr (void) +{ + unsigned i; + fwprop_init (); + + /* Go through all the uses. update_df will create new ones at the + end, and we'll go through them as well. */ + df_reorganize_refs (&df->use_info); + for (i = 0; i < DF_USES_SIZE (df); i++) + { + struct df_ref *use = DF_USES_GET (df, i); + if (use) + if (DF_REF_TYPE (use) != DF_REF_REG_USE + && DF_REF_BB (use)->loop_father != NULL) + forward_propagate_into (use); + } + + fwprop_done (); + + return 0; +} + +struct tree_opt_pass pass_rtl_fwprop_addr = +{ + "fwprop2", /* name */ + gate_fwprop_addr, /* gate */ + fwprop_addr, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_FWPROP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func, /* todo_flags_finish */ + 0 /* letter */ +}; diff --git a/gcc/gcse.c b/gcc/gcse.c index b28273d48ce..f1214952b24 100644 --- a/gcc/gcse.c +++ b/gcc/gcse.c @@ -3396,7 +3396,8 @@ one_cprop_pass (int pass, bool cprop_jumps, bool bypass_jumps) global_const_prop_count = local_const_prop_count = 0; global_copy_prop_count = local_copy_prop_count = 0; - local_cprop_pass (cprop_jumps); + if (cprop_jumps) + local_cprop_pass (cprop_jumps); /* Determine implicit sets. */ implicit_sets = XCNEWVEC (rtx, last_basic_block); diff --git a/gcc/opts.c b/gcc/opts.c index bbce07498b9..da16f7bf341 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -474,6 +474,7 @@ decode_options (unsigned int argc, const char **argv) flag_thread_jumps = 1; flag_crossjumping = 1; flag_optimize_sibling_calls = 1; + flag_forward_propagate = 1; flag_cse_follow_jumps = 1; flag_gcse = 1; flag_expensive_optimizations = 1; diff --git a/gcc/passes.c b/gcc/passes.c index 46e4756e84d..9d585d3bf6a 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -635,6 +635,7 @@ init_optimization_passes (void) NEXT_PASS (pass_instantiate_virtual_regs); NEXT_PASS (pass_jump2); NEXT_PASS (pass_cse); + NEXT_PASS (pass_rtl_fwprop); NEXT_PASS (pass_gcse); NEXT_PASS (pass_jump_bypass); NEXT_PASS (pass_rtl_ifcvt); @@ -645,6 +646,7 @@ init_optimization_passes (void) NEXT_PASS (pass_loop2); NEXT_PASS (pass_web); NEXT_PASS (pass_cse2); + NEXT_PASS (pass_rtl_fwprop_addr); NEXT_PASS (pass_life); NEXT_PASS (pass_combine); NEXT_PASS (pass_if_after_combine); diff --git a/gcc/recog.c b/gcc/recog.c index a3948a729e0..18ad72f2d4e 100644 --- a/gcc/recog.c +++ b/gcc/recog.c @@ -238,6 +238,28 @@ validate_change (rtx object, rtx *loc, rtx new, int in_group) return apply_change_group (); } +/* Keep X canonicalized if some changes have made it non-canonical; only + modifies the operands of X, not (for example) its code. Simplifications + are not the job of this routine. + + Return true if anything was changed. */ +bool +canonicalize_change_group (rtx insn, rtx x) +{ + if (COMMUTATIVE_P (x) + && swap_commutative_operands_p (XEXP (x, 0), XEXP (x, 1))) + { + /* Oops, the caller has made X no longer canonical. + Let's redo the changes in the correct order. */ + rtx tem = XEXP (x, 0); + validate_change (insn, &XEXP (x, 0), XEXP (x, 1), 1); + validate_change (insn, &XEXP (x, 1), tem, 1); + return true; + } + else + return false; +} + /* This subroutine of apply_change_group verifies whether the changes to INSN were valid; i.e. whether INSN can still be recognized. */ diff --git a/gcc/recog.h b/gcc/recog.h index 8281a9e55e7..b921b8074dc 100644 --- a/gcc/recog.h +++ b/gcc/recog.h @@ -74,6 +74,7 @@ extern void init_recog_no_volatile (void); extern int check_asm_operands (rtx); extern int asm_operand_ok (rtx, const char *); extern int validate_change (rtx, rtx *, rtx, int); +extern bool canonicalize_change_group (rtx insn, rtx x); extern int insn_invalid_p (rtx); extern int verify_changes (int); extern void confirm_change_group (void); diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index b0a816106d4..8a7c914022c 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -2837,10 +2837,15 @@ auto_inc_p (rtx x) int loc_mentioned_in_p (rtx *loc, rtx in) { - enum rtx_code code = GET_CODE (in); - const char *fmt = GET_RTX_FORMAT (code); + enum rtx_code code; + const char *fmt; int i, j; + if (!in) + return 0; + + code = GET_CODE (in); + fmt = GET_RTX_FORMAT (code); for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) { if (loc == &in->u.fld[i].rt_rtx) diff --git a/gcc/timevar.def b/gcc/timevar.def index 28b0b766eba..bdfe9ae201c 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -128,6 +128,7 @@ DEFTIMEVAR (TV_TEMPLATE_INSTANTIATION, "template instantiation") DEFTIMEVAR (TV_EXPAND , "expand") DEFTIMEVAR (TV_VARCONST , "varconst") DEFTIMEVAR (TV_JUMP , "jump") +DEFTIMEVAR (TV_FWPROP , "forward prop") DEFTIMEVAR (TV_CSE , "CSE") DEFTIMEVAR (TV_LOOP , "loop analysis") DEFTIMEVAR (TV_GCSE , "global CSE") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 8a553021706..3ab0ef730b7 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -330,6 +330,8 @@ extern struct tree_opt_pass pass_rtl_eh; extern struct tree_opt_pass pass_initial_value_sets; extern struct tree_opt_pass pass_unshare_all_rtl; extern struct tree_opt_pass pass_instantiate_virtual_regs; +extern struct tree_opt_pass pass_rtl_fwprop; +extern struct tree_opt_pass pass_rtl_fwprop_addr; extern struct tree_opt_pass pass_jump2; extern struct tree_opt_pass pass_cse; extern struct tree_opt_pass pass_gcse; |