gcc/

2012-03-31 Kenneth Zadeck <zadeck@naturalbridge.com> Richard Sandiford <r.sandiford@uk.ibm.com> * Makefile.in (lower-subreg.o, target-globals.o): Depend on lower-subreg.h. * lower-subreg.h: New file. * target-globals.h (this_target_lower_subreg): Declare. (target_globals): Add lower_subreg; (restore_target_globals): Restore this_target_lower_subreg. * target-globals.c: Include it. (default_target_globals): Add default_target_lower_subreg. (save_target_globals): Initialize target_lower_subreg. * rtl.h (init_lower_subreg): Added declaration. * toplev.c (backend_init_target): Call initializer for lower-subreg pass. * lower-subreg.c (LOG_COSTS, FORCE_LOWERING): New macros. (default_target_lower_subreg): New variable. (this_target_lower_subreg): Likewise. (twice_word_mode, choices): New macros. (shift_cost, compute_splitting_shift, compute_costs) (init_lower_subreg): New functions. (resolve_simple_move): Add speed_p argument. Check choices. (find_pseudo_copy): Don't check the mode size here. (resolve_simple_move): Assert the mode size. (find_decomposable_shift_zext): Add speed_p argument and return a bool. Check choices. (resolve_shift_zext): Add comment. (dump_shift_choices, dump_choices): New functions. (decompose_multiword_subregs): Dump list of profitable transformations. Add code to skip non profitable transformations. Update calls to simple_move and find_decomposable_shift_zext. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@187015 138bc75d-0d04-0410-961f-82ee72b054a4
author: rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4> 2012-05-01 14:45:24 +0000
committer: rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4> 2012-05-01 14:45:24 +0000
commit: c7944dcedf702fbf1a8a802158732a46b5d72830 (patch)
tree: 4e8ab9b9d983a212b8aa35ed9a63c7bffb107ef5
parent: e7fd8dfafd84cfd4c885a178508ade470abac0e6 (diff)
download: gcc-c7944dcedf702fbf1a8a802158732a46b5d72830.tar.gz
8 files changed, 449 insertions, 51 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5928a9ee0a2..e7851b47ae7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,35 @@
+2012-05-01  Kenneth Zadeck  <zadeck@naturalbridge.com>
+	    Richard Sandiford  <r.sandiford@uk.ibm.com>
+
+	* Makefile.in (lower-subreg.o, target-globals.o): Depend on
+	lower-subreg.h.
+	* lower-subreg.h: New file.
+	* target-globals.h (this_target_lower_subreg): Declare.
+	(target_globals): Add lower_subreg.
+	(restore_target_globals): Restore this_target_lower_subreg.
+	* target-globals.c: Include it.
+	(default_target_globals): Add default_target_lower_subreg.
+	(save_target_globals): Initialize target_lower_subreg.
+	* rtl.h (init_lower_subreg): Added declaration.
+	* toplev.c (backend_init_target): Call initializer for lower-subreg
+	pass.
+	* lower-subreg.c (LOG_COSTS, FORCE_LOWERING): New macros.
+	(default_target_lower_subreg): New variable.
+	(this_target_lower_subreg): Likewise.
+	(twice_word_mode, choices): New macros.
+	(shift_cost, compute_splitting_shift, compute_costs)
+	(init_lower_subreg): New functions.
+	(simple_move): Add speed_p argument.  Check choices.
+	(find_pseudo_copy): Don't check the mode size here.
+	(resolve_simple_move): Assert the mode size.
+	(find_decomposable_shift_zext): Add speed_p argument and return
+	a bool.  Check choices.
+	(resolve_shift_zext): Add comment.
+	(dump_shift_choices, dump_choices): New functions.
+	(decompose_multiword_subregs): Dump list of profitable
+	transformations.  Add code to skip non profitable transformations.
+	Update calls to simple_move and find_decomposable_shift_zext.
+
 2012-05-01  Ian Bolton  <ian.bolton@arm.com>
             Sameera Deshpande  <sameera.deshpande@arm.com>
             Greta Yorsh  <greta.yorsh@arm.com>
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 97c8973d260..d35d110e920 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -3428,11 +3428,13 @@ dbgcnt.o: dbgcnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(DIAGNOSTIC_CORE_H) $(DB
 lower-subreg.o : lower-subreg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(MACHMODE_H) $(TM_H) $(RTL_H) $(TM_P_H) $(TIMEVAR_H) $(FLAGS_H) \
    insn-config.h $(BASIC_BLOCK_H) $(RECOG_H) $(OBSTACK_H) $(BITMAP_H) \
-   $(EXPR_H) $(EXCEPT_H) $(REGS_H) $(TREE_PASS_H) $(DF_H) dce.h
+   $(EXPR_H) $(EXCEPT_H) $(REGS_H) $(TREE_PASS_H) $(DF_H) dce.h \
+   lower-subreg.h
 target-globals.o : target-globals.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) insn-config.h $(MACHMODE_H) $(GGC_H) toplev.h target-globals.h \
    $(FLAGS_H) $(REGS_H) $(RTL_H) reload.h expmed.h $(EXPR_H) $(OPTABS_H) \
-   $(LIBFUNCS_H) $(CFGLOOP_H) $(IRA_INT_H) builtins.h gcse.h bb-reorder.h
+   $(LIBFUNCS_H) $(CFGLOOP_H) $(IRA_INT_H) builtins.h gcse.h bb-reorder.h \
+   lower-subreg.h
 hw-doloop.o : hw-doloop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) $(FLAGS_H) $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) $(TM_P_H) \
    $(DF_H) $(CFGLAYOUT_H) $(CFGLOOP_H) output.h $(RECOG_H) $(TARGET_H) \
diff --git a/gcc/lower-subreg.c b/gcc/lower-subreg.c
index 89f3044b7fe..a11b33d1391 100644
--- a/gcc/lower-subreg.c
+++ b/gcc/lower-subreg.c
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "regs.h"
 #include "tree-pass.h"
 #include "df.h"
+#include "lower-subreg.h"
 
 #ifdef STACK_GROWS_DOWNWARD
 # undef STACK_GROWS_DOWNWARD
@@ -52,10 +53,35 @@ DEF_VEC_P (bitmap);
 DEF_VEC_ALLOC_P (bitmap,heap);
 
 /* Decompose multi-word pseudo-registers into individual
-   pseudo-registers when possible.  This is possible when all the uses
-   of a multi-word register are via SUBREG, or are copies of the
-   register to another location.  Breaking apart the register permits
-   more CSE and permits better register allocation.  */
+   pseudo-registers when possible and profitable.  This is possible
+   when all the uses of a multi-word register are via SUBREG, or are
+   copies of the register to another location.  Breaking apart the
+   register permits more CSE and permits better register allocation.
+   This is profitable if the machine does not have move instructions
+   to do this.
+
+   This pass only splits moves with modes that are wider than
+   word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer
+   modes that are twice the width of word_mode.  The latter could be
+   generalized if there was a need to do this, but the trend in
+   architectures is to not need this.
+
+   There are two useful preprocessor defines for use by maintainers:
+
+   #define LOG_COSTS 1
+
+   if you wish to see the actual cost estimates that are being used
+   for each mode wider than word mode and the cost estimates for zero
+   extension and the shifts.   This can be useful when port maintainers
+   are tuning insn rtx costs.
+
+   #define FORCE_LOWERING 1
+
+   if you wish to test the pass with all the transformation forced on.
+   This can be useful for finding bugs in the transformations.  */
+
+#define LOG_COSTS 0
+#define FORCE_LOWERING 0
 
 /* Bit N in this bitmap is set if regno N is used in a context in
    which we can decompose it.  */
@@ -75,8 +101,190 @@ static bitmap subreg_context;
    copy from reg M to reg N.  */
 static VEC(bitmap,heap) *reg_copy_graph;
 
-/* Return whether X is a simple object which we can take a word_mode
-   subreg of.  */
+struct target_lower_subreg default_target_lower_subreg;
+#if SWITCHABLE_TARGET
+struct target_lower_subreg *this_target_lower_subreg
+  = &default_target_lower_subreg;
+#endif
+
+#define twice_word_mode \
+  this_target_lower_subreg->x_twice_word_mode
+#define choices \
+  this_target_lower_subreg->x_choices
+
+/* RTXes used while computing costs.  */
+struct cost_rtxes {
+  /* Source and target registers.  */
+  rtx source;
+  rtx target;
+
+  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
+  rtx zext;
+
+  /* A shift of SOURCE.  */
+  rtx shift;
+
+  /* A SET of TARGET.  */
+  rtx set;
+};
+
+/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
+   rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */
+
+static int
+shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
+	    enum machine_mode mode, int op1)
+{
+  PUT_MODE (rtxes->target, mode);
+  PUT_CODE (rtxes->shift, code);
+  PUT_MODE (rtxes->shift, mode);
+  PUT_MODE (rtxes->source, mode);
+  XEXP (rtxes->shift, 1) = GEN_INT (op1);
+  SET_SRC (rtxes->set) = rtxes->shift;
+  return insn_rtx_cost (rtxes->set, speed_p);
+}
+
+/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
+   to true if it is profitable to split a double-word CODE shift
+   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
+   for speed or size profitability.
+
+   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
+   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
+   is the cost of moving between word registers.  */
+
+static void
+compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
+			 bool *splitting, enum rtx_code code,
+			 int word_move_zero_cost, int word_move_cost)
+{
+  int wide_cost, narrow_cost, i;
+
+  for (i = 0; i < BITS_PER_WORD; i++)
+    {
+      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
+			      i + BITS_PER_WORD);
+      if (i == 0)
+	narrow_cost = word_move_cost;
+      else
+	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
+
+      if (LOG_COSTS)
+	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
+		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
+		 i + BITS_PER_WORD, wide_cost, narrow_cost,
+		 word_move_zero_cost);
+
+      if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost)
+	splitting[i] = true;
+    }
+}
+
+/* Compute what we should do when optimizing for speed or size; SPEED_P
+   selects which.  Use RTXES for computing costs.  */
+
+static void
+compute_costs (bool speed_p, struct cost_rtxes *rtxes)
+{
+  unsigned int i;
+  int word_move_zero_cost, word_move_cost;
+
+  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
+  word_move_zero_cost = insn_rtx_cost (rtxes->set, speed_p);
+
+  SET_SRC (rtxes->set) = rtxes->source;
+  word_move_cost = insn_rtx_cost (rtxes->set, speed_p);
+
+  if (LOG_COSTS)
+    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
+	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
+
+  for (i = 0; i < MAX_MACHINE_MODE; i++)
+    {
+      enum machine_mode mode = (enum machine_mode) i;
+      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
+      if (factor > 1)
+	{
+	  int mode_move_cost;
+
+	  PUT_MODE (rtxes->target, mode);
+	  PUT_MODE (rtxes->source, mode);
+	  mode_move_cost = insn_rtx_cost (rtxes->set, speed_p);
+
+	  if (LOG_COSTS)
+	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
+		     GET_MODE_NAME (mode), mode_move_cost,
+		     word_move_cost, factor);
+
+	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
+	    {
+	      choices[speed_p].move_modes_to_split[i] = true;
+	      choices[speed_p].something_to_do = true;
+	    }
+	}
+    }
+
+  /* For the moves and shifts, the only case that is checked is one
+     where the mode of the target is an integer mode twice the width
+     of the word_mode.
+
+     If it is not profitable to split a double word move then do not
+     even consider the shifts or the zero extension.  */
+  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
+    {
+      int zext_cost;
+
+      /* The only case to check here is whether moving zero into the upper
+	 part is cheaper than doing the zext itself.  */
+      PUT_MODE (rtxes->target, twice_word_mode);
+      PUT_MODE (rtxes->source, word_mode);
+      SET_SRC (rtxes->set) = rtxes->zext;
+      zext_cost = insn_rtx_cost (rtxes->set, speed_p);
+
+      if (LOG_COSTS)
+	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
+		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
+		 zext_cost, word_move_cost, word_move_zero_cost);
+
+      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
+	choices[speed_p].splitting_zext = true;
+
+      compute_splitting_shift (speed_p, rtxes,
+			       choices[speed_p].splitting_ashift, ASHIFT,
+			       word_move_zero_cost, word_move_cost);
+      compute_splitting_shift (speed_p, rtxes,
+			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
+			       word_move_zero_cost, word_move_cost);
+    }
+}
+
+/* Do one-per-target initialisation.  This involves determining
+   which operations on the machine are profitable.  If none are found,
+   then the pass just returns when called.  */
+
+void
+init_lower_subreg (void)
+{
+  struct cost_rtxes rtxes;
+
+  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
+
+  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
+
+  rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
+  rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
+  rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
+  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
+  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
+
+  if (LOG_COSTS)
+    fprintf (stderr, "\nSize costs\n==========\n\n");
+  compute_costs (false, &rtxes);
+
+  if (LOG_COSTS)
+    fprintf (stderr, "\nSpeed costs\n===========\n\n");
+  compute_costs (true, &rtxes);
+}
 
 static bool
 simple_move_operand (rtx x)
@@ -101,12 +309,15 @@ simple_move_operand (rtx x)
   return true;
 }
 
-/* If INSN is a single set between two objects, return the single set.
-   Such an insn can always be decomposed.  INSN should have been
-   passed to recog and extract_insn before this is called.  */
+/* If INSN is a single set between two objects that we want to split,
+   return the single set.  SPEED_P says whether we are optimizing
+   INSN for speed or size.
+
+   INSN should have been passed to recog and extract_insn before this
+   is called.  */
 
 static rtx
-simple_move (rtx insn)
+simple_move (rtx insn, bool speed_p)
 {
   rtx x;
   rtx set;
@@ -150,6 +361,9 @@ simple_move (rtx insn)
   if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
     return NULL_RTX;
 
+  if (!choices[speed_p].move_modes_to_split[(int) mode])
+    return NULL_RTX;
+
   return set;
 }
 
@@ -173,9 +387,6 @@ find_pseudo_copy (rtx set)
   if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
     return false;
 
-  if (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
-    return false;
-
   b = VEC_index (bitmap, reg_copy_graph, rs);
   if (b == NULL)
     {
@@ -668,8 +879,7 @@ resolve_simple_move (rtx set, rtx insn)
   orig_mode = GET_MODE (dest);
 
   words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
-  if (words <= 1)
-    return insn;
+  gcc_assert (words > 1);
 
   start_sequence ();
 
@@ -931,12 +1141,13 @@ resolve_debug (rtx insn)
   resolve_reg_notes (insn);
 }
 
-/* Checks if INSN is a decomposable multiword-shift or zero-extend and
-   sets the decomposable_context bitmap accordingly.  A non-zero value
-   is returned if a decomposable insn has been found.  */
+/* Check if INSN is a decomposable multiword-shift or zero-extend and
+   set the decomposable_context bitmap accordingly.  SPEED_P is true
+   if we are optimizing INSN for speed rather than size.  Return true
+   if INSN is decomposable.  */
 
-static int
-find_decomposable_shift_zext (rtx insn)
+static bool
+find_decomposable_shift_zext (rtx insn, bool speed_p)
 {
   rtx set;
   rtx op;
@@ -944,41 +1155,44 @@ find_decomposable_shift_zext (rtx insn)
 
   set = single_set (insn);
   if (!set)
-    return 0;
+    return false;
 
   op = SET_SRC (set);
   if (GET_CODE (op) != ASHIFT
       && GET_CODE (op) != LSHIFTRT
       && GET_CODE (op) != ZERO_EXTEND)
-    return 0;
+    return false;
 
   op_operand = XEXP (op, 0);
   if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
       || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
       || HARD_REGISTER_NUM_P (REGNO (op_operand))
-      || !SCALAR_INT_MODE_P (GET_MODE (op)))
-    return 0;
+      || GET_MODE (op) != twice_word_mode)
+    return false;
 
   if (GET_CODE (op) == ZERO_EXTEND)
     {
       if (GET_MODE (op_operand) != word_mode
-	  || GET_MODE_BITSIZE (GET_MODE (op)) != 2 * BITS_PER_WORD)
-	return 0;
+	  || !choices[speed_p].splitting_zext)
+	return false;
     }
   else /* left or right shift */
     {
+      bool *splitting = (GET_CODE (op) == ASHIFT
+			 ? choices[speed_p].splitting_ashift
+			 : choices[speed_p].splitting_lshiftrt);
       if (!CONST_INT_P (XEXP (op, 1))
-	  || INTVAL (XEXP (op, 1)) < BITS_PER_WORD
-	  || GET_MODE_BITSIZE (GET_MODE (op_operand)) != 2 * BITS_PER_WORD)
-	return 0;
+	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
+			2 * BITS_PER_WORD - 1)
+	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
+	return false;
+
+      bitmap_set_bit (decomposable_context, REGNO (op_operand));
     }
 
   bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
 
-  if (GET_CODE (op) != ZERO_EXTEND)
-    bitmap_set_bit (decomposable_context, REGNO (op_operand));
-
-  return 1;
+  return true;
 }
 
 /* Decompose a more than word wide shift (in INSN) of a multiword
@@ -1008,6 +1222,8 @@ resolve_shift_zext (rtx insn)
 
   op_operand = XEXP (op, 0);
 
+  /* We can tear this operation apart only if the regs were already
+     torn apart.  */
   if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
     return NULL_RTX;
 
@@ -1073,6 +1289,56 @@ resolve_shift_zext (rtx insn)
   return insns;
 }
 
+/* Print to dump_file a description of what we're doing with shift code CODE.
+   SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD.  */
+
+static void
+dump_shift_choices (enum rtx_code code, bool *splitting)
+{
+  int i;
+  const char *sep;
+
+  fprintf (dump_file,
+	   "  Splitting mode %s for %s lowering with shift amounts = ",
+	   GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
+  sep = "";
+  for (i = 0; i < BITS_PER_WORD; i++)
+    if (splitting[i])
+      {
+	fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
+	sep = ",";
+      }
+  fprintf (dump_file, "\n");
+}
+
+/* Print to dump_file a description of what we're doing when optimizing
+   for speed or size; SPEED_P says which.  DESCRIPTION is a description
+   of the SPEED_P choice.  */
+
+static void
+dump_choices (bool speed_p, const char *description)
+{
+  unsigned int i;
+
+  fprintf (dump_file, "Choices when optimizing for %s:\n", description);
+
+  for (i = 0; i < MAX_MACHINE_MODE; i++)
+    if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
+      fprintf (dump_file, "  %s mode %s for copy lowering.\n",
+	       choices[speed_p].move_modes_to_split[i]
+	       ? "Splitting"
+	       : "Skipping",
+	       GET_MODE_NAME ((enum machine_mode) i));
+
+  fprintf (dump_file, "  %s mode %s for zero_extend lowering.\n",
+	   choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
+	   GET_MODE_NAME (twice_word_mode));
+
+  dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
+  dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
+  fprintf (dump_file, "\n");
+}
+
 /* Look for registers which are always accessed via word-sized SUBREGs
    or via copies.  Decompose these registers into several word-sized
    pseudo-registers.  */
@@ -1082,9 +1348,21 @@ decompose_multiword_subregs (void)
 {
   unsigned int max;
   basic_block bb;
+  bool speed_p;
 
-  if (df)
-    df_set_flags (DF_DEFER_INSN_RESCAN);
+  if (dump_file)
+    {
+      dump_choices (false, "size");
+      dump_choices (true, "speed");
+    }
+
+  /* Check if this target even has any modes to consider lowering.   */
+  if (!choices[false].something_to_do && !choices[true].something_to_do)
+    {
+      if (dump_file)
+	fprintf (dump_file, "Nothing to do!\n");
+      return;
+    }
 
   max = max_reg_num ();
 
@@ -1094,24 +1372,38 @@ decompose_multiword_subregs (void)
      all the insns.  */
   {
     unsigned int i;
+    bool useful_modes_seen = false;
 
     for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
+      if (regno_reg_rtx[i] != NULL)
+	{
+	  enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
+	  if (choices[false].move_modes_to_split[(int) mode]
+	      || choices[true].move_modes_to_split[(int) mode])
+	    {
+	      useful_modes_seen = true;
+	      break;
+	    }
+	}
+
+    if (!useful_modes_seen)
       {
-	if (regno_reg_rtx[i] != NULL
-	    && GET_MODE_SIZE (GET_MODE (regno_reg_rtx[i])) > UNITS_PER_WORD)
-	  break;
+	if (dump_file)
+	  fprintf (dump_file, "Nothing to lower in this function.\n");
+	return;
       }
-    if (i == max)
-      return;
   }
 
   if (df)
-    run_word_dce ();
+    {
+      df_set_flags (DF_DEFER_INSN_RESCAN);
+      run_word_dce ();
+    }
 
-  /* FIXME: When the dataflow branch is merged, we can change this
-     code to look for each multi-word pseudo-register and to find each
-     insn which sets or uses that register.  That should be faster
-     than scanning all the insns.  */
+  /* FIXME: It may be possible to change this code to look for each
+     multi-word pseudo-register and to find each insn which sets or
+     uses that register.  That should be faster than scanning all the
+     insns.  */
 
   decomposable_context = BITMAP_ALLOC (NULL);
   non_decomposable_context = BITMAP_ALLOC (NULL);
@@ -1121,6 +1413,7 @@ decompose_multiword_subregs (void)
   VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
   memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);
 
+  speed_p = optimize_function_for_speed_p (cfun);
   FOR_EACH_BB (bb)
     {
       rtx insn;
@@ -1138,12 +1431,12 @@ decompose_multiword_subregs (void)
 
 	  recog_memoized (insn);
 
-	  if (find_decomposable_shift_zext (insn))
+	  if (find_decomposable_shift_zext (insn, speed_p))
 	    continue;
 
 	  extract_insn (insn);
 
-	  set = simple_move (insn);
+	  set = simple_move (insn, speed_p);
 
 	  if (!set)
 	    cmi = NOT_SIMPLE_MOVE;
@@ -1197,7 +1490,9 @@ decompose_multiword_subregs (void)
       FOR_EACH_BB (bb)
 	{
 	  rtx insn;
+	  bool speed_p;
 
+	  speed_p = optimize_bb_for_speed_p (bb);
 	  FOR_BB_INSNS (bb, insn)
 	    {
 	      rtx pat;
@@ -1220,7 +1515,7 @@ decompose_multiword_subregs (void)
 		  recog_memoized (insn);
 		  extract_insn (insn);
 
-		  set = simple_move (insn);
+		  set = simple_move (insn, speed_p);
 		  if (set)
 		    {
 		      rtx orig_insn = insn;
diff --git a/gcc/lower-subreg.h b/gcc/lower-subreg.h
new file mode 100644
index 00000000000..5c960dbfd09
--- /dev/null
+++ b/gcc/lower-subreg.h
@@ -0,0 +1,59 @@
+/* Target-dependent costs for lower-subreg.c.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef LOWER_SUBREG_H
+#define LOWER_SUBREG_H 1
+
+/* Information about whether, and where, lower-subreg should be applied.  */
+struct lower_subreg_choices {
+  /* A boolean vector for move splitting that is indexed by mode and is
+     true for each mode that is to have its copies split.  */
+  bool move_modes_to_split[MAX_MACHINE_MODE];
+
+  /* True if zero-extensions from word_mode to twice_word_mode should
+     be split.  */
+  bool splitting_zext;
+
+  /* Index X is true if twice_word_mode shifts by X + BITS_PER_WORD
+     should be split.  */
+  bool splitting_ashift[MAX_BITS_PER_WORD];
+  bool splitting_lshiftrt[MAX_BITS_PER_WORD];
+
+  /* True if there is at least one mode that is worth splitting.  */
+  bool something_to_do;
+};
+
+/* Target-specific information for the subreg lowering pass.  */
+struct target_lower_subreg {
+  /* An integer mode that is twice as wide as word_mode.  */
+  enum machine_mode x_twice_word_mode;
+
+  /* What we have decided to do when optimizing for size (index 0)
+     and speed (index 1).  */
+  struct lower_subreg_choices x_choices[2];
+};
+
+extern struct target_lower_subreg default_target_lower_subreg;
+#if SWITCHABLE_TARGET
+extern struct target_lower_subreg *this_target_lower_subreg;
+#else
+#define this_target_lower_subreg (&default_target_lower_subreg)
+#endif
+
+#endif
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 1da1792d1a3..86c56acc197 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -2526,6 +2526,9 @@ extern void init_expmed (void);
 extern void expand_inc (rtx, rtx);
 extern void expand_dec (rtx, rtx);
 
+/* In lower-subreg.c */
+extern void init_lower_subreg (void);
+
 /* In gcse.c */
 extern bool can_copy_p (enum machine_mode);
 extern bool can_assign_to_reg_without_clobbers_p (rtx);
diff --git a/gcc/target-globals.c b/gcc/target-globals.c
index 7a4058ebd18..e679f21614e 100644
--- a/gcc/target-globals.c
+++ b/gcc/target-globals.c
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "gcse.h"
 #include "bb-reorder.h"
+#include "lower-subreg.h"
 
 #if SWITCHABLE_TARGET
 struct target_globals default_target_globals = {
@@ -56,7 +57,8 @@ struct target_globals default_target_globals = {
   &default_target_ira_int,
   &default_target_builtins,
   &default_target_gcse,
-  &default_target_bb_reorder
+  &default_target_bb_reorder,
+  &default_target_lower_subreg
 };
 
 struct target_globals *
@@ -79,6 +81,7 @@ save_target_globals (void)
   g->builtins = XCNEW (struct target_builtins);
   g->gcse = XCNEW (struct target_gcse);
   g->bb_reorder = XCNEW (struct target_bb_reorder);
+  g->lower_subreg = XCNEW (struct target_lower_subreg);
   restore_target_globals (g);
   init_reg_sets ();
   target_reinit ();
diff --git a/gcc/target-globals.h b/gcc/target-globals.h
index 166f21ada4f..fb0f260c0c9 100644
--- a/gcc/target-globals.h
+++ b/gcc/target-globals.h
@@ -35,6 +35,7 @@ extern struct target_ira_int *this_target_ira_int;
 extern struct target_builtins *this_target_builtins;
 extern struct target_gcse *this_target_gcse;
 extern struct target_bb_reorder *this_target_bb_reorder;
+extern struct target_lower_subreg *this_target_lower_subreg;
 
 struct GTY(()) target_globals {
   struct target_flag_state *GTY((skip)) flag_state;
@@ -51,6 +52,7 @@ struct GTY(()) target_globals {
   struct target_builtins *GTY((skip)) builtins;
   struct target_gcse *GTY((skip)) gcse;
   struct target_bb_reorder *GTY((skip)) bb_reorder;
+  struct target_lower_subreg *GTY((skip)) lower_subreg;
 };
 
 extern struct target_globals default_target_globals;
@@ -74,6 +76,7 @@ restore_target_globals (struct target_globals *g)
   this_target_builtins = g->builtins;
   this_target_gcse = g->gcse;
   this_target_bb_reorder = g->bb_reorder;
+  this_target_lower_subreg = g->lower_subreg;
 }
 #endif
 
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 7d7be83e28e..d9aaa429f65 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1601,6 +1601,7 @@ backend_init_target (void)
   /* rtx_cost is mode-dependent, so cached values need to be recomputed
      on a mode change.  */
   init_expmed ();
+  init_lower_subreg ();
 
   /* We may need to recompute regno_save_code[] and regno_restore_code[]
      after a mode change as well.  */
author	rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4>	2012-05-01 14:45:24 +0000
committer	rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4>	2012-05-01 14:45:24 +0000
commit	c7944dcedf702fbf1a8a802158732a46b5d72830 (patch)
tree	4e8ab9b9d983a212b8aa35ed9a63c7bffb107ef5
parent	e7fd8dfafd84cfd4c885a178508ade470abac0e6 (diff)
download	gcc-c7944dcedf702fbf1a8a802158732a46b5d72830.tar.gz