diff options
author | rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-05-01 14:45:24 +0000 |
---|---|---|
committer | rsandifo <rsandifo@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-05-01 14:45:24 +0000 |
commit | c7944dcedf702fbf1a8a802158732a46b5d72830 (patch) | |
tree | 4e8ab9b9d983a212b8aa35ed9a63c7bffb107ef5 | |
parent | e7fd8dfafd84cfd4c885a178508ade470abac0e6 (diff) | |
download | gcc-c7944dcedf702fbf1a8a802158732a46b5d72830.tar.gz |
gcc/
2012-03-31 Kenneth Zadeck <zadeck@naturalbridge.com>
Richard Sandiford <r.sandiford@uk.ibm.com>
* Makefile.in (lower-subreg.o, target-globals.o): Depend on
lower-subreg.h.
* lower-subreg.h: New file.
* target-globals.h (this_target_lower_subreg): Declare.
(target_globals): Add lower_subreg.
(restore_target_globals): Restore this_target_lower_subreg.
* target-globals.c: Include lower-subreg.h.
(default_target_globals): Add default_target_lower_subreg.
(save_target_globals): Initialize target_lower_subreg.
* rtl.h (init_lower_subreg): Added declaration.
* toplev.c (backend_init_target): Call initializer for lower-subreg
pass.
* lower-subreg.c (LOG_COSTS, FORCE_LOWERING): New macros.
(default_target_lower_subreg): New variable.
(this_target_lower_subreg): Likewise.
(twice_word_mode, choices): New macros.
(shift_cost, compute_splitting_shift, compute_costs)
(init_lower_subreg): New functions.
(simple_move): Add speed_p argument. Check choices.
(find_pseudo_copy): Don't check the mode size here.
(resolve_simple_move): Assert the mode size.
(find_decomposable_shift_zext): Add speed_p argument and return
a bool. Check choices.
(resolve_shift_zext): Add comment.
(dump_shift_choices, dump_choices): New functions.
(decompose_multiword_subregs): Dump list of profitable
transformations. Add code to skip non-profitable transformations.
Update calls to simple_move and find_decomposable_shift_zext.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@187015 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 32 | ||||
-rw-r--r-- | gcc/Makefile.in | 6 | ||||
-rw-r--r-- | gcc/lower-subreg.c | 391 | ||||
-rw-r--r-- | gcc/lower-subreg.h | 59 | ||||
-rw-r--r-- | gcc/rtl.h | 3 | ||||
-rw-r--r-- | gcc/target-globals.c | 5 | ||||
-rw-r--r-- | gcc/target-globals.h | 3 | ||||
-rw-r--r-- | gcc/toplev.c | 1 |
8 files changed, 449 insertions, 51 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5928a9ee0a2..e7851b47ae7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,35 @@ +2012-05-01 Kenneth Zadeck <zadeck@naturalbridge.com> + Richard Sandiford <r.sandiford@uk.ibm.com> + + * Makefile.in (lower-subreg.o, target-globals.o): Depend on + lower-subreg.h. + * lower-subreg.h: New file. + * target-globals.h (this_target_lower_subreg): Declare. + (target_globals): Add lower_subreg. + (restore_target_globals): Restore this_target_lower_subreg. + * target-globals.c: Include lower-subreg.h. + (default_target_globals): Add default_target_lower_subreg. + (save_target_globals): Initialize target_lower_subreg. + * rtl.h (init_lower_subreg): Added declaration. + * toplev.c (backend_init_target): Call initializer for lower-subreg + pass. + * lower-subreg.c (LOG_COSTS, FORCE_LOWERING): New macros. + (default_target_lower_subreg): New variable. + (this_target_lower_subreg): Likewise. + (twice_word_mode, choices): New macros. + (shift_cost, compute_splitting_shift, compute_costs) + (init_lower_subreg): New functions. + (simple_move): Add speed_p argument. Check choices. + (find_pseudo_copy): Don't check the mode size here. + (resolve_simple_move): Assert the mode size. + (find_decomposable_shift_zext): Add speed_p argument and return + a bool. Check choices. + (resolve_shift_zext): Add comment. + (dump_shift_choices, dump_choices): New functions. + (decompose_multiword_subregs): Dump list of profitable + transformations. Add code to skip non-profitable transformations. + Update calls to simple_move and find_decomposable_shift_zext. 
+ 2012-05-01 Ian Bolton <ian.bolton@arm.com> Sameera Deshpande <sameera.deshpande@arm.com> Greta Yorsh <greta.yorsh@arm.com> diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 97c8973d260..d35d110e920 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3428,11 +3428,13 @@ dbgcnt.o: dbgcnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(DIAGNOSTIC_CORE_H) $(DB lower-subreg.o : lower-subreg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(MACHMODE_H) $(TM_H) $(RTL_H) $(TM_P_H) $(TIMEVAR_H) $(FLAGS_H) \ insn-config.h $(BASIC_BLOCK_H) $(RECOG_H) $(OBSTACK_H) $(BITMAP_H) \ - $(EXPR_H) $(EXCEPT_H) $(REGS_H) $(TREE_PASS_H) $(DF_H) dce.h + $(EXPR_H) $(EXCEPT_H) $(REGS_H) $(TREE_PASS_H) $(DF_H) dce.h \ + lower-subreg.h target-globals.o : target-globals.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) insn-config.h $(MACHMODE_H) $(GGC_H) toplev.h target-globals.h \ $(FLAGS_H) $(REGS_H) $(RTL_H) reload.h expmed.h $(EXPR_H) $(OPTABS_H) \ - $(LIBFUNCS_H) $(CFGLOOP_H) $(IRA_INT_H) builtins.h gcse.h bb-reorder.h + $(LIBFUNCS_H) $(CFGLOOP_H) $(IRA_INT_H) builtins.h gcse.h bb-reorder.h \ + lower-subreg.h hw-doloop.o : hw-doloop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(RTL_H) $(FLAGS_H) $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) $(TM_P_H) \ $(DF_H) $(CFGLAYOUT_H) $(CFGLOOP_H) output.h $(RECOG_H) $(TARGET_H) \ diff --git a/gcc/lower-subreg.c b/gcc/lower-subreg.c index 89f3044b7fe..a11b33d1391 100644 --- a/gcc/lower-subreg.c +++ b/gcc/lower-subreg.c @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see #include "regs.h" #include "tree-pass.h" #include "df.h" +#include "lower-subreg.h" #ifdef STACK_GROWS_DOWNWARD # undef STACK_GROWS_DOWNWARD @@ -52,10 +53,35 @@ DEF_VEC_P (bitmap); DEF_VEC_ALLOC_P (bitmap,heap); /* Decompose multi-word pseudo-registers into individual - pseudo-registers when possible. This is possible when all the uses - of a multi-word register are via SUBREG, or are copies of the - register to another location. 
Breaking apart the register permits - more CSE and permits better register allocation. */ + pseudo-registers when possible and profitable. This is possible + when all the uses of a multi-word register are via SUBREG, or are + copies of the register to another location. Breaking apart the + register permits more CSE and permits better register allocation. + This is profitable if the machine does not have move instructions + to do this. + + This pass only splits moves with modes that are wider than + word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer + modes that are twice the width of word_mode. The latter could be + generalized if there was a need to do this, but the trend in + architectures is to not need this. + + There are two useful preprocessor defines for use by maintainers: + + #define LOG_COSTS 1 + + if you wish to see the actual cost estimates that are being used + for each mode wider than word mode and the cost estimates for zero + extension and the shifts. This can be useful when port maintainers + are tuning insn rtx costs. + + #define FORCE_LOWERING 1 + + if you wish to test the pass with all the transformation forced on. + This can be useful for finding bugs in the transformations. */ + +#define LOG_COSTS 0 +#define FORCE_LOWERING 0 /* Bit N in this bitmap is set if regno N is used in a context in which we can decompose it. */ @@ -75,8 +101,190 @@ static bitmap subreg_context; copy from reg M to reg N. */ static VEC(bitmap,heap) *reg_copy_graph; -/* Return whether X is a simple object which we can take a word_mode - subreg of. */ +struct target_lower_subreg default_target_lower_subreg; +#if SWITCHABLE_TARGET +struct target_lower_subreg *this_target_lower_subreg + = &default_target_lower_subreg; +#endif + +#define twice_word_mode \ + this_target_lower_subreg->x_twice_word_mode +#define choices \ + this_target_lower_subreg->x_choices + +/* RTXes used while computing costs. */ +struct cost_rtxes { + /* Source and target registers. 
*/ + rtx source; + rtx target; + + /* A twice_word_mode ZERO_EXTEND of SOURCE. */ + rtx zext; + + /* A shift of SOURCE. */ + rtx shift; + + /* A SET of TARGET. */ + rtx set; +}; + +/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the + rtxes in RTXES. SPEED_P selects between the speed and size cost. */ + +static int +shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code, + enum machine_mode mode, int op1) +{ + PUT_MODE (rtxes->target, mode); + PUT_CODE (rtxes->shift, code); + PUT_MODE (rtxes->shift, mode); + PUT_MODE (rtxes->source, mode); + XEXP (rtxes->shift, 1) = GEN_INT (op1); + SET_SRC (rtxes->set) = rtxes->shift; + return insn_rtx_cost (rtxes->set, speed_p); +} + +/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X] + to true if it is profitable to split a double-word CODE shift + of X + BITS_PER_WORD bits. SPEED_P says whether we are testing + for speed or size profitability. + + Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is + the cost of moving zero into a word-mode register. WORD_MOVE_COST + is the cost of moving between word registers. */ + +static void +compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes, + bool *splitting, enum rtx_code code, + int word_move_zero_cost, int word_move_cost) +{ + int wide_cost, narrow_cost, i; + + for (i = 0; i < BITS_PER_WORD; i++) + { + wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode, + i + BITS_PER_WORD); + if (i == 0) + narrow_cost = word_move_cost; + else + narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i); + + if (LOG_COSTS) + fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n", + GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code), + i + BITS_PER_WORD, wide_cost, narrow_cost, + word_move_zero_cost); + + if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost) + splitting[i] = true; + } +} + +/* Compute what we should do when optimizing for speed or size; SPEED_P + selects which. 
Use RTXES for computing costs. */ + +static void +compute_costs (bool speed_p, struct cost_rtxes *rtxes) +{ + unsigned int i; + int word_move_zero_cost, word_move_cost; + + SET_SRC (rtxes->set) = CONST0_RTX (word_mode); + word_move_zero_cost = insn_rtx_cost (rtxes->set, speed_p); + + SET_SRC (rtxes->set) = rtxes->source; + word_move_cost = insn_rtx_cost (rtxes->set, speed_p); + + if (LOG_COSTS) + fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n", + GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost); + + for (i = 0; i < MAX_MACHINE_MODE; i++) + { + enum machine_mode mode = (enum machine_mode) i; + int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD; + if (factor > 1) + { + int mode_move_cost; + + PUT_MODE (rtxes->target, mode); + PUT_MODE (rtxes->source, mode); + mode_move_cost = insn_rtx_cost (rtxes->set, speed_p); + + if (LOG_COSTS) + fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n", + GET_MODE_NAME (mode), mode_move_cost, + word_move_cost, factor); + + if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor) + { + choices[speed_p].move_modes_to_split[i] = true; + choices[speed_p].something_to_do = true; + } + } + } + + /* For the moves and shifts, the only case that is checked is one + where the mode of the target is an integer mode twice the width + of the word_mode. + + If it is not profitable to split a double word move then do not + even consider the shifts or the zero extension. */ + if (choices[speed_p].move_modes_to_split[(int) twice_word_mode]) + { + int zext_cost; + + /* The only case here to check to see if moving the upper part with a + zero is cheaper than doing the zext itself. 
*/ + PUT_MODE (rtxes->target, twice_word_mode); + PUT_MODE (rtxes->source, word_mode); + SET_SRC (rtxes->set) = rtxes->zext; + zext_cost = insn_rtx_cost (rtxes->set, speed_p); + + if (LOG_COSTS) + fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n", + GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND), + zext_cost, word_move_cost, word_move_zero_cost); + + if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost) + choices[speed_p].splitting_zext = true; + + compute_splitting_shift (speed_p, rtxes, + choices[speed_p].splitting_ashift, ASHIFT, + word_move_zero_cost, word_move_cost); + compute_splitting_shift (speed_p, rtxes, + choices[speed_p].splitting_lshiftrt, LSHIFTRT, + word_move_zero_cost, word_move_cost); + } +} + +/* Do one-per-target initialisation. This involves determining + which operations on the machine are profitable. If none are found, + then the pass just returns when called. */ + +void +init_lower_subreg (void) +{ + struct cost_rtxes rtxes; + + memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg)); + + twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode); + + rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER); + rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1); + rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source); + rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source); + rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx); + + if (LOG_COSTS) + fprintf (stderr, "\nSize costs\n==========\n\n"); + compute_costs (false, &rtxes); + + if (LOG_COSTS) + fprintf (stderr, "\nSpeed costs\n===========\n\n"); + compute_costs (true, &rtxes); +} static bool simple_move_operand (rtx x) @@ -101,12 +309,15 @@ simple_move_operand (rtx x) return true; } -/* If INSN is a single set between two objects, return the single set. - Such an insn can always be decomposed. INSN should have been - passed to recog and extract_insn before this is called. 
*/ +/* If INSN is a single set between two objects that we want to split, + return the single set. SPEED_P says whether we are optimizing + INSN for speed or size. + + INSN should have been passed to recog and extract_insn before this + is called. */ static rtx -simple_move (rtx insn) +simple_move (rtx insn, bool speed_p) { rtx x; rtx set; @@ -150,6 +361,9 @@ simple_move (rtx insn) if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) return NULL_RTX; + if (!choices[speed_p].move_modes_to_split[(int) mode]) + return NULL_RTX; + return set; } @@ -173,9 +387,6 @@ find_pseudo_copy (rtx set) if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs)) return false; - if (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD) - return false; - b = VEC_index (bitmap, reg_copy_graph, rs); if (b == NULL) { @@ -668,8 +879,7 @@ resolve_simple_move (rtx set, rtx insn) orig_mode = GET_MODE (dest); words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - if (words <= 1) - return insn; + gcc_assert (words > 1); start_sequence (); @@ -931,12 +1141,13 @@ resolve_debug (rtx insn) resolve_reg_notes (insn); } -/* Checks if INSN is a decomposable multiword-shift or zero-extend and - sets the decomposable_context bitmap accordingly. A non-zero value - is returned if a decomposable insn has been found. */ +/* Check if INSN is a decomposable multiword-shift or zero-extend and + set the decomposable_context bitmap accordingly. SPEED_P is true + if we are optimizing INSN for speed rather than size. Return true + if INSN is decomposable. 
*/ -static int -find_decomposable_shift_zext (rtx insn) +static bool +find_decomposable_shift_zext (rtx insn, bool speed_p) { rtx set; rtx op; @@ -944,41 +1155,44 @@ find_decomposable_shift_zext (rtx insn) set = single_set (insn); if (!set) - return 0; + return false; op = SET_SRC (set); if (GET_CODE (op) != ASHIFT && GET_CODE (op) != LSHIFTRT && GET_CODE (op) != ZERO_EXTEND) - return 0; + return false; op_operand = XEXP (op, 0); if (!REG_P (SET_DEST (set)) || !REG_P (op_operand) || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set))) || HARD_REGISTER_NUM_P (REGNO (op_operand)) - || !SCALAR_INT_MODE_P (GET_MODE (op))) - return 0; + || GET_MODE (op) != twice_word_mode) + return false; if (GET_CODE (op) == ZERO_EXTEND) { if (GET_MODE (op_operand) != word_mode - || GET_MODE_BITSIZE (GET_MODE (op)) != 2 * BITS_PER_WORD) - return 0; + || !choices[speed_p].splitting_zext) + return false; } else /* left or right shift */ { + bool *splitting = (GET_CODE (op) == ASHIFT + ? choices[speed_p].splitting_ashift + : choices[speed_p].splitting_lshiftrt); if (!CONST_INT_P (XEXP (op, 1)) - || INTVAL (XEXP (op, 1)) < BITS_PER_WORD - || GET_MODE_BITSIZE (GET_MODE (op_operand)) != 2 * BITS_PER_WORD) - return 0; + || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD, + 2 * BITS_PER_WORD - 1) + || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD]) + return false; + + bitmap_set_bit (decomposable_context, REGNO (op_operand)); } bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set))); - if (GET_CODE (op) != ZERO_EXTEND) - bitmap_set_bit (decomposable_context, REGNO (op_operand)); - - return 1; + return true; } /* Decompose a more than word wide shift (in INSN) of a multiword @@ -1008,6 +1222,8 @@ resolve_shift_zext (rtx insn) op_operand = XEXP (op, 0); + /* We can tear this operation apart only if the regs were already + torn apart. 
*/ if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand)) return NULL_RTX; @@ -1073,6 +1289,56 @@ resolve_shift_zext (rtx insn) return insns; } +/* Print to dump_file a description of what we're doing with shift code CODE. + SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */ + +static void +dump_shift_choices (enum rtx_code code, bool *splitting) +{ + int i; + const char *sep; + + fprintf (dump_file, + " Splitting mode %s for %s lowering with shift amounts = ", + GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code)); + sep = ""; + for (i = 0; i < BITS_PER_WORD; i++) + if (splitting[i]) + { + fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD); + sep = ","; + } + fprintf (dump_file, "\n"); +} + +/* Print to dump_file a description of what we're doing when optimizing + for speed or size; SPEED_P says which. DESCRIPTION is a description + of the SPEED_P choice. */ + +static void +dump_choices (bool speed_p, const char *description) +{ + unsigned int i; + + fprintf (dump_file, "Choices when optimizing for %s:\n", description); + + for (i = 0; i < MAX_MACHINE_MODE; i++) + if (GET_MODE_SIZE (i) > UNITS_PER_WORD) + fprintf (dump_file, " %s mode %s for copy lowering.\n", + choices[speed_p].move_modes_to_split[i] + ? "Splitting" + : "Skipping", + GET_MODE_NAME ((enum machine_mode) i)); + + fprintf (dump_file, " %s mode %s for zero_extend lowering.\n", + choices[speed_p].splitting_zext ? "Splitting" : "Skipping", + GET_MODE_NAME (twice_word_mode)); + + dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift); + dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_ashift); + fprintf (dump_file, "\n"); +} + /* Look for registers which are always accessed via word-sized SUBREGs or via copies. Decompose these registers into several word-sized pseudo-registers. 
*/ @@ -1082,9 +1348,21 @@ decompose_multiword_subregs (void) { unsigned int max; basic_block bb; + bool speed_p; - if (df) - df_set_flags (DF_DEFER_INSN_RESCAN); + if (dump_file) + { + dump_choices (false, "size"); + dump_choices (true, "speed"); + } + + /* Check if this target even has any modes to consider lowering. */ + if (!choices[false].something_to_do && !choices[true].something_to_do) + { + if (dump_file) + fprintf (dump_file, "Nothing to do!\n"); + return; + } max = max_reg_num (); @@ -1094,24 +1372,38 @@ decompose_multiword_subregs (void) all the insns. */ { unsigned int i; + bool useful_modes_seen = false; for (i = FIRST_PSEUDO_REGISTER; i < max; ++i) + if (regno_reg_rtx[i] != NULL) + { + enum machine_mode mode = GET_MODE (regno_reg_rtx[i]); + if (choices[false].move_modes_to_split[(int) mode] + || choices[true].move_modes_to_split[(int) mode]) + { + useful_modes_seen = true; + break; + } + } + + if (!useful_modes_seen) { - if (regno_reg_rtx[i] != NULL - && GET_MODE_SIZE (GET_MODE (regno_reg_rtx[i])) > UNITS_PER_WORD) - break; + if (dump_file) + fprintf (dump_file, "Nothing to lower in this function.\n"); + return; } - if (i == max) - return; } if (df) - run_word_dce (); + { + df_set_flags (DF_DEFER_INSN_RESCAN); + run_word_dce (); + } - /* FIXME: When the dataflow branch is merged, we can change this - code to look for each multi-word pseudo-register and to find each - insn which sets or uses that register. That should be faster - than scanning all the insns. */ + /* FIXME: It may be possible to change this code to look for each + multi-word pseudo-register and to find each insn which sets or + uses that register. That should be faster than scanning all the + insns. 
*/ decomposable_context = BITMAP_ALLOC (NULL); non_decomposable_context = BITMAP_ALLOC (NULL); @@ -1121,6 +1413,7 @@ decompose_multiword_subregs (void) VEC_safe_grow (bitmap, heap, reg_copy_graph, max); memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max); + speed_p = optimize_function_for_speed_p (cfun); FOR_EACH_BB (bb) { rtx insn; @@ -1138,12 +1431,12 @@ decompose_multiword_subregs (void) recog_memoized (insn); - if (find_decomposable_shift_zext (insn)) + if (find_decomposable_shift_zext (insn, speed_p)) continue; extract_insn (insn); - set = simple_move (insn); + set = simple_move (insn, speed_p); if (!set) cmi = NOT_SIMPLE_MOVE; @@ -1197,7 +1490,9 @@ decompose_multiword_subregs (void) FOR_EACH_BB (bb) { rtx insn; + bool speed_p; + speed_p = optimize_bb_for_speed_p (bb); FOR_BB_INSNS (bb, insn) { rtx pat; @@ -1220,7 +1515,7 @@ decompose_multiword_subregs (void) recog_memoized (insn); extract_insn (insn); - set = simple_move (insn); + set = simple_move (insn, speed_p); if (set) { rtx orig_insn = insn; diff --git a/gcc/lower-subreg.h b/gcc/lower-subreg.h new file mode 100644 index 00000000000..5c960dbfd09 --- /dev/null +++ b/gcc/lower-subreg.h @@ -0,0 +1,59 @@ +/* Target-dependent costs for lower-subreg.c. + Copyright (C) 2012 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#ifndef LOWER_SUBREG_H +#define LOWER_SUBREG_H 1 + +/* Information about whether, and where, lower-subreg should be applied. */ +struct lower_subreg_choices { + /* A boolean vector for move splitting that is indexed by mode and is + true for each mode that is to have its copies split. */ + bool move_modes_to_split[MAX_MACHINE_MODE]; + + /* True if zero-extensions from word_mode to twice_word_mode should + be split. */ + bool splitting_zext; + + /* Index X is true if twice_word_mode shifts by X + BITS_PER_WORD + should be split. */ + bool splitting_ashift[MAX_BITS_PER_WORD]; + bool splitting_lshiftrt[MAX_BITS_PER_WORD]; + + /* True if there is at least one mode that is worth splitting. */ + bool something_to_do; +}; + +/* Target-specific information for the subreg lowering pass. */ +struct target_lower_subreg { + /* An integer mode that is twice as wide as word_mode. */ + enum machine_mode x_twice_word_mode; + + /* What we have decided to do when optimizing for size (index 0) + and speed (index 1). */ + struct lower_subreg_choices x_choices[2]; +}; + +extern struct target_lower_subreg default_target_lower_subreg; +#if SWITCHABLE_TARGET +extern struct target_lower_subreg *this_target_lower_subreg; +#else +#define this_target_lower_subreg (&default_target_lower_subreg) +#endif + +#endif diff --git a/gcc/rtl.h b/gcc/rtl.h index 1da1792d1a3..86c56acc197 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -2526,6 +2526,9 @@ extern void init_expmed (void); extern void expand_inc (rtx, rtx); extern void expand_dec (rtx, rtx); +/* In lower-subreg.c */ +extern void init_lower_subreg (void); + /* In gcse.c */ extern bool can_copy_p (enum machine_mode); extern bool can_assign_to_reg_without_clobbers_p (rtx); diff --git a/gcc/target-globals.c b/gcc/target-globals.c index 7a4058ebd18..e679f21614e 100644 --- a/gcc/target-globals.c +++ b/gcc/target-globals.c @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. 
If not see #include "builtins.h" #include "gcse.h" #include "bb-reorder.h" +#include "lower-subreg.h" #if SWITCHABLE_TARGET struct target_globals default_target_globals = { @@ -56,7 +57,8 @@ struct target_globals default_target_globals = { &default_target_ira_int, &default_target_builtins, &default_target_gcse, - &default_target_bb_reorder + &default_target_bb_reorder, + &default_target_lower_subreg }; struct target_globals * @@ -79,6 +81,7 @@ save_target_globals (void) g->builtins = XCNEW (struct target_builtins); g->gcse = XCNEW (struct target_gcse); g->bb_reorder = XCNEW (struct target_bb_reorder); + g->lower_subreg = XCNEW (struct target_lower_subreg); restore_target_globals (g); init_reg_sets (); target_reinit (); diff --git a/gcc/target-globals.h b/gcc/target-globals.h index 166f21ada4f..fb0f260c0c9 100644 --- a/gcc/target-globals.h +++ b/gcc/target-globals.h @@ -35,6 +35,7 @@ extern struct target_ira_int *this_target_ira_int; extern struct target_builtins *this_target_builtins; extern struct target_gcse *this_target_gcse; extern struct target_bb_reorder *this_target_bb_reorder; +extern struct target_lower_subreg *this_target_lower_subreg; struct GTY(()) target_globals { struct target_flag_state *GTY((skip)) flag_state; @@ -51,6 +52,7 @@ struct GTY(()) target_globals { struct target_builtins *GTY((skip)) builtins; struct target_gcse *GTY((skip)) gcse; struct target_bb_reorder *GTY((skip)) bb_reorder; + struct target_lower_subreg *GTY((skip)) lower_subreg; }; extern struct target_globals default_target_globals; @@ -74,6 +76,7 @@ restore_target_globals (struct target_globals *g) this_target_builtins = g->builtins; this_target_gcse = g->gcse; this_target_bb_reorder = g->bb_reorder; + this_target_lower_subreg = g->lower_subreg; } #endif diff --git a/gcc/toplev.c b/gcc/toplev.c index 7d7be83e28e..d9aaa429f65 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -1601,6 +1601,7 @@ backend_init_target (void) /* rtx_cost is mode-dependent, so cached values need to be 
recomputed on a mode change. */ init_expmed (); + init_lower_subreg (); /* We may need to recompute regno_save_code[] and regno_restore_code[] after a mode change as well. */ |