summaryrefslogtreecommitdiff
path: root/gcc/lra-coalesce.c
diff options
context:
space:
mode:
authorvmakarov <vmakarov@138bc75d-0d04-0410-961f-82ee72b054a4>2012-10-23 15:51:41 +0000
committervmakarov <vmakarov@138bc75d-0d04-0410-961f-82ee72b054a4>2012-10-23 15:51:41 +0000
commitc6a6cdaaea571860c94f9a9fe0f98c597fef7c81 (patch)
tree915ce489d01a05653371ff4f7770258ffacab1b4 /gcc/lra-coalesce.c
parentd9459f6b9e27edcf999b5c06b87e21f8f24fd26f (diff)
downloadgcc-c6a6cdaaea571860c94f9a9fe0f98c597fef7c81.tar.gz
2012-10-23 Vladimir Makarov <vmakarov@redhat.com>
* dbxout.c (dbxout_symbol_location): Pass new argument to alter_subreg. * dwarf2out.c: Include ira.h and lra.h. (based_loc_descr, compute_frame_pointer_to_fb_displacement): Use lra_eliminate_regs for LRA instead of eliminate_regs. * expr.c (emit_move_insn_1): Pass an additional argument to emit_move_via_integer. Use emit_move_via_integer for LRA only if the insn is recognized. * emit-rtl.c (gen_rtx_REG): Add lra_in_progress. (validate_subreg): Don't check offset for LRA and floating point modes. * final.c (final_scan_insn, cleanup_subreg_operands): Pass new argument to alter_subreg. (walk_alter_subreg, output_operand): Ditto. (alter_subreg): Add new argument. * gcse.c (calculate_bb_reg_pressure): Add parameter to ira_setup_eliminable_regset call. * ira.c: Include lra.h. (ira_init_once, ira_init, ira_finish_once): Call lra_start_once, lra_init, lra_finish_once in anyway. (ira_setup_eliminable_regset): Add parameter. Remove need_fp. Call lra_init_elimination and mark HARD_FRAME_POINTER_REGNUM as living forever if frame_pointer_needed. (setup_reg_class_relations): Set up ira_reg_class_subset. (ira_reg_equiv_invariant_p, ira_reg_equiv_const): Remove. (find_reg_equiv_invariant_const): Ditto. (setup_reg_renumber): Use ira_equiv_no_lvalue_p instead of ira_reg_equiv_invariant_p. Skip caps for LRA. (setup_reg_equiv_init, ira_update_equiv_info_by_shuffle_insn): New functions. (ira_reg_equiv_len, ira_reg_equiv): New externals. (ira_reg_equiv): New. (ira_expand_reg_equiv, init_reg_equiv, finish_reg_equiv): New functions. (no_equiv, update_equiv_regs): Use ira_reg_equiv instead of reg_equiv_init. (setup_reg_equiv): New function. (ira_use_lra_p): New global. (ira): Set up lra_simple_p and ira_conflicts_p. Set up and restore flag_caller_saves and flag_ira_region. Move initialization of ira_obstack and ira_bitmap_obstack upper. 
Call init_reg_equiv, setup_reg_equiv, and setup_reg_equiv_init instead of initialization of ira_reg_equiv_len, ira_reg_equiv_invariant_p, and ira_reg_equiv_const. Call ira_setup_eliminable_regset with a new argument. Don't flatten IRA IRA for LRA. Don't reassign conflict allocnos for LRA. Call finish_reg_equiv. (do_reload): Prepare code for LRA call. Call LRA. * ira.h (ira_use_lra_p): New external. (struct target_ira): Add members x_ira_class_subset_p x_ira_reg_class_subset, and x_ira_reg_classes_intersect_p. (ira_class_subset_p, ira_reg_class_subset): New macros. (ira_reg_classes_intersect_p): New macro. (struct ira_reg_equiv): New. (ira_setup_eliminable_regset): Add an argument. (ira_expand_reg_equiv, ira_update_equiv_info_by_shuffle_insn): New prototypes. * ira-color.c (color_pass, move_spill_restore, coalesce_allocnos): Use ira_equiv_no_lvalue_p. (coalesce_spill_slots, ira_sort_regnos_for_alter_reg): Ditto. * ira-emit.c (ira_create_new_reg): Call ira_expand_reg_equiv. (generate_edge_moves, change_loop) Use ira_equiv_no_lvalue_p. (emit_move_list): Simplify code. Call ira_update_equiv_info_by_shuffle_insn. Use ira_reg_equiv instead of ira_reg_equiv_invariant_p and ira_reg_equiv_const. Change assert. * ira-int.h (struct target_ira_int): Remove x_ira_class_subset_p and x_ira_reg_classes_intersect_p. (ira_class_subset_p, ira_reg_classes_intersect_p): Remove. (ira_reg_equiv_len, ira_reg_equiv_invariant_p): Ditto. (ira_reg_equiv_const): Ditto. (ira_equiv_no_lvalue_p): New function. * jump.c (true_regnum): Always use hard_regno for subreg_get_info when lra is in progress. * haifa-sched.c (sched_init): Pass new argument to ira_setup_eliminable_regset. * loop-invariant.c (calculate_loop_reg_pressure): Pass new argument to ira_setup_eliminable_regset. * lra.h: New. * lra-int.h: Ditto. * lra.c: Ditto. * lra-assigns.c: Ditto. * lra-constraints.c: Ditto. * lra-coalesce.c: Ditto. * lra-eliminations.c: Ditto. * lra-lives.c: Ditto. * lra-spills.c: Ditto. 
* Makefile.in (LRA_INT_H): New. (OBJS): Add lra.o, lra-assigns.o, lra-coalesce.o, lra-constraints.o, lra-eliminations.o, lra-lives.o, and lra-spills.o. (dwarf2out.o): Add dependence on ira.h and lra.h. (ira.o): Add dependence on lra.h. (lra.o, lra-assigns.o, lra-coalesce.o, lra-constraints.o): New entries. (lra-eliminations.o, lra-lives.o, lra-spills.o): Ditto. * output.h (alter_subreg): Add new argument. * rtlanal.c (simplify_subreg_regno): Permit mode changes for LRA. Permit ARG_POINTER_REGNUM and STACK_POINTER_REGNUM for LRA. * recog.c (general_operand, register_operand): Accept paradoxical FLOAT_MODE subregs for LRA. (scratch_operand): Accept pseudos for LRA. * rtl.h (lra_in_progress): New external. (debug_bb_n_slim, debug_bb_slim, print_value_slim): New prototypes. (debug_rtl_slim, debug_insn_slim): Ditto. * sdbout.c (sdbout_symbol): Pass new argument to alter_subreg. * sched-vis.c (print_value_slim): New. * target.def (lra_p): New hook. (register_priority): Ditto. (different_addr_displacement_p): Ditto. (spill_class): Ditto. * target-globals.h (this_target_lra_int): New external. (target_globals): New member lra_int. (restore_target_globals): Restore this_target_lra_int. * target-globals.c: Include lra-int.h. (default_target_globals): Add &default_target_lra_int. * targhooks.c (default_lra_p): New function. (default_register_priority): Ditto. (default_different_addr_displacement_p): Ditto. * targhooks.h (default_lra_p): Declare. (default_register_priority): Ditto. (default_different_addr_displacement_p): Ditto. * timevar.def (TV_LRA, TV_LRA_ELIMINATE, TV_LRA_INHERITANCE): New. (TV_LRA_CREATE_LIVE_RANGES, TV_LRA_ASSIGN, TV_LRA_COALESCE): New. * config/arm/arm.c (load_multiple_sequence): Pass new argument to alter_subreg. (store_multiple_sequence): Ditto. * config/i386/i386.h (enum ix86_tune_indices): Add X86_TUNE_GENERAL_REGS_SSE_SPILL. (TARGET_GENERAL_REGS_SSE_SPILL): New macro. 
* config/i386/i386.c (initial_ix86_tune_features): Set up X86_TUNE_GENERAL_REGS_SSE_SPILL for m_COREI7 and m_CORE2I7. (ix86_lra_p, ix86_register_priority): New functions. (ix86_secondary_reload): Add NON_Q_REGS, SIREG, DIREG. (inline_secondary_memory_needed): Change assert. (ix86_spill_class): New function. (TARGET_LRA_P, TARGET_REGISTER_BANK, TARGET_SPILL_CLASS): New macros. * config/m68k/m68k.c (emit_move_sequence): Pass new argument to alter_subreg. * config/m32r/m32r.c (gen_split_move_double): Ditto. * config/pa/pa.c (pa_emit_move_sequence): Ditto. * config/sh/sh.md: Ditto. * config/v850/v850.c (v850_reorg): Ditto. * config/xtensa/xtensa.c (fixup_subreg_mem): Ditto. * doc/md.texi: Add new interpretation of hint * for LRA. * doc/passes.texi: Describe LRA pass. * doc/tm.texi.in: Add TARGET_LRA_P, TARGET_REGISTER_PRIORITY, TARGET_DIFFERENT_ADDR_DISPLACEMENT_P, and TARGET_SPILL_CLASS. * doc/tm.texi: Update. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@192719 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/lra-coalesce.c')
-rw-r--r--gcc/lra-coalesce.c351
1 files changed, 351 insertions, 0 deletions
diff --git a/gcc/lra-coalesce.c b/gcc/lra-coalesce.c
new file mode 100644
index 00000000000..57c3111b922
--- /dev/null
+++ b/gcc/lra-coalesce.c
@@ -0,0 +1,351 @@
+/* Coalesce spilled pseudos.
+ Copyright (C) 2010, 2011, 2012
+ Free Software Foundation, Inc.
+ Contributed by Vladimir Makarov <vmakarov@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* This file contains a pass making some simple RTL code
+ transformations by coalescing pseudos to remove some move insns.
+
+ Spilling pseudos in LRA can create memory-memory moves. We should
+ remove potential memory-memory moves before the next constraint
+ pass because the constraint pass will generate additional insns for
+ such moves and all these insns will be hard to remove afterwards.
+
+ Here we coalesce only spilled pseudos. Coalescing non-spilled
+ pseudos (with different hard regs) might result in spilling
+ additional pseudos because of possible conflicts with other
+ non-spilled pseudos and, as a consequence, in more constraint
+ passes and even LRA infinite cycling. Trivial moves within the
+ same hard register will be removed by subsequent compiler passes.
+
+ We don't coalesce special reload pseudos. It complicates LRA code
+ a lot without visible generated code improvement.
+
+ The pseudo live-ranges are used to find conflicting pseudos during
+ coalescing.
+
+ The most frequently executed moves are tried first. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "insn-config.h"
+#include "recog.h"
+#include "output.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "flags.h"
+#include "function.h"
+#include "expr.h"
+#include "basic-block.h"
+#include "except.h"
+#include "timevar.h"
+#include "ira.h"
+#include "lra-int.h"
+#include "df.h"
+
+/* Arrays indexed by pseudo regno. FIRST_COALESCED_PSEUDO[R] is the
+ first pseudo (the representative) of the coalesced pseudos group to
+ which R belongs and NEXT_COALESCED_PSEUDO[R] is the next pseudo in
+ that group. The next of the last pseudo in the group refers to the
+ first pseudo in the group, in other words the group is represented
+ by a cyclic list. Both arrays are allocated in lra_coalesce with
+ max_reg_num () elements, initialized there so that every regno
+ forms a group of its own, and freed before lra_coalesce
+ returns. */
+static int *first_coalesced_pseudo, *next_coalesced_pseudo;
+
+/* Comparison function for qsort over an array of move insns (rtx).
+ V1P and V2P point to the two array elements being compared. Moves
+ whose basic block has a higher execution frequency are ordered
+ first; moves with equal frequencies are ordered by increasing
+ insn UID. */
+static int
+move_freq_compare_func (const void *v1p, const void *v2p)
+{
+ rtx mv1 = *(const rtx *) v1p;
+ rtx mv2 = *(const rtx *) v2p;
+ int pri1, pri2;
+
+ pri1 = BLOCK_FOR_INSN (mv1)->frequency;
+ pri2 = BLOCK_FOR_INSN (mv2)->frequency;
+ if (pri2 - pri1)
+ return pri2 - pri1; /* Positive when MV2 is the more frequent one. */
+
+ /* If frequencies are equal, sort by insn UIDs, so that the results
+ of qsort leave nothing to chance. */
+ return (int) INSN_UID (mv1) - (int) INSN_UID (mv2);
+}
+
+/* Pseudos which go away after coalescing. merge_pseudos sets the
+ bit of every pseudo of a group which is merged into another group,
+ and update_live_info removes these pseudos from a BB live info
+ bitmap. The bitmap is initialized and cleared in lra_coalesce. */
+static bitmap_head coalesced_pseudos_bitmap;
+
+/* Merge two sets of coalesced pseudos given correspondingly by
+ pseudos REGNO1 and REGNO2 (more accurately merging REGNO2 group
+ into REGNO1 group). Do nothing if both pseudos already have the
+ same group representative. Otherwise make the representative of
+ REGNO1 group the representative of every pseudo of REGNO2 group,
+ set the bits of all pseudos of REGNO2 group in
+ COALESCED_PSEUDOS_BITMAP, and splice the two cyclic lists into
+ one. Finally merge a copy (made by lra_copy_live_range_list) of
+ the live ranges of the old representative of REGNO2 group into the
+ live ranges of the resulting representative, and take over the
+ biggest mode of the old representative if its size is larger. */
+static void
+merge_pseudos (int regno1, int regno2)
+{
+ int regno, first, first2, last, next;
+
+ first = first_coalesced_pseudo[regno1];
+ if ((first2 = first_coalesced_pseudo[regno2]) == first)
+ return; /* Already in the same group. */
+ for (last = regno2, regno = next_coalesced_pseudo[regno2];;
+ regno = next_coalesced_pseudo[regno])
+ {
+ first_coalesced_pseudo[regno] = first;
+ bitmap_set_bit (&coalesced_pseudos_bitmap, regno);
+ if (regno == regno2)
+ break; /* The whole cyclic list has been walked. */
+ last = regno; /* Ends up as the predecessor of REGNO2. */
+ }
+ next = next_coalesced_pseudo[first];
+ next_coalesced_pseudo[first] = regno2; /* FIRST -> REGNO2 -> ... */
+ next_coalesced_pseudo[last] = next; /* ... -> LAST -> old next of FIRST. */
+ lra_reg_info[first].live_ranges
+ = (lra_merge_live_ranges
+ (lra_reg_info[first].live_ranges,
+ lra_copy_live_range_list (lra_reg_info[first2].live_ranges)));
+ if (GET_MODE_SIZE (lra_reg_info[first].biggest_mode)
+ < GET_MODE_SIZE (lra_reg_info[first2].biggest_mode))
+ lra_reg_info[first].biggest_mode = lra_reg_info[first2].biggest_mode;
+}
+
+/* Replace pseudos inside *LOC by the representatives of their
+ coalescing groups. A REG whose regno is not less than
+ FIRST_PSEUDO_REGISTER and whose FIRST_COALESCED_PSEUDO entry
+ differs from the regno itself is replaced by the regno_reg_rtx
+ entry of that representative. Any other rtx is processed
+ recursively through its operands with format letter 'e' and
+ through the elements of its vectors with format letter 'E'. A
+ null *LOC is ignored. Return true if at least one replacement
+ was made. */
+static bool
+substitute (rtx *loc)
+{
+ int i, regno;
+ const char *fmt;
+ enum rtx_code code;
+ bool res;
+
+ if (*loc == NULL_RTX)
+ return false;
+ code = GET_CODE (*loc);
+ if (code == REG)
+ {
+ regno = REGNO (*loc);
+ if (regno < FIRST_PSEUDO_REGISTER
+ || first_coalesced_pseudo[regno] == regno)
+ return false; /* Not a pseudo, or it represents its own group. */
+ *loc = regno_reg_rtx[first_coalesced_pseudo[regno]];
+ return true;
+ }
+
+ res = false;
+ fmt = GET_RTX_FORMAT (code);
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e')
+ {
+ if (substitute (&XEXP (*loc, i)))
+ res = true;
+ }
+ else if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (*loc, i) - 1; j >= 0; j--)
+ if (substitute (&XVECEXP (*loc, i, j)))
+ res = true;
+ }
+ }
+ return res;
+}
+
+/* The current iteration (1, 2, ...) of the coalescing pass. It is
+ printed in the dump header written by lra_coalesce.
+ NOTE(review): in this file the counter is incremented only inside
+ that dump fprintf, i.e. only when lra_dump_file is not NULL --
+ confirm that nothing relies on it counting passes without a dump
+ file. */
+int lra_coalesce_iter;
+
+/* Return true if the move involving REGNO1 and REGNO2 is a potential
+ memory-memory move, i.e. if both pseudos have a negative
+ reg_renumber entry (presumably meaning that neither of them has
+ got a hard register -- TODO confirm against reg_renumber's
+ definition). */
+static bool
+mem_move_p (int regno1, int regno2)
+{
+ return reg_renumber[regno1] < 0 && reg_renumber[regno2] < 0;
+}
+
+/* Pseudos used instead of the coalesced pseudos. This is scratch
+ storage which is cleared and recomputed by each call of
+ update_live_info; lra_coalesce initializes it before these calls
+ and clears it afterwards. */
+static bitmap_head used_pseudos_bitmap;
+
+/* Set up USED_PSEUDOS_BITMAP, and update LR_BITMAP (a BB live info
+ bitmap). USED_PSEUDOS_BITMAP is cleared and then gets the group
+ representative of every pseudo (regno not less than
+ FIRST_PSEUDO_REGISTER) which is set both in
+ COALESCED_PSEUDOS_BITMAP and in LR_BITMAP. If there is at least
+ one such pseudo, all pseudos of COALESCED_PSEUDOS_BITMAP are
+ removed from LR_BITMAP and the collected representatives are added
+ to it; otherwise LR_BITMAP is left unchanged. */
+static void
+update_live_info (bitmap lr_bitmap)
+{
+ unsigned int j;
+ bitmap_iterator bi;
+
+ bitmap_clear (&used_pseudos_bitmap);
+ EXECUTE_IF_AND_IN_BITMAP (&coalesced_pseudos_bitmap, lr_bitmap,
+ FIRST_PSEUDO_REGISTER, j, bi)
+ bitmap_set_bit (&used_pseudos_bitmap, first_coalesced_pseudo[j]);
+ if (! bitmap_empty_p (&used_pseudos_bitmap))
+ {
+ bitmap_and_compl_into (lr_bitmap, &coalesced_pseudos_bitmap);
+ bitmap_ior_into (lr_bitmap, &used_pseudos_bitmap);
+ }
+}
+
+/* Return true if pseudo REGNO can be potentially coalesced. Use
+ SPLIT_ORIGIN_BITMAP to find pseudos whose live ranges were split.
+ REGNO must not be a hard register (this is asserted). The pseudo
+ is rejected if its restore_regno is not negative, if its bit is
+ set in SPLIT_ORIGIN_BITMAP, or if ira_reg_equiv records a
+ constant, memory or invariant equivalence for it. */
+static bool
+coalescable_pseudo_p (int regno, bitmap split_origin_bitmap)
+{
+ lra_assert (regno >= FIRST_PSEUDO_REGISTER);
+ /* Don't coalesce inheritance pseudos because spilled inheritance
+ pseudos will be removed in subsequent 'undo inheritance'
+ pass. */
+ return (lra_reg_info[regno].restore_regno < 0
+ /* We undo splits for spilled pseudos whose live ranges were
+ split. So don't coalesce them, it is not necessary and
+ the undo transformations would be wrong. */
+ && ! bitmap_bit_p (split_origin_bitmap, regno)
+ /* We don't want to coalesce regnos with equivalences, at
+ least without updating this info. */
+ && ira_reg_equiv[regno].constant == NULL_RTX
+ && ira_reg_equiv[regno].memory == NULL_RTX
+ && ira_reg_equiv[regno].invariant == NULL_RTX);
+}
+
+/* The major function of the pass: aggressive coalescing of pseudos
+ connected by moves, done only if both pseudos of the move satisfy
+ mem_move_p and coalescable_pseudo_p.
+
+ The function works in three steps. First it collects single-set
+ pseudo-to-pseudo moves without side effects whose source and
+ destination pass the checks above and have non-intersecting live
+ ranges. Then it sorts the moves with move_freq_compare_func and,
+ for each move in that order, merges the groups of the source and
+ the destination unless the live ranges recorded for the two group
+ representatives intersect. Finally it updates the live-in and
+ live-out sets of every basic block, substitutes group
+ representatives in the insns recorded as involved, and marks as
+ deleted the insns which became no-op sets.
+
+ Return true if at least one move was coalesced. */
+bool
+lra_coalesce (void)
+{
+ basic_block bb;
+ rtx mv, set, insn, next, *sorted_moves;
+ int i, mv_num, sregno, dregno, restore_regno;
+ unsigned int regno;
+ int coalesced_moves;
+ int max_regno = max_reg_num ();
+ bitmap_head involved_insns_bitmap, split_origin_bitmap;
+ bitmap_iterator bi;
+
+ timevar_push (TV_LRA_COALESCE);
+
+ if (lra_dump_file != NULL)
+ fprintf (lra_dump_file,
+ "\n********** Pseudos coalescing #%d: **********\n\n",
+ ++lra_coalesce_iter);
+ first_coalesced_pseudo = XNEWVEC (int, max_regno);
+ next_coalesced_pseudo = XNEWVEC (int, max_regno);
+ for (i = 0; i < max_regno; i++)
+ first_coalesced_pseudo[i] = next_coalesced_pseudo[i] = i; /* Singleton groups. */
+ sorted_moves = XNEWVEC (rtx, get_max_uid ());
+ mv_num = 0;
+ /* Collect pseudos whose live ranges were split: the restore_regno
+ of every pseudo in lra_split_regs which has a non-negative
+ one. */
+ bitmap_initialize (&split_origin_bitmap, &reg_obstack);
+ EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, regno, bi)
+ if ((restore_regno = lra_reg_info[regno].restore_regno) >= 0)
+ bitmap_set_bit (&split_origin_bitmap, restore_regno);
+ /* Collect moves. */
+ coalesced_moves = 0;
+ FOR_EACH_BB (bb)
+ {
+ FOR_BB_INSNS_SAFE (bb, insn, next)
+ if (INSN_P (insn)
+ && (set = single_set (insn)) != NULL_RTX
+ && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set))
+ && (sregno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER
+ && (dregno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
+ && mem_move_p (sregno, dregno)
+ && coalescable_pseudo_p (sregno, &split_origin_bitmap)
+ && coalescable_pseudo_p (dregno, &split_origin_bitmap)
+ && ! side_effects_p (set)
+ && !(lra_intersected_live_ranges_p
+ (lra_reg_info[sregno].live_ranges,
+ lra_reg_info[dregno].live_ranges)))
+ sorted_moves[mv_num++] = insn;
+ }
+ bitmap_clear (&split_origin_bitmap);
+ qsort (sorted_moves, mv_num, sizeof (rtx), move_freq_compare_func);
+ /* Coalesce copies, most frequently executed first. */
+ bitmap_initialize (&coalesced_pseudos_bitmap, &reg_obstack);
+ bitmap_initialize (&involved_insns_bitmap, &reg_obstack);
+ for (i = 0; i < mv_num; i++)
+ {
+ mv = sorted_moves[i];
+ set = single_set (mv);
+ lra_assert (set != NULL && REG_P (SET_SRC (set))
+ && REG_P (SET_DEST (set)));
+ sregno = REGNO (SET_SRC (set));
+ dregno = REGNO (SET_DEST (set));
+ if (first_coalesced_pseudo[sregno] == first_coalesced_pseudo[dregno])
+ {
+ coalesced_moves++; /* Earlier merges already joined the groups. */
+ if (lra_dump_file != NULL)
+ fprintf
+ (lra_dump_file, " Coalescing move %i:r%d-r%d (freq=%d)\n",
+ INSN_UID (mv), sregno, dregno,
+ BLOCK_FOR_INSN (mv)->frequency);
+ /* We updated involved_insns_bitmap when doing the merge. */
+ }
+ else if (!(lra_intersected_live_ranges_p
+ (lra_reg_info[first_coalesced_pseudo[sregno]].live_ranges,
+ lra_reg_info[first_coalesced_pseudo[dregno]].live_ranges)))
+ {
+ coalesced_moves++;
+ if (lra_dump_file != NULL)
+ fprintf
+ (lra_dump_file,
+ " Coalescing move %i:r%d(%d)-r%d(%d) (freq=%d)\n",
+ INSN_UID (mv), sregno, ORIGINAL_REGNO (SET_SRC (set)),
+ dregno, ORIGINAL_REGNO (SET_DEST (set)),
+ BLOCK_FOR_INSN (mv)->frequency);
+ bitmap_ior_into (&involved_insns_bitmap,
+ &lra_reg_info[sregno].insn_bitmap);
+ bitmap_ior_into (&involved_insns_bitmap,
+ &lra_reg_info[dregno].insn_bitmap);
+ merge_pseudos (sregno, dregno); /* DREGNO group joins SREGNO group. */
+ }
+ }
+ bitmap_initialize (&used_pseudos_bitmap, &reg_obstack);
+ FOR_EACH_BB (bb)
+ {
+ update_live_info (df_get_live_in (bb));
+ update_live_info (df_get_live_out (bb));
+ FOR_BB_INSNS_SAFE (bb, insn, next)
+ if (INSN_P (insn)
+ && bitmap_bit_p (&involved_insns_bitmap, INSN_UID (insn)))
+ {
+ if (! substitute (&insn)) /* Walks the whole insn rtx. */
+ continue;
+ lra_update_insn_regno_info (insn);
+ if ((set = single_set (insn)) != NULL_RTX && set_noop_p (set))
+ {
+ /* Coalesced move. */
+ if (lra_dump_file != NULL)
+ fprintf (lra_dump_file, " Removing move %i (freq=%d)\n",
+ INSN_UID (insn), BLOCK_FOR_INSN (insn)->frequency);
+ lra_set_insn_deleted (insn);
+ }
+ }
+ }
+ bitmap_clear (&used_pseudos_bitmap);
+ bitmap_clear (&involved_insns_bitmap);
+ bitmap_clear (&coalesced_pseudos_bitmap);
+ if (lra_dump_file != NULL && coalesced_moves != 0)
+ fprintf (lra_dump_file, "Coalesced Moves = %d\n", coalesced_moves);
+ free (sorted_moves);
+ free (next_coalesced_pseudo);
+ free (first_coalesced_pseudo);
+ timevar_pop (TV_LRA_COALESCE);
+ return coalesced_moves != 0;
+}