author     dje <dje@138bc75d-0d04-0410-961f-82ee72b054a4>    2004-03-03 16:32:45 +0000
committer  dje <dje@138bc75d-0d04-0410-961f-82ee72b054a4>    2004-03-03 16:32:45 +0000
commit     839f841511dc0908d11199c48add0335fd068928 (patch)
tree       a9a88dd3608db538ae3d670cde68145ab19f3038 /gcc/gcse.c
parent     399d212bf0da5ed1e49763af597f5cdbcdc40acf (diff)
download   gcc-839f841511dc0908d11199c48add0335fd068928.tar.gz
2004-03-03 Mostafa Hagog <mustafa@il.ibm.com>
* common.opt: Add description of the new -fgcse-after-reload flag.
* flags.h (flag_gcse_after_reload): Declaration of global variable.
* gcse.c (reg_used_on_edge, reg_set_between_after_reload_p,
reg_used_between_after_reload_p, get_avail_load_store_reg,
is_jump_table_basic_block, bb_has_well_behaved_predecessors,
get_bb_avail_insn, hash_scan_set_after_reload,
compute_hash_table_after_reload, eliminate_partially_redundant_loads,
gcse_after_reload): New functions to implement gcse-after-reload.
(gcse_after_reload_main): New function, the main entry point to
gcse-after-reload.
* rtl.h (gcse_after_reload_main): Declaration of the new function.
* opts.c (common_handle_option): Handle the -fgcse-after-reload flag.
* toplev.c (flag_gcse_after_reload): Initialization.
* passes.c (rest_of_handle_gcse2): Call gcse_after_reload_main.
* params.def (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION): New parameters for tuning
the gcse after reload optimization.
* params.h (GCSE_AFTER_RELOAD_PARTIAL_FRACTION,
GCSE_AFTER_RELOAD_CRITICAL_FRACTION): Two macros to access the tuning
parameters.
* doc/invoke.texi: Document the new -fgcse-after-reload flag.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@78842 138bc75d-0d04-0410-961f-82ee72b054a4
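Background for the patch below: the new pass (enabled by -fgcse-after-reload and tuned through the two new --param values listed above) looks for loads that are only partially redundant, i.e. loads whose memory value is already available at the end of some, but not all, predecessor blocks. The following minimal C sketch is illustrative only -- it is not taken from the patch, and in practice the redundancies of interest come from spill code emitted by reload rather than from source-level loads like this one:

    /* Illustrative sketch, not part of the patch.  After register
       allocation, the "if" arm leaves the value of p->a in a hard
       register, while the "else" arm does not.  The load of p->a in
       the return statement is therefore partially redundant: the pass
       can satisfy it with a register-register copy on the edge from
       the "if" arm, insert a compensating load on the edge from the
       "else" arm, and delete the original load, provided the edge
       profile says this is profitable.  */

    struct s { int a; };
    extern int g (void);

    int
    f (struct s *p, int cond)
    {
      int t;

      if (cond)
        t = p->a + 1;   /* p->a loaded here and still available.  */
      else
        t = g ();       /* p->a not available on this path.  */

      return t + p->a;  /* Partially redundant load of p->a.  */
    }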
Diffstat (limited to 'gcc/gcse.c')
-rw-r--r-- | gcc/gcse.c | 638 |
1 file changed, 638 insertions, 0 deletions
diff --git a/gcc/gcse.c b/gcc/gcse.c index ad1c9fda64e..975fb1fbe2f 100644 --- a/gcc/gcse.c +++ b/gcc/gcse.c @@ -1980,6 +1980,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p, antic_occr->insn = insn; antic_occr->next = NULL; + antic_occr->deleted_p = 0; } } @@ -2016,6 +2017,7 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p, avail_occr->insn = insn; avail_occr->next = NULL; + avail_occr->deleted_p = 0; } } } @@ -2102,6 +2104,7 @@ insert_set_in_table (rtx x, rtx insn, struct hash_table *table) cur_occr->insn = insn; cur_occr->next = NULL; + cur_occr->deleted_p = 0; } } @@ -8091,4 +8094,639 @@ is_too_expensive (const char *pass) return false; } +/* The following code implements gcse after reload, the purpose of this + pass is to cleanup redundant loads generated by reload and other + optimizations that come after gcse. It searches for simple inter-block + redundancies and tries to eliminate them by adding moves and loads + in cold places. */ + +/* The following structure holds the information about the occurrences of + the redundant instructions. */ +struct unoccr +{ + struct unoccr *next; + edge pred; + rtx insn; +}; + +static bool reg_used_on_edge (rtx, edge); +static rtx reg_set_between_after_reload_p (rtx, rtx, rtx); +static rtx reg_used_between_after_reload_p (rtx, rtx, rtx); +static rtx get_avail_load_store_reg (rtx); +static bool is_jump_table_basic_block (basic_block); +static bool bb_has_well_behaved_predecessors (basic_block); +static struct occr* get_bb_avail_insn (basic_block, struct occr *); +static void hash_scan_set_after_reload (rtx, rtx, struct hash_table *); +static void compute_hash_table_after_reload (struct hash_table *); +static void eliminate_partially_redundant_loads (basic_block, + rtx, + struct expr *); +static void gcse_after_reload (void); +static struct occr* get_bb_avail_insn (basic_block, struct occr *); +void gcse_after_reload_main (rtx, FILE *); + + +/* Check if register REG is used in any insn waiting to be inserted on E. + Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p + with PREV(insn),NEXT(insn) instead of calling + reg_overlap_mentioned_p. */ + +static bool +reg_used_on_edge (rtx reg, edge e) +{ + rtx insn; + + for (insn = e->insns; insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn))) + return true; + + return false; +} + +/* Return the insn that sets register REG or clobbers it in between + FROM_INSN and TO_INSN (exclusive of those two). + Just like reg_set_between but for hard registers and not pseudos. */ + +static rtx +reg_set_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn) +{ + rtx insn; + int regno; + + if (GET_CODE (reg) != REG) + abort (); + regno = REGNO (reg); + + /* We are called after register allocation. */ + if (regno >= FIRST_PSEUDO_REGISTER) + abort (); + + if (from_insn == to_insn) + return NULL_RTX; + + for (insn = NEXT_INSN (from_insn); + insn != to_insn; + insn = NEXT_INSN (insn)) + { + if (INSN_P (insn)) + { + if (FIND_REG_INC_NOTE (insn, reg) + || (GET_CODE (insn) == CALL_INSN + && call_used_regs[regno]) + || find_reg_fusage (insn, CLOBBER, reg)) + return insn; + } + if (set_of (reg, insn) != NULL_RTX) + return insn; + } + return NULL_RTX; +} + +/* Return the insn that uses register REG in between FROM_INSN and TO_INSN + (exclusive of those two). Similar to reg_used_between but for hard + registers and not pseudos. 
*/ + +static rtx +reg_used_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn) +{ + rtx insn; + int regno; + + if (GET_CODE (reg) != REG) + return to_insn; + regno = REGNO (reg); + + /* We are called after register allocation. */ + if (regno >= FIRST_PSEUDO_REGISTER) + abort (); + if (from_insn == to_insn) + return NULL_RTX; + + for (insn = NEXT_INSN (from_insn); + insn != to_insn; + insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && (reg_overlap_mentioned_p (reg, PATTERN (insn)) + || (GET_CODE (insn) == CALL_INSN + && call_used_regs[regno]) + || find_reg_fusage (insn, USE, reg) + || find_reg_fusage (insn, CLOBBER, reg))) + return insn; + return NULL_RTX; +} + +/* Return the loaded/stored register of a load/store instruction. */ + +static rtx +get_avail_load_store_reg (rtx insn) +{ + if (GET_CODE (SET_DEST (PATTERN (insn))) == REG) /* A load. */ + return SET_DEST(PATTERN(insn)); + if (GET_CODE (SET_SRC (PATTERN (insn))) == REG) /* A store. */ + return SET_SRC (PATTERN (insn)); + abort (); +} + +/* Don't handle ABNORMAL edges or jump tables. */ + +static bool +is_jump_table_basic_block (basic_block bb) +{ + rtx insn = BB_END (bb); + + if (GET_CODE (insn) == JUMP_INSN && + (GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) + return true; + return false; +} + +/* Return nonzero if the predecessors of BB are "well behaved". */ + +static bool +bb_has_well_behaved_predecessors (basic_block bb) +{ + edge pred; + + if (! bb->pred) + return false; + for (pred = bb->pred; pred != NULL; pred = pred->pred_next) + if (((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred)) + || is_jump_table_basic_block (pred->src)) + return false; + return true; +} + + +/* Search for the occurrences of expression in BB. */ + +static struct occr* +get_bb_avail_insn (basic_block bb, struct occr *occr) +{ + for (; occr != NULL; occr = occr->next) + if (BLOCK_FOR_INSN (occr->insn)->index == bb->index) + return occr; + return NULL; +} + +/* Perform partial GCSE pass after reload, try to eliminate redundant loads + created by the reload pass. We try to look for a full or partial + redundant loads fed by one or more loads/stores in predecessor BBs, + and try adding loads to make them fully redundant. We also check if + it's worth adding loads to be able to delete the redundant load. + + Algorithm: + 1. Build available expressions hash table: + For each load/store instruction, if the loaded/stored memory didn't + change until the end of the basic block add this memory expression to + the hash table. + 2. Perform Redundancy elimination: + For each load instruction do the following: + perform partial redundancy elimination, check if it's worth adding + loads to make the load fully redundant. If so add loads and + register copies and delete the load. + + Future enhancement: + if loaded register is used/defined between load and some store, + look for some other free register between load and all its stores, + and replace load with a copy from this register to the loaded + register. */ + + +/* This handles the case where several stores feed a partially redundant + load. It checks if the redundancy elimination is possible and if it's + worth it. 
*/ + +static void +eliminate_partially_redundant_loads (basic_block bb, rtx insn, + struct expr *expr) +{ + edge pred; + rtx avail_insn = NULL_RTX; + rtx avail_reg; + rtx dest, pat; + struct occr *a_occr; + struct unoccr *occr, *avail_occrs = NULL; + struct unoccr *unoccr, *unavail_occrs = NULL; + int npred_ok = 0; + gcov_type ok_count = 0; /* Redundant load execution count. */ + gcov_type critical_count = 0; /* Execution count of critical edges. */ + + /* The execution count of the loads to be added to make the + load fully redundant. */ + gcov_type not_ok_count = 0; + basic_block pred_bb; + + pat = PATTERN (insn); + dest = SET_DEST (pat); + /* Check if the loaded register is not used nor killed from the beginning + of the block. */ + if (reg_used_between_after_reload_p (dest, + PREV_INSN (BB_HEAD (bb)), insn)) + return; + + /* Check potential for replacing load with copy for predecessors. */ + for (pred = bb->pred; pred; pred = pred->pred_next) + { + rtx next_pred_bb_end; + + avail_insn = NULL_RTX; + pred_bb = pred->src; + next_pred_bb_end = NEXT_INSN (BB_END (pred_bb)); + for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr; + a_occr = get_bb_avail_insn (pred_bb, a_occr->next)) + { + /* Check if the loaded register is not used. */ + avail_insn = a_occr->insn; + if (! (avail_reg = get_avail_load_store_reg (avail_insn))) + abort (); + /* Make sure we can generate a move from register avail_reg to + dest. */ + extract_insn (gen_move_insn (copy_rtx (dest), + copy_rtx (avail_reg))); + if (! constrain_operands (1) + || reg_killed_on_edge (avail_reg, pred) + || reg_used_on_edge (dest, pred)) + { + avail_insn = NULL; + continue; + } + if (! reg_set_between_after_reload_p (avail_reg, avail_insn, + next_pred_bb_end)) + /* AVAIL_INSN remains non-null. */ + break; + else + avail_insn = NULL; + } + if (avail_insn != NULL_RTX) + { + npred_ok++; + ok_count += pred->count; + if (EDGE_CRITICAL_P (pred)) + critical_count += pred->count; + occr = (struct unoccr *) gmalloc (sizeof (struct unoccr)); + occr->insn = avail_insn; + occr->pred = pred; + occr->next = avail_occrs; + avail_occrs = occr; + } + else + { + not_ok_count += pred->count; + if (EDGE_CRITICAL_P (pred)) + critical_count += pred->count; + unoccr = (struct unoccr *) gmalloc (sizeof (struct unoccr)); + unoccr->insn = NULL_RTX; + unoccr->pred = pred; + unoccr->next = unavail_occrs; + unavail_occrs = unoccr; + } + } + + if (npred_ok == 0 /* No load can be replaced by copy. */ + || (optimize_size && npred_ok > 1)) /* Prevent exploding the code. */ + return; + + /* Check if it's worth applying the partial redundancy elimination. */ + if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count) + return; + + if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count) + return; + + /* Generate moves to the loaded register from where + the memory is available. */ + for (occr = avail_occrs; occr; occr = occr->next) + { + avail_insn = occr->insn; + pred = occr->pred; + /* Set avail_reg to be the register having the value of the + memory. */ + avail_reg = get_avail_load_store_reg (avail_insn); + if (! avail_reg) + abort (); + + insert_insn_on_edge (gen_move_insn (copy_rtx (dest), + copy_rtx (avail_reg)), + pred); + + if (gcse_file) + fprintf (gcse_file, + "GCSE AFTER reload generating move from %d to %d on \ + edge from %d to %d\n", + REGNO (avail_reg), + REGNO (dest), + pred->src->index, + pred->dest->index); + } + + /* Regenerate loads where the memory is unavailable. 
*/ + for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next) + { + pred = unoccr->pred; + insert_insn_on_edge (copy_insn (PATTERN (insn)), pred); + + if (gcse_file) + fprintf (gcse_file, + "GCSE AFTER reload: generating on edge from %d to %d\ + a copy of load:\n", + pred->src->index, + pred->dest->index); + } + + /* Delete the insn if it is not available in this block and mark it + for deletion if it is available. If insn is available it may help + discover additional redundancies, so mark it for later deletion.*/ + for (a_occr = get_bb_avail_insn (bb, expr->avail_occr); + a_occr && (a_occr->insn != insn); + a_occr = get_bb_avail_insn (bb, a_occr->next)); + + if (!a_occr) + delete_insn (insn); + else + a_occr->deleted_p = 1; +} + +/* Performing the redundancy elimination as described before. */ + +static void +gcse_after_reload (void) +{ + unsigned int i; + rtx insn; + basic_block bb; + struct expr *expr; + struct occr *occr; + + /* Note we start at block 1. */ + + if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR) + return; + + FOR_BB_BETWEEN (bb, + ENTRY_BLOCK_PTR->next_bb->next_bb, + EXIT_BLOCK_PTR, + next_bb) + { + if (! bb_has_well_behaved_predecessors (bb)) + continue; + + /* Do not try this optimization on cold basic blocks. */ + if (probably_cold_bb_p (bb)) + continue; + + reset_opr_set_tables (); + + for (insn = BB_HEAD (bb); + insn != NULL + && insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + { + /* Is it a load - of the form (set (reg) (mem))? */ + if (GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) == SET + && GET_CODE (SET_DEST (PATTERN (insn))) == REG + && GET_CODE (SET_SRC (PATTERN (insn))) == MEM) + { + rtx pat = PATTERN (insn); + rtx src = SET_SRC (pat); + struct expr *expr; + + if (general_operand (src, GET_MODE (src)) + /* Is the expression recorded? */ + && (expr = lookup_expr (src, &expr_hash_table)) != NULL + /* Are the operands unchanged since the start of the + block? */ + && oprs_not_set_p (src, insn) + && ! MEM_VOLATILE_P (src) + && GET_MODE (src) != BLKmode + && !(flag_non_call_exceptions && may_trap_p (src)) + && !side_effects_p (src)) + { + /* We now have a load (insn) and an available memory at + its BB start (expr). Try to remove the loads if it is + redundant. */ + eliminate_partially_redundant_loads (bb, insn, expr); + } + } + + /* Keep track of everything modified by this insn. */ + if (INSN_P (insn)) + mark_oprs_set (insn); + } + } + + commit_edge_insertions (); + + /* Go over the expression hash table and delete insns that were + marked for later deletion. */ + for (i = 0; i < expr_hash_table.size; i++) + { + for (expr = expr_hash_table.table[i]; + expr != NULL; + expr = expr->next_same_hash) + for (occr = expr->avail_occr; occr; occr = occr->next) + if (occr->deleted_p) + delete_insn (occr->insn); + } +} + +/* Scan pattern PAT of INSN and add an entry to the hash TABLE. + After reload we are interested in loads/stores only. */ + +static void +hash_scan_set_after_reload (rtx pat, rtx insn, struct hash_table *table) +{ + rtx src = SET_SRC (pat); + rtx dest = SET_DEST (pat); + + if (GET_CODE (src) != MEM && GET_CODE (dest) != MEM) + return; + + if (GET_CODE (dest) == REG) + { + if (/* Don't GCSE something if we can't do a reg/reg copy. */ + can_copy_p (GET_MODE (dest)) + /* GCSE commonly inserts instruction after the insn. We can't + do that easily for EH_REGION notes so disable GCSE on these + for now. */ + && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX) + /* Is SET_SRC something we want to gcse? 
*/ + && general_operand (src, GET_MODE (src)) + /* Don't CSE a nop. */ + && ! set_noop_p (pat) + && ! JUMP_P (insn)) + { + /* An expression is not available if its operands are + subsequently modified, including this insn. */ + if (oprs_available_p (src, insn)) + insert_expr_in_table (src, GET_MODE (dest), insn, 0, 1, table); + } + } + else if ((GET_CODE (src) == REG)) + { + /* Only record sets of pseudo-regs in the hash table. */ + if (/* Don't GCSE something if we can't do a reg/reg copy. */ + can_copy_p (GET_MODE (src)) + /* GCSE commonly inserts instruction after the insn. We can't + do that easily for EH_REGION notes so disable GCSE on these + for now. */ + && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX) + /* Is SET_DEST something we want to gcse? */ + && general_operand (dest, GET_MODE (dest)) + /* Don't CSE a nop. */ + && ! set_noop_p (pat) + &&! JUMP_P (insn) + && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest))) + /* Check if the memory expression is killed after insn. */ + && ! load_killed_in_block_p (BLOCK_FOR_INSN (insn), + INSN_CUID (insn) + 1, + dest, + 1) + && oprs_unchanged_p (XEXP (dest, 0), insn, 1)) + { + insert_expr_in_table (dest, GET_MODE (dest), insn, 0, 1, table); + } + } +} + + +/* Create hash table of memory expressions available at end of basic + blocks. */ + +static void +compute_hash_table_after_reload (struct hash_table *table) +{ + unsigned int i; + + table->set_p = 0; + + /* Initialize count of number of entries in hash table. */ + table->n_elems = 0; + memset ((char *) table->table, 0, + table->size * sizeof (struct expr *)); + + /* While we compute the hash table we also compute a bit array of which + registers are set in which blocks. */ + sbitmap_vector_zero (reg_set_in_block, last_basic_block); + + /* Re-cache any INSN_LIST nodes we have allocated. */ + clear_modify_mem_tables (); + + /* Some working arrays used to track first and last set in each block. */ + reg_avail_info = (struct reg_avail_info*) + gmalloc (max_gcse_regno * sizeof (struct reg_avail_info)); + + for (i = 0; i < max_gcse_regno; ++i) + reg_avail_info[i].last_bb = NULL; + + FOR_EACH_BB (current_bb) + { + rtx insn; + unsigned int regno; + + /* First pass over the instructions records information used to + determine when registers and memory are first and last set. */ + for (insn = BB_HEAD (current_bb); + insn && insn != NEXT_INSN (BB_END (current_bb)); + insn = NEXT_INSN (insn)) + { + if (! INSN_P (insn)) + continue; + + if (GET_CODE (insn) == CALL_INSN) + { + bool clobbers_all = false; + +#ifdef NON_SAVING_SETJMP + if (NON_SAVING_SETJMP + && find_reg_note (insn, REG_SETJMP, NULL_RTX)) + clobbers_all = true; +#endif + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (clobbers_all + || TEST_HARD_REG_BIT (regs_invalidated_by_call, + regno)) + record_last_reg_set_info (insn, regno); + + mark_call (insn); + } + + note_stores (PATTERN (insn), record_last_set_info, insn); + + if (GET_CODE (PATTERN (insn)) == SET) + { + rtx src, dest; + + src = SET_SRC (PATTERN (insn)); + dest = SET_DEST (PATTERN (insn)); + if (GET_CODE (src) == MEM && auto_inc_p (XEXP (src, 0))) + { + regno = REGNO (XEXP (XEXP (src, 0), 0)); + record_last_reg_set_info (insn, regno); + } + if (GET_CODE (dest) == MEM && auto_inc_p (XEXP (dest, 0))) + { + regno = REGNO (XEXP (XEXP (dest, 0), 0)); + record_last_reg_set_info (insn, regno); + } + } + } + + /* The next pass builds the hash table. 
*/ + for (insn = BB_HEAD (current_bb); + insn && insn != NEXT_INSN (BB_END (current_bb)); + insn = NEXT_INSN (insn)) + if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET) + if (! find_reg_note (insn, REG_LIBCALL, NULL_RTX)) + hash_scan_set_after_reload (PATTERN (insn), insn, table); + } + + free (reg_avail_info); + reg_avail_info = NULL; +} + + +/* Main entry point of the GCSE after reload - clean some redundant loads + due to spilling. */ + +void +gcse_after_reload_main (rtx f, FILE* file) +{ + gcse_subst_count = 0; + gcse_create_count = 0; + + gcse_file = file; + + gcc_obstack_init (&gcse_obstack); + bytes_used = 0; + + /* We need alias. */ + init_alias_analysis (); + + max_gcse_regno = max_reg_num (); + + alloc_reg_set_mem (max_gcse_regno); + alloc_gcse_mem (f); + alloc_hash_table (max_cuid, &expr_hash_table, 0); + compute_hash_table_after_reload (&expr_hash_table); + + if (gcse_file) + dump_hash_table (gcse_file, "Expression", &expr_hash_table); + + if (expr_hash_table.n_elems > 0) + gcse_after_reload (); + + free_hash_table (&expr_hash_table); + + free_gcse_mem (); + free_reg_set_mem (); + + /* We are finished with alias. */ + end_alias_analysis (); + + obstack_free (&gcse_obstack, NULL); +} + #include "gt-gcse.h" |
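To make the profitability test in eliminate_partially_redundant_loads concrete (the numbers here are illustrative, not taken from the patch): the transformation is abandoned when ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count, or when ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count, where ok_count sums the execution counts of predecessor edges on which the value is already available, not_ok_count sums those on which a compensating load would have to be inserted, and critical_count sums those of critical edges. With a partial fraction of, say, 3, a load reached 900 times through "available" predecessors and 200 times through "unavailable" ones passes the test (900 >= 3 * 200) and is eliminated; if the "unavailable" count were 400 instead, 900 < 1200 and the load is left in place.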