diff options
-rw-r--r-- | gcc/ChangeLog | 15 | ||||
-rw-r--r-- | gcc/Makefile.in | 5 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 41 | ||||
-rw-r--r-- | gcc/params.def | 35 | ||||
-rw-r--r-- | gcc/rtl.h | 2 | ||||
-rw-r--r-- | gcc/timevar.def | 1 | ||||
-rw-r--r-- | gcc/toplev.c | 21 | ||||
-rw-r--r-- | gcc/tracer.c | 373 |
8 files changed, 489 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e0f3d486384..2e5018942e9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +Sat Jun 1 23:29:51 CEST 2002 Jan Hubicka <jh@suse.cz> + + * Makefile.in (tracer.o): New. + * params.def (TRACER_*): New options. + * rtl.h (tracer): Declare. + * timevar.def (TV_TRACER): New. + * toplev.c (dump_file_index): Add DFI_tracer. + (dump_file_info): Add tracer. + (flag_tracer): New. + (lang_indepdenent_options): Add tracer. + (rest_of_compilation): Call tracer. + * tracer.c: New file. + * invoke.texi (-ftracer): Document. + (--param tracer-*): Document. + 2002-06-01 Daniel Berlin <dberlin@dberlin.org> * tree-inline.c (expand_call_inline): Make the statement diff --git a/gcc/Makefile.in b/gcc/Makefile.in index f052d98997e..69431fcebf0 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -732,7 +732,7 @@ OBJS = alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o \ reload.o reload1.o reorg.o resource.o rtl.o rtlanal.o rtl-error.o \ sbitmap.o sched-deps.o sched-ebb.o sched-rgn.o sched-vis.o sdbout.o \ sibcall.o simplify-rtx.o ssa.o ssa-ccp.o ssa-dce.o stmt.o \ - stor-layout.o stringpool.o timevar.o toplev.o tree.o tree-dump.o \ + stor-layout.o stringpool.o timevar.o toplev.o tracer.o tree.o tree-dump.o \ tree-inline.o unroll.o varasm.o varray.o version.o vmsdbgout.o xcoffout.o \ $(GGC) $(out_object_file) $(EXTRA_OBJS) @@ -1596,6 +1596,9 @@ predict.o: predict.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) flags.h \ lists.o: lists.c $(CONFIG_H) $(SYSTEM_H) toplev.h $(RTL_H) $(GGC_H) bb-reorder.o : bb-reorder.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) \ flags.h $(BASIC_BLOCK_H) hard-reg-set.h output.h cfglayout.h $(TARGET_H) +tracer.o : tracer.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) \ + $(BASIC_BLOCK_H) hard-reg-set.h output.h cfglayout.h flags.h \ + $(PARAMS_H) profile.h cfglayout.o : cfglayout.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) \ insn-config.h $(BASIC_BLOCK_H) hard-reg-set.h output.h function.h \ cfglayout.h diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1ab9cb39f98..0b58d11fa8f 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -282,7 +282,7 @@ in the following sections. -frerun-cse-after-loop -frerun-loop-opt @gol -fschedule-insns -fschedule-insns2 @gol -fsingle-precision-constant -fssa -fssa-ccp -fssa-dce @gol --fstrength-reduce -fstrict-aliasing -fthread-jumps -ftrapv @gol +-fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps -ftrapv @gol -funroll-all-loops -funroll-loops @gol --param @var{name}=@var{value} -O -O0 -O1 -O2 -O3 -Os} @@ -3637,6 +3637,12 @@ those which have no call-preserved registers to use instead. For all machines, optimization level 2 and higher enables this flag by default. +@item -ftracer +@opindex ftracer +Perform tail duplication to enlarge superblock size. This transformation +simplifies the control flow of the function allowing other optimizations to do +better job. + @item -funroll-loops @opindex funroll-loops Unroll loops whose number of iterations can be determined at compile @@ -3941,6 +3947,39 @@ given basic block needs to have to be considered hot. @item hot-bb-frequency-fraction Select fraction of the maximal frequency of executions of basic block in function given basic block needs to have to be considered hot + +@item tracer-dynamic-coverage +@itemx tracer-dynamic-coverage-feedback + +This value is used to limit superblock formation once given percentage of +executed instructions is covered. This limits unnecesary code size expansion. + +The @option{tracer-dynamic-coverage-feedback} is used only when profile +feedback is available. The real profiles (as opposed to statically estimated +ones) are much less balanced allowing the threshold to be larger value. + +@item tracer-max-code-growth +Stop tail duplication once code growth has reached given percentage. This is +rather hokey argument, as most of the duplicates will be elliminated later in +cross jumping, so it may be set to much higher values than is the desired code +growth. + +@item tracer-min-branch-ratio + +Stop reverse growth when the reverse probability of best edge is less than this +threshold (in percent). + +@item tracer-min-branch-ratio +@itemx tracer-min-branch-ratio-feedback + +Stop forward growth if the best edge do have probability lower than this +threshold. + +Similary to @option{tracer-dynamic-coverage} two values are present, one for +compilation for profile feedback and one for compilation without. The value +for compilation with profile feedback needs to be more conservative (higher) in +order to make tracer effective. + @end table @end table diff --git a/gcc/params.def b/gcc/params.def index de55ecc5841..1b3105556b2 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -153,12 +153,43 @@ DEFPARAM(PARAM_MAX_UNROLLED_INSNS, DEFPARAM(HOT_BB_COUNT_FRACTION, "hot-bb-count-fraction", - "Select fraction of the maximal count of repetitions of basic block in program given basic block needs to have to be considered hot", + "Select fraction of the maximal count of repetitions of basic block in \ +program given basic block needs to have to be considered hot", 10000) DEFPARAM(HOT_BB_FREQUENCY_FRACTION, "hot-bb-frequency-fraction", - "Select fraction of the maximal frequency of executions of basic block in function given basic block needs to have to be considered hot", + "Select fraction of the maximal frequency of executions of basic \ +block in function given basic block needs to have to be considered hot", 1000) +DEFPARAM(TRACER_DYNAMIC_COVERAGE_FEEDBACK, + "tracer-dynamic-coverage-feedback", + "The percentage of function, weighted by execution frequency, that \ +must be covered by trace formation. Used when profile feedback is available", + 95) +DEFPARAM(TRACER_DYNAMIC_COVERAGE, + "tracer-dynamic-coverage", + "The percentage of function, weighted by execution frequency, that \ +must be covered by trace formation. Used when profile feedback is not available", + 75) +DEFPARAM(TRACER_MAX_CODE_GROWTH, + "tracer-max-code-growth", + "Maximal code growth caused by tail duplication (in percents)", + 100) +DEFPARAM(TRACER_MIN_BRANCH_RATIO, + "tracer-min-branch-ratio", + "Stop reverse growth if the reverse probability of best edge is less \ +than this threshold (in percents)", + 10) +DEFPARAM(TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK, + "tracer-min-branch-probability-feedback", + "Stop forward growth if the probability of best edge is less than \ +this threshold (in percents). Used when profile feedback is available", + 30) +DEFPARAM(TRACER_MIN_BRANCH_PROBABILITY, + "tracer-min-branch-probability", + "Stop forward growth if the probability of best edge is less than \ +this threshold (in percents). Used when profile feedback is not available", + 50) /* Local variables: mode:c diff --git a/gcc/rtl.h b/gcc/rtl.h index f4adc1da6b5..81388eb8b74 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -2281,4 +2281,6 @@ extern void if_convert PARAMS ((int)); /* In predict.c */ extern void invert_br_probabilities PARAMS ((rtx)); extern bool expensive_function_p PARAMS ((int)); +/* In tracer.c */ +extern void tracer PARAMS ((void)); #endif /* ! GCC_RTL_H */ diff --git a/gcc/timevar.def b/gcc/timevar.def index 74f8907ae7b..49eeedbf0a7 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -58,6 +58,7 @@ DEFTIMEVAR (TV_JUMP , "jump") DEFTIMEVAR (TV_CSE , "CSE") DEFTIMEVAR (TV_GCSE , "global CSE") DEFTIMEVAR (TV_LOOP , "loop analysis") +DEFTIMEVAR (TV_TRACER , "tracer") DEFTIMEVAR (TV_CSE2 , "CSE 2") DEFTIMEVAR (TV_BRANCH_PROB , "branch prediction") DEFTIMEVAR (TV_FLOW , "flow analysis") diff --git a/gcc/toplev.c b/gcc/toplev.c index a3d0dfd08e0..b9432f44c35 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -224,6 +224,7 @@ enum dump_file_index DFI_loop, DFI_cfg, DFI_bp, + DFI_tracer, DFI_cse2, DFI_life, DFI_combine, @@ -271,6 +272,7 @@ static struct dump_file_info dump_file[DFI_MAX] = { "loop", 'L', 1, 0, 0 }, { "cfg", 'f', 1, 0, 0 }, { "bp", 'b', 1, 0, 0 }, + { "tracer", 'T', 1, 0, 0 }, { "cse2", 't', 1, 0, 0 }, { "life", 'f', 1, 0, 0 }, /* Yes, duplicate enable switch. */ { "combine", 'c', 1, 0, 0 }, @@ -866,6 +868,10 @@ int flag_merge_constants = 1; one, unconditionally renumber instruction UIDs. */ int flag_renumber_insns = 1; +/* Nonzero if we perform superblock formation. */ + +int flag_tracer = 0; + /* Values of the -falign-* flags: how much to align labels in code. 0 means `use default', 1 means `don't align'. For each variable, there is an _log variant which is the power @@ -977,6 +983,8 @@ static const lang_independent_options f_options[] = N_("When possible do not generate stack frames") }, {"optimize-sibling-calls", &flag_optimize_sibling_calls, 1, N_("Optimize sibling and tail recursive calls") }, + {"tracer", &flag_tracer, 1, + N_("Perform superblock formation via tail duplication") }, {"cse-follow-jumps", &flag_cse_follow_jumps, 1, N_("When running CSE, follow jumps to their targets") }, {"cse-skip-blocks", &flag_cse_skip_blocks, 1, @@ -2958,6 +2966,19 @@ rest_of_compilation (decl) close_dump_file (DFI_bp, print_rtl_with_bb, insns); timevar_pop (TV_BRANCH_PROB); } + if (flag_tracer) + { + timevar_push (TV_TRACER); + open_dump_file (DFI_tracer, decl); + if (rtl_dump_file) + dump_flow_info (rtl_dump_file); + cleanup_cfg (CLEANUP_EXPENSIVE); + tracer (); + cleanup_cfg (CLEANUP_EXPENSIVE); + close_dump_file (DFI_tracer, print_rtl_with_bb, get_insns ()); + timevar_pop (TV_TRACER); + reg_scan (get_insns (), max_reg_num (), 0); + } if (optimize > 0) { diff --git a/gcc/tracer.c b/gcc/tracer.c new file mode 100644 index 00000000000..b7c5a768b7d --- /dev/null +++ b/gcc/tracer.c @@ -0,0 +1,373 @@ +/* The tracer pass for the GNU compiler. + Contributed by Jan Hubicka, SuSE Labs. + Copyright (C) 2001 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. */ + +/* This pass performs the tail duplication needed for superblock formation. + For more information see: + + Design and Analysis of Profile-Based Optimization in Compaq's + Compilation Tools for Alpha; Journal of Instruction-Level + Parallelism 3 (2000) 1-25 + + Unlike Compaq's implementation we don't do the loop peeling as most + probably a better job can be done by a special pass and we don't + need to worry too much about the code size implications as the tail + duplicates are crossjumped again if optimizations are not + performed. */ + + +#include "config.h" +#include "system.h" +#include "tree.h" +#include "rtl.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "output.h" +#include "cfglayout.h" +#include "fibheap.h" +#include "flags.h" +#include "params.h" +#include "profile.h" + +static int count_insns PARAMS ((basic_block)); +static bool ignore_bb_p PARAMS ((basic_block)); +static bool better_p PARAMS ((edge, edge)); +static int find_trace PARAMS ((basic_block, basic_block *)); +static void tail_duplicate PARAMS ((void)); +static void layout_superblocks PARAMS ((void)); +static bool ignore_bb_p PARAMS ((basic_block)); + +/* Minimal outgoing edge probability considered for superblock formation. */ +static int probability_cutoff; +static int branch_ratio_cutoff; + +/* Return true if BB has been seen - it is connected to some trace + already. */ + +#define seen(bb) (RBI (bb)->visited || RBI (bb)->next) + +/* Return true if we should ignore the basic block for purposes of tracing. */ +static bool +ignore_bb_p (basic_block bb) +{ + if (bb->index < 0) + return true; + if (!maybe_hot_bb_p (bb)) + return true; + return false; +} + +/* Return number of instructions in the block. */ + +static int +count_insns (bb) + basic_block bb; +{ + rtx insn; + int n = 0; + + for (insn = bb->head; insn != NEXT_INSN (bb->end); insn = NEXT_INSN (insn)) + if (active_insn_p (insn)) + n++; + return n; +} + +/* Return true if E1 is more frequent than E2. */ +static bool +better_p (e1, e2) + edge e1, e2; +{ + if (e1->count != e2->count) + return e1->count > e2->count; + if (e1->src->frequency * e1->probability != + e2->src->frequency * e2->probability) + return (e1->src->frequency * e1->probability + > e2->src->frequency * e2->probability); + /* This is needed to avoid changes in the decision after + CFG is modified. */ + if (e1->src != e2->src) + return e1->src->index > e2->src->index; + return e1->dest->index > e2->dest->index; +} + +/* Return most frequent successor of basic block BB. */ + +static edge +find_best_successor (basic_block bb) +{ + edge e; + edge best = NULL; + + for (e = bb->succ; e; e = e->succ_next) + if (!best || better_p (e, best)) + best = e; + if (!best || ignore_bb_p (best->dest)) + return NULL; + if (best->probability <= probability_cutoff) + return NULL; + return best; +} + +/* Return most frequent predecessor of basic block BB. */ + +static edge +find_best_predecessor (basic_block bb) +{ + edge e; + edge best = NULL; + + for (e = bb->pred; e; e = e->pred_next) + if (!best || better_p (e, best)) + best = e; + if (!best || ignore_bb_p (best->src)) + return NULL; + if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE + < bb->frequency * branch_ratio_cutoff) + return NULL; + return best; +} + +/* Find the trace using bb and record it in the TRACE array. + Return number of basic blocks recorded. */ + +static int +find_trace (bb, trace) + basic_block bb; + basic_block *trace; +{ + int i = 0; + edge e; + + if (rtl_dump_file) + fprintf (rtl_dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); + + while ((e = find_best_predecessor (bb)) != NULL) + { + basic_block bb2 = e->src; + if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) + || find_best_successor (bb2) != e) + break; + if (rtl_dump_file) + fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency); + bb = bb2; + } + if (rtl_dump_file) + fprintf (rtl_dump_file, " forward %i [%i]", bb->index, bb->frequency); + trace[i++] = bb; + + /* Follow the trace in forward direction. */ + while ((e = find_best_successor (bb)) != NULL) + { + bb = e->dest; + if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) + || find_best_predecessor (bb) != e) + break; + if (rtl_dump_file) + fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency); + trace[i++] = bb; + } + if (rtl_dump_file) + fprintf (rtl_dump_file, "\n"); + return i; +} + +/* Look for basic blocks in frequency order, construct traces and tail duplicate + if profitable. */ + +static void +tail_duplicate () +{ + fibnode_t *blocks = xcalloc (last_basic_block, sizeof (fibnode_t)); + basic_block *trace = xmalloc (sizeof (basic_block) * n_basic_blocks); + int *counts = xmalloc (sizeof (int) * last_basic_block); + int ninsns = 0, nduplicated = 0; + gcov_type weighted_insns = 0, traced_insns = 0; + fibheap_t heap = fibheap_new (); + gcov_type cover_insns; + int max_dup_insns; + basic_block bb; + + if (profile_info.count_profiles_merged && flag_branch_probabilities) + probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); + else + probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); + probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; + + branch_ratio_cutoff = + (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); + + FOR_EACH_BB (bb) + { + int n = count_insns (bb); + if (!ignore_bb_p (bb)) + blocks[bb->index] = fibheap_insert (heap, -bb->frequency, + bb); + + counts [bb->index] = n; + ninsns += n; + weighted_insns += n * bb->frequency; + } + + if (profile_info.count_profiles_merged && flag_branch_probabilities) + cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); + else + cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); + cover_insns = (weighted_insns * cover_insns + 50) / 100; + max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; + + while (traced_insns < cover_insns && nduplicated < max_dup_insns + && !fibheap_empty (heap)) + { + basic_block bb = fibheap_extract_min (heap); + int n, pos; + + if (!bb) + break; + + blocks[bb->index] = NULL; + + if (ignore_bb_p (bb)) + continue; + if (seen (bb)) + abort (); + + n = find_trace (bb, trace); + + bb = trace[0]; + traced_insns += bb->frequency * counts [bb->index]; + if (blocks[bb->index]) + { + fibheap_delete_node (heap, blocks[bb->index]); + blocks[bb->index] = NULL; + } + + for (pos = 1; pos < n; pos++) + { + basic_block bb2 = trace[pos]; + + if (blocks[bb2->index]) + { + fibheap_delete_node (heap, blocks[bb2->index]); + blocks[bb2->index] = NULL; + } + traced_insns += bb2->frequency * counts [bb2->index]; + if (bb2->pred && bb2->pred->pred_next + && cfg_layout_can_duplicate_bb_p (bb2)) + { + edge e = bb2->pred; + basic_block old = bb2; + + while (e->src != bb) + e = e->pred_next; + nduplicated += counts [bb2->index]; + bb2 = cfg_layout_duplicate_bb (bb2, e); + + /* Reconsider the original copy of block we've duplicated. + Removing the most common predecesor may make it to be + head. */ + blocks[old->index] = + fibheap_insert (heap, -old->frequency, old); + + if (rtl_dump_file) + fprintf (rtl_dump_file, "Duplicated %i as %i [%i]\n", + old->index, bb2->index, bb2->frequency); + } + RBI (bb)->next = bb2; + RBI (bb2)->visited = 1; + bb = bb2; + /* In case the trace became infrequent, stop duplicating. */ + if (ignore_bb_p (bb)) + break; + } + if (rtl_dump_file) + fprintf (rtl_dump_file, " covered now %.1f\n\n", + traced_insns * 100.0 / weighted_insns); + } + if (rtl_dump_file) + fprintf (rtl_dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, + nduplicated * 100 / ninsns); + + free (blocks); + free (trace); + free (counts); + fibheap_delete (heap); +} + +/* Connect the superblocks into linear seuqence. At the moment we attempt to keep + the original order as much as possible, but the algorithm may be made smarter + later if needed. BB reordering pass should void most of the benefits of such + change though. */ + +static void +layout_superblocks () +{ + basic_block end = ENTRY_BLOCK_PTR->succ->dest; + basic_block bb = ENTRY_BLOCK_PTR->succ->dest->next_bb; + + while (bb != EXIT_BLOCK_PTR) + { + edge e, best = NULL; + while (RBI (end)->next) + end = RBI (end)->next; + + for (e = end->succ; e; e = e->succ_next) + if (e->dest != EXIT_BLOCK_PTR + && e->dest != ENTRY_BLOCK_PTR->succ->dest + && !RBI (e->dest)->visited + && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best))) + best = e; + + if (best) + { + RBI (end)->next = best->dest; + RBI (best->dest)->visited = 1; + } + else + for (; bb != EXIT_BLOCK_PTR; bb=bb->next_bb) + { + if (!RBI (bb)->visited) + { + RBI (end)->next = bb; + RBI (bb)->visited = 1; + break; + } + } + } +} + +/* Main entry point to this file. */ + +void +tracer () +{ + if (n_basic_blocks <= 1) + return; + cfg_layout_initialize (); + mark_dfs_back_edges (); + if (rtl_dump_file) + dump_flow_info (rtl_dump_file); + tail_duplicate (); + layout_superblocks (); + if (rtl_dump_file) + dump_flow_info (rtl_dump_file); + cfg_layout_finalize (); + /* Merge basic blocks in duplicated traces. */ + cleanup_cfg (CLEANUP_EXPENSIVE); +} |