diff options
-rw-r--r-- | gcc/ChangeLog | 21 | ||||
-rw-r--r-- | gcc/cgraph.c | 1 | ||||
-rw-r--r-- | gcc/cgraph.h | 3 | ||||
-rw-r--r-- | gcc/lto-cgraph.c | 127 | ||||
-rw-r--r-- | gcc/lto-streamer-in.c | 20 | ||||
-rw-r--r-- | gcc/lto-streamer.h | 3 |
6 files changed, 145 insertions, 30 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f09ba9535c9..d697ad1b104 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,24 @@ +2010-12-03 Jan Hubicka <jh@suse.cz> + + PR tree-optimization/46760 + * cgraph.c (cgraph_create_node): Initialize count_materialization_scale. + * cgraph.h (struct cgraph_node): Add count_materialization_scale. + * lto-cgraph.c (lto_output_edge): Fix assert. + (lto_output_node): Output count_materialization_scale. + (output_profile_summary): Output only runs and sum_max. + (input_node): Input count_materialization_scale. + (input_profile_summary): Read data into file specific gcov summary. + (merge_profile_summaries): New function. + (input_cgraph): Update call of input_profile_summary; + call merge_profile_summaries. + * lto-streamer-in.c (input_cfg): Add count_materialization_scale arg; + rescale counts at read in. + (intput_bb): Likewise. + (input_function): Update call of input_bb. + (lto_read_body): Update call of input_cfg. + * lto-streamer.h: Inlclude gcov-io.h + (lto_file_decl_data): Add gcov_ctr_summary. + 2010-12-03 Dave Korn <dave.korn.cygwin@gmail.com> * doc/tm.texi.in (Cond. Exec. Macros): Rename node from this ... diff --git a/gcc/cgraph.c b/gcc/cgraph.c index a53e1684d25..3ffc38463ef 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -478,6 +478,7 @@ cgraph_create_node (void) node->previous = NULL; node->global.estimated_growth = INT_MIN; node->frequency = NODE_FREQUENCY_NORMAL; + node->count_materialization_scale = REG_BR_PROB_BASE; ipa_empty_ref_list (&node->ref_list); cgraph_nodes = node; cgraph_n_nodes++; diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 99d549c328a..57a7e3bed22 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -233,6 +233,9 @@ struct GTY((chain_next ("%h.next"), chain_prev ("%h.previous"))) cgraph_node { /* Expected number of executions: calculated in profile.c. */ gcov_type count; + /* How to scale counts at materialization time; used to merge + LTO units with different number of profile runs. */ + int count_materialization_scale; /* Unique id of the node. */ int uid; /* Ordering of all cgraph nodes. */ diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 1c7290ed0f7..140a926909a 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -302,6 +302,7 @@ lto_output_edge (struct lto_simple_output_block *ob, struct cgraph_edge *edge, gcc_assert (!(flags & (ECF_LOOPING_CONST_OR_PURE | ECF_MAY_BE_ALLOCA | ECF_SIBCALL + | ECF_LEAF | ECF_NOVOPS))); } lto_output_bitpack (&bp); @@ -462,6 +463,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node, lto_output_fn_decl_index (ob->decl_state, ob->main_stream, node->decl); lto_output_sleb128_stream (ob->main_stream, node->count); + lto_output_sleb128_stream (ob->main_stream, node->count_materialization_scale); if (tag == LTO_cgraph_analyzed_node) { @@ -661,12 +663,12 @@ output_profile_summary (struct lto_simple_output_block *ob) { if (profile_info) { - /* We do not output num, it is not terribly useful. */ + /* We do not output num, sum_all and run_max, they are not used by + GCC profile feedback and they are difficult to merge from multiple + units. */ gcc_assert (profile_info->runs); lto_output_uleb128_stream (ob->main_stream, profile_info->runs); - lto_output_sleb128_stream (ob->main_stream, profile_info->sum_all); - lto_output_sleb128_stream (ob->main_stream, profile_info->run_max); - lto_output_sleb128_stream (ob->main_stream, profile_info->sum_max); + lto_output_uleb128_stream (ob->main_stream, profile_info->sum_max); } else lto_output_uleb128_stream (ob->main_stream, 0); @@ -1045,6 +1047,7 @@ input_node (struct lto_file_decl_data *file_data, node = cgraph_node (fn_decl); node->count = lto_input_sleb128 (ib); + node->count_materialization_scale = lto_input_sleb128 (ib); if (tag == LTO_cgraph_analyzed_node) { @@ -1424,32 +1427,108 @@ static struct gcov_ctr_summary lto_gcov_summary; /* Input profile_info from IB. */ static void -input_profile_summary (struct lto_input_block *ib) +input_profile_summary (struct lto_input_block *ib, + struct lto_file_decl_data *file_data) { unsigned int runs = lto_input_uleb128 (ib); if (runs) { - if (!profile_info) - { - profile_info = <o_gcov_summary; - lto_gcov_summary.runs = runs; - lto_gcov_summary.sum_all = lto_input_sleb128 (ib); - lto_gcov_summary.run_max = lto_input_sleb128 (ib); - lto_gcov_summary.sum_max = lto_input_sleb128 (ib); - } - /* We can support this by scaling all counts to nearest common multiple - of all different runs, but it is perhaps not worth the effort. */ - else if (profile_info->runs != runs - || profile_info->sum_all != lto_input_sleb128 (ib) - || profile_info->run_max != lto_input_sleb128 (ib) - || profile_info->sum_max != lto_input_sleb128 (ib)) - sorry ("combining units with different profiles is not supported"); - /* We allow some units to have profile and other to not have one. This will - just make unprofiled units to be size optimized that is sane. */ + file_data->profile_info.runs = runs; + file_data->profile_info.sum_max = lto_input_uleb128 (ib); + if (runs > file_data->profile_info.sum_max) + fatal_error ("Corrupted profile info in %s: sum_max is smaller than runs", + file_data->file_name); } } +/* Rescale profile summaries to the same number of runs in the whole unit. */ + +static void +merge_profile_summaries (struct lto_file_decl_data **file_data_vec) +{ + struct lto_file_decl_data *file_data; + unsigned int j; + gcov_unsigned_t max_runs = 0; + struct cgraph_node *node; + struct cgraph_edge *edge; + + /* Find unit with maximal number of runs. If we ever get serious about + roundoff errors, we might also consider computing smallest common + multiply. */ + for (j = 0; (file_data = file_data_vec[j]) != NULL; j++) + if (max_runs < file_data->profile_info.runs) + max_runs = file_data->profile_info.runs; + + if (!max_runs) + return; + + /* Simple overflow check. We probably don't need to support that many train + runs. Such a large value probably imply data corruption anyway. */ + if (max_runs > INT_MAX / REG_BR_PROB_BASE) + { + sorry ("At most %i profile runs is supported. Perhaps corrupted profile?", + INT_MAX / REG_BR_PROB_BASE); + return; + } + + profile_info = <o_gcov_summary; + lto_gcov_summary.runs = max_runs; + lto_gcov_summary.sum_max = 0; + + /* Rescale all units to the maximal number of runs. + sum_max can not be easily merged, as we have no idea what files come from + the same run. We do not use the info anyway, so leave it 0. */ + for (j = 0; (file_data = file_data_vec[j]) != NULL; j++) + if (file_data->profile_info.runs) + { + int scale = ((REG_BR_PROB_BASE * max_runs + + file_data->profile_info.runs / 2) + / file_data->profile_info.runs); + lto_gcov_summary.sum_max = MAX (lto_gcov_summary.sum_max, + (file_data->profile_info.sum_max + * scale + + REG_BR_PROB_BASE / 2) + / REG_BR_PROB_BASE); + } + + /* Watch roundoff errors. */ + if (lto_gcov_summary.sum_max < max_runs) + lto_gcov_summary.sum_max = max_runs; + + /* If merging already happent at WPA time, we are done. */ + if (flag_ltrans) + return; + + /* Now compute count_materialization_scale of each node. + During LTRANS we already have values of count_materialization_scale + computed, so just update them. */ + for (node = cgraph_nodes; node; node = node->next) + if (node->local.lto_file_data->profile_info.run_max) + { + int scale; + if (node->local.lto_file_data->profile_info.runs) + scale = + ((node->count_materialization_scale * max_runs + + node->local.lto_file_data->profile_info.run_max / 2) + / node->local.lto_file_data->profile_info.run_max); + else + scale = node->count_materialization_scale; + node->count_materialization_scale = scale; + if (scale < 0) + fatal_error ("Profile information in %s corrupted", + file_data->file_name); + + if (scale == REG_BR_PROB_BASE) + continue; + for (edge = node->callees; edge; edge = edge->next_callee) + edge->count = ((edge->count * scale + REG_BR_PROB_BASE / 2) + / REG_BR_PROB_BASE); + node->count = ((node->count * scale + REG_BR_PROB_BASE / 2) + / REG_BR_PROB_BASE); + } +} + /* Input and merge the cgraph from each of the .o files passed to lto1. */ @@ -1473,7 +1552,7 @@ input_cgraph (void) &data, &len); if (!ib) fatal_error ("cannot find LTO cgraph in %s", file_data->file_name); - input_profile_summary (ib); + input_profile_summary (ib, file_data); file_data->cgraph_node_encoder = lto_cgraph_encoder_new (); nodes = input_cgraph_1 (file_data, ib); lto_destroy_simple_input_block (file_data, LTO_section_cgraph, @@ -1499,6 +1578,8 @@ input_cgraph (void) VEC_free (cgraph_node_ptr, heap, nodes); VEC_free (varpool_node_ptr, heap, varpool); } + merge_profile_summaries (file_data_vec); + /* Clear out the aux field that was used to store enough state to tell which nodes should be overwritten. */ diff --git a/gcc/lto-streamer-in.c b/gcc/lto-streamer-in.c index f167d400302..fb201449c4f 100644 --- a/gcc/lto-streamer-in.c +++ b/gcc/lto-streamer-in.c @@ -719,7 +719,8 @@ make_new_block (struct function *fn, unsigned int index) /* Read the CFG for function FN from input block IB. */ static void -input_cfg (struct lto_input_block *ib, struct function *fn) +input_cfg (struct lto_input_block *ib, struct function *fn, + int count_materialization_scale) { unsigned int bb_count; basic_block p_bb; @@ -752,7 +753,8 @@ input_cfg (struct lto_input_block *ib, struct function *fn) if (bb == NULL) bb = make_new_block (fn, index); - edge_count = lto_input_uleb128 (ib); + edge_count = (lto_input_uleb128 (ib) * count_materialization_scale + + REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE; /* Connect up the CFG. */ for (i = 0; i < edge_count; i++) @@ -1066,7 +1068,8 @@ input_gimple_stmt (struct lto_input_block *ib, struct data_in *data_in, static void input_bb (struct lto_input_block *ib, enum LTO_tags tag, - struct data_in *data_in, struct function *fn) + struct data_in *data_in, struct function *fn, + int count_materialization_scale) { unsigned int index; basic_block bb; @@ -1079,7 +1082,8 @@ input_bb (struct lto_input_block *ib, enum LTO_tags tag, index = lto_input_uleb128 (ib); bb = BASIC_BLOCK_FOR_FUNCTION (fn, index); - bb->count = lto_input_sleb128 (ib); + bb->count = (lto_input_sleb128 (ib) * count_materialization_scale + + REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE; bb->loop_depth = lto_input_sleb128 (ib); bb->frequency = lto_input_sleb128 (ib); bb->flags = lto_input_sleb128 (ib); @@ -1253,12 +1257,14 @@ input_function (tree fn_decl, struct data_in *data_in, DECL_INITIAL (fn_decl) = lto_input_tree (ib, data_in); gcc_assert (DECL_INITIAL (fn_decl)); DECL_SAVED_TREE (fn_decl) = NULL_TREE; + node = cgraph_node (fn_decl); /* Read all the basic blocks. */ tag = input_record_start (ib); while (tag) { - input_bb (ib, tag, data_in, fn); + input_bb (ib, tag, data_in, fn, + node->count_materialization_scale); tag = input_record_start (ib); } @@ -1300,7 +1306,6 @@ input_function (tree fn_decl, struct data_in *data_in, gimple_set_body (fn_decl, bb_seq (ei_edge (ei)->dest)); } - node = cgraph_node (fn_decl); fixup_call_stmt_edges (node, stmts); execute_all_ipa_stmt_fixups (node, stmts); @@ -1393,6 +1398,7 @@ lto_read_body (struct lto_file_decl_data *file_data, tree fn_decl, { struct function *fn = DECL_STRUCT_FUNCTION (fn_decl); struct lto_in_decl_state *decl_state; + struct cgraph_node *node = cgraph_node (fn_decl); push_cfun (fn); init_tree_ssa (fn); @@ -1402,7 +1408,7 @@ lto_read_body (struct lto_file_decl_data *file_data, tree fn_decl, gcc_assert (decl_state); file_data->current_decl_state = decl_state; - input_cfg (&ib_cfg, fn); + input_cfg (&ib_cfg, fn, node->count_materialization_scale); /* Set up the struct function. */ input_function (fn_decl, data_in, &ib_main); diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h index ee57ba744f6..b82c54ede1c 100644 --- a/gcc/lto-streamer.h +++ b/gcc/lto-streamer.h @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see #include "vec.h" #include "vecprim.h" #include "alloc-pool.h" +#include "gcov-io.h" /* Define when debugging the LTO streamer. This causes the writer to output the numeric value for the memory address of the tree node @@ -610,6 +611,8 @@ struct GTY(()) lto_file_decl_data /* Symbol resolutions for this file */ VEC(ld_plugin_symbol_resolution_t,heap) * GTY((skip)) resolutions; + + struct gcov_ctr_summary GTY((skip)) profile_info; }; typedef struct lto_file_decl_data *lto_file_decl_data_ptr; |