diff options
author | tejohnson <tejohnson@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-11-12 15:03:28 +0000 |
---|---|---|
committer | tejohnson <tejohnson@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-11-12 15:03:28 +0000 |
commit | 38a65d4ef7e87789fbbfed30df8226be9a3ba85f (patch) | |
tree | dd1e6b187157352f6f08341a70daaedf473980fa /gcc | |
parent | 6d3c030fe155dda153688b331c17ba52017f758f (diff) | |
download | gcc-38a65d4ef7e87789fbbfed30df8226be9a3ba85f.tar.gz |
2013-11-12 Teresa Johnson <tejohnson@google.com>
Jan Hubicka <jh@suse.cz>
* predict.c (drop_profile): New function.
(handle_missing_profiles): Ditto.
(counts_to_freqs): Don't overwrite estimated frequencies
when function has no profile counts.
* predict.h (handle_missing_profiles): Declare.
* tree-inline.c (freqs_to_counts): New function.
(copy_cfg_body): Invoke freqs_to_counts as needed.
* tree-profile.c (tree_profiling): Invoke handle_missing_profiles.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@204704 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/predict.c | 116 | ||||
-rw-r--r-- | gcc/predict.h | 1 | ||||
-rw-r--r-- | gcc/tree-inline.c | 41 | ||||
-rw-r--r-- | gcc/tree-profile.c | 2 |
5 files changed, 172 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6667b33ea2d..32bd0ece099 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2013-11-12  Teresa Johnson  <tejohnson@google.com>
+	    Jan Hubicka  <jh@suse.cz>
+
+	* predict.c (drop_profile): New function.
+	(handle_missing_profiles): Ditto.
+	(counts_to_freqs): Don't overwrite estimated frequencies
+	when function has no profile counts.
+	* predict.h (handle_missing_profiles): Declare.
+	* tree-inline.c (freqs_to_counts): New function.
+	(copy_cfg_body): Invoke freqs_to_counts as needed.
+	* tree-profile.c (tree_profiling): Invoke handle_missing_profiles.
+
 2013-11-12  H.J. Lu  <hongjiu.lu@intel.com>
 
 	PR target/59088
diff --git a/gcc/predict.c b/gcc/predict.c
index cc9a0534e03..2a500eaa78d 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -2765,6 +2765,116 @@ estimate_loops (void)
   BITMAP_FREE (tovisit);
 }
 
+/* Drop the profile for NODE to guessed, and update its frequency based on
+   whether it is expected to be HOT.  */
+
+static void
+drop_profile (struct cgraph_node *node, bool hot)
+{
+  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
+
+  if (dump_file)
+    fprintf (dump_file,
+             "Dropping 0 profile for %s/%i. %s based on calls.\n",
+             cgraph_node_name (node), node->order,
+             hot ? "Function is hot" : "Function is normal");
+  /* We only expect to miss profiles for functions that are reached
+     via non-zero call edges in cases where the function may have
+     been linked from another module or library (COMDATs and extern
+     templates).  See the comments below for handle_missing_profiles.  */
+  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl))
+    {
+      if (flag_profile_correction)
+        {
+          if (dump_file)
+            fprintf (dump_file,
+                     "Missing counts for called function %s/%i\n",
+                     cgraph_node_name (node), node->order);
+        }
+      else
+        error ("Missing counts for called function %s/%i",
+               cgraph_node_name (node), node->order);
+    }
+
+  profile_status_for_function (fn)
+      = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
+  node->frequency
+      = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
+}
+
+/* In the case of COMDAT routines, multiple object files will contain the same
+   function and the linker will select one for the binary.  In that case
+   all the other copies from the profile instrumented binary will be missing
+   profile counts.  Look for cases where this happened, due to non-zero
+   call counts going to 0-count functions, and drop the profile to guessed
+   so that we can use the estimated probabilities and avoid optimizing only
+   for size.
+
+   The other case where the profile may be missing is when the routine
+   is not going to be emitted to the object file, e.g. for "extern template"
+   class methods.  Those will be marked DECL_EXTERNAL.  drop_profile diagnoses
+   all other cases of non-zero calls to 0-count functions.  */
+
+void
+handle_missing_profiles (void)
+{
+  struct cgraph_node *node;
+  int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
+  vec<struct cgraph_node *> worklist;
+  worklist.create (64);
+
+  /* See if 0 count function has non-0 count callers.  In this case we
+     lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
+  FOR_EACH_DEFINED_FUNCTION (node)
+    {
+      struct cgraph_edge *e;
+      gcov_type call_count = 0;
+      struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
+
+      if (node->count)
+        continue;
+      for (e = node->callers; e; e = e->next_caller)
+        call_count += e->count;
+      if (call_count
+          && fn && fn->cfg
+          && (call_count * unlikely_count_fraction >= profile_info->runs))
+        {
+          bool maybe_hot = maybe_hot_count_p (NULL, call_count);
+
+          drop_profile (node, maybe_hot);
+          worklist.safe_push (node);
+        }
+    }
+
+  /* Propagate the profile dropping to other 0-count COMDATs that are
+     potentially called by COMDATs we already dropped the profile on.  */
+  while (worklist.length () > 0)
+    {
+      struct cgraph_edge *e;
+
+      node = worklist.pop ();
+      for (e = node->callees; e; e = e->next_callee)
+        {
+          struct cgraph_node *callee = e->callee;
+          struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
+
+          if (callee->count > 0)
+            continue;
+          if (DECL_COMDAT (callee->decl) && fn && fn->cfg
+              && profile_status_for_function (fn) == PROFILE_READ)
+            {
+              /* Since there are no non-0 call counts to this function,
+                 we don't know for sure whether it is hot.  Indicate to
+                 the drop_profile routine that function should be marked
+                 normal, rather than hot.  */
+              drop_profile (callee, false);
+              worklist.safe_push (callee);
+            }
+        }
+    }
+  worklist.release ();
+}
+
 /* Convert counts measured by profile driven feedback to frequencies.
    Return nonzero iff there was any nonzero execution count.  */
 
@@ -2774,6 +2884,12 @@ counts_to_freqs (void)
   gcov_type count_max, true_count_max = 0;
   basic_block bb;
 
+  /* Don't overwrite the estimated frequencies when the profile for
+     the function is missing.  We may drop this function to PROFILE_GUESSED
+     later in drop_profile ().  */
+  if (!ENTRY_BLOCK_PTR->count)
+    return 0;
+
   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
     true_count_max = MAX (bb->count, true_count_max);
 
diff --git a/gcc/predict.h b/gcc/predict.h
index 02650e2d55c..83b1695c65e 100644
--- a/gcc/predict.h
+++ b/gcc/predict.h
@@ -37,6 +37,7 @@ enum prediction
 
 extern void predict_insn_def (rtx, enum br_predictor, enum prediction);
 extern int counts_to_freqs (void);
+extern void handle_missing_profiles (void);
 extern void estimate_bb_frequencies (bool);
 extern const char *predictor_name (enum br_predictor);
 extern tree build_predict_expr (enum br_predictor, enum prediction);
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 77013b390cb..0d1f1c7621b 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -2353,6 +2353,29 @@ redirect_all_calls (copy_body_data * id, basic_block bb)
     }
 }
 
+/* Convert estimated frequencies into counts for NODE, scaling COUNT
+   with each bb's frequency.  Used when NODE has a 0-weight entry
+   but we are about to inline it into a non-zero count call bb.
+   See the comments for handle_missing_profiles() in predict.c for
+   when this can happen for COMDATs.  */
+
+void
+freqs_to_counts (struct cgraph_node *node, gcov_type count)
+{
+  basic_block bb;
+  edge_iterator ei;
+  edge e;
+  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
+
+  FOR_ALL_BB_FN(bb, fn)
+    {
+      bb->count = apply_scale (count,
+                               GCOV_COMPUTE_SCALE (bb->frequency, BB_FREQ_MAX));
+      FOR_EACH_EDGE (e, ei, bb->succs)
+        e->count = apply_probability (e->src->count, e->probability);
+    }
+}
+
 /* Make a copy of the body of FN so that it can be inserted inline in
    another function.  Walks FN via CFG, returns new fndecl.  */
 
@@ -2373,6 +2396,24 @@ copy_cfg_body (copy_body_data * id, gcov_type count, int frequency_scale,
   int incoming_frequency = 0;
   gcov_type incoming_count = 0;
 
+  /* This can happen for COMDAT routines that end up with 0 counts
+     despite being called (see the comments for handle_missing_profiles()
+     in predict.c as to why).  Apply counts to the blocks in the callee
+     before inlining, using the guessed edge frequencies, so that we don't
+     end up with a 0-count inline body which can confuse downstream
+     optimizations such as function splitting.  */
+  if (!ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count && count)
+    {
+      /* Apply the larger of the call bb count and the total incoming
+         call edge count to the callee.  */
+      gcov_type in_count = 0;
+      struct cgraph_edge *in_edge;
+      for (in_edge = id->src_node->callers; in_edge;
+           in_edge = in_edge->next_caller)
+        in_count += in_edge->count;
+      freqs_to_counts (id->src_node, count > in_count ? count : in_count);
+    }
+
   if (ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count)
     count_scale
         = GCOV_COMPUTE_SCALE (count,
diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c
index 43d5b923e1f..9f9dba854fd 100644
--- a/gcc/tree-profile.c
+++ b/gcc/tree-profile.c
@@ -645,6 +645,8 @@ tree_profiling (void)
       pop_cfun ();
     }
 
+  handle_missing_profiles ();
+
   del_node_map ();
   return 0;
 }