summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: tejohnson <tejohnson@138bc75d-0d04-0410-961f-82ee72b054a4> 2013-11-12 15:03:28 +0000
committer: tejohnson <tejohnson@138bc75d-0d04-0410-961f-82ee72b054a4> 2013-11-12 15:03:28 +0000
commit: 38a65d4ef7e87789fbbfed30df8226be9a3ba85f (patch)
tree: dd1e6b187157352f6f08341a70daaedf473980fa /gcc
parent: 6d3c030fe155dda153688b331c17ba52017f758f (diff)
download: gcc-38a65d4ef7e87789fbbfed30df8226be9a3ba85f.tar.gz
2013-11-12 Teresa Johnson <tejohnson@google.com>
Jan Hubicka <jh@suse.cz> * predict.c (drop_profile): New function. (handle_missing_profiles): Ditto. (counts_to_freqs): Don't overwrite estimated frequencies when function has no profile counts. * predict.h (handle_missing_profiles): Declare. * tree-inline.c (freqs_to_counts): New function. (copy_cfg_body): Invoke freqs_to_counts as needed. * tree-profile.c (tree_profiling): Invoke handle_missing_profiles. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@204704 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 12
-rw-r--r-- gcc/predict.c 116
-rw-r--r-- gcc/predict.h 1
-rw-r--r-- gcc/tree-inline.c 41
-rw-r--r-- gcc/tree-profile.c 2
5 files changed, 172 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6667b33ea2d..32bd0ece099 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2013-11-12 Teresa Johnson <tejohnson@google.com>
+ Jan Hubicka <jh@suse.cz>
+
+ * predict.c (drop_profile): New function.
+ (handle_missing_profiles): Ditto.
+ (counts_to_freqs): Don't overwrite estimated frequencies
+ when function has no profile counts.
+ * predict.h (handle_missing_profiles): Declare.
+ * tree-inline.c (freqs_to_counts): New function.
+ (copy_cfg_body): Invoke freqs_to_counts as needed.
+ * tree-profile.c (tree_profiling): Invoke handle_missing_profiles.
+
2013-11-12 H.J. Lu <hongjiu.lu@intel.com>
PR target/59088
diff --git a/gcc/predict.c b/gcc/predict.c
index cc9a0534e03..2a500eaa78d 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -2765,6 +2765,116 @@ estimate_loops (void)
BITMAP_FREE (tovisit);
}
+/* Discard the profile read for NODE: set the function's profile status to
+   PROFILE_GUESSED when flag_guess_branch_prob is set and to PROFILE_ABSENT
+   otherwise, then set NODE's frequency to hot or normal according to HOT.  */
+
+static void
+drop_profile (struct cgraph_node *node, bool hot)
+{
+  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
+  const char *heat = hot ? "Function is hot" : "Function is normal";
+
+  if (dump_file)
+    fprintf (dump_file,
+             "Dropping 0 profile for %s/%i. %s based on calls.\n",
+             cgraph_node_name (node), node->order, heat);
+
+  /* A missing profile on a function reached via non-zero call edges is
+     only expected when the function may have been linked from another
+     module or library (COMDATs and extern templates).  See the comments
+     below for handle_missing_profiles.  Any other function is reported:
+     as an error normally, or only in the dump file when
+     flag_profile_correction is set.  */
+  if (!(DECL_COMDAT (node->decl) || DECL_EXTERNAL (node->decl)))
+    {
+      if (!flag_profile_correction)
+        error ("Missing counts for called function %s/%i",
+               cgraph_node_name (node), node->order);
+      else if (dump_file)
+        fprintf (dump_file,
+                 "Missing counts for called function %s/%i\n",
+                 cgraph_node_name (node), node->order);
+    }
+
+  if (flag_guess_branch_prob)
+    profile_status_for_function (fn) = PROFILE_GUESSED;
+  else
+    profile_status_for_function (fn) = PROFILE_ABSENT;
+
+  if (hot)
+    node->frequency = NODE_FREQUENCY_HOT;
+  else
+    node->frequency = NODE_FREQUENCY_NORMAL;
+}
+
+/* In the case of COMDAT routines, multiple object files will contain the same
+   function and the linker will select one for the binary.  In that case
+   all the other copies from the profile-instrumented binary will be missing
+   profile counts.  Look for cases where this happened, due to non-zero
+   call counts going to 0-count functions, and drop the profile to guessed
+   so that we can use the estimated probabilities and avoid optimizing only
+   for size.
+
+   The other case where the profile may be missing is when the routine
+   is not going to be emitted to the object file, e.g. for "extern template"
+   class methods.  Those will be marked DECL_EXTERNAL.  In all other cases
+   of non-zero calls to 0-count functions, drop_profile reports the missing
+   counts: as an error, or only in the dump file when
+   flag_profile_correction is set.
+
+   The work is done in two phases.  First, every defined 0-count function
+   with a CFG whose summed caller edge count, multiplied by the
+   UNLIKELY_BB_COUNT_FRACTION parameter, reaches profile_info->runs has its
+   profile dropped and is queued.  Second, the drop is propagated along the
+   callee edges of queued functions to 0-count COMDATs whose profile status
+   is still PROFILE_READ.  */
+
+void
+handle_missing_profiles (void)
+{
+  struct cgraph_node *node;
+  int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
+  vec<struct cgraph_node *> worklist;
+  worklist.create (64);
+
+  /* See if 0 count function has non-0 count callers.  In this case we
+     lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
+  FOR_EACH_DEFINED_FUNCTION (node)
+    {
+      struct cgraph_edge *e;
+      gcov_type call_count = 0;
+      struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
+
+      if (node->count)
+        continue;
+      /* Total count over all incoming call edges.  */
+      for (e = node->callers; e; e = e->next_caller)
+        call_count += e->count;
+      /* call_count is tested first, so the remaining checks are only
+         evaluated for functions that were actually called.  */
+      if (call_count
+          && fn && fn->cfg
+          && (call_count * unlikely_count_fraction >= profile_info->runs))
+        {
+          bool maybe_hot = maybe_hot_count_p (NULL, call_count);
+
+          drop_profile (node, maybe_hot);
+          worklist.safe_push (node);
+        }
+    }
+
+  /* Propagate the profile dropping to other 0-count COMDATs that are
+     potentially called by COMDATs we already dropped the profile on.  */
+  while (worklist.length () > 0)
+    {
+      struct cgraph_edge *e;
+
+      node = worklist.pop ();
+      /* Walk NODE's outgoing call edges.  The callees list is chained
+         through next_callee; next_caller would instead follow the
+         caller chain of the first callee.  */
+      for (e = node->callees; e; e = e->next_callee)
+        {
+          struct cgraph_node *callee = e->callee;
+          struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
+
+          if (callee->count > 0)
+            continue;
+          if (DECL_COMDAT (callee->decl) && fn && fn->cfg
+              && profile_status_for_function (fn) == PROFILE_READ)
+            {
+              /* Since there are no non-0 call counts to this function,
+                 we don't know for sure whether it is hot.  Indicate to
+                 the drop_profile routine that function should be marked
+                 normal, rather than hot.  Dropping CALLEE (not NODE)
+                 also moves its status away from PROFILE_READ, so it is
+                 queued at most once even on a call cycle.  */
+              drop_profile (callee, false);
+              worklist.safe_push (callee);
+            }
+        }
+    }
+  worklist.release ();
+}
+
/* Convert counts measured by profile driven feedback to frequencies.
Return nonzero iff there was any nonzero execution count. */
@@ -2774,6 +2884,12 @@ counts_to_freqs (void)
gcov_type count_max, true_count_max = 0;
basic_block bb;
+ /* Don't overwrite the estimated frequencies when the profile for
+ the function is missing. We may drop this function to PROFILE_GUESSED
+ later in drop_profile (). */
+ if (!ENTRY_BLOCK_PTR->count)
+ return 0;
+
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
true_count_max = MAX (bb->count, true_count_max);
diff --git a/gcc/predict.h b/gcc/predict.h
index 02650e2d55c..83b1695c65e 100644
--- a/gcc/predict.h
+++ b/gcc/predict.h
@@ -37,6 +37,7 @@ enum prediction
extern void predict_insn_def (rtx, enum br_predictor, enum prediction);
extern int counts_to_freqs (void);
+extern void handle_missing_profiles (void);
extern void estimate_bb_frequencies (bool);
extern const char *predictor_name (enum br_predictor);
extern tree build_predict_expr (enum br_predictor, enum prediction);
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 77013b390cb..0d1f1c7621b 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -2353,6 +2353,29 @@ redirect_all_calls (copy_body_data * id, basic_block bb)
}
}
+/* Populate the profile counts of NODE's body from its estimated
+   frequencies.  Every basic block receives COUNT scaled via
+   GCOV_COMPUTE_SCALE (frequency, BB_FREQ_MAX), and every successor edge
+   receives its source block's count with the edge probability applied.
+   Used when NODE has a 0-weight entry but we are about to inline it into
+   a non-zero count call bb.  See the comments for
+   handle_missing_profiles() in predict.c for when this can happen for
+   COMDATs.  */
+
+void
+freqs_to_counts (struct cgraph_node *node, gcov_type count)
+{
+  struct function *callee_fn = DECL_STRUCT_FUNCTION (node->decl);
+  basic_block block;
+
+  FOR_ALL_BB_FN (block, callee_fn)
+    {
+      edge_iterator succ_iter;
+      edge succ;
+
+      block->count
+        = apply_scale (count,
+                       GCOV_COMPUTE_SCALE (block->frequency, BB_FREQ_MAX));
+
+      /* Presumably succ->src is BLOCK for every edge in block->succs, so
+         this reads the count stored just above -- confirm against the CFG
+         invariants.  */
+      FOR_EACH_EDGE (succ, succ_iter, block->succs)
+        succ->count = apply_probability (succ->src->count, succ->probability);
+    }
+}
+
/* Make a copy of the body of FN so that it can be inserted inline in
another function. Walks FN via CFG, returns new fndecl. */
@@ -2373,6 +2396,24 @@ copy_cfg_body (copy_body_data * id, gcov_type count, int frequency_scale,
int incoming_frequency = 0;
gcov_type incoming_count = 0;
+ /* This can happen for COMDAT routines that end up with 0 counts
+ despite being called (see the comments for handle_missing_profiles()
+ in predict.c as to why). Apply counts to the blocks in the callee
+ before inlining, using the guessed edge frequencies, so that we don't
+ end up with a 0-count inline body which can confuse downstream
+ optimizations such as function splitting. */
+ if (!ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count && count)
+ {
+ /* Apply the larger of the call bb count and the total incoming
+ call edge count to the callee. */
+ gcov_type in_count = 0;
+ struct cgraph_edge *in_edge;
+ for (in_edge = id->src_node->callers; in_edge;
+ in_edge = in_edge->next_caller)
+ in_count += in_edge->count;
+ freqs_to_counts (id->src_node, count > in_count ? count : in_count);
+ }
+
if (ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count)
count_scale
= GCOV_COMPUTE_SCALE (count,
diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c
index 43d5b923e1f..9f9dba854fd 100644
--- a/gcc/tree-profile.c
+++ b/gcc/tree-profile.c
@@ -645,6 +645,8 @@ tree_profiling (void)
pop_cfun ();
}
+ handle_missing_profiles ();
+
del_node_map ();
return 0;
}