summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>2015-04-01 07:41:17 +0000
committerhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>2015-04-01 07:41:17 +0000
commit71e37927a59cb6c7a102008d82a23ccedfae5864 (patch)
treeeac1696d54046990c5a2e48ab0b4ac01dab188be
parenta6cb943740cf3ef01acc5e2d28c343a12b814329 (diff)
downloadgcc-71e37927a59cb6c7a102008d82a23ccedfae5864.tar.gz
* lto-cgraph.c (lto_output_node, input_overwrite_node): Stream
split_part. * ipa-inline.c (edge_badness): Add wrapper penalty. (sum_callers): Move up. (inline_small_functions): Set single_caller. * ipa-inline.h (inline_summary): Add single_caller. * ipa-split.c (split_function): Set split_part. (cgraph_node::create_clone): Do not shadow decl; copy split_part. * cgraph.h (cgraph_node): Add split_part. * gcc.dg/ipa/inlinehint-4.c: New testcase. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@221806 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog12
-rw-r--r--gcc/cgraph.h2
-rw-r--r--gcc/ipa-inline.c117
-rw-r--r--gcc/ipa-inline.h3
-rw-r--r--gcc/ipa-split.c2
-rw-r--r--gcc/lto-cgraph.c2
-rw-r--r--gcc/testsuite/ChangeLog4
-rw-r--r--gcc/testsuite/gcc.dg/ipa/inlinehint-4.c40
8 files changed, 161 insertions, 21 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f7070967575..9c39d764e01 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2015-03-31 Jan Hubicka <hubicka@ucw.cz>
+
+ * lto-cgraph.c (lto_output_node, input_overwrite_node): Stream
+ split_part.
+ * ipa-inline.c (edge_badness): Add wrapper penalty.
+ (sum_callers): Move up.
+ (inline_small_functions): Set single_caller.
+ * ipa-inline.h (inline_summary): Add single_caller.
+ * ipa-split.c (split_function): Set split_part.
+ (cgraph_node::create_clone): Do not shadow decl; copy split_part.
+ * cgraph.h (cgraph_node): Add split_part.
+
2015-03-31 Uros Bizjak <ubizjak@gmail.com>
PR target/58945
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 650e68921f3..cf8c7b64b9b 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1319,6 +1319,8 @@ public:
unsigned merged : 1;
/* True if function was created to be executed in parallel. */
unsigned parallelized_function : 1;
+ /* True if function is part split out by ipa-split. */
+ unsigned split_part : 1;
private:
/* Worker for call_for_symbol_and_aliases. */
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index bc328468fab..77d6d85025a 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -1088,6 +1088,7 @@ edge_badness (struct cgraph_edge *edge, bool dump)
else if (opt_for_fn (caller->decl, flag_guess_branch_prob) || caller->count)
{
sreal numerator, denominator;
+ int overall_growth;
numerator = (compute_uninlined_call_time (callee_info, edge)
- compute_inlined_call_time (edge, edge_time));
@@ -1098,8 +1099,74 @@ edge_badness (struct cgraph_edge *edge, bool dump)
else if (opt_for_fn (caller->decl, flag_branch_probabilities))
numerator = numerator >> 11;
denominator = growth;
- if (callee_info->growth > 0)
- denominator *= callee_info->growth * callee_info->growth;
+
+ overall_growth = callee_info->growth;
+
+ /* Look for inliner wrappers of the form:
+
+ inline_caller ()
+ {
+ do_fast_job...
+ if (need_more_work)
+ noninline_callee ();
+ }
+ Withhout panilizing this case, we usually inline noninline_callee
+ into the inline_caller because overall_growth is small preventing
+ further inlining of inline_caller.
+
+ Penalize only callgraph edges to functions with small overall
+ growth ...
+ */
+ if (growth > overall_growth
+ /* ... and having only one caller which is not inlined ... */
+ && callee_info->single_caller
+ && !edge->caller->global.inlined_to
+ /* ... and edges executed only conditionally ... */
+ && edge->frequency < CGRAPH_FREQ_BASE
+ /* ... consider case where callee is not inline but caller is ... */
+ && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
+ && DECL_DECLARED_INLINE_P (caller->decl))
+ /* ... or when early optimizers decided to split and edge
+ frequency still indicates splitting is a win ... */
+ || (callee->split_part && !caller->split_part
+ && edge->frequency
+ < CGRAPH_FREQ_BASE
+ * PARAM_VALUE
+ (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100
+ /* ... and do not overwrite user specified hints. */
+ && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
+ || DECL_DECLARED_INLINE_P (caller->decl)))))
+ {
+ struct inline_summary *caller_info = inline_summaries->get (caller);
+ int caller_growth = caller_info->growth;
+
+ /* Only apply the penalty when caller looks like inline candidate,
+ and it is not called once and. */
+ if (!caller_info->single_caller && overall_growth < caller_growth
+ && caller_info->inlinable
+ && caller_info->size
+ < (DECL_DECLARED_INLINE_P (caller->decl)
+ ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
+ {
+ if (dump)
+ fprintf (dump_file,
+ " Wrapper penalty. Increasing growth %i to %i\n",
+ overall_growth, caller_growth);
+ overall_growth = caller_growth;
+ }
+ }
+ if (overall_growth > 0)
+ {
+ /* Strongly preffer functions with few callers that can be inlined
+ fully. The square root here leads to smaller binaries at average.
+ Watch however for extreme cases and return to linear function
+ when growth is large. */
+ if (overall_growth < 256)
+ overall_growth *= overall_growth;
+ else
+ overall_growth += 256 * 256 - 256;
+ denominator *= overall_growth;
+ }
badness = - numerator / denominator;
@@ -1109,13 +1176,15 @@ edge_badness (struct cgraph_edge *edge, bool dump)
" %f: guessed profile. frequency %f, count %"PRId64
" caller count %"PRId64
" time w/o inlining %f, time w inlining %f"
- " overall growth %i (current) %i (original)\n",
- badness.to_double (), (double)edge->frequency / CGRAPH_FREQ_BASE,
+ " overall growth %i (current) %i (original)"
+ " %i (compensated)\n",
+ badness.to_double (),
+ (double)edge->frequency / CGRAPH_FREQ_BASE,
edge->count, caller->count,
compute_uninlined_call_time (callee_info, edge).to_double (),
compute_inlined_call_time (edge, edge_time).to_double (),
estimate_growth (callee),
- callee_info->growth);
+ callee_info->growth, overall_growth);
}
}
/* When function local profile is not available or it does not give
@@ -1133,8 +1202,8 @@ edge_badness (struct cgraph_edge *edge, bool dump)
else
badness = badness << nest;
if (dump)
- fprintf (dump_file, " %f: no profile. nest %i\n", badness.to_double (),
- nest);
+ fprintf (dump_file, " %f: no profile. nest %i\n",
+ badness.to_double (), nest);
}
gcc_checking_assert (badness != 0);
@@ -1649,6 +1718,20 @@ inline_account_function_p (struct cgraph_node *node)
&& node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
}
+/* Count number of callers of NODE and store it into DATA (that
+ points to int. Worker for cgraph_for_node_and_aliases. */
+
+static bool
+sum_callers (struct cgraph_node *node, void *data)
+{
+ struct cgraph_edge *e;
+ int *num_calls = (int *)data;
+
+ for (e = node->callers; e; e = e->next_caller)
+ (*num_calls)++;
+ return false;
+}
+
/* We use greedy algorithm for inlining of small functions:
All inline candidates are put into prioritized heap ordered in
increasing badness.
@@ -1693,6 +1776,12 @@ inline_small_functions (void)
if (inline_account_function_p (node))
initial_size += info->size;
info->growth = estimate_growth (node);
+
+ int num_calls = 0;
+ node->call_for_symbol_and_aliases (sum_callers, &num_calls,
+ true);
+ if (num_calls == 1)
+ info->single_caller = true;
if (dfs && dfs->next_cycle)
{
struct cgraph_node *n2;
@@ -2085,20 +2174,6 @@ flatten_function (struct cgraph_node *node, bool early)
inline_update_overall_summary (node);
}
-/* Count number of callers of NODE and store it into DATA (that
- points to int. Worker for cgraph_for_node_and_aliases. */
-
-static bool
-sum_callers (struct cgraph_node *node, void *data)
-{
- struct cgraph_edge *e;
- int *num_calls = (int *)data;
-
- for (e = node->callers; e; e = e->next_caller)
- (*num_calls)++;
- return false;
-}
-
/* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases.
DATA points to number of calls originally found so we avoid infinite
recursion. */
diff --git a/gcc/ipa-inline.h b/gcc/ipa-inline.h
index ed4d66fef4a..85041f67dd7 100644
--- a/gcc/ipa-inline.h
+++ b/gcc/ipa-inline.h
@@ -129,6 +129,9 @@ struct GTY(()) inline_summary
/* True when function contains cilk spawn (and thus we can not inline
into it). */
unsigned contains_cilk_spawn : 1;
+ /* True wen there is only one caller of the function before small function
+ inlining. */
+ unsigned int single_caller : 1;
/* Information about function that will result after applying all the
inline decisions present in the callgraph. Generally kept up to
diff --git a/gcc/ipa-split.c b/gcc/ipa-split.c
index 5d5db0e4eee..a28f3a1ad92 100644
--- a/gcc/ipa-split.c
+++ b/gcc/ipa-split.c
@@ -1402,6 +1402,8 @@ split_function (basic_block return_bb, struct split_point *split_point,
(vNULL, NULL, args_to_skip, !split_part_return_p, split_point->split_bbs,
split_point->entry_bb, "part");
+ node->split_part = true;
+
/* Let's take a time profile for splitted function. */
node->tp_first_run = cur_node->tp_first_run + 1;
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index 088de860646..fa18d363b20 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -578,6 +578,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
bp_pack_enum (&bp, ld_plugin_symbol_resolution,
LDPR_NUM_KNOWN, node->resolution);
bp_pack_value (&bp, node->instrumentation_clone, 1);
+ bp_pack_value (&bp, node->split_part, 1);
streamer_write_bitpack (&bp);
streamer_write_data_stream (ob->main_stream, section, strlen (section) + 1);
@@ -1214,6 +1215,7 @@ input_overwrite_node (struct lto_file_decl_data *file_data,
node->resolution = bp_unpack_enum (bp, ld_plugin_symbol_resolution,
LDPR_NUM_KNOWN);
node->instrumentation_clone = bp_unpack_value (bp, 1);
+ node->split_part = bp_unpack_value (bp, 1);
gcc_assert (flag_ltrans
|| (!node->in_other_partition
&& !node->used_from_other_partition));
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c9739af8ddc..0dd0421fce7 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2015-03-31 Jan Hubicka <hubicka@ucw.cz>
+
+ * gcc.dg/ipa/inlinehint-4.c: New testcase.
+
2015-03-31 Alex Velenko <Alex.Velenko@arm.com>
* gcc.target/arm/pr45701-1.c (history_expand_line_internal): Add an
diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-4.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-4.c
new file mode 100644
index 00000000000..52d2f1a6763
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-4.c
@@ -0,0 +1,40 @@
+/* { dg-options "-O3 -fdump-ipa-inline-details -fno-early-inlining --param large-unit-insns=1" } */
+/* { dg-add-options bind_pic_locally } */
+int *hashval;
+int *hash;
+int hash_size;
+
+static int
+lookup_slow (int val)
+{
+ int i = val % hash_size;
+ while (hashval[i] && hashval[i] != val)
+ i++;
+ return hash[i];
+}
+
+static inline int
+lookup (int val)
+{
+ static int cache, cache_val;
+ if (val == cache_val)
+ return cache;
+ else
+ {
+ cache_val = val;
+ cache = lookup_slow (val);
+ return cache;
+ }
+}
+
+int
+test (int i)
+{
+ return lookup (i) + lookup (2 * i) + lookup (3 * i) + lookup (4 * i) +
+ lookup (5 * i) + lookup (6 * i) + lookup (7 * i) + lookup (8 * i) +
+ lookup (9 * i);
+}
+/* { dg-final { scan-ipa-dump "Wrapper penalty" "inline" } } */
+/* { dg-final { scan-ipa-dump-not "Inlining lookup_slow to lookup" "inline" } } */
+/* { dg-final { scan-ipa-dump "Inlining lookup to test" "inline" } } */
+/* { dg-final { cleanup-ipa-dump "inline" } } */