Diffstat (limited to 'gcc/ipa-inline.c')
-rw-r--r--  gcc/ipa-inline.c | 484
1 file changed, 362 insertions, 122 deletions
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index 6eede0d35fc..1e22d6eb87b 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -113,10 +113,12 @@ along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "ipa-inline.h"
 #include "ipa-utils.h"
+#include "sreal.h"
 
 /* Statistics we collect about inlining algorithm.  */
 static int overall_size;
 static gcov_type max_count;
+static sreal max_count_real, max_relbenefit_real, half_int_min_real;
 
 /* Return false when inlining edge E would lead to violating
    limits on function unit growth or stack usage growth.
@@ -229,10 +231,13 @@ report_inline_failed_reason (struct cgraph_edge *e)
    We check whether inlining is possible at all and whether
    caller growth limits allow doing so.
 
-   if REPORT is true, output reason to the dump file.  */
+   if REPORT is true, output reason to the dump file.
+
+   if DISREGARD_LIMITS is true, ignore size limits.  */
 
 static bool
-can_inline_edge_p (struct cgraph_edge *e, bool report)
+can_inline_edge_p (struct cgraph_edge *e, bool report,
+                   bool disregard_limits = false)
 {
   bool inlinable = true;
   enum availability avail;
@@ -309,6 +314,7 @@ can_inline_edge_p (struct cgraph_edge *e, bool report)
     }
   /* Check if caller growth allows the inlining.  */
   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)
+           && !disregard_limits
            && !lookup_attribute ("flatten",
                                  DECL_ATTRIBUTES
                                    (e->caller->global.inlined_to
@@ -744,6 +750,15 @@ check_caller_edge (struct cgraph_node *node, void *edge)
           && node->callers != edge);
 }
 
+/* If NODE has a caller, return true.  */
+
+static bool
+has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
+{
+  if (node->callers)
+    return true;
+  return false;
+}
 
 /* Decide if inlining NODE would reduce unit size by eliminating
    the offline copy of function.
@@ -757,7 +772,7 @@ want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
   bool has_hot_call = false;
 
   /* Does it have callers?  */
-  if (!node->callers)
+  if (!cgraph_for_node_and_aliases (node, has_caller_p, NULL, true))
     return false;
   /* Already inlined?  */
   if (function->global.inlined_to)
@@ -887,12 +902,26 @@ edge_badness (struct cgraph_edge *edge, bool dump)
 
   else if (max_count)
     {
+      sreal tmp, relbenefit_real, growth_real;
       int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
-      badness =
-        ((int)
-         ((double) edge->count * INT_MIN / 2 / max_count / RELATIVE_TIME_BENEFIT_RANGE) *
-         relbenefit) / growth;
-
+
+      sreal_init (&relbenefit_real, relbenefit, 0);
+      sreal_init (&growth_real, growth, 0);
+
+      /* relative_edge_count.  */
+      sreal_init (&tmp, edge->count, 0);
+      sreal_div (&tmp, &tmp, &max_count_real);
+
+      /* relative_time_benefit.  */
+      sreal_mul (&tmp, &tmp, &relbenefit_real);
+      sreal_div (&tmp, &tmp, &max_relbenefit_real);
+
+      /* growth_f_caller.  */
+      sreal_mul (&tmp, &tmp, &half_int_min_real);
+      sreal_div (&tmp, &tmp, &growth_real);
+
+      badness = -1 * sreal_to_int (&tmp);
+
      /* Be sure that insanity of the profile won't lead to increasing counts
	 in the scaling and thus to overflow in the computation above.  */
      gcc_assert (max_count >= edge->count);
@@ -1388,6 +1417,92 @@ add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge_p> new_edges)
     }
 }
 
+/* Remove EDGE from the fibheap.  */
+
+static void
+heap_edge_removal_hook (struct cgraph_edge *e, void *data)
+{
+  if (e->callee)
+    reset_node_growth_cache (e->callee);
+  if (e->aux)
+    {
+      fibheap_delete_node ((fibheap_t) data, (fibnode_t) e->aux);
+      e->aux = NULL;
+    }
+}
+
+/* Return true if speculation of edge E seems useful.
+   If ANTICIPATE_INLINING is true, be conservative and hope that E
+   may get inlined.  */
+
+bool
+speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
+{
+  enum availability avail;
+  struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
+  struct cgraph_edge *direct, *indirect;
+  struct ipa_ref *ref;
+
+  gcc_assert (e->speculative && !e->indirect_unknown_callee);
+
+  if (!cgraph_maybe_hot_edge_p (e))
+    return false;
+
+  /* See if IP optimizations found something potentially useful about the
+     function.  For now we look only for CONST/PURE flags.  Almost everything
+     else we propagate is useless.  */
+  if (avail >= AVAIL_AVAILABLE)
+    {
+      int ecf_flags = flags_from_decl_or_type (target->symbol.decl);
+      if (ecf_flags & ECF_CONST)
+        {
+          cgraph_speculative_call_info (e, direct, indirect, ref);
+          if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
+            return true;
+        }
+      else if (ecf_flags & ECF_PURE)
+        {
+          cgraph_speculative_call_info (e, direct, indirect, ref);
+          if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
+            return true;
+        }
+    }
+  /* If we did not manage to inline the function nor redirect
+     to an ipa-cp clone (these are seen by having the local flag set),
+     it is probably pointless to inline it unless the hardware is missing
+     an indirect call predictor.  */
+  if (!anticipate_inlining && e->inline_failed && !target->local.local)
+    return false;
+  /* For overwritable targets there is not much to do.  */
+  if (e->inline_failed && !can_inline_edge_p (e, false, true))
+    return false;
+  /* OK, speculation seems interesting.  */
+  return true;
+}
+
+/* We know that EDGE is not going to be inlined.
+   See if we can remove speculation.  */
+
+static void
+resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
+{
+  if (edge->speculative && !speculation_useful_p (edge, false))
+    {
+      struct cgraph_node *node = edge->caller;
+      struct cgraph_node *where = node->global.inlined_to
+                                  ? node->global.inlined_to : node;
+      bitmap updated_nodes = BITMAP_ALLOC (NULL);
+
+      cgraph_resolve_speculation (edge, NULL);
+      reset_edge_caches (where);
+      inline_update_overall_summary (where);
+      update_caller_keys (edge_heap, where,
+                          updated_nodes, NULL);
+      update_callee_keys (edge_heap, where,
+                          updated_nodes);
+      BITMAP_FREE (updated_nodes);
+    }
+}
 
 /* We use greedy algorithm for inlining of small functions:
    All inline candidates are put into prioritized heap ordered in
@@ -1406,10 +1521,14 @@ inline_small_functions (void)
   vec<cgraph_edge_p> new_indirect_edges = vNULL;
   int initial_size = 0;
   struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
+  struct cgraph_edge_hook_list *edge_removal_hook_holder;
 
   if (flag_indirect_inlining)
     new_indirect_edges.create (8);
 
+  edge_removal_hook_holder
+    = cgraph_add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);
+
   /* Compute overall unit size and other global parameters used by badness
      metrics.  */
 
@@ -1448,6 +1567,9 @@ inline_small_functions (void)
	if (max_count < edge->count)
	  max_count = edge->count;
     }
+  sreal_init (&max_count_real, max_count, 0);
+  sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
+  sreal_init (&half_int_min_real, INT_MAX / 2, 0);
   ipa_free_postorder_info ();
   initialize_growth_caches ();
 
@@ -1463,14 +1585,19 @@ inline_small_functions (void)
 
   /* Populate the heap with all edges we might inline.  */
 
   FOR_EACH_DEFINED_FUNCTION (node)
-    if (!node->global.inlined_to)
-      {
-	if (dump_file)
-	  fprintf (dump_file, "Enqueueing calls of %s/%i.\n",
-		   cgraph_node_name (node), node->symbol.order);
+    {
+      bool update = false;
+      struct cgraph_edge *next;
 
-	for (edge = node->callers; edge; edge = edge->next_caller)
+      if (dump_file)
+	fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
+		 cgraph_node_name (node), node->symbol.order);
+
+      for (edge = node->callees; edge; edge = next)
+	{
+	  next = edge->next_callee;
	  if (edge->inline_failed
+	      && !edge->aux
	      && can_inline_edge_p (edge, true)
	      && want_inline_small_function_p (edge, true)
	      && edge->inline_failed)
	    {
	      gcc_assert (!edge->aux);
	      update_edge_key (edge_heap, edge);
	    }
-      }
+	  if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
+	    {
+	      cgraph_resolve_speculation (edge, NULL);
+	      update = true;
+	    }
+	}
+      if (update)
+	{
+	  struct cgraph_node *where = node->global.inlined_to
+				      ? node->global.inlined_to : node;
+	  inline_update_overall_summary (where);
+	  reset_node_growth_cache (where);
+	  reset_edge_caches (where);
+	  update_caller_keys (edge_heap, where,
+			      updated_nodes, NULL);
+	  bitmap_clear (updated_nodes);
+	}
+    }
 
   gcc_assert (in_lto_p
	      || !max_count
@@ -1519,7 +1663,10 @@ inline_small_functions (void)
	}
 
      if (!can_inline_edge_p (edge, true))
-	continue;
+	{
+	  resolve_noninline_speculation (edge_heap, edge);
+	  continue;
+	}
 
      callee = cgraph_function_or_thunk_node (edge->callee, NULL);
      growth = estimate_edge_growth (edge);
@@ -1553,11 +1700,15 @@ inline_small_functions (void)
	{
	  edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
	  report_inline_failed_reason (edge);
+	  resolve_noninline_speculation (edge_heap, edge);
	  continue;
	}
 
      if (!want_inline_small_function_p (edge, true))
-	continue;
+	{
+	  resolve_noninline_speculation (edge_heap, edge);
+	  continue;
+	}
 
      /* Heuristics for inlining small functions work poorly for
	 recursive calls where we do effects similar to loop unrolling.
@@ -1573,6 +1724,7 @@ inline_small_functions (void)
					   ? &new_indirect_edges : NULL))
	    {
	      edge->inline_failed = CIF_RECURSIVE_INLINING;
+	      resolve_noninline_speculation (edge_heap, edge);
	      continue;
	    }
	  reset_edge_caches (where);
@@ -1581,6 +1733,7 @@ inline_small_functions (void)
	  if (flag_indirect_inlining)
	    add_new_edges_to_heap (edge_heap, new_indirect_edges);
	  update_callee_keys (edge_heap, where, updated_nodes);
+	  bitmap_clear (updated_nodes);
	}
      else
	{
@@ -1606,6 +1759,7 @@ inline_small_functions (void)
	  edge->inline_failed
	    = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->symbol.decl)
	       ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
+	  resolve_noninline_speculation (edge_heap, edge);
	  continue;
	}
      else if (depth && dump_file)
@@ -1663,6 +1817,7 @@ inline_small_functions (void)
	     initial_size, overall_size,
	     initial_size ? overall_size * 100 / (initial_size) - 100 : 0);
   BITMAP_FREE (updated_nodes);
+  cgraph_remove_edge_removal_hook (edge_removal_hook_holder);
 }
 
 /* Flatten NODE.  Performed both during early inlining and
@@ -1746,6 +1901,60 @@ flatten_function (struct cgraph_node *node, bool early)
   inline_update_overall_summary (node);
 }
 
+/* Count number of callers of NODE and store it into DATA (that
+   points to int).  Worker for cgraph_for_node_and_aliases.  */
+
+static bool
+sum_callers (struct cgraph_node *node, void *data)
+{
+  struct cgraph_edge *e;
+  int *num_calls = (int *) data;
+
+  for (e = node->callers; e; e = e->next_caller)
+    (*num_calls)++;
+  return false;
+}
+
+/* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
+   DATA points to the number of calls originally found so we avoid infinite
+   recursion.  */
+
+static bool
+inline_to_all_callers (struct cgraph_node *node, void *data)
+{
+  int *num_calls = (int *) data;
+  while (node->callers && !node->global.inlined_to)
+    {
+      struct cgraph_node *caller = node->callers->caller;
+
+      if (dump_file)
+	{
+	  fprintf (dump_file,
+		   "\nInlining %s size %i.\n",
+		   cgraph_node_name (node),
+		   inline_summary (node)->size);
+	  fprintf (dump_file,
+		   " Called once from %s %i insns.\n",
+		   cgraph_node_name (node->callers->caller),
+		   inline_summary (node->callers->caller)->size);
+	}
+
+      inline_call (node->callers, true, NULL, NULL, true);
+      if (dump_file)
+	fprintf (dump_file,
+		 " Inlined into %s which now has %i size\n",
+		 cgraph_node_name (caller),
+		 inline_summary (caller)->size);
+      if (!(*num_calls)--)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "New calls found; giving up.\n");
+	  break;
+	}
+    }
+  return false;
+}
+
 /* Decide on the inlining.  We do so in the topological order to avoid
    expenses on updating data structures.  */
 
@@ -1757,6 +1966,11 @@ ipa_inline (void)
   struct cgraph_node **order =
     XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
   int i;
+  int cold;
+  bool remove_functions = false;
+
+  if (!optimize)
+    return 0;
 
   if (in_lto_p && optimize)
     ipa_update_after_lto_read ();
@@ -1804,69 +2018,60 @@ ipa_inline (void)
      code size will shrink because the out-of-line copy is eliminated.
      We do this regardless of the callee size as long as function growth limits
      are met.  */
-  if (flag_inline_functions_called_once)
+  if (dump_file)
+    fprintf (dump_file,
+	     "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
+
+  /* Inlining one function called once has good chance of preventing
+     inlining other function into the same callee.  Ideally we should
+     work in priority order, but probably inlining hot functions first
+     is good cut without the extra pain of maintaining the queue.
+
+     ??? this is not really fitting the bill perfectly: inlining function
+     into callee often leads to better optimization of callee due to
+     increased context for optimization.
+     For example if main() function calls a function that outputs help
+     and then function that does the main optimization, we should inline
+     the second with priority even if both calls are cold by themselves.
+
+     We probably want to implement new predicate replacing our use of
+     maybe_hot_edge interpreted as maybe_hot_edge || callee is known
+     to be hot.  */
+  for (cold = 0; cold <= 1; cold ++)
     {
-      int cold;
-      if (dump_file)
-	fprintf (dump_file,
-		 "\nDeciding on functions to be inlined into all callers:\n");
-
-      /* Inlining one function called once has good chance of preventing
-	 inlining other function into the same callee.  Ideally we should
-	 work in priority order, but probably inlining hot functions first
-	 is good cut without the extra pain of maintaining the queue.
-
-	 ??? this is not really fitting the bill perfectly: inlining function
-	 into callee often leads to better optimization of callee due to
-	 increased context for optimization.
-	 For example if main() function calls a function that outputs help
-	 and then function that does the main optimization, we should inline
-	 the second with priority even if both calls are cold by themselves.
-
-	 We probably want to implement new predicate replacing our use of
-	 maybe_hot_edge interpreted as maybe_hot_edge || callee is known
-	 to be hot.  */
-      for (cold = 0; cold <= 1; cold ++)
+      FOR_EACH_DEFINED_FUNCTION (node)
	{
-	  FOR_EACH_DEFINED_FUNCTION (node)
+	  struct cgraph_edge *edge, *next;
+	  bool update = false;
+
+	  for (edge = node->callees; edge; edge = next)
	    {
-	      if (want_inline_function_to_all_callers_p (node, cold))
+	      next = edge->next_callee;
+	      if (edge->speculative && !speculation_useful_p (edge, false))
		{
-		  int num_calls = 0;
-		  struct cgraph_edge *e;
-		  for (e = node->callers; e; e = e->next_caller)
-		    num_calls++;
-		  while (node->callers && !node->global.inlined_to)
-		    {
-		      struct cgraph_node *caller = node->callers->caller;
-
-		      if (dump_file)
-			{
-			  fprintf (dump_file,
-				   "\nInlining %s size %i.\n",
-				   cgraph_node_name (node),
-				   inline_summary (node)->size);
-			  fprintf (dump_file,
-				   " Called once from %s %i insns.\n",
-				   cgraph_node_name (node->callers->caller),
-				   inline_summary (node->callers->caller)->size);
-			}
-
-		      inline_call (node->callers, true, NULL, NULL, true);
-		      if (dump_file)
-			fprintf (dump_file,
-				 " Inlined into %s which now has %i size\n",
-				 cgraph_node_name (caller),
-				 inline_summary (caller)->size);
-		      if (!num_calls--)
-			{
-			  if (dump_file)
-			    fprintf (dump_file, "New calls found; giving up.\n");
-			  break;
-			}
-		    }
+		  cgraph_resolve_speculation (edge, NULL);
+		  update = true;
+		  remove_functions = true;
		}
	    }
+	  if (update)
+	    {
+	      struct cgraph_node *where = node->global.inlined_to
+					  ? node->global.inlined_to : node;
+	      reset_node_growth_cache (where);
+	      reset_edge_caches (where);
+	      inline_update_overall_summary (where);
+	    }
+	  if (flag_inline_functions_called_once
+	      && want_inline_function_to_all_callers_p (node, cold))
+	    {
+	      int num_calls = 0;
+	      cgraph_for_node_and_aliases (node, sum_callers,
+					   &num_calls, true);
+	      cgraph_for_node_and_aliases (node, inline_to_all_callers,
+					   &num_calls, true);
+	      remove_functions = true;
+	    }
	}
     }
 
@@ -1884,7 +2089,7 @@ ipa_inline (void)
   /* In WPA we use inline summaries for partitioning process.  */
   if (!flag_wpa)
     inline_free_summary ();
-  return 0;
+  return remove_functions ? TODO_remove_functions : 0;
 }
 
 /* Inline always-inline function calls in NODE.  */
@@ -2011,6 +2216,7 @@ early_inliner (void)
 #ifdef ENABLE_CHECKING
   verify_cgraph_node (node);
 #endif
+  ipa_remove_all_references (&node->symbol.ref_list);
 
   /* Even when not optimizing or not inlining inline always-inline
      functions.  */
@@ -2086,65 +2292,99 @@ early_inliner (void)
   return todo;
 }
 
-struct gimple_opt_pass pass_early_inline =
+namespace {
+
+const pass_data pass_data_early_inline =
 {
- {
-  GIMPLE_PASS,
-  "einline",			/* name */
-  OPTGROUP_INLINE,		/* optinfo_flags */
-  NULL,				/* gate */
-  early_inliner,		/* execute */
-  NULL,				/* sub */
-  NULL,				/* next */
-  0,				/* static_pass_number */
-  TV_EARLY_INLINING,		/* tv_id */
-  PROP_ssa,			/* properties_required */
-  0,				/* properties_provided */
-  0,				/* properties_destroyed */
-  0,				/* todo_flags_start */
-  0				/* todo_flags_finish */
- }
+  GIMPLE_PASS, /* type */
+  "einline", /* name */
+  OPTGROUP_INLINE, /* optinfo_flags */
+  false, /* has_gate */
+  true, /* has_execute */
+  TV_EARLY_INLINING, /* tv_id */
+  PROP_ssa, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
 };
 
+class pass_early_inline : public gimple_opt_pass
+{
+public:
+  pass_early_inline (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_early_inline, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  unsigned int execute () { return early_inliner (); }
+
+}; // class pass_early_inline
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_early_inline (gcc::context *ctxt)
+{
+  return new pass_early_inline (ctxt);
+}
+
 /* When to run IPA inlining.  Inlining of always-inline
    functions happens during early inlining.
 
-   Enable inlining unconditionally at -flto.  We need size estimates to
-   drive partitioning.  */
+   Enable inlining unconditionally, because callgraph redirection
+   happens here.  */
 
 static bool
 gate_ipa_inline (void)
 {
-  return optimize || flag_lto || flag_wpa;
+  return true;
 }
 
-struct ipa_opt_pass_d pass_ipa_inline =
+namespace {
+
+const pass_data pass_data_ipa_inline =
 {
- {
-  IPA_PASS,
-  "inline",			/* name */
-  OPTGROUP_INLINE,		/* optinfo_flags */
-  gate_ipa_inline,		/* gate */
-  ipa_inline,			/* execute */
-  NULL,				/* sub */
-  NULL,				/* next */
-  0,				/* static_pass_number */
-  TV_IPA_INLINING,		/* tv_id */
-  0,				/* properties_required */
-  0,				/* properties_provided */
-  0,				/* properties_destroyed */
-  TODO_remove_functions,	/* todo_flags_start */
-  TODO_dump_symtab
-  | TODO_remove_functions	/* todo_flags_finish */
- },
- inline_generate_summary,	/* generate_summary */
- inline_write_summary,		/* write_summary */
- inline_read_summary,		/* read_summary */
- NULL,				/* write_optimization_summary */
- NULL,				/* read_optimization_summary */
- NULL,				/* stmt_fixup */
- 0,				/* TODOs */
- inline_transform,		/* function_transform */
- NULL,				/* variable_transform */
+  IPA_PASS, /* type */
+  "inline", /* name */
+  OPTGROUP_INLINE, /* optinfo_flags */
+  true, /* has_gate */
+  true, /* has_execute */
+  TV_IPA_INLINING, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  TODO_remove_functions, /* todo_flags_start */
+  ( TODO_dump_symtab ), /* todo_flags_finish */
 };
+
+class pass_ipa_inline : public ipa_opt_pass_d
+{
+public:
+  pass_ipa_inline (gcc::context *ctxt)
+    : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
+		      inline_generate_summary, /* generate_summary */
+		      inline_write_summary, /* write_summary */
+		      inline_read_summary, /* read_summary */
+		      NULL, /* write_optimization_summary */
+		      NULL, /* read_optimization_summary */
+		      NULL, /* stmt_fixup */
+		      0, /* function_transform_todo_flags_start */
+		      inline_transform, /* function_transform */
+		      NULL) /* variable_transform */
+  {}
+
+  /* opt_pass methods: */
+  bool gate () { return gate_ipa_inline (); }
+  unsigned int execute () { return ipa_inline (); }
+
+}; // class pass_ipa_inline
+
+} // anon namespace
+
+ipa_opt_pass_d *
+make_pass_ipa_inline (gcc::context *ctxt)
+{
+  return new pass_ipa_inline (ctxt);
+}
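
For reference, the sreal sequence in edge_badness above computes, in effect,
badness = -((edge->count / max_count) * (relbenefit / RELATIVE_TIME_BENEFIT_RANGE) * (INT_MAX / 2)) / growth.
Below is a minimal standalone model of that key in plain C; the doubles stand in for GCC's sreal type, and the RELATIVE_TIME_BENEFIT_RANGE value is an assumed placeholder rather than GCC's actual definition.

/* Minimal model of the sreal badness key; doubles stand in for sreal.  */
#include <limits.h>
#include <stdio.h>

#define RELATIVE_TIME_BENEFIT_RANGE 255	/* assumed value, for illustration */

static int
badness_model (long long edge_count, long long max_count,
	       int relbenefit, int growth)
{
  double relative_edge_count = (double) edge_count / (double) max_count;
  double relative_time_benefit
    = (double) relbenefit / RELATIVE_TIME_BENEFIT_RANGE;
  /* Scale the product of the two factors (each at most 1) into roughly
     [0, INT_MAX/2], divide by growth, and negate: hotter edges with larger
     benefit and smaller growth get more negative (better) heap keys.  */
  double tmp = relative_edge_count * relative_time_benefit
	       * (double) (INT_MAX / 2) / (double) growth;
  return (int) -tmp;
}

int
main (void)
{
  printf ("hot, beneficial, cheap: %d\n", badness_model (900, 1000, 200, 4));
  printf ("cold, marginal, costly: %d\n", badness_model (3, 1000, 10, 40));
  return 0;
}

Since both ratios are at most 1 and growth is positive on this path, the scaled result stays within int range before negation; the sreal calls carry out the same sequence without the intermediate overflow the removed double expression risked for very large profile counts.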
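The change in want_inline_function_to_all_callers_p from testing node->callers directly to walking cgraph_for_node_and_aliases with has_caller_p matters when every call reaches the function through an alias. A toy illustration of the difference follows; the structures and the walker are simplified stand-ins, not GCC's real cgraph API.

/* Toy model: a function called only through an alias has no direct
   callers, so the alias-aware walk is needed to see it is used.  */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node
{
  const char *name;
  int n_callers;		/* direct callers of this symbol */
  struct node *alias;		/* next alias of the node, if any */
};

static bool
has_caller_p (struct node *n, void *data)
{
  (void) data;
  return n->n_callers > 0;
}

/* Apply CB to N and each of its aliases, stopping on the first true.  */
static bool
for_node_and_aliases (struct node *n,
		      bool (*cb) (struct node *, void *), void *data)
{
  for (; n; n = n->alias)
    if (cb (n, data))
      return true;
  return false;
}

int
main (void)
{
  struct node alias = { "f.alias", 2, NULL };	/* called twice via alias */
  struct node f = { "f", 0, &alias };		/* never called directly */

  printf ("direct check: %s\n", f.n_callers ? "has callers" : "none");
  printf ("alias-aware:  %s\n",
	  for_node_and_aliases (&f, has_caller_p, NULL)
	  ? "has callers" : "none");
  return 0;
}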
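Likewise, sum_callers and inline_to_all_callers split the work into two passes: first count the call sites that exist now, then inline at most that many, so call sites materialized during inlining (as with a once-called recursive function) cannot keep the loop alive forever. A toy model of that guard follows; every name in it is hypothetical.

/* Toy model of the num_calls guard: "inlining" an edge may append a new
   caller of the same node, the way real inlining can expose new calls.  */
#include <stdio.h>

#define MAX_CALLERS 32

static int callers[MAX_CALLERS];	/* queue of caller ids */
static int n_callers;

static void
inline_one_caller (int caller)
{
  printf ("inlining into caller %d\n", caller);
  /* Suppose inlining into caller 1 exposes a brand new call site.  */
  if (caller == 1 && n_callers < MAX_CALLERS)
    callers[n_callers++] = 99;
}

int
main (void)
{
  int num_calls, i;

  callers[n_callers++] = 1;
  callers[n_callers++] = 2;

  /* Pass 1: count the calls that exist now (cf. sum_callers).  */
  num_calls = n_callers;

  /* Pass 2: consume at most that many edges; calls discovered while
     inlining are left for a later pass instead of looping forever.  */
  for (i = 0; i < n_callers; i++)
    {
      inline_one_caller (callers[i]);
      if (!num_calls--)
	{
	  printf ("New calls found; giving up.\n");
	  break;
	}
    }
  return 0;
}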