Fix the occurrence analyser

Ticket #18603 demonstrated that the occurrence analyser's handling of local RULES for imported Ids (which I now call IMP-RULES) was inadequate. It led the simplifier into an infnite loop by failing to label a binder as a loop breaker. The main change in this commit is to treat IMP-RULES in a simple and uniform way: as extra rules for the local binder. See Note [IMP-RULES: local rules for imported functions] This led to quite a bit of refactoring. The result is still tricky, but it's much better than before, and better documented I think. Oh, and it fixes the bug.
author: Simon Peyton Jones <simonpj@microsoft.com> 2020-08-27 16:11:57 +0100
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2020-09-22 05:37:59 -0400
commit: 416bd50e58b23ad70813b18a913ca77a3ab6e936 (patch)
tree: 64826e21ef4c9fa9beae9e20ee1e553540f15e3f
parent: 6de40f83c53c3b1899f7b4912badbe98e4fbde88 (diff)
download: haskell-416bd50e58b23ad70813b18a913ca77a3ab6e936.tar.gz
4 files changed, 611 insertions, 480 deletions
diff --git a/compiler/GHC/Core/FVs.hs b/compiler/GHC/Core/FVs.hs
index 374f7cfec8..0cbf81d528 100644
--- a/compiler/GHC/Core/FVs.hs
+++ b/compiler/GHC/Core/FVs.hs
@@ -10,16 +10,12 @@ Taken quite directly from the Peyton Jones/Lester paper.
 -- | A module concerned with finding the free variables of an expression.
 module GHC.Core.FVs (
         -- * Free variables of expressions and binding groups
-        exprFreeVars,
+        exprFreeVars,     exprsFreeVars,
         exprFreeVarsDSet,
-        exprFreeVarsList,
-        exprFreeIds,
-        exprFreeIdsDSet,
-        exprFreeIdsList,
-        exprsFreeIdsDSet,
-        exprsFreeIdsList,
-        exprsFreeVars,
-        exprsFreeVarsList,
+        exprFreeVarsList, exprsFreeVarsList,
+        exprFreeIds,      exprsFreeIds,
+        exprFreeIdsDSet,  exprsFreeIdsDSet,
+        exprFreeIdsList,  exprsFreeIdsList,
         bindFreeVars,
 
         -- * Selective free variables of expressions
@@ -127,6 +123,9 @@ exprFreeVarsList = fvVarList . exprFVs
 exprFreeIds :: CoreExpr -> IdSet        -- Find all locally-defined free Ids
 exprFreeIds = exprSomeFreeVars isLocalId
 
+exprsFreeIds :: [CoreExpr] -> IdSet        -- Find all locally-defined free Ids
+exprsFreeIds = exprsSomeFreeVars isLocalId
+
 -- | Find all locally-defined free Ids in an expression
 -- returning a deterministic set.
 exprFreeIdsDSet :: CoreExpr -> DIdSet -- Find all locally-defined free Ids
diff --git a/compiler/GHC/Core/Opt/OccurAnal.hs b/compiler/GHC/Core/Opt/OccurAnal.hs
index 0180bec651..e352910cfb 100644
--- a/compiler/GHC/Core/Opt/OccurAnal.hs
+++ b/compiler/GHC/Core/Opt/OccurAnal.hs
@@ -61,10 +61,15 @@ import Data.List
 Here's the externally-callable interface:
 -}
 
+occurAnalyseExpr :: CoreExpr -> CoreExpr
+-- Do occurrence analysis, and discard occurrence info returned
+occurAnalyseExpr expr
+  = snd (occAnal initOccEnv expr)
+
 occurAnalysePgm :: Module         -- Used only in debug output
                 -> (Id -> Bool)         -- Active unfoldings
                 -> (Activation -> Bool) -- Active rules
-                -> [CoreRule]
+                -> [CoreRule]           -- Local rules for imported Ids
                 -> CoreProgram -> CoreProgram
 occurAnalysePgm this_mod active_unf active_rule imp_rules binds
   | isEmptyDetails final_usage
@@ -95,15 +100,21 @@ occurAnalysePgm this_mod active_unf active_rule imp_rules binds
     initial_uds = addManyOccs emptyDetails (rulesFreeVars imp_rules)
     -- The RULES declarations keep things alive!
 
-    -- Note [Preventing loops due to imported functions rules]
-    imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
-                            [ mapVarEnv (const maps_to) $
-                                getUniqSet (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
-                            | imp_rule <- imp_rules
-                            , not (isBuiltinRule imp_rule)  -- See Note [Plugin rules]
-                            , let maps_to = exprFreeIds (ru_rhs imp_rule)
-                                             `delVarSetList` ru_bndrs imp_rule
-                            , arg <- ru_args imp_rule ]
+    -- imp_rule_edges maps a top-level local binder 'f' to the
+    -- RHS free vars of any IMP-RULE, a local RULE for an imported function,
+    -- where 'f' appears on the LHS
+    --   e.g.  RULE foldr f = blah
+    --         imp_rule_edges contains f :-> fvs(blah)
+    -- We treat such RULES as extra rules for 'f'
+    -- See Note [Preventing loops due to imported functions rules]
+    imp_rule_edges :: ImpRuleEdges
+    imp_rule_edges = foldr (plusVarEnv_C (++)) emptyVarEnv
+                           [ mapVarEnv (const [(act,rhs_fvs)]) $ getUniqSet $
+                             exprsFreeIds args `delVarSetList` bndrs
+                           | Rule { ru_act = act, ru_bndrs = bndrs
+                                   , ru_args = args, ru_rhs = rhs } <- imp_rules
+                                   -- Not BuiltinRules; see Note [Plugin rules]
+                           , let rhs_fvs = exprFreeIds rhs `delVarSetList` bndrs ]
 
     go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
     go _ []
@@ -115,297 +126,64 @@ occurAnalysePgm this_mod active_unf active_rule imp_rules binds
            (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind
                                               bs_usage
 
-occurAnalyseExpr :: CoreExpr -> CoreExpr
--- Do occurrence analysis, and discard occurrence info returned
-occurAnalyseExpr expr
-  = snd (occAnal initOccEnv expr)
-
-{- Note [Plugin rules]
-~~~~~~~~~~~~~~~~~~~~~~
-Conal Elliott (#11651) built a GHC plugin that added some
-BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
-do some domain-specific transformations that could not be expressed
-with an ordinary pattern-matching CoreRule.  But then we can't extract
-the dependencies (in imp_rule_edges) from ru_rhs etc, because a
-BuiltinRule doesn't have any of that stuff.
-
-So we simply assume that BuiltinRules have no dependencies, and filter
-them out from the imp_rule_edges comprehension.
--}
-
-{-
-************************************************************************
+{- *********************************************************************
 *                                                                      *
-                Bindings
+                IMP-RULES
+         Local rules for imported functions
 *                                                                      *
-************************************************************************
-
-Note [Recursive bindings: the grand plan]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-When we come across a binding group
-  Rec { x1 = r1; ...; xn = rn }
-we treat it like this (occAnalRecBind):
-
-1. Occurrence-analyse each right hand side, and build a
-   "Details" for each binding to capture the results.
-
-   Wrap the details in a Node (details, node-id, dep-node-ids),
-   where node-id is just the unique of the binder, and
-   dep-node-ids lists all binders on which this binding depends.
-   We'll call these the "scope edges".
-   See Note [Forming the Rec groups].
-
-   All this is done by makeNode.
-
-2. Do SCC-analysis on these Nodes.  Each SCC will become a new Rec or
-   NonRec.  The key property is that every free variable of a binding
-   is accounted for by the scope edges, so that when we are done
-   everything is still in scope.
-
-3. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we
-   identify suitable loop-breakers to ensure that inlining terminates.
-   This is done by occAnalRec.
-
-4. To do so we form a new set of Nodes, with the same details, but
-   different edges, the "loop-breaker nodes". The loop-breaker nodes
-   have both more and fewer dependencies than the scope edges
-   (see Note [Choosing loop breakers])
-
-   More edges: if f calls g, and g has an active rule that mentions h
-               then we add an edge from f -> h
-
-   Fewer edges: we only include dependencies on active rules, on rule
-                RHSs (not LHSs) and if there is an INLINE pragma only
-                on the stable unfolding (and vice versa).  The scope
-                edges must be much more inclusive.
-
-5.  The "weak fvs" of a node are, by definition:
-       the scope fvs - the loop-breaker fvs
-    See Note [Weak loop breakers], and the nd_weak field of Details
-
-6.  Having formed the loop-breaker nodes
-
-Note [Dead code]
-~~~~~~~~~~~~~~~~
-Dropping dead code for a cyclic Strongly Connected Component is done
-in a very simple way:
-
-        the entire SCC is dropped if none of its binders are mentioned
-        in the body; otherwise the whole thing is kept.
-
-The key observation is that dead code elimination happens after
-dependency analysis: so 'occAnalBind' processes SCCs instead of the
-original term's binding groups.
-
-Thus 'occAnalBind' does indeed drop 'f' in an example like
-
-        letrec f = ...g...
-               g = ...(...g...)...
-        in
-           ...g...
-
-when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
-'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
-'AcyclicSCC f', where 'body_usage' won't contain 'f'.
-
-------------------------------------------------------------
-Note [Forming Rec groups]
-~~~~~~~~~~~~~~~~~~~~~~~~~
-We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
-and "g uses f", no matter how indirectly.  We do a SCC analysis
-with an edge f -> g if "f uses g".
-
-More precisely, "f uses g" iff g should be in scope wherever f is.
-That is, g is free in:
-  a) the rhs 'ef'
-  b) or the RHS of a rule for f (Note [Rules are extra RHSs])
-  c) or the LHS or a rule for f (Note [Rule dependency info])
-
-These conditions apply regardless of the activation of the RULE (eg it might be
-inactive in this phase but become active later).  Once a Rec is broken up
-it can never be put back together, so we must be conservative.
-
-The principle is that, regardless of rule firings, every variable is
-always in scope.
-
-  * Note [Rules are extra RHSs]
-    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
-    keeps the specialised "children" alive.  If the parent dies
-    (because it isn't referenced any more), then the children will die
-    too (unless they are already referenced directly).
-
-    To that end, we build a Rec group for each cyclic strongly
-    connected component,
-        *treating f's rules as extra RHSs for 'f'*.
-    More concretely, the SCC analysis runs on a graph with an edge
-    from f -> g iff g is mentioned in
-        (a) f's rhs
-        (b) f's RULES
-    These are rec_edges.
-
-    Under (b) we include variables free in *either* LHS *or* RHS of
-    the rule.  The former might seems silly, but see Note [Rule
-    dependency info].  So in Example [eftInt], eftInt and eftIntFB
-    will be put in the same Rec, even though their 'main' RHSs are
-    both non-recursive.
-
-  * Note [Rule dependency info]
-    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    The VarSet in a RuleInfo is used for dependency analysis in the
-    occurrence analyser.  We must track free vars in *both* lhs and rhs.
-    Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
-    Why both? Consider
-        x = y
-        RULE f x = v+4
-    Then if we substitute y for x, we'd better do so in the
-    rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
-    as well as 'v'
-
-  * Note [Rules are visible in their own rec group]
-    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    We want the rules for 'f' to be visible in f's right-hand side.
-    And we'd like them to be visible in other functions in f's Rec
-    group.  E.g. in Note [Specialisation rules] we want f' rule
-    to be visible in both f's RHS, and fs's RHS.
-
-    This means that we must simplify the RULEs first, before looking
-    at any of the definitions.  This is done by Simplify.simplRecBind,
-    when it calls addLetIdInfo.
-
-------------------------------------------------------------
-Note [Choosing loop breakers]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Loop breaking is surprisingly subtle.  First read the section 4 of
-"Secrets of the GHC inliner".  This describes our basic plan.
-We avoid infinite inlinings by choosing loop breakers, and
-ensuring that a loop breaker cuts each loop.
-
-See also Note [Inlining and hs-boot files] in GHC.Core.ToIface, which
-deals with a closely related source of infinite loops.
-
-Fundamentally, we do SCC analysis on a graph.  For each recursive
-group we choose a loop breaker, delete all edges to that node,
-re-analyse the SCC, and iterate.
-
-But what is the graph?  NOT the same graph as was used for Note
-[Forming Rec groups]!  In particular, a RULE is like an equation for
-'f' that is *always* inlined if it is applicable.  We do *not* disable
-rules for loop-breakers.  It's up to whoever makes the rules to make
-sure that the rules themselves always terminate.  See Note [Rules for
-recursive functions] in GHC.Core.Opt.Simplify
-
-Hence, if
-    f's RHS (or its INLINE template if it has one) mentions g, and
-    g has a RULE that mentions h, and
-    h has a RULE that mentions f
+********************************************************************* -}
 
-then we *must* choose f to be a loop breaker.  Example: see Note
-[Specialisation rules].
-
-In general, take the free variables of f's RHS, and augment it with
-all the variables reachable by RULES from those starting points.  That
-is the whole reason for computing rule_fv_env in occAnalBind.  (Of
-course we only consider free vars that are also binders in this Rec
-group.)  See also Note [Finding rule RHS free vars]
-
-Note that when we compute this rule_fv_env, we only consider variables
-free in the *RHS* of the rule, in contrast to the way we build the
-Rec group in the first place (Note [Rule dependency info])
-
-Note that if 'g' has RHS that mentions 'w', we should add w to
-g's loop-breaker edges.  More concretely there is an edge from f -> g
-iff
-        (a) g is mentioned in f's RHS `xor` f's INLINE rhs
-            (see Note [Inline rules])
-        (b) or h is mentioned in f's RHS, and
-            g appears in the RHS of an active RULE of h
-            or a transitive sequence of active rules starting with h
-
-Why "active rules"?  See Note [Finding rule RHS free vars]
-
-Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
-chosen as a loop breaker, because their RHSs don't mention each other.
-And indeed both can be inlined safely.
-
-Note again that the edges of the graph we use for computing loop breakers
-are not the same as the edges we use for computing the Rec blocks.
-That's why we compute
-
-- rec_edges          for the Rec block analysis
-- loop_breaker_nodes for the loop breaker analysis
-
-  * Note [Finding rule RHS free vars]
-    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    Consider this real example from Data Parallel Haskell
-         tagZero :: Array Int -> Array Tag
-         {-# INLINE [1] tagZeroes #-}
-         tagZero xs = pmap (\x -> fromBool (x==0)) xs
-
-         {-# RULES "tagZero" [~1] forall xs n.
-             pmap fromBool <blah blah> = tagZero xs #-}
-    So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
-    However, tagZero can only be inlined in phase 1 and later, while
-    the RULE is only active *before* phase 1.  So there's no problem.
-
-    To make this work, we look for the RHS free vars only for
-    *active* rules. That's the reason for the occ_rule_act field
-    of the OccEnv.
-
-  * Note [Weak loop breakers]
-    ~~~~~~~~~~~~~~~~~~~~~~~~~
-    There is a last nasty wrinkle.  Suppose we have
-
-        Rec { f = f_rhs
-              RULE f [] = g
-
-              h = h_rhs
-              g = h
-              ...more...
-        }
-
-    Remember that we simplify the RULES before any RHS (see Note
-    [Rules are visible in their own rec group] above).
-
-    So we must *not* postInlineUnconditionally 'g', even though
-    its RHS turns out to be trivial.  (I'm assuming that 'g' is
-    not chosen as a loop breaker.)  Why not?  Because then we
-    drop the binding for 'g', which leaves it out of scope in the
-    RULE!
+type ImpRuleEdges = IdEnv [(Activation, VarSet)]
+    -- Mapping from a local Id 'f' to info about its IMP-RULES,
+    -- i.e. /local/ rules for an imported Id that mention 'f' on the LHS
+    -- We record (a) its Activation and (b) the RHS free vars
+    -- See Note [IMP-RULES: local rules for imported functions]
 
-    Here's a somewhat different example of the same thing
-        Rec { g = h
-            ; h = ...f...
-            ; f = f_rhs
-              RULE f [] = g }
-    Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
-    g, because the RULE for f is active throughout.  So the RHS of h
-    might rewrite to     h = ...g...
-    So g must remain in scope in the output program!
+noImpRuleEdges :: ImpRuleEdges
+noImpRuleEdges = emptyVarEnv
 
-    We "solve" this by:
+lookupImpRules :: ImpRuleEdges -> Id -> [(Activation,VarSet)]
+lookupImpRules imp_rule_edges bndr
+  = case lookupVarEnv imp_rule_edges bndr of
+      Nothing -> []
+      Just vs -> vs
+
+impRulesScopeUsage :: [(Activation,VarSet)] -> UsageDetails
+-- Variable mentioned in RHS of an IMP-RULE for the bndr,
+-- whether active or not
+impRulesScopeUsage imp_rules_info
+  = foldr add emptyDetails imp_rules_info
+  where
+    add (_,vs) usage = addManyOccs usage vs
 
-        Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
-        iff g is a "missing free variable" of the Rec group
+impRulesActiveFvs :: (Activation -> Bool) -> VarSet
+                  -> [(Activation,VarSet)] -> VarSet
+impRulesActiveFvs is_active bndr_set vs
+  = foldr add emptyVarSet vs `intersectVarSet` bndr_set
+  where
+    add (act,vs) acc | is_active act = vs `unionVarSet` acc
+                     | otherwise     = acc
 
-    A "missing free variable" x is one that is mentioned in an RHS or
-    INLINE or RULE of a binding in the Rec group, but where the
-    dependency on x may not show up in the loop_breaker_nodes (see
-    note [Choosing loop breakers} above).
+{- Note [IMP-RULES: local rules for imported functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We quite often have
+  * A /local/ rule
+  * for an /imported/ function
+like this:
+  foo x = blah
+  {-# RULE "map/foo" forall xs. map foo xs = xs #-}
+We call them IMP-RULES.  They are important in practice, and occur a
+lot in the libraries.
 
-    A normal "strong" loop breaker has IAmLoopBreaker False.  So
+IMP-RULES are held in mg_rules of ModGuts, and passed in to
+occurAnalysePgm.
 
-                                    Inline  postInlineUnconditionally
-   strong   IAmLoopBreaker False    no      no
-   weak     IAmLoopBreaker True     yes     no
-            other                   yes     yes
+Main Invariant:
 
-    The **sole** reason for this kind of loop breaker is so that
-    postInlineUnconditionally does not fire.  Ugh.  (Typically it'll
-    inline via the usual callSiteInline stuff, so it'll be dead in the
-    next pass, so the main Ugh is the tiresome complication.)
+* Throughout, we treat an IMP-RULE that mentions 'f' on its LHS
+  just like a RULE for f.
 
-Note [Rules for imported functions]
+Note [IMP-RULES: unavoidable loops]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Consider this
    f = /\a. B.g a
@@ -428,14 +206,28 @@ B.g.  We could only spot such loops by exhaustively following
 unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
 f.
 
-Note that RULES for imported functions are important in practice; they
-occur a lot in the libraries.
-
 We regard this potential infinite loop as a *programmer* error.
 It's up the programmer not to write silly rules like
      RULE f x = f x
 and the example above is just a more complicated version.
 
+Note [Specialising imported functions] (referred to from Specialise)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For *automatically-generated* rules, the programmer can't be
+responsible for the "programmer error" in Note [IMP-RULES: unavoidable
+loops].  In particular, consider specialising a recursive function
+defined in another module.  If we specialise a recursive function B.g,
+we get
+  g_spec = .....(B.g Int).....
+  RULE B.g Int = g_spec
+Here, g_spec doesn't look recursive, but when the rule fires, it
+becomes so.  And if B.g was mutually recursive, the loop might not be
+as obvious as it is here.
+
+To avoid this,
+ * When specialising a function that is a loop breaker,
+   give a NOINLINE pragma to the specialised function
+
 Note [Preventing loops due to imported functions rules]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Consider:
@@ -501,56 +293,54 @@ And we are in an infinite loop again, except that this time the loop is producin
 infinitely large *term* (an unrolling of filter) and so the simplifier finally
 dies with "ticks exhausted"
 
-Because of this problem, we make a small change in the occurrence analyser
-designed to mark functions like "filter" as strong loop breakers on the basis that:
-  1. The RHS of filter mentions the local function "filterFB"
-  2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
+SOLUTION: we treat the rule "filterList" as an extra rule for 'filterFB'
+because it mentions 'filterFB' on the LHS.  This is the Main Invariant
+in Note [IMP-RULES: local rules for imported functions].
 
-So for each RULE for an *imported* function we are going to add
-dependency edges between the *local* FVS of the rule LHS and the
-*local* FVS of the rule RHS. We don't do anything special for RULES on
-local functions because the standard occurrence analysis stuff is
-pretty good at getting loop-breakerness correct there.
+So, during loop-breaker analysis:
 
-It is important to note that even with this extra hack we aren't always going to get
-things right. For example, it might be that the rule LHS mentions an imported Id,
-and another module has a RULE that can rewrite that imported Id to one of our local
-Ids.
+- for each active RULE for a local function 'f' we add an edge bewteen
+  'f' and the local FVs of the rule RHS
 
-Note [Specialising imported functions] (referred to from Specialise)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-BUT for *automatically-generated* rules, the programmer can't be
-responsible for the "programmer error" in Note [Rules for imported
-functions].  In particular, consider specialising a recursive function
-defined in another module.  If we specialise a recursive function B.g,
-we get
-         g_spec = .....(B.g Int).....
-         RULE B.g Int = g_spec
-Here, g_spec doesn't look recursive, but when the rule fires, it
-becomes so.  And if B.g was mutually recursive, the loop might
-not be as obvious as it is here.
+- for each active RULE for an *imported* function we add dependency
+  edges between the *local* FVS of the rule LHS and the *local* FVS of
+  the rule RHS.
 
-To avoid this,
- * When specialising a function that is a loop breaker,
-   give a NOINLINE pragma to the specialised function
+Even with this extra hack we aren't always going to get things
+right. For example, it might be that the rule LHS mentions an imported
+Id, and another module has a RULE that can rewrite that imported Id to
+one of our local Ids.
+
+Note [Plugin rules]
+~~~~~~~~~~~~~~~~~~~
+Conal Elliott (#11651) built a GHC plugin that added some
+BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
+do some domain-specific transformations that could not be expressed
+with an ordinary pattern-matching CoreRule.  But then we can't extract
+the dependencies (in imp_rule_edges) from ru_rhs etc, because a
+BuiltinRule doesn't have any of that stuff.
+
+So we simply assume that BuiltinRules have no dependencies, and filter
+them out from the imp_rule_edges comprehension.
 
 Note [Glomming]
 ~~~~~~~~~~~~~~~
-RULES for imported Ids can make something at the top refer to something at the bottom:
-        f = \x -> B.g (q x)
-        h = \y -> 3
+RULES for imported Ids can make something at the top refer to
+something at the bottom:
 
-        RULE:  B.g (q x) = h x
+        foo = ...(B.f @Int)...
+        $sf = blah
+        RULE:  B.f @Int = $sf
 
-Applying this rule makes f refer to h, although f doesn't appear to
-depend on h.  (And, as in Note [Rules for imported functions], the
-dependency might be more indirect. For example, f might mention C.t
-rather than B.g, where C.t eventually inlines to B.g.)
+Applying this rule makes foo refer to $sf, although foo doesn't appear to
+depend on $sf.  (And, as in Note [Rules for imported functions], the
+dependency might be more indirect. For example, foo might mention C.t
+rather than B.f, where C.t eventually inlines to B.f.)
 
 NOTICE that this cannot happen for rules whose head is a
 locally-defined function, because we accurately track dependencies
 through RULES.  It only happens for rules whose head is an imported
-function (B.g in the example above).
+function (B.f in the example above).
 
 Solution:
   - When simplifying, bring all top level identifiers into
@@ -565,6 +355,145 @@ Solution:
     then just glom all the bindings into a single Rec, so that
     the *next* iteration of the occurrence analyser will sort
     them all out.   This part happens in occurAnalysePgm.
+-}
+
+{-
+************************************************************************
+*                                                                      *
+                Bindings
+*                                                                      *
+************************************************************************
+
+Note [Recursive bindings: the grand plan]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When we come across a binding group
+  Rec { x1 = r1; ...; xn = rn }
+we treat it like this (occAnalRecBind):
+
+1. Note [Forming Rec groups]
+   Occurrence-analyse each right hand side, and build a
+   "Details" for each binding to capture the results.
+   Wrap the details in a LetrecNode, ready for SCC analysis.
+   All this is done by makeNode.
+
+2. Do SCC-analysis on these Nodes:
+   - Each CyclicSCC will become a new Rec
+   - Each AcyclicSCC will become a new NonRec
+   The key property is that every free variable of a binding is
+   accounted for by the scope edges, so that when we are done
+   everything is still in scope.
+
+3. For each AcyclicSCC, just make a NonRec binding.
+
+4. For each CyclicSCC of the scope-edge SCC-analysis in (2), we
+   identify suitable loop-breakers to ensure that inlining terminates.
+   This is done by occAnalRec.
+
+   4a To do so we form a new set of Nodes, with the same details, but
+      different edges, the "loop-breaker nodes". The loop-breaker nodes
+      have both more and fewer dependencies than the scope edges
+      (see Note [Choosing loop breakers])
+
+      More edges: if f calls g, and g has an active rule that mentions h
+                 then we add an edge from f -> h
+
+      Fewer edges: we only include dependencies on active rules, on rule
+                   RHSs (not LHSs) and if there is an INLINE pragma only
+                   on the stable unfolding (and vice versa).  The scope
+                   edges must be much more inclusive.
+
+   4b. The "weak fvs" of a node are, by definition:
+       the scope fvs - the loop-breaker fvs
+       See Note [Weak loop breakers], and the nd_weak field of Details
+
+Note [Dead code]
+~~~~~~~~~~~~~~~~
+Dropping dead code for a cyclic Strongly Connected Component is done
+in a very simple way:
+
+        the entire SCC is dropped if none of its binders are mentioned
+        in the body; otherwise the whole thing is kept.
+
+The key observation is that dead code elimination happens after
+dependency analysis: so 'occAnalBind' processes SCCs instead of the
+original term's binding groups.
+
+Thus 'occAnalBind' does indeed drop 'f' in an example like
+
+        letrec f = ...g...
+               g = ...(...g...)...
+        in
+           ...g...
+
+when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
+'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
+'AcyclicSCC f', where 'body_usage' won't contain 'f'.
+
+------------------------------------------------------------
+Note [Forming Rec groups]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+The key point about the "Forming Rec groups" step is that it /preserves
+scoping/.  If 'x' is mentioned, it had better be bound somewhere.  So if
+we start with
+  Rec { f = ...h...
+      ; g = ...f...
+      ; h = ...f... }
+we can split into SCCs
+  Rec { f = ...h...
+      ; h = ..f... }
+  NonRec { g = ...f... }
+
+We put bindings {f = ef; g = eg } in a Rec group if "f uses g" and "g
+uses f", no matter how indirectly.  We do a SCC analysis with an edge
+f -> g if "f mentions g". That is, g is free in:
+  a) the rhs 'ef'
+  b) or the RHS of a rule for f, whether active or inactive
+       Note [Rules are extra RHSs]
+  c) or the LHS or a rule for f, whether active or inactive
+       Note [Rule dependency info]
+  d) the RHS of an /active/ local IMP-RULE
+       Note [IMP-RULES: local rules for imported functions]
+
+(b) and (c) apply regardless of the activation of the RULE, because even if
+the rule is inactive its free variables must be bound.  But (d) doesn't need
+to worry about this because IMP-RULES are always notionally at the bottom
+of the file.
+
+  * Note [Rules are extra RHSs]
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
+    keeps the specialised "children" alive.  If the parent dies
+    (because it isn't referenced any more), then the children will die
+    too (unless they are already referenced directly).
+
+    So in Example [eftInt], eftInt and eftIntFB will be put in the
+    same Rec, even though their 'main' RHSs are both non-recursive.
+
+    We must also include inactive rules, so that their free vars
+    remain in scope.
+
+  * Note [Rule dependency info]
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    The VarSet in a RuleInfo is used for dependency analysis in the
+    occurrence analyser.  We must track free vars in *both* lhs and rhs.
+    Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
+    Why both? Consider
+        x = y
+        RULE f x = v+4
+    Then if we substitute y for x, we'd better do so in the
+    rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
+    as well as 'v'
+
+  * Note [Rules are visible in their own rec group]
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    We want the rules for 'f' to be visible in f's right-hand side.
+    And we'd like them to be visible in other functions in f's Rec
+    group.  E.g. in Note [Specialisation rules] we want f' rule
+    to be visible in both f's RHS, and fs's RHS.
+
+    This means that we must simplify the RULEs first, before looking
+    at any of the definitions.  This is done by Simplify.simplRecBind,
+    when it calls addLetIdInfo.
 
 ------------------------------------------------------------
 Note [Inline rules]
@@ -724,6 +653,13 @@ propagate.
   This appears to be very rare in practice. TODO Perhaps we should gather
   statistics to be sure.
 
+Note [Unfoldings and join points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We assume that anything in an unfolding occurs multiple times, since
+unfoldings are often copied (that's the whole point!). But we still
+need to track tail calls for the purpose of finding join points.
+
+
 ------------------------------------------------------------
 Note [Adjusting right-hand sides]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -794,45 +730,50 @@ occAnalNonRecBind env lvl imp_rule_edges bndr rhs body_usage
   = (body_usage, [])
 
   | otherwise                   -- It's mentioned in the body
-  = (body_usage' `andUDs` rhs_usage4, [NonRec final_bndr rhs'])
+  = (body_usage' `andUDs` rhs_usage, [NonRec final_bndr rhs'])
   where
     (body_usage', tagged_bndr) = tagNonRecBinder lvl body_usage bndr
-    occ                        = idOccInfo tagged_bndr
+    final_bndr = tagged_bndr `setIdUnfolding` unf'
+                             `setIdSpecialisation` mkRuleInfo rules'
+    rhs_usage = rhs_uds `andUDs` unf_uds `andUDs` rule_uds
 
     -- Get the join info from the *new* decision
     -- See Note [Join points and unfoldings/rules]
     mb_join_arity = willBeJoinId_maybe tagged_bndr
     is_join_point = isJust mb_join_arity
 
-    final_bndr = tagged_bndr `setIdUnfolding` unf'
-                             `setIdSpecialisation` mkRuleInfo rules'
-
+    --------- Right hand side ---------
     env1 | is_join_point    = env  -- See Note [Join point RHSs]
          | certainly_inline = env  -- See Note [Cascading inlines]
          | otherwise        = rhsCtxt env
 
     -- See Note [Sources of one-shot information]
     rhs_env = env1 { occ_one_shots = argOneShots dmd }
+    (rhs_uds, rhs') = occAnalRhs rhs_env NonRecursive mb_join_arity rhs
 
-    (rhs_usage1, rhs') = occAnalRhs rhs_env mb_join_arity rhs
-
-    -- Unfoldings
+    --------- Unfolding ---------
     -- See Note [Unfoldings and join points]
     unf = idUnfolding bndr
-    (unf_usage, unf') = occAnalUnfolding rhs_env mb_join_arity unf
-    rhs_usage2 = rhs_usage1 `andUDs` unf_usage
+    (unf_uds, unf') = occAnalUnfolding rhs_env NonRecursive mb_join_arity unf
 
-    -- Rules
+    --------- Rules ---------
     -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
-    rules_w_uds = occAnalRules rhs_env mb_join_arity bndr
-    rule_uds    = map (\(_, l, r) -> l `andUDs` r) rules_w_uds
-    rules'      = map fstOf3 rules_w_uds
-    rhs_usage3 = foldr andUDs rhs_usage2 rule_uds
-    rhs_usage4 = case lookupVarEnv imp_rule_edges bndr of
-                   Nothing -> rhs_usage3
-                   Just vs -> addManyOccs rhs_usage3 vs
-       -- See Note [Preventing loops due to imported functions rules]
-
+    rules_w_uds  = occAnalRules rhs_env mb_join_arity bndr
+    rules'       = map fstOf3 rules_w_uds
+    imp_rule_uds = impRulesScopeUsage (lookupImpRules imp_rule_edges bndr)
+         -- imp_rule_uds: consider
+         --     h = ...
+         --     g = ...
+         --     RULE map g = h
+         -- Then we want to ensure that h is in scope everwhere
+         -- that g is (since the RULE might turn g into h), so
+         -- we make g mention h.
+
+    rule_uds = foldr add_rule_uds imp_rule_uds rules_w_uds
+    add_rule_uds (_, l, r) uds = l `andUDs` r `andUDs` uds
+
+    ----------
+    occ = idOccInfo tagged_bndr
     certainly_inline -- See Note [Cascading inlines]
       = case occ of
           OneOcc { occ_in_lam = NotInsideLam, occ_n_br = 1 }
@@ -846,13 +787,13 @@ occAnalNonRecBind env lvl imp_rule_edges bndr rhs body_usage
 -----------------
 occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
                -> UsageDetails -> (UsageDetails, [CoreBind])
+-- For a recursive group, we
+--      * occ-analyse all the RHSs
+--      * compute strongly-connected components
+--      * feed those components to occAnalRec
+-- See Note [Recursive bindings: the grand plan]
 occAnalRecBind env lvl imp_rule_edges pairs body_usage
   = foldr (occAnalRec rhs_env lvl) (body_usage, []) sccs
-        -- For a recursive group, we
-        --      * occ-analyse all the RHSs
-        --      * compute strongly-connected components
-        --      * feed those components to occAnalRec
-        -- See Note [Recursive bindings: the grand plan]
   where
     sccs :: [SCC Details]
     sccs = {-# SCC "occAnalBind.scc" #-}
@@ -866,14 +807,6 @@ occAnalRecBind env lvl imp_rule_edges pairs body_usage
     bndr_set = mkVarSet bndrs
     rhs_env  = env `addInScope` bndrs
 
-{-
-Note [Unfoldings and join points]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-We assume that anything in an unfolding occurs multiple times, since unfoldings
-are often copied (that's the whole point!). But we still need to track tail
-calls for the purpose of finding join points.
--}
 
 -----------------------------
 occAnalRec :: OccEnv -> TopLevelFlag
@@ -893,8 +826,8 @@ occAnalRec _ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs
      NonRec tagged_bndr rhs : binds)
   where
     (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr
-    rhs_uds' = adjustRhsUsage (willBeJoinId_maybe tagged_bndr) NonRecursive
-                              rhs_bndrs rhs_uds
+    rhs_uds'   = adjustRhsUsage NonRecursive (willBeJoinId_maybe tagged_bndr)
+                                rhs_bndrs rhs_uds
 
         -- The Rec case is the interesting one
         -- See Note [Recursive bindings: the grand plan]
@@ -910,15 +843,14 @@ occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds)
     (final_uds, Rec pairs : binds)
 
   where
-    bndrs    = map nd_bndr details_s
-    bndr_set = mkVarSet bndrs
+    bndrs = map nd_bndr details_s
 
     ------------------------------
-        -- See Note [Choosing loop breakers] for loop_breaker_nodes
+    -- Make the nodes for the loop-breaker analysis
+    -- See Note [Choosing loop breakers] for loop_breaker_nodes
     final_uds :: UsageDetails
     loop_breaker_nodes :: [LetrecNode]
-    (final_uds, loop_breaker_nodes)
-      = mkLoopBreakerNodes env lvl bndr_set body_uds details_s
+    (final_uds, loop_breaker_nodes) = mkLoopBreakerNodes env lvl body_uds details_s
 
     ------------------------------
     weak_fvs :: VarSet
@@ -927,8 +859,8 @@ occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds)
     ---------------------------
     -- Now reconstruct the cycle
     pairs :: [(Id,CoreExpr)]
-    pairs | isEmptyVarSet weak_fvs = reOrderNodes   0 bndr_set weak_fvs loop_breaker_nodes []
-          | otherwise              = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes []
+    pairs | isEmptyVarSet weak_fvs = reOrderNodes   0 weak_fvs loop_breaker_nodes []
+          | otherwise              = loopBreakNodes 0 weak_fvs loop_breaker_nodes []
           -- If weak_fvs is empty, the loop_breaker_nodes will include
           -- all the edges in the original scope edges [remember,
           -- weak_fvs is the difference between scope edges and
@@ -937,14 +869,151 @@ occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds)
           -- exactly that case
 
 
-------------------------------------------------------------------
---                 Loop breaking
-------------------------------------------------------------------
+{- *********************************************************************
+*                                                                      *
+                Loop breaking
+*                                                                      *
+********************************************************************* -}
+
+{- Note [Choosing loop breakers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Loop breaking is surprisingly subtle.  First read the section 4 of
+"Secrets of the GHC inliner".  This describes our basic plan.
+We avoid infinite inlinings by choosing loop breakers, and
+ensuring that a loop breaker cuts each loop.
+
+See also Note [Inlining and hs-boot files] in GHC.Core.ToIface, which
+deals with a closely related source of infinite loops.
+
+Fundamentally, we do SCC analysis on a graph.  For each recursive
+group we choose a loop breaker, delete all edges to that node,
+re-analyse the SCC, and iterate.
+
+But what is the graph?  NOT the same graph as was used for Note
+[Forming Rec groups]!  In particular, a RULE is like an equation for
+'f' that is *always* inlined if it is applicable.  We do *not* disable
+rules for loop-breakers.  It's up to whoever makes the rules to make
+sure that the rules themselves always terminate.  See Note [Rules for
+recursive functions] in GHC.Core.Opt.Simplify
+
+Hence, if
+    f's RHS (or its INLINE template if it has one) mentions g, and
+    g has a RULE that mentions h, and
+    h has a RULE that mentions f
+
+then we *must* choose f to be a loop breaker.  Example: see Note
+[Specialisation rules].
+
+In general, take the free variables of f's RHS, and augment it with
+all the variables reachable by RULES from those starting points.  That
+is the whole reason for computing rule_fv_env in occAnalBind.  (Of
+course we only consider free vars that are also binders in this Rec
+group.)  See also Note [Finding rule RHS free vars]
+
+Note that when we compute this rule_fv_env, we only consider variables
+free in the *RHS* of the rule, in contrast to the way we build the
+Rec group in the first place (Note [Rule dependency info])
+
+Note that if 'g' has RHS that mentions 'w', we should add w to
+g's loop-breaker edges.  More concretely there is an edge from f -> g
+iff
+        (a) g is mentioned in f's RHS `xor` f's INLINE rhs
+            (see Note [Inline rules])
+        (b) or h is mentioned in f's RHS, and
+            g appears in the RHS of an active RULE of h
+            or a /transitive sequence/ of /active rules/ starting with h
+
+Why "active rules"?  See Note [Finding rule RHS free vars]
+
+Why "transitive sequence"?  Because active rules apply
+unconditionallly, without checking loop-breaker-ness.
+See Note [Loop breaker dependencies].
+
+Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
+chosen as a loop breaker, because their RHSs don't mention each other.
+And indeed both can be inlined safely.
+
+Note again that the edges of the graph we use for computing loop breakers
+are not the same as the edges we use for computing the Rec blocks.
+That's why we use
+
+- makeNode             for the Rec block analysis
+- makeLoopBreakerNodes for the loop breaker analysis
+
+  * Note [Finding rule RHS free vars]
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Consider this real example from Data Parallel Haskell
+         tagZero :: Array Int -> Array Tag
+         {-# INLINE [1] tagZeroes #-}
+         tagZero xs = pmap (\x -> fromBool (x==0)) xs
+
+         {-# RULES "tagZero" [~1] forall xs n.
+             pmap fromBool <blah blah> = tagZero xs #-}
+    So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
+    However, tagZero can only be inlined in phase 1 and later, while
+    the RULE is only active *before* phase 1.  So there's no problem.
+
+    To make this work, we look for the RHS free vars only for
+    *active* rules. That's the reason for the occ_rule_act field
+    of the OccEnv.
+
+  * Note [Weak loop breakers]
+    ~~~~~~~~~~~~~~~~~~~~~~~~~
+    There is a last nasty wrinkle.  Suppose we have
+
+        Rec { f = f_rhs
+              RULE f [] = g
+
+              h = h_rhs
+              g = h
+              ...more...
+        }
+
+    Remember that we simplify the RULES before any RHS (see Note
+    [Rules are visible in their own rec group] above).
+
+    So we must *not* postInlineUnconditionally 'g', even though
+    its RHS turns out to be trivial.  (I'm assuming that 'g' is
+    not chosen as a loop breaker.)  Why not?  Because then we
+    drop the binding for 'g', which leaves it out of scope in the
+    RULE!
+
+    Here's a somewhat different example of the same thing
+        Rec { g = h
+            ; h = ...f...
+            ; f = f_rhs
+              RULE f [] = g }
+    Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
+    g, because the RULE for f is active throughout.  So the RHS of h
+    might rewrite to     h = ...g...
+    So g must remain in scope in the output program!
+
+    We "solve" this by:
+
+        Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
+        iff g is a "missing free variable" of the Rec group
+
+    A "missing free variable" x is one that is mentioned in an RHS or
+    INLINE or RULE of a binding in the Rec group, but where the
+    dependency on x may not show up in the loop_breaker_nodes (see
+    note [Choosing loop breakers} above).
+
+    A normal "strong" loop breaker has IAmLoopBreaker False.  So
+
+                                    Inline  postInlineUnconditionally
+   strong   IAmLoopBreaker False    no      no
+   weak     IAmLoopBreaker True     yes     no
+            other                   yes     yes
+
+    The **sole** reason for this kind of loop breaker is so that
+    postInlineUnconditionally does not fire.  Ugh.  (Typically it'll
+    inline via the usual callSiteInline stuff, so it'll be dead in the
+    next pass, so the main Ugh is the tiresome complication.)
+-}
 
 type Binding = (Id,CoreExpr)
 
 loopBreakNodes :: Int
-               -> VarSet        -- All binders
                -> VarSet        -- Binders whose dependencies may be "missing"
                                 -- See Note [Weak loop breakers]
                -> [LetrecNode]
@@ -968,7 +1037,7 @@ recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
 -}
 
 -- Return the bindings sorted into a plausible order, and marked with loop breakers.
-loopBreakNodes depth bndr_set weak_fvs nodes binds
+loopBreakNodes depth weak_fvs nodes binds
   = -- pprTrace "loopBreakNodes" (ppr nodes) $
     go (stronglyConnCompFromEdgedVerticesUniqR nodes)
   where
@@ -977,20 +1046,20 @@ loopBreakNodes depth bndr_set weak_fvs nodes binds
 
     loop_break_scc scc binds
       = case scc of
-          AcyclicSCC node  -> mk_non_loop_breaker weak_fvs node : binds
-          CyclicSCC nodes  -> reOrderNodes depth bndr_set weak_fvs nodes binds
+          AcyclicSCC node  -> nodeBinding (mk_non_loop_breaker weak_fvs) node : binds
+          CyclicSCC nodes  -> reOrderNodes depth weak_fvs nodes binds
 
 ----------------------------------
-reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
+reOrderNodes :: Int -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
     -- Choose a loop breaker, mark it no-inline,
     -- and call loopBreakNodes on the rest
-reOrderNodes _ _ _ []     _     = panic "reOrderNodes"
-reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds
-reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
+reOrderNodes _ _ []     _     = panic "reOrderNodes"
+reOrderNodes _ _ [node] binds = nodeBinding mk_loop_breaker node : binds
+reOrderNodes depth weak_fvs (node : nodes) binds
   = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen
     --                               , text "chosen" <+> ppr chosen_nodes ]) $
-    loopBreakNodes new_depth bndr_set weak_fvs unchosen $
-    (map mk_loop_breaker chosen_nodes ++ binds)
+    loopBreakNodes new_depth weak_fvs unchosen $
+    (map (nodeBinding mk_loop_breaker) chosen_nodes ++ binds)
   where
     (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
                                                  (nd_score (node_payload node))
@@ -1002,20 +1071,24 @@ reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
         -- After two iterations (d=0, d=1) give up
         -- and approximate, returning to d=0
 
-mk_loop_breaker :: LetrecNode -> Binding
-mk_loop_breaker (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs})
-  = (bndr `setIdOccInfo` strongLoopBreaker { occ_tail = tail_info }, rhs)
+nodeBinding :: (Id -> Id) -> LetrecNode -> Binding
+nodeBinding set_id_occ (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs})
+  = (set_id_occ bndr, rhs)
+
+mk_loop_breaker :: Id -> Id
+mk_loop_breaker bndr
+  = bndr `setIdOccInfo` occ'
   where
+    occ'      = strongLoopBreaker { occ_tail = tail_info }
     tail_info = tailCallInfo (idOccInfo bndr)
 
-mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding
+mk_non_loop_breaker :: VarSet -> Id -> Id
 -- See Note [Weak loop breakers]
-mk_non_loop_breaker weak_fvs (node_payload -> ND { nd_bndr = bndr
-                                                 , nd_rhs = rhs})
-  | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr occ', rhs)
-  | otherwise                  = (bndr, rhs)
+mk_non_loop_breaker weak_fvs bndr
+  | bndr `elemVarSet` weak_fvs = setIdOccInfo bndr occ'
+  | otherwise                  = bndr
   where
-    occ' = weakLoopBreaker { occ_tail = tail_info }
+    occ'      = weakLoopBreaker { occ_tail = tail_info }
     tail_info = tailCallInfo (idOccInfo bndr)
 
 ----------------------------------
@@ -1178,11 +1251,6 @@ ToDo: try using the occurrence info for the inline'd binder.
 ************************************************************************
 -}
 
-type ImpRuleEdges = IdEnv IdSet     -- Mapping from FVs of imported RULE LHSs to RHS FVs
-
-noImpRuleEdges :: ImpRuleEdges
-noImpRuleEdges = emptyVarEnv
-
 type LetrecNode = Node Unique Details  -- Node comes from Digraph
                                        -- The Unique key is gotten from the Id
 data Details
@@ -1209,7 +1277,8 @@ data Details
                                 -- dependencies might not be respected by loop_breaker_nodes
                                 -- See Note [Weak loop breakers]
 
-       , nd_active_rule_fvs :: IdSet   -- Free variables of the RHS of active RULES
+       , nd_active_rule_fvs :: IdSet    -- Variables bound in this Rec group that are free
+                                        -- in the RHS of an active rule for this bndr
 
        , nd_score :: NodeScore
   }
@@ -1220,7 +1289,7 @@ instance Outputable Details where
                   , text "uds =" <+> ppr (nd_uds nd)
                   , text "inl =" <+> ppr (nd_inl nd)
                   , text "weak =" <+> ppr (nd_weak nd)
-                  , text "rule =" <+> ppr (nd_active_rule_fvs nd)
+                  , text "rule_rvs =" <+> ppr (nd_active_rule_fvs nd)
                   , text "score =" <+> ppr (nd_score nd)
              ])
 
@@ -1241,7 +1310,9 @@ makeNode :: OccEnv -> ImpRuleEdges -> VarSet
          -> (Var, CoreExpr) -> LetrecNode
 -- See Note [Recursive bindings: the grand plan]
 makeNode env imp_rule_edges bndr_set (bndr, rhs)
-  = DigraphNode details (varUnique bndr) (nonDetKeysUniqSet node_fvs)
+  = DigraphNode { node_payload      = details
+                , node_key          = varUnique bndr
+                , node_dependencies = nonDetKeysUniqSet node_fvs }
     -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR
     -- is still deterministic with edges in nondeterministic order as
     -- explained in Note [Deterministic SCC] in GHC.Data.Graph.Directed.
@@ -1249,7 +1320,7 @@ makeNode env imp_rule_edges bndr_set (bndr, rhs)
     details = ND { nd_bndr            = bndr'
                  , nd_rhs             = rhs'
                  , nd_rhs_bndrs       = bndrs'
-                 , nd_uds             = rhs_usage3
+                 , nd_uds             = rhs_usage
                  , nd_inl             = inl_fvs
                  , nd_weak            = node_fvs `minusVarSet` inl_fvs
                  , nd_active_rule_fvs = active_rule_fvs
@@ -1258,6 +1329,11 @@ makeNode env imp_rule_edges bndr_set (bndr, rhs)
     bndr' = bndr `setIdUnfolding`      unf'
                  `setIdSpecialisation` mkRuleInfo rules'
 
+    rhs_usage = rhs_uds `andUDs` unf_uds `andUDs` rule_uds
+                   -- Note [Rules are extra RHSs]
+                   -- Note [Rule dependency info]
+    node_fvs   = udFreeVars bndr_set rhs_usage
+
     -- Get join point info from the *current* decision
     -- We don't know what the new decision will be!
     -- Using the old decision at least allows us to
@@ -1265,66 +1341,68 @@ makeNode env imp_rule_edges bndr_set (bndr, rhs)
     -- See Note [Join points and unfoldings/rules]
     mb_join_arity = isJoinId_maybe bndr
 
+    --------- Right hand side ---------
     -- Constructing the edges for the main Rec computation
     -- See Note [Forming Rec groups]
-    (bndrs, body) = collectBinders rhs
-    rhs_env       = rhsCtxt env
-    (rhs_usage1, bndrs', body') = occAnalLamOrRhs rhs_env bndrs body
-    rhs'       = mkLams bndrs' body'
-    rhs_usage3 = foldr andUDs rhs_usage1 rule_uds
-                 `andUDs` unf_uds
-                   -- Note [Rules are extra RHSs]
-                   -- Note [Rule dependency info]
-    node_fvs   = udFreeVars bndr_set rhs_usage3
+    -- Do not use occAnalRhs because we don't yet know
+    -- the final answer for mb_join_arity
+    (bndrs, body)            = collectBinders rhs
+    rhs_env                  = rhsCtxt env
+    (rhs_uds, bndrs', body') = occAnalLamOrRhs rhs_env bndrs body
+    rhs'                     = mkLams bndrs' body'
+
+    --------- Unfolding ---------
+    -- See Note [Unfoldings and join points]
+    unf = realIdUnfolding bndr -- realIdUnfolding: Ignore loop-breaker-ness
+                               -- here because that is what we are setting!
+    (unf_uds, unf') = occAnalUnfolding rhs_env Recursive mb_join_arity unf
+    inl_uds | isStableUnfolding unf = unf_uds
+            | otherwise             = rhs_uds
+    inl_fvs = udFreeVars bndr_set inl_uds
+    -- inl_fvs: the vars that would become free if the function was inlined;
+    -- usually that means the RHS, unless the unfolding is a stable one.
+    -- Note: We could do this only for functions with an *active* unfolding
+    --       (returning emptyVarSet for an inactive one), but is_active
+    --       isn't the right thing (it tells about RULE activation),
+    --       so we'd need more plumbing
 
-    -- Finding the free variables of the rules
-    is_active = occ_rule_act env :: Activation -> Bool
 
+    --------- IMP-RULES --------
+    is_active     = occ_rule_act env :: Activation -> Bool
+    imp_rule_info = lookupImpRules imp_rule_edges bndr
+    imp_rule_uds  = impRulesScopeUsage imp_rule_info
+    imp_rule_fvs  = impRulesActiveFvs is_active bndr_set imp_rule_info
+
+    --------- All rules --------
     rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
     rules_w_uds = occAnalRules rhs_env mb_join_arity bndr
+    rules'      = map fstOf3 rules_w_uds
 
-    rules' = map fstOf3 rules_w_uds
-
-    rules_w_rhs_fvs :: [(Activation, VarSet)]    -- Find the RHS fvs
-    rules_w_rhs_fvs = maybe id (\ids -> ((AlwaysActive, ids):))
-                               (lookupVarEnv imp_rule_edges bndr)
-      -- See Note [Preventing loops due to imported functions rules]
-                      [ (ru_act rule, udFreeVars bndr_set rhs_uds)
-                      | (rule, _, rhs_uds) <- rules_w_uds ]
-    rule_uds = map (\(_, l, r) -> l `andUDs` r) rules_w_uds
-    active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs
-                                        , is_active a]
+    rule_uds = foldr add_rule_uds imp_rule_uds rules_w_uds
+    add_rule_uds (_, l, r) uds = l `andUDs` r `andUDs` uds
 
-    -- Finding the usage details of the INLINE pragma (if any)
-    unf = realIdUnfolding bndr -- realIdUnfolding: Ignore loop-breaker-ness
-                               -- here because that is what we are setting!
-    (unf_uds, unf') = occAnalUnfolding rhs_env mb_join_arity unf
+    active_rule_fvs = foldr add_active_rule imp_rule_fvs rules_w_uds
+    add_active_rule (rule, _, rhs_uds) fvs
+      | is_active (ruleActivation rule)
+      = udFreeVars bndr_set rhs_uds `unionVarSet` fvs
+      | otherwise
+      = fvs
 
-    -- Find the "nd_inl" free vars; for the loop-breaker phase
-    -- These are the vars that would become free if the function
-    -- was inlinined; usually that means the RHS, unless the
-    -- unfolding is a stable one.
-    -- Note: We could do this only for functions with an *active* unfolding
-    --       (returning emptyVarSet for an inactive one), but is_active
-    --       isn't the right thing (it tells about RULE activation),
-    --       so we'd need more plumbing
-    inl_fvs | isStableUnfolding unf = udFreeVars bndr_set unf_uds
-            | otherwise             = udFreeVars bndr_set rhs_usage1
 
 mkLoopBreakerNodes :: OccEnv -> TopLevelFlag
-                   -> VarSet
                    -> UsageDetails   -- for BODY of let
                    -> [Details]
                    -> (UsageDetails, -- adjusted
                        [LetrecNode])
--- Does four things
+-- This function primarily creates the Nodes for the
+-- loop-breaker SCC analysis.  More specifically:
 --   a) tag each binder with its occurrence info
 --   b) add a NodeScore to each node
 --   c) make a Node with the right dependency edges for
 --      the loop-breaker SCC analysis
 --   d) adjust each RHS's usage details according to
 --      the binder's (new) shotness and join-point-hood
-mkLoopBreakerNodes env lvl bndr_set body_uds details_s
+mkLoopBreakerNodes env lvl body_uds details_s
   = (final_uds, zipWithEqual "mkLoopBreakerNodes" mk_lb_node details_s bndrs')
   where
     (final_uds, bndrs')
@@ -1334,28 +1412,46 @@ mkLoopBreakerNodes env lvl bndr_set body_uds details_s
                  <- details_s ]
 
     mk_lb_node nd@(ND { nd_bndr = old_bndr, nd_inl = inl_fvs }) new_bndr
-      = DigraphNode nd' (varUnique old_bndr) (nonDetKeysUniqSet lb_deps)
+      = DigraphNode { node_payload      = new_nd
+                    , node_key          = varUnique old_bndr
+                    , node_dependencies = nonDetKeysUniqSet lb_deps }
               -- It's OK to use nonDetKeysUniqSet here as
               -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
               -- in nondeterministic order as explained in
               -- Note [Deterministic SCC] in GHC.Data.Graph.Directed.
       where
-        nd'     = nd { nd_bndr = new_bndr, nd_score = score }
-        score   = nodeScore env new_bndr lb_deps nd
+        new_nd = nd { nd_bndr = new_bndr, nd_score = score }
+        score  = nodeScore env new_bndr lb_deps nd
         lb_deps = extendFvs_ rule_fv_env inl_fvs
-
+        -- See Note [Loop breaker dependencies]
 
     rule_fv_env :: IdEnv IdSet
-        -- Maps a variable f to the variables from this group
-        --      mentioned in RHS of active rules for f
-        -- Domain is *subset* of bound vars (others have no rule fvs)
-    rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
-    init_rule_fvs   -- See Note [Finding rule RHS free vars]
-      = [ (b, trimmed_rule_fvs)
-        | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
-        , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
-        , not (isEmptyVarSet trimmed_rule_fvs) ]
+    -- Maps a variable f to the variables from this group
+    --      reachable by a sequence of RULES starting with f
+    -- Domain is *subset* of bound vars (others have no rule fvs)
+    -- See Note [Finding rule RHS free vars]
+    -- Why transClosureFV?  See Note [Loop breaker dependencies]
+    rule_fv_env = transClosureFV $ mkVarEnv $
+                  [ (b, rule_fvs)
+                  | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
+                  , not (isEmptyVarSet rule_fvs) ]
+
+{- Note [Loop breaker dependencies]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The loop breaker dependencies of x in a recursive
+group { f1 = e1; ...; fn = en } are:
 
+- The "inline free variables" of f: the fi free in
+  either f's unfolding (if f has a stable unfolding)
+  of     f's RHS       (if not)
+
+- Any fi reachable from those inline free variables by a sequence
+  of RULE rewrites.  Remember, rule rewriting is not affected
+  by fi being a loop breaker, so we have to take the transitive
+  closure in case f is the only possible loop breaker in the loop.
+
+  Hence rule_fv_env.  We need only account for /active/ rules.
+-}
 
 ------------------------------------------
 nodeScore :: OccEnv
@@ -1567,29 +1663,31 @@ Hence the is_lb field of NodeScore
 ************************************************************************
 -}
 
-occAnalRhs :: OccEnv -> Maybe JoinArity
+occAnalRhs :: OccEnv -> RecFlag -> Maybe JoinArity
            -> CoreExpr   -- RHS
            -> (UsageDetails, CoreExpr)
-occAnalRhs env mb_join_arity rhs
+occAnalRhs env is_rec mb_join_arity rhs
   = case occAnalLamOrRhs env bndrs body of { (body_usage, bndrs', body') ->
-    let rhs' = mkLams (markJoinOneShots mb_join_arity bndrs') body'
+    let final_bndrs | isRec is_rec = bndrs'
+                    | otherwise    = markJoinOneShots mb_join_arity bndrs'
                -- For a /non-recursive/ join point we can mark all
                -- its join-lambda as one-shot; and it's a good idea to do so
 
         -- Final adjustment
-        rhs_usage = adjustRhsUsage mb_join_arity NonRecursive bndrs' body_usage
+        rhs_usage = adjustRhsUsage is_rec mb_join_arity final_bndrs body_usage
 
-    in (rhs_usage, rhs') }
+    in (rhs_usage, mkLams final_bndrs body') }
   where
     (bndrs, body) = collectBinders rhs
 
 occAnalUnfolding :: OccEnv
+                 -> RecFlag
                  -> Maybe JoinArity   -- See Note [Join points and unfoldings/rules]
                  -> Unfolding
                  -> (UsageDetails, Unfolding)
 -- Occurrence-analyse a stable unfolding;
 -- discard a non-stable one altogether.
-occAnalUnfolding env mb_join_arity unf
+occAnalUnfolding env is_rec mb_join_arity unf
   = case unf of
       unf@(CoreUnfolding { uf_tmpl = rhs, uf_src = src })
         | isStableSource src -> (usage,        unf')
@@ -1600,7 +1698,7 @@ occAnalUnfolding env mb_join_arity unf
               -- to guide its decisions.  It's ok to leave un-substituted
               -- expressions in the tree because all the variables that were in
               -- scope remain in scope; there is no cloning etc.
-          (usage, rhs') = occAnalRhs env mb_join_arity rhs
+          (usage, rhs') = occAnalRhs env is_rec mb_join_arity rhs
 
           unf' | noBinderSwaps env = unf -- Note [Unfoldings and rules]
                | otherwise         = unf { uf_tmpl = rhs' }
@@ -1897,7 +1995,7 @@ occAnalApp env (Var fun, args, ticks)
   --     This caused #18296
   | fun `hasKey` runRWKey
   , [t1, t2, arg]  <- args
-  , let (usage, arg') = occAnalRhs env (Just 1) arg
+  , let (usage, arg') = occAnalRhs env NonRecursive (Just 1) arg
   = (usage, mkTicks ticks $ mkApps (Var fun) [t1, t2, arg'])
 
 occAnalApp env (Var fun, args, ticks)
@@ -2233,6 +2331,7 @@ addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
   = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
 
+--------------------
 transClosureFV :: VarEnv VarSet -> VarEnv VarSet
 -- If (f,g), (g,h) are in the input, then (f,h) is in the output
 --                                   as well as (f,g), (g,h)
@@ -2643,7 +2742,10 @@ v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud
 
 udFreeVars :: VarSet -> UsageDetails -> VarSet
 -- Find the subset of bndrs that are mentioned in uds
-udFreeVars bndrs ud = restrictUniqSetToUFM bndrs (ud_env ud)
+udFreeVars bndrs ud = restrictFreeVars bndrs (ud_env ud)
+
+restrictFreeVars :: VarSet -> OccInfoEnv -> VarSet
+restrictFreeVars bndrs fvs = restrictUniqSetToUFM bndrs fvs
 
 {- Note [Do not mark CoVars as dead]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2712,20 +2814,20 @@ flattenUsageDetails ud
 
 -------------------
 -- See Note [Adjusting right-hand sides]
-adjustRhsUsage :: Maybe JoinArity -> RecFlag
+adjustRhsUsage :: RecFlag -> Maybe JoinArity
                -> [CoreBndr]     -- Outer lambdas, AFTER occ anal
                -> UsageDetails   -- From body of lambda
                -> UsageDetails
-adjustRhsUsage mb_join_arity rec_flag bndrs usage
-  = markAllInsideLamIf     (not one_shot)   $
+adjustRhsUsage is_rec mb_join_arity bndrs usage
+  = markAllInsideLamIf (not one_shot) $
     markAllNonTailIf (not exact_join) $
     usage
   where
     one_shot = case mb_join_arity of
                  Just join_arity
-                   | isRec rec_flag -> False
-                   | otherwise      -> all isOneShotBndr (drop join_arity bndrs)
-                 Nothing            -> all isOneShotBndr bndrs
+                   | isRec is_rec -> False
+                   | otherwise    -> all isOneShotBndr (drop join_arity bndrs)
+                 Nothing          -> all isOneShotBndr bndrs
 
     exact_join = exactJoin mb_join_arity bndrs
 
@@ -2806,7 +2908,7 @@ tagRecBinders lvl body_uds triples
      --    join-point-hood decision
      rhs_udss' = map adjust triples
      adjust (bndr, rhs_uds, rhs_bndrs)
-       = adjustRhsUsage mb_join_arity Recursive rhs_bndrs rhs_uds
+       = adjustRhsUsage Recursive mb_join_arity rhs_bndrs rhs_uds
        where
          -- Can't use willBeJoinId_maybe here because we haven't tagged the
          -- binder yet (the tag depends on these adjustments!)
diff --git a/testsuite/tests/simplCore/should_compile/T18603.hs b/testsuite/tests/simplCore/should_compile/T18603.hs
new file mode 100644
index 0000000000..d85f77c66a
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T18603.hs
@@ -0,0 +1,29 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module Test where
+
+import GHC.Base (build, foldr, id, Maybe(..))
+
+catMaybes :: [Maybe a] -> [a]
+catMaybes = mapMaybe id
+
+mapMaybe          :: (a -> Maybe b) -> [a] -> [b]
+mapMaybe _ []     = []
+mapMaybe f (x:xs) =
+ let rs = mapMaybe f xs in
+ case f x of
+  Nothing -> rs
+  Just r  -> r:rs
+{-# NOINLINE [1] mapMaybe #-}
+
+{-# RULES
+"mapMaybe"     [~1] forall f xs. mapMaybe f xs
+                    = build (\c n -> foldr (mapMaybeFB c f) n xs)
+"mapMaybeList" [1]  forall f. foldr (mapMaybeFB (:) f) [] = mapMaybe f
+  #-}
+
+{-# INLINE [0] mapMaybeFB #-} -- See Note [Inline FB functions] in GHC.List
+mapMaybeFB :: (b -> r -> r) -> (a -> Maybe b) -> a -> r -> r
+mapMaybeFB cons f x next = case f x of
+  Nothing -> next
+  Just r -> cons r next
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index d377cfd06b..1535e32253 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -333,6 +333,7 @@ test('T18347', normal, compile, ['-dcore-lint -O'])
 test('T18355', [ grep_errmsg(r'OneShot') ], compile, ['-O -ddump-simpl -dsuppress-uniques'])
 test('T18399', normal, compile, ['-dcore-lint -O'])
 test('T18589', normal, compile, ['-dcore-lint -O'])
+test('T18603', normal, compile, ['-dcore-lint -O'])
 
 # T18649 should /not/ generate a specialisation rule
 test('T18649', normal, compile, ['-O -ddump-rules -Wno-simplifiable-class-constraints'])
author	Simon Peyton Jones <simonpj@microsoft.com>	2020-08-27 16:11:57 +0100
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2020-09-22 05:37:59 -0400
commit	416bd50e58b23ad70813b18a913ca77a3ab6e936 (patch)
tree	64826e21ef4c9fa9beae9e20ee1e553540f15e3f
parent	6de40f83c53c3b1899f7b4912badbe98e4fbde88 (diff)
download	haskell-416bd50e58b23ad70813b18a913ca77a3ab6e936.tar.gz