1 files changed, 329 insertions, 133 deletions
diff --git a/compiler/simplCore/SimplUtils.hs b/compiler/simplCore/SimplUtils.hs
index e6e660b91f..ca1b9bd23d 100644
--- a/compiler/simplCore/SimplUtils.hs
+++ b/compiler/simplCore/SimplUtils.hs
@@ -17,8 +17,8 @@ module SimplUtils (
         simplEnvForGHCi, updModeForStableUnfoldings, updModeForRules,
 
         -- The continuation type
-        SimplCont(..), DupFlag(..),
-        isSimplified,
+        SimplCont(..), DupFlag(..), StaticEnv,
+        isSimplified, contIsStop,
         contIsDupable, contResultType, contHoleType,
         contIsTrivial, contArgs,
         countArgs,
@@ -30,13 +30,18 @@ module SimplUtils (
         addValArgTo, addCastTo, addTyArgTo,
         argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
 
-        abstractFloats
+        abstractFloats,
+
+        -- Utilities
+        isExitJoinId
     ) where
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import SimplEnv
-import CoreMonad        ( SimplifierMode(..), Tick(..) )
+import CoreMonad        ( SimplMode(..), Tick(..) )
 import DynFlags
 import CoreSyn
 import qualified CoreSubst
@@ -57,6 +62,7 @@ import DataCon          ( dataConWorkId, isNullaryRepDataCon )
 import VarSet
 import BasicTypes
 import Util
+import OrdList          ( isNilOL )
 import MonadUtils
 import Outputable
 import Pair
@@ -114,7 +120,7 @@ data SimplCont
   | ApplyToVal         -- (ApplyToVal arg K)[e] = K[ e arg ]
       { sc_dup  :: DupFlag      -- See Note [DupFlag invariants]
       , sc_arg  :: InExpr       -- The argument,
-      , sc_env  :: StaticEnv    --     and its static env
+      , sc_env  :: StaticEnv    -- see Note [StaticEnv invariant]
       , sc_cont :: SimplCont }
 
   | ApplyToTy          -- (ApplyToTy ty K)[e] = K[ e ty ]
@@ -127,7 +133,7 @@ data SimplCont
       { sc_dup  :: DupFlag        -- See Note [DupFlag invariants]
       , sc_bndr :: InId           -- case binder
       , sc_alts :: [InAlt]        -- Alternatives
-      , sc_env  :: StaticEnv      --   and their static environment
+      , sc_env  :: StaticEnv      -- See Note [StaticEnv invariant]
       , sc_cont :: SimplCont }
 
   -- The two strict forms have no DupFlag, because we never duplicate them
@@ -137,7 +143,7 @@ data SimplCont
       , sc_bndr  :: InId
       , sc_bndrs :: [InBndr]
       , sc_body  :: InExpr
-      , sc_env   :: StaticEnv
+      , sc_env   :: StaticEnv      -- See Note [StaticEnv invariant]
       , sc_cont  :: SimplCont }
 
   | StrictArg           -- (StrictArg (f e1 ..en) K)[e] = K[ f e1 .. en e ]
@@ -151,6 +157,8 @@ data SimplCont
         (Tickish Id)    -- Tick tickish <hole>
         SimplCont
 
+type StaticEnv = SimplEnv       -- Just the static part is relevant
+
 data DupFlag = NoDup       -- Unsimplified, might be big
              | Simplified  -- Simplified
              | OkToDup     -- Simplified and small
@@ -164,7 +172,25 @@ perhapsSubstTy dup env ty
   | isSimplified dup = ty
   | otherwise        = substTy env ty
 
-{-
+{- Note [StaticEnv invariant]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We pair up an InExpr or InAlts with a StaticEnv, which establishes the
+lexical scope for that InExpr.  When we simplify that InExpr/InAlts, we
+use
+  - Its captured StaticEnv
+  - Overriding its InScopeSet with the larger one at the
+    simplification point.
+
+Why override the InScopeSet?  Example:
+      (let y = ey in f) ex
+By the time we simplify ex, 'y' will be in scope.
+
+However the InScopeSet in the StaticEnv is not irrelevant: it should
+include all the free vars of applying the substitution to the InExpr.
+Reason: contHoleType uses perhapsSubstTy to apply the substitution to
+the expression, and that (rightly) gives ASSERT failures if the InScopeSet
+isn't big enough.
+
 Note [DupFlag invariants]
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 In both (ApplyToVal dup _ env k)
@@ -196,7 +222,7 @@ instance Outputable SimplCont where
     = (text "StrictArg" <+> ppr (ai_fun ai)) $$ ppr cont
   ppr (Select { sc_dup = dup, sc_bndr = bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
     = (text "Select" <+> ppr dup <+> ppr bndr) $$
-       ifPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
+       whenPprDebug (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont
 
 
 {- Note [The hole type in ApplyToTy]
@@ -345,6 +371,10 @@ contIsRhs (Stop _ RhsCtxt) = True
 contIsRhs _                = False
 
 -------------------
+contIsStop :: SimplCont -> Bool
+contIsStop (Stop {}) = True
+contIsStop _         = False
+
 contIsDupable :: SimplCont -> Bool
 contIsDupable (Stop {})                         = True
 contIsDupable (ApplyToTy  { sc_cont = k })      = contIsDupable k
@@ -419,23 +449,25 @@ contArgs cont
 
 
 -------------------
-mkArgInfo :: Id
+mkArgInfo :: SimplEnv
+          -> Id
           -> [CoreRule] -- Rules for function
           -> Int        -- Number of value args
           -> SimplCont  -- Context of the call
           -> ArgInfo
 
-mkArgInfo fun rules n_val_args call_cont
+mkArgInfo env fun rules n_val_args call_cont
   | n_val_args < idArity fun            -- Note [Unsaturated functions]
   = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
-            , ai_rules = fun_rules, ai_encl = False
+            , ai_rules = fun_rules
+            , ai_encl = False
             , ai_strs = vanilla_stricts
             , ai_discs = vanilla_discounts }
   | otherwise
   = ArgInfo { ai_fun = fun, ai_args = [], ai_type = fun_ty
             , ai_rules = fun_rules
-            , ai_encl = interestingArgContext rules call_cont
-            , ai_strs  = add_type_str fun_ty arg_stricts
+            , ai_encl  = interestingArgContext rules call_cont
+            , ai_strs  = arg_stricts
             , ai_discs = arg_discounts }
   where
     fun_ty = idType fun
@@ -453,7 +485,11 @@ mkArgInfo fun rules n_val_args call_cont
     vanilla_stricts  = repeat False
 
     arg_stricts
-      = case splitStrictSig (idStrictness fun) of
+      | not (sm_inline (seMode env))
+      = vanilla_stricts -- See Note [Do not expose strictness if sm_inline=False]
+      | otherwise
+      = add_type_str fun_ty $
+        case splitStrictSig (idStrictness fun) of
           (demands, result_info)
                 | not (demands `lengthExceeds` n_val_args)
                 ->      -- Enough args, use the strictness given.
@@ -475,26 +511,25 @@ mkArgInfo fun rules n_val_args call_cont
     add_type_str :: Type -> [Bool] -> [Bool]
     -- If the function arg types are strict, record that in the 'strictness bits'
     -- No need to instantiate because unboxed types (which dominate the strict
-    -- types) can't instantiate type variables.
-    -- add_type_str is done repeatedly (for each call); might be better
-    -- once-for-all in the function
+    --   types) can't instantiate type variables.
+    -- add_type_str is done repeatedly (for each call);
+    --   might be better once-for-all in the function
     -- But beware primops/datacons with no strictness
 
-    add_type_str
-      = go
-      where
-        go _ [] = []
-        go fun_ty strs            -- Look through foralls
-            | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty       -- Includes coercions
-            = go fun_ty' strs
-        go fun_ty (str:strs)      -- Add strict-type info
-            | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
-            = (str || Just False == isLiftedType_maybe arg_ty) : go fun_ty' strs
-               -- If the type is levity-polymorphic, we can't know whether it's
-               -- strict. isLiftedType_maybe will return Just False only when
-               -- we're sure the type is unlifted.
-        go _ strs
-            = strs
+    add_type_str _ [] = []
+    add_type_str fun_ty all_strs@(str:strs)
+      | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty        -- Add strict-type info
+      = (str || Just False == isLiftedType_maybe arg_ty)
+        : add_type_str fun_ty' strs
+          -- If the type is levity-polymorphic, we can't know whether it's
+          -- strict. isLiftedType_maybe will return Just False only when
+          -- we're sure the type is unlifted.
+
+      | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty
+      = add_type_str fun_ty' all_strs     -- Look through foralls
+
+      | otherwise
+      = all_strs
 
 {- Note [Unsaturated functions]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -504,6 +539,28 @@ Consider (test eyeball/inline4)
 where f has arity 2.  Then we do not want to inline 'x', because
 it'll just be floated out again.  Even if f has lots of discounts
 on its first argument -- it must be saturated for these to kick in
+
+Note [Do not expose strictness if sm_inline=False]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Trac #15163 showed a case in which we had
+
+  {-# INLINE [1] zip #-}
+  zip = undefined
+
+  {-# RULES "foo" forall as bs. stream (zip as bs) = ..blah... #-}
+
+If we expose zip's bottoming nature when simplifing the LHS of the
+RULE we get
+  {-# RULES "foo" forall as bs.
+                   stream (case zip of {}) = ..blah... #-}
+discarding the arguments to zip.  Usually this is fine, but on the
+LHS of a rule it's not, because 'as' and 'bs' are now not bound on
+the LHS.
+
+This is a pretty pathalogical example, so I'm not losing sleep over
+it, but the simplest solution was to check sm_inline; if it is False,
+which it is on the LHS of a rule (see updModeForRules), then don't
+make use of the strictness info for the function.
 -}
 
 
@@ -546,14 +603,31 @@ since we can just eliminate this case instead (x is in WHNF).  Similar
 applies when x is bound to a lambda expression.  Hence
 contIsInteresting looks for case expressions with just a single
 default case.
+
+Note [No case of case is boring]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we see
+   case f x of <alts>
+
+we'd usually treat the context as interesting, to encourage 'f' to
+inline.  But if case-of-case is off, it's really not so interesting
+after all, because we are unlikely to be able to push the case
+expression into the branches of any case in f's unfolding.  So, to
+reduce unnecessary code expansion, we just make the context look boring.
+This made a small compile-time perf improvement in perf/compiler/T6048,
+and it looks plausible to me.
 -}
 
-interestingCallContext :: SimplCont -> CallCtxt
+interestingCallContext :: SimplEnv -> SimplCont -> CallCtxt
 -- See Note [Interesting call context]
-interestingCallContext cont
+interestingCallContext env cont
   = interesting cont
   where
-    interesting (Select {})     = CaseCtxt
+    interesting (Select {})
+       | sm_case_case (getMode env) = CaseCtxt
+       | otherwise                  = BoringCtxt
+       -- See Note [No case of case is boring]
+
     interesting (ApplyToVal {}) = ValAppCtxt
         -- Can happen if we have (f Int |> co) y
         -- If f has an INLINE prag we need to give it some
@@ -694,11 +768,11 @@ interestingArg env e = go env 0 e
 {-
 ************************************************************************
 *                                                                      *
-                  SimplifierMode
+                  SimplMode
 *                                                                      *
 ************************************************************************
 
-The SimplifierMode controls several switches; see its definition in
+The SimplMode controls several switches; see its definition in
 CoreMonad
         sm_rules      :: Bool     -- Whether RULES are enabled
         sm_inline     :: Bool     -- Whether inlining is enabled
@@ -708,19 +782,20 @@ CoreMonad
 
 simplEnvForGHCi :: DynFlags -> SimplEnv
 simplEnvForGHCi dflags
-  = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
-                           , sm_phase = InitialPhase
-                           , sm_rules = rules_on
+  = mkSimplEnv $ SimplMode { sm_names  = ["GHCi"]
+                           , sm_phase  = InitialPhase
+                           , sm_dflags = dflags
+                           , sm_rules  = rules_on
                            , sm_inline = False
                            , sm_eta_expand = eta_expand_on
-                           , sm_case_case = True }
+                           , sm_case_case  = True }
   where
     rules_on      = gopt Opt_EnableRewriteRules   dflags
     eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags
    -- Do not do any inlining, in case we expose some unboxed
    -- tuple stuff that confuses the bytecode interpreter
 
-updModeForStableUnfoldings :: Activation -> SimplifierMode -> SimplifierMode
+updModeForStableUnfoldings :: Activation -> SimplMode -> SimplMode
 -- See Note [Simplifying inside stable unfoldings]
 updModeForStableUnfoldings inline_rule_act current_mode
   = current_mode { sm_phase      = phaseFromActivation inline_rule_act
@@ -733,12 +808,12 @@ updModeForStableUnfoldings inline_rule_act current_mode
     phaseFromActivation (ActiveAfter _ n) = Phase n
     phaseFromActivation _                 = InitialPhase
 
-updModeForRules :: SimplifierMode -> SimplifierMode
+updModeForRules :: SimplMode -> SimplMode
 -- See Note [Simplifying rules]
 updModeForRules current_mode
-  = current_mode { sm_phase  = InitialPhase
-                 , sm_inline = False
-                 , sm_rules  = False
+  = current_mode { sm_phase      = InitialPhase
+                 , sm_inline     = False  -- See Note [Do not expose strictness if sm_inline=False]
+                 , sm_rules      = False
                  , sm_eta_expand = False }
 
 {- Note [Simplifying rules]
@@ -840,7 +915,7 @@ f when it is inlined.  So our conservative plan (implemented by
 updModeForStableUnfoldings) is this:
 
   -------------------------------------------------------------
-  When simplifying the RHS of an stable unfolding, set the phase
+  When simplifying the RHS of a stable unfolding, set the phase
   to the phase in which the stable unfolding first becomes active
   -------------------------------------------------------------
 
@@ -890,8 +965,8 @@ mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
 continuation.
 -}
 
-activeUnfolding :: SimplEnv -> Id -> Bool
-activeUnfolding env id
+activeUnfolding :: SimplMode -> Id -> Bool
+activeUnfolding mode id
   | isCompulsoryUnfolding (realIdUnfolding id)
   = True   -- Even sm_inline can't override compulsory unfoldings
   | otherwise
@@ -902,8 +977,6 @@ activeUnfolding env id
       --  (a) they are active
       --  (b) sm_inline says so, except that for stable unfoldings
       --                         (ie pragmas) we inline anyway
-  where
-    mode = getMode env
 
 getUnfoldingInRuleMatch :: SimplEnv -> InScopeEnv
 -- When matching in RULE, we want to "look through" an unfolding
@@ -928,13 +1001,11 @@ getUnfoldingInRuleMatch env
      | otherwise           = isActive (sm_phase mode) (idInlineActivation id)
 
 ----------------------
-activeRule :: SimplEnv -> Activation -> Bool
+activeRule :: SimplMode -> Activation -> Bool
 -- Nothing => No rules at all
-activeRule env
+activeRule mode
   | not (sm_rules mode) = \_ -> False     -- Rewriting is off
   | otherwise           = isActive (sm_phase mode)
-  where
-    mode = getMode env
 
 {-
 ************************************************************************
@@ -1017,7 +1088,7 @@ spectral/mandel/Mandel.hs, where the mandelset function gets a useful
 let-float if you inline windowToViewport
 
 However, as usual for Gentle mode, do not inline things that are
-inactive in the intial stages.  See Note [Gentle mode].
+inactive in the initial stages.  See Note [Gentle mode].
 
 Note [Stable unfoldings and preInlineUnconditionally]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1041,6 +1112,11 @@ want PreInlineUnconditionally to second-guess it.  A live example is
 Trac #3736.
     c.f. Note [Stable unfoldings and postInlineUnconditionally]
 
+NB: if the pragama is INLINEABLE, then we don't want to behave int
+this special way -- an INLINEABLE pragam just says to GHC "inline this
+if you like".  But if there is a unique occurrence, we want to inline
+the stable unfolding, not the RHS.
+
 Note [Top-level bottoming Ids]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Don't inline top-level Ids that are bottoming, even if they are used just
@@ -1054,31 +1130,45 @@ is a term (not a coercion) so we can't necessarily inline the latter in
 the former.
 -}
 
-preInlineUnconditionally :: DynFlags -> SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
+preInlineUnconditionally
+    :: SimplEnv -> TopLevelFlag -> InId
+    -> InExpr -> StaticEnv  -- These two go together
+    -> Maybe SimplEnv       -- Returned env has extended substitution
 -- Precondition: rhs satisfies the let/app invariant
 -- See Note [CoreSyn let/app invariant] in CoreSyn
 -- Reason: we don't want to inline single uses, or discard dead bindings,
 --         for unlifted, side-effect-ful bindings
-preInlineUnconditionally dflags env top_lvl bndr rhs
-  | not active                               = False
-  | isStableUnfolding (idUnfolding bndr)     = False -- Note [Stable unfoldings and preInlineUnconditionally]
-  | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
-  | not (gopt Opt_SimplPreInlining dflags)   = False
-  | isCoVar bndr                             = False -- Note [Do not inline CoVars unconditionally]
-  | otherwise = case idOccInfo bndr of
-                  IAmDead                    -> True -- Happens in ((\x.1) v)
-                  occ@OneOcc { occ_one_br = True }
-                                             -> try_once (occ_in_lam occ)
-                                                         (occ_int_cxt occ)
-                  _                          -> False
+preInlineUnconditionally env top_lvl bndr rhs rhs_env
+  | not pre_inline_unconditionally           = Nothing
+  | not active                               = Nothing
+  | isTopLevel top_lvl && isBottomingId bndr = Nothing -- Note [Top-level bottoming Ids]
+  | isCoVar bndr                             = Nothing -- Note [Do not inline CoVars unconditionally]
+  | isExitJoinId bndr                        = Nothing -- Note [Do not inline exit join points]
+                                                       -- in module Exitify
+  | not (one_occ (idOccInfo bndr))           = Nothing
+  | not (isStableUnfolding unf)              = Just (extend_subst_with rhs)
+
+  -- Note [Stable unfoldings and preInlineUnconditionally]
+  | isInlinablePragma inline_prag
+  , Just inl <- maybeUnfoldingTemplate unf   = Just (extend_subst_with inl)
+  | otherwise                                = Nothing
   where
-    mode = getMode env
-    active = isActive (sm_phase mode) act
-             -- See Note [pre/postInlineUnconditionally in gentle mode]
-    act = idInlineActivation bndr
-    try_once in_lam int_cxt     -- There's one textual occurrence
+    unf = idUnfolding bndr
+    extend_subst_with inl_rhs = extendIdSubst env bndr (mkContEx rhs_env inl_rhs)
+
+    one_occ IAmDead = True -- Happens in ((\x.1) v)
+    one_occ (OneOcc { occ_one_br = True      -- One textual occurrence
+                    , occ_in_lam = in_lam
+                    , occ_int_cxt = int_cxt })
         | not in_lam = isNotTopLevel top_lvl || early_phase
         | otherwise  = int_cxt && canInlineInLam rhs
+    one_occ _        = False
+
+    pre_inline_unconditionally = gopt Opt_SimplPreInlining (seDynFlags env)
+    mode   = getMode env
+    active = isActive (sm_phase mode) (inlinePragmaActivation inline_prag)
+             -- See Note [pre/postInlineUnconditionally in gentle mode]
+    inline_prag = idInlinePragma bndr
 
 -- Be very careful before inlining inside a lambda, because (a) we must not
 -- invalidate occurrence information, and (b) we want to avoid pushing a
@@ -1163,18 +1253,16 @@ story for now.
 -}
 
 postInlineUnconditionally
-    :: DynFlags -> SimplEnv -> TopLevelFlag
-    -> OutId            -- The binder (an InId would be fine too)
-                        --            (*not* a CoVar)
+    :: SimplEnv -> TopLevelFlag
+    -> OutId            -- The binder (*not* a CoVar), including its unfolding
     -> OccInfo          -- From the InId
     -> OutExpr
-    -> Unfolding
     -> Bool
 -- Precondition: rhs satisfies the let/app invariant
 -- See Note [CoreSyn let/app invariant] in CoreSyn
 -- Reason: we don't want to inline single uses, or discard dead bindings,
 --         for unlifted, side-effect-ful bindings
-postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
+postInlineUnconditionally env top_lvl bndr occ_info rhs
   | not active                  = False
   | isWeakLoopBreaker occ_info  = False -- If it's a loop-breaker of any kind, don't inline
                                         -- because it might be referred to "earlier"
@@ -1242,7 +1330,9 @@ postInlineUnconditionally dflags env top_lvl bndr occ_info rhs unfolding
 -- Alas!
 
   where
-    active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
+    unfolding = idUnfolding bndr
+    dflags    = seDynFlags env
+    active    = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
         -- See Note [pre/postInlineUnconditionally in gentle mode]
 
 {-
@@ -1278,7 +1368,7 @@ ones that are trivial):
 
 Note [Stable unfoldings and postInlineUnconditionally]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Do not do postInlineUnconditionally if the Id has an stable unfolding,
+Do not do postInlineUnconditionally if the Id has a stable unfolding,
 otherwise we lose the unfolding.  Example
 
      -- f has stable unfolding with rhs (e |> co)
@@ -1414,40 +1504,49 @@ because the latter is not well-kinded.
 ************************************************************************
 -}
 
-tryEtaExpandRhs :: SimplEnv -> RecFlag -> OutId -> OutExpr
-                -> SimplM (Arity, OutExpr)
+tryEtaExpandRhs :: SimplMode -> OutId -> OutExpr
+                -> SimplM (Arity, Bool, OutExpr)
 -- See Note [Eta-expanding at let bindings]
-tryEtaExpandRhs env is_rec bndr rhs
-  = do { dflags <- getDynFlags
-       ; (new_arity, new_rhs) <- try_expand dflags
+-- If tryEtaExpandRhs rhs = (n, is_bot, rhs') then
+--   (a) rhs' has manifest arity
+--   (b) if is_bot is True then rhs' applied to n args is guaranteed bottom
+tryEtaExpandRhs mode bndr rhs
+  | Just join_arity <- isJoinId_maybe bndr
+  = do { let (join_bndrs, join_body) = collectNBinders join_arity rhs
+       ; return (count isId join_bndrs, exprIsBottom join_body, rhs) }
+         -- Note [Do not eta-expand join points]
+         -- But do return the correct arity and bottom-ness, because
+         -- these are used to set the bndr's IdInfo (Trac #15517)
+
+  | otherwise
+  = do { (new_arity, is_bot, new_rhs) <- try_expand
 
        ; WARN( new_arity < old_id_arity,
                (text "Arity decrease:" <+> (ppr bndr <+> ppr old_id_arity
                 <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
                         -- Note [Arity decrease] in Simplify
-         return (new_arity, new_rhs) }
+         return (new_arity, is_bot, new_rhs) }
   where
-    try_expand dflags
+    try_expand
       | exprIsTrivial rhs
-      = return (exprArity rhs, rhs)
-
-      | sm_eta_expand (getMode env)      -- Provided eta-expansion is on
-      , let new_arity1 = findRhsArity dflags bndr rhs old_arity
-            new_arity2 = idCallArity bndr
-            new_arity  = max new_arity1 new_arity2
-      , new_arity > old_arity      -- And the current manifest arity isn't enough
-      = if is_rec == Recursive && isJoinId bndr
-           then WARN(True, text "Can't eta-expand recursive join point:" <+>
-                             ppr bndr)
-                return (old_arity, rhs)
-           else do { tick (EtaExpansion bndr)
-                   ; return (new_arity, etaExpand new_arity rhs) }
+      = return (exprArity rhs, False, rhs)
+
+      | sm_eta_expand mode      -- Provided eta-expansion is on
+      , new_arity > old_arity   -- And the current manifest arity isn't enough
+      = do { tick (EtaExpansion bndr)
+           ; return (new_arity, is_bot, etaExpand new_arity rhs) }
+
       | otherwise
-      = return (old_arity, rhs)
+      = return (old_arity, is_bot && new_arity == old_arity, rhs)
 
+    dflags       = sm_dflags mode
     old_arity    = exprArity rhs -- See Note [Do not expand eta-expand PAPs]
     old_id_arity = idArity bndr
 
+    (new_arity1, is_bot) = findRhsArity dflags bndr rhs old_arity
+    new_arity2 = idCallArity bndr
+    new_arity  = max new_arity1 new_arity2
+
 {-
 Note [Eta-expanding at let bindings]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1473,6 +1572,44 @@ because then 'genMap' will inline, and it really shouldn't: at least
 as far as the programmer is concerned, it's not applied to two
 arguments!
 
+Note [Do not eta-expand join points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Similarly to CPR (see Note [Don't CPR join points] in WorkWrap), a join point
+stands well to gain from its outer binding's eta-expansion, and eta-expanding a
+join point is fraught with issues like how to deal with a cast:
+
+    let join $j1 :: IO ()
+             $j1 = ...
+             $j2 :: Int -> IO ()
+             $j2 n = if n > 0 then $j1
+                              else ...
+
+    =>
+
+    let join $j1 :: IO ()
+             $j1 = (\eta -> ...)
+                     `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
+                                 ~  IO ()
+             $j2 :: Int -> IO ()
+             $j2 n = (\eta -> if n > 0 then $j1
+                                       else ...)
+                     `cast` N:IO :: State# RealWorld -> (# State# RealWorld, ())
+                                 ~  IO ()
+
+The cast here can't be pushed inside the lambda (since it's not casting to a
+function type), so the lambda has to stay, but it can't because it contains a
+reference to a join point. In fact, $j2 can't be eta-expanded at all. Rather
+than try and detect this situation (and whatever other situations crop up!), we
+don't bother; again, any surrounding eta-expansion will improve these join
+points anyway, since an outer cast can *always* be pushed inside. By the time
+CorePrep comes around, the code is very likely to look more like this:
+
+    let join $j1 :: State# RealWorld -> (# State# RealWorld, ())
+             $j1 = (...) eta
+             $j2 :: Int -> State# RealWorld -> (# State# RealWorld, ())
+             $j2 = if n > 0 then $j1
+                            else (...) eta
+
 Note [Do not eta-expand PAPs]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We used to have old_arity = manifestArity rhs, which meant that we
@@ -1603,22 +1740,25 @@ new binding is abstracted.  Note that
     which is obviously bogus.
 -}
 
-abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
-abstractFloats main_tvs body_env body
+abstractFloats :: DynFlags -> TopLevelFlag -> [OutTyVar] -> SimplFloats
+              -> OutExpr -> SimplM ([OutBind], OutExpr)
+abstractFloats dflags top_lvl main_tvs floats body
   = ASSERT( notNull body_floats )
+    ASSERT( isNilOL (sfJoinFloats floats) )
     do  { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
         ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
   where
+    is_top_lvl  = isTopLevel top_lvl
     main_tv_set = mkVarSet main_tvs
-    body_floats = getFloatBinds body_env
-    empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
+    body_floats = letFloatBinds (sfLetFloats floats)
+    empty_subst = CoreSubst.mkEmptySubst (sfInScope floats)
 
     abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
     abstract subst (NonRec id rhs)
-      = do { (poly_id, poly_app) <- mk_poly tvs_here id
-           ; let poly_rhs = mkLams tvs_here rhs'
-                 subst'   = CoreSubst.extendIdSubst subst id poly_app
-           ; return (subst', (NonRec poly_id poly_rhs)) }
+      = do { (poly_id1, poly_app) <- mk_poly1 tvs_here id
+           ; let (poly_id2, poly_rhs) = mk_poly2 poly_id1 tvs_here rhs'
+                 subst' = CoreSubst.extendIdSubst subst id poly_app
+           ; return (subst', NonRec poly_id2 poly_rhs) }
       where
         rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
 
@@ -1629,11 +1769,13 @@ abstractFloats main_tvs body_env body
                    exprSomeFreeVarsList isTyVar rhs'
 
     abstract subst (Rec prs)
-       = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
+       = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly1 tvs_here) ids
             ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
-                  poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs)
-                              | rhs <- rhss]
-            ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
+                  poly_pairs = [ mk_poly2 poly_id tvs_here rhs'
+                               | (poly_id, rhs) <- poly_ids `zip` rhss
+                               , let rhs' = CoreSubst.substExpr (text "abstract_floats")
+                                                                subst' rhs ]
+            ; return (subst', Rec poly_pairs) }
        where
          (ids,rhss) = unzip prs
                 -- For a recursive group, it's a bit of a pain to work out the minimal
@@ -1651,7 +1793,8 @@ abstractFloats main_tvs body_env body
                 -- Here, we must abstract 'x' over 'a'.
          tvs_here = toposortTyVars main_tvs
 
-    mk_poly tvs_here var
+    mk_poly1 :: [TyVar] -> Id -> SimplM (Id, CoreExpr)
+    mk_poly1 tvs_here var
       = do { uniq <- getUniqueM
            ; let  poly_name = setNameUnique (idName var) uniq           -- Keep same name
                   poly_ty   = mkInvForAllTys tvs_here (idType var) -- But new type of course
@@ -1671,6 +1814,21 @@ abstractFloats main_tvs body_env body
                 -- the occurrences of x' will be just the occurrences originally
                 -- pinned on x.
 
+    mk_poly2 :: Id -> [TyVar] -> CoreExpr -> (Id, CoreExpr)
+    mk_poly2 poly_id tvs_here rhs
+      = (poly_id `setIdUnfolding` unf, poly_rhs)
+      where
+        poly_rhs = mkLams tvs_here rhs
+        unf = mkUnfolding dflags InlineRhs is_top_lvl False poly_rhs
+
+        -- We want the unfolding.  Consider
+        --      let
+        --            x = /\a. let y = ... in Just y
+        --      in body
+        -- Then we float the y-binding out (via abstractFloats and addPolyBind)
+        -- but 'x' may well then be inlined in 'body' in which case we'd like the
+        -- opportunity to inline 'y' too.
+
 {-
 Note [Abstract over coercions]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1785,7 +1943,7 @@ prepareAlts scrut case_bndr' alts
 mkCase tries these things
 
 * Note [Nerge nested cases]
-* Note [Elimiante identity case]
+* Note [Eliminate identity case]
 * Note [Scrutinee constant folding]
 
 Note [Merge Nested Cases]
@@ -1985,13 +2143,18 @@ mkCase1 dflags scrut bndr alts_ty alts = mkCase2 dflags scrut bndr alts_ty alts
 
 mkCase2 dflags scrut bndr alts_ty alts
   | -- See Note [Scrutinee Constant Folding]
-    case alts of  -- Not if there is just a DEFAULT alterantive
+    case alts of  -- Not if there is just a DEFAULT alternative
       [(DEFAULT,_,_)] -> False
       _               -> True
   , gopt Opt_CaseFolding dflags
   , Just (scrut', tx_con, mk_orig) <- caseRules dflags scrut
   = do { bndr' <- newId (fsLit "lwild") (exprType scrut')
-       ; alts' <- mapM  (tx_alt tx_con mk_orig bndr') alts
+
+       ; alts' <- mapMaybeM (tx_alt tx_con mk_orig bndr') alts
+                  -- mapMaybeM: discard unreachable alternatives
+                  -- See Note [Unreachable caseRules alternatives]
+                  -- in PrelRules
+
        ; mkCase3 dflags scrut' bndr' alts_ty $
          add_default (re_sort alts')
        }
@@ -2015,19 +2178,14 @@ mkCase2 dflags scrut bndr alts_ty alts
     -- to construct an expression equivalent to the original one, for use
     -- in the DEFAULT case
 
+    tx_alt :: (AltCon -> Maybe AltCon) -> (Id -> CoreExpr) -> Id
+           -> CoreAlt -> SimplM (Maybe CoreAlt)
     tx_alt tx_con mk_orig new_bndr (con, bs, rhs)
-      | DataAlt dc <- con', not (isNullaryRepDataCon dc)
-      = -- For non-nullary data cons we must invent some fake binders
-        -- See Note [caseRules for dataToTag] in PrelRules
-        do { us <- getUniquesM
-           ; let (ex_tvs, arg_ids) = dataConRepInstPat us dc
-                                         (tyConAppArgs (idType new_bndr))
-           ; return (con', ex_tvs ++ arg_ids, rhs') }
-      | otherwise
-      = return (con', [], rhs')
+      = case tx_con con of
+          Nothing   -> return Nothing
+          Just con' -> do { bs' <- mk_new_bndrs new_bndr con'
+                          ; return (Just (con', bs', rhs')) }
       where
-        con' = tx_con con
-
         rhs' | isDeadBinder bndr = rhs
              | otherwise         = bindNonRec bndr orig_val rhs
 
@@ -2036,23 +2194,61 @@ mkCase2 dflags scrut bndr alts_ty alts
                       LitAlt l   -> Lit l
                       DataAlt dc -> mkConApp2 dc (tyConAppArgs (idType bndr)) bs
 
+    mk_new_bndrs new_bndr (DataAlt dc)
+      | not (isNullaryRepDataCon dc)
+      = -- For non-nullary data cons we must invent some fake binders
+        -- See Note [caseRules for dataToTag] in PrelRules
+        do { us <- getUniquesM
+           ; let (ex_tvs, arg_ids) = dataConRepInstPat us dc
+                                        (tyConAppArgs (idType new_bndr))
+           ; return (ex_tvs ++ arg_ids) }
+    mk_new_bndrs _ _ = return []
 
     re_sort :: [CoreAlt] -> [CoreAlt]  -- Re-sort the alternatives to
     re_sort alts = sortBy cmpAlt alts  -- preserve the #case_invariants#
 
     add_default :: [CoreAlt] -> [CoreAlt]
-    -- TagToEnum may change a boolean True/False set of alternatives
-    -- to LitAlt 0#/1# alterantives.  But literal alternatives always
-    -- have a DEFAULT (I think).  So add it.
+    -- See Note [Literal cases]
     add_default ((LitAlt {}, bs, rhs) : alts) = (DEFAULT, bs, rhs) : alts
     add_default alts                          = alts
 
+{- Note [Literal cases]
+~~~~~~~~~~~~~~~~~~~~~~~
+If we have
+  case tagToEnum (a ># b) of
+     False -> e1
+     True  -> e2
+
+then caseRules for TagToEnum will turn it into
+  case tagToEnum (a ># b) of
+     0# -> e1
+     1# -> e2
+
+Since the case is exhaustive (all cases are) we can convert it to
+  case tagToEnum (a ># b) of
+     DEFAULT -> e1
+     1#      -> e2
+
+This may generate sligthtly better code (although it should not, since
+all cases are exhaustive) and/or optimise better.  I'm not certain that
+it's necessary, but currenty we do make this change.  We do it here,
+NOT in the TagToEnum rules (see "Beware" in Note [caseRules for tagToEnum]
+in PrelRules)
+-}
+
 --------------------------------------------------
 --      Catch-all
 --------------------------------------------------
 mkCase3 _dflags scrut bndr alts_ty alts
   = return (Case scrut bndr alts_ty alts)
 
+-- See Note [Exitification] and Note [Do not inline exit join points] in Exitify.hs
+-- This lives here (and not in Id) because occurrence info is only valid on
+-- InIds, so it's crucial that isExitJoinId is only called on freshly
+-- occ-analysed code. It's not a generic function you can call anywhere.
+isExitJoinId :: Var -> Bool
+isExitJoinId id = isJoinId id && isOneOcc (idOccInfo id) && occ_in_lam (idOccInfo id)
+
 {-
 Note [Dead binders]
 ~~~~~~~~~~~~~~~~~~~~