15 files changed, 1819 insertions, 1166 deletions
diff --git a/compiler/coreSyn/CoreArity.hs b/compiler/coreSyn/CoreArity.hs
index 3f429d1ad2..d15da87aac 100644
--- a/compiler/coreSyn/CoreArity.hs
+++ b/compiler/coreSyn/CoreArity.hs
@@ -18,6 +18,8 @@ module CoreArity (
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import CoreSyn
 import CoreFVs
 import CoreUtils
@@ -521,61 +523,60 @@ mk_cheap_fn dflags cheap_app
 
 
 ----------------------
-findRhsArity :: DynFlags -> Id -> CoreExpr -> Arity -> Arity
+findRhsArity :: DynFlags -> Id -> CoreExpr -> Arity -> (Arity, Bool)
 -- This implements the fixpoint loop for arity analysis
 -- See Note [Arity analysis]
+-- If findRhsArity e = (n, is_bot) then
+--  (a) any application of e to <n arguments will not do much work,
+--      so it is safe to expand e  ==>  (\x1..xn. e x1 .. xn)
+--  (b) if is_bot=True, then e applied to n args is guaranteed bottom
 findRhsArity dflags bndr rhs old_arity
-  = go (rhsEtaExpandArity dflags init_cheap_app rhs)
+  = go (get_arity init_cheap_app)
        -- We always call exprEtaExpandArity once, but usually
        -- that produces a result equal to old_arity, and then
        -- we stop right away (since arities should not decrease)
        -- Result: the common case is that there is just one iteration
   where
+    is_lam = has_lam rhs
+
+    has_lam (Tick _ e) = has_lam e
+    has_lam (Lam b e)  = isId b || has_lam e
+    has_lam _          = False
+
     init_cheap_app :: CheapAppFun
     init_cheap_app fn n_val_args
       | fn == bndr = True   -- On the first pass, this binder gets infinite arity
       | otherwise  = isCheapApp fn n_val_args
 
-    go :: Arity -> Arity
-    go cur_arity
-      | cur_arity <= old_arity = cur_arity
-      | new_arity == cur_arity = cur_arity
+    go :: (Arity, Bool) -> (Arity, Bool)
+    go cur_info@(cur_arity, _)
+      | cur_arity <= old_arity = cur_info
+      | new_arity == cur_arity = cur_info
       | otherwise = ASSERT( new_arity < cur_arity )
 #if defined(DEBUG)
                     pprTrace "Exciting arity"
                        (vcat [ ppr bndr <+> ppr cur_arity <+> ppr new_arity
-                                                    , ppr rhs])
+                             , ppr rhs])
 #endif
-                    go new_arity
+                    go new_info
       where
-        new_arity = rhsEtaExpandArity dflags cheap_app rhs
+        new_info@(new_arity, _) = get_arity cheap_app
 
         cheap_app :: CheapAppFun
         cheap_app fn n_val_args
           | fn == bndr = n_val_args < cur_arity
           | otherwise  = isCheapApp fn n_val_args
 
--- ^ The Arity returned is the number of value args the
--- expression can be applied to without doing much work
-rhsEtaExpandArity :: DynFlags -> CheapAppFun -> CoreExpr -> Arity
--- exprEtaExpandArity is used when eta expanding
---      e  ==>  \xy -> e x y
-rhsEtaExpandArity dflags cheap_app e
-  = case (arityType env e) of
-      ATop (os:oss)
-        | isOneShotInfo os || has_lam e -> 1 + length oss
-                                   -- Don't expand PAPs/thunks
-                                   -- Note [Eta expanding thunks]
-        | otherwise       -> 0
-      ATop []             -> 0
-      ABot n              -> n
-  where
-    env = AE { ae_cheap_fn = mk_cheap_fn dflags cheap_app
-             , ae_ped_bot  = gopt Opt_PedanticBottoms dflags }
-
-    has_lam (Tick _ e) = has_lam e
-    has_lam (Lam b e)  = isId b || has_lam e
-    has_lam _          = False
+    get_arity :: CheapAppFun -> (Arity, Bool)
+    get_arity cheap_app
+      = case (arityType env rhs) of
+          ABot n -> (n, True)
+          ATop (os:oss) | isOneShotInfo os || is_lam
+                  -> (1 + length oss, False)    -- Don't expand PAPs/thunks
+          ATop _  -> (0,              False)    -- Note [Eta expanding thunks]
+       where
+         env = AE { ae_cheap_fn = mk_cheap_fn dflags cheap_app
+                  , ae_ped_bot  = gopt Opt_PedanticBottoms dflags }
 
 {-
 Note [Arity analysis]
@@ -936,7 +937,7 @@ etaExpand n orig_expr
           -- See Note [Eta expansion and source notes]
           (expr', args) = collectArgs expr
           (ticks, expr'') = stripTicksTop tickishFloatable expr'
-          sexpr = foldl App expr'' args
+          sexpr = foldl' App expr'' args
           retick expr = foldr mkTick expr ticks
 
                                 -- Abstraction    Application
@@ -1036,10 +1037,19 @@ mkEtaWW orig_n orig_expr in_scope orig_ty
        | n == 0
        = (getTCvInScope subst, reverse eis)
 
-       | Just (tv,ty') <- splitForAllTy_maybe ty
-       , let (subst', tv') = Type.substTyVarBndr subst tv
+       | Just (tcv,ty') <- splitForAllTy_maybe ty
+       , let (subst', tcv') = Type.substVarBndr subst tcv
+       = let ((n_subst, n_tcv), n_n)
+               -- We want to have at least 'n' lambdas at the top.
+               -- If tcv is a tyvar, it corresponds to one Lambda (/\).
+               --   And we won't reduce n.
+               -- If tcv is a covar, we could eta-expand the expr with one
+               --   lambda \co:ty. e co. In this case we generate a new variable
+               --   of the coercion type, update the scope, and reduce n by 1.
+               | isTyVar tcv = ((subst', tcv'), n)
+               | otherwise  = (freshEtaId n subst' (varType tcv'), n-1)
            -- Avoid free vars of the original expression
-       = go n subst' ty' (EtaVar tv' : eis)
+         in go n_n n_subst ty' (EtaVar n_tcv : eis)
 
        | Just (arg_ty, res_ty) <- splitFunTy_maybe ty
        , not (isTypeLevPoly arg_ty)
@@ -1122,8 +1132,8 @@ etaBodyForJoinPoint need_args body
       = (reverse rev_bs, e)
     go n ty subst rev_bs e
       | Just (tv, res_ty) <- splitForAllTy_maybe ty
-      , let (subst', tv') = Type.substTyVarBndr subst tv
-      = go (n-1) res_ty subst' (tv' : rev_bs) (e `App` Type (mkTyVarTy tv'))
+      , let (subst', tv') = Type.substVarBndr subst tv
+      = go (n-1) res_ty subst' (tv' : rev_bs) (e `App` varToCoreExpr tv')
       | Just (arg_ty, res_ty) <- splitFunTy_maybe ty
       , let (subst', b) = freshEtaId n subst arg_ty
       = go (n-1) res_ty subst' (b : rev_bs) (e `App` Var b)
diff --git a/compiler/coreSyn/CoreFVs.hs b/compiler/coreSyn/CoreFVs.hs
index f5343caf2b..bc54d26ad3 100644
--- a/compiler/coreSyn/CoreFVs.hs
+++ b/compiler/coreSyn/CoreFVs.hs
@@ -37,7 +37,6 @@ module CoreFVs (
         ruleRhsFreeVars, ruleFreeVars, rulesFreeVars,
         rulesFreeVarsDSet,
         ruleLhsFreeIds, ruleLhsFreeIdsList,
-        vectsFreeVars,
 
         expr_fvs,
 
@@ -60,6 +59,8 @@ module CoreFVs (
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import CoreSyn
 import Id
 import IdInfo
@@ -350,7 +351,7 @@ orphNamesOfType (TyVarTy _)          = emptyNameSet
 orphNamesOfType (LitTy {})           = emptyNameSet
 orphNamesOfType (TyConApp tycon tys) = orphNamesOfTyCon tycon
                                        `unionNameSet` orphNamesOfTypes tys
-orphNamesOfType (ForAllTy bndr res)  = orphNamesOfType (binderKind bndr)
+orphNamesOfType (ForAllTy bndr res)  = orphNamesOfType (binderType bndr)
                                        `unionNameSet` orphNamesOfType res
 orphNamesOfType (FunTy arg res)      = unitNameSet funTyConName    -- NB!  See Trac #8535
                                        `unionNameSet` orphNamesOfType arg
@@ -365,8 +366,13 @@ orphNamesOfThings f = foldr (unionNameSet . f) emptyNameSet
 orphNamesOfTypes :: [Type] -> NameSet
 orphNamesOfTypes = orphNamesOfThings orphNamesOfType
 
+orphNamesOfMCo :: MCoercion -> NameSet
+orphNamesOfMCo MRefl    = emptyNameSet
+orphNamesOfMCo (MCo co) = orphNamesOfCo co
+
 orphNamesOfCo :: Coercion -> NameSet
-orphNamesOfCo (Refl _ ty)           = orphNamesOfType ty
+orphNamesOfCo (Refl ty)             = orphNamesOfType ty
+orphNamesOfCo (GRefl _ ty mco)      = orphNamesOfType ty `unionNameSet` orphNamesOfMCo mco
 orphNamesOfCo (TyConAppCo _ tc cos) = unitNameSet (getName tc) `unionNameSet` orphNamesOfCos cos
 orphNamesOfCo (AppCo co1 co2)       = orphNamesOfCo co1 `unionNameSet` orphNamesOfCo co2
 orphNamesOfCo (ForAllCo _ kind_co co)
@@ -377,20 +383,19 @@ orphNamesOfCo (AxiomInstCo con _ cos) = orphNamesOfCoCon con `unionNameSet` orph
 orphNamesOfCo (UnivCo p _ t1 t2)    = orphNamesOfProv p `unionNameSet` orphNamesOfType t1 `unionNameSet` orphNamesOfType t2
 orphNamesOfCo (SymCo co)            = orphNamesOfCo co
 orphNamesOfCo (TransCo co1 co2)     = orphNamesOfCo co1 `unionNameSet` orphNamesOfCo co2
-orphNamesOfCo (NthCo _ co)          = orphNamesOfCo co
+orphNamesOfCo (NthCo _ _ co)        = orphNamesOfCo co
 orphNamesOfCo (LRCo  _ co)          = orphNamesOfCo co
 orphNamesOfCo (InstCo co arg)       = orphNamesOfCo co `unionNameSet` orphNamesOfCo arg
-orphNamesOfCo (CoherenceCo co1 co2) = orphNamesOfCo co1 `unionNameSet` orphNamesOfCo co2
 orphNamesOfCo (KindCo co)           = orphNamesOfCo co
 orphNamesOfCo (SubCo co)            = orphNamesOfCo co
 orphNamesOfCo (AxiomRuleCo _ cs)    = orphNamesOfCos cs
+orphNamesOfCo (HoleCo _)            = emptyNameSet
 
 orphNamesOfProv :: UnivCoProvenance -> NameSet
 orphNamesOfProv UnsafeCoerceProv    = emptyNameSet
 orphNamesOfProv (PhantomProv co)    = orphNamesOfCo co
 orphNamesOfProv (ProofIrrelProv co) = orphNamesOfCo co
 orphNamesOfProv (PluginProv _)      = emptyNameSet
-orphNamesOfProv (HoleProv _)        = emptyNameSet
 
 orphNamesOfCos :: [Coercion] -> NameSet
 orphNamesOfCos = orphNamesOfThings orphNamesOfCo
@@ -513,17 +518,6 @@ put this 'f' in a Rec block, but will mark the binding as a non-rule loop
 breaker, which is perfectly inlinable.
 -}
 
--- |Free variables of a vectorisation declaration
-vectsFreeVars :: [CoreVect] -> VarSet
-vectsFreeVars = mapUnionVarSet vectFreeVars
-  where
-    vectFreeVars (Vect   _ rhs)   = fvVarSet $ filterFV isLocalId $ expr_fvs rhs
-    vectFreeVars (NoVect _)       = noFVs
-    vectFreeVars (VectType _ _ _) = noFVs
-    vectFreeVars (VectClass _)    = noFVs
-    vectFreeVars (VectInst _)     = noFVs
-      -- this function is only concerned with values, not types
-
 {-
 ************************************************************************
 *                                                                      *
@@ -535,14 +529,23 @@ The free variable pass annotates every node in the expression with its
 NON-GLOBAL free variables and type variables.
 -}
 
-type FVAnn = DVarSet
+type FVAnn = DVarSet  -- See Note [The FVAnn invariant]
+
+{- Note [The FVAnn invariant]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Invariant: a FVAnn, say S, is closed:
+  That is: if v is in S,
+           then freevars( v's type/kind ) is also in S
+-}
 
 -- | Every node in a binding group annotated with its
 -- (non-global) free variables, both Ids and TyVars, and type.
 type CoreBindWithFVs = AnnBind Id FVAnn
+
 -- | Every node in an expression annotated with its
 -- (non-global) free variables, both Ids and TyVars, and type.
-type CoreExprWithFVs  = AnnExpr Id FVAnn
+-- NB: see Note [The FVAnn invariant]
+type CoreExprWithFVs  = AnnExpr  Id FVAnn
 type CoreExprWithFVs' = AnnExpr' Id FVAnn
 
 -- | Every node in an expression annotated with its
@@ -696,12 +699,14 @@ freeVarsBind (Rec binds) body_fvs
     rhss2        = map freeVars rhss
     rhs_body_fvs = foldr (unionFVs . freeVarsOf) body_fvs rhss2
     binders_fvs  = fvDVarSet $ mapUnionFV bndrRuleAndUnfoldingFVs binders
+                   -- See Note [The FVAnn invariant]
     all_fvs      = rhs_body_fvs `unionFVs` binders_fvs
             -- The "delBinderFV" happens after adding the idSpecVars,
             -- since the latter may add some of the binders as fvs
 
 freeVars :: CoreExpr -> CoreExprWithFVs
--- ^ Annotate a 'CoreExpr' with its (non-global) free type and value variables at every tree node
+-- ^ Annotate a 'CoreExpr' with its (non-global) free type
+--   and value variables at every tree node.
 freeVars = go
   where
     go :: CoreExpr -> CoreExprWithFVs
@@ -709,7 +714,8 @@ freeVars = go
       | isLocalVar v = (aFreeVar v `unionFVs` ty_fvs, AnnVar v)
       | otherwise    = (emptyDVarSet,                 AnnVar v)
       where
-        ty_fvs = dVarTypeTyCoVars v  -- Do we need this?
+        ty_fvs = dVarTypeTyCoVars v
+                 -- See Note [The FVAnn invariant]
 
     go (Lit lit) = (emptyDVarSet, AnnLit lit)
     go (Lam b body)
@@ -719,6 +725,7 @@ freeVars = go
         body'@(body_fvs, _) = go body
         b_ty  = idType b
         b_fvs = tyCoVarsOfTypeDSet b_ty
+                -- See Note [The FVAnn invariant]
 
     go (App fun arg)
       = ( freeVarsOf fun' `unionFVs` freeVarsOf arg'
@@ -731,8 +738,8 @@ freeVars = go
       = ( (bndr `delBinderFV` alts_fvs)
            `unionFVs` freeVarsOf scrut2
            `unionFVs` tyCoVarsOfTypeDSet ty
-          -- don't need to look at (idType bndr)
-          -- b/c that's redundant with scrut
+          -- Don't need to look at (idType bndr)
+          -- because that's redundant with scrut
         , AnnCase scrut2 bndr ty alts2 )
       where
         scrut2 = go scrut
diff --git a/compiler/coreSyn/CoreLint.hs b/compiler/coreSyn/CoreLint.hs
index 2be1020674..21edba1241 100644
--- a/compiler/coreSyn/CoreLint.hs
+++ b/compiler/coreSyn/CoreLint.hs
@@ -11,7 +11,7 @@ A ``lint'' pass to check for Core correctness
 module CoreLint (
     lintCoreBindings, lintUnfolding,
     lintPassResult, lintInteractiveExpr, lintExpr,
-    lintAnnots,
+    lintAnnots, lintTypes,
 
     -- ** Debug output
     endPass, endPassIO,
@@ -21,6 +21,8 @@ module CoreLint (
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import CoreSyn
 import CoreFVs
 import CoreUtils
@@ -64,10 +66,10 @@ import Demand ( splitStrictSig, isBotRes )
 import HscTypes
 import DynFlags
 import Control.Monad
-#if __GLASGOW_HASKELL__ > 710
 import qualified Control.Monad.Fail as MonadFail
-#endif
 import MonadUtils
+import Data.Foldable      ( toList )
+import Data.List.NonEmpty ( NonEmpty )
 import Data.Maybe
 import Pair
 import qualified GHC.LanguageExtensions as LangExt
@@ -200,8 +202,8 @@ points but not the RHSes of value bindings (thunks and functions).
 ************************************************************************
 
 These functions are not CoreM monad stuff, but they probably ought to
-be, and it makes a conveneint place.  place for them.  They print out
-stuff before and after core passes, and do Core Lint when necessary.
+be, and it makes a convenient place for them.  They print out stuff
+before and after core passes, and do Core Lint when necessary.
 -}
 
 endPass :: CoreToDo -> CoreProgram -> [CoreRule] -> CoreM ()
@@ -266,13 +268,13 @@ coreDumpFlag (CoreDoFloatOutwards {}) = Just Opt_D_verbose_core2core
 coreDumpFlag CoreLiberateCase         = Just Opt_D_verbose_core2core
 coreDumpFlag CoreDoStaticArgs         = Just Opt_D_verbose_core2core
 coreDumpFlag CoreDoCallArity          = Just Opt_D_dump_call_arity
+coreDumpFlag CoreDoExitify            = Just Opt_D_dump_exitify
 coreDumpFlag CoreDoStrictness         = Just Opt_D_dump_stranal
 coreDumpFlag CoreDoWorkerWrapper      = Just Opt_D_dump_worker_wrapper
 coreDumpFlag CoreDoSpecialising       = Just Opt_D_dump_spec
 coreDumpFlag CoreDoSpecConstr         = Just Opt_D_dump_spec
 coreDumpFlag CoreCSE                  = Just Opt_D_dump_cse
-coreDumpFlag CoreDoVectorisation      = Just Opt_D_dump_vect
-coreDumpFlag CoreDesugar              = Just Opt_D_dump_ds
+coreDumpFlag CoreDesugar              = Just Opt_D_dump_ds_preopt
 coreDumpFlag CoreDesugarOpt           = Just Opt_D_dump_ds
 coreDumpFlag CoreTidy                 = Just Opt_D_dump_simpl
 coreDumpFlag CorePrep                 = Just Opt_D_dump_prep
@@ -404,7 +406,8 @@ lintCoreBindings dflags pass local_in_scope binds
   where
     in_scope_set = mkInScopeSet (mkVarSet local_in_scope)
 
-    flags = LF { lf_check_global_ids = check_globals
+    flags = defaultLintFlags
+               { lf_check_global_ids = check_globals
                , lf_check_inline_loop_breakers = check_lbs
                , lf_check_static_ptrs = check_static_ptrs }
 
@@ -455,8 +458,16 @@ lintCoreBindings dflags pass local_in_scope binds
 *                                                                      *
 ************************************************************************
 
-We use this to check all unfoldings that come in from interfaces
-(it is very painful to catch errors otherwise):
+Note [Linting Unfoldings from Interfaces]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We use this to check all top-level unfoldings that come in from interfaces
+(it is very painful to catch errors otherwise).
+
+We do not need to call lintUnfolding on unfoldings that are nested within
+top-level unfoldings; they are linted when we lint the top-level unfolding;
+hence the `TopLevelFlag` on `tcPragExpr` in TcIface.
+
 -}
 
 lintUnfolding :: DynFlags
@@ -508,6 +519,11 @@ lintSingleBinding top_lvl_flag rec_flag (binder,rhs)
        ; binder_ty <- applySubstTy (idType binder)
        ; ensureEqTys binder_ty ty (mkRhsMsg binder (text "RHS") ty)
 
+       -- If the binding is for a CoVar, the RHS should be (Coercion co)
+       -- See Note [CoreSyn type and coercion invariant] in CoreSyn
+       ; checkL (not (isCoVar binder) || isCoArg rhs)
+                (mkLetErr binder rhs)
+
        -- Check that it's not levity-polymorphic
        -- Do this first, because otherwise isUnliftedType panics
        -- Annoyingly, this duplicates the test in lintIdBdr,
@@ -520,7 +536,7 @@ lintSingleBinding top_lvl_flag rec_flag (binder,rhs)
        ; checkL ( isJoinId binder
                || not (isUnliftedType binder_ty)
                || (isNonRec rec_flag && exprOkForSpeculation rhs)
-               || exprIsLiteralString rhs)
+               || exprIsTickedString rhs)
            (badBndrTyMsg binder (text "unlifted"))
 
         -- Check that if the binder is top-level or recursive, it's not
@@ -528,14 +544,14 @@ lintSingleBinding top_lvl_flag rec_flag (binder,rhs)
         -- computation to perform, see Note [CoreSyn top-level string literals].
        ; checkL (not (isStrictId binder)
             || (isNonRec rec_flag && not (isTopLevel top_lvl_flag))
-            || exprIsLiteralString rhs)
+            || exprIsTickedString rhs)
            (mkStrictMsg binder)
 
         -- Check that if the binder is at the top level and has type Addr#,
         -- that it is a string literal, see
         -- Note [CoreSyn top-level string literals].
        ; checkL (not (isTopLevel top_lvl_flag && binder_ty `eqType` addrPrimTy)
-                 || exprIsLiteralString rhs)
+                 || exprIsTickedString rhs)
            (mkTopNonLitStrMsg binder)
 
        ; flags <- getLintFlags
@@ -548,6 +564,7 @@ lintSingleBinding top_lvl_flag rec_flag (binder,rhs)
                                   (mkInvalidJoinPointMsg binder binder_ty)
 
        ; when (lf_check_inline_loop_breakers flags
+               && isStableUnfolding (realIdUnfolding binder)
                && isStrongLoopBreaker (idOccInfo binder)
                && isInlinePragma (idInlinePragma binder))
               (addWarnL (text "INLINE binder is (non-rule) loop breaker:" <+> ppr binder))
@@ -645,18 +662,11 @@ lintRhs _bndr rhs = fmap lf_check_static_ptrs getLintFlags >>= go
     go _ = markAllJoinsBad $ lintCoreExpr rhs
 
 lintIdUnfolding :: Id -> Type -> Unfolding -> LintM ()
-lintIdUnfolding bndr bndr_ty (CoreUnfolding { uf_tmpl = rhs, uf_src = src })
-  | isStableSource src
+lintIdUnfolding bndr bndr_ty uf
+  | isStableUnfolding uf
+  , Just rhs <- maybeUnfoldingTemplate uf
   = do { ty <- lintRhs bndr rhs
        ; ensureEqTys bndr_ty ty (mkRhsMsg bndr (text "unfolding") ty) }
-
-lintIdUnfolding bndr bndr_ty (DFunUnfolding { df_con = con, df_bndrs = bndrs
-                                            , df_args = args })
-  = do { ty <- lintBinders LambdaBind bndrs $ \ bndrs' ->
-               do { res_ty <- lintCoreArgs (dataConRepType con) args
-                  ; return (mkLamTypes bndrs' res_ty) }
-       ; ensureEqTys bndr_ty ty (mkRhsMsg bndr (text "dfun unfolding") ty) }
-
 lintIdUnfolding  _ _ _
   = return ()       -- Do not Lint unstable unfoldings, because that leads
                     -- to exponential behaviour; c.f. CoreFVs.idUnfoldingVars
@@ -703,8 +713,7 @@ lintCoreExpr (Cast expr co)
   = do { expr_ty <- markAllJoinsBad $ lintCoreExpr expr
        ; co' <- applySubstCo co
        ; (_, k2, from_ty, to_ty, r) <- lintCoercion co'
-       ; lintL (classifiesTypeWithValues k2)
-               (text "Target of cast not # or *:" <+> ppr co)
+       ; checkValueKind k2 (text "target of cast" <+> quotes (ppr co))
        ; lintRole co' Representational r
        ; ensureEqTys from_ty expr_ty (mkCastErr expr co' from_ty expr_ty)
        ; return to_ty }
@@ -787,13 +796,9 @@ lintCoreExpr e@(Case scrut var alt_ty alts) =
      ; (alt_ty, _) <- lintInTy alt_ty
      ; (var_ty, _) <- lintInTy (idType var)
 
-     -- See Note [No alternatives lint check]
-     ; when (null alts) $
-     do { checkL (not (exprIsHNF scrut))
-          (text "No alternatives for a case scrutinee in head-normal form:" <+> ppr scrut)
-        ; checkWarnL scrut_diverges
-          (text "No alternatives for a case scrutinee not known to diverge for sure:" <+> ppr scrut)
-        }
+     -- We used to try to check whether a case expression with no
+     -- alternatives was legitimate, but this didn't work.
+     -- See Note [No alternatives lint check] for details.
 
      -- See Note [Rules for floating-point comparisons] in PrelRules
      ; let isLitPat (LitAlt _, _ , _) = True
@@ -842,6 +847,7 @@ lintVarOcc :: Var -> Int -- Number of arguments (type or value) being passed
 lintVarOcc var nargs
   = do  { checkL (isNonCoVarId var)
                  (text "Non term variable" <+> ppr var)
+                 -- See CoreSyn Note [Variable occurrences in Core]
 
         -- Cneck that the type of the occurrence is the same
         -- as the type of the binding site
@@ -920,23 +926,46 @@ checkJoinOcc var n_args
 {-
 Note [No alternatives lint check]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Case expressions with no alternatives are odd beasts, and worth looking at
-in the linter (cf Trac #10180).  We check two things:
+Case expressions with no alternatives are odd beasts, and it would seem
+like they would worth be looking at in the linter (cf Trac #10180). We
+used to check two things:
 
-* exprIsHNF is false: certainly, it would be terribly wrong if the
-  scrutinee was already in head normal form.
+* exprIsHNF is false: it would *seem* to be terribly wrong if
+  the scrutinee was already in head normal form.
 
 * exprIsBottom is true: we should be able to see why GHC believes the
   scrutinee is diverging for sure.
 
-In principle, the first check is redundant: exprIsBottom == True will
-always imply exprIsHNF == False.  But the first check is reliable: If
-exprIsHNF == True, then there definitely is a problem (exprIsHNF errs
-on the right side).  If the second check triggers then it may be the
-case that the compiler got smarter elsewhere, and the empty case is
-correct, but that exprIsBottom is unable to see it. In particular, the
-empty-type check in exprIsBottom is an approximation. Therefore, this
-check is not fully reliable, and we keep both around.
+It was already known that the second test was not entirely reliable.
+Unfortunately (Trac #13990), the first test turned out not to be reliable
+either. Getting the checks right turns out to be somewhat complicated.
+
+For example, suppose we have (comment 8)
+
+  data T a where
+    TInt :: T Int
+
+  absurdTBool :: T Bool -> a
+  absurdTBool v = case v of
+
+  data Foo = Foo !(T Bool)
+
+  absurdFoo :: Foo -> a
+  absurdFoo (Foo x) = absurdTBool x
+
+GHC initially accepts the empty case because of the GADT conditions. But then
+we inline absurdTBool, getting
+
+  absurdFoo (Foo x) = case x of
+
+x is in normal form (because the Foo constructor is strict) but the
+case is empty. To avoid this problem, GHC would have to recognize
+that matching on Foo x is already absurd, which is not so easy.
+
+More generally, we don't really know all the ways that GHC can
+lose track of why an expression is bottom, so we shouldn't make too
+much fuss when that happens.
+
 
 Note [Beta redexes]
 ~~~~~~~~~~~~~~~~~~~
@@ -1092,7 +1121,7 @@ checkCaseAlts e ty alts =
   where
     (con_alts, maybe_deflt) = findDefault alts
 
-        -- Check that successive alternatives have increasing tags
+        -- Check that successive alternatives have strictly increasing tags
     increasing_tag (alt1 : rest@( alt2 : _)) = alt1 `ltAlt` alt2 && increasing_tag rest
     increasing_tag _                         = True
 
@@ -1244,6 +1273,19 @@ lintIdBndr top_lvl bind_site id linterF
 %************************************************************************
 -}
 
+lintTypes :: DynFlags
+          -> [TyCoVar]   -- Treat these as in scope
+          -> [Type]
+          -> Maybe MsgDoc -- Nothing => OK
+lintTypes dflags vars tys
+  | isEmptyBag errs = Nothing
+  | otherwise       = Just (pprMessageBag errs)
+  where
+    in_scope = emptyInScopeSet
+    (_warns, errs) = initL dflags defaultLintFlags in_scope linter
+    linter = lintBinders LambdaBind vars $ \_ ->
+             mapM_ lintInTy tys
+
 lintInTy :: InType -> LintM (LintedType, LintedKind)
 -- Types only, not kinds
 -- Check the type, and apply the substitution to it
@@ -1252,7 +1294,9 @@ lintInTy ty
   = addLoc (InType ty) $
     do  { ty' <- applySubstTy ty
         ; k  <- lintType ty'
-        ; lintKind k
+        ; lintKind k  -- The kind returned by lintType is already
+                      -- a LintedKind but we also want to check that
+                      -- k :: *, which lintKind does
         ; return (ty', k) }
 
 checkTyCon :: TyCon -> LintM ()
@@ -1280,25 +1324,25 @@ lintType ty@(AppTy t1 t2)
        ; lint_ty_app ty k1 [(t2,k2)] }
 
 lintType ty@(TyConApp tc tys)
-  | Just ty' <- coreView ty
-  = lintType ty'   -- Expand type synonyms, so that we do not bogusly complain
-                   --  about un-saturated type synonyms
+  | isTypeSynonymTyCon tc
+  = do { report_unsat <- lf_report_unsat_syns <$> getLintFlags
+       ; lintTySynApp report_unsat ty tc tys }
 
-  -- We should never see a saturated application of funTyCon; such applications
-  -- should be represented with the FunTy constructor. See Note [Linting
-  -- function types] and Note [Representation of function types].
   | isFunTyCon tc
   , tys `lengthIs` 4
+    -- We should never see a saturated application of funTyCon; such
+    -- applications should be represented with the FunTy constructor.
+    -- See Note [Linting function types] and
+    -- Note [Representation of function types].
   = failWithL (hang (text "Saturated application of (->)") 2 (ppr ty))
 
-  | isTypeSynonymTyCon tc || isTypeFamilyTyCon tc
-       -- Also type synonyms and type families
+  | isTypeFamilyTyCon tc -- Check for unsaturated type family
   , tys `lengthLessThan` tyConArity tc
   = failWithL (hang (text "Un-saturated type application") 2 (ppr ty))
 
   | otherwise
   = do { checkTyCon tc
-       ; ks <- mapM lintType tys
+       ; ks <- setReportUnsat True (mapM lintType tys)
        ; lint_ty_app ty (tyConKind tc) (tys `zip` ks) }
 
 -- arrows can related *unlifted* kinds, so this has to be separate from
@@ -1308,28 +1352,83 @@ lintType ty@(FunTy t1 t2)
        ; k2 <- lintType t2
        ; lintArrow (text "type or kind" <+> quotes (ppr ty)) k1 k2 }
 
-lintType t@(ForAllTy (TvBndr tv _vis) ty)
-  = do { lintL (isTyVar tv) (text "Covar bound in type:" <+> ppr t)
-       ; lintTyBndr tv $ \tv' ->
-          do { k <- lintType ty
-             ; lintL (not (tv' `elemVarSet` tyCoVarsOfType k))
-                     (text "Variable escape in forall:" <+> ppr t)
-             ; lintL (classifiesTypeWithValues k)
-                     (text "Non-* and non-# kind in forall:" <+> ppr t)
-             ; return k }}
+lintType t@(ForAllTy (Bndr tv _vis) ty)
+  -- forall over types
+  | isTyVar tv
+  = do { lintTyBndr tv $ \tv' ->
+    do { k <- lintType ty
+       ; checkValueKind k (text "the body of forall:" <+> ppr t)
+       ; case occCheckExpand [tv'] k of  -- See Note [Stupid type synonyms]
+           Just k' -> return k'
+           Nothing -> failWithL (hang (text "Variable escape in forall:")
+                                    2 (vcat [ text "type:" <+> ppr t
+                                            , text "kind:" <+> ppr k ]))
+    }}
+
+lintType t@(ForAllTy (Bndr cv _vis) ty)
+  -- forall over coercions
+  = do { lintL (isCoVar cv)
+               (text "Non-Tyvar or Non-Covar bound in type:" <+> ppr t)
+       ; lintL (cv `elemVarSet` tyCoVarsOfType ty)
+               (text "Covar does not occur in the body:" <+> ppr t)
+       ; lintCoBndr cv $ \_ ->
+    do { k <- lintType ty
+       ; checkValueKind k (text "the body of forall:" <+> ppr t)
+       ; return liftedTypeKind
+           -- We don't check variable escape here. Namely, k could refer to cv'
+           -- See Note [NthCo and newtypes] in TyCoRep
+    }}
 
 lintType ty@(LitTy l) = lintTyLit l >> return (typeKind ty)
 
 lintType (CastTy ty co)
   = do { k1 <- lintType ty
        ; (k1', k2) <- lintStarCoercion co
-       ; ensureEqTys k1 k1' (mkCastErr ty co k1' k1)
+       ; ensureEqTys k1 k1' (mkCastTyErr ty co k1' k1)
        ; return k2 }
 
 lintType (CoercionTy co)
   = do { (k1, k2, ty1, ty2, r) <- lintCoercion co
        ; return $ mkHeteroCoercionType r k1 k2 ty1 ty2 }
 
+{- Note [Stupid type synonyms]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider (Trac #14939)
+   type Alg cls ob = ob
+   f :: forall (cls :: * -> Constraint) (b :: Alg cls *). b
+
+Here 'cls' appears free in b's kind, which would usually be illegal
+(because in (forall a. ty), ty's kind should not mention 'a'). But
+#in this case (Alg cls *) = *, so all is well.  Currently we allow
+this, and make Lint expand synonyms where necessary to make it so.
+
+c.f. TcUnify.occCheckExpand and CoreUtils.coreAltsType which deal
+with the same problem. A single systematic solution eludes me.
+-}
+
+-----------------
+lintTySynApp :: Bool -> Type -> TyCon -> [Type] -> LintM LintedKind
+-- See Note [Linting type synonym applications]
+lintTySynApp report_unsat ty tc tys
+  | report_unsat   -- Report unsaturated only if report_unsat is on
+  , tys `lengthLessThan` tyConArity tc
+  = failWithL (hang (text "Un-saturated type application") 2 (ppr ty))
+
+  | otherwise
+  = do { ks <- setReportUnsat False (mapM lintType tys)
+
+       ; when report_unsat $
+         case expandSynTyCon_maybe tc tys of
+            Nothing -> pprPanic "lintTySynApp" (ppr tc <+> ppr tys)
+                       -- Previous guards should have made this impossible
+            Just (tenv, rhs, tys') -> do { _ <- lintType expanded_ty
+                                         ; return () }
+                where
+                  expanded_ty = mkAppTys (substTy (mkTvSubstPrs tenv) rhs) tys'
+
+       ; lint_ty_app ty (tyConKind tc) (tys `zip` ks) }
+
+-----------------
 lintKind :: OutKind -> LintM ()
 -- If you edit this function, you may need to update the GHC formalism
 -- See Note [GHC Formalism]
@@ -1338,13 +1437,15 @@ lintKind k = do { sk <- lintType k
                          (addErrL (hang (text "Ill-kinded kind:" <+> ppr k)
                                       2 (text "has kind:" <+> ppr sk))) }
 
--- confirms that a type is really *
-lintStar :: SDoc -> OutKind -> LintM ()
-lintStar doc k
+-----------------
+-- Confirms that a type is really *, #, Constraint etc
+checkValueKind :: OutKind -> SDoc -> LintM ()
+checkValueKind k doc
   = lintL (classifiesTypeWithValues k)
           (text "Non-*-like kind when *-like expected:" <+> ppr k $$
            text "when checking" <+> doc)
 
+-----------------
 lintArrow :: SDoc -> LintedKind -> LintedKind -> LintM LintedKind
 -- If you edit this function, you may need to update the GHC formalism
 -- See Note [GHC Formalism]
@@ -1359,6 +1460,7 @@ lintArrow what k1 k2   -- Eg lintArrow "type or kind `blah'" k1 k2
                   2 (text "in" <+> what)
              , what <+> text "kind:" <+> ppr k ]
 
+-----------------
 lint_ty_app :: Type -> LintedKind -> [(LintedType,LintedKind)] -> LintM LintedKind
 lint_ty_app ty k tys
   = lint_app (text "type" <+> quotes (ppr ty)) k tys
@@ -1390,23 +1492,28 @@ lint_app doc kfn kas
          -- Note [The substitution invariant] in TyCoRep
          ; foldlM (go_app in_scope) kfn kas }
   where
-    fail_msg = vcat [ hang (text "Kind application error in") 2 doc
-                    , nest 2 (text "Function kind =" <+> ppr kfn)
-                    , nest 2 (text "Arg kinds =" <+> ppr kas) ]
+    fail_msg extra = vcat [ hang (text "Kind application error in") 2 doc
+                          , nest 2 (text "Function kind =" <+> ppr kfn)
+                          , nest 2 (text "Arg kinds =" <+> ppr kas)
+                          , extra ]
 
-    go_app in_scope kfn ka
+    go_app in_scope kfn tka
       | Just kfn' <- coreView kfn
-      = go_app in_scope kfn' ka
+      = go_app in_scope kfn' tka
 
-    go_app _ (FunTy kfa kfb) (_,ka)
-      = do { unless (ka `eqType` kfa) (addErrL fail_msg)
+    go_app _ (FunTy kfa kfb) tka@(_,ka)
+      = do { unless (ka `eqType` kfa) $
+             addErrL (fail_msg (text "Fun:" <+> (ppr kfa $$ ppr tka)))
            ; return kfb }
 
-    go_app in_scope (ForAllTy (TvBndr kv _vis) kfn) (ta,ka)
-      = do { unless (ka `eqType` tyVarKind kv) (addErrL fail_msg)
-           ; return (substTyWithInScope in_scope [kv] [ta] kfn) }
+    go_app in_scope (ForAllTy (Bndr kv _vis) kfn) tka@(ta,ka)
+      = do { let kv_kind = varType kv
+           ; unless (ka `eqType` kv_kind) $
+             addErrL (fail_msg (text "Forall:" <+> (ppr kv $$ ppr kv_kind $$ ppr tka)))
+           ; return $ substTy (extendTCvSubst (mkEmptyTCvSubst in_scope) kv ta) kfn }
 
-    go_app _ _ _ = failWithL fail_msg
+    go_app _ kfn ka
+       = failWithL (fail_msg (text "Not a fun:" <+> (ppr kfn $$ ppr ka)))
 
 {- *********************************************************************
 *                                                                      *
@@ -1421,7 +1528,7 @@ lintCoreRule _ _ (BuiltinRule {})
 lintCoreRule fun fun_ty rule@(Rule { ru_name = name, ru_bndrs = bndrs
                                    , ru_args = args, ru_rhs = rhs })
   = lintBinders LambdaBind bndrs $ \ _ ->
-    do { lhs_ty <- foldM lintCoreArg fun_ty args
+    do { lhs_ty <- lintCoreArgs fun_ty args
        ; rhs_ty <- case isJoinId_maybe fun of
                      Just join_arity
                        -> do { checkL (args `lengthIs` join_arity) $
@@ -1431,7 +1538,8 @@ lintCoreRule fun fun_ty rule@(Rule { ru_name = name, ru_bndrs = bndrs
                      _ -> markAllJoinsBad $ lintCoreExpr rhs
        ; ensureEqTys lhs_ty rhs_ty $
          (rule_doc <+> vcat [ text "lhs type:" <+> ppr lhs_ty
-                            , text "rhs type:" <+> ppr rhs_ty ])
+                            , text "rhs type:" <+> ppr rhs_ty
+                            , text "fun_ty:" <+> ppr fun_ty ])
        ; let bad_bndrs = filter is_bad_bndr bndrs
 
        ; checkL (null bad_bndrs)
@@ -1519,8 +1627,8 @@ lintInCo co
 lintStarCoercion :: OutCoercion -> LintM (LintedType, LintedType)
 lintStarCoercion g
   = do { (k1, k2, t1, t2, r) <- lintCoercion g
-       ; lintStar (text "the kind of the left type in" <+> ppr g) k1
-       ; lintStar (text "the kind of the right type in" <+> ppr g) k2
+       ; checkValueKind k1 (text "the kind of the left type in" <+> ppr g)
+       ; checkValueKind k2 (text "the kind of the right type in" <+> ppr g)
        ; lintRole g Nominal r
        ; return (t1, t2) }
 
@@ -1530,15 +1638,28 @@ lintCoercion :: OutCoercion -> LintM (LintedKind, LintedKind, LintedType, Linted
 --
 -- If   lintCoercion co = (k1, k2, s1, s2, r)
 -- then co :: s1 ~r s2
---      s1 :: k2
+--      s1 :: k1
 --      s2 :: k2
 
 -- If you edit this function, you may need to update the GHC formalism
 -- See Note [GHC Formalism]
-lintCoercion (Refl r ty)
+lintCoercion (Refl ty)
+  = do { k <- lintType ty
+       ; return (k, k, ty, ty, Nominal) }
+
+lintCoercion (GRefl r ty MRefl)
   = do { k <- lintType ty
        ; return (k, k, ty, ty, r) }
 
+lintCoercion (GRefl r ty (MCo co))
+  = do { k <- lintType ty
+       ; (_, _, k1, k2, r') <- lintCoercion co
+       ; ensureEqTys k k1
+               (hang (text "GRefl coercion kind mis-match:" <+> ppr co)
+                   2 (vcat [ppr ty, ppr k, ppr k1]))
+       ; lintRole co Nominal r'
+       ; return (k1, k2, ty, mkCastTy ty co, r) }
+
 lintCoercion co@(TyConAppCo r tc cos)
   | tc `hasKey` funTyConKey
   , [_rep1,_rep2,_co1,_co2] <- cos
@@ -1559,7 +1680,7 @@ lintCoercion co@(TyConAppCo r tc cos)
 lintCoercion co@(AppCo co1 co2)
   | TyConAppCo {} <- co1
   = failWithL (text "TyConAppCo to the left of AppCo:" <+> ppr co)
-  | Refl _ (TyConApp {}) <- co1
+  | Just (TyConApp {}, _) <- isReflCo_maybe co1
   = failWithL (text "Refl (TyConApp ...) to the left of AppCo:" <+> ppr co)
   | otherwise
   = do { (k1,  k2,  s1, s2, r1) <- lintCoercion co1
@@ -1575,6 +1696,8 @@ lintCoercion co@(AppCo co1 co2)
 
 ----------
 lintCoercion (ForAllCo tv1 kind_co co)
+  -- forall over types
+  | isTyVar tv1
   = do { (_, k2) <- lintStarCoercion kind_co
        ; let tv2 = setTyVarKind tv1 k2
        ; addInScopeVar tv1 $
@@ -1594,6 +1717,37 @@ lintCoercion (ForAllCo tv1 kind_co co)
                    substTy subst t2
        ; return (k3, k4, tyl, tyr, r) } }
 
+lintCoercion (ForAllCo cv1 kind_co co)
+  -- forall over coercions
+  = ASSERT( isCoVar cv1 )
+    do { (_, k2) <- lintStarCoercion kind_co
+       ; let cv2 = setVarType cv1 k2
+       ; addInScopeVar cv1 $
+    do {
+       ; (k3, k4, t1, t2, r) <- lintCoercion co
+       ; checkValueKind k3 (text "the body of a ForAllCo over covar:" <+> ppr co)
+       ; checkValueKind k4 (text "the body of a ForAllCo over covar:" <+> ppr co)
+           -- See Note [Weird typing rule for ForAllTy] in Type
+       ; in_scope <- getInScope
+       ; let tyl   = mkTyCoInvForAllTy cv1 t1
+             r2    = coVarRole cv1
+             kind_co' = downgradeRole r2 Nominal kind_co
+             eta1  = mkNthCo r2 2 kind_co'
+             eta2  = mkNthCo r2 3 kind_co'
+             subst = mkCvSubst in_scope $
+                     -- We need both the free vars of the `t2` and the
+                     -- free vars of the range of the substitution in
+                     -- scope. All the free vars of `t2` and `kind_co` should
+                     -- already be in `in_scope`, because they've been
+                     -- linted and `cv2` has the same unique as `cv1`.
+                     -- See Note [The substitution invariant]
+                     unitVarEnv cv1 (eta1 `mkTransCo` (mkCoVarCo cv2)
+                                          `mkTransCo` (mkSymCo eta2))
+             tyr = mkTyCoInvForAllTy cv2 $
+                   substTy subst t2
+       ; return (liftedTypeKind, liftedTypeKind, tyl, tyr, r) } }
+                   -- See Note [Weird typing rule for ForAllTy] in Type
+
 lintCoercion co@(FunCo r co1 co2)
   = do { (k1,k'1,s1,t1,r1) <- lintCoercion co1
        ; (k2,k'2,s2,t2,r2) <- lintCoercion co2
@@ -1630,8 +1784,6 @@ lintCoercion co@(UnivCo prov r ty1 ty2)
                                     ; check_kinds kco k1 k2 }
 
            PluginProv _     -> return ()  -- no extra checks
-           HoleProv h       -> addErrL $
-                               text "Unfilled coercion hole:" <+> ppr h
 
        ; when (r /= Phantom && classifiesTypeWithValues k1
                             && classifiesTypeWithValues k2)
@@ -1668,8 +1820,8 @@ lintCoercion co@(UnivCo prov r ty1 ty2)
        = do { dflags <- getDynFlags
             ; checkWarnL (isUnBoxed rep1 == isUnBoxed rep2)
                          (report "between unboxed and boxed value")
-            ; checkWarnL (TyCon.primRepSizeW dflags rep1
-                           == TyCon.primRepSizeW dflags rep2)
+            ; checkWarnL (TyCon.primRepSizeB dflags rep1
+                           == TyCon.primRepSizeB dflags rep2)
                          (report "between unboxed values of different size")
             ; let fl = liftM2 (==) (TyCon.primRepIsFloat rep1)
                                    (TyCon.primRepIsFloat rep2)
@@ -1697,15 +1849,19 @@ lintCoercion co@(TransCo co1 co2)
        ; lintRole co r1 r2
        ; return (k1a, k2b, ty1a, ty2b, r1) }
 
-lintCoercion the_co@(NthCo n co)
+lintCoercion the_co@(NthCo r0 n co)
   = do { (_, _, s, t, r) <- lintCoercion co
        ; case (splitForAllTy_maybe s, splitForAllTy_maybe t) of
-         { (Just (tv_s, _ty_s), Just (tv_t, _ty_t))
-             |  n == 0
-             -> return (ks, kt, ts, tt, Nominal)
+         { (Just (tcv_s, _ty_s), Just (tcv_t, _ty_t))
+             -- works for both tyvar and covar
+             | n == 0
+             ,  (isForAllTy_ty s && isForAllTy_ty t)
+             || (isForAllTy_co s && isForAllTy_co t)
+             -> do { lintRole the_co Nominal r0
+                   ; return (ks, kt, ts, tt, r0) }
              where
-               ts = tyVarKind tv_s
-               tt = tyVarKind tv_t
+               ts = varType tcv_s
+               tt = varType tcv_t
                ks = typeKind ts
                kt = typeKind tt
 
@@ -1716,7 +1872,8 @@ lintCoercion the_co@(NthCo n co)
                  -- see Note [NthCo and newtypes] in TyCoRep
              , tys_s `equalLength` tys_t
              , tys_s `lengthExceeds` n
-             -> return (ks, kt, ts, tt, tr)
+             -> do { lintRole the_co tr r0
+                   ; return (ks, kt, ts, tt, r0) }
              where
                ts = getNth tys_s n
                tt = getNth tys_t n
@@ -1747,16 +1904,32 @@ lintCoercion (InstCo co arg)
        ; (k1',k2',s1,s2, r') <- lintCoercion arg
        ; lintRole arg Nominal r'
        ; in_scope <- getInScope
-       ; case (splitForAllTy_maybe t1', splitForAllTy_maybe t2') of
-          (Just (tv1,t1), Just (tv2,t2))
-            | k1' `eqType` tyVarKind tv1
-            , k2' `eqType` tyVarKind tv2
-            -> return (k3, k4,
-                       substTyWithInScope in_scope [tv1] [s1] t1,
-                       substTyWithInScope in_scope [tv2] [s2] t2, r)
-            | otherwise
-            -> failWithL (text "Kind mis-match in inst coercion")
-          _ -> failWithL (text "Bad argument of inst") }
+       ; case (splitForAllTy_ty_maybe t1', splitForAllTy_ty_maybe t2') of
+         -- forall over tvar
+         { (Just (tv1,t1), Just (tv2,t2))
+             | k1' `eqType` tyVarKind tv1
+             , k2' `eqType` tyVarKind tv2
+             -> return (k3, k4,
+                        substTyWithInScope in_scope [tv1] [s1] t1,
+                        substTyWithInScope in_scope [tv2] [s2] t2, r)
+             | otherwise
+             -> failWithL (text "Kind mis-match in inst coercion")
+         ; _ -> case (splitForAllTy_co_maybe t1', splitForAllTy_co_maybe t2') of
+         -- forall over covar
+         { (Just (cv1, t1), Just (cv2, t2))
+             | k1' `eqType` varType cv1
+             , k2' `eqType` varType cv2
+             , CoercionTy s1' <- s1
+             , CoercionTy s2' <- s2
+             -> do { return $
+                       (liftedTypeKind, liftedTypeKind
+                          -- See Note [Weird typing rule for ForAllTy] in Type
+                       , substTy (mkCvSubst in_scope $ unitVarEnv cv1 s1') t1
+                       , substTy (mkCvSubst in_scope $ unitVarEnv cv2 s2') t2
+                       , r) }
+             | otherwise
+             -> failWithL (text "Kind mis-match in inst coercion")
+         ; _ -> failWithL (text "Bad argument of inst") }}}
 
 lintCoercion co@(AxiomInstCo con ind cos)
   = do { unless (0 <= ind && ind < numBranches (coAxiomBranches con))
@@ -1797,12 +1970,6 @@ lintCoercion co@(AxiomInstCo con ind cos)
            ; return (extendTCvSubst subst_l ktv s',
                      extendTCvSubst subst_r ktv t') }
 
-lintCoercion (CoherenceCo co1 co2)
-  = do { (_, k2, t1, t2, r) <- lintCoercion co1
-       ; let lhsty = mkCastTy t1 co2
-       ; k1' <- lintType lhsty
-       ; return (k1', k2, lhsty, t2, r) }
-
 lintCoercion (KindCo co)
   = do { (k1, k2, _, _, _) <- lintCoercion co
        ; return (liftedTypeKind, liftedTypeKind, k1, k2, Nominal) }
@@ -1838,6 +2005,11 @@ lintCoercion this@(AxiomRuleCo co cs)
                           [ text "Expected:" <+> int (n + length es)
                           , text "Provided:" <+> int n ]
 
+lintCoercion (HoleCo h)
+  = do { addErrL $ text "Unfilled coercion hole:" <+> ppr h
+       ; lintCoercion (CoVarCo (coHoleCoVar h)) }
+
+
 ----------
 lintUnliftedCoVar :: CoVar -> LintM ()
 lintUnliftedCoVar cv
@@ -1870,8 +2042,8 @@ data LintEnv
 data LintFlags
   = LF { lf_check_global_ids           :: Bool -- See Note [Checking for global Ids]
        , lf_check_inline_loop_breakers :: Bool -- See Note [Checking for INLINE loop breakers]
-       , lf_check_static_ptrs          :: StaticPtrCheck
-                                             -- ^ See Note [Checking StaticPtrs]
+       , lf_check_static_ptrs :: StaticPtrCheck -- ^ See Note [Checking StaticPtrs]
+       , lf_report_unsat_syns :: Bool -- ^ See Note [Linting type synonym applications]
     }
 
 -- See Note [Checking StaticPtrs]
@@ -1888,6 +2060,7 @@ defaultLintFlags :: LintFlags
 defaultLintFlags = LF { lf_check_global_ids = False
                       , lf_check_inline_loop_breakers = True
                       , lf_check_static_ptrs = AllowAnywhere
+                      , lf_report_unsat_syns = True
                       }
 
 newtype LintM a =
@@ -1932,6 +2105,37 @@ rename type binders as we go, maintaining a substitution.
 The same substitution also supports let-type, current expressed as
         (/\(a:*). body) ty
 Here we substitute 'ty' for 'a' in 'body', on the fly.
+
+Note [Linting type synonym applications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When lining a type-synonym application
+  S ty1 .. tyn
+we behave as follows (Trac #15057):
+
+* If lf_report_unsat_syns = True, and S has arity < n,
+  complain about an unsaturated type synonym.
+
+* Switch off lf_report_unsat_syns, and lint ty1 .. tyn.
+
+  Reason: catch out of scope variables or other ill-kinded gubbins,
+  even if S discards that argument entirely. E.g. (#15012):
+     type FakeOut a = Int
+     type family TF a
+     type instance TF Int = FakeOut a
+  Here 'a' is out of scope; but if we expand FakeOut, we conceal
+  that out-of-scope error.
+
+  Reason for switching off lf_report_unsat_syns: with
+  LiberalTypeSynonyms, GHC allows unsaturated synonyms provided they
+  are saturated when the type is expanded. Example
+     type T f = f Int
+     type S a = a -> a
+     type Z = T S
+  In Z's RHS, S appears unsaturated, but it is saturated when T is expanded.
+
+* If lf_report_unsat_syns is on, expand the synonym application and
+  lint the result.  Reason: want to check that synonyms are saturated
+  when the type is expanded.
 -}
 
 instance Functor LintM where
@@ -1942,17 +2146,15 @@ instance Applicative LintM where
       (<*>) = ap
 
 instance Monad LintM where
-  fail err = failWithL (text err)
+  fail = MonadFail.fail
   m >>= k  = LintM (\ env errs ->
                        let (res, errs') = unLintM m env errs in
                          case res of
                            Just r -> unLintM (k r) env errs'
                            Nothing -> (Nothing, errs'))
 
-#if __GLASGOW_HASKELL__ > 710
 instance MonadFail.MonadFail LintM where
     fail err = failWithL (text err)
-#endif
 
 instance HasDynFlags LintM where
   getDynFlags = LintM (\ e errs -> (Just (le_dynflags e), errs))
@@ -1982,6 +2184,13 @@ initL dflags flags in_scope m
              , le_loc = []
              , le_dynflags = dflags }
 
+setReportUnsat :: Bool -> LintM a -> LintM a
+-- Switch off lf_report_unsat_syns
+setReportUnsat ru thing_inside
+  = LintM $ \ env errs ->
+    let env' = env { le_flags = (le_flags env) { lf_report_unsat_syns = ru } }
+    in unLintM thing_inside env' errs
+
 getLintFlags :: LintM LintFlags
 getLintFlags = LintM $ \ env errs -> (Just (le_flags env), errs)
 
@@ -2017,10 +2226,9 @@ addMsg env msgs msg
    locs = le_loc env
    (loc, cxt1) = dumpLoc (head locs)
    cxts        = [snd (dumpLoc loc) | loc <- locs]
-   context     = sdocWithPprDebug $ \dbg -> if dbg
-                  then vcat (reverse cxts) $$ cxt1 $$
-                         text "Substitution:" <+> ppr (le_subst env)
-                  else cxt1
+   context     = ifPprDebug (vcat (reverse cxts) $$ cxt1 $$
+                             text "Substitution:" <+> ppr (le_subst env))
+                            cxt1
 
    mk_msg msg = mkLocMessage SevWarning (mkSrcSpan loc loc) (context $$ msg)
 
@@ -2342,14 +2550,32 @@ mkArityMsg binder
          ]
            where (StrictSig dmd_ty) = idStrictness binder
 -}
-mkCastErr :: Outputable casted => casted -> Coercion -> Type -> Type -> MsgDoc
-mkCastErr expr co from_ty expr_ty
-  = vcat [text "From-type of Cast differs from type of enclosed expression",
-          text "From-type:" <+> ppr from_ty,
-          text "Type of enclosed expr:" <+> ppr expr_ty,
-          text "Actual enclosed expr:" <+> ppr expr,
+mkCastErr :: CoreExpr -> Coercion -> Type -> Type -> MsgDoc
+mkCastErr expr = mk_cast_err "expression" "type" (ppr expr)
+
+mkCastTyErr :: Type -> Coercion -> Kind -> Kind -> MsgDoc
+mkCastTyErr ty = mk_cast_err "type" "kind" (ppr ty)
+
+mk_cast_err :: String -- ^ What sort of casted thing this is
+                      --   (\"expression\" or \"type\").
+            -> String -- ^ What sort of coercion is being used
+                      --   (\"type\" or \"kind\").
+            -> SDoc   -- ^ The thing being casted.
+            -> Coercion -> Type -> Type -> MsgDoc
+mk_cast_err thing_str co_str pp_thing co from_ty thing_ty
+  = vcat [from_msg <+> text "of Cast differs from" <+> co_msg
+            <+> text "of" <+> enclosed_msg,
+          from_msg <> colon <+> ppr from_ty,
+          text (capitalise co_str) <+> text "of" <+> enclosed_msg <> colon
+            <+> ppr thing_ty,
+          text "Actual" <+> enclosed_msg <> colon <+> pp_thing,
           text "Coercion used in cast:" <+> ppr co
          ]
+  where
+    co_msg, from_msg, enclosed_msg :: SDoc
+    co_msg       = text co_str
+    from_msg     = text "From-" <> co_msg
+    enclosed_msg = text "enclosed" <+> text thing_str
 
 mkBadUnivCoMsg :: LeftOrRight -> Coercion -> SDoc
 mkBadUnivCoMsg lr co
@@ -2431,15 +2657,15 @@ pprLeftOrRight :: LeftOrRight -> MsgDoc
 pprLeftOrRight CLeft  = text "left"
 pprLeftOrRight CRight = text "right"
 
-dupVars :: [[Var]] -> MsgDoc
+dupVars :: [NonEmpty Var] -> MsgDoc
 dupVars vars
   = hang (text "Duplicate variables brought into scope")
-       2 (ppr vars)
+       2 (ppr (map toList vars))
 
-dupExtVars :: [[Name]] -> MsgDoc
+dupExtVars :: [NonEmpty Name] -> MsgDoc
 dupExtVars vars
   = hang (text "Duplicate top-level variables with the same qualified name")
-       2 (ppr vars)
+       2 (ppr (map toList vars))
 
 {-
 ************************************************************************
diff --git a/compiler/coreSyn/TrieMap.hs b/compiler/coreSyn/CoreMap.hs
index a6b9db46cb..11f2fb1b11 100644
--- a/compiler/coreSyn/TrieMap.hs
+++ b/compiler/coreSyn/CoreMap.hs
@@ -9,7 +9,7 @@
 {-# LANGUAGE TypeSynonymInstances #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE UndecidableInstances #-}
-module TrieMap(
+module CoreMap(
    -- * Maps over Core expressions
    CoreMap, emptyCoreMap, extendCoreMap, lookupCoreMap, foldCoreMap,
    -- * Maps over 'Type's
@@ -24,6 +24,8 @@ module TrieMap(
    ListMap,
    -- * Maps over 'Literal's
    LiteralMap,
+   -- * Map for compressing leaves. See Note [Compressed TrieMap]
+   GenMap,
    -- * 'TrieMap' class
    TrieMap(..), insertTM, deleteTM,
    lkDFreeVar, xtDFreeVar,
@@ -31,15 +33,15 @@ module TrieMap(
    (>.>), (|>), (|>>),
  ) where
 
+import GhcPrelude
+
+import TrieMap
 import CoreSyn
 import Coercion
-import Literal
 import Name
 import Type
 import TyCoRep
 import Var
-import UniqDFM
-import Unique( Unique )
 import FastString(FastString)
 import Util
 
@@ -51,386 +53,44 @@ import Outputable
 import Control.Monad( (>=>) )
 
 {-
-This module implements TrieMaps, which are finite mappings
-whose key is a structured value like a CoreExpr or Type.
+This module implements TrieMaps over Core related data structures
+like CoreExpr or Type. It is built on the Tries from the TrieMap
+module.
 
 The code is very regular and boilerplate-like, but there is
 some neat handling of *binders*.  In effect they are deBruijn
 numbered on the fly.
 
-The regular pattern for handling TrieMaps on data structures was first
-described (to my knowledge) in Connelly and Morris's 1995 paper "A
-generalization of the Trie Data Structure"; there is also an accessible
-description of the idea in Okasaki's book "Purely Functional Data
-Structures", Section 10.3.2
 
-************************************************************************
-*                                                                      *
-                   The TrieMap class
-*                                                                      *
-************************************************************************
 -}
 
-type XT a = Maybe a -> Maybe a  -- How to alter a non-existent elt (Nothing)
-                                --               or an existing elt (Just)
-
-class TrieMap m where
-   type Key m :: *
-   emptyTM  :: m a
-   lookupTM :: forall b. Key m -> m b -> Maybe b
-   alterTM  :: forall b. Key m -> XT b -> m b -> m b
-   mapTM    :: (a->b) -> m a -> m b
-
-   foldTM   :: (a -> b -> b) -> m a -> b -> b
-      -- The unusual argument order here makes
-      -- it easy to compose calls to foldTM;
-      -- see for example fdE below
-
-insertTM :: TrieMap m => Key m -> a -> m a -> m a
-insertTM k v m = alterTM k (\_ -> Just v) m
-
-deleteTM :: TrieMap m => Key m -> m a -> m a
-deleteTM k m = alterTM k (\_ -> Nothing) m
-
 ----------------------
 -- Recall that
 --   Control.Monad.(>=>) :: (a -> Maybe b) -> (b -> Maybe c) -> a -> Maybe c
 
-(>.>) :: (a -> b) -> (b -> c) -> a -> c
--- Reverse function composition (do f first, then g)
-infixr 1 >.>
-(f >.> g) x = g (f x)
-infixr 1 |>, |>>
-
-(|>) :: a -> (a->b) -> b     -- Reverse application
-x |> f = f x
-
-----------------------
-(|>>) :: TrieMap m2
-      => (XT (m2 a) -> m1 (m2 a) -> m1 (m2 a))
-      -> (m2 a -> m2 a)
-      -> m1 (m2 a) -> m1 (m2 a)
-(|>>) f g = f (Just . g . deMaybe)
-
-deMaybe :: TrieMap m => Maybe (m a) -> m a
-deMaybe Nothing  = emptyTM
-deMaybe (Just m) = m
-
-{-
-************************************************************************
-*                                                                      *
-                   IntMaps
-*                                                                      *
-************************************************************************
--}
-
-instance TrieMap IntMap.IntMap where
-  type Key IntMap.IntMap = Int
-  emptyTM = IntMap.empty
-  lookupTM k m = IntMap.lookup k m
-  alterTM = xtInt
-  foldTM k m z = IntMap.foldr k z m
-  mapTM f m = IntMap.map f m
-
-xtInt :: Int -> XT a -> IntMap.IntMap a -> IntMap.IntMap a
-xtInt k f m = IntMap.alter f k m
-
-instance Ord k => TrieMap (Map.Map k) where
-  type Key (Map.Map k) = k
-  emptyTM = Map.empty
-  lookupTM = Map.lookup
-  alterTM k f m = Map.alter f k m
-  foldTM k m z = Map.foldr k z m
-  mapTM f m = Map.map f m
-
-
-{-
-Note [foldTM determinism]
-~~~~~~~~~~~~~~~~~~~~~~~~~
-We want foldTM to be deterministic, which is why we have an instance of
-TrieMap for UniqDFM, but not for UniqFM. Here's an example of some things that
-go wrong if foldTM is nondeterministic. Consider:
-
-  f a b = return (a <> b)
-
-Depending on the order that the typechecker generates constraints you
-get either:
-
-  f :: (Monad m, Monoid a) => a -> a -> m a
-
-or:
-
-  f :: (Monoid a, Monad m) => a -> a -> m a
-
-The generated code will be different after desugaring as the dictionaries
-will be bound in different orders, leading to potential ABI incompatibility.
-
-One way to solve this would be to notice that the typeclasses could be
-sorted alphabetically.
-
-Unfortunately that doesn't quite work with this example:
-
-  f a b = let x = a <> a; y = b <> b in x
-
-where you infer:
-
-  f :: (Monoid m, Monoid m1) => m1 -> m -> m1
-
-or:
-
-  f :: (Monoid m1, Monoid m) => m1 -> m -> m1
-
-Here you could decide to take the order of the type variables in the type
-according to depth first traversal and use it to order the constraints.
-
-The real trouble starts when the user enables incoherent instances and
-the compiler has to make an arbitrary choice. Consider:
-
-  class T a b where
-    go :: a -> b -> String
-
-  instance (Show b) => T Int b where
-    go a b = show a ++ show b
-
-  instance (Show a) => T a Bool where
-    go a b = show a ++ show b
-
-  f = go 10 True
-
-GHC is free to choose either dictionary to implement f, but for the sake of
-determinism we'd like it to be consistent when compiling the same sources
-with the same flags.
-
-inert_dicts :: DictMap is implemented with a TrieMap. In getUnsolvedInerts it
-gets converted to a bag of (Wanted) Cts using a fold. Then in
-solve_simple_wanteds it's merged with other WantedConstraints. We want the
-conversion to a bag to be deterministic. For that purpose we use UniqDFM
-instead of UniqFM to implement the TrieMap.
-
-See Note [Deterministic UniqFM] in UniqDFM for more details on how it's made
-deterministic.
--}
-
-instance TrieMap UniqDFM where
-  type Key UniqDFM = Unique
-  emptyTM = emptyUDFM
-  lookupTM k m = lookupUDFM m k
-  alterTM k f m = alterUDFM f m k
-  foldTM k m z = foldUDFM k z m
-  mapTM f m = mapUDFM f m
-
-{-
-************************************************************************
-*                                                                      *
-                   Maybes
-*                                                                      *
-************************************************************************
-
-If              m is a map from k -> val
-then (MaybeMap m) is a map from (Maybe k) -> val
--}
-
-data MaybeMap m a = MM { mm_nothing  :: Maybe a, mm_just :: m a }
-
-instance TrieMap m => TrieMap (MaybeMap m) where
-   type Key (MaybeMap m) = Maybe (Key m)
-   emptyTM  = MM { mm_nothing = Nothing, mm_just = emptyTM }
-   lookupTM = lkMaybe lookupTM
-   alterTM  = xtMaybe alterTM
-   foldTM   = fdMaybe
-   mapTM    = mapMb
-
-mapMb :: TrieMap m => (a->b) -> MaybeMap m a -> MaybeMap m b
-mapMb f (MM { mm_nothing = mn, mm_just = mj })
-  = MM { mm_nothing = fmap f mn, mm_just = mapTM f mj }
-
-lkMaybe :: (forall b. k -> m b -> Maybe b)
-        -> Maybe k -> MaybeMap m a -> Maybe a
-lkMaybe _  Nothing  = mm_nothing
-lkMaybe lk (Just x) = mm_just >.> lk x
-
-xtMaybe :: (forall b. k -> XT b -> m b -> m b)
-        -> Maybe k -> XT a -> MaybeMap m a -> MaybeMap m a
-xtMaybe _  Nothing  f m = m { mm_nothing  = f (mm_nothing m) }
-xtMaybe tr (Just x) f m = m { mm_just = mm_just m |> tr x f }
-
-fdMaybe :: TrieMap m => (a -> b -> b) -> MaybeMap m a -> b -> b
-fdMaybe k m = foldMaybe k (mm_nothing m)
-            . foldTM k (mm_just m)
-
-{-
-************************************************************************
-*                                                                      *
-                   Lists
-*                                                                      *
-************************************************************************
--}
-
-data ListMap m a
-  = LM { lm_nil  :: Maybe a
-       , lm_cons :: m (ListMap m a) }
-
-instance TrieMap m => TrieMap (ListMap m) where
-   type Key (ListMap m) = [Key m]
-   emptyTM  = LM { lm_nil = Nothing, lm_cons = emptyTM }
-   lookupTM = lkList lookupTM
-   alterTM  = xtList alterTM
-   foldTM   = fdList
-   mapTM    = mapList
-
-mapList :: TrieMap m => (a->b) -> ListMap m a -> ListMap m b
-mapList f (LM { lm_nil = mnil, lm_cons = mcons })
-  = LM { lm_nil = fmap f mnil, lm_cons = mapTM (mapTM f) mcons }
-
-lkList :: TrieMap m => (forall b. k -> m b -> Maybe b)
-        -> [k] -> ListMap m a -> Maybe a
-lkList _  []     = lm_nil
-lkList lk (x:xs) = lm_cons >.> lk x >=> lkList lk xs
-
-xtList :: TrieMap m => (forall b. k -> XT b -> m b -> m b)
-        -> [k] -> XT a -> ListMap m a -> ListMap m a
-xtList _  []     f m = m { lm_nil  = f (lm_nil m) }
-xtList tr (x:xs) f m = m { lm_cons = lm_cons m |> tr x |>> xtList tr xs f }
-
-fdList :: forall m a b. TrieMap m
-       => (a -> b -> b) -> ListMap m a -> b -> b
-fdList k m = foldMaybe k          (lm_nil m)
-           . foldTM    (fdList k) (lm_cons m)
-
-foldMaybe :: (a -> b -> b) -> Maybe a -> b -> b
-foldMaybe _ Nothing  b = b
-foldMaybe k (Just a) b = k a b
-
-{-
-************************************************************************
-*                                                                      *
-                   Basic maps
-*                                                                      *
-************************************************************************
--}
-
-lkDNamed :: NamedThing n => n -> DNameEnv a -> Maybe a
-lkDNamed n env = lookupDNameEnv env (getName n)
-
-xtDNamed :: NamedThing n => n -> XT a -> DNameEnv a -> DNameEnv a
-xtDNamed tc f m = alterDNameEnv f m (getName tc)
-
-------------------------
-type LiteralMap  a = Map.Map Literal a
-
-emptyLiteralMap :: LiteralMap a
-emptyLiteralMap = emptyTM
-
-lkLit :: Literal -> LiteralMap a -> Maybe a
-lkLit = lookupTM
-
-xtLit :: Literal -> XT a -> LiteralMap a -> LiteralMap a
-xtLit = alterTM
-
-{-
-************************************************************************
-*                                                                      *
-                   GenMap
-*                                                                      *
-************************************************************************
-
-Note [Compressed TrieMap]
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The GenMap constructor augments TrieMaps with leaf compression.  This helps
-solve the performance problem detailed in #9960: suppose we have a handful
-H of entries in a TrieMap, each with a very large key, size K. If you fold over
-such a TrieMap you'd expect time O(H). That would certainly be true of an
-association list! But with TrieMap we actually have to navigate down a long
-singleton structure to get to the elements, so it takes time O(K*H).  This
-can really hurt on many type-level computation benchmarks:
-see for example T9872d.
-
-The point of a TrieMap is that you need to navigate to the point where only one
-key remains, and then things should be fast.  So the point of a SingletonMap
-is that, once we are down to a single (key,value) pair, we stop and
-just use SingletonMap.
-
-'EmptyMap' provides an even more basic (but essential) optimization: if there is
-nothing in the map, don't bother building out the (possibly infinite) recursive
-TrieMap structure!
--}
-
-data GenMap m a
-   = EmptyMap
-   | SingletonMap (Key m) a
-   | MultiMap (m a)
-
-instance (Outputable a, Outputable (m a)) => Outputable (GenMap m a) where
-  ppr EmptyMap = text "Empty map"
-  ppr (SingletonMap _ v) = text "Singleton map" <+> ppr v
-  ppr (MultiMap m) = ppr m
-
--- TODO undecidable instance
-instance (Eq (Key m), TrieMap m) => TrieMap (GenMap m) where
-   type Key (GenMap m) = Key m
-   emptyTM  = EmptyMap
-   lookupTM = lkG
-   alterTM  = xtG
-   foldTM   = fdG
-   mapTM    = mapG
-
 -- NB: Be careful about RULES and type families (#5821).  So we should make sure
 -- to specify @Key TypeMapX@ (and not @DeBruijn Type@, the reduced form)
 
+-- The CoreMap makes heavy use of GenMap. However the CoreMap Types are not
+-- known when defining GenMap so we can only specialize them here.
+
 {-# SPECIALIZE lkG :: Key TypeMapX     -> TypeMapG a     -> Maybe a #-}
 {-# SPECIALIZE lkG :: Key CoercionMapX -> CoercionMapG a -> Maybe a #-}
 {-# SPECIALIZE lkG :: Key CoreMapX     -> CoreMapG a     -> Maybe a #-}
-lkG :: (Eq (Key m), TrieMap m) => Key m -> GenMap m a -> Maybe a
-lkG _ EmptyMap                         = Nothing
-lkG k (SingletonMap k' v') | k == k'   = Just v'
-                           | otherwise = Nothing
-lkG k (MultiMap m)                     = lookupTM k m
+
 
 {-# SPECIALIZE xtG :: Key TypeMapX     -> XT a -> TypeMapG a -> TypeMapG a #-}
 {-# SPECIALIZE xtG :: Key CoercionMapX -> XT a -> CoercionMapG a -> CoercionMapG a #-}
 {-# SPECIALIZE xtG :: Key CoreMapX     -> XT a -> CoreMapG a -> CoreMapG a #-}
-xtG :: (Eq (Key m), TrieMap m) => Key m -> XT a -> GenMap m a -> GenMap m a
-xtG k f EmptyMap
-    = case f Nothing of
-        Just v  -> SingletonMap k v
-        Nothing -> EmptyMap
-xtG k f m@(SingletonMap k' v')
-    | k' == k
-    -- The new key matches the (single) key already in the tree.  Hence,
-    -- apply @f@ to @Just v'@ and build a singleton or empty map depending
-    -- on the 'Just'/'Nothing' response respectively.
-    = case f (Just v') of
-        Just v'' -> SingletonMap k' v''
-        Nothing  -> EmptyMap
-    | otherwise
-    -- We've hit a singleton tree for a different key than the one we are
-    -- searching for. Hence apply @f@ to @Nothing@. If result is @Nothing@ then
-    -- we can just return the old map. If not, we need a map with *two*
-    -- entries. The easiest way to do that is to insert two items into an empty
-    -- map of type @m a@.
-    = case f Nothing of
-        Nothing  -> m
-        Just v   -> emptyTM |> alterTM k' (const (Just v'))
-                           >.> alterTM k  (const (Just v))
-                           >.> MultiMap
-xtG k f (MultiMap m) = MultiMap (alterTM k f m)
 
 {-# SPECIALIZE mapG :: (a -> b) -> TypeMapG a     -> TypeMapG b #-}
 {-# SPECIALIZE mapG :: (a -> b) -> CoercionMapG a -> CoercionMapG b #-}
 {-# SPECIALIZE mapG :: (a -> b) -> CoreMapG a     -> CoreMapG b #-}
-mapG :: TrieMap m => (a -> b) -> GenMap m a -> GenMap m b
-mapG _ EmptyMap = EmptyMap
-mapG f (SingletonMap k v) = SingletonMap k (f v)
-mapG f (MultiMap m) = MultiMap (mapTM f m)
 
 {-# SPECIALIZE fdG :: (a -> b -> b) -> TypeMapG a     -> b -> b #-}
 {-# SPECIALIZE fdG :: (a -> b -> b) -> CoercionMapG a -> b -> b #-}
 {-# SPECIALIZE fdG :: (a -> b -> b) -> CoreMapG a     -> b -> b #-}
-fdG :: TrieMap m => (a -> b -> b) -> GenMap m a -> b -> b
-fdG _ EmptyMap = \z -> z
-fdG k (SingletonMap _ v) = \z -> k v z
-fdG k (MultiMap m) = foldTM k m
+
 
 {-
 ************************************************************************
@@ -438,7 +98,16 @@ fdG k (MultiMap m) = foldTM k m
                    CoreMap
 *                                                                      *
 ************************************************************************
+-}
+
+lkDNamed :: NamedThing n => n -> DNameEnv a -> Maybe a
+lkDNamed n env = lookupDNameEnv env (getName n)
 
+xtDNamed :: NamedThing n => n -> XT a -> DNameEnv a -> DNameEnv a
+xtDNamed tc f m = alterDNameEnv f m (getName tc)
+
+
+{-
 Note [Binders]
 ~~~~~~~~~~~~~~
  * In general we check binders as late as possible because types are
@@ -545,7 +214,7 @@ instance Eq (DeBruijn CoreExpr) where
     go _ _ = False
 
 emptyE :: CoreMapX a
-emptyE = CM { cm_var = emptyTM, cm_lit = emptyLiteralMap
+emptyE = CM { cm_var = emptyTM, cm_lit = emptyTM
             , cm_co = emptyTM, cm_type = emptyTM
             , cm_cast = emptyTM, cm_app = emptyTM
             , cm_lam = emptyTM, cm_letn = emptyTM
@@ -612,7 +281,7 @@ lkE :: DeBruijn CoreExpr -> CoreMapX a -> Maybe a
 lkE (D env expr) cm = go expr cm
   where
     go (Var v)              = cm_var  >.> lkVar env v
-    go (Lit l)              = cm_lit  >.> lkLit l
+    go (Lit l)              = cm_lit  >.> lookupTM l
     go (Type t)             = cm_type >.> lkG (D env t)
     go (Coercion c)         = cm_co   >.> lkG (D env c)
     go (Cast e c)           = cm_cast >.> lkG (D env e) >=> lkG (D env c)
@@ -640,7 +309,7 @@ xtE (D env (Type t))             f m = m { cm_type = cm_type m
                                                  |> xtG (D env t) f }
 xtE (D env (Coercion c))         f m = m { cm_co   = cm_co m
                                                  |> xtG (D env c) f }
-xtE (D _   (Lit l))              f m = m { cm_lit  = cm_lit m  |> xtLit l f }
+xtE (D _   (Lit l))              f m = m { cm_lit  = cm_lit m  |> alterTM l f }
 xtE (D env (Cast e c))           f m = m { cm_cast = cm_cast m |> xtG (D env e)
                                                  |>> xtG (D env c) f }
 xtE (D env (Tick t e))           f m = m { cm_tick = cm_tick m |> xtG (D env e)
@@ -687,7 +356,7 @@ instance TrieMap AltMap where
    type Key AltMap = CoreAlt
    emptyTM  = AM { am_deflt = emptyTM
                  , am_data = emptyDNameEnv
-                 , am_lit  = emptyLiteralMap }
+                 , am_lit  = emptyTM }
    lookupTM = lkA emptyCME
    alterTM  = xtA emptyCME
    foldTM   = fdA
@@ -712,7 +381,7 @@ mapA f (AM { am_deflt = adeflt, am_data = adata, am_lit = alit })
 
 lkA :: CmEnv -> CoreAlt -> AltMap a -> Maybe a
 lkA env (DEFAULT,    _, rhs)  = am_deflt >.> lkG (D env rhs)
-lkA env (LitAlt lit, _, rhs)  = am_lit >.> lkLit lit >=> lkG (D env rhs)
+lkA env (LitAlt lit, _, rhs)  = am_lit >.> lookupTM lit >=> lkG (D env rhs)
 lkA env (DataAlt dc, bs, rhs) = am_data >.> lkDNamed dc
                                         >=> lkG (D (extendCMEs env bs) rhs)
 
@@ -720,7 +389,7 @@ xtA :: CmEnv -> CoreAlt -> XT a -> AltMap a -> AltMap a
 xtA env (DEFAULT, _, rhs)    f m =
     m { am_deflt = am_deflt m |> xtG (D env rhs) f }
 xtA env (LitAlt l, _, rhs)   f m =
-    m { am_lit   = am_lit m   |> xtLit l |>> xtG (D env rhs) f }
+    m { am_lit   = am_lit m   |> alterTM l |>> xtG (D env rhs) f }
 xtA env (DataAlt d, bs, rhs) f m =
     m { am_data  = am_data m  |> xtDNamed d
                              |>> xtG (D (extendCMEs env bs) rhs) f }
@@ -814,7 +483,7 @@ trieMapView ty
   -- First check for TyConApps that need to be expanded to
   -- AppTy chains.
   | Just (tc, tys@(_:_)) <- tcSplitTyConApp_maybe ty
-  = Just $ foldl AppTy (TyConApp tc []) tys
+  = Just $ foldl' AppTy (TyConApp tc []) tys
 
   -- Then resolve any remaining nullary synonyms.
   | Just ty' <- tcView ty = Just ty'
@@ -853,8 +522,8 @@ instance Eq (DeBruijn Type) where
             -> tc == tc' && D env tys == D env' tys'
         (LitTy l, LitTy l')
             -> l == l'
-        (ForAllTy (TvBndr tv _) ty, ForAllTy (TvBndr tv' _) ty')
-            -> D env (tyVarKind tv)    == D env' (tyVarKind tv') &&
+        (ForAllTy (Bndr tv _) ty, ForAllTy (Bndr tv' _) ty')
+            -> D env (varType tv)      == D env' (varType tv') &&
                D (extendCME env tv) ty == D (extendCME env' tv') ty'
         (CoercionTy {}, CoercionTy {})
             -> True
@@ -866,9 +535,9 @@ instance {-# OVERLAPPING #-}
 
 emptyT :: TypeMapX a
 emptyT = TM { tm_var  = emptyTM
-            , tm_app  = EmptyMap
+            , tm_app  = emptyTM
             , tm_tycon  = emptyDNameEnv
-            , tm_forall = EmptyMap
+            , tm_forall = emptyTM
             , tm_tylit  = emptyTyLitMap
             , tm_coerce = Nothing }
 
@@ -894,7 +563,7 @@ lkT (D env ty) m = go ty m
     go (TyConApp tc [])            = tm_tycon  >.> lkDNamed tc
     go ty@(TyConApp _ (_:_))       = pprPanic "lkT TyConApp" (ppr ty)
     go (LitTy l)                   = tm_tylit  >.> lkTyLit l
-    go (ForAllTy (TvBndr tv _) ty) = tm_forall >.> lkG (D (extendCME env tv) ty)
+    go (ForAllTy (Bndr tv _) ty)   = tm_forall >.> lkG (D (extendCME env tv) ty)
                                                >=> lkBndr env tv
     go ty@(FunTy {})               = pprPanic "lkT FunTy" (ppr ty)
     go (CastTy t _)                = go t
@@ -911,7 +580,7 @@ xtT (D _   (TyConApp tc []))  f m = m { tm_tycon  = tm_tycon m |> xtDNamed tc f
 xtT (D _   (LitTy l))         f m = m { tm_tylit  = tm_tylit m |> xtTyLit l f }
 xtT (D env (CastTy t _))      f m = xtT (D env t) f m
 xtT (D _   (CoercionTy {}))   f m = m { tm_coerce = tm_coerce m |> f }
-xtT (D env (ForAllTy (TvBndr tv _) ty))  f m
+xtT (D env (ForAllTy (Bndr tv _) ty))  f m
   = m { tm_forall = tm_forall m |> xtG (D (extendCME env tv) ty)
                                 |>> xtBndr env tv f }
 xtT (D _   ty@(TyConApp _ (_:_))) _ _ = pprPanic "xtT TyConApp" (ppr ty)
@@ -1047,7 +716,7 @@ extendCME (CME { cme_next = bv, cme_env = env }) v
   = CME { cme_next = bv+1, cme_env = extendVarEnv env v bv }
 
 extendCMEs :: CmEnv -> [Var] -> CmEnv
-extendCMEs env vs = foldl extendCME env vs
+extendCMEs env vs = foldl' extendCME env vs
 
 lookupCME :: CmEnv -> Var -> Maybe BoundVar
 lookupCME (CME { cme_env = env }) v = lookupVarEnv env v
diff --git a/compiler/coreSyn/CoreOpt.hs b/compiler/coreSyn/CoreOpt.hs
index 4a196057b1..2367c4548d 100644
--- a/compiler/coreSyn/CoreOpt.hs
+++ b/compiler/coreSyn/CoreOpt.hs
@@ -20,7 +20,9 @@ module CoreOpt (
 
 #include "HsVersions.h"
 
-import CoreArity( joinRhsArity, etaExpandToJoinPoint )
+import GhcPrelude
+
+import CoreArity( etaExpandToJoinPoint )
 
 import CoreSyn
 import CoreSubst
@@ -30,10 +32,11 @@ import PprCore  ( pprCoreBindings, pprRules )
 import OccurAnal( occurAnalyseExpr, occurAnalysePgm )
 import Literal  ( Literal(MachStr) )
 import Id
-import Var      ( varType )
+import Var      ( varType, isNonCoVarId )
 import VarSet
 import VarEnv
 import DataCon
+import Demand( etaExpandStrictSig )
 import OptCoercion ( optCoercion )
 import Type     hiding ( substTy, extendTvSubst, extendCvSubst, extendTvSubstList
                        , isInScope, substTyVarBndr, cloneTyVarBndr )
@@ -84,7 +87,7 @@ little dance in action; the full Simplifier is a lot more complicated.
 
 -}
 
-simpleOptExpr :: CoreExpr -> CoreExpr
+simpleOptExpr :: DynFlags -> CoreExpr -> CoreExpr
 -- See Note [The simple optimiser]
 -- Do simple optimisation on an expression
 -- The optimisation is very straightforward: just
@@ -101,9 +104,9 @@ simpleOptExpr :: CoreExpr -> CoreExpr
 -- in  (let x = y in ....) we substitute for x; so y's occ-info
 -- may change radically
 
-simpleOptExpr expr
+simpleOptExpr dflags expr
   = -- pprTrace "simpleOptExpr" (ppr init_subst $$ ppr expr)
-    simpleOptExprWith init_subst expr
+    simpleOptExprWith dflags init_subst expr
   where
     init_subst = mkEmptySubst (mkInScopeSet (exprFreeVars expr))
         -- It's potentially important to make a proper in-scope set
@@ -116,32 +119,35 @@ simpleOptExpr expr
         -- It's a bit painful to call exprFreeVars, because it makes
         -- three passes instead of two (occ-anal, and go)
 
-simpleOptExprWith :: Subst -> InExpr -> OutExpr
+simpleOptExprWith :: DynFlags -> Subst -> InExpr -> OutExpr
 -- See Note [The simple optimiser]
-simpleOptExprWith subst expr
+simpleOptExprWith dflags subst expr
   = simple_opt_expr init_env (occurAnalyseExpr expr)
   where
-    init_env = SOE { soe_inl = emptyVarEnv, soe_subst = subst }
+    init_env = SOE { soe_dflags = dflags
+                   , soe_inl = emptyVarEnv
+                   , soe_subst = subst }
 
 ----------------------
 simpleOptPgm :: DynFlags -> Module
-             -> CoreProgram -> [CoreRule] -> [CoreVect]
-             -> IO (CoreProgram, [CoreRule], [CoreVect])
+             -> CoreProgram -> [CoreRule]
+             -> IO (CoreProgram, [CoreRule])
 -- See Note [The simple optimiser]
-simpleOptPgm dflags this_mod binds rules vects
+simpleOptPgm dflags this_mod binds rules
   = do { dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
                        (pprCoreBindings occ_anald_binds $$ pprRules rules );
 
-       ; return (reverse binds', rules', vects') }
+       ; return (reverse binds', rules') }
   where
-    occ_anald_binds  = occurAnalysePgm this_mod (\_ -> False) {- No rules active -}
-                                       rules vects emptyVarSet binds
+    occ_anald_binds  = occurAnalysePgm this_mod
+                          (\_ -> True)  {- All unfoldings active -}
+                          (\_ -> False) {- No rules active -}
+                          rules binds
 
-    (final_env, binds') = foldl do_one (emptyEnv, []) occ_anald_binds
+    (final_env, binds') = foldl' do_one (emptyEnv dflags, []) occ_anald_binds
     final_subst = soe_subst final_env
 
     rules' = substRulesForImportedIds final_subst rules
-    vects' = substVects final_subst vects
              -- We never unconditionally inline into rules,
              -- hence paying just a substitution
 
@@ -156,7 +162,8 @@ simpleOptPgm dflags this_mod binds rules vects
 type SimpleClo = (SimpleOptEnv, InExpr)
 
 data SimpleOptEnv
-  = SOE { soe_inl   :: IdEnv SimpleClo
+  = SOE { soe_dflags :: DynFlags
+        , soe_inl   :: IdEnv SimpleClo
              -- Deals with preInlineUnconditionally; things
              -- that occur exactly once and are inlined
              -- without having first been simplified
@@ -171,13 +178,15 @@ instance Outputable SimpleOptEnv where
                             , text "soe_subst =" <+> ppr subst ]
                    <+> text "}"
 
-emptyEnv :: SimpleOptEnv
-emptyEnv = SOE { soe_inl = emptyVarEnv
-               , soe_subst = emptySubst }
+emptyEnv :: DynFlags -> SimpleOptEnv
+emptyEnv dflags
+  = SOE { soe_dflags = dflags
+        , soe_inl = emptyVarEnv
+        , soe_subst = emptySubst }
 
 soeZapSubst :: SimpleOptEnv -> SimpleOptEnv
-soeZapSubst (SOE { soe_subst = subst })
-  = SOE { soe_inl = emptyVarEnv, soe_subst = zapSubstEnv subst }
+soeZapSubst env@(SOE { soe_subst = subst })
+  = env { soe_inl = emptyVarEnv, soe_subst = zapSubstEnv subst }
 
 soeSetInScope :: SimpleOptEnv -> SimpleOptEnv -> SimpleOptEnv
 -- Take in-scope set from env1, and the rest from env2
@@ -206,13 +215,13 @@ simple_opt_expr env expr
 
     go (App e1 e2)      = simple_app env e1 [(env,e2)]
     go (Type ty)        = Type     (substTy subst ty)
-    go (Coercion co)    = Coercion (optCoercion (getTCvSubst subst) co)
+    go (Coercion co)    = Coercion (optCoercion (soe_dflags env) (getTCvSubst subst) co)
     go (Lit lit)        = Lit lit
     go (Tick tickish e) = mkTick (substTickish subst tickish) (go e)
     go (Cast e co)      | isReflCo co' = go e
                         | otherwise    = Cast (go e) co'
                         where
-                          co' = optCoercion (getTCvSubst subst) co
+                          co' = optCoercion (soe_dflags env) (getTCvSubst subst) co
 
     go (Let bind body) = case simple_opt_bind env bind of
                            (env', Nothing)   -> simple_opt_expr env' body
@@ -323,7 +332,7 @@ simple_opt_bind env (Rec prs)
     res_bind          = Just (Rec (reverse rev_prs'))
     prs'              = joinPointBindings_maybe prs `orElse` prs
     (env', bndrs')    = subst_opt_bndrs env (map fst prs')
-    (env'', rev_prs') = foldl do_pr (env', []) (prs' `zip` bndrs')
+    (env'', rev_prs') = foldl' do_pr (env', []) (prs' `zip` bndrs')
     do_pr (env, prs) ((b,r), b')
        = (env', case mb_pr of
                   Just pr -> pr : prs
@@ -347,30 +356,43 @@ simple_bind_pair env@(SOE { soe_inl = inl_env, soe_subst = subst })
     (env { soe_subst = extendTvSubst subst in_bndr out_ty }, Nothing)
 
   | Coercion co <- in_rhs
-  , let out_co = optCoercion (getTCvSubst (soe_subst rhs_env)) co
+  , let out_co = optCoercion (soe_dflags env) (getTCvSubst (soe_subst rhs_env)) co
   = ASSERT( isCoVar in_bndr )
     (env { soe_subst = extendCvSubst subst in_bndr out_co }, Nothing)
 
-  | pre_inline_unconditionally
+  | ASSERT2( isNonCoVarId in_bndr, ppr in_bndr )
+    -- The previous two guards got rid of tyvars and coercions
+    -- See Note [CoreSyn type and coercion invariant] in CoreSyn
+    pre_inline_unconditionally
   = (env { soe_inl = extendVarEnv inl_env in_bndr clo }, Nothing)
 
   | otherwise
-  = simple_out_bind_pair env in_bndr mb_out_bndr
-                         (simple_opt_clo env clo)
+  = simple_out_bind_pair env in_bndr mb_out_bndr out_rhs
                          occ active stable_unf
   where
     stable_unf = isStableUnfolding (idUnfolding in_bndr)
     active     = isAlwaysActive (idInlineActivation in_bndr)
     occ        = idOccInfo in_bndr
 
+    out_rhs | Just join_arity <- isJoinId_maybe in_bndr
+            = simple_join_rhs join_arity
+            | otherwise
+            = simple_opt_clo env clo
+
+    simple_join_rhs join_arity -- See Note [Preserve join-binding arity]
+      = mkLams join_bndrs' (simple_opt_expr env_body join_body)
+      where
+        env0 = soeSetInScope env rhs_env
+        (join_bndrs, join_body) = collectNBinders join_arity in_rhs
+        (env_body, join_bndrs') = subst_opt_bndrs env0 join_bndrs
+
     pre_inline_unconditionally :: Bool
     pre_inline_unconditionally
-       | isCoVar in_bndr          = False    -- See Note [Do not inline CoVars unconditionally]
-       | isExportedId in_bndr     = False    --     in SimplUtils
+       | isExportedId in_bndr     = False
        | stable_unf               = False
        | not active               = False    -- Note [Inline prag in simplOpt]
        | not (safe_to_inline occ) = False
-       | otherwise = True
+       | otherwise                = True
 
         -- Unconditionally safe to inline
     safe_to_inline :: OccInfo -> Bool
@@ -403,7 +425,10 @@ simple_out_bind_pair :: SimpleOptEnv
                      -> (SimpleOptEnv, Maybe (OutVar, OutExpr))
 simple_out_bind_pair env in_bndr mb_out_bndr out_rhs
                      occ_info active stable_unf
-  | post_inline_unconditionally
+  | ASSERT2( isNonCoVarId in_bndr, ppr in_bndr )
+    -- Type and coercion bindings are caught earlier
+    -- See Note [CoreSyn type and coercion invariant]
+    post_inline_unconditionally
   = ( env' { soe_subst = extendIdSubst (soe_subst env) in_bndr out_rhs }
     , Nothing)
 
@@ -417,14 +442,16 @@ simple_out_bind_pair env in_bndr mb_out_bndr out_rhs
 
     post_inline_unconditionally :: Bool
     post_inline_unconditionally
-       | not active                  = False
-       | isWeakLoopBreaker occ_info  = False -- If it's a loop-breaker of any kind, don't inline
-                                             -- because it might be referred to "earlier"
-       | stable_unf                  = False -- Note [Stable unfoldings and postInlineUnconditionally]
-       | isExportedId in_bndr        = False -- Note [Exported Ids and trivial RHSs]
-       | exprIsTrivial out_rhs       = True
-       | coercible_hack              = True
-       | otherwise                   = False
+       | isExportedId in_bndr  = False -- Note [Exported Ids and trivial RHSs]
+       | stable_unf            = False -- Note [Stable unfoldings and postInlineUnconditionally]
+       | not active            = False --     in SimplUtils
+       | is_loop_breaker       = False -- If it's a loop-breaker of any kind, don't inline
+                                       -- because it might be referred to "earlier"
+       | exprIsTrivial out_rhs = True
+       | coercible_hack        = True
+       | otherwise             = False
+
+    is_loop_breaker = isWeakLoopBreaker occ_info
 
     -- See Note [Getting the map/coerce RULE to work]
     coercible_hack | (Var fun, args) <- collectArgs out_rhs
@@ -447,6 +474,14 @@ trivial ones.  But we do here!  Why?  In the simple optimiser
 Those differences obviate the reasons for not inlining a trivial rhs,
 and increase the benefit for doing so.  So we unconditionally inline trivial
 rhss here.
+
+Note [Preserve join-binding arity]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Be careful /not/ to eta-reduce the RHS of a join point, lest we lose
+the join-point arity invariant.  Trac #15108 was caused by simplifying
+the RHS with simple_opt_expr, which does eta-reduction.  Solution:
+simplify the RHS of a join point by simplifying under the lambdas
+(which of course should be there).
 -}
 
 ----------------------
@@ -471,8 +506,8 @@ subst_opt_id_bndr :: SimpleOptEnv -> InId -> (SimpleOptEnv, OutId)
 -- It's important to zap fragile OccInfo (which CoreSubst.substIdBndr
 -- carefully does not do) because simplOptExpr invalidates it
 
-subst_opt_id_bndr (SOE { soe_subst = subst, soe_inl = inl }) old_id
-  = (SOE { soe_subst = new_subst, soe_inl = new_inl }, new_id)
+subst_opt_id_bndr env@(SOE { soe_subst = subst, soe_inl = inl }) old_id
+  = (env { soe_subst = new_subst, soe_inl = new_inl }, new_id)
   where
     Subst in_scope id_subst tv_subst cv_subst = subst
 
@@ -513,18 +548,6 @@ wrapLet :: Maybe (Id,CoreExpr) -> CoreExpr -> CoreExpr
 wrapLet Nothing      body = body
 wrapLet (Just (b,r)) body = Let (NonRec b r) body
 
-------------------
-substVects :: Subst -> [CoreVect] -> [CoreVect]
-substVects subst = map (substVect subst)
-
-------------------
-substVect :: Subst -> CoreVect -> CoreVect
-substVect subst  (Vect v rhs)        = Vect v (simpleOptExprWith subst rhs)
-substVect _subst vd@(NoVect _)       = vd
-substVect _subst vd@(VectType _ _ _) = vd
-substVect _subst vd@(VectClass _)    = vd
-substVect _subst vd@(VectInst _)     = vd
-
 {-
 Note [Inline prag in simplOpt]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -642,58 +665,43 @@ joinPointBinding_maybe bndr rhs
   = Just (bndr, rhs)
 
   | AlwaysTailCalled join_arity <- tailCallInfo (idOccInfo bndr)
-  , not (bad_unfolding join_arity (idUnfolding bndr))
   , (bndrs, body) <- etaExpandToJoinPoint join_arity rhs
-  = Just (bndr `asJoinId` join_arity, mkLams bndrs body)
+  , let str_sig   = idStrictness bndr
+        str_arity = count isId bndrs  -- Strictness demands are for Ids only
+        join_bndr = bndr `asJoinId`        join_arity
+                         `setIdStrictness` etaExpandStrictSig str_arity str_sig
+  = Just (join_bndr, mkLams bndrs body)
 
   | otherwise
   = Nothing
 
-  where
-    -- bad_unfolding returns True if we should /not/ convert a non-join-id
-    -- into a join-id, even though it is AlwaysTailCalled
-    -- See Note [Join points and INLINE pragmas]
-    bad_unfolding join_arity (CoreUnfolding { uf_src = src, uf_tmpl = rhs })
-      = isStableSource src && join_arity > joinRhsArity rhs
-    bad_unfolding _ (DFunUnfolding {})
-      = True
-    bad_unfolding _ _
-      = False
-
 joinPointBindings_maybe :: [(InBndr, InExpr)] -> Maybe [(InBndr, InExpr)]
 joinPointBindings_maybe bndrs
   = mapM (uncurry joinPointBinding_maybe) bndrs
 
 
-{- Note [Join points and INLINE pragmas]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider
-   f x = let g = \x. not  -- Arity 1
-             {-# INLINE g #-}
-         in case x of
-              A -> g True True
-              B -> g True False
-              C -> blah2
+{- Note [Strictness and join points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we have
 
-Here 'g' is always tail-called applied to 2 args, but the stable
-unfolding captured by the INLINE pragma has arity 1.  If we try to
-convert g to be a join point, its unfolding will still have arity 1
-(since it is stable, and we don't meddle with stable unfoldings), and
-Lint will complain (see Note [Invariants on join points], (2a), in
-CoreSyn.  Trac #13413.
+   let f = \x.  if x>200 then e1 else e1
 
-Moreover, since g is going to be inlined anyway, there is no benefit
-from making it a join point.
+and we know that f is strict in x.  Then if we subsequently
+discover that f is an arity-2 join point, we'll eta-expand it to
 
-If it is recursive, and uselessly marked INLINE, this will stop us
-making it a join point, which is annoying.  But occasionally
-(notably in class methods; see Note [Instances and loop breakers] in
-TcInstDcls) we mark recursive things as INLINE but the recursion
-unravels; so ignoring INLINE pragmas on recursive things isn't good
-either.
+   let f = \x y.  if x>200 then e1 else e1
 
+and now it's only strict if applied to two arguments.  So we should
+adjust the strictness info.
 
-************************************************************************
+A more common case is when
+
+   f = \x. error ".."
+
+and again its arity increses (Trac #15517)
+-}
+
+{- *********************************************************************
 *                                                                      *
          exprIsConApp_maybe
 *                                                                      *
@@ -768,9 +776,11 @@ exprIsConApp_maybe (in_scope, id_unf) expr
     go subst (Tick t expr) cont
        | not (tickishIsCode t) = go subst expr cont
     go subst (Cast expr co1) (CC args co2)
-       | Just (args', co1') <- pushCoArgs (subst_co subst co1) args
+       | Just (args', m_co1') <- pushCoArgs (subst_co subst co1) args
             -- See Note [Push coercions in exprIsConApp_maybe]
-       = go subst expr (CC args' (co1' `mkTransCo` co2))
+       = case m_co1' of
+           MCo co1' -> go subst expr (CC args' (co1' `mkTransCo` co2))
+           MRefl    -> go subst expr (CC args' co2)
     go subst (App fun arg) (CC args co)
        = go subst fun (CC (subst_arg subst arg : args) co)
     go subst (Lam var body) (CC (arg:args) co)
@@ -930,7 +940,7 @@ exprIsLambda_maybe (in_scope_set, id_unf) e
     -- Make sure there is hope to get a lambda
     , Just rhs <- expandUnfolding_maybe (id_unf f)
     -- Optimize, for beta-reduction
-    , let e' =  simpleOptExprWith (mkEmptySubst in_scope_set) (rhs `mkApps` as)
+    , let e' = simpleOptExprWith unsafeGlobalDynFlags (mkEmptySubst in_scope_set) (rhs `mkApps` as)
     -- Recurse, because of possible casts
     , Just (x', e'', ts') <- exprIsLambda_maybe (in_scope_set, id_unf) e'
     , let res = Just (x', e'', ts++ts')
@@ -964,36 +974,45 @@ Here we implement the "push rules" from FC papers:
   by pushing the coercion into the arguments
 -}
 
-pushCoArgs :: Coercion -> [CoreArg] -> Maybe ([CoreArg], Coercion)
-pushCoArgs co []         = return ([], co)
-pushCoArgs co (arg:args) = do { (arg',  co1) <- pushCoArg  co  arg
-                              ; (args', co2) <- pushCoArgs co1 args
-                              ; return (arg':args', co2) }
+pushCoArgs :: CoercionR -> [CoreArg] -> Maybe ([CoreArg], MCoercion)
+pushCoArgs co []         = return ([], MCo co)
+pushCoArgs co (arg:args) = do { (arg',  m_co1) <- pushCoArg  co  arg
+                              ; case m_co1 of
+                                  MCo co1 -> do { (args', m_co2) <- pushCoArgs co1 args
+                                                 ; return (arg':args', m_co2) }
+                                  MRefl  -> return (arg':args, MRefl) }
 
-pushCoArg :: Coercion -> CoreArg -> Maybe (CoreArg, Coercion)
+pushCoArg :: CoercionR -> CoreArg -> Maybe (CoreArg, MCoercion)
 -- We have (fun |> co) arg, and we want to transform it to
 --         (fun arg) |> co
 -- This may fail, e.g. if (fun :: N) where N is a newtype
 -- C.f. simplCast in Simplify.hs
 -- 'co' is always Representational
+-- If the returned coercion is Nothing, then it would have been reflexive
+pushCoArg co (Type ty) = do { (ty', m_co') <- pushCoTyArg co ty
+                            ; return (Type ty', m_co') }
+pushCoArg co val_arg   = do { (arg_co, m_co') <- pushCoValArg co
+                            ; return (val_arg `mkCast` arg_co, m_co') }
 
-pushCoArg co (Type ty) = do { (ty', co') <- pushCoTyArg co ty
-                            ; return (Type ty', co') }
-pushCoArg co val_arg   = do { (arg_co, co') <- pushCoValArg co
-                            ; return (mkCast val_arg arg_co, co') }
-
-pushCoTyArg :: Coercion -> Type -> Maybe (Type, Coercion)
+pushCoTyArg :: CoercionR -> Type -> Maybe (Type, MCoercionR)
 -- We have (fun |> co) @ty
 -- Push the coercion through to return
 --         (fun @ty') |> co'
 -- 'co' is always Representational
+-- If the returned coercion is Nothing, then it would have been reflexive;
+-- it's faster not to compute it, though.
 pushCoTyArg co ty
-  | tyL `eqType` tyR
-  = Just (ty, mkRepReflCo (piResultTy tyR ty))
+  -- The following is inefficient - don't do `eqType` here, the coercion
+  -- optimizer will take care of it. See Trac #14737.
+  -- -- | tyL `eqType` tyR
+  -- -- = Just (ty, Nothing)
+
+  | isReflCo co
+  = Just (ty, MRefl)
 
-  | isForAllTy tyL
-  = ASSERT2( isForAllTy tyR, ppr co $$ ppr ty )
-    Just (ty `mkCastTy` mkSymCo co1, co2)
+  | isForAllTy_ty tyL
+  = ASSERT2( isForAllTy_ty tyR, ppr co $$ ppr ty )
+    Just (ty `mkCastTy` co1, MCo co2)
 
   | otherwise
   = Nothing
@@ -1003,41 +1022,48 @@ pushCoTyArg co ty
        -- tyL = forall (a1 :: k1). ty1
        -- tyR = forall (a2 :: k2). ty2
 
-    co1 = mkNthCo 0 co
-       -- co1 :: k1 ~ k2
-       -- Note that NthCo can extract an equality between the kinds
-       -- of the types related by a coercion between forall-types.
+    co1 = mkSymCo (mkNthCo Nominal 0 co)
+       -- co1 :: k2 ~N k1
+       -- Note that NthCo can extract a Nominal equality between the
+       -- kinds of the types related by a coercion between forall-types.
        -- See the NthCo case in CoreLint.
 
-    co2 = mkInstCo co (mkCoherenceLeftCo (mkNomReflCo ty) co1)
+    co2 = mkInstCo co (mkGReflLeftCo Nominal ty co1)
         -- co2 :: ty1[ (ty|>co1)/a1 ] ~ ty2[ ty/a2 ]
         -- Arg of mkInstCo is always nominal, hence mkNomReflCo
 
-pushCoValArg :: Coercion -> Maybe (Coercion, Coercion)
+pushCoValArg :: CoercionR -> Maybe (Coercion, MCoercion)
 -- We have (fun |> co) arg
 -- Push the coercion through to return
 --         (fun (arg |> co_arg)) |> co_res
 -- 'co' is always Representational
+-- If the second returned Coercion is actually Nothing, then no cast is necessary;
+-- the returned coercion would have been reflexive.
 pushCoValArg co
-  | tyL `eqType` tyR
-  = Just (mkRepReflCo arg, mkRepReflCo res)
+  -- The following is inefficient - don't do `eqType` here, the coercion
+  -- optimizer will take care of it. See Trac #14737.
+  -- -- | tyL `eqType` tyR
+  -- -- = Just (mkRepReflCo arg, Nothing)
+
+  | isReflCo co
+  = Just (mkRepReflCo arg, MRefl)
 
   | isFunTy tyL
-  , (co1, co2) <- decomposeFunCo co
+  , (co1, co2) <- decomposeFunCo Representational co
               -- If   co  :: (tyL1 -> tyL2) ~ (tyR1 -> tyR2)
               -- then co1 :: tyL1 ~ tyR1
               --      co2 :: tyL2 ~ tyR2
   = ASSERT2( isFunTy tyR, ppr co $$ ppr arg )
-    Just (mkSymCo co1, co2)
+    Just (mkSymCo co1, MCo co2)
 
   | otherwise
   = Nothing
   where
-    (arg, res)   = splitFunTy tyR
+    arg = funArgTy tyR
     Pair tyL tyR = coercionKind co
 
 pushCoercionIntoLambda
-    :: InScopeSet -> Var -> CoreExpr -> Coercion -> Maybe (Var, CoreExpr)
+    :: InScopeSet -> Var -> CoreExpr -> CoercionR -> Maybe (Var, CoreExpr)
 -- This implements the Push rule from the paper on coercions
 --    (\x. e) |> co
 -- ===>
@@ -1047,7 +1073,7 @@ pushCoercionIntoLambda in_scope x e co
     , Pair s1s2 t1t2 <- coercionKind co
     , Just (_s1,_s2) <- splitFunTy_maybe s1s2
     , Just (t1,_t2) <- splitFunTy_maybe t1t2
-    = let (co1, co2) = decomposeFunCo co
+    = let (co1, co2) = decomposeFunCo Representational co
           -- Should we optimize the coercions here?
           -- Otherwise they might not match too well
           x' = x `setIdType` t1
@@ -1086,19 +1112,19 @@ pushCoDataCon dc dc_args co
   = let
         tc_arity       = tyConArity to_tc
         dc_univ_tyvars = dataConUnivTyVars dc
-        dc_ex_tyvars   = dataConExTyVars dc
+        dc_ex_tcvars   = dataConExTyCoVars dc
         arg_tys        = dataConRepArgTys dc
 
         non_univ_args  = dropList dc_univ_tyvars dc_args
-        (ex_args, val_args) = splitAtList dc_ex_tyvars non_univ_args
+        (ex_args, val_args) = splitAtList dc_ex_tcvars non_univ_args
 
         -- Make the "Psi" from the paper
-        omegas = decomposeCo tc_arity co
+        omegas = decomposeCo tc_arity co (tyConRolesRepresentational to_tc)
         (psi_subst, to_ex_arg_tys)
           = liftCoSubstWithEx Representational
                               dc_univ_tyvars
                               omegas
-                              dc_ex_tyvars
+                              dc_ex_tcvars
                               (map exprToType ex_args)
 
           -- Cast the value arguments (which include dictionaries)
@@ -1107,7 +1133,7 @@ pushCoDataCon dc dc_args co
 
         to_ex_args = map Type to_ex_arg_tys
 
-        dump_doc = vcat [ppr dc,      ppr dc_univ_tyvars, ppr dc_ex_tyvars,
+        dump_doc = vcat [ppr dc,      ppr dc_univ_tyvars, ppr dc_ex_tcvars,
                          ppr arg_tys, ppr dc_args,
                          ppr ex_args, ppr val_args, ppr co, ppr from_ty, ppr to_ty, ppr to_tc ]
     in
@@ -1140,7 +1166,7 @@ collectBindersPushingCo e
     go bs e           = (reverse bs, e)
 
     -- We are in a cast; peel off casts until we hit a lambda.
-    go_c :: [Var] -> CoreExpr -> Coercion -> ([Var], CoreExpr)
+    go_c :: [Var] -> CoreExpr -> CoercionR -> ([Var], CoreExpr)
     -- (go_c bs e c) is same as (go bs e (e |> c))
     go_c bs (Cast e co1) co2 = go_c bs e (co1 `mkTransCo` co2)
     go_c bs (Lam b e)    co  = go_lam bs b e co
@@ -1148,20 +1174,28 @@ collectBindersPushingCo e
 
     -- We are in a lambda under a cast; peel off lambdas and build a
     -- new coercion for the body.
-    go_lam :: [Var] -> Var -> CoreExpr -> Coercion -> ([Var], CoreExpr)
+    go_lam :: [Var] -> Var -> CoreExpr -> CoercionR -> ([Var], CoreExpr)
     -- (go_lam bs b e c) is same as (go_c bs (\b.e) c)
     go_lam bs b e co
       | isTyVar b
       , let Pair tyL tyR = coercionKind co
-      , ASSERT( isForAllTy tyL )
-        isForAllTy tyR
-      , isReflCo (mkNthCo 0 co)  -- See Note [collectBindersPushingCo]
+      , ASSERT( isForAllTy_ty tyL )
+        isForAllTy_ty tyR
+      , isReflCo (mkNthCo Nominal 0 co)  -- See Note [collectBindersPushingCo]
       = go_c (b:bs) e (mkInstCo co (mkNomReflCo (mkTyVarTy b)))
 
+      | isCoVar b
+      , let Pair tyL tyR = coercionKind co
+      , ASSERT( isForAllTy_co tyL )
+        isForAllTy_co tyR
+      , isReflCo (mkNthCo Nominal 0 co)  -- See Note [collectBindersPushingCo]
+      , let cov = mkCoVarCo b
+      = go_c (b:bs) e (mkInstCo co (mkNomReflCo (mkCoercionTy cov)))
+
       | isId b
       , let Pair tyL tyR = coercionKind co
       , ASSERT( isFunTy tyL) isFunTy tyR
-      , (co_arg, co_res) <- decomposeFunCo co
+      , (co_arg, co_res) <- decomposeFunCo Representational co
       , isReflCo co_arg  -- See Note [collectBindersPushingCo]
       = go_c (b:bs) e co_res
 
diff --git a/compiler/coreSyn/CorePrep.hs b/compiler/coreSyn/CorePrep.hs
index 4f7a0da835..9c2954d4ef 100644
--- a/compiler/coreSyn/CorePrep.hs
+++ b/compiler/coreSyn/CorePrep.hs
@@ -8,12 +8,15 @@ Core pass to saturate constructors and PrimOps
 {-# LANGUAGE BangPatterns, CPP, MultiWayIf #-}
 
 module CorePrep (
-      corePrepPgm, corePrepExpr, cvtLitInteger,
-      lookupMkIntegerName, lookupIntegerSDataConName
+      corePrepPgm, corePrepExpr, cvtLitInteger, cvtLitNatural,
+      lookupMkIntegerName, lookupIntegerSDataConName,
+      lookupMkNaturalName, lookupNaturalSDataConName
   ) where
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import OccurAnal
 
 import HscTypes
@@ -58,12 +61,14 @@ import Name             ( NamedThing(..), nameSrcSpan )
 import SrcLoc           ( SrcSpan(..), realSrcLocSpan, mkRealSrcLoc )
 import Data.Bits
 import MonadUtils       ( mapAccumLM )
-import Data.List        ( mapAccumL )
+import Data.List        ( mapAccumL, foldl' )
 import Control.Monad
+import CostCentre       ( CostCentre, ccFromThisModule )
+import qualified Data.Set as S
 
 {-
 -- ---------------------------------------------------------------------------
--- Overview
+-- Note [CorePrep Overview]
 -- ---------------------------------------------------------------------------
 
 The goal of this pass is to prepare for code generation.
@@ -118,10 +123,16 @@ The goal of this pass is to prepare for code generation.
     special case where we use the S# constructor for Integers that
     are in the range of Int.
 
-11. Uphold tick consistency while doing this: We move ticks out of
+11. Same for LitNatural.
+
+12. Uphold tick consistency while doing this: We move ticks out of
     (non-type) applications where we can, and make sure that we
     annotate according to scoping rules when floating.
 
+13. Collect cost centres (including cost centres in unfoldings) if we're in
+    profiling mode. We have to do this here beucase we won't have unfoldings
+    after this pass (see `zapUnfolding` and Note [Drop unfoldings and rules].
+
 This is all done modulo type applications and abstractions, so that
 when type erasure is done for conversion to STG, we don't end up with
 any trivial or useless bindings.
@@ -167,7 +178,7 @@ type CpeRhs  = CoreExpr    -- Non-terminal 'rhs'
 -}
 
 corePrepPgm :: HscEnv -> Module -> ModLocation -> CoreProgram -> [TyCon]
-            -> IO CoreProgram
+            -> IO (CoreProgram, S.Set CostCentre)
 corePrepPgm hsc_env this_mod mod_loc binds data_tycons =
     withTiming (pure dflags)
                (text "CorePrep"<+>brackets (ppr this_mod))
@@ -175,7 +186,13 @@ corePrepPgm hsc_env this_mod mod_loc binds data_tycons =
     us <- mkSplitUniqSupply 's'
     initialCorePrepEnv <- mkInitialCorePrepEnv dflags hsc_env
 
-    let implicit_binds = mkDataConWorkers dflags mod_loc data_tycons
+    let cost_centres
+          | WayProf `elem` ways dflags
+          = collectCostCentres this_mod binds
+          | otherwise
+          = S.empty
+
+        implicit_binds = mkDataConWorkers dflags mod_loc data_tycons
             -- NB: we must feed mkImplicitBinds through corePrep too
             -- so that they are suitably cloned and eta-expanded
 
@@ -185,7 +202,7 @@ corePrepPgm hsc_env this_mod mod_loc binds data_tycons =
                       return (deFloatTop (floats1 `appendFloats` floats2))
 
     endPassIO hsc_env alwaysQualify CorePrep binds_out []
-    return binds_out
+    return (binds_out, cost_centres)
   where
     dflags = hsc_dflags hsc_env
 
@@ -405,23 +422,21 @@ cpeBind top_lvl env (NonRec bndr rhs)
   = do { (_, bndr1) <- cpCloneBndr env bndr
        ; let dmd         = idDemandInfo bndr
              is_unlifted = isUnliftedType (idType bndr)
-       ; (floats, bndr2, rhs2) <- cpePair top_lvl NonRecursive
-                                          dmd
-                                          is_unlifted
-                                          env bndr1 rhs
+       ; (floats, rhs1) <- cpePair top_lvl NonRecursive
+                                   dmd is_unlifted
+                                   env bndr1 rhs
        -- See Note [Inlining in CorePrep]
-       ; if exprIsTrivial rhs2 && isNotTopLevel top_lvl
-            then return (extendCorePrepEnvExpr env bndr rhs2, floats, Nothing)
+       ; if exprIsTrivial rhs1 && isNotTopLevel top_lvl
+            then return (extendCorePrepEnvExpr env bndr rhs1, floats, Nothing)
             else do {
 
-       ; let new_float = mkFloat dmd is_unlifted bndr2 rhs2
+       ; let new_float = mkFloat dmd is_unlifted bndr1 rhs1
 
-        -- We want bndr'' in the envt, because it records
-        -- the evaluated-ness of the binder
-       ; return (extendCorePrepEnv env bndr bndr2,
+       ; return (extendCorePrepEnv env bndr bndr1,
                  addFloat floats new_float,
                  Nothing) }}
-  | otherwise -- See Note [Join points and floating]
+
+  | otherwise -- A join point; see Note [Join points and floating]
   = ASSERT(not (isTopLevel top_lvl)) -- can't have top-level join point
     do { (_, bndr1) <- cpCloneBndr env bndr
        ; (bndr2, rhs1) <- cpeJoinPair env bndr1 rhs
@@ -432,14 +447,17 @@ cpeBind top_lvl env (NonRec bndr rhs)
 cpeBind top_lvl env (Rec pairs)
   | not (isJoinId (head bndrs))
   = do { (env', bndrs1) <- cpCloneBndrs env bndrs
-       ; stuff <- zipWithM (cpePair top_lvl Recursive topDmd False env') bndrs1 rhss
+       ; stuff <- zipWithM (cpePair top_lvl Recursive topDmd False env')
+                           bndrs1 rhss
 
-       ; let (floats_s, bndrs2, rhss2) = unzip3 stuff
-             all_pairs = foldrOL add_float (bndrs2 `zip` rhss2)
+       ; let (floats_s, rhss1) = unzip stuff
+             all_pairs = foldrOL add_float (bndrs1 `zip` rhss1)
                                            (concatFloats floats_s)
-       ; return (extendCorePrepEnvList env (bndrs `zip` bndrs2),
+
+       ; return (extendCorePrepEnvList env (bndrs `zip` bndrs1),
                  unitFloat (FloatLet (Rec all_pairs)),
                  Nothing) }
+
   | otherwise -- See Note [Join points and floating]
   = do { (env', bndrs1) <- cpCloneBndrs env bndrs
        ; pairs1 <- zipWithM (cpeJoinPair env') bndrs1 rhss
@@ -459,9 +477,10 @@ cpeBind top_lvl env (Rec pairs)
 
 ---------------
 cpePair :: TopLevelFlag -> RecFlag -> Demand -> Bool
-        -> CorePrepEnv -> Id -> CoreExpr
-        -> UniqSM (Floats, Id, CpeRhs)
+        -> CorePrepEnv -> OutId -> CoreExpr
+        -> UniqSM (Floats, CpeRhs)
 -- Used for all bindings
+-- The binder is already cloned, hence an OutId
 cpePair top_lvl is_rec dmd is_unlifted env bndr rhs
   = ASSERT(not (isJoinId bndr)) -- those should use cpeJoinPair
     do { (floats1, rhs1) <- cpeRhsE env rhs
@@ -483,15 +502,7 @@ cpePair top_lvl is_rec dmd is_unlifted env bndr rhs
         -- Wrap floating ticks
        ; let (floats4, rhs4) = wrapTicks floats3 rhs3
 
-        -- Record if the binder is evaluated
-        -- and otherwise trim off the unfolding altogether
-        -- It's not used by the code generator; getting rid of it reduces
-        -- heap usage and, since we may be changing uniques, we'd have
-        -- to substitute to keep it right
-       ; let bndr' | exprIsHNF rhs3 = bndr `setIdUnfolding` evaldUnfolding
-                   | otherwise      = bndr `setIdUnfolding` noUnfolding
-
-       ; return (floats4, bndr', rhs4) }
+       ; return (floats4, rhs4) }
   where
     platform = targetPlatform (cpe_dynFlags env)
 
@@ -571,7 +582,6 @@ cpeJoinPair env bndr rhs
 {-
 Note [Arity and join points]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 Up to now, we've allowed a join point to have an arity greater than its join
 arity (minus type arguments), since this is what's useful for eta expansion.
 However, for code gen purposes, its arity must be exactly the number of value
@@ -601,9 +611,12 @@ cpeRhsE :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CpeRhs)
 
 cpeRhsE _env expr@(Type {})      = return (emptyFloats, expr)
 cpeRhsE _env expr@(Coercion {})  = return (emptyFloats, expr)
-cpeRhsE env (Lit (LitInteger i _))
+cpeRhsE env (Lit (LitNumber LitNumInteger i _))
     = cpeRhsE env (cvtLitInteger (cpe_dynFlags env) (getMkIntegerId env)
                    (cpe_integerSDataCon env) i)
+cpeRhsE env (Lit (LitNumber LitNumNatural i _))
+    = cpeRhsE env (cvtLitNatural (cpe_dynFlags env) (getMkNaturalId env)
+                   (cpe_naturalSDataCon env) i)
 cpeRhsE _env expr@(Lit {}) = return (emptyFloats, expr)
 cpeRhsE env expr@(Var {})  = cpeApp env expr
 cpeRhsE env expr@(App {}) = cpeApp env expr
@@ -642,9 +655,7 @@ cpeRhsE env expr@(Lam {})
 
 cpeRhsE env (Case scrut bndr ty alts)
   = do { (floats, scrut') <- cpeBody env scrut
-       ; let bndr1 = bndr `setIdUnfolding` evaldUnfolding
-            -- Record that the case binder is evaluated in the alternatives
-       ; (env', bndr2) <- cpCloneBndr env bndr1
+       ; (env', bndr2) <- cpCloneBndr env bndr
        ; let alts'
                  -- This flag is intended to aid in debugging strictness
                  -- analysis bugs. These are particularly nasty to chase down as
@@ -688,6 +699,24 @@ cvtLitInteger dflags mk_integer _ i
         bits = 31
         mask = 2 ^ bits - 1
 
+cvtLitNatural :: DynFlags -> Id -> Maybe DataCon -> Integer -> CoreExpr
+-- Here we convert a literal Natural to the low-level
+-- representation.
+-- See Note [Natural literals] in Literal
+cvtLitNatural dflags _ (Just sdatacon) i
+  | inWordRange dflags i -- Special case for small naturals
+    = mkConApp sdatacon [Lit (mkMachWord dflags i)]
+
+cvtLitNatural dflags mk_natural _ i
+    = mkApps (Var mk_natural) [words]
+  where words = mkListExpr wordTy (f i)
+        f 0 = []
+        f x = let low  = x .&. mask
+                  high = x `shiftR` bits
+              in mkConApp wordDataCon [Lit (mkMachWord dflags low)] : f high
+        bits = 32
+        mask = 2 ^ bits - 1
+
 -- ---------------------------------------------------------------------------
 --              CpeBody: produces a result satisfying CpeBody
 -- ---------------------------------------------------------------------------
@@ -823,6 +852,7 @@ cpeApp top_env expr
           in cpe_app env terminal (args' ++ args) (depth + depth' - 1)
     cpe_app env (Var f) [CpeApp _runtimeRep@Type{}, CpeApp _type@Type{}, CpeApp arg] 1
         | f `hasKey` runRWKey
+        -- See Note [runRW magic]
         -- Replace (runRW# f) by (f realWorld#), beta reducing if possible (this
         -- is why we return a CorePrepEnv as well)
         = case arg of
@@ -916,11 +946,51 @@ isLazyExpr (Tick _ e)              = isLazyExpr e
 isLazyExpr (Var f `App` _ `App` _) = f `hasKey` lazyIdKey
 isLazyExpr _                       = False
 
+{- Note [runRW magic]
+~~~~~~~~~~~~~~~~~~~~~
+Some definitions, for instance @runST@, must have careful control over float out
+of the bindings in their body. Consider this use of @runST@,
+
+    f x = runST ( \ s -> let (a, s')  = newArray# 100 [] s
+                             (_, s'') = fill_in_array_or_something a x s'
+                         in freezeArray# a s'' )
+
+If we inline @runST@, we'll get:
+
+    f x = let (a, s')  = newArray# 100 [] realWorld#{-NB-}
+              (_, s'') = fill_in_array_or_something a x s'
+          in freezeArray# a s''
+
+And now if we allow the @newArray#@ binding to float out to become a CAF,
+we end up with a result that is totally and utterly wrong:
+
+    f = let (a, s')  = newArray# 100 [] realWorld#{-NB-} -- YIKES!!!
+        in \ x ->
+            let (_, s'') = fill_in_array_or_something a x s'
+            in freezeArray# a s''
+
+All calls to @f@ will share a {\em single} array! Clearly this is nonsense and
+must be prevented.
+
+This is what @runRW#@ gives us: by being inlined extremely late in the
+optimization (right before lowering to STG, in CorePrep), we can ensure that
+no further floating will occur. This allows us to safely inline things like
+@runST@, which are otherwise needlessly expensive (see #10678 and #5916).
+
+'runRW' is defined (for historical reasons) in GHC.Magic, with a NOINLINE
+pragma.  It is levity-polymorphic.
+
+    runRW# :: forall (r1 :: RuntimeRep). (o :: TYPE r)
+           => (State# RealWorld -> (# State# RealWorld, o #))
+                              -> (# State# RealWorld, o #)
+
+It needs no special treatment in GHC except this special inlining here
+in CorePrep (and in ByteCodeGen).
+
 -- ---------------------------------------------------------------------------
 --      CpeArg: produces a result satisfying CpeArg
 -- ---------------------------------------------------------------------------
 
-{-
 Note [ANF-ising literal string arguments]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -1040,16 +1110,26 @@ saturateDataToTag sat_expr
     eval_data2tag_arg other     -- Should not happen
         = pprPanic "eval_data2tag" (ppr other)
 
-{-
-Note [dataToTag magic]
-~~~~~~~~~~~~~~~~~~~~~~
-Horrid: we must ensure that the arg of data2TagOp is evaluated
-  (data2tag x) -->  (case x of y -> data2tag y)
+{- Note [dataToTag magic]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+We must ensure that the arg of data2TagOp is evaluated. So
+in general CorePrep does this transformation:
+  data2tag e   -->   case e of y -> data2tag y
 (yuk yuk) take into account the lambdas we've now introduced
 
 How might it not be evaluated?  Well, we might have floated it out
 of the scope of a `seq`, or dropped the `seq` altogether.
 
+We only do this if 'e' is not a WHNF.  But if it's a simple
+variable (common case) we need to know its evaluated-ness flag.
+Example:
+   data T = MkT !Bool
+   f v = case v of
+           MkT y -> dataToTag# y
+Here we don't want to generate an extra case on 'y', because it's
+already evaluated.  So we want to keep the evaluated-ness flag
+on y.  See Note [Preserve evaluated-ness in CorePrep].
+
 
 ************************************************************************
 *                                                                      *
@@ -1332,8 +1412,8 @@ canFloatFromNoCaf platform (Floats ok_to_spec fs) rhs
     -- the new binding is static. However it can't mention
     -- any non-static things or it would *already* be Caffy
     rhs_ok = rhsIsStatic platform (\_ -> False)
-                         (\i -> pprPanic "rhsIsStatic" (integer i))
-                         -- Integer literals should not show up
+                         (\_nt i -> pprPanic "rhsIsStatic" (integer i))
+                         -- Integer or Natural literals should not show up
 
 wantFloatNested :: RecFlag -> Demand -> Bool -> Floats -> CpeRhs -> Bool
 wantFloatNested is_rec dmd is_unlifted floats rhs
@@ -1442,7 +1522,9 @@ data CorePrepEnv
         --      see Note [lazyId magic], Note [Inlining in CorePrep]
         --      and Note [CorePrep inlines trivial CoreExpr not Id] (#12076)
         , cpe_mkIntegerId     :: Id
+        , cpe_mkNaturalId     :: Id
         , cpe_integerSDataCon :: Maybe DataCon
+        , cpe_naturalSDataCon :: Maybe DataCon
     }
 
 lookupMkIntegerName :: DynFlags -> HscEnv -> IO Id
@@ -1450,13 +1532,24 @@ lookupMkIntegerName dflags hsc_env
     = guardIntegerUse dflags $ liftM tyThingId $
       lookupGlobal hsc_env mkIntegerName
 
+lookupMkNaturalName :: DynFlags -> HscEnv -> IO Id
+lookupMkNaturalName dflags hsc_env
+    = guardNaturalUse dflags $ liftM tyThingId $
+      lookupGlobal hsc_env mkNaturalName
+
 lookupIntegerSDataConName :: DynFlags -> HscEnv -> IO (Maybe DataCon)
 lookupIntegerSDataConName dflags hsc_env = case cIntegerLibraryType of
     IntegerGMP -> guardIntegerUse dflags $ liftM (Just . tyThingDataCon) $
                   lookupGlobal hsc_env integerSDataConName
     IntegerSimple -> return Nothing
 
--- | Helper for 'lookupMkIntegerName' and 'lookupIntegerSDataConName'
+lookupNaturalSDataConName :: DynFlags -> HscEnv -> IO (Maybe DataCon)
+lookupNaturalSDataConName dflags hsc_env = case cIntegerLibraryType of
+    IntegerGMP -> guardNaturalUse dflags $ liftM (Just . tyThingDataCon) $
+                  lookupGlobal hsc_env naturalSDataConName
+    IntegerSimple -> return Nothing
+
+-- | Helper for 'lookupMkIntegerName', 'lookupIntegerSDataConName'
 guardIntegerUse :: DynFlags -> IO a -> IO a
 guardIntegerUse dflags act
   | thisPackage dflags == primUnitId
@@ -1465,15 +1558,33 @@ guardIntegerUse dflags act
   = return $ panic "Can't use Integer in integer-*"
   | otherwise = act
 
+-- | Helper for 'lookupMkNaturalName', 'lookupNaturalSDataConName'
+--
+-- Just like we can't use Integer literals in `integer-*`, we can't use Natural
+-- literals in `base`. If we do, we get interface loading error for GHC.Natural.
+guardNaturalUse :: DynFlags -> IO a -> IO a
+guardNaturalUse dflags act
+  | thisPackage dflags == primUnitId
+  = return $ panic "Can't use Natural in ghc-prim"
+  | thisPackage dflags == integerUnitId
+  = return $ panic "Can't use Natural in integer-*"
+  | thisPackage dflags == baseUnitId
+  = return $ panic "Can't use Natural in base"
+  | otherwise = act
+
 mkInitialCorePrepEnv :: DynFlags -> HscEnv -> IO CorePrepEnv
 mkInitialCorePrepEnv dflags hsc_env
     = do mkIntegerId <- lookupMkIntegerName dflags hsc_env
+         mkNaturalId <- lookupMkNaturalName dflags hsc_env
          integerSDataCon <- lookupIntegerSDataConName dflags hsc_env
+         naturalSDataCon <- lookupNaturalSDataConName dflags hsc_env
          return $ CPE {
                       cpe_dynFlags = dflags,
                       cpe_env = emptyVarEnv,
                       cpe_mkIntegerId = mkIntegerId,
-                      cpe_integerSDataCon = integerSDataCon
+                      cpe_mkNaturalId = mkNaturalId,
+                      cpe_integerSDataCon = integerSDataCon,
+                      cpe_naturalSDataCon = naturalSDataCon
                   }
 
 extendCorePrepEnv :: CorePrepEnv -> Id -> Id -> CorePrepEnv
@@ -1498,30 +1609,74 @@ lookupCorePrepEnv cpe id
 getMkIntegerId :: CorePrepEnv -> Id
 getMkIntegerId = cpe_mkIntegerId
 
+getMkNaturalId :: CorePrepEnv -> Id
+getMkNaturalId = cpe_mkNaturalId
+
 ------------------------------------------------------------------------------
 -- Cloning binders
 -- ---------------------------------------------------------------------------
 
-cpCloneBndrs :: CorePrepEnv -> [Var] -> UniqSM (CorePrepEnv, [Var])
+cpCloneBndrs :: CorePrepEnv -> [InVar] -> UniqSM (CorePrepEnv, [OutVar])
 cpCloneBndrs env bs = mapAccumLM cpCloneBndr env bs
 
-cpCloneBndr  :: CorePrepEnv -> Var -> UniqSM (CorePrepEnv, Var)
+cpCloneBndr  :: CorePrepEnv -> InVar -> UniqSM (CorePrepEnv, OutVar)
 cpCloneBndr env bndr
-  | isLocalId bndr, not (isCoVar bndr)
-  = do bndr' <- setVarUnique bndr <$> getUniqueM
-
-       -- We are going to OccAnal soon, so drop (now-useless) rules/unfoldings
-       -- so that we can drop more stuff as dead code.
-       -- See also Note [Dead code in CorePrep]
-       let bndr'' = bndr' `setIdUnfolding` noUnfolding
-                          `setIdSpecialisation` emptyRuleInfo
-       return (extendCorePrepEnv env bndr bndr'', bndr'')
-
-  | otherwise   -- Top level things, which we don't want
-                -- to clone, have become GlobalIds by now
-                -- And we don't clone tyvars, or coercion variables
+  | not (isId bndr)
   = return (env, bndr)
 
+  | otherwise
+  = do { bndr' <- clone_it bndr
+
+       -- Drop (now-useless) rules/unfoldings
+       -- See Note [Drop unfoldings and rules]
+       -- and Note [Preserve evaluated-ness in CorePrep]
+       ; let unfolding' = zapUnfolding (realIdUnfolding bndr)
+                          -- Simplifier will set the Id's unfolding
+
+             bndr'' = bndr' `setIdUnfolding`      unfolding'
+                            `setIdSpecialisation` emptyRuleInfo
+
+       ; return (extendCorePrepEnv env bndr bndr'', bndr'') }
+  where
+    clone_it bndr
+      | isLocalId bndr, not (isCoVar bndr)
+      = do { uniq <- getUniqueM; return (setVarUnique bndr uniq) }
+      | otherwise   -- Top level things, which we don't want
+                    -- to clone, have become GlobalIds by now
+                    -- And we don't clone tyvars, or coercion variables
+      = return bndr
+
+{- Note [Drop unfoldings and rules]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We want to drop the unfolding/rules on every Id:
+
+  - We are now past interface-file generation, and in the
+    codegen pipeline, so we really don't need full unfoldings/rules
+
+  - The unfolding/rule may be keeping stuff alive that we'd like
+    to discard.  See  Note [Dead code in CorePrep]
+
+  - Getting rid of unnecessary unfoldings reduces heap usage
+
+  - We are changing uniques, so if we didn't discard unfoldings/rules
+    we'd have to substitute in them
+
+HOWEVER, we want to preserve evaluated-ness; see
+Note [Preserve evaluated-ness in CorePrep]
+
+Note [Preserve evaluated-ness in CorePrep]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We want to preserve the evaluated-ness of each binder (via
+evaldUnfolding) for two reasons
+
+* In the code generator if we have
+     case x of y { Red -> e1; DEFAULT -> y }
+  we can return 'y' rather than entering it, if we know
+  it is evaluated (Trac #14626)
+
+* In the DataToTag magic (in CorePrep itself) we rely on
+  evaluated-ness.  See Note Note [dataToTag magic].
+-}
 
 ------------------------------------------------------------------------------
 -- Cloning ccall Ids; each must have a unique name,
@@ -1598,3 +1753,39 @@ wrapTicks (Floats flag floats0) expr =
                                              (ppr other)
         wrapBind t (NonRec binder rhs) = NonRec binder (mkTick t rhs)
         wrapBind t (Rec pairs)         = Rec (mapSnd (mkTick t) pairs)
+
+------------------------------------------------------------------------------
+-- Collecting cost centres
+-- ---------------------------------------------------------------------------
+
+-- | Collect cost centres defined in the current module, including those in
+-- unfoldings.
+collectCostCentres :: Module -> CoreProgram -> S.Set CostCentre
+collectCostCentres mod_name
+  = foldl' go_bind S.empty
+  where
+    go cs e = case e of
+      Var{} -> cs
+      Lit{} -> cs
+      App e1 e2 -> go (go cs e1) e2
+      Lam _ e -> go cs e
+      Let b e -> go (go_bind cs b) e
+      Case scrt _ _ alts -> go_alts (go cs scrt) alts
+      Cast e _ -> go cs e
+      Tick (ProfNote cc _ _) e ->
+        go (if ccFromThisModule cc mod_name then S.insert cc cs else cs) e
+      Tick _ e -> go cs e
+      Type{} -> cs
+      Coercion{} -> cs
+
+    go_alts = foldl' (\cs (_con, _bndrs, e) -> go cs e)
+
+    go_bind :: S.Set CostCentre -> CoreBind -> S.Set CostCentre
+    go_bind cs (NonRec b e) =
+      go (maybe cs (go cs) (get_unf b)) e
+    go_bind cs (Rec bs) =
+      foldl' (\cs' (b, e) -> go (maybe cs' (go cs') (get_unf b)) e) cs bs
+
+    -- Unfoldings may have cost centres that in the original definion are
+    -- optimized away, see #5889.
+    get_unf = maybeUnfoldingTemplate . realIdUnfolding
diff --git a/compiler/coreSyn/CoreSeq.hs b/compiler/coreSyn/CoreSeq.hs
index d426bd3581..7de8923a71 100644
--- a/compiler/coreSyn/CoreSeq.hs
+++ b/compiler/coreSyn/CoreSeq.hs
@@ -10,6 +10,8 @@ module CoreSeq (
         megaSeqIdInfo, seqRuleInfo, seqBinds,
     ) where
 
+import GhcPrelude
+
 import CoreSyn
 import IdInfo
 import Demand( seqDemand, seqStrictSig )
diff --git a/compiler/coreSyn/CoreStats.hs b/compiler/coreSyn/CoreStats.hs
index cb73d147a8..826ffe171e 100644
--- a/compiler/coreSyn/CoreStats.hs
+++ b/compiler/coreSyn/CoreStats.hs
@@ -11,6 +11,8 @@ module CoreStats (
         CoreStats(..), coreBindsStats, exprStats,
     ) where
 
+import GhcPrelude
+
 import BasicTypes
 import CoreSyn
 import Outputable
diff --git a/compiler/coreSyn/CoreSubst.hs b/compiler/coreSyn/CoreSubst.hs
index 919d9e8cd0..2df3fb1b52 100644
--- a/compiler/coreSyn/CoreSubst.hs
+++ b/compiler/coreSyn/CoreSubst.hs
@@ -36,6 +36,8 @@ module CoreSubst (
 #include "HsVersions.h"
 
 
+import GhcPrelude
+
 import CoreSyn
 import CoreFVs
 import CoreSeq
@@ -77,19 +79,9 @@ import Data.List
 --
 -- Some invariants apply to how you use the substitution:
 --
--- 1. #in_scope_invariant# The in-scope set contains at least those 'Id's and 'TyVar's that will be in scope /after/
--- applying the substitution to a term. Precisely, the in-scope set must be a superset of the free vars of the
--- substitution range that might possibly clash with locally-bound variables in the thing being substituted in.
---
--- 2. #apply_once# You may apply the substitution only /once/
---
--- There are various ways of setting up the in-scope set such that the first of these invariants hold:
---
--- * Arrange that the in-scope set really is all the things in scope
---
--- * Arrange that it's the free vars of the range of the substitution
+-- 1. Note [The substitution invariant] in TyCoRep
 --
--- * Make it empty, if you know that all the free vars of the substitution are fresh, and hence can't possibly clash
+-- 2. Note [Substitutions apply only once] in TyCoRep
 data Subst
   = Subst InScopeSet  -- Variables in in scope (both Ids and TyVars) /after/
                       -- applying the substitution
@@ -97,7 +89,7 @@ data Subst
           TvSubstEnv  -- Substitution from TyVars to Types
           CvSubstEnv  -- Substitution from CoVars to Coercions
 
-        -- INVARIANT 1: See #in_scope_invariant#
+        -- INVARIANT 1: See TyCoRep Note [The substitution invariant]
         -- This is what lets us deal with name capture properly
         -- It's a hard invariant to check...
         --
@@ -179,7 +171,7 @@ mkEmptySubst in_scope = Subst in_scope emptyVarEnv emptyVarEnv emptyVarEnv
 mkSubst :: InScopeSet -> TvSubstEnv -> CvSubstEnv -> IdSubstEnv -> Subst
 mkSubst in_scope tvs cvs ids = Subst in_scope ids tvs cvs
 
--- | Find the in-scope set: see "CoreSubst#in_scope_invariant"
+-- | Find the in-scope set: see TyCoRep Note [The substitution invariant]
 substInScope :: Subst -> InScopeSet
 substInScope (Subst in_scope _ _ _) = in_scope
 
@@ -189,7 +181,8 @@ zapSubstEnv :: Subst -> Subst
 zapSubstEnv (Subst in_scope _ _ _) = Subst in_scope emptyVarEnv emptyVarEnv emptyVarEnv
 
 -- | Add a substitution for an 'Id' to the 'Subst': you must ensure that the in-scope set is
--- such that the "CoreSubst#in_scope_invariant" is true after extending the substitution like this
+-- such that TyCoRep Note [The substitution invariant]
+-- holds after extending the substitution like this
 extendIdSubst :: Subst -> Id -> CoreExpr -> Subst
 -- ToDo: add an ASSERT that fvs(subst-result) is already in the in-scope set
 extendIdSubst (Subst in_scope ids tvs cvs) v r
@@ -205,8 +198,8 @@ extendIdSubstList (Subst in_scope ids tvs cvs) prs
 -- | Add a substitution for a 'TyVar' to the 'Subst'
 -- The 'TyVar' *must* be a real TyVar, and not a CoVar
 -- You must ensure that the in-scope set is such that
--- the "CoreSubst#in_scope_invariant" is true after extending
--- the substitution like this.
+-- TyCoRep Note [The substitution invariant] holds
+-- after extending the substitution like this.
 extendTvSubst :: Subst -> TyVar -> Type -> Subst
 extendTvSubst (Subst in_scope ids tvs cvs) tv ty
   = ASSERT( isTyVar tv )
@@ -219,8 +212,10 @@ extendTvSubstList subst vrs
   where
     extend subst (v, r) = extendTvSubst subst v r
 
--- | Add a substitution from a 'CoVar' to a 'Coercion' to the 'Subst': you must ensure that the in-scope set is
--- such that the "CoreSubst#in_scope_invariant" is true after extending the substitution like this
+-- | Add a substitution from a 'CoVar' to a 'Coercion' to the 'Subst':
+-- you must ensure that the in-scope set satisfies
+-- TyCoRep Note [The substitution invariant]
+-- after extending the substitution like this
 extendCvSubst :: Subst -> CoVar -> Coercion -> Subst
 extendCvSubst (Subst in_scope ids tvs cvs) v r
   = ASSERT( isCoVar v )
@@ -343,7 +338,8 @@ instance Outputable Subst where
 -}
 
 -- | Apply a substitution to an entire 'CoreExpr'. Remember, you may only
--- apply the substitution /once/: see "CoreSubst#apply_once"
+-- apply the substitution /once/:
+-- see Note [Substitutions apply only once] in TyCoRep
 --
 -- Do *not* attempt to short-cut in the case of an empty substitution!
 -- See Note [Extending the Subst]
diff --git a/compiler/coreSyn/CoreSyn.hs b/compiler/coreSyn/CoreSyn.hs
index 99478d2b66..aa27d7a7fb 100644
--- a/compiler/coreSyn/CoreSyn.hs
+++ b/compiler/coreSyn/CoreSyn.hs
@@ -5,6 +5,7 @@
 
 {-# LANGUAGE CPP, DeriveDataTypeable, FlexibleContexts #-}
 {-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE BangPatterns #-}
 
 -- | CoreSyn holds all the main data types for use by for the Glasgow Haskell Compiler midsection
 module CoreSyn (
@@ -18,7 +19,7 @@ module CoreSyn (
         InId, InBind, InExpr, InAlt, InArg, InType, InKind,
                InBndr, InVar, InCoercion, InTyVar, InCoVar,
         OutId, OutBind, OutExpr, OutAlt, OutArg, OutType, OutKind,
-               OutBndr, OutVar, OutCoercion, OutTyVar, OutCoVar,
+               OutBndr, OutVar, OutCoercion, OutTyVar, OutCoVar, MOutCoercion,
 
         -- ** 'Expr' construction
         mkLet, mkLets, mkLetNonRec, mkLetRec, mkLams,
@@ -40,12 +41,12 @@ module CoreSyn (
         bindersOf, bindersOfBinds, rhssOfBind, rhssOfAlts,
         collectBinders, collectTyBinders, collectTyAndValBinders,
         collectNBinders,
-        collectArgs, collectArgsTicks, flattenBinds,
+        collectArgs, stripNArgs, collectArgsTicks, flattenBinds,
 
         exprToType, exprToCoercion_maybe,
         applyTypeToArg,
 
-        isValArg, isTypeArg, isTyCoArg, valArgCount, valBndrCount,
+        isValArg, isTypeArg, isCoArg, isTyCoArg, valArgCount, valBndrCount,
         isRuntimeArg, isRuntimeVar,
 
         -- * Tick-related functions
@@ -77,7 +78,7 @@ module CoreSyn (
         collectAnnArgs, collectAnnArgsTicks,
 
         -- ** Operations on annotations
-        deAnnotate, deAnnotate', deAnnAlt,
+        deAnnotate, deAnnotate', deAnnAlt, deAnnBind,
         collectAnnBndrs, collectNAnnBndrs,
 
         -- * Orphanhood
@@ -92,13 +93,12 @@ module CoreSyn (
         ruleArity, ruleName, ruleIdName, ruleActivation,
         setRuleIdName, ruleModule,
         isBuiltinRule, isLocalRule, isAutoRule,
-
-        -- * Core vectorisation declarations data type
-        CoreVect(..)
     ) where
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import CostCentre
 import VarEnv( InScopeSet )
 import Var
@@ -110,7 +110,6 @@ import NameEnv( NameEnv, emptyNameEnv )
 import Literal
 import DataCon
 import Module
-import TyCon
 import BasicTypes
 import DynFlags
 import Outputable
@@ -174,6 +173,7 @@ These data types are the heart of the compiler
 -- The language consists of the following elements:
 --
 -- *  Variables
+--    See Note [Variable occurrences in Core]
 --
 -- *  Primitive literals
 --
@@ -188,29 +188,10 @@ These data types are the heart of the compiler
 --    this corresponds to allocating a thunk for the things
 --    bound and then executing the sub-expression.
 --
---    #top_level_invariant#
---    #letrec_invariant#
---
---    The right hand sides of all top-level and recursive @let@s
---    /must/ be of lifted type (see "Type#type_classification" for
---    the meaning of /lifted/ vs. /unlifted/). There is one exception
---    to this rule, top-level @let@s are allowed to bind primitive
---    string literals, see Note [CoreSyn top-level string literals].
---
+--    See Note [CoreSyn letrec invariant]
 --    See Note [CoreSyn let/app invariant]
 --    See Note [Levity polymorphism invariants]
---
---    #type_let#
---    We allow a /non-recursive/ let to bind a type variable, thus:
---
---    > Let (NonRec tv (Type ty)) body
---
---    This can be very convenient for postponing type substitutions until
---    the next run of the simplifier.
---
---    At the moment, the rest of the compiler only deals with type-let
---    in a Let expression, rather than at top level.  We may want to revist
---    this choice.
+--    See Note [CoreSyn type and coercion invariant]
 --
 -- *  Case expression. Operationally this corresponds to evaluating
 --    the scrutinee (expression examined) to weak head normal form
@@ -311,16 +292,17 @@ data AltCon
 -- This instance is a bit shady. It can only be used to compare AltCons for
 -- a single type constructor. Fortunately, it seems quite unlikely that we'll
 -- ever need to compare AltCons for different type constructors.
+-- The instance adheres to the order described in [CoreSyn case invariants]
 instance Ord AltCon where
   compare (DataAlt con1) (DataAlt con2) =
     ASSERT( dataConTyCon con1 == dataConTyCon con2 )
     compare (dataConTag con1) (dataConTag con2)
-  compare (DataAlt _) _ = LT
-  compare _ (DataAlt _) = GT
+  compare (DataAlt _) _ = GT
+  compare _ (DataAlt _) = LT
   compare (LitAlt l1) (LitAlt l2) = compare l1 l2
-  compare (LitAlt _) DEFAULT = LT
+  compare (LitAlt _) DEFAULT = GT
   compare DEFAULT DEFAULT = EQ
-  compare DEFAULT _ = GT
+  compare DEFAULT _ = LT
 
 -- | Binding, used for top level bindings in a module and local bindings in a @let@.
 
@@ -344,7 +326,7 @@ In particular, scrutinee variables `x` in expressions of the form
 "wild_". These "wild" variables may appear in the body of the
 case-expression, and further, may be shadowed within the body.
 
-So the Unique in an Var is not really unique at all.  Still, it's very
+So the Unique in a Var is not really unique at all.  Still, it's very
 useful to give a constant-time equality/ordering for Vars, and to give
 a key that can be used to make sets of Vars (VarSet), or mappings from
 Vars to other things (VarEnv).   Moreover, if you do want to eliminate
@@ -371,13 +353,25 @@ PrelRules for the rationale for this restriction.
 
 -------------------------- CoreSyn INVARIANTS ---------------------------
 
-Note [CoreSyn top-level invariant]
+Note [Variable occurrences in Core]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-See #toplevel_invariant#
+Variable /occurrences/ are never CoVars, though /bindings/ can be.
+All CoVars appear in Coercions.
+
+For example
+  \(c :: Age~#Int) (d::Int). d |> (sym c)
+Here 'c' is a CoVar, which is lambda-bound, but it /occurs/ in
+a Coercion, (sym c).
 
 Note [CoreSyn letrec invariant]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-See #letrec_invariant#
+The right hand sides of all top-level and recursive @let@s
+/must/ be of lifted type (see "Type#type_classification" for
+the meaning of /lifted/ vs. /unlifted/).
+
+There is one exception to this rule, top-level @let@s are
+allowed to bind primitive string literals: see
+Note [CoreSyn top-level string literals].
 
 Note [CoreSyn top-level string literals]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -398,10 +392,10 @@ The solution is simply to allow top-level unlifted binders. We can't allow
 arbitrary unlifted expression at the top-level though, unlifted binders cannot
 be thunks, so we just allow string literals.
 
-It is important to note that top-level primitive string literals cannot be
-wrapped in Ticks, as is otherwise done with lifted bindings. CoreToStg expects
-to see just a plain (Lit (MachStr ...)) expression on the RHS of primitive
-string bindings; anything else and things break. CoreLint checks this invariant.
+We allow the top-level primitive string literals to be wrapped in Ticks
+in the same way they can be wrapped when nested in an expression.
+CoreToSTG currently discards Ticks around top-level primitive string literals.
+See Trac #14779.
 
 Also see Note [Compilation plan for top-level string literals].
 
@@ -411,7 +405,7 @@ Here is a summary on how top-level string literals are handled by various
 parts of the compilation pipeline.
 
 * In the source language, there is no way to bind a primitive string literal
-  at the top leve.
+  at the top level.
 
 * In Core, we have a special rule that permits top-level Addr# bindings. See
   Note [CoreSyn top-level string literals]. Core-to-core passes may introduce
@@ -451,6 +445,27 @@ which will generate a @case@ if necessary
 The let/app invariant is initially enforced by mkCoreLet and mkCoreApp in
 coreSyn/MkCore.
 
+Note [CoreSyn type and coercion invariant]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We allow a /non-recursive/, /non-top-level/ let to bind type and
+coercion variables.  These can be very convenient for postponing type
+substitutions until the next run of the simplifier.
+
+* A type variable binding must have a RHS of (Type ty)
+
+* A coercion variable binding must have a RHS of (Coercion co)
+
+  It is possible to have terms that return a coercion, but we use
+  case-binding for those; e.g.
+     case (eq_sel d) of (co :: a ~# b) -> blah
+  where eq_sel :: (a~b) -> (a~#b)
+
+  Or even even
+      case (df @Int) of (co :: a ~# b) -> blah
+  Which is very exotic, and I think never encountered; but see
+  Note [Equality superclasses in quantified constraints]
+  in TcCanonical
+
 Note [CoreSyn case invariants]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 See #case_invariants#
@@ -703,33 +718,64 @@ polymorphic in its return type. That is, if its type is
   forall a1 ... ak. t1 -> ... -> tn -> r
 
 where its join arity is k+n, none of the type parameters ai may occur free in r.
-The most direct explanation is that given
 
-  join j @a1 ... @ak x1 ... xn = e1 in e2
+In some way, this falls out of the fact that given
+
+  join
+     j @a1 ... @ak x1 ... xn = e1
+  in e2
+
+then all calls to `j` are in tail-call positions of `e`, and expressions in
+tail-call positions in `e` have the same type as `e`.
+Therefore the type of `e1` -- the return type of the join point -- must be the
+same as the type of e2.
+Since the type variables aren't bound in `e2`, its type can't include them, and
+thus neither can the type of `e1`.
+
+This unfortunately prevents the `go` in the following code from being a
+join-point:
 
-our typing rules require `e1` and `e2` to have the same type. Therefore the type
-of `e1`---the return type of the join point---must be the same as the type of
-e2. Since the type variables aren't bound in `e2`, its type can't include them,
-and thus neither can the type of `e1`.
+  iter :: forall a. Int -> (a -> a) -> a -> a
+  iter @a n f x = go @a n f x
+    where
+      go :: forall a. Int -> (a -> a) -> a -> a
+      go @a 0 _ x = x
+      go @a n f x = go @a (n-1) f (f x)
 
-There's a deeper explanation in terms of the sequent calculus in Section 5.3 of
-a previous paper:
+In this case, a static argument transformation would fix that (see
+ticket #14620):
 
-  Paul Downen, Luke Maurer, Zena Ariola, and Simon Peyton Jones. "Sequent
-  calculus as a compiler intermediate language." ICFP'16.
+  iter :: forall a. Int -> (a -> a) -> a -> a
+  iter @a n f x = go' @a n f x
+    where
+      go' :: Int -> (a -> a) -> a -> a
+      go' 0 _ x = x
+      go' n f x = go' (n-1) f (f x)
 
-  https://www.microsoft.com/en-us/research/wp-content/uploads/2016/04/sequent-calculus-icfp16.pdf
+In general, loopification could be employed to do that (see #14068.)
 
-The quick version: Consider the CPS term (the paper uses the sequent calculus,
-but we can translate readily):
+Can we simply drop the requirement, and allow `go` to be a join-point? We
+could, and it would work. But we could not longer apply the case-of-join-point
+transformation universally. This transformation would do:
 
-  \k -> join j @a1 ... @ak x1 ... xn = e1 k in e2 k
+  case (join go @a n f x = case n of 0 -> x
+                                     n -> go @a (n-1) f (f x)
+        in go @Bool n neg True) of
+    True -> e1; False -> e2
 
-Since `j` is a join point, it doesn't bind a continuation variable but reuses
-the variable `k` from the context. But the parameters `ai` are not in `k`'s
-scope, and `k`'s type determines the return type of `j`; thus the `ai`s don't
-appear in the return type of `j`. (Also, since `e1` and `e2` are passed the same
-continuation, they must have the same type; hence the direct explanation above.)
+ ===>
+
+  join go @a n f x = case n of 0 -> case x of True -> e1; False -> e2
+                          n -> go @a (n-1) f (f x)
+  in go @Bool n neg True
+
+but that is ill-typed, as `x` is type `a`, not `Bool`.
+
+
+This also justifies why we do not consider the `e` in `e |> co` to be in
+tail position: A cast changes the type, but the type must be the same. But
+operationally, casts are vacuous, so this is a bit unfortunate! See #14610 for
+ideas how to fix this.
 
 ************************************************************************
 *                                                                      *
@@ -759,6 +805,7 @@ type OutBind     = CoreBind
 type OutExpr     = CoreExpr
 type OutAlt      = CoreAlt
 type OutArg      = CoreArg
+type MOutCoercion = MCoercion
 
 
 {- *********************************************************************
@@ -856,7 +903,7 @@ data TickishScoping =
 
     -- | Soft scoping: We want all code that is covered to stay
     -- covered.  Note that this scope type does not forbid
-    -- transformations from happening, as as long as all results of
+    -- transformations from happening, as long as all results of
     -- the transformations are still covered by this tick or a copy of
     -- it. For example
     --
@@ -1270,23 +1317,6 @@ setRuleIdName nm ru = ru { ru_fn = nm }
 {-
 ************************************************************************
 *                                                                      *
-\subsection{Vectorisation declarations}
-*                                                                      *
-************************************************************************
-
-Representation of desugared vectorisation declarations that are fed to the vectoriser (via
-'ModGuts').
--}
-
-data CoreVect = Vect      Id   CoreExpr
-              | NoVect    Id
-              | VectType  Bool TyCon (Maybe TyCon)
-              | VectClass TyCon                     -- class tycon
-              | VectInst  Id                        -- instance dfun (always SCALAR)  !!!FIXME: should be superfluous now
-
-{-
-************************************************************************
-*                                                                      *
                 Unfoldings
 *                                                                      *
 ************************************************************************
@@ -1800,12 +1830,12 @@ mkVarApps :: Expr b -> [Var] -> Expr b
 -- use 'MkCore.mkCoreConApps' if possible
 mkConApp      :: DataCon -> [Arg b] -> Expr b
 
-mkApps    f args = foldl App                       f args
-mkCoApps  f args = foldl (\ e a -> App e (Coercion a)) f args
-mkVarApps f vars = foldl (\ e a -> App e (varToCoreExpr a)) f vars
+mkApps    f args = foldl' App                       f args
+mkCoApps  f args = foldl' (\ e a -> App e (Coercion a)) f args
+mkVarApps f vars = foldl' (\ e a -> App e (varToCoreExpr a)) f vars
 mkConApp con args = mkApps (Var (dataConWorkId con)) args
 
-mkTyApps  f args = foldl (\ e a -> App e (mkTyArg a)) f args
+mkTyApps  f args = foldl' (\ e a -> App e (mkTyArg a)) f args
 
 mkConApp2 :: DataCon -> [Type] -> [Var] -> Expr b
 mkConApp2 con tys arg_ids = Var (dataConWorkId con)
@@ -2021,7 +2051,7 @@ collectNBinders orig_n orig_expr
     go n bs (Lam b e) = go (n-1) (b:bs) e
     go _ _  _         = pprPanic "collectNBinders" $ int orig_n
 
--- | Takes a nested application expression and returns the the function
+-- | Takes a nested application expression and returns the function
 -- being applied and the arguments to which it is applied
 collectArgs :: Expr b -> (Expr b, [Arg b])
 collectArgs expr
@@ -2030,6 +2060,16 @@ collectArgs expr
     go (App f a) as = go f (a:as)
     go e         as = (e, as)
 
+-- | Attempt to remove the last N arguments of a function call.
+-- Strip off any ticks or coercions encountered along the way and any
+-- at the end.
+stripNArgs :: Word -> Expr a -> Maybe (Expr a)
+stripNArgs !n (Tick _ e) = stripNArgs n e
+stripNArgs n (Cast f _) = stripNArgs n f
+stripNArgs 0 e = Just e
+stripNArgs n (App f _) = stripNArgs (n - 1) f
+stripNArgs _ _ = Nothing
+
 -- | Like @collectArgs@, but also collects looks through floatable
 -- ticks if it means that we can find more arguments.
 collectArgsTicks :: (Tickish Id -> Bool) -> Expr b
@@ -2077,6 +2117,12 @@ isTyCoArg (Type {})     = True
 isTyCoArg (Coercion {}) = True
 isTyCoArg _             = False
 
+-- | Returns @True@ iff the expression is a 'Coercion'
+-- expression at its top level
+isCoArg :: Expr b -> Bool
+isCoArg (Coercion {}) = True
+isCoArg _             = False
+
 -- | Returns @True@ iff the expression is a 'Type' expression at its
 -- top level.  Note this does NOT include 'Coercion's.
 isTypeArg :: Expr b -> Bool
@@ -2124,7 +2170,7 @@ data AnnBind bndr annot
   = AnnNonRec bndr (AnnExpr bndr annot)
   | AnnRec    [(bndr, AnnExpr bndr annot)]
 
--- | Takes a nested application expression and returns the the function
+-- | Takes a nested application expression and returns the function
 -- being applied and the arguments to which it is applied
 collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a])
 collectAnnArgs expr
@@ -2158,16 +2204,16 @@ deAnnotate' (AnnTick tick body)   = Tick tick (deAnnotate body)
 
 deAnnotate' (AnnLet bind body)
   = Let (deAnnBind bind) (deAnnotate body)
-  where
-    deAnnBind (AnnNonRec var rhs) = NonRec var (deAnnotate rhs)
-    deAnnBind (AnnRec pairs) = Rec [(v,deAnnotate rhs) | (v,rhs) <- pairs]
-
 deAnnotate' (AnnCase scrut v t alts)
   = Case (deAnnotate scrut) v t (map deAnnAlt alts)
 
 deAnnAlt :: AnnAlt bndr annot -> Alt bndr
 deAnnAlt (con,args,rhs) = (con,args,deAnnotate rhs)
 
+deAnnBind  :: AnnBind b annot -> Bind b
+deAnnBind (AnnNonRec var rhs) = NonRec var (deAnnotate rhs)
+deAnnBind (AnnRec pairs) = Rec [(v,deAnnotate rhs) | (v,rhs) <- pairs]
+
 -- | As 'collectBinders' but for 'AnnExpr' rather than 'Expr'
 collectAnnBndrs :: AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
 collectAnnBndrs e
diff --git a/compiler/coreSyn/CoreTidy.hs b/compiler/coreSyn/CoreTidy.hs
index 89ce692422..be5e6c1619 100644
--- a/compiler/coreSyn/CoreTidy.hs
+++ b/compiler/coreSyn/CoreTidy.hs
@@ -14,13 +14,15 @@ module CoreTidy (
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import CoreSyn
 import CoreSeq ( seqUnfolding )
 import CoreArity
 import Id
 import IdInfo
 import Demand ( zapUsageEnvSig )
-import Type( tidyType, tidyTyCoVarBndr )
+import Type( tidyType, tidyVarBndr )
 import Coercion( tidyCo )
 import Var
 import VarEnv
@@ -128,7 +130,7 @@ tidyVarOcc (_, var_env) v = lookupVarEnv var_env v `orElse` v
 -- tidyBndr is used for lambda and case binders
 tidyBndr :: TidyEnv -> Var -> (TidyEnv, Var)
 tidyBndr env var
-  | isTyCoVar var = tidyTyCoVarBndr env var
+  | isTyCoVar var = tidyVarBndr env var
   | otherwise     = tidyIdBndr env var
 
 tidyBndrs :: TidyEnv -> [Var] -> (TidyEnv, [Var])
@@ -157,9 +159,7 @@ tidyIdBndr env@(tidy_env, var_env) id
                                  `setOneShotInfo` oneShotInfo old_info
         old_info = idInfo id
         old_unf  = unfoldingInfo old_info
-        new_unf | isEvaldUnfolding old_unf = evaldUnfolding
-                | otherwise                = noUnfolding
-          -- See Note [Preserve evaluatedness]
+        new_unf  = zapUnfolding old_unf  -- See Note [Preserve evaluatedness]
     in
     ((tidy_env', var_env'), id')
    }
@@ -205,11 +205,10 @@ tidyLetBndr rec_tidy_env env@(tidy_env, var_env) (id,rhs)
                     `setInlinePragInfo` inlinePragInfo old_info
                     `setUnfoldingInfo`  new_unf
 
+        old_unf = unfoldingInfo old_info
         new_unf | isStableUnfolding old_unf = tidyUnfolding rec_tidy_env old_unf old_unf
-                | isEvaldUnfolding  old_unf = evaldUnfolding
+                | otherwise                 = zapUnfolding old_unf
                                               -- See Note [Preserve evaluatedness]
-                | otherwise                 = noUnfolding
-        old_unf = unfoldingInfo old_info
     in
     ((tidy_env', var_env'), id') }
 
diff --git a/compiler/coreSyn/CoreUnfold.hs b/compiler/coreSyn/CoreUnfold.hs
index a104cd693f..adb399ea6f 100644
--- a/compiler/coreSyn/CoreUnfold.hs
+++ b/compiler/coreSyn/CoreUnfold.hs
@@ -42,6 +42,8 @@ module CoreUnfold (
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import DynFlags
 import CoreSyn
 import PprCore          ()      -- Instances
@@ -63,8 +65,10 @@ import Bag
 import Util
 import Outputable
 import ForeignCall
+import Name
 
 import qualified Data.ByteString as BS
+import Data.List
 
 {-
 ************************************************************************
@@ -81,7 +85,7 @@ mkTopUnfolding dflags is_bottoming rhs
 mkImplicitUnfolding :: DynFlags -> CoreExpr -> Unfolding
 -- For implicit Ids, do a tiny bit of optimising first
 mkImplicitUnfolding dflags expr
-  = mkTopUnfolding dflags False (simpleOptExpr expr)
+  = mkTopUnfolding dflags False (simpleOptExpr dflags expr)
 
 -- Note [Top-level flag on inline rules]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -100,17 +104,17 @@ mkDFunUnfolding bndrs con ops
                   , df_args = map occurAnalyseExpr ops }
                   -- See Note [Occurrence analysis of unfoldings]
 
-mkWwInlineRule :: CoreExpr -> Arity -> Unfolding
-mkWwInlineRule expr arity
+mkWwInlineRule :: DynFlags -> CoreExpr -> Arity -> Unfolding
+mkWwInlineRule dflags expr arity
   = mkCoreUnfolding InlineStable True
-                   (simpleOptExpr expr)
+                   (simpleOptExpr dflags expr)
                    (UnfWhen { ug_arity = arity, ug_unsat_ok = unSaturatedOk
                             , ug_boring_ok = boringCxtNotOk })
 
 mkCompulsoryUnfolding :: CoreExpr -> Unfolding
 mkCompulsoryUnfolding expr         -- Used for things that absolutely must be unfolded
   = mkCoreUnfolding InlineCompulsory True
-                    (simpleOptExpr expr)
+                    (simpleOptExpr unsafeGlobalDynFlags expr)
                     (UnfWhen { ug_arity = 0    -- Arity of unfolding doesn't matter
                              , ug_unsat_ok = unSaturatedOk, ug_boring_ok = boringCxtOk })
 
@@ -122,7 +126,7 @@ mkWorkerUnfolding dflags work_fn
   | isStableSource src
   = mkCoreUnfolding src top_lvl new_tmpl guidance
   where
-    new_tmpl = simpleOptExpr (work_fn tmpl)
+    new_tmpl = simpleOptExpr dflags (work_fn tmpl)
     guidance = calcUnfoldingGuidance dflags False new_tmpl
 
 mkWorkerUnfolding _ _ _ = noUnfolding
@@ -137,7 +141,7 @@ mkInlineUnfolding expr
                     True         -- Note [Top-level flag on inline rules]
                     expr' guide
   where
-    expr' = simpleOptExpr expr
+    expr' = simpleOptExpr unsafeGlobalDynFlags expr
     guide = UnfWhen { ug_arity = manifestArity expr'
                     , ug_unsat_ok = unSaturatedOk
                     , ug_boring_ok = boring_ok }
@@ -151,24 +155,28 @@ mkInlineUnfoldingWithArity arity expr
                     True         -- Note [Top-level flag on inline rules]
                     expr' guide
   where
-    expr' = simpleOptExpr expr
+    expr' = simpleOptExpr unsafeGlobalDynFlags expr
     guide = UnfWhen { ug_arity = arity
                     , ug_unsat_ok = needSaturated
                     , ug_boring_ok = boring_ok }
-    boring_ok = inlineBoringOk expr'
+    -- See Note [INLINE pragmas and boring contexts] as to why we need to look
+    -- at the arity here.
+    boring_ok | arity == 0 = True
+              | otherwise  = inlineBoringOk expr'
 
 mkInlinableUnfolding :: DynFlags -> CoreExpr -> Unfolding
 mkInlinableUnfolding dflags expr
   = mkUnfolding dflags InlineStable False False expr'
   where
-    expr' = simpleOptExpr expr
+    expr' = simpleOptExpr dflags expr
 
-specUnfolding :: [Var] -> (CoreExpr -> CoreExpr) -> Arity -> Unfolding -> Unfolding
+specUnfolding :: DynFlags -> [Var] -> (CoreExpr -> CoreExpr) -> Arity
+              -> Unfolding -> Unfolding
 -- See Note [Specialising unfoldings]
 -- specUnfolding spec_bndrs spec_app arity_decrease unf
 --   = \spec_bndrs. spec_app( unf )
 --
-specUnfolding spec_bndrs spec_app arity_decrease
+specUnfolding dflags spec_bndrs spec_app arity_decrease
               df@(DFunUnfolding { df_bndrs = old_bndrs, df_con = con, df_args = args })
   = ASSERT2( arity_decrease == count isId old_bndrs - count isId spec_bndrs, ppr df )
     mkDFunUnfolding spec_bndrs con (map spec_arg args)
@@ -180,11 +188,11 @@ specUnfolding spec_bndrs spec_app arity_decrease
       --       \new_bndrs. MkD (spec_app(\old_bndrs. <op1>)) ... ditto <opn>
       -- The ASSERT checks the value part of that
   where
-    spec_arg arg = simpleOptExpr (spec_app (mkLams old_bndrs arg))
+    spec_arg arg = simpleOptExpr dflags (spec_app (mkLams old_bndrs arg))
                    -- The beta-redexes created by spec_app will be
                    -- simplified away by simplOptExpr
 
-specUnfolding spec_bndrs spec_app arity_decrease
+specUnfolding dflags spec_bndrs spec_app arity_decrease
               (CoreUnfolding { uf_src = src, uf_tmpl = tmpl
                              , uf_is_top = top_lvl
                              , uf_guidance = old_guidance })
@@ -195,13 +203,13 @@ specUnfolding spec_bndrs spec_app arity_decrease
  = let guidance = UnfWhen { ug_arity     = old_arity - arity_decrease
                           , ug_unsat_ok  = unsat_ok
                           , ug_boring_ok = boring_ok }
-       new_tmpl = simpleOptExpr (mkLams spec_bndrs (spec_app tmpl))
+       new_tmpl = simpleOptExpr dflags (mkLams spec_bndrs (spec_app tmpl))
                    -- The beta-redexes created by spec_app will be
                    -- simplified away by simplOptExpr
 
    in mkCoreUnfolding src top_lvl new_tmpl guidance
 
-specUnfolding _ _ _ _ = noUnfolding
+specUnfolding _ _ _ _ _ = noUnfolding
 
 {- Note [Specialising unfoldings]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -231,6 +239,72 @@ specUnfolding to specialise its unfolding.  Some important points:
         we keep it (so the specialised thing too will always inline)
      if a stable unfolding has UnfoldingGuidance of UnfIfGoodArgs
         (which arises from INLINABLE), we discard it
+
+Note [Honour INLINE on 0-ary bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+
+   x = <expensive>
+   {-# INLINE x #-}
+
+   f y = ...x...
+
+The semantics of an INLINE pragma is
+
+  inline x at every call site, provided it is saturated;
+  that is, applied to at least as many arguments as appear
+  on the LHS of the Haskell source definition.
+
+(This soure-code-derived arity is stored in the `ug_arity` field of
+the `UnfoldingGuidance`.)
+
+In the example, x's ug_arity is 0, so we should inline it at every use
+site.  It's rare to have such an INLINE pragma (usually INLINE Is on
+functions), but it's occasionally very important (Trac #15578, #15519).
+In #15519 we had something like
+   x = case (g a b) of I# r -> T r
+   {-# INLINE x #-}
+   f y = ...(h x)....
+
+where h is strict.  So we got
+   f y = ...(case g a b of I# r -> h (T r))...
+
+and that in turn allowed SpecConstr to ramp up performance.
+
+How do we deliver on this?  By adjusting the ug_boring_ok
+flag in mkInlineUnfoldingWithArity; see
+Note [INLINE pragmas and boring contexts]
+
+NB: there is a real risk that full laziness will float it right back
+out again. Consider again
+  x = factorial 200
+  {-# INLINE x #-}
+  f y = ...x...
+
+After inlining we get
+  f y = ...(factorial 200)...
+
+but it's entirely possible that full laziness will do
+  lvl23 = factorial 200
+  f y = ...lvl23...
+
+That's a problem for another day.
+
+Note [INLINE pragmas and boring contexts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+An INLINE pragma uses mkInlineUnfoldingWithArity to build the
+unfolding.  That sets the ug_boring_ok flag to False if the function
+is not tiny (inlineBorkingOK), so that even INLINE functions are not
+inlined in an utterly boring context.  E.g.
+     \x y. Just (f y x)
+Nothing is gained by inlining f here, even if it has an INLINE
+pragma.
+
+But for 0-ary bindings, we want to inline regardless; see
+Note [Honour INLINE on 0-ary bindings].
+
+I'm a bit worried that it's possible for the same kind of problem
+to arise for non-0-ary functions too, but let's wait and see.
 -}
 
 mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
@@ -696,7 +770,8 @@ sizeExpr dflags bOMB_OUT_SIZE top_args expr
 -- | Finds a nominal size of a string literal.
 litSize :: Literal -> Int
 -- Used by CoreUnfold.sizeExpr
-litSize (LitInteger {}) = 100   -- Note [Size of literal integers]
+litSize (LitNumber LitNumInteger _ _) = 100   -- Note [Size of literal integers]
+litSize (LitNumber LitNumNatural _ _) = 100
 litSize (MachStr str)   = 10 + 10 * ((BS.length str + 3) `div` 4)
         -- If size could be 0 then @f "x"@ might be too small
         -- [Sept03: make literal strings a bit bigger to avoid fruitless
@@ -943,7 +1018,7 @@ In a function application (f a b)
 Code for manipulating sizes
 -}
 
--- | The size of an candidate expression for unfolding
+-- | The size of a candidate expression for unfolding
 data ExprSize
     = TooBig
     | SizeIs { _es_size_is  :: {-# UNPACK #-} !Int -- ^ Size found
@@ -1147,51 +1222,55 @@ callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info
       -- idUnfolding checks for loop-breakers, returning NoUnfolding
       -- Things with an INLINE pragma may have an unfolding *and*
       -- be a loop breaker  (maybe the knot is not yet untied)
-        CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top
+        CoreUnfolding { uf_tmpl = unf_template
                       , uf_is_work_free = is_wf
                       , uf_guidance = guidance, uf_expandable = is_exp }
           | active_unfolding -> tryUnfolding dflags id lone_variable
-                                    arg_infos cont_info unf_template is_top
+                                    arg_infos cont_info unf_template
                                     is_wf is_exp guidance
-          | otherwise -> traceInline dflags "Inactive unfolding:" (ppr id) Nothing
+          | otherwise -> traceInline dflags id "Inactive unfolding:" (ppr id) Nothing
         NoUnfolding      -> Nothing
         BootUnfolding    -> Nothing
         OtherCon {}      -> Nothing
         DFunUnfolding {} -> Nothing     -- Never unfold a DFun
 
-traceInline :: DynFlags -> String -> SDoc -> a -> a
-traceInline dflags str doc result
+traceInline :: DynFlags -> Id -> String -> SDoc -> a -> a
+traceInline dflags inline_id str doc result
+ | Just prefix <- inlineCheck dflags
+ =  if prefix `isPrefixOf` occNameString (getOccName inline_id)
+      then pprTrace str doc result
+      else result
  | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags
  = pprTrace str doc result
  | otherwise
  = result
 
 tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt
-             -> CoreExpr -> Bool -> Bool -> Bool -> UnfoldingGuidance
+             -> CoreExpr -> Bool -> Bool -> UnfoldingGuidance
              -> Maybe CoreExpr
 tryUnfolding dflags id lone_variable
-             arg_infos cont_info unf_template is_top
+             arg_infos cont_info unf_template
              is_wf is_exp guidance
  = case guidance of
-     UnfNever -> traceInline dflags str (text "UnfNever") Nothing
+     UnfNever -> traceInline dflags id str (text "UnfNever") Nothing
 
      UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok }
         | enough_args && (boring_ok || some_benefit || ufVeryAggressive dflags)
                 -- See Note [INLINE for small functions (3)]
-        -> traceInline dflags str (mk_doc some_benefit empty True) (Just unf_template)
+        -> traceInline dflags id str (mk_doc some_benefit empty True) (Just unf_template)
         | otherwise
-        -> traceInline dflags str (mk_doc some_benefit empty False) Nothing
+        -> traceInline dflags id str (mk_doc some_benefit empty False) Nothing
         where
           some_benefit = calc_some_benefit uf_arity
           enough_args = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0)
 
      UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
         | ufVeryAggressive dflags
-        -> traceInline dflags str (mk_doc some_benefit extra_doc True) (Just unf_template)
+        -> traceInline dflags id str (mk_doc some_benefit extra_doc True) (Just unf_template)
         | is_wf && some_benefit && small_enough
-        -> traceInline dflags str (mk_doc some_benefit extra_doc True) (Just unf_template)
+        -> traceInline dflags id str (mk_doc some_benefit extra_doc True) (Just unf_template)
         | otherwise
-        -> traceInline dflags str (mk_doc some_benefit extra_doc False) Nothing
+        -> traceInline dflags id str (mk_doc some_benefit extra_doc False) Nothing
         where
           some_benefit = calc_some_benefit (length arg_discounts)
           extra_doc = text "discounted size =" <+> int discounted_size
@@ -1239,13 +1318,13 @@ tryUnfolding dflags id lone_variable
           = True
           | otherwise
           = case cont_info of
-              CaseCtxt   -> not (lone_variable && is_wf)  -- Note [Lone variables]
-              ValAppCtxt -> True                              -- Note [Cast then apply]
+              CaseCtxt   -> not (lone_variable && is_exp)  -- Note [Lone variables]
+              ValAppCtxt -> True                           -- Note [Cast then apply]
               RuleArgCtxt -> uf_arity > 0  -- See Note [Unfold info lazy contexts]
-              DiscArgCtxt -> uf_arity > 0  --
+              DiscArgCtxt -> uf_arity > 0  -- Note [Inlining in ArgCtxt]
               RhsCtxt     -> uf_arity > 0  --
-              _           -> not is_top && uf_arity > 0   -- Note [Nested functions]
-                                                      -- Note [Inlining in ArgCtxt]
+              _other      -> False         -- See Note [Nested functions]
+
 
 {-
 Note [Unfold into lazy contexts], Note [RHS of lets]
@@ -1315,18 +1394,17 @@ However for worker/wrapper it may be worth inlining even if the
 arity is not satisfied (as we do in the CoreUnfolding case) so we don't
 require saturation.
 
-
 Note [Nested functions]
 ~~~~~~~~~~~~~~~~~~~~~~~
-If a function has a nested defn we also record some-benefit, on the
-grounds that we are often able to eliminate the binding, and hence the
-allocation, for the function altogether; this is good for join points.
-But this only makes sense for *functions*; inlining a constructor
-doesn't help allocation unless the result is scrutinised.  UNLESS the
-constructor occurs just once, albeit possibly in multiple case
-branches.  Then inlining it doesn't increase allocation, but it does
-increase the chance that the constructor won't be allocated at all in
-the branches that don't use it.
+At one time we treated a call of a non-top-level function as
+"interesting" (regardless of how boring the context) in the hope
+that inlining it would eliminate the binding, and its allocation.
+Specifically, in the default case of interesting_call we had
+   _other -> not is_top && uf_arity > 0
+
+But actually postInlineUnconditionally does some of this and overall
+it makes virtually no difference to nofib.  So I simplified away this
+special case
 
 Note [Cast then apply]
 ~~~~~~~~~~~~~~~~~~~~~~
@@ -1386,9 +1464,10 @@ because the latter is strict.
         s = "foo"
         f = \x -> ...(error s)...
 
-Fundamentally such contexts should not encourage inlining because the
+Fundamentally such contexts should not encourage inlining because, provided
+the RHS is "expandable" (see Note [exprIsExpandable] in CoreUtils) the
 context can ``see'' the unfolding of the variable (e.g. case or a
-RULE) so there's no gain.  If the thing is bound to a value.
+RULE) so there's no gain.
 
 However, watch out:
 
@@ -1439,6 +1518,8 @@ This kind of thing can occur if you have
         foo = let x = e in (x,x)
 
 which Roman did.
+
+
 -}
 
 computeDiscount :: DynFlags -> [Int] -> Int -> [ArgSummary] -> CallCtxt
diff --git a/compiler/coreSyn/CoreUtils.hs b/compiler/coreSyn/CoreUtils.hs
index 540a36e0a1..453d984ec4 100644
--- a/compiler/coreSyn/CoreUtils.hs
+++ b/compiler/coreSyn/CoreUtils.hs
@@ -29,7 +29,8 @@ module CoreUtils (
         exprIsHNF, exprOkForSpeculation, exprOkForSideEffects, exprIsWorkFree,
         exprIsBig, exprIsConLike,
         rhsIsStatic, isCheapApp, isExpandableApp,
-        exprIsLiteralString, exprIsTopLevelBindable,
+        exprIsTickedString, exprIsTickedString_maybe,
+        exprIsTopLevelBindable,
         altsAreExhaustive,
 
         -- * Equality
@@ -58,6 +59,8 @@ module CoreUtils (
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import CoreSyn
 import PrelNames ( makeStaticName )
 import PprCore
@@ -72,8 +75,9 @@ import DataCon
 import PrimOp
 import Id
 import IdInfo
+import PrelNames( absentErrorIdKey )
 import Type
-import TyCoRep( TyBinder(..) )
+import TyCoRep( TyCoBinder(..), TyBinder )
 import Coercion
 import TyCon
 import Unique
@@ -83,14 +87,17 @@ import DynFlags
 import FastString
 import Maybes
 import ListSetOps       ( minusList )
-import BasicTypes       ( Arity )
+import BasicTypes       ( Arity, isConLike )
 import Platform
 import Util
 import Pair
+import Data.ByteString     ( ByteString )
 import Data.Function       ( on )
 import Data.List
 import Data.Ord            ( comparing )
 import OrdList
+import qualified Data.Set as Set
+import UniqSet
 
 {-
 ************************************************************************
@@ -123,13 +130,13 @@ exprType other = pprTrace "exprType" (pprCoreExpr other) alphaTy
 
 coreAltType :: CoreAlt -> Type
 -- ^ Returns the type of the alternatives right hand side
-coreAltType (_,bs,rhs)
-  | any bad_binder bs = expandTypeSynonyms ty
-  | otherwise         = ty    -- Note [Existential variables and silly type synonyms]
+coreAltType alt@(_,bs,rhs)
+  = case occCheckExpand bs rhs_ty of
+      -- Note [Existential variables and silly type synonyms]
+      Just ty -> ty
+      Nothing -> pprPanic "coreAltType" (pprCoreAlt alt $$ ppr rhs_ty)
   where
-    ty           = exprType rhs
-    free_tvs     = tyCoVarsOfType ty
-    bad_binder b = b `elemVarSet` free_tvs
+    rhs_ty = exprType rhs
 
 coreAltsType :: [CoreAlt] -> Type
 -- ^ Returns the type of the first alternative, which should be the same as for all alternatives
@@ -179,7 +186,7 @@ isExprLevPoly = go
 Note [Type bindings]
 ~~~~~~~~~~~~~~~~~~~~
 Core does allow type bindings, although such bindings are
-not much used, except in the output of the desuguarer.
+not much used, except in the output of the desugarer.
 Example:
      let a = Int in (\x:a. x)
 Given this, exprType must be careful to substitute 'a' in the
@@ -250,7 +257,7 @@ applyTypeToArgs e op_ty args
 
 -- | Wrap the given expression in the coercion safely, dropping
 -- identity coercions and coalescing nested coercions
-mkCast :: CoreExpr -> Coercion -> CoreExpr
+mkCast :: CoreExpr -> CoercionR -> CoreExpr
 mkCast e co
   | ASSERT2( coercionRole co == Representational
            , text "coercion" <+> ppr co <+> ptext (sLit "passed to mkCast")
@@ -474,8 +481,15 @@ bindNonRec :: Id -> CoreExpr -> CoreExpr -> CoreExpr
 -- the simplifier deals with them perfectly well. See
 -- also 'MkCore.mkCoreLet'
 bindNonRec bndr rhs body
-  | needsCaseBinding (idType bndr) rhs = Case rhs bndr (exprType body) [(DEFAULT, [], body)]
-  | otherwise                          = Let (NonRec bndr rhs) body
+  | isTyVar bndr                       = let_bind
+  | isCoVar bndr                       = if isCoArg rhs then let_bind
+    {- See Note [Binding coercions] -}                  else case_bind
+  | isJoinId bndr                      = let_bind
+  | needsCaseBinding (idType bndr) rhs = case_bind
+  | otherwise                          = let_bind
+  where
+    case_bind = Case rhs bndr (exprType body) [(DEFAULT, [], body)]
+    let_bind  = Let (NonRec bndr rhs) body
 
 -- | Tests whether we have to use a @case@ rather than @let@ binding for this expression
 -- as per the invariants of 'CoreExpr': see "CoreSyn#let_app_invariant"
@@ -498,7 +512,12 @@ mkAltExpr (LitAlt lit) [] []
 mkAltExpr (LitAlt _) _ _ = panic "mkAltExpr LitAlt"
 mkAltExpr DEFAULT _ _ = panic "mkAltExpr DEFAULT"
 
-{-
+{- Note [Binding coercions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider binding a CoVar, c = e.  Then, we must atisfy
+Note [CoreSyn type and coercion invariant] in CoreSyn,
+which allows only (Coercion co) on the RHS.
+
 ************************************************************************
 *                                                                      *
                Operations oer case alternatives
@@ -525,7 +544,7 @@ isDefaultAlt _               = False
 -- | Find the case alternative corresponding to a particular
 -- constructor: panics if no such constructor exists
 findAlt :: AltCon -> [(AltCon, a, b)] -> Maybe (AltCon, a, b)
-    -- A "Nothing" result *is* legitmiate
+    -- A "Nothing" result *is* legitimate
     -- See Note [Unreachable code]
 findAlt con alts
   = case alts of
@@ -607,8 +626,6 @@ filterAlts :: TyCon                -- ^ Type constructor of scrutinee's type (us
              --  2. The new alternatives, trimmed by
              --        a) remove imposs_cons
              --        b) remove constructors which can't match because of GADTs
-             --      and with the DEFAULT expanded to a DataAlt if there is exactly
-             --      remaining constructor that can match
              --
              -- NB: the final list of alternatives may be empty:
              -- This is a tricky corner case.  If the data type has no constructors,
@@ -626,22 +643,26 @@ filterAlts _tycon inst_tys imposs_cons alts
 
     trimmed_alts = filterOut (impossible_alt inst_tys) alts_wo_default
 
-    imposs_deflt_cons = nub (imposs_cons ++ alt_cons)
+    imposs_cons_set = Set.fromList imposs_cons
+    imposs_deflt_cons =
+      imposs_cons ++ filterOut (`Set.member` imposs_cons_set) alt_cons
          -- "imposs_deflt_cons" are handled
          --   EITHER by the context,
          --   OR by a non-DEFAULT branch in this case expression.
 
     impossible_alt :: [Type] -> (AltCon, a, b) -> Bool
-    impossible_alt _ (con, _, _) | con `elem` imposs_cons = True
+    impossible_alt _ (con, _, _) | con `Set.member` imposs_cons_set = True
     impossible_alt inst_tys (DataAlt con, _, _) = dataConCannotMatch inst_tys con
     impossible_alt _  _                         = False
 
-refineDefaultAlt :: [Unique] -> TyCon -> [Type]
-                 -> [AltCon]  -- Constructors that cannot match the DEFAULT (if any)
+-- | Refine the default alternative to a 'DataAlt', if there is a unique way to do so.
+-- See Note [Refine Default Alts]
+refineDefaultAlt :: [Unique]          -- ^ Uniques for constructing new binders
+                 -> TyCon             -- ^ Type constructor of scrutinee's type
+                 -> [Type]            -- ^ Type arguments of scrutinee's type
+                 -> [AltCon]          -- ^ Constructors that cannot match the DEFAULT (if any)
                  -> [CoreAlt]
-                 -> (Bool, [CoreAlt])
--- Refine the default alternative to a DataAlt,
--- if there is a unique way to do so
+                 -> (Bool, [CoreAlt]) -- ^ 'True', if a default alt was replaced with a 'DataAlt'
 refineDefaultAlt us tycon tys imposs_deflt_cons all_alts
   | (DEFAULT,_,rhs) : rest_alts <- all_alts
   , isAlgTyCon tycon            -- It's a data type, tuple, or unboxed tuples.
@@ -649,8 +670,11 @@ refineDefaultAlt us tycon tys imposs_deflt_cons all_alts
                                 --      case x of { DEFAULT -> e }
                                 -- and we don't want to fill in a default for them!
   , Just all_cons <- tyConDataCons_maybe tycon
-  , let imposs_data_cons = [con | DataAlt con <- imposs_deflt_cons]   -- We now know it's a data type
-        impossible con   = con `elem` imposs_data_cons || dataConCannotMatch tys con
+  , let imposs_data_cons = mkUniqSet [con | DataAlt con <- imposs_deflt_cons]
+                             -- We now know it's a data type, so we can use
+                             -- UniqSet rather than Set (more efficient)
+        impossible con   = con `elementOfUniqSet` imposs_data_cons
+                             || dataConCannotMatch tys con
   = case filterOut impossible all_cons of
        -- Eliminate the default alternative
        -- altogether if it can't match:
@@ -675,6 +699,93 @@ refineDefaultAlt us tycon tys imposs_deflt_cons all_alts
   | otherwise      -- The common case
   = (False, all_alts)
 
+{- Note [Refine Default Alts]
+
+refineDefaultAlt replaces the DEFAULT alt with a constructor if there is one
+possible value it could be.
+
+The simplest example being
+
+foo :: () -> ()
+foo x = case x of !_ -> ()
+
+rewrites to
+
+foo :: () -> ()
+foo x = case x of () -> ()
+
+There are two reasons in general why this is desirable.
+
+1. We can simplify inner expressions
+
+In this example we can eliminate the inner case by refining the outer case.
+If we don't refine it, we are left with both case expressions.
+
+```
+{-# LANGUAGE BangPatterns #-}
+module Test where
+
+mid x = x
+{-# NOINLINE mid #-}
+
+data Foo = Foo1 ()
+
+test :: Foo -> ()
+test x =
+  case x of
+    !_ -> mid (case x of
+                Foo1 x1 -> x1)
+
+```
+
+refineDefaultAlt fills in the DEFAULT here with `Foo ip1` and then x
+becomes bound to `Foo ip1` so is inlined into the other case which
+causes the KnownBranch optimisation to kick in.
+
+
+2. combineIdenticalAlts does a better job
+
+Simon Jakobi also points out that that combineIdenticalAlts will do a better job
+if we refine the DEFAULT first.
+
+```
+data D = C0 | C1 | C2
+
+case e of
+   DEFAULT -> e0
+   C0 -> e1
+   C1 -> e1
+```
+
+When we apply combineIdenticalAlts to this expression, it can't
+combine the alts for C0 and C1, as we already have a default case.
+
+If we apply refineDefaultAlt first, we get
+
+```
+case e of
+  C0 -> e1
+  C1 -> e1
+  C2 -> e0
+```
+
+and combineIdenticalAlts can turn that into
+
+```
+case e of
+  DEFAULT -> e1
+  C2 -> e0
+```
+
+It isn't obvious that refineDefaultAlt does this but if you look at its one
+call site in SimplUtils then the `imposs_deflt_cons` argument is populated with
+constructors which are matched elsewhere.
+
+-}
+
+
+
+
 {- Note [Combine identical alternatives]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 If several alternatives are identical, merge them into a single
@@ -844,6 +955,8 @@ it off at source.
 -}
 
 exprIsTrivial :: CoreExpr -> Bool
+-- If you modify this function, you may also
+-- need to modify getIdFromTrivialExpr
 exprIsTrivial (Var _)          = True        -- See Note [Variables are trivial]
 exprIsTrivial (Type _)         = True
 exprIsTrivial (Coercion _)     = True
@@ -873,20 +986,24 @@ if the variable actually refers to a literal; thus we use
 T12076lit for an example where this matters.
 -}
 
-getIdFromTrivialExpr :: CoreExpr -> Id
+getIdFromTrivialExpr :: HasDebugCallStack => CoreExpr -> Id
 getIdFromTrivialExpr e
     = fromMaybe (pprPanic "getIdFromTrivialExpr" (ppr e))
                 (getIdFromTrivialExpr_maybe e)
 
 getIdFromTrivialExpr_maybe :: CoreExpr -> Maybe Id
 -- See Note [getIdFromTrivialExpr]
-getIdFromTrivialExpr_maybe e = go e
-  where go (Var v) = Just v
-        go (App f t) | not (isRuntimeArg t) = go f
-        go (Tick t e) | not (tickishIsCode t) = go e
-        go (Cast e _) = go e
-        go (Lam b e) | not (isRuntimeVar b) = go e
-        go _ = Nothing
+-- Th equations for this should line up with those for exprIsTrivial
+getIdFromTrivialExpr_maybe e
+  = go e
+  where
+    go (App f t) | not (isRuntimeArg t)   = go f
+    go (Tick t e) | not (tickishIsCode t) = go e
+    go (Cast e _)                         = go e
+    go (Lam b e) | not (isRuntimeVar b)   = go e
+    go (Case e _ _ [])                    = go e
+    go (Var v) = Just v
+    go _       = Nothing
 
 {-
 exprIsBottom is a very cheap and cheerful function; it may return
@@ -1073,29 +1190,6 @@ Note that exprIsHNF does not imply exprIsCheap.  Eg
 This responds True to exprIsHNF (you can discard a seq), but
 False to exprIsCheap.
 
-Note [exprIsExpandable]
-~~~~~~~~~~~~~~~~~~~~~~~
-An expression is "expandable" if we are willing to dupicate it, if doing
-so might make a RULE or case-of-constructor fire.  Mainly this means
-data-constructor applications, but it's a bit more generous than exprIsCheap
-because it is true of "CONLIKE" Ids: see Note [CONLIKE pragma] in BasicTypes.
-
-It is used to set the uf_expandable field of an Unfolding, and that
-in turn is used
-  * In RULE matching
-  * In exprIsConApp_maybe, exprIsLiteral_maybe, exprIsLambda_maybe
-
-But take care: exprIsExpandable should /not/ be true of primops.  I
-found this in test T5623a:
-    let q = /\a. Ptr a (a +# b)
-    in case q @ Float of Ptr v -> ...q...
-
-q's inlining should not be expandable, else exprIsConApp_maybe will
-say that (q @ Float) expands to (Ptr a (a +# b)), and that will
-duplicate the (a +# b) primop, which we should not do lightly.
-(It's quite hard to trigger this bug, but T13155 does so for GHC 8.0.)
-
-
 Note [Arguments and let-bindings exprIsCheapX]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 What predicate should we apply to the argument of an application, or the
@@ -1121,16 +1215,12 @@ in this (which it previously was):
 -}
 
 --------------------
-exprIsCheap :: CoreExpr -> Bool
-exprIsCheap = exprIsCheapX isCheapApp
-
-exprIsExpandable :: CoreExpr -> Bool -- See Note [exprIsExpandable]
-exprIsExpandable = exprIsCheapX isExpandableApp
-
 exprIsWorkFree :: CoreExpr -> Bool   -- See Note [exprIsWorkFree]
 exprIsWorkFree = exprIsCheapX isWorkFreeApp
 
---------------------
+exprIsCheap :: CoreExpr -> Bool
+exprIsCheap = exprIsCheapX isCheapApp
+
 exprIsCheapX :: CheapAppFun -> CoreExpr -> Bool
 exprIsCheapX ok_app e
   = ok e
@@ -1158,6 +1248,75 @@ exprIsCheapX ok_app e
       -- App, Let: see Note [Arguments and let-bindings exprIsCheapX]
 
 
+{- Note [exprIsExpandable]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+An expression is "expandable" if we are willing to duplicate it, if doing
+so might make a RULE or case-of-constructor fire.  Consider
+   let x = (a,b)
+       y = build g
+   in ....(case x of (p,q) -> rhs)....(foldr k z y)....
+
+We don't inline 'x' or 'y' (see Note [Lone variables] in CoreUnfold),
+but we do want
+
+ * the case-expression to simplify
+   (via exprIsConApp_maybe, exprIsLiteral_maybe)
+
+ * the foldr/build RULE to fire
+   (by expanding the unfolding during rule matching)
+
+So we classify the unfolding of a let-binding as "expandable" (via the
+uf_expandable field) if we want to do this kind of on-the-fly
+expansion.  Specifically:
+
+* True of constructor applications (K a b)
+
+* True of applications of a "CONLIKE" Id; see Note [CONLIKE pragma] in BasicTypes.
+  (NB: exprIsCheap might not be true of this)
+
+* False of case-expressions.  If we have
+    let x = case ... in ...(case x of ...)...
+  we won't simplify.  We have to inline x.  See Trac #14688.
+
+* False of let-expressions (same reason); and in any case we
+  float lets out of an RHS if doing so will reveal an expandable
+  application (see SimplEnv.doFloatFromRhs).
+
+* Take care: exprIsExpandable should /not/ be true of primops.  I
+  found this in test T5623a:
+    let q = /\a. Ptr a (a +# b)
+    in case q @ Float of Ptr v -> ...q...
+
+  q's inlining should not be expandable, else exprIsConApp_maybe will
+  say that (q @ Float) expands to (Ptr a (a +# b)), and that will
+  duplicate the (a +# b) primop, which we should not do lightly.
+  (It's quite hard to trigger this bug, but T13155 does so for GHC 8.0.)
+-}
+
+-------------------------------------
+exprIsExpandable :: CoreExpr -> Bool
+-- See Note [exprIsExpandable]
+exprIsExpandable e
+  = ok e
+  where
+    ok e = go 0 e
+
+    -- n is the number of value arguments
+    go n (Var v)                      = isExpandableApp v n
+    go _ (Lit {})                     = True
+    go _ (Type {})                    = True
+    go _ (Coercion {})                = True
+    go n (Cast e _)                   = go n e
+    go n (Tick t e) | tickishCounts t = False
+                    | otherwise       = go n e
+    go n (Lam x e)  | isRuntimeVar x  = n==0 || go (n-1) e
+                    | otherwise       = go n e
+    go n (App f e)  | isRuntimeArg e  = go (n+1) f && ok e
+                    | otherwise       = go n f
+    go _ (Case {})                    = False
+    go _ (Let {})                     = False
+
+
 -------------------------------------
 type CheapAppFun = Id -> Arity -> Bool
   -- Is an application of this function to n *value* args
@@ -1168,22 +1327,11 @@ type CheapAppFun = Id -> Arity -> Bool
   --    isCheapApp
   --    isExpandableApp
 
-  -- NB: isCheapApp and isExpandableApp are called from outside
-  --     this module, so don't be tempted to move the notRedex
-  --     stuff into the call site in exprIsCheapX, and remove it
-  --     from the CheapAppFun implementations
-
-
-notRedex :: CheapAppFun
-notRedex fn n_val_args
-  =  n_val_args == 0           -- No value args
-  || n_val_args < idArity fn   -- Partial application
-  || isBottomingId fn   -- OK to duplicate calls to bottom;
-                        -- it certainly doesn't need to be shared!
-
 isWorkFreeApp :: CheapAppFun
 isWorkFreeApp fn n_val_args
-  | notRedex fn n_val_args
+  | n_val_args == 0           -- No value args
+  = True
+  | n_val_args < idArity fn   -- Partial application
   = True
   | otherwise
   = case idDetails fn of
@@ -1192,11 +1340,11 @@ isWorkFreeApp fn n_val_args
 
 isCheapApp :: CheapAppFun
 isCheapApp fn n_val_args
-  | notRedex fn n_val_args
-  = True
+  | isWorkFreeApp fn n_val_args = True
+  | isBottomingId fn            = True  -- See Note [isCheapApp: bottoming functions]
   | otherwise
   = case idDetails fn of
-      DataConWorkId {} -> True
+      DataConWorkId {} -> True  -- Actually handled by isWorkFreeApp
       RecSelId {}      -> n_val_args == 1  -- See Note [Record selection]
       ClassOpId {}     -> n_val_args == 1
       PrimOpId op      -> primOpIsCheap op
@@ -1208,21 +1356,24 @@ isCheapApp fn n_val_args
 
 isExpandableApp :: CheapAppFun
 isExpandableApp fn n_val_args
-  | notRedex fn n_val_args
-  = True
-  | isConLikeId fn
-  = True
+  | isWorkFreeApp fn n_val_args = True
   | otherwise
   = case idDetails fn of
-      DataConWorkId {} -> True
+      DataConWorkId {} -> True  -- Actually handled by isWorkFreeApp
       RecSelId {}      -> n_val_args == 1  -- See Note [Record selection]
       ClassOpId {}     -> n_val_args == 1
       PrimOpId {}      -> False
-      _                -> all_pred_args n_val_args (idType fn)
+      _ | isBottomingId fn               -> False
+          -- See Note [isExpandableApp: bottoming functions]
+        | isConLike (idRuleMatchInfo fn) -> True
+        | all_args_are_preds             -> True
+        | otherwise                      -> False
 
   where
-  -- See if all the arguments are PredTys (implicit params or classes)
-  -- If so we'll regard it as expandable; see Note [Expandable overloadings]
+     -- See if all the arguments are PredTys (implicit params or classes)
+     -- If so we'll regard it as expandable; see Note [Expandable overloadings]
+     all_args_are_preds = all_pred_args n_val_args (idType fn)
+
      all_pred_args n_val_args ty
        | n_val_args == 0
        = True
@@ -1235,7 +1386,35 @@ isExpandableApp fn n_val_args
        | otherwise
        = False
 
-{- Note [Record selection]
+{- Note [isCheapApp: bottoming functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+I'm not sure why we have a special case for bottoming
+functions in isCheapApp.  Maybe we don't need it.
+
+Note [isExpandableApp: bottoming functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It's important that isExpandableApp does not respond True to bottoming
+functions.  Recall  undefined :: HasCallStack => a
+Suppose isExpandableApp responded True to (undefined d), and we had:
+
+  x = undefined <dict-expr>
+
+Then Simplify.prepareRhs would ANF the RHS:
+
+  d = <dict-expr>
+  x = undefined d
+
+This is already bad: we gain nothing from having x bound to (undefined
+var), unlike the case for data constructors.  Worse, we get the
+simplifier loop described in OccurAnal Note [Cascading inlines].
+Suppose x occurs just once; OccurAnal.occAnalNonRecRhs decides x will
+certainly_inline; so we end up inlining d right back into x; but in
+the end x doesn't inline because it is bottom (preInlineUnconditionally);
+so the process repeats.. We could elaborate the certainly_inline logic
+some more, but it's better just to treat bottoming bindings as
+non-expandable, because ANFing them is a bad idea in the first place.
+
+Note [Record selection]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 I'm experimenting with making record selection
 look cheap, so we will substitute it inside a
@@ -1308,18 +1487,22 @@ it's applied only to dictionaries.
 --
 -- We can only do this if the @y + 1@ is ok for speculation: it has no
 -- side effects, and can't diverge or raise an exception.
-exprOkForSpeculation, exprOkForSideEffects :: Expr b -> Bool
+
+exprOkForSpeculation, exprOkForSideEffects :: CoreExpr -> Bool
 exprOkForSpeculation = expr_ok primOpOkForSpeculation
 exprOkForSideEffects = expr_ok primOpOkForSideEffects
-  -- Polymorphic in binder type
-  -- There is one call at a non-Id binder type, in SetLevels
 
-expr_ok :: (PrimOp -> Bool) -> Expr b -> Bool
+expr_ok :: (PrimOp -> Bool) -> CoreExpr -> Bool
 expr_ok _ (Lit _)      = True
 expr_ok _ (Type _)     = True
 expr_ok _ (Coercion _) = True
-expr_ok primop_ok (Var v)      = app_ok primop_ok v []
-expr_ok primop_ok (Cast e _)   = expr_ok primop_ok e
+
+expr_ok primop_ok (Var v)    = app_ok primop_ok v []
+expr_ok primop_ok (Cast e _) = expr_ok primop_ok e
+expr_ok primop_ok (Lam b e)
+                 | isTyVar b = expr_ok primop_ok  e
+                 | otherwise = True
+
 
 -- Tick annotations that *tick* cannot be speculated, because these
 -- are meant to identify whether or not (and how often) the particular
@@ -1328,10 +1511,18 @@ expr_ok primop_ok (Tick tickish e)
    | tickishCounts tickish = False
    | otherwise             = expr_ok primop_ok e
 
-expr_ok primop_ok (Case e _ _ alts)
-  =  expr_ok primop_ok e  -- Note [exprOkForSpeculation: case expressions]
+expr_ok _ (Let {}) = False
+  -- Lets can be stacked deeply, so just give up.
+  -- In any case, the argument of exprOkForSpeculation is
+  -- usually in a strict context, so any lets will have been
+  -- floated away.
+
+expr_ok primop_ok (Case scrut bndr _ alts)
+  =  -- See Note [exprOkForSpeculation: case expressions]
+     expr_ok primop_ok scrut
+  && isUnliftedType (idType bndr)
   && all (\(_,_,rhs) -> expr_ok primop_ok rhs) alts
-  && altsAreExhaustive alts     -- Note [Exhaustive alts]
+  && altsAreExhaustive alts
 
 expr_ok primop_ok other_expr
   = case collectArgs other_expr of
@@ -1340,7 +1531,7 @@ expr_ok primop_ok other_expr
         _            -> False
 
 -----------------------------
-app_ok :: (PrimOp -> Bool) -> Id -> [Expr b] -> Bool
+app_ok :: (PrimOp -> Bool) -> Id -> [CoreExpr] -> Bool
 app_ok primop_ok fun args
   = case idDetails fun of
       DFunId new_type ->  not new_type
@@ -1363,8 +1554,11 @@ app_ok primop_ok fun args
               -- Often there is a literal divisor, and this
               -- can get rid of a thunk in an inner loop
 
+        | SeqOp <- op    -- See Note [seq# and expr_ok]
+        -> all (expr_ok primop_ok) args
+
         | otherwise
-        -> primop_ok op     -- Check the primop itself
+        -> primop_ok op  -- Check the primop itself
         && and (zipWith arg_ok arg_tys args)  -- Check the arguments
 
       _other -> isUnliftedType (idType fun)          -- c.f. the Var case of exprIsHNF
@@ -1376,7 +1570,7 @@ app_ok primop_ok fun args
   where
     (arg_tys, _) = splitPiTys (idType fun)
 
-    arg_ok :: TyBinder -> Expr b -> Bool
+    arg_ok :: TyBinder -> CoreExpr -> Bool
     arg_ok (Named _) _ = True   -- A type argument
     arg_ok (Anon ty) arg        -- A term argument
        | isUnliftedType ty = expr_ok primop_ok arg
@@ -1411,22 +1605,72 @@ isDivOp FloatDivOp       = True
 isDivOp DoubleDivOp      = True
 isDivOp _                = False
 
-{-
-Note [exprOkForSpeculation: case expressions]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-It's always sound for exprOkForSpeculation to return False, and we
-don't want it to take too long, so it bales out on complicated-looking
-terms.  Notably lets, which can be stacked very deeply; and in any
-case the argument of exprOkForSpeculation is usually in a strict context,
-so any lets will have been floated away.
-
-However, we keep going on case-expressions.  An example like this one
-showed up in DPH code (Trac #3717):
+{- Note [exprOkForSpeculation: case expressions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+exprOkForSpeculation accepts very special case expressions.
+Reason: (a ==# b) is ok-for-speculation, but the litEq rules
+in PrelRules convert it (a ==# 3#) to
+   case a of { DEAFULT -> 0#; 3# -> 1# }
+for excellent reasons described in
+  PrelRules Note [The litEq rule: converting equality to case].
+So, annoyingly, we want that case expression to be
+ok-for-speculation too. Bother.
+
+But we restrict it sharply:
+
+* We restrict it to unlifted scrutinees. Consider this:
+     case x of y {
+       DEFAULT -> ... (let v::Int# = case y of { True  -> e1
+                                               ; False -> e2 }
+                       in ...) ...
+
+  Does the RHS of v satisfy the let/app invariant?  Previously we said
+  yes, on the grounds that y is evaluated.  But the binder-swap done
+  by SetLevels would transform the inner alternative to
+     DEFAULT -> ... (let v::Int# = case x of { ... }
+                     in ...) ....
+  which does /not/ satisfy the let/app invariant, because x is
+  not evaluated. See Note [Binder-swap during float-out]
+  in SetLevels.  To avoid this awkwardness it seems simpler
+  to stick to unlifted scrutinees where the issue does not
+  arise.
+
+* We restrict it to exhaustive alternatives. A non-exhaustive
+  case manifestly isn't ok-for-speculation. Consider
+    case e of x { DEAFULT ->
+      ...(case x of y
+            A -> ...
+            _ -> ...(case (case x of { B -> p; C -> p }) of
+                       I# r -> blah)...
+  If SetLevesls considers the inner nested case as ok-for-speculation
+  it can do case-floating (see Note [Floating cases] in SetLevels).
+  So we'd float to:
+    case e of x { DEAFULT ->
+    case (case x of { B -> p; C -> p }) of I# r ->
+    ...(case x of y
+            A -> ...
+            _ -> ...blah...)...
+  which is utterly bogus (seg fault); see Trac #5453.
+
+  Similarly, this is a valid program (albeit a slightly dodgy one)
+    let v = case x of { B -> ...; C -> ... }
+    in case x of
+         A -> ...
+         _ ->  ...v...v....
+  Should v be considered ok-for-speculation?  Its scrutinee may be
+  evaluated, but the alternatives are incomplete so we should not
+  evaluate it strictly.
+
+  Now, all this is for lifted types, but it'd be the same for any
+  finite unlifted type. We don't have many of them, but we might
+  add unlifted algebraic types in due course.
+
+----- Historical note: Trac #3717: --------
     foo :: Int -> Int
     foo 0 = 0
     foo n = (if n < 5 then 1 else 2) `seq` foo (n-1)
 
-If exprOkForSpeculation doesn't look through case expressions, you get this:
+In earlier GHCs, we got this:
     T.$wfoo =
       \ (ww :: GHC.Prim.Int#) ->
         case ww of ds {
@@ -1435,31 +1679,13 @@ If exprOkForSpeculation doesn't look through case expressions, you get this:
                           GHC.Types.True -> lvl})
                        of _ { __DEFAULT ->
                        T.$wfoo (GHC.Prim.-# ds_XkE 1) };
-          0 -> 0
-        }
-
-The inner case is redundant, and should be nuked.
-
-Note [Exhaustive alts]
-~~~~~~~~~~~~~~~~~~~~~~
-We might have something like
-  case x of {
-    A -> ...
-    _ -> ...(case x of { B -> ...; C -> ... })...
-Here, the inner case is fine, because the A alternative
-can't happen, but it's not ok to float the inner case outside
-the outer one (even if we know x is evaluated outside), because
-then it would be non-exhaustive. See Trac #5453.
-
-Similarly, this is a valid program (albeit a slightly dodgy one)
-   let v = case x of { B -> ...; C -> ... }
-   in case x of
-         A -> ...
-         _ ->  ...v...v....
-But we don't want to speculate the v binding.
+          0 -> 0 }
 
-One could try to be clever, but the easy fix is simpy to regard
-a non-exhaustive case as *not* okForSpeculation.
+Before join-points etc we could only get rid of two cases (which are
+redundant) by recognising that th e(case <# ds 5 of { ... }) is
+ok-for-speculation, even though it has /lifted/ type.  But now join
+points do the job nicely.
+------- End of historical note ------------
 
 
 Note [Primops with lifted arguments]
@@ -1471,8 +1697,8 @@ evaluate them.  Indeed, in general primops are, well, primitive
 and do not perform evaluation.
 
 There is one primop, dataToTag#, which does /require/ a lifted
-argument to be evaluted.  To ensure this, CorePrep adds an
-eval if it can't see the the argument is definitely evaluated
+argument to be evaluated.  To ensure this, CorePrep adds an
+eval if it can't see the argument is definitely evaluated
 (see [dataToTag magic] in CorePrep).
 
 We make no attempt to guarantee that dataToTag#'s argument is
@@ -1489,6 +1715,25 @@ See also Note [dataToTag#] in primops.txt.pp.
 
 Bottom line:
   * in exprOkForSpeculation we simply ignore all lifted arguments.
+  * except see Note [seq# and expr_ok] for an exception
+
+
+Note [seq# and expr_ok]
+~~~~~~~~~~~~~~~~~~~~~~~
+Recall that
+   seq# :: forall a s . a -> State# s -> (# State# s, a #)
+must always evaluate its first argument.  So it's really a
+counter-example to Note [Primops with lifted arguments]. In
+the case of seq# we must check the argument to seq#.  Remember
+item (d) of the specification of exprOkForSpeculation:
+
+  -- Precisely, it returns @True@ iff:
+  --  a) The expression guarantees to terminate,
+         ...
+  --  d) without throwing a Haskell exception
+
+The lack of this special case caused Trac #5129 to go bad again.
+See comment:24 and following
 
 
 ************************************************************************
@@ -1546,9 +1791,9 @@ exprIsHNFlike :: (Var -> Bool) -> (Unfolding -> Bool) -> CoreExpr -> Bool
 exprIsHNFlike is_con is_con_unf = is_hnf_like
   where
     is_hnf_like (Var v) -- NB: There are no value args at this point
-      =  is_con v       -- Catches nullary constructors,
-                        --      so that [] and () are values, for example
-      || idArity v > 0  -- Catches (e.g.) primops that don't have unfoldings
+      =  id_app_is_value v 0 -- Catches nullary constructors,
+                             --      so that [] and () are values, for example
+                             -- and (e.g.) primops that don't have unfoldings
       || is_con_unf (idUnfolding v)
         -- Check the thing's unfolding; it might be bound to a value
         -- We don't look through loop breakers here, which is a bit conservative
@@ -1561,7 +1806,7 @@ exprIsHNFlike is_con is_con_unf = is_hnf_like
     is_hnf_like (Coercion _)     = True       -- Same for coercions
     is_hnf_like (Lam b e)        = isRuntimeVar b || is_hnf_like e
     is_hnf_like (Tick tickish e) = not (tickishCounts tickish)
-                                      && is_hnf_like e
+                                   && is_hnf_like e
                                       -- See Note [exprIsHNF Tick]
     is_hnf_like (Cast e _)       = is_hnf_like e
     is_hnf_like (App e a)
@@ -1573,9 +1818,7 @@ exprIsHNFlike is_con is_con_unf = is_hnf_like
     -- There is at least one value argument
     -- 'n' is number of value args to which the expression is applied
     app_is_value :: CoreExpr -> Int -> Bool
-    app_is_value (Var fun) n_val_args
-      = idArity fun > n_val_args    -- Under-applied function
-        || is_con fun               --  or constructor-like
+    app_is_value (Var f)    nva = id_app_is_value f nva
     app_is_value (Tick _ f) nva = app_is_value f nva
     app_is_value (Cast f _) nva = app_is_value f nva
     app_is_value (App f a)  nva
@@ -1583,6 +1826,13 @@ exprIsHNFlike is_con is_con_unf = is_hnf_like
       | otherwise               = app_is_value f nva
     app_is_value _ _ = False
 
+    id_app_is_value id n_val_args
+       = is_con id
+       || idArity id > n_val_args
+       || id `hasKey` absentErrorIdKey  -- See Note [aBSENT_ERROR_ID] in MkCore
+                      -- absentError behaves like an honorary data constructor
+
+
 {-
 Note [exprIsHNF Tick]
 
@@ -1602,13 +1852,28 @@ don't want to discard a seq on it.
 exprIsTopLevelBindable :: CoreExpr -> Type -> Bool
 -- See Note [CoreSyn top-level string literals]
 -- Precondition: exprType expr = ty
+-- Top-level literal strings can't even be wrapped in ticks
+--   see Note [CoreSyn top-level string literals] in CoreSyn
 exprIsTopLevelBindable expr ty
-  = exprIsLiteralString expr
-  || not (isUnliftedType ty)
-
-exprIsLiteralString :: CoreExpr -> Bool
-exprIsLiteralString (Lit (MachStr _)) = True
-exprIsLiteralString _ = False
+  = not (isUnliftedType ty)
+  || exprIsTickedString expr
+
+-- | Check if the expression is zero or more Ticks wrapped around a literal
+-- string.
+exprIsTickedString :: CoreExpr -> Bool
+exprIsTickedString = isJust . exprIsTickedString_maybe
+
+-- | Extract a literal string from an expression that is zero or more Ticks
+-- wrapped around a literal string. Returns Nothing if the expression has a
+-- different shape.
+-- Used to "look through" Ticks in places that need to handle literal strings.
+exprIsTickedString_maybe :: CoreExpr -> Maybe ByteString
+exprIsTickedString_maybe (Lit (MachStr bs)) = Just bs
+exprIsTickedString_maybe (Tick t e)
+  -- we don't tick literals with CostCentre ticks, compare to mkTick
+  | tickishPlace t == PlaceCostCentre = Nothing
+  | otherwise = exprIsTickedString_maybe e
+exprIsTickedString_maybe _ = Nothing
 
 {-
 ************************************************************************
@@ -1620,8 +1885,8 @@ exprIsLiteralString _ = False
 These InstPat functions go here to avoid circularity between DataCon and Id
 -}
 
-dataConRepInstPat   ::                 [Unique] -> DataCon -> [Type] -> ([TyVar], [Id])
-dataConRepFSInstPat :: [FastString] -> [Unique] -> DataCon -> [Type] -> ([TyVar], [Id])
+dataConRepInstPat   ::                 [Unique] -> DataCon -> [Type] -> ([TyCoVar], [Id])
+dataConRepFSInstPat :: [FastString] -> [Unique] -> DataCon -> [Type] -> ([TyCoVar], [Id])
 
 dataConRepInstPat   = dataConInstPat (repeat ((fsLit "ipv")))
 dataConRepFSInstPat = dataConInstPat
@@ -1630,7 +1895,7 @@ dataConInstPat :: [FastString]          -- A long enough list of FSs to use for
                -> [Unique]              -- An equally long list of uniques, at least one for each binder
                -> DataCon
                -> [Type]                -- Types to instantiate the universally quantified tyvars
-               -> ([TyVar], [Id])       -- Return instantiated variables
+               -> ([TyCoVar], [Id])     -- Return instantiated variables
 -- dataConInstPat arg_fun fss us con inst_tys returns a tuple
 -- (ex_tvs, arg_ids),
 --
@@ -1663,7 +1928,7 @@ dataConInstPat fss uniqs con inst_tys
     (ex_bndrs, arg_ids)
   where
     univ_tvs = dataConUnivTyVars con
-    ex_tvs   = dataConExTyVars con
+    ex_tvs   = dataConExTyCoVars con
     arg_tys  = dataConRepArgTys con
     arg_strs = dataConRepStrictness con  -- 1-1 with arg_tys
     n_ex = length ex_tvs
@@ -1679,13 +1944,16 @@ dataConInstPat fss uniqs con inst_tys
     (full_subst, ex_bndrs) = mapAccumL mk_ex_var univ_subst
                                        (zip3 ex_tvs ex_fss ex_uniqs)
 
-    mk_ex_var :: TCvSubst -> (TyVar, FastString, Unique) -> (TCvSubst, TyVar)
-    mk_ex_var subst (tv, fs, uniq) = (Type.extendTvSubstWithClone subst tv
+    mk_ex_var :: TCvSubst -> (TyCoVar, FastString, Unique) -> (TCvSubst, TyCoVar)
+    mk_ex_var subst (tv, fs, uniq) = (Type.extendTCvSubstWithClone subst tv
                                        new_tv
                                      , new_tv)
       where
-        new_tv = mkTyVar (mkSysTvName uniq fs) kind
-        kind   = Type.substTyUnchecked subst (tyVarKind tv)
+        new_tv | isTyVar tv
+               = mkTyVar (mkSysTvName uniq fs) kind
+               | otherwise
+               = mkCoVar (mkSystemVarName uniq fs) kind
+        kind   = Type.substTyUnchecked subst (varType tv)
 
       -- Make value vars, instantiating types
     arg_ids = zipWith4 mk_id_var id_uniqs id_fss arg_tys arg_strs
@@ -2162,12 +2430,13 @@ and 'execute' it rather than allocating it statically.
 -- | This function is called only on *top-level* right-hand sides.
 -- Returns @True@ if the RHS can be allocated statically in the output,
 -- with no thunks involved at all.
-rhsIsStatic :: Platform
-            -> (Name -> Bool)         -- Which names are dynamic
-            -> (Integer -> CoreExpr)  -- Desugaring for integer literals (disgusting)
-                                      -- C.f. Note [Disgusting computation of CafRefs]
-                                      --      in TidyPgm
-            -> CoreExpr -> Bool
+rhsIsStatic
+   :: Platform
+   -> (Name -> Bool)         -- Which names are dynamic
+   -> (LitNumType -> Integer -> Maybe CoreExpr)
+      -- Desugaring for some literals (disgusting)
+      -- C.f. Note [Disgusting computation of CafRefs] in TidyPgm
+   -> CoreExpr -> Bool
 -- It's called (i) in TidyPgm.hasCafRefs to decide if the rhs is, or
 -- refers to, CAFs; (ii) in CoreToStg to decide whether to put an
 -- update flag on it and (iii) in DsExpr to decide how to expand
@@ -2222,7 +2491,7 @@ rhsIsStatic :: Platform
 --
 --    c) don't look through unfolding of f in (f x).
 
-rhsIsStatic platform is_dynamic_name cvt_integer rhs = is_static False rhs
+rhsIsStatic platform is_dynamic_name cvt_literal rhs = is_static False rhs
   where
   is_static :: Bool     -- True <=> in a constructor argument; must be atomic
             -> CoreExpr -> Bool
@@ -2232,7 +2501,9 @@ rhsIsStatic platform is_dynamic_name cvt_integer rhs = is_static False rhs
                                               && is_static in_arg e
   is_static in_arg (Cast e _)             = is_static in_arg e
   is_static _      (Coercion {})          = True   -- Behaves just like a literal
-  is_static in_arg (Lit (LitInteger i _)) = is_static in_arg (cvt_integer i)
+  is_static in_arg (Lit (LitNumber nt i _)) = case cvt_literal nt i of
+    Just e  -> is_static in_arg e
+    Nothing -> True
   is_static _      (Lit (MachLabel {}))   = False
   is_static _      (Lit _)                = True
         -- A MachLabel (foreign import "&foo") in an argument
diff --git a/compiler/coreSyn/MkCore.hs b/compiler/coreSyn/MkCore.hs
index 5a29994d0e..a425ad249e 100644
--- a/compiler/coreSyn/MkCore.hs
+++ b/compiler/coreSyn/MkCore.hs
@@ -42,15 +42,17 @@ module MkCore (
         mkNothingExpr, mkJustExpr,
 
         -- * Error Ids
-        mkRuntimeErrorApp, mkImpossibleExpr, errorIds,
-        rEC_CON_ERROR_ID, iRREFUT_PAT_ERROR_ID, rUNTIME_ERROR_ID,
+        mkRuntimeErrorApp, mkImpossibleExpr, mkAbsentErrorApp, errorIds,
+        rEC_CON_ERROR_ID, rUNTIME_ERROR_ID,
         nON_EXHAUSTIVE_GUARDS_ERROR_ID, nO_METHOD_BINDING_ERROR_ID,
         pAT_ERROR_ID, rEC_SEL_ERROR_ID, aBSENT_ERROR_ID,
-        tYPE_ERROR_ID,
+        tYPE_ERROR_ID, aBSENT_SUM_FIELD_ERROR_ID
     ) where
 
 #include "HsVersions.h"
 
+import GhcPrelude
+
 import Id
 import Var      ( EvVar, setTyVarUnique )
 
@@ -63,13 +65,11 @@ import TysWiredIn
 import PrelNames
 
 import HsUtils          ( mkChunkified, chunkify )
-import TcType           ( mkSpecSigmaTy )
 import Type
 import Coercion         ( isCoVar )
 import TysPrim
 import DataCon          ( DataCon, dataConWorkId )
-import IdInfo           ( vanillaIdInfo, setStrictnessInfo,
-                          setArityInfo )
+import IdInfo
 import Demand
 import Name      hiding ( varName )
 import Outputable
@@ -81,6 +81,7 @@ import DynFlags
 import Data.List
 
 import Data.Char        ( ord )
+import Control.Monad.Fail ( MonadFail )
 
 infixl 4 `mkCoreApp`, `mkCoreApps`
 
@@ -106,9 +107,7 @@ sortQuantVars vs = sorted_tcvs ++ ids
 -- appropriate (see "CoreSyn#let_app_invariant")
 mkCoreLet :: CoreBind -> CoreExpr -> CoreExpr
 mkCoreLet (NonRec bndr rhs) body        -- See Note [CoreSyn let/app invariant]
-  | needsCaseBinding (idType bndr) rhs
-  , not (isJoinId bndr)
-  = Case rhs bndr (exprType body) [(DEFAULT,[],body)]
+  = bindNonRec bndr rhs body
 mkCoreLet bind body
   = Let bind body
 
@@ -118,34 +117,43 @@ mkCoreLets :: [CoreBind] -> CoreExpr -> CoreExpr
 mkCoreLets binds body = foldr mkCoreLet body binds
 
 -- | Construct an expression which represents the application of one expression
+-- paired with its type to an argument. The result is paired with its type. This
+-- function is not exported and used in the definition of 'mkCoreApp' and
+-- 'mkCoreApps'.
+-- Respects the let/app invariant by building a case expression where necessary
+--   See CoreSyn Note [CoreSyn let/app invariant]
+mkCoreAppTyped :: SDoc -> (CoreExpr, Type) -> CoreExpr -> (CoreExpr, Type)
+mkCoreAppTyped _ (fun, fun_ty) (Type ty)
+  = (App fun (Type ty), piResultTy fun_ty ty)
+mkCoreAppTyped _ (fun, fun_ty) (Coercion co)
+  = (App fun (Coercion co), res_ty)
+  where
+    (_, res_ty) = splitFunTy fun_ty
+mkCoreAppTyped d (fun, fun_ty) arg
+  = ASSERT2( isFunTy fun_ty, ppr fun $$ ppr arg $$ d )
+    (mk_val_app fun arg arg_ty res_ty, res_ty)
+  where
+    (arg_ty, res_ty) = splitFunTy fun_ty
+
+-- | Construct an expression which represents the application of one expression
 -- to the other
-mkCoreApp :: SDoc -> CoreExpr -> CoreExpr -> CoreExpr
 -- Respects the let/app invariant by building a case expression where necessary
 --   See CoreSyn Note [CoreSyn let/app invariant]
-mkCoreApp _ fun (Type ty)     = App fun (Type ty)
-mkCoreApp _ fun (Coercion co) = App fun (Coercion co)
-mkCoreApp d fun arg           = ASSERT2( isFunTy fun_ty, ppr fun $$ ppr arg $$ d )
-                                mk_val_app fun arg arg_ty res_ty
-                              where
-                                fun_ty = exprType fun
-                                (arg_ty, res_ty) = splitFunTy fun_ty
+mkCoreApp :: SDoc -> CoreExpr -> CoreExpr -> CoreExpr
+mkCoreApp s fun arg
+  = fst $ mkCoreAppTyped s (fun, exprType fun) arg
 
 -- | Construct an expression which represents the application of a number of
 -- expressions to another. The leftmost expression in the list is applied first
 -- Respects the let/app invariant by building a case expression where necessary
 --   See CoreSyn Note [CoreSyn let/app invariant]
 mkCoreApps :: CoreExpr -> [CoreExpr] -> CoreExpr
--- Slightly more efficient version of (foldl mkCoreApp)
-mkCoreApps orig_fun orig_args
-  = go orig_fun (exprType orig_fun) orig_args
+mkCoreApps fun args
+  = fst $
+    foldl' (mkCoreAppTyped doc_string) (fun, fun_ty) args
   where
-    go fun _      []               = fun
-    go fun fun_ty (Type ty : args) = go (App fun (Type ty)) (piResultTy fun_ty ty) args
-    go fun fun_ty (arg     : args) = ASSERT2( isFunTy fun_ty, ppr fun_ty $$ ppr orig_fun
-                                                              $$ ppr orig_args )
-                                     go (mk_val_app fun arg arg_ty res_ty) res_ty args
-                                   where
-                                     (arg_ty, res_ty) = splitFunTy fun_ty
+    doc_string = ppr fun_ty $$ ppr fun $$ ppr args
+    fun_ty = exprType fun
 
 -- | Construct an expression which represents the application of a number of
 -- expressions to that of a data constructor expression. The leftmost expression
@@ -171,7 +179,7 @@ mk_val_app fun arg arg_ty res_ty
         --
         -- This is Dangerous.  But this is the only place we play this
         -- game, mk_val_app returns an expression that does not have
-        -- have a free wild-id.  So the only thing that can go wrong
+        -- a free wild-id.  So the only thing that can go wrong
         -- is if you take apart this case expression, and pass a
         -- fragment of it as the fun part of a 'mk_val_app'.
 
@@ -251,13 +259,9 @@ mkIntegerExpr i = do t <- lookupTyCon integerTyConName
                      return (Lit (mkLitInteger i (mkTyConTy t)))
 
 -- | Create a 'CoreExpr' which will evaluate to the given @Natural@
---
--- TODO: should we add LitNatural to Core?
-mkNaturalExpr  :: MonadThings m => Integer -> m CoreExpr  -- Result :: Natural
-mkNaturalExpr i = do iExpr <- mkIntegerExpr i
-                     fiExpr <- lookupId naturalFromIntegerName
-                     return (mkCoreApps (Var fiExpr) [iExpr])
-
+mkNaturalExpr  :: MonadThings m => Integer -> m CoreExpr
+mkNaturalExpr i = do t <- lookupTyCon naturalTyConName
+                     return (Lit (mkLitNatural i (mkTyConTy t)))
 
 -- | Create a 'CoreExpr' which will evaluate to the given @Float@
 mkFloatExpr :: Float -> CoreExpr
@@ -328,7 +332,7 @@ We could do one of two things:
 * Flatten it out, so that
     mkCoreTup [e1] = e1
 
-* Built a one-tuple (see Note [One-tuples] in TysWiredIn)
+* Build a one-tuple (see Note [One-tuples] in TysWiredIn)
     mkCoreTup1 [e1] = Unit e1
   We use a suffix "1" to indicate this.
 
@@ -362,7 +366,7 @@ mkCoreUbxTup :: [Type] -> [CoreExpr] -> CoreExpr
 mkCoreUbxTup tys exps
   = ASSERT( tys `equalLength` exps)
     mkCoreConApps (tupleDataCon Unboxed (length tys))
-             (map (Type . getRuntimeRep "mkCoreUbxTup") tys ++ map Type tys ++ exps)
+             (map (Type . getRuntimeRep) tys ++ map Type tys ++ exps)
 
 -- | Make a core tuple of the given boxity
 mkCoreTupBoxity :: Boxity -> [CoreExpr] -> CoreExpr
@@ -596,7 +600,7 @@ mkFoldrExpr elt_ty result_ty c n list = do
            `App` list)
 
 -- | Make a 'build' expression applied to a locally-bound worker function
-mkBuildExpr :: (MonadThings m, MonadUnique m)
+mkBuildExpr :: (MonadFail m, MonadThings m, MonadUnique m)
             => Type                                     -- ^ Type of list elements to be built
             -> ((Id, Type) -> (Id, Type) -> m CoreExpr) -- ^ Function that, given information about the 'Id's
                                                         -- of the binders for the build worker function, returns
@@ -651,7 +655,7 @@ mkRuntimeErrorApp
         -> CoreExpr
 
 mkRuntimeErrorApp err_id res_ty err_msg
-  = mkApps (Var err_id) [ Type (getRuntimeRep "mkRuntimeErrorApp" res_ty)
+  = mkApps (Var err_id) [ Type (getRuntimeRep res_ty)
                         , Type res_ty, err_string ]
   where
     err_string = Lit (mkMachString err_msg)
@@ -686,7 +690,6 @@ templates, but we don't ever expect to generate code for it.
 errorIds :: [Id]
 errorIds
   = [ rUNTIME_ERROR_ID,
-      iRREFUT_PAT_ERROR_ID,
       nON_EXHAUSTIVE_GUARDS_ERROR_ID,
       nO_METHOD_BINDING_ERROR_ID,
       pAT_ERROR_ID,
@@ -697,14 +700,16 @@ errorIds
       ]
 
 recSelErrorName, runtimeErrorName, absentErrorName :: Name
-irrefutPatErrorName, recConErrorName, patErrorName :: Name
+recConErrorName, patErrorName :: Name
 nonExhaustiveGuardsErrorName, noMethodBindingErrorName :: Name
 typeErrorName :: Name
+absentSumFieldErrorName :: Name
 
 recSelErrorName     = err_nm "recSelError"     recSelErrorIdKey     rEC_SEL_ERROR_ID
 absentErrorName     = err_nm "absentError"     absentErrorIdKey     aBSENT_ERROR_ID
+absentSumFieldErrorName = err_nm "absentSumFieldError"  absentSumFieldErrorIdKey
+                            aBSENT_SUM_FIELD_ERROR_ID
 runtimeErrorName    = err_nm "runtimeError"    runtimeErrorIdKey    rUNTIME_ERROR_ID
-irrefutPatErrorName = err_nm "irrefutPatError" irrefutPatErrorIdKey iRREFUT_PAT_ERROR_ID
 recConErrorName     = err_nm "recConError"     recConErrorIdKey     rEC_CON_ERROR_ID
 patErrorName        = err_nm "patError"        patErrorIdKey        pAT_ERROR_ID
 typeErrorName       = err_nm "typeError"       typeErrorIdKey       tYPE_ERROR_ID
@@ -717,19 +722,46 @@ nonExhaustiveGuardsErrorName = err_nm "nonExhaustiveGuardsError"
 err_nm :: String -> Unique -> Id -> Name
 err_nm str uniq id = mkWiredInIdName cONTROL_EXCEPTION_BASE (fsLit str) uniq id
 
-rEC_SEL_ERROR_ID, rUNTIME_ERROR_ID, iRREFUT_PAT_ERROR_ID, rEC_CON_ERROR_ID :: Id
+rEC_SEL_ERROR_ID, rUNTIME_ERROR_ID, rEC_CON_ERROR_ID :: Id
 pAT_ERROR_ID, nO_METHOD_BINDING_ERROR_ID, nON_EXHAUSTIVE_GUARDS_ERROR_ID :: Id
-tYPE_ERROR_ID, aBSENT_ERROR_ID :: Id
+tYPE_ERROR_ID, aBSENT_ERROR_ID, aBSENT_SUM_FIELD_ERROR_ID :: Id
 rEC_SEL_ERROR_ID                = mkRuntimeErrorId recSelErrorName
 rUNTIME_ERROR_ID                = mkRuntimeErrorId runtimeErrorName
-iRREFUT_PAT_ERROR_ID            = mkRuntimeErrorId irrefutPatErrorName
 rEC_CON_ERROR_ID                = mkRuntimeErrorId recConErrorName
 pAT_ERROR_ID                    = mkRuntimeErrorId patErrorName
 nO_METHOD_BINDING_ERROR_ID      = mkRuntimeErrorId noMethodBindingErrorName
 nON_EXHAUSTIVE_GUARDS_ERROR_ID  = mkRuntimeErrorId nonExhaustiveGuardsErrorName
-aBSENT_ERROR_ID                 = mkRuntimeErrorId absentErrorName
 tYPE_ERROR_ID                   = mkRuntimeErrorId typeErrorName
 
+-- Note [aBSENT_SUM_FIELD_ERROR_ID]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Absent argument error for unused unboxed sum fields are different than absent
+-- error used in dummy worker functions (see `mkAbsentErrorApp`):
+--
+-- - `absentSumFieldError` can't take arguments because it's used in unarise for
+--   unused pointer fields in unboxed sums, and applying an argument would
+--   require allocating a thunk.
+--
+-- - `absentSumFieldError` can't be CAFFY because that would mean making some
+--   non-CAFFY definitions that use unboxed sums CAFFY in unarise.
+--
+--   To make `absentSumFieldError` non-CAFFY we get a stable pointer to it in
+--   RtsStartup.c and mark it as non-CAFFY here.
+--
+-- Getting this wrong causes hard-to-debug runtime issues, see #15038.
+--
+-- TODO: Remove stable pointer hack after fixing #9718.
+--       However, we should still be careful about not making things CAFFY just
+--       because they use unboxed sums. Unboxed objects are supposed to be
+--       efficient, and none of the other unboxed literals make things CAFFY.
+
+aBSENT_SUM_FIELD_ERROR_ID
+  = mkVanillaGlobalWithInfo absentSumFieldErrorName
+      (mkSpecForAllTys [alphaTyVar] (mkTyVarTy alphaTyVar)) -- forall a . a
+      (vanillaIdInfo `setStrictnessInfo` mkClosedStrictSig [] exnRes
+                     `setArityInfo` 0
+                     `setCafInfo` NoCafRefs) -- #15038
+
 mkRuntimeErrorId :: Name -> Id
 -- Error function
 --   with type:  forall (r:RuntimeRep) (a:TYPE r). Addr# -> a
@@ -738,7 +770,7 @@ mkRuntimeErrorId :: Name -> Id
 -- The Addr# is expected to be the address of
 --   a UTF8-encoded error string
 mkRuntimeErrorId name
- = mkVanillaGlobalWithInfo name runtime_err_ty bottoming_info
+ = mkVanillaGlobalWithInfo name runtimeErrorTy bottoming_info
  where
     bottoming_info = vanillaIdInfo `setStrictnessInfo`    strict_sig
                                    `setArityInfo`         1
@@ -756,10 +788,11 @@ mkRuntimeErrorId name
     strict_sig = mkClosedStrictSig [evalDmd] exnRes
               -- exnRes: these throw an exception, not just diverge
 
-    -- forall (rr :: RuntimeRep) (a :: rr). Addr# -> a
-    --   See Note [Error and friends have an "open-tyvar" forall]
-    runtime_err_ty = mkSpecSigmaTy [runtimeRep1TyVar, openAlphaTyVar] []
-                                   (mkFunTy addrPrimTy openAlphaTy)
+runtimeErrorTy :: Type
+-- forall (rr :: RuntimeRep) (a :: rr). Addr# -> a
+--   See Note [Error and friends have an "open-tyvar" forall]
+runtimeErrorTy = mkSpecForAllTys [runtimeRep1TyVar, openAlphaTyVar]
+                                 (mkFunTy addrPrimTy openAlphaTy)
 
 {- Note [Error and friends have an "open-tyvar" forall]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -769,4 +802,98 @@ mkRuntimeErrorId name
 Notice the runtime-representation polymorphism. This ensures that
 "error" can be instantiated at unboxed as well as boxed types.
 This is OK because it never returns, so the return type is irrelevant.
+
+
+************************************************************************
+*                                                                      *
+                     aBSENT_ERROR_ID
+*                                                                      *
+************************************************************************
+
+Note [aBSENT_ERROR_ID]
+~~~~~~~~~~~~~~~~~~~~~~
+We use aBSENT_ERROR_ID to build dummy values in workers.  E.g.
+
+   f x = (case x of (a,b) -> b) + 1::Int
+
+The demand analyser figures ot that only the second component of x is
+used, and does a w/w split thus
+
+   f x = case x of (a,b) -> $wf b
+
+   $wf b = let a = absentError "blah"
+               x = (a,b)
+           in <the original RHS of f>
+
+After some simplification, the (absentError "blah") thunk goes away.
+
+------ Tricky wrinkle -------
+Trac #14285 had, roughly
+
+   data T a = MkT a !a
+   {-# INLINABLE f #-}
+   f x = case x of MkT a b -> g (MkT b a)
+
+It turned out that g didn't use the second component, and hence f doesn't use
+the first.  But the stable-unfolding for f looks like
+   \x. case x of MkT a b -> g ($WMkT b a)
+where $WMkT is the wrapper for MkT that evaluates its arguments.  We
+apply the same w/w split to this unfolding (see Note [Worker-wrapper
+for INLINEABLE functions] in WorkWrap) so the template ends up like
+   \b. let a = absentError "blah"
+           x = MkT a b
+        in case x of MkT a b -> g ($WMkT b a)
+
+After doing case-of-known-constructor, and expanding $WMkT we get
+   \b -> g (case absentError "blah" of a -> MkT b a)
+
+Yikes!  That bogusly appears to evaluate the absentError!
+
+This is extremely tiresome.  Another way to think of this is that, in
+Core, it is an invariant that a strict data contructor, like MkT, must
+be applied only to an argument in HNF. So (absentError "blah") had
+better be non-bottom.
+
+So the "solution" is to add a special case for absentError to exprIsHNFlike.
+This allows Simplify.rebuildCase, in the Note [Case to let transformation]
+branch, to convert the case on absentError into a let. We also make
+absentError *not* be diverging, unlike the other error-ids, so that we
+can be sure not to remove the case branches before converting the case to
+a let.
+
+If, by some bug or bizarre happenstance, we ever call absentError, we should
+throw an exception.  This should never happen, of course, but we definitely
+can't return anything.  e.g. if somehow we had
+    case absentError "foo" of
+       Nothing -> ...
+       Just x  -> ...
+then if we return, the case expression will select a field and continue.
+Seg fault city. Better to throw an exception. (Even though we've said
+it is in HNF :-)
+
+It might seem a bit surprising that seq on absentError is simply erased
+
+    absentError "foo" `seq` x ==> x
+
+but that should be okay; since there's no pattern match we can't really
+be relying on anything from it.
 -}
+
+aBSENT_ERROR_ID
+ = mkVanillaGlobalWithInfo absentErrorName absent_ty arity_info
+ where
+   absent_ty = mkSpecForAllTys [alphaTyVar] (mkFunTy addrPrimTy alphaTy)
+   -- Not runtime-rep polymorphic. aBSENT_ERROR_ID is only used for
+   -- lifted-type things; see Note [Absent errors] in WwLib
+   arity_info = vanillaIdInfo `setArityInfo` 1
+   -- NB: no bottoming strictness info, unlike other error-ids.
+   -- See Note [aBSENT_ERROR_ID]
+
+mkAbsentErrorApp :: Type         -- The type to instantiate 'a'
+                 -> String       -- The string to print
+                 -> CoreExpr
+
+mkAbsentErrorApp res_ty err_msg
+  = mkApps (Var aBSENT_ERROR_ID) [ Type res_ty, err_string ]
+  where
+    err_string = Lit (mkMachString err_msg)
diff --git a/compiler/coreSyn/PprCore.hs b/compiler/coreSyn/PprCore.hs
index 28d35528fe..f22d803cb1 100644
--- a/compiler/coreSyn/PprCore.hs
+++ b/compiler/coreSyn/PprCore.hs
@@ -15,6 +15,8 @@ module PprCore (
         pprRules, pprOptCo
     ) where
 
+import GhcPrelude
+
 import CoreSyn
 import CoreStats (exprStats)
 import Literal( pprLiteral )
@@ -126,10 +128,18 @@ ppr_binding ann (val_bdr, expr)
       -- lambda (the first rendering looks like a nullary join point returning
       -- an n-argument function).
     pp_join_bind join_arity
+      | bndrs `lengthAtLeast` join_arity
       = hang (ppr val_bdr <+> sep (map (pprBndr LambdaBind) lhs_bndrs))
            2 (equals <+> pprCoreExpr rhs)
+      | otherwise -- Yikes!  A join-binding with too few lambda
+                  -- Lint will complain, but we don't want to crash
+                  -- the pretty-printer else we can't see what's wrong
+                  -- So refer to printing  j = e
+      = pp_normal_bind
       where
-        (lhs_bndrs, rhs) = collectNBinders join_arity expr
+        (bndrs, body) = collectBinders expr
+        lhs_bndrs = take join_arity bndrs
+        rhs       = mkLams (drop join_arity bndrs) body
 
 pprParendExpr expr = ppr_expr parens expr
 pprCoreExpr   expr = ppr_expr noParens expr
@@ -213,7 +223,7 @@ ppr_expr add_par (Case expr var ty [(con,args,rhs)])
              ]
     else add_par $
          sep [sep [sep [ text "case" <+> pprCoreExpr expr
-                       , ifPprDebug (text "return" <+> ppr ty)
+                       , whenPprDebug (text "return" <+> ppr ty)
                        , text "of" <+> ppr_bndr var
                        ]
                   , char '{' <+> ppr_case_pat con args <+> arrow
@@ -228,7 +238,7 @@ ppr_expr add_par (Case expr var ty alts)
   = add_par $
     sep [sep [text "case"
                 <+> pprCoreExpr expr
-                <+> ifPprDebug (text "return" <+> ppr ty),
+                <+> whenPprDebug (text "return" <+> ppr ty),
               text "of" <+> ppr_bndr var <+> char '{'],
          nest 2 (vcat (punctuate semi (map pprCoreAlt alts))),
          char '}'
@@ -374,7 +384,7 @@ pprTypedLamBinder bind_site debug_on var
   = sdocWithDynFlags $ \dflags ->
     case () of
     _
-      | not debug_on            -- Show case-bound wild bilders only if debug is on
+      | not debug_on            -- Show case-bound wild binders only if debug is on
       , CaseBind <- bind_site
       , isDeadBinder var        -> empty
 
@@ -602,21 +612,3 @@ instance Outputable id => Outputable (Tickish id) where
   ppr (SourceNote span _) =
       hcat [ text "src<", pprUserRealSpan True span, char '>']
 
-{-
------------------------------------------------------
---      Vectorisation declarations
------------------------------------------------------
--}
-
-instance Outputable CoreVect where
-  ppr (Vect     var e)               = hang (text "VECTORISE" <+> ppr var <+> char '=')
-                                         4 (pprCoreExpr e)
-  ppr (NoVect   var)                 = text "NOVECTORISE" <+> ppr var
-  ppr (VectType False var Nothing)   = text "VECTORISE type" <+> ppr var
-  ppr (VectType True  var Nothing)   = text "VECTORISE SCALAR type" <+> ppr var
-  ppr (VectType False var (Just tc)) = text "VECTORISE type" <+> ppr var <+> char '=' <+>
-                                       ppr tc
-  ppr (VectType True var (Just tc))  = text "VECTORISE SCALAR type" <+> ppr var <+>
-                                       char '=' <+> ppr tc
-  ppr (VectClass tc)                 = text "VECTORISE class" <+> ppr tc
-  ppr (VectInst var)                 = text "VECTORISE SCALAR instance" <+> ppr var