27 files changed, 7446 insertions, 27 deletions
diff --git a/compiler/GHC/CoreToStg.hs b/compiler/GHC/CoreToStg.hs
new file mode 100644
index 0000000000..1ee5febd2e
--- /dev/null
+++ b/compiler/GHC/CoreToStg.hs
@@ -0,0 +1,939 @@
+{-# LANGUAGE CPP, DeriveFunctor #-}
+
+--
+-- (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
+--
+
+--------------------------------------------------------------
+-- Converting Core to STG Syntax
+--------------------------------------------------------------
+
+-- And, as we have the info in hand, we may convert some lets to
+-- let-no-escapes.
+
+module GHC.CoreToStg ( coreToStg ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import CoreSyn
+import CoreUtils        ( exprType, findDefault, isJoinBind
+                        , exprIsTickedString_maybe )
+import CoreArity        ( manifestArity )
+import GHC.Stg.Syntax
+
+import Type
+import GHC.Types.RepType
+import TyCon
+import MkId             ( coercionTokenId )
+import Id
+import IdInfo
+import DataCon
+import CostCentre
+import VarEnv
+import Module
+import Name             ( isExternalName, nameOccName, nameModule_maybe )
+import OccName          ( occNameFS )
+import BasicTypes       ( Arity )
+import TysWiredIn       ( unboxedUnitDataCon, unitDataConId )
+import Literal
+import Outputable
+import MonadUtils
+import FastString
+import Util
+import DynFlags
+import ForeignCall
+import Demand           ( isUsedOnce )
+import PrimOp           ( PrimCall(..), primOpWrapperId )
+import SrcLoc           ( mkGeneralSrcSpan )
+
+import Data.List.NonEmpty (nonEmpty, toList)
+import Data.Maybe    (fromMaybe)
+import Control.Monad (ap)
+
+-- Note [Live vs free]
+-- ~~~~~~~~~~~~~~~~~~~
+--
+-- The two are not the same. Liveness is an operational property rather
+-- than a semantic one. A variable is live at a particular execution
+-- point if it can be referred to directly again. In particular, a dead
+-- variable's stack slot (if it has one):
+--
+--           - should be stubbed to avoid space leaks, and
+--           - may be reused for something else.
+--
+-- There ought to be a better way to say this. Here are some examples:
+--
+--         let v = [q] \[x] -> e
+--         in
+--         ...v...  (but no q's)
+--
+-- Just after the `in', v is live, but q is dead. If the whole of that
+-- let expression was enclosed in a case expression, thus:
+--
+--         case (let v = [q] \[x] -> e in ...v...) of
+--                 alts[...q...]
+--
+-- (ie `alts' mention `q'), then `q' is live even after the `in'; because
+-- we'll return later to the `alts' and need it.
+--
+-- Let-no-escapes make this a bit more interesting:
+--
+--         let-no-escape v = [q] \ [x] -> e
+--         in
+--         ...v...
+--
+-- Here, `q' is still live at the `in', because `v' is represented not by
+-- a closure but by the current stack state.  In other words, if `v' is
+-- live then so is `q'. Furthermore, if `e' mentions an enclosing
+-- let-no-escaped variable, then its free variables are also live if `v' is.
+
+-- Note [What are these SRTs all about?]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Consider the Core program,
+--
+--     fibs = go 1 1
+--       where go a b = let c = a + c
+--                      in c : go b c
+--     add x = map (\y -> x*y) fibs
+--
+-- In this case we have a CAF, 'fibs', which is quite large after evaluation and
+-- has only one possible user, 'add'. Consequently, we want to ensure that when
+-- all references to 'add' die we can garbage collect any bit of 'fibs' that we
+-- have evaluated.
+--
+-- However, how do we know whether there are any references to 'fibs' still
+-- around? Afterall, the only reference to it is buried in the code generated
+-- for 'add'. The answer is that we record the CAFs referred to by a definition
+-- in its info table, namely a part of it known as the Static Reference Table
+-- (SRT).
+--
+-- Since SRTs are so common, we use a special compact encoding for them in: we
+-- produce one table containing a list of CAFs in a module and then include a
+-- bitmap in each info table describing which entries of this table the closure
+-- references.
+--
+-- See also: commentary/rts/storage/gc/CAFs on the GHC Wiki.
+
+-- Note [What is a non-escaping let]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- NB: Nowadays this is recognized by the occurrence analyser by turning a
+-- "non-escaping let" into a join point. The following is then an operational
+-- account of join points.
+--
+-- Consider:
+--
+--     let x = fvs \ args -> e
+--     in
+--         if ... then x else
+--            if ... then x else ...
+--
+-- `x' is used twice (so we probably can't unfold it), but when it is
+-- entered, the stack is deeper than it was when the definition of `x'
+-- happened.  Specifically, if instead of allocating a closure for `x',
+-- we saved all `x's fvs on the stack, and remembered the stack depth at
+-- that moment, then whenever we enter `x' we can simply set the stack
+-- pointer(s) to these remembered (compile-time-fixed) values, and jump
+-- to the code for `x'.
+--
+-- All of this is provided x is:
+--   1. non-updatable;
+--   2. guaranteed to be entered before the stack retreats -- ie x is not
+--      buried in a heap-allocated closure, or passed as an argument to
+--      something;
+--   3. all the enters have exactly the right number of arguments,
+--      no more no less;
+--   4. all the enters are tail calls; that is, they return to the
+--      caller enclosing the definition of `x'.
+--
+-- Under these circumstances we say that `x' is non-escaping.
+--
+-- An example of when (4) does not hold:
+--
+--     let x = ...
+--     in case x of ...alts...
+--
+-- Here, `x' is certainly entered only when the stack is deeper than when
+-- `x' is defined, but here it must return to ...alts... So we can't just
+-- adjust the stack down to `x''s recalled points, because that would lost
+-- alts' context.
+--
+-- Things can get a little more complicated.  Consider:
+--
+--     let y = ...
+--     in let x = fvs \ args -> ...y...
+--     in ...x...
+--
+-- Now, if `x' is used in a non-escaping way in ...x..., and `y' is used in a
+-- non-escaping way in ...y..., then `y' is non-escaping.
+--
+-- `x' can even be recursive!  Eg:
+--
+--     letrec x = [y] \ [v] -> if v then x True else ...
+--     in
+--         ...(x b)...
+
+-- Note [Cost-centre initialization plan]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Previously `coreToStg` was initializing cost-centre stack fields as `noCCS`,
+-- and the fields were then fixed by a separate pass `stgMassageForProfiling`.
+-- We now initialize these correctly. The initialization works like this:
+--
+--   - For non-top level bindings always use `currentCCS`.
+--
+--   - For top-level bindings, check if the binding is a CAF
+--
+--     - CAF:      If -fcaf-all is enabled, create a new CAF just for this CAF
+--                 and use it. Note that these new cost centres need to be
+--                 collected to be able to generate cost centre initialization
+--                 code, so `coreToTopStgRhs` now returns `CollectedCCs`.
+--
+--                 If -fcaf-all is not enabled, use "all CAFs" cost centre.
+--
+--     - Non-CAF:  Top-level (static) data is not counted in heap profiles; nor
+--                 do we set CCCS from it; so we just slam in
+--                 dontCareCostCentre.
+
+-- --------------------------------------------------------------
+-- Setting variable info: top-level, binds, RHSs
+-- --------------------------------------------------------------
+
+coreToStg :: DynFlags -> Module -> CoreProgram
+          -> ([StgTopBinding], CollectedCCs)
+coreToStg dflags this_mod pgm
+  = (pgm', final_ccs)
+  where
+    (_, (local_ccs, local_cc_stacks), pgm')
+      = coreTopBindsToStg dflags this_mod emptyVarEnv emptyCollectedCCs pgm
+
+    prof = WayProf `elem` ways dflags
+
+    final_ccs
+      | prof && gopt Opt_AutoSccsOnIndividualCafs dflags
+      = (local_ccs,local_cc_stacks)  -- don't need "all CAFs" CC
+      | prof
+      = (all_cafs_cc:local_ccs, all_cafs_ccs:local_cc_stacks)
+      | otherwise
+      = emptyCollectedCCs
+
+    (all_cafs_cc, all_cafs_ccs) = getAllCAFsCC this_mod
+
+coreTopBindsToStg
+    :: DynFlags
+    -> Module
+    -> IdEnv HowBound           -- environment for the bindings
+    -> CollectedCCs
+    -> CoreProgram
+    -> (IdEnv HowBound, CollectedCCs, [StgTopBinding])
+
+coreTopBindsToStg _      _        env ccs []
+  = (env, ccs, [])
+coreTopBindsToStg dflags this_mod env ccs (b:bs)
+  = (env2, ccs2, b':bs')
+  where
+        (env1, ccs1, b' ) =
+          coreTopBindToStg dflags this_mod env ccs b
+        (env2, ccs2, bs') =
+          coreTopBindsToStg dflags this_mod env1 ccs1 bs
+
+coreTopBindToStg
+        :: DynFlags
+        -> Module
+        -> IdEnv HowBound
+        -> CollectedCCs
+        -> CoreBind
+        -> (IdEnv HowBound, CollectedCCs, StgTopBinding)
+
+coreTopBindToStg _ _ env ccs (NonRec id e)
+  | Just str <- exprIsTickedString_maybe e
+  -- top-level string literal
+  -- See Note [CoreSyn top-level string literals] in CoreSyn
+  = let
+        env' = extendVarEnv env id how_bound
+        how_bound = LetBound TopLet 0
+    in (env', ccs, StgTopStringLit id str)
+
+coreTopBindToStg dflags this_mod env ccs (NonRec id rhs)
+  = let
+        env'      = extendVarEnv env id how_bound
+        how_bound = LetBound TopLet $! manifestArity rhs
+
+        (stg_rhs, ccs') =
+            initCts dflags env $
+              coreToTopStgRhs dflags ccs this_mod (id,rhs)
+
+        bind = StgTopLifted $ StgNonRec id stg_rhs
+    in
+    assertConsistentCafInfo dflags id bind (ppr bind)
+      -- NB: previously the assertion printed 'rhs' and 'bind'
+      --     as well as 'id', but that led to a black hole
+      --     where printing the assertion error tripped the
+      --     assertion again!
+    (env', ccs', bind)
+
+coreTopBindToStg dflags this_mod env ccs (Rec pairs)
+  = ASSERT( not (null pairs) )
+    let
+        binders = map fst pairs
+
+        extra_env' = [ (b, LetBound TopLet $! manifestArity rhs)
+                     | (b, rhs) <- pairs ]
+        env' = extendVarEnvList env extra_env'
+
+        -- generate StgTopBindings and CAF cost centres created for CAFs
+        (ccs', stg_rhss)
+          = initCts dflags env' $ do
+               mapAccumLM (\ccs rhs -> do
+                            (rhs', ccs') <-
+                              coreToTopStgRhs dflags ccs this_mod rhs
+                            return (ccs', rhs'))
+                          ccs
+                          pairs
+
+        bind = StgTopLifted $ StgRec (zip binders stg_rhss)
+    in
+    assertConsistentCafInfo dflags (head binders) bind (ppr binders)
+    (env', ccs', bind)
+
+-- | CAF consistency issues will generally result in segfaults and are quite
+-- difficult to debug (see #16846). We enable checking of the
+-- 'consistentCafInfo' invariant with @-dstg-lint@ to increase the chance that
+-- we catch these issues.
+assertConsistentCafInfo :: DynFlags -> Id -> StgTopBinding -> SDoc -> a -> a
+assertConsistentCafInfo dflags id bind err_doc result
+  | gopt Opt_DoStgLinting dflags || debugIsOn
+  , not $ consistentCafInfo id bind = pprPanic "assertConsistentCafInfo" err_doc
+  | otherwise = result
+
+-- Assertion helper: this checks that the CafInfo on the Id matches
+-- what CoreToStg has figured out about the binding's SRT.  The
+-- CafInfo will be exact in all cases except when CorePrep has
+-- floated out a binding, in which case it will be approximate.
+consistentCafInfo :: Id -> StgTopBinding -> Bool
+consistentCafInfo id bind
+  = WARN( not (exact || is_sat_thing) , ppr id <+> ppr id_marked_caffy <+> ppr binding_is_caffy )
+    safe
+  where
+    safe  = id_marked_caffy || not binding_is_caffy
+    exact = id_marked_caffy == binding_is_caffy
+    id_marked_caffy  = mayHaveCafRefs (idCafInfo id)
+    binding_is_caffy = topStgBindHasCafRefs bind
+    is_sat_thing = occNameFS (nameOccName (idName id)) == fsLit "sat"
+
+coreToTopStgRhs
+        :: DynFlags
+        -> CollectedCCs
+        -> Module
+        -> (Id,CoreExpr)
+        -> CtsM (StgRhs, CollectedCCs)
+
+coreToTopStgRhs dflags ccs this_mod (bndr, rhs)
+  = do { new_rhs <- coreToStgExpr rhs
+
+       ; let (stg_rhs, ccs') =
+               mkTopStgRhs dflags this_mod ccs bndr new_rhs
+             stg_arity =
+               stgRhsArity stg_rhs
+
+       ; return (ASSERT2( arity_ok stg_arity, mk_arity_msg stg_arity) stg_rhs,
+                 ccs') }
+  where
+        -- It's vital that the arity on a top-level Id matches
+        -- the arity of the generated STG binding, else an importing
+        -- module will use the wrong calling convention
+        --      (#2844 was an example where this happened)
+        -- NB1: we can't move the assertion further out without
+        --      blocking the "knot" tied in coreTopBindsToStg
+        -- NB2: the arity check is only needed for Ids with External
+        --      Names, because they are externally visible.  The CorePrep
+        --      pass introduces "sat" things with Local Names and does
+        --      not bother to set their Arity info, so don't fail for those
+    arity_ok stg_arity
+       | isExternalName (idName bndr) = id_arity == stg_arity
+       | otherwise                    = True
+    id_arity  = idArity bndr
+    mk_arity_msg stg_arity
+        = vcat [ppr bndr,
+                text "Id arity:" <+> ppr id_arity,
+                text "STG arity:" <+> ppr stg_arity]
+
+-- ---------------------------------------------------------------------------
+-- Expressions
+-- ---------------------------------------------------------------------------
+
+coreToStgExpr
+        :: CoreExpr
+        -> CtsM StgExpr
+
+-- The second and third components can be derived in a simple bottom up pass, not
+-- dependent on any decisions about which variables will be let-no-escaped or
+-- not.  The first component, that is, the decorated expression, may then depend
+-- on these components, but it in turn is not scrutinised as the basis for any
+-- decisions.  Hence no black holes.
+
+-- No LitInteger's or LitNatural's should be left by the time this is called.
+-- CorePrep should have converted them all to a real core representation.
+coreToStgExpr (Lit (LitNumber LitNumInteger _ _)) = panic "coreToStgExpr: LitInteger"
+coreToStgExpr (Lit (LitNumber LitNumNatural _ _)) = panic "coreToStgExpr: LitNatural"
+coreToStgExpr (Lit l)      = return (StgLit l)
+coreToStgExpr (App (Lit LitRubbish) _some_unlifted_type)
+  -- We lower 'LitRubbish' to @()@ here, which is much easier than doing it in
+  -- a STG to Cmm pass.
+  = coreToStgExpr (Var unitDataConId)
+coreToStgExpr (Var v)      = coreToStgApp v               [] []
+coreToStgExpr (Coercion _) = coreToStgApp coercionTokenId [] []
+
+coreToStgExpr expr@(App _ _)
+  = coreToStgApp f args ticks
+  where
+    (f, args, ticks) = myCollectArgs expr
+
+coreToStgExpr expr@(Lam _ _)
+  = let
+        (args, body) = myCollectBinders expr
+        args'        = filterStgBinders args
+    in
+    extendVarEnvCts [ (a, LambdaBound) | a <- args' ] $ do
+    body' <- coreToStgExpr body
+    let
+        result_expr = case nonEmpty args' of
+          Nothing     -> body'
+          Just args'' -> StgLam args'' body'
+
+    return result_expr
+
+coreToStgExpr (Tick tick expr)
+  = do case tick of
+         HpcTick{}    -> return ()
+         ProfNote{}   -> return ()
+         SourceNote{} -> return ()
+         Breakpoint{} -> panic "coreToStgExpr: breakpoint should not happen"
+       expr2 <- coreToStgExpr expr
+       return (StgTick tick expr2)
+
+coreToStgExpr (Cast expr _)
+  = coreToStgExpr expr
+
+-- Cases require a little more real work.
+
+coreToStgExpr (Case scrut _ _ [])
+  = coreToStgExpr scrut
+    -- See Note [Empty case alternatives] in CoreSyn If the case
+    -- alternatives are empty, the scrutinee must diverge or raise an
+    -- exception, so we can just dive into it.
+    --
+    -- Of course this may seg-fault if the scrutinee *does* return.  A
+    -- belt-and-braces approach would be to move this case into the
+    -- code generator, and put a return point anyway that calls a
+    -- runtime system error function.
+
+
+coreToStgExpr (Case scrut bndr _ alts) = do
+    alts2 <- extendVarEnvCts [(bndr, LambdaBound)] (mapM vars_alt alts)
+    scrut2 <- coreToStgExpr scrut
+    return (StgCase scrut2 bndr (mkStgAltType bndr alts) alts2)
+  where
+    vars_alt (con, binders, rhs)
+      | DataAlt c <- con, c == unboxedUnitDataCon
+      = -- This case is a bit smelly.
+        -- See Note [Nullary unboxed tuple] in Type.hs
+        -- where a nullary tuple is mapped to (State# World#)
+        ASSERT( null binders )
+        do { rhs2 <- coreToStgExpr rhs
+           ; return (DEFAULT, [], rhs2)  }
+      | otherwise
+      = let     -- Remove type variables
+            binders' = filterStgBinders binders
+        in
+        extendVarEnvCts [(b, LambdaBound) | b <- binders'] $ do
+        rhs2 <- coreToStgExpr rhs
+        return (con, binders', rhs2)
+
+coreToStgExpr (Let bind body) = do
+    coreToStgLet bind body
+
+coreToStgExpr e = pprPanic "coreToStgExpr" (ppr e)
+
+mkStgAltType :: Id -> [CoreAlt] -> AltType
+mkStgAltType bndr alts
+  | isUnboxedTupleType bndr_ty || isUnboxedSumType bndr_ty
+  = MultiValAlt (length prim_reps)  -- always use MultiValAlt for unboxed tuples
+
+  | otherwise
+  = case prim_reps of
+      [LiftedRep] -> case tyConAppTyCon_maybe (unwrapType bndr_ty) of
+        Just tc
+          | isAbstractTyCon tc -> look_for_better_tycon
+          | isAlgTyCon tc      -> AlgAlt tc
+          | otherwise          -> ASSERT2( _is_poly_alt_tycon tc, ppr tc )
+                                  PolyAlt
+        Nothing                -> PolyAlt
+      [unlifted] -> PrimAlt unlifted
+      not_unary  -> MultiValAlt (length not_unary)
+  where
+   bndr_ty   = idType bndr
+   prim_reps = typePrimRep bndr_ty
+
+   _is_poly_alt_tycon tc
+        =  isFunTyCon tc
+        || isPrimTyCon tc   -- "Any" is lifted but primitive
+        || isFamilyTyCon tc -- Type family; e.g. Any, or arising from strict
+                            -- function application where argument has a
+                            -- type-family type
+
+   -- Sometimes, the TyCon is a AbstractTyCon which may not have any
+   -- constructors inside it.  Then we may get a better TyCon by
+   -- grabbing the one from a constructor alternative
+   -- if one exists.
+   look_for_better_tycon
+        | ((DataAlt con, _, _) : _) <- data_alts =
+                AlgAlt (dataConTyCon con)
+        | otherwise =
+                ASSERT(null data_alts)
+                PolyAlt
+        where
+                (data_alts, _deflt) = findDefault alts
+
+-- ---------------------------------------------------------------------------
+-- Applications
+-- ---------------------------------------------------------------------------
+
+coreToStgApp :: Id            -- Function
+             -> [CoreArg]     -- Arguments
+             -> [Tickish Id]  -- Debug ticks
+             -> CtsM StgExpr
+coreToStgApp f args ticks = do
+    (args', ticks') <- coreToStgArgs args
+    how_bound <- lookupVarCts f
+
+    let
+        n_val_args       = valArgCount args
+
+        -- Mostly, the arity info of a function is in the fn's IdInfo
+        -- But new bindings introduced by CoreSat may not have no
+        -- arity info; it would do us no good anyway.  For example:
+        --      let f = \ab -> e in f
+        -- No point in having correct arity info for f!
+        -- Hence the hasArity stuff below.
+        -- NB: f_arity is only consulted for LetBound things
+        f_arity   = stgArity f how_bound
+        saturated = f_arity <= n_val_args
+
+        res_ty = exprType (mkApps (Var f) args)
+        app = case idDetails f of
+                DataConWorkId dc
+                  | saturated    -> StgConApp dc args'
+                                      (dropRuntimeRepArgs (fromMaybe [] (tyConAppArgs_maybe res_ty)))
+
+                -- Some primitive operator that might be implemented as a library call.
+                -- As described in Note [Primop wrappers] in PrimOp.hs, here we
+                -- turn unsaturated primop applications into applications of
+                -- the primop's wrapper.
+                PrimOpId op
+                  | saturated    -> StgOpApp (StgPrimOp op) args' res_ty
+                  | otherwise    -> StgApp (primOpWrapperId op) args'
+
+                -- A call to some primitive Cmm function.
+                FCallId (CCall (CCallSpec (StaticTarget _ lbl (Just pkgId) True)
+                                          PrimCallConv _))
+                                 -> ASSERT( saturated )
+                                    StgOpApp (StgPrimCallOp (PrimCall lbl pkgId)) args' res_ty
+
+                -- A regular foreign call.
+                FCallId call     -> ASSERT( saturated )
+                                    StgOpApp (StgFCallOp call (idType f)) args' res_ty
+
+                TickBoxOpId {}   -> pprPanic "coreToStg TickBox" $ ppr (f,args')
+                _other           -> StgApp f args'
+
+        tapp = foldr StgTick app (ticks ++ ticks')
+
+    -- Forcing these fixes a leak in the code generator, noticed while
+    -- profiling for trac #4367
+    app `seq` return tapp
+
+-- ---------------------------------------------------------------------------
+-- Argument lists
+-- This is the guy that turns applications into A-normal form
+-- ---------------------------------------------------------------------------
+
+coreToStgArgs :: [CoreArg] -> CtsM ([StgArg], [Tickish Id])
+coreToStgArgs []
+  = return ([], [])
+
+coreToStgArgs (Type _ : args) = do     -- Type argument
+    (args', ts) <- coreToStgArgs args
+    return (args', ts)
+
+coreToStgArgs (Coercion _ : args)  -- Coercion argument; replace with place holder
+  = do { (args', ts) <- coreToStgArgs args
+       ; return (StgVarArg coercionTokenId : args', ts) }
+
+coreToStgArgs (Tick t e : args)
+  = ASSERT( not (tickishIsCode t) )
+    do { (args', ts) <- coreToStgArgs (e : args)
+       ; return (args', t:ts) }
+
+coreToStgArgs (arg : args) = do         -- Non-type argument
+    (stg_args, ticks) <- coreToStgArgs args
+    arg' <- coreToStgExpr arg
+    let
+        (aticks, arg'') = stripStgTicksTop tickishFloatable arg'
+        stg_arg = case arg'' of
+                       StgApp v []        -> StgVarArg v
+                       StgConApp con [] _ -> StgVarArg (dataConWorkId con)
+                       StgLit lit         -> StgLitArg lit
+                       _                  -> pprPanic "coreToStgArgs" (ppr arg)
+
+        -- WARNING: what if we have an argument like (v `cast` co)
+        --          where 'co' changes the representation type?
+        --          (This really only happens if co is unsafe.)
+        -- Then all the getArgAmode stuff in CgBindery will set the
+        -- cg_rep of the CgIdInfo based on the type of v, rather
+        -- than the type of 'co'.
+        -- This matters particularly when the function is a primop
+        -- or foreign call.
+        -- Wanted: a better solution than this hacky warning
+
+    dflags <- getDynFlags
+    let
+        arg_rep = typePrimRep (exprType arg)
+        stg_arg_rep = typePrimRep (stgArgType stg_arg)
+        bad_args = not (primRepsCompatible dflags arg_rep stg_arg_rep)
+
+    WARN( bad_args, text "Dangerous-looking argument. Probable cause: bad unsafeCoerce#" $$ ppr arg )
+     return (stg_arg : stg_args, ticks ++ aticks)
+
+
+-- ---------------------------------------------------------------------------
+-- The magic for lets:
+-- ---------------------------------------------------------------------------
+
+coreToStgLet
+         :: CoreBind     -- bindings
+         -> CoreExpr     -- body
+         -> CtsM StgExpr -- new let
+
+coreToStgLet bind body = do
+    (bind2, body2)
+       <- do
+
+          ( bind2, env_ext)
+                <- vars_bind bind
+
+          -- Do the body
+          extendVarEnvCts env_ext $ do
+             body2 <- coreToStgExpr body
+
+             return (bind2, body2)
+
+        -- Compute the new let-expression
+    let
+        new_let | isJoinBind bind = StgLetNoEscape noExtFieldSilent bind2 body2
+                | otherwise       = StgLet noExtFieldSilent bind2 body2
+
+    return new_let
+  where
+    mk_binding binder rhs
+        = (binder, LetBound NestedLet (manifestArity rhs))
+
+    vars_bind :: CoreBind
+              -> CtsM (StgBinding,
+                       [(Id, HowBound)])  -- extension to environment
+
+    vars_bind (NonRec binder rhs) = do
+        rhs2 <- coreToStgRhs (binder,rhs)
+        let
+            env_ext_item = mk_binding binder rhs
+
+        return (StgNonRec binder rhs2, [env_ext_item])
+
+    vars_bind (Rec pairs)
+      =    let
+                binders = map fst pairs
+                env_ext = [ mk_binding b rhs
+                          | (b,rhs) <- pairs ]
+           in
+           extendVarEnvCts env_ext $ do
+              rhss2 <- mapM coreToStgRhs pairs
+              return (StgRec (binders `zip` rhss2), env_ext)
+
+coreToStgRhs :: (Id,CoreExpr)
+             -> CtsM StgRhs
+
+coreToStgRhs (bndr, rhs) = do
+    new_rhs <- coreToStgExpr rhs
+    return (mkStgRhs bndr new_rhs)
+
+-- Generate a top-level RHS. Any new cost centres generated for CAFs will be
+-- appended to `CollectedCCs` argument.
+mkTopStgRhs :: DynFlags -> Module -> CollectedCCs
+            -> Id -> StgExpr -> (StgRhs, CollectedCCs)
+
+mkTopStgRhs dflags this_mod ccs bndr rhs
+  | StgLam bndrs body <- rhs
+  = -- StgLam can't have empty arguments, so not CAF
+    ( StgRhsClosure noExtFieldSilent
+                    dontCareCCS
+                    ReEntrant
+                    (toList bndrs) body
+    , ccs )
+
+  | StgConApp con args _ <- unticked_rhs
+  , -- Dynamic StgConApps are updatable
+    not (isDllConApp dflags this_mod con args)
+  = -- CorePrep does this right, but just to make sure
+    ASSERT2( not (isUnboxedTupleCon con || isUnboxedSumCon con)
+           , ppr bndr $$ ppr con $$ ppr args)
+    ( StgRhsCon dontCareCCS con args, ccs )
+
+  -- Otherwise it's a CAF, see Note [Cost-centre initialization plan].
+  | gopt Opt_AutoSccsOnIndividualCafs dflags
+  = ( StgRhsClosure noExtFieldSilent
+                    caf_ccs
+                    upd_flag [] rhs
+    , collectCC caf_cc caf_ccs ccs )
+
+  | otherwise
+  = ( StgRhsClosure noExtFieldSilent
+                    all_cafs_ccs
+                    upd_flag [] rhs
+    , ccs )
+
+  where
+    unticked_rhs = stripStgTicksTopE (not . tickishIsCode) rhs
+
+    upd_flag | isUsedOnce (idDemandInfo bndr) = SingleEntry
+             | otherwise                      = Updatable
+
+    -- CAF cost centres generated for -fcaf-all
+    caf_cc = mkAutoCC bndr modl
+    caf_ccs = mkSingletonCCS caf_cc
+           -- careful: the binder might be :Main.main,
+           -- which doesn't belong to module mod_name.
+           -- bug #249, tests prof001, prof002
+    modl | Just m <- nameModule_maybe (idName bndr) = m
+         | otherwise = this_mod
+
+    -- default CAF cost centre
+    (_, all_cafs_ccs) = getAllCAFsCC this_mod
+
+-- Generate a non-top-level RHS. Cost-centre is always currentCCS,
+-- see Note [Cost-centre initialzation plan].
+mkStgRhs :: Id -> StgExpr -> StgRhs
+mkStgRhs bndr rhs
+  | StgLam bndrs body <- rhs
+  = StgRhsClosure noExtFieldSilent
+                  currentCCS
+                  ReEntrant
+                  (toList bndrs) body
+
+  | isJoinId bndr -- must be a nullary join point
+  = ASSERT(idJoinArity bndr == 0)
+    StgRhsClosure noExtFieldSilent
+                  currentCCS
+                  ReEntrant -- ignored for LNE
+                  [] rhs
+
+  | StgConApp con args _ <- unticked_rhs
+  = StgRhsCon currentCCS con args
+
+  | otherwise
+  = StgRhsClosure noExtFieldSilent
+                  currentCCS
+                  upd_flag [] rhs
+  where
+    unticked_rhs = stripStgTicksTopE (not . tickishIsCode) rhs
+
+    upd_flag | isUsedOnce (idDemandInfo bndr) = SingleEntry
+             | otherwise                      = Updatable
+
+  {-
+    SDM: disabled.  Eval/Apply can't handle functions with arity zero very
+    well; and making these into simple non-updatable thunks breaks other
+    assumptions (namely that they will be entered only once).
+
+    upd_flag | isPAP env rhs  = ReEntrant
+             | otherwise      = Updatable
+
+-- Detect thunks which will reduce immediately to PAPs, and make them
+-- non-updatable.  This has several advantages:
+--
+--         - the non-updatable thunk behaves exactly like the PAP,
+--
+--         - the thunk is more efficient to enter, because it is
+--           specialised to the task.
+--
+--         - we save one update frame, one stg_update_PAP, one update
+--           and lots of PAP_enters.
+--
+--         - in the case where the thunk is top-level, we save building
+--           a black hole and furthermore the thunk isn't considered to
+--           be a CAF any more, so it doesn't appear in any SRTs.
+--
+-- We do it here, because the arity information is accurate, and we need
+-- to do it before the SRT pass to save the SRT entries associated with
+-- any top-level PAPs.
+
+isPAP env (StgApp f args) = listLengthCmp args arity == LT -- idArity f > length args
+                              where
+                                 arity = stgArity f (lookupBinding env f)
+isPAP env _               = False
+
+-}
+
+{- ToDo:
+          upd = if isOnceDem dem
+                    then (if isNotTop toplev
+                            then SingleEntry    -- HA!  Paydirt for "dem"
+                            else
+                     (if debugIsOn then trace "WARNING: SE CAFs unsupported, forcing UPD instead" else id) $
+                     Updatable)
+                else Updatable
+        -- For now we forbid SingleEntry CAFs; they tickle the
+        -- ASSERT in rts/Storage.c line 215 at newCAF() re mut_link,
+        -- and I don't understand why.  There's only one SE_CAF (well,
+        -- only one that tickled a great gaping bug in an earlier attempt
+        -- at ClosureInfo.getEntryConvention) in the whole of nofib,
+        -- specifically Main.lvl6 in spectral/cryptarithm2.
+        -- So no great loss.  KSW 2000-07.
+-}
+
+-- ---------------------------------------------------------------------------
+-- A monad for the core-to-STG pass
+-- ---------------------------------------------------------------------------
+
+-- There's a lot of stuff to pass around, so we use this CtsM
+-- ("core-to-STG monad") monad to help.  All the stuff here is only passed
+-- *down*.
+
+newtype CtsM a = CtsM
+    { unCtsM :: DynFlags -- Needed for checking for bad coercions in coreToStgArgs
+             -> IdEnv HowBound
+             -> a
+    }
+    deriving (Functor)
+
+data HowBound
+  = ImportBound         -- Used only as a response to lookupBinding; never
+                        -- exists in the range of the (IdEnv HowBound)
+
+  | LetBound            -- A let(rec) in this module
+        LetInfo         -- Whether top level or nested
+        Arity           -- Its arity (local Ids don't have arity info at this point)
+
+  | LambdaBound         -- Used for both lambda and case
+  deriving (Eq)
+
+data LetInfo
+  = TopLet              -- top level things
+  | NestedLet
+  deriving (Eq)
+
+-- For a let(rec)-bound variable, x, we record LiveInfo, the set of
+-- variables that are live if x is live.  This LiveInfo comprises
+--         (a) dynamic live variables (ones with a non-top-level binding)
+--         (b) static live variabes (CAFs or things that refer to CAFs)
+--
+-- For "normal" variables (a) is just x alone.  If x is a let-no-escaped
+-- variable then x is represented by a code pointer and a stack pointer
+-- (well, one for each stack).  So all of the variables needed in the
+-- execution of x are live if x is, and are therefore recorded in the
+-- LetBound constructor; x itself *is* included.
+--
+-- The set of dynamic live variables is guaranteed ot have no further
+-- let-no-escaped variables in it.
+
+-- The std monad functions:
+
+initCts :: DynFlags -> IdEnv HowBound -> CtsM a -> a
+initCts dflags env m = unCtsM m dflags env
+
+
+
+{-# INLINE thenCts #-}
+{-# INLINE returnCts #-}
+
+returnCts :: a -> CtsM a
+returnCts e = CtsM $ \_ _ -> e
+
+thenCts :: CtsM a -> (a -> CtsM b) -> CtsM b
+thenCts m k = CtsM $ \dflags env
+  -> unCtsM (k (unCtsM m dflags env)) dflags env
+
+instance Applicative CtsM where
+    pure = returnCts
+    (<*>) = ap
+
+instance Monad CtsM where
+    (>>=)  = thenCts
+
+instance HasDynFlags CtsM where
+    getDynFlags = CtsM $ \dflags _ -> dflags
+
+-- Functions specific to this monad:
+
+extendVarEnvCts :: [(Id, HowBound)] -> CtsM a -> CtsM a
+extendVarEnvCts ids_w_howbound expr
+   =    CtsM $   \dflags env
+   -> unCtsM expr dflags (extendVarEnvList env ids_w_howbound)
+
+lookupVarCts :: Id -> CtsM HowBound
+lookupVarCts v = CtsM $ \_ env -> lookupBinding env v
+
+lookupBinding :: IdEnv HowBound -> Id -> HowBound
+lookupBinding env v = case lookupVarEnv env v of
+                        Just xx -> xx
+                        Nothing -> ASSERT2( isGlobalId v, ppr v ) ImportBound
+
+getAllCAFsCC :: Module -> (CostCentre, CostCentreStack)
+getAllCAFsCC this_mod =
+    let
+      span = mkGeneralSrcSpan (mkFastString "<entire-module>") -- XXX do better
+      all_cafs_cc  = mkAllCafsCC this_mod span
+      all_cafs_ccs = mkSingletonCCS all_cafs_cc
+    in
+      (all_cafs_cc, all_cafs_ccs)
+
+-- Misc.
+
+filterStgBinders :: [Var] -> [Var]
+filterStgBinders bndrs = filter isId bndrs
+
+myCollectBinders :: Expr Var -> ([Var], Expr Var)
+myCollectBinders expr
+  = go [] expr
+  where
+    go bs (Lam b e)          = go (b:bs) e
+    go bs (Cast e _)         = go bs e
+    go bs e                  = (reverse bs, e)
+
+-- | Precondition: argument expression is an 'App', and there is a 'Var' at the
+-- head of the 'App' chain.
+myCollectArgs :: CoreExpr -> (Id, [CoreArg], [Tickish Id])
+myCollectArgs expr
+  = go expr [] []
+  where
+    go (Var v)          as ts = (v, as, ts)
+    go (App f a)        as ts = go f (a:as) ts
+    go (Tick t e)       as ts = ASSERT( all isTypeArg as )
+                                go e as (t:ts) -- ticks can appear in type apps
+    go (Cast e _)       as ts = go e as ts
+    go (Lam b e)        as ts
+       | isTyVar b            = go e as ts -- Note [Collect args]
+    go _                _  _  = pprPanic "CoreToStg.myCollectArgs" (ppr expr)
+
+-- Note [Collect args]
+-- ~~~~~~~~~~~~~~~~~~~
+--
+-- This big-lambda case occurred following a rather obscure eta expansion.
+-- It all seems a bit yukky to me.
+
+stgArity :: Id -> HowBound -> Arity
+stgArity _ (LetBound _ arity) = arity
+stgArity f ImportBound        = idArity f
+stgArity _ LambdaBound        = 0
diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs
new file mode 100644
index 0000000000..ea020c5f9e
--- /dev/null
+++ b/compiler/GHC/CoreToStg/Prep.hs
@@ -0,0 +1,1733 @@
+{-
+(c) The University of Glasgow, 1994-2006
+
+
+Core pass to saturate constructors and PrimOps
+-}
+
+{-# LANGUAGE BangPatterns, CPP, MultiWayIf #-}
+
+module GHC.CoreToStg.Prep (
+      corePrepPgm, corePrepExpr, cvtLitInteger, cvtLitNatural,
+      lookupMkIntegerName, lookupIntegerSDataConName,
+      lookupMkNaturalName, lookupNaturalSDataConName
+  ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import OccurAnal
+
+import HscTypes
+import PrelNames
+import MkId             ( realWorldPrimId )
+import CoreUtils
+import CoreArity
+import CoreFVs
+import CoreMonad        ( CoreToDo(..) )
+import CoreLint         ( endPassIO )
+import CoreSyn
+import CoreSubst
+import MkCore hiding( FloatBind(..) )   -- We use our own FloatBind here
+import Type
+import Literal
+import Coercion
+import TcEnv
+import TyCon
+import Demand
+import Var
+import VarSet
+import VarEnv
+import Id
+import IdInfo
+import TysWiredIn
+import DataCon
+import BasicTypes
+import Module
+import UniqSupply
+import Maybes
+import OrdList
+import ErrUtils
+import DynFlags
+import Util
+import Outputable
+import GHC.Platform
+import FastString
+import Name             ( NamedThing(..), nameSrcSpan )
+import SrcLoc           ( SrcSpan(..), realSrcLocSpan, mkRealSrcLoc )
+import Data.Bits
+import MonadUtils       ( mapAccumLM )
+import Data.List        ( mapAccumL )
+import Control.Monad
+import CostCentre       ( CostCentre, ccFromThisModule )
+import qualified Data.Set as S
+
+{-
+-- ---------------------------------------------------------------------------
+-- Note [CorePrep Overview]
+-- ---------------------------------------------------------------------------
+
+The goal of this pass is to prepare for code generation.
+
+1.  Saturate constructor applications.
+
+2.  Convert to A-normal form; that is, function arguments
+    are always variables.
+
+    * Use case for strict arguments:
+        f E ==> case E of x -> f x
+        (where f is strict)
+
+    * Use let for non-trivial lazy arguments
+        f E ==> let x = E in f x
+        (were f is lazy and x is non-trivial)
+
+3.  Similarly, convert any unboxed lets into cases.
+    [I'm experimenting with leaving 'ok-for-speculation'
+     rhss in let-form right up to this point.]
+
+4.  Ensure that *value* lambdas only occur as the RHS of a binding
+    (The code generator can't deal with anything else.)
+    Type lambdas are ok, however, because the code gen discards them.
+
+5.  [Not any more; nuked Jun 2002] Do the seq/par munging.
+
+6.  Clone all local Ids.
+    This means that all such Ids are unique, rather than the
+    weaker guarantee of no clashes which the simplifier provides.
+    And that is what the code generator needs.
+
+    We don't clone TyVars or CoVars. The code gen doesn't need that,
+    and doing so would be tiresome because then we'd need
+    to substitute in types and coercions.
+
+7.  Give each dynamic CCall occurrence a fresh unique; this is
+    rather like the cloning step above.
+
+8.  Inject bindings for the "implicit" Ids:
+        * Constructor wrappers
+        * Constructor workers
+    We want curried definitions for all of these in case they
+    aren't inlined by some caller.
+
+9.  Replace (lazy e) by e.  See Note [lazyId magic] in MkId.hs
+    Also replace (noinline e) by e.
+
+10. Convert (LitInteger i t) into the core representation
+    for the Integer i. Normally this uses mkInteger, but if
+    we are using the integer-gmp implementation then there is a
+    special case where we use the S# constructor for Integers that
+    are in the range of Int.
+
+11. Same for LitNatural.
+
+12. Uphold tick consistency while doing this: We move ticks out of
+    (non-type) applications where we can, and make sure that we
+    annotate according to scoping rules when floating.
+
+13. Collect cost centres (including cost centres in unfoldings) if we're in
+    profiling mode. We have to do this here beucase we won't have unfoldings
+    after this pass (see `zapUnfolding` and Note [Drop unfoldings and rules].
+
+This is all done modulo type applications and abstractions, so that
+when type erasure is done for conversion to STG, we don't end up with
+any trivial or useless bindings.
+
+
+Note [CorePrep invariants]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+Here is the syntax of the Core produced by CorePrep:
+
+    Trivial expressions
+       arg ::= lit |  var
+              | arg ty  |  /\a. arg
+              | truv co  |  /\c. arg  |  arg |> co
+
+    Applications
+       app ::= lit  |  var  |  app arg  |  app ty  | app co | app |> co
+
+    Expressions
+       body ::= app
+              | let(rec) x = rhs in body     -- Boxed only
+              | case body of pat -> body
+              | /\a. body | /\c. body
+              | body |> co
+
+    Right hand sides (only place where value lambdas can occur)
+       rhs ::= /\a.rhs  |  \x.rhs  |  body
+
+We define a synonym for each of these non-terminals.  Functions
+with the corresponding name produce a result in that syntax.
+-}
+
+type CpeArg  = CoreExpr    -- Non-terminal 'arg'
+type CpeApp  = CoreExpr    -- Non-terminal 'app'
+type CpeBody = CoreExpr    -- Non-terminal 'body'
+type CpeRhs  = CoreExpr    -- Non-terminal 'rhs'
+
+{-
+************************************************************************
+*                                                                      *
+                Top level stuff
+*                                                                      *
+************************************************************************
+-}
+
+corePrepPgm :: HscEnv -> Module -> ModLocation -> CoreProgram -> [TyCon]
+            -> IO (CoreProgram, S.Set CostCentre)
+corePrepPgm hsc_env this_mod mod_loc binds data_tycons =
+    withTiming dflags
+               (text "CorePrep"<+>brackets (ppr this_mod))
+               (const ()) $ do
+    us <- mkSplitUniqSupply 's'
+    initialCorePrepEnv <- mkInitialCorePrepEnv dflags hsc_env
+
+    let cost_centres
+          | WayProf `elem` ways dflags
+          = collectCostCentres this_mod binds
+          | otherwise
+          = S.empty
+
+        implicit_binds = mkDataConWorkers dflags mod_loc data_tycons
+            -- NB: we must feed mkImplicitBinds through corePrep too
+            -- so that they are suitably cloned and eta-expanded
+
+        binds_out = initUs_ us $ do
+                      floats1 <- corePrepTopBinds initialCorePrepEnv binds
+                      floats2 <- corePrepTopBinds initialCorePrepEnv implicit_binds
+                      return (deFloatTop (floats1 `appendFloats` floats2))
+
+    endPassIO hsc_env alwaysQualify CorePrep binds_out []
+    return (binds_out, cost_centres)
+  where
+    dflags = hsc_dflags hsc_env
+
+corePrepExpr :: DynFlags -> HscEnv -> CoreExpr -> IO CoreExpr
+corePrepExpr dflags hsc_env expr =
+    withTiming dflags (text "CorePrep [expr]") (const ()) $ do
+    us <- mkSplitUniqSupply 's'
+    initialCorePrepEnv <- mkInitialCorePrepEnv dflags hsc_env
+    let new_expr = initUs_ us (cpeBodyNF initialCorePrepEnv expr)
+    dumpIfSet_dyn dflags Opt_D_dump_prep "CorePrep" FormatCore (ppr new_expr)
+    return new_expr
+
+corePrepTopBinds :: CorePrepEnv -> [CoreBind] -> UniqSM Floats
+-- Note [Floating out of top level bindings]
+corePrepTopBinds initialCorePrepEnv binds
+  = go initialCorePrepEnv binds
+  where
+    go _   []             = return emptyFloats
+    go env (bind : binds) = do (env', floats, maybe_new_bind)
+                                 <- cpeBind TopLevel env bind
+                               MASSERT(isNothing maybe_new_bind)
+                                 -- Only join points get returned this way by
+                                 -- cpeBind, and no join point may float to top
+                               floatss <- go env' binds
+                               return (floats `appendFloats` floatss)
+
+mkDataConWorkers :: DynFlags -> ModLocation -> [TyCon] -> [CoreBind]
+-- See Note [Data constructor workers]
+-- c.f. Note [Injecting implicit bindings] in TidyPgm
+mkDataConWorkers dflags mod_loc data_tycons
+  = [ NonRec id (tick_it (getName data_con) (Var id))
+                                -- The ice is thin here, but it works
+    | tycon <- data_tycons,     -- CorePrep will eta-expand it
+      data_con <- tyConDataCons tycon,
+      let id = dataConWorkId data_con
+    ]
+ where
+   -- If we want to generate debug info, we put a source note on the
+   -- worker. This is useful, especially for heap profiling.
+   tick_it name
+     | debugLevel dflags == 0                = id
+     | RealSrcSpan span <- nameSrcSpan name  = tick span
+     | Just file <- ml_hs_file mod_loc       = tick (span1 file)
+     | otherwise                             = tick (span1 "???")
+     where tick span  = Tick (SourceNote span $ showSDoc dflags (ppr name))
+           span1 file = realSrcLocSpan $ mkRealSrcLoc (mkFastString file) 1 1
+
+{-
+Note [Floating out of top level bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+NB: we do need to float out of top-level bindings
+Consider        x = length [True,False]
+We want to get
+                s1 = False : []
+                s2 = True  : s1
+                x  = length s2
+
+We return a *list* of bindings, because we may start with
+        x* = f (g y)
+where x is demanded, in which case we want to finish with
+        a = g y
+        x* = f a
+And then x will actually end up case-bound
+
+Note [CafInfo and floating]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+What happens when we try to float bindings to the top level?  At this
+point all the CafInfo is supposed to be correct, and we must make certain
+that is true of the new top-level bindings.  There are two cases
+to consider
+
+a) The top-level binding is marked asCafRefs.  In that case we are
+   basically fine.  The floated bindings had better all be lazy lets,
+   so they can float to top level, but they'll all have HasCafRefs
+   (the default) which is safe.
+
+b) The top-level binding is marked NoCafRefs.  This really happens
+   Example.  CoreTidy produces
+      $fApplicativeSTM [NoCafRefs] = D:Alternative retry# ...blah...
+   Now CorePrep has to eta-expand to
+      $fApplicativeSTM = let sat = \xy. retry x y
+                         in D:Alternative sat ...blah...
+   So what we *want* is
+      sat [NoCafRefs] = \xy. retry x y
+      $fApplicativeSTM [NoCafRefs] = D:Alternative sat ...blah...
+
+   So, gruesomely, we must set the NoCafRefs flag on the sat bindings,
+   *and* substitute the modified 'sat' into the old RHS.
+
+   It should be the case that 'sat' is itself [NoCafRefs] (a value, no
+   cafs) else the original top-level binding would not itself have been
+   marked [NoCafRefs].  The DEBUG check in CoreToStg for
+   consistentCafInfo will find this.
+
+This is all very gruesome and horrible. It would be better to figure
+out CafInfo later, after CorePrep.  We'll do that in due course.
+Meanwhile this horrible hack works.
+
+Note [Join points and floating]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Join points can float out of other join points but not out of value bindings:
+
+  let z =
+    let  w = ... in -- can float
+    join k = ... in -- can't float
+    ... jump k ...
+  join j x1 ... xn =
+    let  y = ... in -- can float (but don't want to)
+    join h = ... in -- can float (but not much point)
+    ... jump h ...
+  in ...
+
+Here, the jump to h remains valid if h is floated outward, but the jump to k
+does not.
+
+We don't float *out* of join points. It would only be safe to float out of
+nullary join points (or ones where the arguments are all either type arguments
+or dead binders). Nullary join points aren't ever recursive, so they're always
+effectively one-shot functions, which we don't float out of. We *could* float
+join points from nullary join points, but there's no clear benefit at this
+stage.
+
+Note [Data constructor workers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Create any necessary "implicit" bindings for data con workers.  We
+create the rather strange (non-recursive!) binding
+
+        $wC = \x y -> $wC x y
+
+i.e. a curried constructor that allocates.  This means that we can
+treat the worker for a constructor like any other function in the rest
+of the compiler.  The point here is that CoreToStg will generate a
+StgConApp for the RHS, rather than a call to the worker (which would
+give a loop).  As Lennart says: the ice is thin here, but it works.
+
+Hmm.  Should we create bindings for dictionary constructors?  They are
+always fully applied, and the bindings are just there to support
+partial applications. But it's easier to let them through.
+
+
+Note [Dead code in CorePrep]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Imagine that we got an input program like this (see #4962):
+
+  f :: Show b => Int -> (Int, b -> Maybe Int -> Int)
+  f x = (g True (Just x) + g () (Just x), g)
+    where
+      g :: Show a => a -> Maybe Int -> Int
+      g _ Nothing = x
+      g y (Just z) = if z > 100 then g y (Just (z + length (show y))) else g y unknown
+
+After specialisation and SpecConstr, we would get something like this:
+
+  f :: Show b => Int -> (Int, b -> Maybe Int -> Int)
+  f x = (g$Bool_True_Just x + g$Unit_Unit_Just x, g)
+    where
+      {-# RULES g $dBool = g$Bool
+                g $dUnit = g$Unit #-}
+      g = ...
+      {-# RULES forall x. g$Bool True (Just x) = g$Bool_True_Just x #-}
+      g$Bool = ...
+      {-# RULES forall x. g$Unit () (Just x) = g$Unit_Unit_Just x #-}
+      g$Unit = ...
+      g$Bool_True_Just = ...
+      g$Unit_Unit_Just = ...
+
+Note that the g$Bool and g$Unit functions are actually dead code: they
+are only kept alive by the occurrence analyser because they are
+referred to by the rules of g, which is being kept alive by the fact
+that it is used (unspecialised) in the returned pair.
+
+However, at the CorePrep stage there is no way that the rules for g
+will ever fire, and it really seems like a shame to produce an output
+program that goes to the trouble of allocating a closure for the
+unreachable g$Bool and g$Unit functions.
+
+The way we fix this is to:
+ * In cloneBndr, drop all unfoldings/rules
+
+ * In deFloatTop, run a simple dead code analyser on each top-level
+   RHS to drop the dead local bindings. For that call to OccAnal, we
+   disable the binder swap, else the occurrence analyser sometimes
+   introduces new let bindings for cased binders, which lead to the bug
+   in #5433.
+
+The reason we don't just OccAnal the whole output of CorePrep is that
+the tidier ensures that all top-level binders are GlobalIds, so they
+don't show up in the free variables any longer. So if you run the
+occurrence analyser on the output of CoreTidy (or later) you e.g. turn
+this program:
+
+  Rec {
+  f = ... f ...
+  }
+
+Into this one:
+
+  f = ... f ...
+
+(Since f is not considered to be free in its own RHS.)
+
+
+************************************************************************
+*                                                                      *
+                The main code
+*                                                                      *
+************************************************************************
+-}
+
+cpeBind :: TopLevelFlag -> CorePrepEnv -> CoreBind
+        -> UniqSM (CorePrepEnv,
+                   Floats,         -- Floating value bindings
+                   Maybe CoreBind) -- Just bind' <=> returned new bind; no float
+                                   -- Nothing <=> added bind' to floats instead
+cpeBind top_lvl env (NonRec bndr rhs)
+  | not (isJoinId bndr)
+  = do { (_, bndr1) <- cpCloneBndr env bndr
+       ; let dmd         = idDemandInfo bndr
+             is_unlifted = isUnliftedType (idType bndr)
+       ; (floats, rhs1) <- cpePair top_lvl NonRecursive
+                                   dmd is_unlifted
+                                   env bndr1 rhs
+       -- See Note [Inlining in CorePrep]
+       ; if exprIsTrivial rhs1 && isNotTopLevel top_lvl
+            then return (extendCorePrepEnvExpr env bndr rhs1, floats, Nothing)
+            else do {
+
+       ; let new_float = mkFloat dmd is_unlifted bndr1 rhs1
+
+       ; return (extendCorePrepEnv env bndr bndr1,
+                 addFloat floats new_float,
+                 Nothing) }}
+
+  | otherwise -- A join point; see Note [Join points and floating]
+  = ASSERT(not (isTopLevel top_lvl)) -- can't have top-level join point
+    do { (_, bndr1) <- cpCloneBndr env bndr
+       ; (bndr2, rhs1) <- cpeJoinPair env bndr1 rhs
+       ; return (extendCorePrepEnv env bndr bndr2,
+                 emptyFloats,
+                 Just (NonRec bndr2 rhs1)) }
+
+cpeBind top_lvl env (Rec pairs)
+  | not (isJoinId (head bndrs))
+  = do { (env', bndrs1) <- cpCloneBndrs env bndrs
+       ; stuff <- zipWithM (cpePair top_lvl Recursive topDmd False env')
+                           bndrs1 rhss
+
+       ; let (floats_s, rhss1) = unzip stuff
+             all_pairs = foldrOL add_float (bndrs1 `zip` rhss1)
+                                           (concatFloats floats_s)
+
+       ; return (extendCorePrepEnvList env (bndrs `zip` bndrs1),
+                 unitFloat (FloatLet (Rec all_pairs)),
+                 Nothing) }
+
+  | otherwise -- See Note [Join points and floating]
+  = do { (env', bndrs1) <- cpCloneBndrs env bndrs
+       ; pairs1 <- zipWithM (cpeJoinPair env') bndrs1 rhss
+
+       ; let bndrs2 = map fst pairs1
+       ; return (extendCorePrepEnvList env' (bndrs `zip` bndrs2),
+                 emptyFloats,
+                 Just (Rec pairs1)) }
+  where
+    (bndrs, rhss) = unzip pairs
+
+        -- Flatten all the floats, and the current
+        -- group into a single giant Rec
+    add_float (FloatLet (NonRec b r)) prs2 = (b,r) : prs2
+    add_float (FloatLet (Rec prs1))   prs2 = prs1 ++ prs2
+    add_float b                       _    = pprPanic "cpeBind" (ppr b)
+
+---------------
+cpePair :: TopLevelFlag -> RecFlag -> Demand -> Bool
+        -> CorePrepEnv -> OutId -> CoreExpr
+        -> UniqSM (Floats, CpeRhs)
+-- Used for all bindings
+-- The binder is already cloned, hence an OutId
+cpePair top_lvl is_rec dmd is_unlifted env bndr rhs
+  = ASSERT(not (isJoinId bndr)) -- those should use cpeJoinPair
+    do { (floats1, rhs1) <- cpeRhsE env rhs
+
+       -- See if we are allowed to float this stuff out of the RHS
+       ; (floats2, rhs2) <- float_from_rhs floats1 rhs1
+
+       -- Make the arity match up
+       ; (floats3, rhs3)
+            <- if manifestArity rhs1 <= arity
+               then return (floats2, cpeEtaExpand arity rhs2)
+               else WARN(True, text "CorePrep: silly extra arguments:" <+> ppr bndr)
+                               -- Note [Silly extra arguments]
+                    (do { v <- newVar (idType bndr)
+                        ; let float = mkFloat topDmd False v rhs2
+                        ; return ( addFloat floats2 float
+                                 , cpeEtaExpand arity (Var v)) })
+
+        -- Wrap floating ticks
+       ; let (floats4, rhs4) = wrapTicks floats3 rhs3
+
+       ; return (floats4, rhs4) }
+  where
+    platform = targetPlatform (cpe_dynFlags env)
+
+    arity = idArity bndr        -- We must match this arity
+
+    ---------------------
+    float_from_rhs floats rhs
+      | isEmptyFloats floats = return (emptyFloats, rhs)
+      | isTopLevel top_lvl   = float_top    floats rhs
+      | otherwise            = float_nested floats rhs
+
+    ---------------------
+    float_nested floats rhs
+      | wantFloatNested is_rec dmd is_unlifted floats rhs
+                  = return (floats, rhs)
+      | otherwise = dontFloat floats rhs
+
+    ---------------------
+    float_top floats rhs        -- Urhgh!  See Note [CafInfo and floating]
+      | mayHaveCafRefs (idCafInfo bndr)
+      , allLazyTop floats
+      = return (floats, rhs)
+
+      -- So the top-level binding is marked NoCafRefs
+      | Just (floats', rhs') <- canFloatFromNoCaf platform floats rhs
+      = return (floats', rhs')
+
+      | otherwise
+      = dontFloat floats rhs
+
+dontFloat :: Floats -> CpeRhs -> UniqSM (Floats, CpeBody)
+-- Non-empty floats, but do not want to float from rhs
+-- So wrap the rhs in the floats
+-- But: rhs1 might have lambdas, and we can't
+--      put them inside a wrapBinds
+dontFloat floats1 rhs
+  = do { (floats2, body) <- rhsToBody rhs
+        ; return (emptyFloats, wrapBinds floats1 $
+                               wrapBinds floats2 body) }
+
+{- Note [Silly extra arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we had this
+        f{arity=1} = \x\y. e
+We *must* match the arity on the Id, so we have to generate
+        f' = \x\y. e
+        f  = \x. f' x
+
+It's a bizarre case: why is the arity on the Id wrong?  Reason
+(in the days of __inline_me__):
+        f{arity=0} = __inline_me__ (let v = expensive in \xy. e)
+When InlineMe notes go away this won't happen any more.  But
+it seems good for CorePrep to be robust.
+-}
+
+---------------
+cpeJoinPair :: CorePrepEnv -> JoinId -> CoreExpr
+            -> UniqSM (JoinId, CpeRhs)
+-- Used for all join bindings
+-- No eta-expansion: see Note [Do not eta-expand join points] in SimplUtils
+cpeJoinPair env bndr rhs
+  = ASSERT(isJoinId bndr)
+    do { let Just join_arity = isJoinId_maybe bndr
+             (bndrs, body)   = collectNBinders join_arity rhs
+
+       ; (env', bndrs') <- cpCloneBndrs env bndrs
+
+       ; body' <- cpeBodyNF env' body -- Will let-bind the body if it starts
+                                      -- with a lambda
+
+       ; let rhs'  = mkCoreLams bndrs' body'
+             bndr' = bndr `setIdUnfolding` evaldUnfolding
+                          `setIdArity` count isId bndrs
+                            -- See Note [Arity and join points]
+
+       ; return (bndr', rhs') }
+
+{-
+Note [Arity and join points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Up to now, we've allowed a join point to have an arity greater than its join
+arity (minus type arguments), since this is what's useful for eta expansion.
+However, for code gen purposes, its arity must be exactly the number of value
+arguments it will be called with, and it must have exactly that many value
+lambdas. Hence if there are extra lambdas we must let-bind the body of the RHS:
+
+  join j x y z = \w -> ... in ...
+    =>
+  join j x y z = (let f = \w -> ... in f) in ...
+
+This is also what happens with Note [Silly extra arguments]. Note that it's okay
+for us to mess with the arity because a join point is never exported.
+-}
+
+-- ---------------------------------------------------------------------------
+--              CpeRhs: produces a result satisfying CpeRhs
+-- ---------------------------------------------------------------------------
+
+cpeRhsE :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CpeRhs)
+-- If
+--      e  ===>  (bs, e')
+-- then
+--      e = let bs in e'        (semantically, that is!)
+--
+-- For example
+--      f (g x)   ===>   ([v = g x], f v)
+
+cpeRhsE _env expr@(Type {})      = return (emptyFloats, expr)
+cpeRhsE _env expr@(Coercion {})  = return (emptyFloats, expr)
+cpeRhsE env (Lit (LitNumber LitNumInteger i _))
+    = cpeRhsE env (cvtLitInteger (cpe_dynFlags env) (getMkIntegerId env)
+                   (cpe_integerSDataCon env) i)
+cpeRhsE env (Lit (LitNumber LitNumNatural i _))
+    = cpeRhsE env (cvtLitNatural (cpe_dynFlags env) (getMkNaturalId env)
+                   (cpe_naturalSDataCon env) i)
+cpeRhsE _env expr@(Lit {}) = return (emptyFloats, expr)
+cpeRhsE env expr@(Var {})  = cpeApp env expr
+cpeRhsE env expr@(App {}) = cpeApp env expr
+
+cpeRhsE env (Let bind body)
+  = do { (env', bind_floats, maybe_bind') <- cpeBind NotTopLevel env bind
+       ; (body_floats, body') <- cpeRhsE env' body
+       ; let expr' = case maybe_bind' of Just bind' -> Let bind' body'
+                                         Nothing    -> body'
+       ; return (bind_floats `appendFloats` body_floats, expr') }
+
+cpeRhsE env (Tick tickish expr)
+  | tickishPlace tickish == PlaceNonLam && tickish `tickishScopesLike` SoftScope
+  = do { (floats, body) <- cpeRhsE env expr
+         -- See [Floating Ticks in CorePrep]
+       ; return (unitFloat (FloatTick tickish) `appendFloats` floats, body) }
+  | otherwise
+  = do { body <- cpeBodyNF env expr
+       ; return (emptyFloats, mkTick tickish' body) }
+  where
+    tickish' | Breakpoint n fvs <- tickish
+             -- See also 'substTickish'
+             = Breakpoint n (map (getIdFromTrivialExpr . lookupCorePrepEnv env) fvs)
+             | otherwise
+             = tickish
+
+cpeRhsE env (Cast expr co)
+   = do { (floats, expr') <- cpeRhsE env expr
+        ; return (floats, Cast expr' co) }
+
+cpeRhsE env expr@(Lam {})
+   = do { let (bndrs,body) = collectBinders expr
+        ; (env', bndrs') <- cpCloneBndrs env bndrs
+        ; body' <- cpeBodyNF env' body
+        ; return (emptyFloats, mkLams bndrs' body') }
+
+cpeRhsE env (Case scrut bndr ty alts)
+  = do { (floats, scrut') <- cpeBody env scrut
+       ; (env', bndr2) <- cpCloneBndr env bndr
+       ; let alts'
+                 -- This flag is intended to aid in debugging strictness
+                 -- analysis bugs. These are particularly nasty to chase down as
+                 -- they may manifest as segmentation faults. When this flag is
+                 -- enabled we instead produce an 'error' expression to catch
+                 -- the case where a function we think should bottom
+                 -- unexpectedly returns.
+               | gopt Opt_CatchBottoms (cpe_dynFlags env)
+               , not (altsAreExhaustive alts)
+               = addDefault alts (Just err)
+               | otherwise = alts
+               where err = mkRuntimeErrorApp rUNTIME_ERROR_ID ty
+                                             "Bottoming expression returned"
+       ; alts'' <- mapM (sat_alt env') alts'
+       ; return (floats, Case scrut' bndr2 ty alts'') }
+  where
+    sat_alt env (con, bs, rhs)
+       = do { (env2, bs') <- cpCloneBndrs env bs
+            ; rhs' <- cpeBodyNF env2 rhs
+            ; return (con, bs', rhs') }
+
+cvtLitInteger :: DynFlags -> Id -> Maybe DataCon -> Integer -> CoreExpr
+-- Here we convert a literal Integer to the low-level
+-- representation. Exactly how we do this depends on the
+-- library that implements Integer.  If it's GMP we
+-- use the S# data constructor for small literals.
+-- See Note [Integer literals] in Literal
+cvtLitInteger dflags _ (Just sdatacon) i
+  | inIntRange dflags i -- Special case for small integers
+    = mkConApp sdatacon [Lit (mkLitInt dflags i)]
+
+cvtLitInteger dflags mk_integer _ i
+    = mkApps (Var mk_integer) [isNonNegative, ints]
+  where isNonNegative = if i < 0 then mkConApp falseDataCon []
+                                 else mkConApp trueDataCon  []
+        ints = mkListExpr intTy (f (abs i))
+        f 0 = []
+        f x = let low  = x .&. mask
+                  high = x `shiftR` bits
+              in mkConApp intDataCon [Lit (mkLitInt dflags low)] : f high
+        bits = 31
+        mask = 2 ^ bits - 1
+
+cvtLitNatural :: DynFlags -> Id -> Maybe DataCon -> Integer -> CoreExpr
+-- Here we convert a literal Natural to the low-level
+-- representation.
+-- See Note [Natural literals] in Literal
+cvtLitNatural dflags _ (Just sdatacon) i
+  | inWordRange dflags i -- Special case for small naturals
+    = mkConApp sdatacon [Lit (mkLitWord dflags i)]
+
+cvtLitNatural dflags mk_natural _ i
+    = mkApps (Var mk_natural) [words]
+  where words = mkListExpr wordTy (f i)
+        f 0 = []
+        f x = let low  = x .&. mask
+                  high = x `shiftR` bits
+              in mkConApp wordDataCon [Lit (mkLitWord dflags low)] : f high
+        bits = 32
+        mask = 2 ^ bits - 1
+
+-- ---------------------------------------------------------------------------
+--              CpeBody: produces a result satisfying CpeBody
+-- ---------------------------------------------------------------------------
+
+-- | Convert a 'CoreExpr' so it satisfies 'CpeBody', without
+-- producing any floats (any generated floats are immediately
+-- let-bound using 'wrapBinds').  Generally you want this, esp.
+-- when you've reached a binding form (e.g., a lambda) and
+-- floating any further would be incorrect.
+cpeBodyNF :: CorePrepEnv -> CoreExpr -> UniqSM CpeBody
+cpeBodyNF env expr
+  = do { (floats, body) <- cpeBody env expr
+       ; return (wrapBinds floats body) }
+
+-- | Convert a 'CoreExpr' so it satisfies 'CpeBody'; also produce
+-- a list of 'Floats' which are being propagated upwards.  In
+-- fact, this function is used in only two cases: to
+-- implement 'cpeBodyNF' (which is what you usually want),
+-- and in the case when a let-binding is in a case scrutinee--here,
+-- we can always float out:
+--
+--      case (let x = y in z) of ...
+--      ==> let x = y in case z of ...
+--
+cpeBody :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CpeBody)
+cpeBody env expr
+  = do { (floats1, rhs) <- cpeRhsE env expr
+       ; (floats2, body) <- rhsToBody rhs
+       ; return (floats1 `appendFloats` floats2, body) }
+
+--------
+rhsToBody :: CpeRhs -> UniqSM (Floats, CpeBody)
+-- Remove top level lambdas by let-binding
+
+rhsToBody (Tick t expr)
+  | tickishScoped t == NoScope  -- only float out of non-scoped annotations
+  = do { (floats, expr') <- rhsToBody expr
+       ; return (floats, mkTick t expr') }
+
+rhsToBody (Cast e co)
+        -- You can get things like
+        --      case e of { p -> coerce t (\s -> ...) }
+  = do { (floats, e') <- rhsToBody e
+       ; return (floats, Cast e' co) }
+
+rhsToBody expr@(Lam {})
+  | Just no_lam_result <- tryEtaReducePrep bndrs body
+  = return (emptyFloats, no_lam_result)
+  | all isTyVar bndrs           -- Type lambdas are ok
+  = return (emptyFloats, expr)
+  | otherwise                   -- Some value lambdas
+  = do { fn <- newVar (exprType expr)
+       ; let rhs   = cpeEtaExpand (exprArity expr) expr
+             float = FloatLet (NonRec fn rhs)
+       ; return (unitFloat float, Var fn) }
+  where
+    (bndrs,body) = collectBinders expr
+
+rhsToBody expr = return (emptyFloats, expr)
+
+
+
+-- ---------------------------------------------------------------------------
+--              CpeApp: produces a result satisfying CpeApp
+-- ---------------------------------------------------------------------------
+
+data ArgInfo = CpeApp  CoreArg
+             | CpeCast Coercion
+             | CpeTick (Tickish Id)
+
+{- Note [runRW arg]
+~~~~~~~~~~~~~~~~~~~
+If we got, say
+   runRW# (case bot of {})
+which happened in #11291, we do /not/ want to turn it into
+   (case bot of {}) realWorldPrimId#
+because that gives a panic in CoreToStg.myCollectArgs, which expects
+only variables in function position.  But if we are sure to make
+runRW# strict (which we do in MkId), this can't happen
+-}
+
+cpeApp :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CpeRhs)
+-- May return a CpeRhs because of saturating primops
+cpeApp top_env expr
+  = do { let (terminal, args, depth) = collect_args expr
+       ; cpe_app top_env terminal args depth
+       }
+
+  where
+    -- We have a nested data structure of the form
+    -- e `App` a1 `App` a2 ... `App` an, convert it into
+    -- (e, [CpeApp a1, CpeApp a2, ..., CpeApp an], depth)
+    -- We use 'ArgInfo' because we may also need to
+    -- record casts and ticks.  Depth counts the number
+    -- of arguments that would consume strictness information
+    -- (so, no type or coercion arguments.)
+    collect_args :: CoreExpr -> (CoreExpr, [ArgInfo], Int)
+    collect_args e = go e [] 0
+      where
+        go (App fun arg)      as !depth
+            = go fun (CpeApp arg : as)
+                (if isTyCoArg arg then depth else depth + 1)
+        go (Cast fun co)      as depth
+            = go fun (CpeCast co : as) depth
+        go (Tick tickish fun) as depth
+            | tickishPlace tickish == PlaceNonLam
+            && tickish `tickishScopesLike` SoftScope
+            = go fun (CpeTick tickish : as) depth
+        go terminal as depth = (terminal, as, depth)
+
+    cpe_app :: CorePrepEnv
+            -> CoreExpr
+            -> [ArgInfo]
+            -> Int
+            -> UniqSM (Floats, CpeRhs)
+    cpe_app env (Var f) (CpeApp Type{} : CpeApp arg : args) depth
+        | f `hasKey` lazyIdKey          -- Replace (lazy a) with a, and
+       || f `hasKey` noinlineIdKey      -- Replace (noinline a) with a
+        -- Consider the code:
+        --
+        --      lazy (f x) y
+        --
+        -- We need to make sure that we need to recursively collect arguments on
+        -- "f x", otherwise we'll float "f x" out (it's not a variable) and
+        -- end up with this awful -ddump-prep:
+        --
+        --      case f x of f_x {
+        --        __DEFAULT -> f_x y
+        --      }
+        --
+        -- rather than the far superior "f x y".  Test case is par01.
+        = let (terminal, args', depth') = collect_args arg
+          in cpe_app env terminal (args' ++ args) (depth + depth' - 1)
+    cpe_app env (Var f) [CpeApp _runtimeRep@Type{}, CpeApp _type@Type{}, CpeApp arg] 1
+        | f `hasKey` runRWKey
+        -- See Note [runRW magic]
+        -- Replace (runRW# f) by (f realWorld#), beta reducing if possible (this
+        -- is why we return a CorePrepEnv as well)
+        = case arg of
+            Lam s body -> cpe_app (extendCorePrepEnv env s realWorldPrimId) body [] 0
+            _          -> cpe_app env arg [CpeApp (Var realWorldPrimId)] 1
+    cpe_app env (Var v) args depth
+      = do { v1 <- fiddleCCall v
+           ; let e2 = lookupCorePrepEnv env v1
+                 hd = getIdFromTrivialExpr_maybe e2
+           -- NB: depth from collect_args is right, because e2 is a trivial expression
+           -- and thus its embedded Id *must* be at the same depth as any
+           -- Apps it is under are type applications only (c.f.
+           -- exprIsTrivial).  But note that we need the type of the
+           -- expression, not the id.
+           ; (app, floats) <- rebuild_app args e2 (exprType e2) emptyFloats stricts
+           ; mb_saturate hd app floats depth }
+        where
+          stricts = case idStrictness v of
+                            StrictSig (DmdType _ demands _)
+                              | listLengthCmp demands depth /= GT -> demands
+                                    -- length demands <= depth
+                              | otherwise                         -> []
+                -- If depth < length demands, then we have too few args to
+                -- satisfy strictness  info so we have to  ignore all the
+                -- strictness info, e.g. + (error "urk")
+                -- Here, we can't evaluate the arg strictly, because this
+                -- partial application might be seq'd
+
+        -- We inlined into something that's not a var and has no args.
+        -- Bounce it back up to cpeRhsE.
+    cpe_app env fun [] _ = cpeRhsE env fun
+
+        -- N-variable fun, better let-bind it
+    cpe_app env fun args depth
+      = do { (fun_floats, fun') <- cpeArg env evalDmd fun ty
+                          -- The evalDmd says that it's sure to be evaluated,
+                          -- so we'll end up case-binding it
+           ; (app, floats) <- rebuild_app args fun' ty fun_floats []
+           ; mb_saturate Nothing app floats depth }
+        where
+          ty = exprType fun
+
+    -- Saturate if necessary
+    mb_saturate head app floats depth =
+       case head of
+         Just fn_id -> do { sat_app <- maybeSaturate fn_id app depth
+                          ; return (floats, sat_app) }
+         _other              -> return (floats, app)
+
+    -- Deconstruct and rebuild the application, floating any non-atomic
+    -- arguments to the outside.  We collect the type of the expression,
+    -- the head of the application, and the number of actual value arguments,
+    -- all of which are used to possibly saturate this application if it
+    -- has a constructor or primop at the head.
+    rebuild_app
+        :: [ArgInfo]                  -- The arguments (inner to outer)
+        -> CpeApp
+        -> Type
+        -> Floats
+        -> [Demand]
+        -> UniqSM (CpeApp, Floats)
+    rebuild_app [] app _ floats ss = do
+      MASSERT(null ss) -- make sure we used all the strictness info
+      return (app, floats)
+    rebuild_app (a : as) fun' fun_ty floats ss = case a of
+      CpeApp arg@(Type arg_ty) ->
+        rebuild_app as (App fun' arg) (piResultTy fun_ty arg_ty) floats ss
+      CpeApp arg@(Coercion {}) ->
+        rebuild_app as (App fun' arg) (funResultTy fun_ty) floats ss
+      CpeApp arg -> do
+        let (ss1, ss_rest)  -- See Note [lazyId magic] in MkId
+               = case (ss, isLazyExpr arg) of
+                   (_   : ss_rest, True)  -> (topDmd, ss_rest)
+                   (ss1 : ss_rest, False) -> (ss1,    ss_rest)
+                   ([],            _)     -> (topDmd, [])
+            (arg_ty, res_ty) = expectJust "cpeBody:collect_args" $
+                               splitFunTy_maybe fun_ty
+        (fs, arg') <- cpeArg top_env ss1 arg arg_ty
+        rebuild_app as (App fun' arg') res_ty (fs `appendFloats` floats) ss_rest
+      CpeCast co ->
+        let ty2 = coercionRKind co
+        in rebuild_app as (Cast fun' co) ty2 floats ss
+      CpeTick tickish ->
+        -- See [Floating Ticks in CorePrep]
+        rebuild_app as fun' fun_ty (addFloat floats (FloatTick tickish)) ss
+
+isLazyExpr :: CoreExpr -> Bool
+-- See Note [lazyId magic] in MkId
+isLazyExpr (Cast e _)              = isLazyExpr e
+isLazyExpr (Tick _ e)              = isLazyExpr e
+isLazyExpr (Var f `App` _ `App` _) = f `hasKey` lazyIdKey
+isLazyExpr _                       = False
+
+{- Note [runRW magic]
+~~~~~~~~~~~~~~~~~~~~~
+Some definitions, for instance @runST@, must have careful control over float out
+of the bindings in their body. Consider this use of @runST@,
+
+    f x = runST ( \ s -> let (a, s')  = newArray# 100 [] s
+                             (_, s'') = fill_in_array_or_something a x s'
+                         in freezeArray# a s'' )
+
+If we inline @runST@, we'll get:
+
+    f x = let (a, s')  = newArray# 100 [] realWorld#{-NB-}
+              (_, s'') = fill_in_array_or_something a x s'
+          in freezeArray# a s''
+
+And now if we allow the @newArray#@ binding to float out to become a CAF,
+we end up with a result that is totally and utterly wrong:
+
+    f = let (a, s')  = newArray# 100 [] realWorld#{-NB-} -- YIKES!!!
+        in \ x ->
+            let (_, s'') = fill_in_array_or_something a x s'
+            in freezeArray# a s''
+
+All calls to @f@ will share a {\em single} array! Clearly this is nonsense and
+must be prevented.
+
+This is what @runRW#@ gives us: by being inlined extremely late in the
+optimization (right before lowering to STG, in CorePrep), we can ensure that
+no further floating will occur. This allows us to safely inline things like
+@runST@, which are otherwise needlessly expensive (see #10678 and #5916).
+
+'runRW' is defined (for historical reasons) in GHC.Magic, with a NOINLINE
+pragma.  It is levity-polymorphic.
+
+    runRW# :: forall (r1 :: RuntimeRep). (o :: TYPE r)
+           => (State# RealWorld -> (# State# RealWorld, o #))
+                              -> (# State# RealWorld, o #)
+
+It needs no special treatment in GHC except this special inlining here
+in CorePrep (and in ByteCodeGen).
+
+-- ---------------------------------------------------------------------------
+--      CpeArg: produces a result satisfying CpeArg
+-- ---------------------------------------------------------------------------
+
+Note [ANF-ising literal string arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Consider a program like,
+
+    data Foo = Foo Addr#
+
+    foo = Foo "turtle"#
+
+When we go to ANFise this we might think that we want to float the string
+literal like we do any other non-trivial argument. This would look like,
+
+    foo = u\ [] case "turtle"# of s { __DEFAULT__ -> Foo s }
+
+However, this 1) isn't necessary since strings are in a sense "trivial"; and 2)
+wreaks havoc on the CAF annotations that we produce here since we the result
+above is caffy since it is updateable. Ideally at some point in the future we
+would like to just float the literal to the top level as suggested in #11312,
+
+    s = "turtle"#
+    foo = Foo s
+
+However, until then we simply add a special case excluding literals from the
+floating done by cpeArg.
+-}
+
+-- | Is an argument okay to CPE?
+okCpeArg :: CoreExpr -> Bool
+-- Don't float literals. See Note [ANF-ising literal string arguments].
+okCpeArg (Lit _) = False
+-- Do not eta expand a trivial argument
+okCpeArg expr    = not (exprIsTrivial expr)
+
+-- This is where we arrange that a non-trivial argument is let-bound
+cpeArg :: CorePrepEnv -> Demand
+       -> CoreArg -> Type -> UniqSM (Floats, CpeArg)
+cpeArg env dmd arg arg_ty
+  = do { (floats1, arg1) <- cpeRhsE env arg     -- arg1 can be a lambda
+       ; (floats2, arg2) <- if want_float floats1 arg1
+                            then return (floats1, arg1)
+                            else dontFloat floats1 arg1
+                -- Else case: arg1 might have lambdas, and we can't
+                --            put them inside a wrapBinds
+
+       ; if okCpeArg arg2
+         then do { v <- newVar arg_ty
+                 ; let arg3      = cpeEtaExpand (exprArity arg2) arg2
+                       arg_float = mkFloat dmd is_unlifted v arg3
+                 ; return (addFloat floats2 arg_float, varToCoreExpr v) }
+         else return (floats2, arg2)
+       }
+  where
+    is_unlifted = isUnliftedType arg_ty
+    want_float  = wantFloatNested NonRecursive dmd is_unlifted
+
+{-
+Note [Floating unlifted arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider    C (let v* = expensive in v)
+
+where the "*" indicates "will be demanded".  Usually v will have been
+inlined by now, but let's suppose it hasn't (see #2756).  Then we
+do *not* want to get
+
+     let v* = expensive in C v
+
+because that has different strictness.  Hence the use of 'allLazy'.
+(NB: the let v* turns into a FloatCase, in mkLocalNonRec.)
+
+
+------------------------------------------------------------------------------
+-- Building the saturated syntax
+-- ---------------------------------------------------------------------------
+
+Note [Eta expansion of hasNoBinding things in CorePrep]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+maybeSaturate deals with eta expanding to saturate things that can't deal with
+unsaturated applications (identified by 'hasNoBinding', currently just
+foreign calls and unboxed tuple/sum constructors).
+
+Note that eta expansion in CorePrep is very fragile due to the "prediction" of
+CAFfyness made by TidyPgm (see Note [CAFfyness inconsistencies due to eta
+expansion in CorePrep] in TidyPgm for details.  We previously saturated primop
+applications here as well but due to this fragility (see #16846) we now deal
+with this another way, as described in Note [Primop wrappers] in PrimOp.
+
+It's quite likely that eta expansion of constructor applications will
+eventually break in a similar way to how primops did. We really should
+eliminate this case as well.
+-}
+
+maybeSaturate :: Id -> CpeApp -> Int -> UniqSM CpeRhs
+maybeSaturate fn expr n_args
+  | hasNoBinding fn        -- There's no binding
+  = return sat_expr
+
+  | otherwise
+  = return expr
+  where
+    fn_arity     = idArity fn
+    excess_arity = fn_arity - n_args
+    sat_expr     = cpeEtaExpand excess_arity expr
+
+{-
+************************************************************************
+*                                                                      *
+                Simple CoreSyn operations
+*                                                                      *
+************************************************************************
+-}
+
+{-
+-- -----------------------------------------------------------------------------
+--      Eta reduction
+-- -----------------------------------------------------------------------------
+
+Note [Eta expansion]
+~~~~~~~~~~~~~~~~~~~~~
+Eta expand to match the arity claimed by the binder Remember,
+CorePrep must not change arity
+
+Eta expansion might not have happened already, because it is done by
+the simplifier only when there at least one lambda already.
+
+NB1:we could refrain when the RHS is trivial (which can happen
+    for exported things).  This would reduce the amount of code
+    generated (a little) and make things a little words for
+    code compiled without -O.  The case in point is data constructor
+    wrappers.
+
+NB2: we have to be careful that the result of etaExpand doesn't
+   invalidate any of the assumptions that CorePrep is attempting
+   to establish.  One possible cause is eta expanding inside of
+   an SCC note - we're now careful in etaExpand to make sure the
+   SCC is pushed inside any new lambdas that are generated.
+
+Note [Eta expansion and the CorePrep invariants]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It turns out to be much much easier to do eta expansion
+*after* the main CorePrep stuff.  But that places constraints
+on the eta expander: given a CpeRhs, it must return a CpeRhs.
+
+For example here is what we do not want:
+                f = /\a -> g (h 3)      -- h has arity 2
+After ANFing we get
+                f = /\a -> let s = h 3 in g s
+and now we do NOT want eta expansion to give
+                f = /\a -> \ y -> (let s = h 3 in g s) y
+
+Instead CoreArity.etaExpand gives
+                f = /\a -> \y -> let s = h 3 in g s y
+
+-}
+
+cpeEtaExpand :: Arity -> CpeRhs -> CpeRhs
+cpeEtaExpand arity expr
+  | arity == 0 = expr
+  | otherwise  = etaExpand arity expr
+
+{-
+-- -----------------------------------------------------------------------------
+--      Eta reduction
+-- -----------------------------------------------------------------------------
+
+Why try eta reduction?  Hasn't the simplifier already done eta?
+But the simplifier only eta reduces if that leaves something
+trivial (like f, or f Int).  But for deLam it would be enough to
+get to a partial application:
+        case x of { p -> \xs. map f xs }
+    ==> case x of { p -> map f }
+-}
+
+-- When updating this function, make sure it lines up with
+-- CoreUtils.tryEtaReduce!
+tryEtaReducePrep :: [CoreBndr] -> CoreExpr -> Maybe CoreExpr
+tryEtaReducePrep bndrs expr@(App _ _)
+  | ok_to_eta_reduce f
+  , n_remaining >= 0
+  , and (zipWith ok bndrs last_args)
+  , not (any (`elemVarSet` fvs_remaining) bndrs)
+  , exprIsHNF remaining_expr   -- Don't turn value into a non-value
+                               -- else the behaviour with 'seq' changes
+  = Just remaining_expr
+  where
+    (f, args) = collectArgs expr
+    remaining_expr = mkApps f remaining_args
+    fvs_remaining = exprFreeVars remaining_expr
+    (remaining_args, last_args) = splitAt n_remaining args
+    n_remaining = length args - length bndrs
+
+    ok bndr (Var arg) = bndr == arg
+    ok _    _         = False
+
+    -- We can't eta reduce something which must be saturated.
+    ok_to_eta_reduce (Var f) = not (hasNoBinding f)
+    ok_to_eta_reduce _       = False -- Safe. ToDo: generalise
+
+
+tryEtaReducePrep bndrs (Tick tickish e)
+  | tickishFloatable tickish
+  = fmap (mkTick tickish) $ tryEtaReducePrep bndrs e
+
+tryEtaReducePrep _ _ = Nothing
+
+{-
+************************************************************************
+*                                                                      *
+                Floats
+*                                                                      *
+************************************************************************
+
+Note [Pin demand info on floats]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We pin demand info on floated lets, so that we can see the one-shot thunks.
+-}
+
+data FloatingBind
+  = FloatLet CoreBind    -- Rhs of bindings are CpeRhss
+                         -- They are always of lifted type;
+                         -- unlifted ones are done with FloatCase
+
+ | FloatCase
+      Id CpeBody
+      Bool              -- The bool indicates "ok-for-speculation"
+
+ -- | See Note [Floating Ticks in CorePrep]
+ | FloatTick (Tickish Id)
+
+data Floats = Floats OkToSpec (OrdList FloatingBind)
+
+instance Outputable FloatingBind where
+  ppr (FloatLet b) = ppr b
+  ppr (FloatCase b r ok) = brackets (ppr ok) <+> ppr b <+> equals <+> ppr r
+  ppr (FloatTick t) = ppr t
+
+instance Outputable Floats where
+  ppr (Floats flag fs) = text "Floats" <> brackets (ppr flag) <+>
+                         braces (vcat (map ppr (fromOL fs)))
+
+instance Outputable OkToSpec where
+  ppr OkToSpec    = text "OkToSpec"
+  ppr IfUnboxedOk = text "IfUnboxedOk"
+  ppr NotOkToSpec = text "NotOkToSpec"
+
+-- Can we float these binds out of the rhs of a let?  We cache this decision
+-- to avoid having to recompute it in a non-linear way when there are
+-- deeply nested lets.
+data OkToSpec
+   = OkToSpec           -- Lazy bindings of lifted type
+   | IfUnboxedOk        -- A mixture of lazy lifted bindings and n
+                        -- ok-to-speculate unlifted bindings
+   | NotOkToSpec        -- Some not-ok-to-speculate unlifted bindings
+
+mkFloat :: Demand -> Bool -> Id -> CpeRhs -> FloatingBind
+mkFloat dmd is_unlifted bndr rhs
+  | use_case  = FloatCase bndr rhs (exprOkForSpeculation rhs)
+  | is_hnf    = FloatLet (NonRec bndr                       rhs)
+  | otherwise = FloatLet (NonRec (setIdDemandInfo bndr dmd) rhs)
+                   -- See Note [Pin demand info on floats]
+  where
+    is_hnf    = exprIsHNF rhs
+    is_strict = isStrictDmd dmd
+    use_case  = is_unlifted || is_strict && not is_hnf
+                -- Don't make a case for a value binding,
+                -- even if it's strict.  Otherwise we get
+                --      case (\x -> e) of ...!
+
+emptyFloats :: Floats
+emptyFloats = Floats OkToSpec nilOL
+
+isEmptyFloats :: Floats -> Bool
+isEmptyFloats (Floats _ bs) = isNilOL bs
+
+wrapBinds :: Floats -> CpeBody -> CpeBody
+wrapBinds (Floats _ binds) body
+  = foldrOL mk_bind body binds
+  where
+    mk_bind (FloatCase bndr rhs _) body = mkDefaultCase rhs bndr body
+    mk_bind (FloatLet bind)        body = Let bind body
+    mk_bind (FloatTick tickish)    body = mkTick tickish body
+
+addFloat :: Floats -> FloatingBind -> Floats
+addFloat (Floats ok_to_spec floats) new_float
+  = Floats (combine ok_to_spec (check new_float)) (floats `snocOL` new_float)
+  where
+    check (FloatLet _) = OkToSpec
+    check (FloatCase _ _ ok_for_spec)
+        | ok_for_spec  =  IfUnboxedOk
+        | otherwise    =  NotOkToSpec
+    check FloatTick{}  = OkToSpec
+        -- The ok-for-speculation flag says that it's safe to
+        -- float this Case out of a let, and thereby do it more eagerly
+        -- We need the top-level flag because it's never ok to float
+        -- an unboxed binding to the top level
+
+unitFloat :: FloatingBind -> Floats
+unitFloat = addFloat emptyFloats
+
+appendFloats :: Floats -> Floats -> Floats
+appendFloats (Floats spec1 floats1) (Floats spec2 floats2)
+  = Floats (combine spec1 spec2) (floats1 `appOL` floats2)
+
+concatFloats :: [Floats] -> OrdList FloatingBind
+concatFloats = foldr (\ (Floats _ bs1) bs2 -> appOL bs1 bs2) nilOL
+
+combine :: OkToSpec -> OkToSpec -> OkToSpec
+combine NotOkToSpec _ = NotOkToSpec
+combine _ NotOkToSpec = NotOkToSpec
+combine IfUnboxedOk _ = IfUnboxedOk
+combine _ IfUnboxedOk = IfUnboxedOk
+combine _ _           = OkToSpec
+
+deFloatTop :: Floats -> [CoreBind]
+-- For top level only; we don't expect any FloatCases
+deFloatTop (Floats _ floats)
+  = foldrOL get [] floats
+  where
+    get (FloatLet b) bs = occurAnalyseRHSs b : bs
+    get (FloatCase var body _) bs  =
+      occurAnalyseRHSs (NonRec var body) : bs
+    get b _ = pprPanic "corePrepPgm" (ppr b)
+
+    -- See Note [Dead code in CorePrep]
+    occurAnalyseRHSs (NonRec x e) = NonRec x (occurAnalyseExpr_NoBinderSwap e)
+    occurAnalyseRHSs (Rec xes)    = Rec [(x, occurAnalyseExpr_NoBinderSwap e) | (x, e) <- xes]
+
+---------------------------------------------------------------------------
+
+canFloatFromNoCaf :: Platform -> Floats -> CpeRhs -> Maybe (Floats, CpeRhs)
+       -- Note [CafInfo and floating]
+canFloatFromNoCaf platform (Floats ok_to_spec fs) rhs
+  | OkToSpec <- ok_to_spec           -- Worth trying
+  , Just (subst, fs') <- go (emptySubst, nilOL) (fromOL fs)
+  = Just (Floats OkToSpec fs', subst_expr subst rhs)
+  | otherwise
+  = Nothing
+  where
+    subst_expr = substExpr (text "CorePrep")
+
+    go :: (Subst, OrdList FloatingBind) -> [FloatingBind]
+       -> Maybe (Subst, OrdList FloatingBind)
+
+    go (subst, fbs_out) [] = Just (subst, fbs_out)
+
+    go (subst, fbs_out) (FloatLet (NonRec b r) : fbs_in)
+      | rhs_ok r
+      = go (subst', fbs_out `snocOL` new_fb) fbs_in
+      where
+        (subst', b') = set_nocaf_bndr subst b
+        new_fb = FloatLet (NonRec b' (subst_expr subst r))
+
+    go (subst, fbs_out) (FloatLet (Rec prs) : fbs_in)
+      | all rhs_ok rs
+      = go (subst', fbs_out `snocOL` new_fb) fbs_in
+      where
+        (bs,rs) = unzip prs
+        (subst', bs') = mapAccumL set_nocaf_bndr subst bs
+        rs' = map (subst_expr subst') rs
+        new_fb = FloatLet (Rec (bs' `zip` rs'))
+
+    go (subst, fbs_out) (ft@FloatTick{} : fbs_in)
+      = go (subst, fbs_out `snocOL` ft) fbs_in
+
+    go _ _ = Nothing      -- Encountered a caffy binding
+
+    ------------
+    set_nocaf_bndr subst bndr
+      = (extendIdSubst subst bndr (Var bndr'), bndr')
+      where
+        bndr' = bndr `setIdCafInfo` NoCafRefs
+
+    ------------
+    rhs_ok :: CoreExpr -> Bool
+    -- We can only float to top level from a NoCaf thing if
+    -- the new binding is static. However it can't mention
+    -- any non-static things or it would *already* be Caffy
+    rhs_ok = rhsIsStatic platform (\_ -> False)
+                         (\_nt i -> pprPanic "rhsIsStatic" (integer i))
+                         -- Integer or Natural literals should not show up
+
+wantFloatNested :: RecFlag -> Demand -> Bool -> Floats -> CpeRhs -> Bool
+wantFloatNested is_rec dmd is_unlifted floats rhs
+  =  isEmptyFloats floats
+  || isStrictDmd dmd
+  || is_unlifted
+  || (allLazyNested is_rec floats && exprIsHNF rhs)
+        -- Why the test for allLazyNested?
+        --      v = f (x `divInt#` y)
+        -- we don't want to float the case, even if f has arity 2,
+        -- because floating the case would make it evaluated too early
+
+allLazyTop :: Floats -> Bool
+allLazyTop (Floats OkToSpec _) = True
+allLazyTop _                   = False
+
+allLazyNested :: RecFlag -> Floats -> Bool
+allLazyNested _      (Floats OkToSpec    _) = True
+allLazyNested _      (Floats NotOkToSpec _) = False
+allLazyNested is_rec (Floats IfUnboxedOk _) = isNonRec is_rec
+
+{-
+************************************************************************
+*                                                                      *
+                Cloning
+*                                                                      *
+************************************************************************
+-}
+
+-- ---------------------------------------------------------------------------
+--                      The environment
+-- ---------------------------------------------------------------------------
+
+-- Note [Inlining in CorePrep]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- There is a subtle but important invariant that must be upheld in the output
+-- of CorePrep: there are no "trivial" updatable thunks.  Thus, this Core
+-- is impermissible:
+--
+--      let x :: ()
+--          x = y
+--
+-- (where y is a reference to a GLOBAL variable).  Thunks like this are silly:
+-- they can always be profitably replaced by inlining x with y. Consequently,
+-- the code generator/runtime does not bother implementing this properly
+-- (specifically, there is no implementation of stg_ap_0_upd_info, which is the
+-- stack frame that would be used to update this thunk.  The "0" means it has
+-- zero free variables.)
+--
+-- In general, the inliner is good at eliminating these let-bindings.  However,
+-- there is one case where these trivial updatable thunks can arise: when
+-- we are optimizing away 'lazy' (see Note [lazyId magic], and also
+-- 'cpeRhsE'.)  Then, we could have started with:
+--
+--      let x :: ()
+--          x = lazy @ () y
+--
+-- which is a perfectly fine, non-trivial thunk, but then CorePrep will
+-- drop 'lazy', giving us 'x = y' which is trivial and impermissible.
+-- The solution is CorePrep to have a miniature inlining pass which deals
+-- with cases like this.  We can then drop the let-binding altogether.
+--
+-- Why does the removal of 'lazy' have to occur in CorePrep?
+-- The gory details are in Note [lazyId magic] in MkId, but the
+-- main reason is that lazy must appear in unfoldings (optimizer
+-- output) and it must prevent call-by-value for catch# (which
+-- is implemented by CorePrep.)
+--
+-- An alternate strategy for solving this problem is to have the
+-- inliner treat 'lazy e' as a trivial expression if 'e' is trivial.
+-- We decided not to adopt this solution to keep the definition
+-- of 'exprIsTrivial' simple.
+--
+-- There is ONE caveat however: for top-level bindings we have
+-- to preserve the binding so that we float the (hacky) non-recursive
+-- binding for data constructors; see Note [Data constructor workers].
+--
+-- Note [CorePrep inlines trivial CoreExpr not Id]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Why does cpe_env need to be an IdEnv CoreExpr, as opposed to an
+-- IdEnv Id?  Naively, we might conjecture that trivial updatable thunks
+-- as per Note [Inlining in CorePrep] always have the form
+-- 'lazy @ SomeType gbl_id'.  But this is not true: the following is
+-- perfectly reasonable Core:
+--
+--      let x :: ()
+--          x = lazy @ (forall a. a) y @ Bool
+--
+-- When we inline 'x' after eliminating 'lazy', we need to replace
+-- occurrences of 'x' with 'y @ bool', not just 'y'.  Situations like
+-- this can easily arise with higher-rank types; thus, cpe_env must
+-- map to CoreExprs, not Ids.
+
+data CorePrepEnv
+  = CPE { cpe_dynFlags        :: DynFlags
+        , cpe_env             :: IdEnv CoreExpr   -- Clone local Ids
+        -- ^ This environment is used for three operations:
+        --
+        --      1. To support cloning of local Ids so that they are
+        --      all unique (see item (6) of CorePrep overview).
+        --
+        --      2. To support beta-reduction of runRW, see
+        --      Note [runRW magic] and Note [runRW arg].
+        --
+        --      3. To let us inline trivial RHSs of non top-level let-bindings,
+        --      see Note [lazyId magic], Note [Inlining in CorePrep]
+        --      and Note [CorePrep inlines trivial CoreExpr not Id] (#12076)
+        , cpe_mkIntegerId     :: Id
+        , cpe_mkNaturalId     :: Id
+        , cpe_integerSDataCon :: Maybe DataCon
+        , cpe_naturalSDataCon :: Maybe DataCon
+    }
+
+lookupMkIntegerName :: DynFlags -> HscEnv -> IO Id
+lookupMkIntegerName dflags hsc_env
+    = guardIntegerUse dflags $ liftM tyThingId $
+      lookupGlobal hsc_env mkIntegerName
+
+lookupMkNaturalName :: DynFlags -> HscEnv -> IO Id
+lookupMkNaturalName dflags hsc_env
+    = guardNaturalUse dflags $ liftM tyThingId $
+      lookupGlobal hsc_env mkNaturalName
+
+-- See Note [The integer library] in PrelNames
+lookupIntegerSDataConName :: DynFlags -> HscEnv -> IO (Maybe DataCon)
+lookupIntegerSDataConName dflags hsc_env = case integerLibrary dflags of
+    IntegerGMP -> guardIntegerUse dflags $ liftM (Just . tyThingDataCon) $
+                  lookupGlobal hsc_env integerSDataConName
+    IntegerSimple -> return Nothing
+
+lookupNaturalSDataConName :: DynFlags -> HscEnv -> IO (Maybe DataCon)
+lookupNaturalSDataConName dflags hsc_env = case integerLibrary dflags of
+    IntegerGMP -> guardNaturalUse dflags $ liftM (Just . tyThingDataCon) $
+                  lookupGlobal hsc_env naturalSDataConName
+    IntegerSimple -> return Nothing
+
+-- | Helper for 'lookupMkIntegerName', 'lookupIntegerSDataConName'
+guardIntegerUse :: DynFlags -> IO a -> IO a
+guardIntegerUse dflags act
+  | thisPackage dflags == primUnitId
+  = return $ panic "Can't use Integer in ghc-prim"
+  | thisPackage dflags == integerUnitId
+  = return $ panic "Can't use Integer in integer-*"
+  | otherwise = act
+
+-- | Helper for 'lookupMkNaturalName', 'lookupNaturalSDataConName'
+--
+-- Just like we can't use Integer literals in `integer-*`, we can't use Natural
+-- literals in `base`. If we do, we get interface loading error for GHC.Natural.
+guardNaturalUse :: DynFlags -> IO a -> IO a
+guardNaturalUse dflags act
+  | thisPackage dflags == primUnitId
+  = return $ panic "Can't use Natural in ghc-prim"
+  | thisPackage dflags == integerUnitId
+  = return $ panic "Can't use Natural in integer-*"
+  | thisPackage dflags == baseUnitId
+  = return $ panic "Can't use Natural in base"
+  | otherwise = act
+
+mkInitialCorePrepEnv :: DynFlags -> HscEnv -> IO CorePrepEnv
+mkInitialCorePrepEnv dflags hsc_env
+    = do mkIntegerId <- lookupMkIntegerName dflags hsc_env
+         mkNaturalId <- lookupMkNaturalName dflags hsc_env
+         integerSDataCon <- lookupIntegerSDataConName dflags hsc_env
+         naturalSDataCon <- lookupNaturalSDataConName dflags hsc_env
+         return $ CPE {
+                      cpe_dynFlags = dflags,
+                      cpe_env = emptyVarEnv,
+                      cpe_mkIntegerId = mkIntegerId,
+                      cpe_mkNaturalId = mkNaturalId,
+                      cpe_integerSDataCon = integerSDataCon,
+                      cpe_naturalSDataCon = naturalSDataCon
+                  }
+
+extendCorePrepEnv :: CorePrepEnv -> Id -> Id -> CorePrepEnv
+extendCorePrepEnv cpe id id'
+    = cpe { cpe_env = extendVarEnv (cpe_env cpe) id (Var id') }
+
+extendCorePrepEnvExpr :: CorePrepEnv -> Id -> CoreExpr -> CorePrepEnv
+extendCorePrepEnvExpr cpe id expr
+    = cpe { cpe_env = extendVarEnv (cpe_env cpe) id expr }
+
+extendCorePrepEnvList :: CorePrepEnv -> [(Id,Id)] -> CorePrepEnv
+extendCorePrepEnvList cpe prs
+    = cpe { cpe_env = extendVarEnvList (cpe_env cpe)
+                        (map (\(id, id') -> (id, Var id')) prs) }
+
+lookupCorePrepEnv :: CorePrepEnv -> Id -> CoreExpr
+lookupCorePrepEnv cpe id
+  = case lookupVarEnv (cpe_env cpe) id of
+        Nothing  -> Var id
+        Just exp -> exp
+
+getMkIntegerId :: CorePrepEnv -> Id
+getMkIntegerId = cpe_mkIntegerId
+
+getMkNaturalId :: CorePrepEnv -> Id
+getMkNaturalId = cpe_mkNaturalId
+
+------------------------------------------------------------------------------
+-- Cloning binders
+-- ---------------------------------------------------------------------------
+
+cpCloneBndrs :: CorePrepEnv -> [InVar] -> UniqSM (CorePrepEnv, [OutVar])
+cpCloneBndrs env bs = mapAccumLM cpCloneBndr env bs
+
+cpCloneBndr  :: CorePrepEnv -> InVar -> UniqSM (CorePrepEnv, OutVar)
+cpCloneBndr env bndr
+  | not (isId bndr)
+  = return (env, bndr)
+
+  | otherwise
+  = do { bndr' <- clone_it bndr
+
+       -- Drop (now-useless) rules/unfoldings
+       -- See Note [Drop unfoldings and rules]
+       -- and Note [Preserve evaluatedness] in CoreTidy
+       ; let unfolding' = zapUnfolding (realIdUnfolding bndr)
+                          -- Simplifier will set the Id's unfolding
+
+             bndr'' = bndr' `setIdUnfolding`      unfolding'
+                            `setIdSpecialisation` emptyRuleInfo
+
+       ; return (extendCorePrepEnv env bndr bndr'', bndr'') }
+  where
+    clone_it bndr
+      | isLocalId bndr, not (isCoVar bndr)
+      = do { uniq <- getUniqueM; return (setVarUnique bndr uniq) }
+      | otherwise   -- Top level things, which we don't want
+                    -- to clone, have become GlobalIds by now
+                    -- And we don't clone tyvars, or coercion variables
+      = return bndr
+
+{- Note [Drop unfoldings and rules]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We want to drop the unfolding/rules on every Id:
+
+  - We are now past interface-file generation, and in the
+    codegen pipeline, so we really don't need full unfoldings/rules
+
+  - The unfolding/rule may be keeping stuff alive that we'd like
+    to discard.  See  Note [Dead code in CorePrep]
+
+  - Getting rid of unnecessary unfoldings reduces heap usage
+
+  - We are changing uniques, so if we didn't discard unfoldings/rules
+    we'd have to substitute in them
+
+HOWEVER, we want to preserve evaluated-ness;
+see Note [Preserve evaluatedness] in CoreTidy.
+-}
+
+------------------------------------------------------------------------------
+-- Cloning ccall Ids; each must have a unique name,
+-- to give the code generator a handle to hang it on
+-- ---------------------------------------------------------------------------
+
+fiddleCCall :: Id -> UniqSM Id
+fiddleCCall id
+  | isFCallId id = (id `setVarUnique`) <$> getUniqueM
+  | otherwise    = return id
+
+------------------------------------------------------------------------------
+-- Generating new binders
+-- ---------------------------------------------------------------------------
+
+newVar :: Type -> UniqSM Id
+newVar ty
+ = seqType ty `seq` do
+     uniq <- getUniqueM
+     return (mkSysLocalOrCoVar (fsLit "sat") uniq ty)
+
+
+------------------------------------------------------------------------------
+-- Floating ticks
+-- ---------------------------------------------------------------------------
+--
+-- Note [Floating Ticks in CorePrep]
+--
+-- It might seem counter-intuitive to float ticks by default, given
+-- that we don't actually want to move them if we can help it. On the
+-- other hand, nothing gets very far in CorePrep anyway, and we want
+-- to preserve the order of let bindings and tick annotations in
+-- relation to each other. For example, if we just wrapped let floats
+-- when they pass through ticks, we might end up performing the
+-- following transformation:
+--
+--   src<...> let foo = bar in baz
+--   ==>  let foo = src<...> bar in src<...> baz
+--
+-- Because the let-binding would float through the tick, and then
+-- immediately materialize, achieving nothing but decreasing tick
+-- accuracy. The only special case is the following scenario:
+--
+--   let foo = src<...> (let a = b in bar) in baz
+--   ==>  let foo = src<...> bar; a = src<...> b in baz
+--
+-- Here we would not want the source tick to end up covering "baz" and
+-- therefore refrain from pushing ticks outside. Instead, we copy them
+-- into the floating binds (here "a") in cpePair. Note that where "b"
+-- or "bar" are (value) lambdas we have to push the annotations
+-- further inside in order to uphold our rules.
+--
+-- All of this is implemented below in @wrapTicks@.
+
+-- | Like wrapFloats, but only wraps tick floats
+wrapTicks :: Floats -> CoreExpr -> (Floats, CoreExpr)
+wrapTicks (Floats flag floats0) expr =
+    (Floats flag (toOL $ reverse floats1), foldr mkTick expr (reverse ticks1))
+  where (floats1, ticks1) = foldlOL go ([], []) $ floats0
+        -- Deeply nested constructors will produce long lists of
+        -- redundant source note floats here. We need to eliminate
+        -- those early, as relying on mkTick to spot it after the fact
+        -- can yield O(n^3) complexity [#11095]
+        go (floats, ticks) (FloatTick t)
+          = ASSERT(tickishPlace t == PlaceNonLam)
+            (floats, if any (flip tickishContains t) ticks
+                     then ticks else t:ticks)
+        go (floats, ticks) f
+          = (foldr wrap f (reverse ticks):floats, ticks)
+
+        wrap t (FloatLet bind)    = FloatLet (wrapBind t bind)
+        wrap t (FloatCase b r ok) = FloatCase b (mkTick t r) ok
+        wrap _ other              = pprPanic "wrapTicks: unexpected float!"
+                                             (ppr other)
+        wrapBind t (NonRec binder rhs) = NonRec binder (mkTick t rhs)
+        wrapBind t (Rec pairs)         = Rec (mapSnd (mkTick t) pairs)
+
+------------------------------------------------------------------------------
+-- Collecting cost centres
+-- ---------------------------------------------------------------------------
+
+-- | Collect cost centres defined in the current module, including those in
+-- unfoldings.
+collectCostCentres :: Module -> CoreProgram -> S.Set CostCentre
+collectCostCentres mod_name
+  = foldl' go_bind S.empty
+  where
+    go cs e = case e of
+      Var{} -> cs
+      Lit{} -> cs
+      App e1 e2 -> go (go cs e1) e2
+      Lam _ e -> go cs e
+      Let b e -> go (go_bind cs b) e
+      Case scrt _ _ alts -> go_alts (go cs scrt) alts
+      Cast e _ -> go cs e
+      Tick (ProfNote cc _ _) e ->
+        go (if ccFromThisModule cc mod_name then S.insert cc cs else cs) e
+      Tick _ e -> go cs e
+      Type{} -> cs
+      Coercion{} -> cs
+
+    go_alts = foldl' (\cs (_con, _bndrs, e) -> go cs e)
+
+    go_bind :: S.Set CostCentre -> CoreBind -> S.Set CostCentre
+    go_bind cs (NonRec b e) =
+      go (maybe cs (go cs) (get_unf b)) e
+    go_bind cs (Rec bs) =
+      foldl' (\cs' (b, e) -> go (maybe cs' (go cs') (get_unf b)) e) cs bs
+
+    -- Unfoldings may have cost centres that in the original definion are
+    -- optimized away, see #5889.
+    get_unf = maybeUnfoldingTemplate . realIdUnfolding
diff --git a/compiler/GHC/Stg/CSE.hs b/compiler/GHC/Stg/CSE.hs
new file mode 100644
index 0000000000..66f5004b49
--- /dev/null
+++ b/compiler/GHC/Stg/CSE.hs
@@ -0,0 +1,483 @@
+{-# LANGUAGE TypeFamilies #-}
+
+{-|
+Note [CSE for Stg]
+~~~~~~~~~~~~~~~~~~
+This module implements a simple common subexpression elimination pass for STG.
+This is useful because there are expressions that we want to common up (because
+they are operationally equivalent), but that we cannot common up in Core, because
+their types differ.
+This was originally reported as #9291.
+
+There are two types of common code occurrences that we aim for, see
+note [Case 1: CSEing allocated closures] and
+note [Case 2: CSEing case binders] below.
+
+
+Note [Case 1: CSEing allocated closures]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The first kind of CSE opportunity we aim for is generated by this Haskell code:
+
+    bar :: a -> (Either Int a, Either Bool a)
+    bar x = (Right x, Right x)
+
+which produces this Core:
+
+    bar :: forall a. a -> (Either Int a, Either Bool a)
+    bar @a x = (Right @Int @a x, Right @Bool @a x)
+
+where the two components of the tuple are different terms, and cannot be
+commoned up (easily). On the STG level we have
+
+    bar [x] = let c1 = Right [x]
+                  c2 = Right [x]
+              in (c1,c2)
+
+and now it is obvious that we can write
+
+    bar [x] = let c1 = Right [x]
+              in (c1,c1)
+
+instead.
+
+
+Note [Case 2: CSEing case binders]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The second kind of CSE opportunity we aim for is more interesting, and
+came up in #9291 and #5344: The Haskell code
+
+    foo :: Either Int a -> Either Bool a
+    foo (Right x) = Right x
+    foo _         = Left False
+
+produces this Core
+
+    foo :: forall a. Either Int a -> Either Bool a
+    foo @a e = case e of b { Left n -> …
+                           , Right x -> Right @Bool @a x }
+
+where we cannot CSE `Right @Bool @a x` with the case binder `b` as they have
+different types. But in STG we have
+
+    foo [e] = case e of b { Left [n] -> …
+                          , Right [x] -> Right [x] }
+
+and nothing stops us from transforming that to
+
+    foo [e] = case e of b { Left [n] -> …
+                          , Right [x] -> b}
+
+
+Note [StgCse after unarisation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider two unboxed sum terms:
+
+    (# 1 | #) :: (# Int | Int# #)
+    (# 1 | #) :: (# Int | Int  #)
+
+These two terms are not equal as they unarise to different unboxed
+tuples. However if we run StgCse before Unarise, it'll think the two
+terms (# 1 | #) are equal, and replace one of these with a binder to
+the other. That's bad -- #15300.
+
+Solution: do unarise first.
+
+-}
+
+module GHC.Stg.CSE (stgCse) where
+
+import GhcPrelude
+
+import DataCon
+import Id
+import GHC.Stg.Syntax
+import Outputable
+import VarEnv
+import CoreSyn (AltCon(..))
+import Data.List (mapAccumL)
+import Data.Maybe (fromMaybe)
+import CoreMap
+import NameEnv
+import Control.Monad( (>=>) )
+
+--------------
+-- The Trie --
+--------------
+
+-- A lookup trie for data constructor applications, i.e.
+-- keys of type `(DataCon, [StgArg])`, following the patterns in TrieMap.
+
+data StgArgMap a = SAM
+    { sam_var :: DVarEnv a
+    , sam_lit :: LiteralMap a
+    }
+
+instance TrieMap StgArgMap where
+    type Key StgArgMap = StgArg
+    emptyTM  = SAM { sam_var = emptyTM
+                   , sam_lit = emptyTM }
+    lookupTM (StgVarArg var) = sam_var >.> lkDFreeVar var
+    lookupTM (StgLitArg lit) = sam_lit >.> lookupTM lit
+    alterTM  (StgVarArg var) f m = m { sam_var = sam_var m |> xtDFreeVar var f }
+    alterTM  (StgLitArg lit) f m = m { sam_lit = sam_lit m |> alterTM lit f }
+    foldTM k m = foldTM k (sam_var m) . foldTM k (sam_lit m)
+    mapTM f (SAM {sam_var = varm, sam_lit = litm}) =
+        SAM { sam_var = mapTM f varm, sam_lit = mapTM f litm }
+
+newtype ConAppMap a = CAM { un_cam :: DNameEnv (ListMap StgArgMap a) }
+
+instance TrieMap ConAppMap where
+    type Key ConAppMap = (DataCon, [StgArg])
+    emptyTM  = CAM emptyTM
+    lookupTM (dataCon, args) = un_cam >.> lkDNamed dataCon >=> lookupTM args
+    alterTM  (dataCon, args) f m =
+        m { un_cam = un_cam m |> xtDNamed dataCon |>> alterTM args f }
+    foldTM k = un_cam >.> foldTM (foldTM k)
+    mapTM f  = un_cam >.> mapTM (mapTM f) >.> CAM
+
+-----------------
+-- The CSE Env --
+-----------------
+
+-- | The CSE environment. See note [CseEnv Example]
+data CseEnv = CseEnv
+    { ce_conAppMap :: ConAppMap OutId
+        -- ^ The main component of the environment is the trie that maps
+        --   data constructor applications (with their `OutId` arguments)
+        --   to an in-scope name that can be used instead.
+        --   This name is always either a let-bound variable or a case binder.
+    , ce_subst     :: IdEnv OutId
+        -- ^ This substitution is applied to the code as we traverse it.
+        --   Entries have one of two reasons:
+        --
+        --   * The input might have shadowing (see Note [Shadowing]), so we have
+        --     to rename some binders as we traverse the tree.
+        --   * If we remove `let x = Con z` because  `let y = Con z` is in scope,
+        --     we note this here as x ↦ y.
+    , ce_bndrMap     :: IdEnv OutId
+        -- ^ If we come across a case expression case x as b of … with a trivial
+        --   binder, we add b ↦ x to this.
+        --   This map is *only* used when looking something up in the ce_conAppMap.
+        --   See Note [Trivial case scrutinee]
+    , ce_in_scope  :: InScopeSet
+        -- ^ The third component is an in-scope set, to rename away any
+        --   shadowing binders
+    }
+
+{-|
+Note [CseEnv Example]
+~~~~~~~~~~~~~~~~~~~~~
+The following tables shows how the CseEnvironment changes as code is traversed,
+as well as the changes to that code.
+
+  InExpr                         OutExpr
+     conAppMap                   subst          in_scope
+  ───────────────────────────────────────────────────────────
+  -- empty                       {}             {}
+  case … as a of {Con x y ->     case … as a of {Con x y ->
+  -- Con x y ↦ a                 {}             {a,x,y}
+  let b = Con x y                (removed)
+  -- Con x y ↦ a                 b↦a            {a,x,y,b}
+  let c = Bar a                  let c = Bar a
+  -- Con x y ↦ a, Bar a ↦ c      b↦a            {a,x,y,b,c}
+  let c = some expression        let c' = some expression
+  -- Con x y ↦ a, Bar a ↦ c      b↦a, c↦c',     {a,x,y,b,c,c'}
+  let d = Bar b                  (removed)
+  -- Con x y ↦ a, Bar a ↦ c      b↦a, c↦c', d↦c {a,x,y,b,c,c',d}
+  (a, b, c d)                    (a, a, c' c)
+-}
+
+initEnv :: InScopeSet -> CseEnv
+initEnv in_scope = CseEnv
+    { ce_conAppMap = emptyTM
+    , ce_subst     = emptyVarEnv
+    , ce_bndrMap   = emptyVarEnv
+    , ce_in_scope  = in_scope
+    }
+
+envLookup :: DataCon -> [OutStgArg] -> CseEnv -> Maybe OutId
+envLookup dataCon args env = lookupTM (dataCon, args') (ce_conAppMap env)
+  where args' = map go args -- See Note [Trivial case scrutinee]
+        go (StgVarArg v  ) = StgVarArg (fromMaybe v $ lookupVarEnv (ce_bndrMap env) v)
+        go (StgLitArg lit) = StgLitArg lit
+
+addDataCon :: OutId -> DataCon -> [OutStgArg] -> CseEnv -> CseEnv
+-- do not bother with nullary data constructors, they are static anyways
+addDataCon _ _ [] env = env
+addDataCon bndr dataCon args env = env { ce_conAppMap = new_env }
+  where
+    new_env = insertTM (dataCon, args) bndr (ce_conAppMap env)
+
+forgetCse :: CseEnv -> CseEnv
+forgetCse env = env { ce_conAppMap = emptyTM }
+    -- See note [Free variables of an StgClosure]
+
+addSubst :: OutId -> OutId -> CseEnv -> CseEnv
+addSubst from to env
+    = env { ce_subst = extendVarEnv (ce_subst env) from to }
+
+addTrivCaseBndr :: OutId -> OutId -> CseEnv -> CseEnv
+addTrivCaseBndr from to env
+    = env { ce_bndrMap = extendVarEnv (ce_bndrMap env) from to }
+
+substArgs :: CseEnv -> [InStgArg] -> [OutStgArg]
+substArgs env = map (substArg env)
+
+substArg :: CseEnv -> InStgArg -> OutStgArg
+substArg env (StgVarArg from) = StgVarArg (substVar env from)
+substArg _   (StgLitArg lit)  = StgLitArg lit
+
+substVar :: CseEnv -> InId -> OutId
+substVar env id = fromMaybe id $ lookupVarEnv (ce_subst env) id
+
+-- Functions to enter binders
+
+-- This is much simpler than the equivalent code in CoreSubst:
+--  * We do not substitute type variables, and
+--  * There is nothing relevant in IdInfo at this stage
+--    that needs substitutions.
+-- Therefore, no special treatment for a recursive group is required.
+
+substBndr :: CseEnv -> InId -> (CseEnv, OutId)
+substBndr env old_id
+  = (new_env, new_id)
+  where
+    new_id = uniqAway (ce_in_scope env) old_id
+    no_change = new_id == old_id
+    env' = env { ce_in_scope = ce_in_scope env `extendInScopeSet` new_id }
+    new_env | no_change = env'
+            | otherwise = env' { ce_subst = extendVarEnv (ce_subst env) old_id new_id }
+
+substBndrs :: CseEnv -> [InVar] -> (CseEnv, [OutVar])
+substBndrs env bndrs = mapAccumL substBndr env bndrs
+
+substPairs :: CseEnv -> [(InVar, a)] -> (CseEnv, [(OutVar, a)])
+substPairs env bndrs = mapAccumL go env bndrs
+  where go env (id, x) = let (env', id') = substBndr env id
+                         in (env', (id', x))
+
+-- Main entry point
+
+stgCse :: [InStgTopBinding] -> [OutStgTopBinding]
+stgCse binds = snd $ mapAccumL stgCseTopLvl emptyInScopeSet binds
+
+-- Top level bindings.
+--
+-- We do not CSE these, as top-level closures are allocated statically anyways.
+-- Also, they might be exported.
+-- But we still have to collect the set of in-scope variables, otherwise
+-- uniqAway might shadow a top-level closure.
+
+stgCseTopLvl :: InScopeSet -> InStgTopBinding -> (InScopeSet, OutStgTopBinding)
+stgCseTopLvl in_scope t@(StgTopStringLit _ _) = (in_scope, t)
+stgCseTopLvl in_scope (StgTopLifted (StgNonRec bndr rhs))
+    = (in_scope'
+      , StgTopLifted (StgNonRec bndr (stgCseTopLvlRhs in_scope rhs)))
+  where in_scope' = in_scope `extendInScopeSet` bndr
+
+stgCseTopLvl in_scope (StgTopLifted (StgRec eqs))
+    = ( in_scope'
+      , StgTopLifted (StgRec [ (bndr, stgCseTopLvlRhs in_scope' rhs) | (bndr, rhs) <- eqs ]))
+  where in_scope' = in_scope `extendInScopeSetList` [ bndr | (bndr, _) <- eqs ]
+
+stgCseTopLvlRhs :: InScopeSet -> InStgRhs -> OutStgRhs
+stgCseTopLvlRhs in_scope (StgRhsClosure ext ccs upd args body)
+    = let body' = stgCseExpr (initEnv in_scope) body
+      in  StgRhsClosure ext ccs upd args body'
+stgCseTopLvlRhs _ (StgRhsCon ccs dataCon args)
+    = StgRhsCon ccs dataCon args
+
+------------------------------
+-- The actual AST traversal --
+------------------------------
+
+-- Trivial cases
+stgCseExpr :: CseEnv -> InStgExpr -> OutStgExpr
+stgCseExpr env (StgApp fun args)
+    = StgApp fun' args'
+  where fun' = substVar env fun
+        args' = substArgs env args
+stgCseExpr _ (StgLit lit)
+    = StgLit lit
+stgCseExpr env (StgOpApp op args tys)
+    = StgOpApp op args' tys
+  where args' = substArgs env args
+stgCseExpr _ (StgLam _ _)
+    = pprPanic "stgCseExp" (text "StgLam")
+stgCseExpr env (StgTick tick body)
+    = let body' = stgCseExpr env body
+      in StgTick tick body'
+stgCseExpr env (StgCase scrut bndr ty alts)
+    = mkStgCase scrut' bndr' ty alts'
+  where
+    scrut' = stgCseExpr env scrut
+    (env1, bndr') = substBndr env bndr
+    env2 | StgApp trivial_scrut [] <- scrut' = addTrivCaseBndr bndr trivial_scrut env1
+                 -- See Note [Trivial case scrutinee]
+         | otherwise                         = env1
+    alts' = map (stgCseAlt env2 ty bndr') alts
+
+
+-- A constructor application.
+-- To be removed by a variable use when found in the CSE environment
+stgCseExpr env (StgConApp dataCon args tys)
+    | Just bndr' <- envLookup dataCon args' env
+    = StgApp bndr' []
+    | otherwise
+    = StgConApp dataCon args' tys
+  where args' = substArgs env args
+
+-- Let bindings
+-- The binding might be removed due to CSE (we do not want trivial bindings on
+-- the STG level), so use the smart constructor `mkStgLet` to remove the binding
+-- if empty.
+stgCseExpr env (StgLet ext binds body)
+    = let (binds', env') = stgCseBind env binds
+          body' = stgCseExpr env' body
+      in mkStgLet (StgLet ext) binds' body'
+stgCseExpr env (StgLetNoEscape ext binds body)
+    = let (binds', env') = stgCseBind env binds
+          body' = stgCseExpr env' body
+      in mkStgLet (StgLetNoEscape ext) binds' body'
+
+-- Case alternatives
+-- Extend the CSE environment
+stgCseAlt :: CseEnv -> AltType -> OutId -> InStgAlt -> OutStgAlt
+stgCseAlt env ty case_bndr (DataAlt dataCon, args, rhs)
+    = let (env1, args') = substBndrs env args
+          env2
+            -- To avoid dealing with unboxed sums StgCse runs after unarise and
+            -- should maintain invariants listed in Note [Post-unarisation
+            -- invariants]. One of the invariants is that some binders are not
+            -- used (unboxed tuple case binders) which is what we check with
+            -- `stgCaseBndrInScope` here. If the case binder is not in scope we
+            -- don't add it to the CSE env. See also #15300.
+            | stgCaseBndrInScope ty True -- CSE runs after unarise
+            = addDataCon case_bndr dataCon (map StgVarArg args') env1
+            | otherwise
+            = env1
+            -- see note [Case 2: CSEing case binders]
+          rhs' = stgCseExpr env2 rhs
+      in (DataAlt dataCon, args', rhs')
+stgCseAlt env _ _ (altCon, args, rhs)
+    = let (env1, args') = substBndrs env args
+          rhs' = stgCseExpr env1 rhs
+      in (altCon, args', rhs')
+
+-- Bindings
+stgCseBind :: CseEnv -> InStgBinding -> (Maybe OutStgBinding, CseEnv)
+stgCseBind env (StgNonRec b e)
+    = let (env1, b') = substBndr env b
+      in case stgCseRhs env1 b' e of
+        (Nothing,      env2) -> (Nothing,                env2)
+        (Just (b2,e'), env2) -> (Just (StgNonRec b2 e'), env2)
+stgCseBind env (StgRec pairs)
+    = let (env1, pairs1) = substPairs env pairs
+      in case stgCsePairs env1 pairs1 of
+        ([],     env2) -> (Nothing, env2)
+        (pairs2, env2) -> (Just (StgRec pairs2), env2)
+
+stgCsePairs :: CseEnv -> [(OutId, InStgRhs)] -> ([(OutId, OutStgRhs)], CseEnv)
+stgCsePairs env [] = ([], env)
+stgCsePairs env0 ((b,e):pairs)
+  = let (pairMB, env1) = stgCseRhs env0 b e
+        (pairs', env2) = stgCsePairs env1 pairs
+    in (pairMB `mbCons` pairs', env2)
+  where
+    mbCons = maybe id (:)
+
+-- The RHS of a binding.
+-- If it is a constructor application, either short-cut it or extend the environment
+stgCseRhs :: CseEnv -> OutId -> InStgRhs -> (Maybe (OutId, OutStgRhs), CseEnv)
+stgCseRhs env bndr (StgRhsCon ccs dataCon args)
+    | Just other_bndr <- envLookup dataCon args' env
+    = let env' = addSubst bndr other_bndr env
+      in (Nothing, env')
+    | otherwise
+    = let env' = addDataCon bndr dataCon args' env
+            -- see note [Case 1: CSEing allocated closures]
+          pair = (bndr, StgRhsCon ccs dataCon args')
+      in (Just pair, env')
+  where args' = substArgs env args
+stgCseRhs env bndr (StgRhsClosure ext ccs upd args body)
+    = let (env1, args') = substBndrs env args
+          env2 = forgetCse env1 -- See note [Free variables of an StgClosure]
+          body' = stgCseExpr env2 body
+      in (Just (substVar env bndr, StgRhsClosure ext ccs upd args' body'), env)
+
+
+mkStgCase :: StgExpr -> OutId -> AltType -> [StgAlt] -> StgExpr
+mkStgCase scrut bndr ty alts | all isBndr alts = scrut
+                             | otherwise       = StgCase scrut bndr ty alts
+
+  where
+    -- see Note [All alternatives are the binder]
+    isBndr (_, _, StgApp f []) = f == bndr
+    isBndr _                   = False
+
+
+-- Utilities
+
+-- | This function short-cuts let-bindings that are now obsolete
+mkStgLet :: (a -> b -> b) -> Maybe a -> b -> b
+mkStgLet _      Nothing      body = body
+mkStgLet stgLet (Just binds) body = stgLet binds body
+
+
+{-
+Note [All alternatives are the binder]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When all alternatives simply refer to the case binder, then we do not have
+to bother with the case expression at all (#13588). CoreSTG does this as well,
+but sometimes, types get into the way:
+
+    newtype T = MkT Int
+    f :: (Int, Int) -> (T, Int)
+    f (x, y) = (MkT x, y)
+
+Core cannot just turn this into
+
+    f p = p
+
+as this would not be well-typed. But to STG, where MkT is no longer in the way,
+we can.
+
+Note [Trivial case scrutinee]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We want to be able to handle nested reconstruction of constructors as in
+
+    nested :: Either Int (Either Int a) -> Either Bool (Either Bool a)
+    nested (Right (Right v)) = Right (Right v)
+    nested _ = Left True
+
+So if we come across
+
+    case x of r1
+      Right a -> case a of r2
+              Right b -> let v = Right b
+                         in Right v
+
+we first replace v with r2. Next we want to replace Right r2 with r1. But the
+ce_conAppMap contains Right a!
+
+Therefore, we add r1 ↦ x to ce_bndrMap when analysing the outer case, and use
+this substitution before looking Right r2 up in ce_conAppMap, and everything
+works out.
+
+Note [Free variables of an StgClosure]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+StgClosures (function and thunks) have an explicit list of free variables:
+
+foo [x] =
+    let not_a_free_var = Left [x]
+    let a_free_var = Right [x]
+    let closure = \[x a_free_var] -> \[y] -> bar y (Left [x]) a_free_var
+    in closure
+
+If we were to CSE `Left [x]` in the body of `closure` with `not_a_free_var`,
+then the list of free variables would be wrong, so for now, we do not CSE
+across such a closure, simply because I (Joachim) was not sure about possible
+knock-on effects. If deemed safe and worth the slight code complication of
+re-calculating this list during or after this pass, this can surely be done.
+-}
diff --git a/compiler/GHC/Stg/FVs.hs b/compiler/GHC/Stg/FVs.hs
new file mode 100644
index 0000000000..65f80d97af
--- /dev/null
+++ b/compiler/GHC/Stg/FVs.hs
@@ -0,0 +1,130 @@
+-- | Free variable analysis on STG terms.
+module GHC.Stg.FVs (
+    annTopBindingsFreeVars,
+    annBindingFreeVars
+  ) where
+
+import GhcPrelude
+
+import GHC.Stg.Syntax
+import Id
+import VarSet
+import CoreSyn    ( Tickish(Breakpoint) )
+import Outputable
+import Util
+
+import Data.Maybe ( mapMaybe )
+
+newtype Env
+  = Env
+  { locals :: IdSet
+  }
+
+emptyEnv :: Env
+emptyEnv = Env emptyVarSet
+
+addLocals :: [Id] -> Env -> Env
+addLocals bndrs env
+  = env { locals = extendVarSetList (locals env) bndrs }
+
+-- | Annotates a top-level STG binding group with its free variables.
+annTopBindingsFreeVars :: [StgTopBinding] -> [CgStgTopBinding]
+annTopBindingsFreeVars = map go
+  where
+    go (StgTopStringLit id bs) = StgTopStringLit id bs
+    go (StgTopLifted bind)
+      = StgTopLifted (annBindingFreeVars bind)
+
+-- | Annotates an STG binding with its free variables.
+annBindingFreeVars :: StgBinding -> CgStgBinding
+annBindingFreeVars = fst . binding emptyEnv emptyDVarSet
+
+boundIds :: StgBinding -> [Id]
+boundIds (StgNonRec b _) = [b]
+boundIds (StgRec pairs)  = map fst pairs
+
+-- Note [Tracking local binders]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- 'locals' contains non-toplevel, non-imported binders.
+-- We maintain the set in 'expr', 'alt' and 'rhs', which are the only
+-- places where new local binders are introduced.
+-- Why do it there rather than in 'binding'? Two reasons:
+--
+--   1. We call 'binding' from 'annTopBindingsFreeVars', which would
+--      add top-level bindings to the 'locals' set.
+--   2. In the let(-no-escape) case, we need to extend the environment
+--      prior to analysing the body, but we also need the fvs from the
+--      body to analyse the RHSs. No way to do this without some
+--      knot-tying.
+
+-- | This makes sure that only local, non-global free vars make it into the set.
+mkFreeVarSet :: Env -> [Id] -> DIdSet
+mkFreeVarSet env = mkDVarSet . filter (`elemVarSet` locals env)
+
+args :: Env -> [StgArg] -> DIdSet
+args env = mkFreeVarSet env . mapMaybe f
+  where
+    f (StgVarArg occ) = Just occ
+    f _               = Nothing
+
+binding :: Env -> DIdSet -> StgBinding -> (CgStgBinding, DIdSet)
+binding env body_fv (StgNonRec bndr r) = (StgNonRec bndr r', fvs)
+  where
+    -- See Note [Tracking local binders]
+    (r', rhs_fvs) = rhs env r
+    fvs = delDVarSet body_fv bndr `unionDVarSet` rhs_fvs
+binding env body_fv (StgRec pairs) = (StgRec pairs', fvs)
+  where
+    -- See Note [Tracking local binders]
+    bndrs = map fst pairs
+    (rhss, rhs_fvss) = mapAndUnzip (rhs env . snd) pairs
+    pairs' = zip bndrs rhss
+    fvs = delDVarSetList (unionDVarSets (body_fv:rhs_fvss)) bndrs
+
+expr :: Env -> StgExpr -> (CgStgExpr, DIdSet)
+expr env = go
+  where
+    go (StgApp occ as)
+      = (StgApp occ as, unionDVarSet (args env as) (mkFreeVarSet env [occ]))
+    go (StgLit lit) = (StgLit lit, emptyDVarSet)
+    go (StgConApp dc as tys) = (StgConApp dc as tys, args env as)
+    go (StgOpApp op as ty) = (StgOpApp op as ty, args env as)
+    go StgLam{} = pprPanic "StgFVs: StgLam" empty
+    go (StgCase scrut bndr ty alts) = (StgCase scrut' bndr ty alts', fvs)
+      where
+        (scrut', scrut_fvs) = go scrut
+        -- See Note [Tracking local binders]
+        (alts', alt_fvss) = mapAndUnzip (alt (addLocals [bndr] env)) alts
+        alt_fvs = unionDVarSets alt_fvss
+        fvs = delDVarSet (unionDVarSet scrut_fvs alt_fvs) bndr
+    go (StgLet ext bind body) = go_bind (StgLet ext) bind body
+    go (StgLetNoEscape ext bind body) = go_bind (StgLetNoEscape ext) bind body
+    go (StgTick tick e) = (StgTick tick e', fvs')
+      where
+        (e', fvs) = go e
+        fvs' = unionDVarSet (tickish tick) fvs
+        tickish (Breakpoint _ ids) = mkDVarSet ids
+        tickish _                  = emptyDVarSet
+
+    go_bind dc bind body = (dc bind' body', fvs)
+      where
+        -- See Note [Tracking local binders]
+        env' = addLocals (boundIds bind) env
+        (body', body_fvs) = expr env' body
+        (bind', fvs) = binding env' body_fvs bind
+
+rhs :: Env -> StgRhs -> (CgStgRhs, DIdSet)
+rhs env (StgRhsClosure _ ccs uf bndrs body)
+  = (StgRhsClosure fvs ccs uf bndrs body', fvs)
+  where
+    -- See Note [Tracking local binders]
+    (body', body_fvs) = expr (addLocals bndrs env) body
+    fvs = delDVarSetList body_fvs bndrs
+rhs env (StgRhsCon ccs dc as) = (StgRhsCon ccs dc as, args env as)
+
+alt :: Env -> StgAlt -> (CgStgAlt, DIdSet)
+alt env (con, bndrs, e) = ((con, bndrs, e'), fvs)
+  where
+    -- See Note [Tracking local binders]
+    (e', rhs_fvs) = expr (addLocals bndrs env) e
+    fvs = delDVarSetList rhs_fvs bndrs
diff --git a/compiler/GHC/Stg/Lift.hs b/compiler/GHC/Stg/Lift.hs
new file mode 100644
index 0000000000..cafcafbd42
--- /dev/null
+++ b/compiler/GHC/Stg/Lift.hs
@@ -0,0 +1,258 @@
+{-# LANGUAGE CPP #-}
+
+-- | Implements a selective lambda lifter, running late in the optimisation
+-- pipeline.
+--
+-- If you are interested in the cost model that is employed to decide whether
+-- to lift a binding or not, look at "GHC.Stg.Lift.Analysis".
+-- "GHC.Stg.Lift.Monad" contains the transformation monad that hides away some
+-- plumbing of the transformation.
+module GHC.Stg.Lift
+   (
+    -- * Late lambda lifting in STG
+    -- $note
+   stgLiftLams
+   )
+where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import BasicTypes
+import DynFlags
+import Id
+import IdInfo
+import GHC.Stg.FVs ( annBindingFreeVars )
+import GHC.Stg.Lift.Analysis
+import GHC.Stg.Lift.Monad
+import GHC.Stg.Syntax
+import Outputable
+import UniqSupply
+import Util
+import VarSet
+import Control.Monad ( when )
+import Data.Maybe ( isNothing )
+
+-- Note [Late lambda lifting in STG]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- $note
+-- See also the <https://gitlab.haskell.org/ghc/ghc/wikis/late-lam-lift wiki page>
+-- and #9476.
+--
+-- The basic idea behind lambda lifting is to turn locally defined functions
+-- into top-level functions. Free variables are then passed as additional
+-- arguments at *call sites* instead of having a closure allocated for them at
+-- *definition site*. Example:
+--
+-- @
+--    let x = ...; y = ... in
+--    let f = {x y} \a -> a + x + y in
+--    let g = {f x} \b -> f b + x in
+--    g 5
+-- @
+--
+-- Lambda lifting @f@ would
+--
+--   1. Turn @f@'s free variables into formal parameters
+--   2. Update @f@'s call site within @g@ to @f x y b@
+--   3. Update @g@'s closure: Add @y@ as an additional free variable, while
+--      removing @f@, because @f@ no longer allocates and can be floated to
+--      top-level.
+--   4. Actually float the binding of @f@ to top-level, eliminating the @let@
+--      in the process.
+--
+-- This results in the following program (with free var annotations):
+--
+-- @
+--    f x y a = a + x + y;
+--    let x = ...; y = ... in
+--    let g = {x y} \b -> f x y b + x in
+--    g 5
+-- @
+--
+-- This optimisation is all about lifting only when it is beneficial to do so.
+-- The above seems like a worthwhile lift, judging from heap allocation:
+-- We eliminate @f@'s closure, saving to allocate a closure with 2 words, while
+-- not changing the size of @g@'s closure.
+--
+-- You can probably sense that there's some kind of cost model at play here.
+-- And you are right! But we also employ a couple of other heuristics for the
+-- lifting decision which are outlined in "GHC.Stg.Lift.Analysis#when".
+--
+-- The transformation is done in "GHC.Stg.Lift", which calls out to
+-- 'GHC.Stg.Lift.Analysis.goodToLift' for its lifting decision.  It relies on
+-- "GHC.Stg.Lift.Monad", which abstracts some subtle STG invariants into a
+-- monadic substrate.
+--
+-- Suffice to say: We trade heap allocation for stack allocation.
+-- The additional arguments have to passed on the stack (or in registers,
+-- depending on architecture) every time we call the function to save a single
+-- heap allocation when entering the let binding. Nofib suggests a mean
+-- improvement of about 1% for this pass, so it seems like a worthwhile thing to
+-- do. Compile-times went up by 0.6%, so all in all a very modest change.
+--
+-- For a concrete example, look at @spectral/atom@. There's a call to 'zipWith'
+-- that is ultimately compiled to something like this
+-- (module desugaring/lowering to actual STG):
+--
+-- @
+--    propagate dt = ...;
+--    runExperiment ... =
+--      let xs = ... in
+--      let ys = ... in
+--      let go = {dt go} \xs ys -> case (xs, ys) of
+--            ([], []) -> []
+--            (x:xs', y:ys') -> propagate dt x y : go xs' ys'
+--      in go xs ys
+-- @
+--
+-- This will lambda lift @go@ to top-level, speeding up the resulting program
+-- by roughly one percent:
+--
+-- @
+--    propagate dt = ...;
+--    go dt xs ys = case (xs, ys) of
+--      ([], []) -> []
+--      (x:xs', y:ys') -> propagate dt x y : go dt xs' ys'
+--    runExperiment ... =
+--      let xs = ... in
+--      let ys = ... in
+--      in go dt xs ys
+-- @
+
+
+
+-- | Lambda lifts bindings to top-level deemed worth lifting (see 'goodToLift').
+--
+-- (Mostly) textbook instance of the lambda lifting transformation, selecting
+-- which bindings to lambda lift by consulting 'goodToLift'.
+stgLiftLams :: DynFlags -> UniqSupply -> [InStgTopBinding] -> [OutStgTopBinding]
+stgLiftLams dflags us = runLiftM dflags us . foldr liftTopLvl (pure ())
+
+liftTopLvl :: InStgTopBinding -> LiftM () -> LiftM ()
+liftTopLvl (StgTopStringLit bndr lit) rest = withSubstBndr bndr $ \bndr' -> do
+  addTopStringLit bndr' lit
+  rest
+liftTopLvl (StgTopLifted bind) rest = do
+  let is_rec = isRec $ fst $ decomposeStgBinding bind
+  when is_rec startBindingGroup
+  let bind_w_fvs = annBindingFreeVars bind
+  withLiftedBind TopLevel (tagSkeletonTopBind bind_w_fvs) NilSk $ \mb_bind' -> do
+    -- We signal lifting of a binding through returning Nothing.
+    -- Should never happen for a top-level binding, though, since we are already
+    -- at top-level.
+    case mb_bind' of
+      Nothing -> pprPanic "StgLiftLams" (text "Lifted top-level binding")
+      Just bind' -> addLiftedBinding bind'
+    when is_rec endBindingGroup
+    rest
+
+withLiftedBind
+  :: TopLevelFlag
+  -> LlStgBinding
+  -> Skeleton
+  -> (Maybe OutStgBinding -> LiftM a)
+  -> LiftM a
+withLiftedBind top_lvl bind scope k
+  | isTopLevel top_lvl
+  = withCaffyness (is_caffy pairs) go
+  | otherwise
+  = go
+  where
+    (rec, pairs) = decomposeStgBinding bind
+    is_caffy = any (mayHaveCafRefs . idCafInfo . binderInfoBndr . fst)
+    go = withLiftedBindPairs top_lvl rec pairs scope (k . fmap (mkStgBinding rec))
+
+withLiftedBindPairs
+  :: TopLevelFlag
+  -> RecFlag
+  -> [(BinderInfo, LlStgRhs)]
+  -> Skeleton
+  -> (Maybe [(Id, OutStgRhs)] -> LiftM a)
+  -> LiftM a
+withLiftedBindPairs top rec pairs scope k = do
+  let (infos, rhss) = unzip pairs
+  let bndrs = map binderInfoBndr infos
+  expander <- liftedIdsExpander
+  dflags <- getDynFlags
+  case goodToLift dflags top rec expander pairs scope of
+    -- @abs_ids@ is the set of all variables that need to become parameters.
+    Just abs_ids -> withLiftedBndrs abs_ids bndrs $ \bndrs' -> do
+      -- Within this block, all binders in @bndrs@ will be noted as lifted, so
+      -- that the return value of @liftedIdsExpander@ in this context will also
+      -- expand the bindings in @bndrs@ to their free variables.
+      -- Now we can recurse into the RHSs and see if we can lift any further
+      -- bindings. We pass the set of expanded free variables (thus OutIds) on
+      -- to @liftRhs@ so that it can add them as parameter binders.
+      when (isRec rec) startBindingGroup
+      rhss' <- traverse (liftRhs (Just abs_ids)) rhss
+      let pairs' = zip bndrs' rhss'
+      addLiftedBinding (mkStgBinding rec pairs')
+      when (isRec rec) endBindingGroup
+      k Nothing
+    Nothing -> withSubstBndrs bndrs $ \bndrs' -> do
+      -- Don't lift the current binding, but possibly some bindings in their
+      -- RHSs.
+      rhss' <- traverse (liftRhs Nothing) rhss
+      let pairs' = zip bndrs' rhss'
+      k (Just pairs')
+
+liftRhs
+  :: Maybe (DIdSet)
+  -- ^ @Just former_fvs@ <=> this RHS was lifted and we have to add @former_fvs@
+  -- as lambda binders, discarding all free vars.
+  -> LlStgRhs
+  -> LiftM OutStgRhs
+liftRhs mb_former_fvs rhs@(StgRhsCon ccs con args)
+  = ASSERT2(isNothing mb_former_fvs, text "Should never lift a constructor" $$ ppr rhs)
+    StgRhsCon ccs con <$> traverse liftArgs args
+liftRhs Nothing (StgRhsClosure _ ccs upd infos body) = do
+  -- This RHS wasn't lifted.
+  withSubstBndrs (map binderInfoBndr infos) $ \bndrs' ->
+    StgRhsClosure noExtFieldSilent ccs upd bndrs' <$> liftExpr body
+liftRhs (Just former_fvs) (StgRhsClosure _ ccs upd infos body) = do
+  -- This RHS was lifted. Insert extra binders for @former_fvs@.
+  withSubstBndrs (map binderInfoBndr infos) $ \bndrs' -> do
+    let bndrs'' = dVarSetElems former_fvs ++ bndrs'
+    StgRhsClosure noExtFieldSilent ccs upd bndrs'' <$> liftExpr body
+
+liftArgs :: InStgArg -> LiftM OutStgArg
+liftArgs a@(StgLitArg _) = pure a
+liftArgs (StgVarArg occ) = do
+  ASSERTM2( not <$> isLifted occ, text "StgArgs should never be lifted" $$ ppr occ )
+  StgVarArg <$> substOcc occ
+
+liftExpr :: LlStgExpr -> LiftM OutStgExpr
+liftExpr (StgLit lit) = pure (StgLit lit)
+liftExpr (StgTick t e) = StgTick t <$> liftExpr e
+liftExpr (StgApp f args) = do
+  f' <- substOcc f
+  args' <- traverse liftArgs args
+  fvs' <- formerFreeVars f
+  let top_lvl_args = map StgVarArg fvs' ++ args'
+  pure (StgApp f' top_lvl_args)
+liftExpr (StgConApp con args tys) = StgConApp con <$> traverse liftArgs args <*> pure tys
+liftExpr (StgOpApp op args ty) = StgOpApp op <$> traverse liftArgs args <*> pure ty
+liftExpr (StgLam _ _) = pprPanic "stgLiftLams" (text "StgLam")
+liftExpr (StgCase scrut info ty alts) = do
+  scrut' <- liftExpr scrut
+  withSubstBndr (binderInfoBndr info) $ \bndr' -> do
+    alts' <- traverse liftAlt alts
+    pure (StgCase scrut' bndr' ty alts')
+liftExpr (StgLet scope bind body)
+  = withLiftedBind NotTopLevel bind scope $ \mb_bind' -> do
+      body' <- liftExpr body
+      case mb_bind' of
+        Nothing -> pure body' -- withLiftedBindPairs decided to lift it and already added floats
+        Just bind' -> pure (StgLet noExtFieldSilent bind' body')
+liftExpr (StgLetNoEscape scope bind body)
+  = withLiftedBind NotTopLevel bind scope $ \mb_bind' -> do
+      body' <- liftExpr body
+      case mb_bind' of
+        Nothing -> pprPanic "stgLiftLams" (text "Should never decide to lift LNEs")
+        Just bind' -> pure (StgLetNoEscape noExtFieldSilent bind' body')
+
+liftAlt :: LlStgAlt -> LiftM OutStgAlt
+liftAlt (con, infos, rhs) = withSubstBndrs (map binderInfoBndr infos) $ \bndrs' ->
+  (,,) con bndrs' <$> liftExpr rhs
diff --git a/compiler/GHC/Stg/Lift/Analysis.hs b/compiler/GHC/Stg/Lift/Analysis.hs
new file mode 100644
index 0000000000..02d439cef7
--- /dev/null
+++ b/compiler/GHC/Stg/Lift/Analysis.hs
@@ -0,0 +1,565 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE DataKinds #-}
+
+-- | Provides the heuristics for when it's beneficial to lambda lift bindings.
+-- Most significantly, this employs a cost model to estimate impact on heap
+-- allocations, by looking at an STG expression's 'Skeleton'.
+module GHC.Stg.Lift.Analysis (
+    -- * #when# When to lift
+    -- $when
+
+    -- * #clogro# Estimating closure growth
+    -- $clogro
+
+    -- * AST annotation
+    Skeleton(..), BinderInfo(..), binderInfoBndr,
+    LlStgBinding, LlStgExpr, LlStgRhs, LlStgAlt, tagSkeletonTopBind,
+    -- * Lifting decision
+    goodToLift,
+    closureGrowth -- Exported just for the docs
+  ) where
+
+import GhcPrelude
+
+import BasicTypes
+import Demand
+import DynFlags
+import Id
+import SMRep ( WordOff )
+import GHC.Stg.Syntax
+import qualified GHC.StgToCmm.ArgRep  as StgToCmm.ArgRep
+import qualified GHC.StgToCmm.Closure as StgToCmm.Closure
+import qualified GHC.StgToCmm.Layout  as StgToCmm.Layout
+import Outputable
+import Util
+import VarSet
+
+import Data.Maybe ( mapMaybe )
+
+-- Note [When to lift]
+-- ~~~~~~~~~~~~~~~~~~~
+-- $when
+-- The analysis proceeds in two steps:
+--
+--   1. It tags the syntax tree with analysis information in the form of
+--      'BinderInfo' at each binder and 'Skeleton's at each let-binding
+--      by 'tagSkeletonTopBind' and friends.
+--   2. The resulting syntax tree is treated by the "GHC.Stg.Lift"
+--      module, calling out to 'goodToLift' to decide if a binding is worthwhile
+--      to lift.
+--      'goodToLift' consults argument occurrence information in 'BinderInfo'
+--      and estimates 'closureGrowth', for which it needs the 'Skeleton'.
+--
+-- So the annotations from 'tagSkeletonTopBind' ultimately fuel 'goodToLift',
+-- which employs a number of heuristics to identify and exclude lambda lifting
+-- opportunities deemed non-beneficial:
+--
+--  [Top-level bindings] can't be lifted.
+--  [Thunks] and data constructors shouldn't be lifted in order not to destroy
+--    sharing.
+--  [Argument occurrences] #arg_occs# of binders prohibit them to be lifted.
+--    Doing the lift would re-introduce the very allocation at call sites that
+--    we tried to get rid off in the first place. We capture analysis
+--    information in 'BinderInfo'. Note that we also consider a nullary
+--    application as argument occurrence, because it would turn into an n-ary
+--    partial application created by a generic apply function. This occurs in
+--    CPS-heavy code like the CS benchmark.
+--  [Join points] should not be lifted, simply because there's no reduction in
+--    allocation to be had.
+--  [Abstracting over join points] destroys join points, because they end up as
+--    arguments to the lifted function.
+--  [Abstracting over known local functions] turns a known call into an unknown
+--    call (e.g. some @stg_ap_*@), which is generally slower. Can be turned off
+--    with @-fstg-lift-lams-known@.
+--  [Calling convention] Don't lift when the resulting function would have a
+--    higher arity than available argument registers for the calling convention.
+--    Can be influenced with @-fstg-lift-(non)rec-args(-any)@.
+--  [Closure growth] introduced when former free variables have to be available
+--    at call sites may actually lead to an increase in overall allocations
+--  resulting from a lift. Estimating closure growth is described in
+--  "GHC.Stg.Lift.Analysis#clogro" and is what most of this module is ultimately
+--  concerned with.
+--
+-- There's a <https://gitlab.haskell.org/ghc/ghc/wikis/late-lam-lift wiki page> with
+-- some more background and history.
+
+-- Note [Estimating closure growth]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- $clogro
+-- We estimate closure growth by abstracting the syntax tree into a 'Skeleton',
+-- capturing only syntactic details relevant to 'closureGrowth', such as
+--
+--   * 'ClosureSk', representing closure allocation.
+--   * 'RhsSk', representing a RHS of a binding and how many times it's called
+--     by an appropriate 'DmdShell'.
+--   * 'AltSk', 'BothSk' and 'NilSk' for choice, sequence and empty element.
+--
+-- This abstraction is mostly so that the main analysis function 'closureGrowth'
+-- can stay simple and focused. Also, skeletons tend to be much smaller than
+-- the syntax tree they abstract, so it makes sense to construct them once and
+-- and operate on them instead of the actual syntax tree.
+--
+-- A more detailed treatment of computing closure growth, including examples,
+-- can be found in the paper referenced from the
+-- <https://gitlab.haskell.org/ghc/ghc/wikis/late-lam-lift wiki page>.
+
+llTrace :: String -> SDoc -> a -> a
+llTrace _ _ c = c
+-- llTrace a b c = pprTrace a b c
+
+type instance BinderP      'LiftLams = BinderInfo
+type instance XRhsClosure  'LiftLams = DIdSet
+type instance XLet         'LiftLams = Skeleton
+type instance XLetNoEscape 'LiftLams = Skeleton
+
+freeVarsOfRhs :: (XRhsClosure pass ~ DIdSet) => GenStgRhs pass -> DIdSet
+freeVarsOfRhs (StgRhsCon _ _ args) = mkDVarSet [ id | StgVarArg id <- args ]
+freeVarsOfRhs (StgRhsClosure fvs _ _ _ _) = fvs
+
+-- | Captures details of the syntax tree relevant to the cost model, such as
+-- closures, multi-shot lambdas and case expressions.
+data Skeleton
+  = ClosureSk !Id !DIdSet {- ^ free vars -} !Skeleton
+  | RhsSk !DmdShell {- ^ how often the RHS was entered -} !Skeleton
+  | AltSk !Skeleton !Skeleton
+  | BothSk !Skeleton !Skeleton
+  | NilSk
+
+bothSk :: Skeleton -> Skeleton -> Skeleton
+bothSk NilSk b = b
+bothSk a NilSk = a
+bothSk a b     = BothSk a b
+
+altSk :: Skeleton -> Skeleton -> Skeleton
+altSk NilSk b = b
+altSk a NilSk = a
+altSk a b     = AltSk a b
+
+rhsSk :: DmdShell -> Skeleton -> Skeleton
+rhsSk _        NilSk = NilSk
+rhsSk body_dmd skel  = RhsSk body_dmd skel
+
+-- | The type used in binder positions in 'GenStgExpr's.
+data BinderInfo
+  = BindsClosure !Id !Bool -- ^ Let(-no-escape)-bound thing with a flag
+                           --   indicating whether it occurs as an argument
+                           --   or in a nullary application
+                           --   (see "GHC.Stg.Lift.Analysis#arg_occs").
+  | BoringBinder !Id       -- ^ Every other kind of binder
+
+-- | Gets the bound 'Id' out a 'BinderInfo'.
+binderInfoBndr :: BinderInfo -> Id
+binderInfoBndr (BoringBinder bndr)   = bndr
+binderInfoBndr (BindsClosure bndr _) = bndr
+
+-- | Returns 'Nothing' for 'BoringBinder's and 'Just' the flag indicating
+-- occurrences as argument or in a nullary applications otherwise.
+binderInfoOccursAsArg :: BinderInfo -> Maybe Bool
+binderInfoOccursAsArg BoringBinder{}     = Nothing
+binderInfoOccursAsArg (BindsClosure _ b) = Just b
+
+instance Outputable Skeleton where
+  ppr NilSk = text ""
+  ppr (AltSk l r) = vcat
+    [ text "{ " <+> ppr l
+    , text "ALT"
+    , text "  " <+> ppr r
+    , text "}"
+    ]
+  ppr (BothSk l r) = ppr l $$ ppr r
+  ppr (ClosureSk f fvs body) = ppr f <+> ppr fvs $$ nest 2 (ppr body)
+  ppr (RhsSk body_dmd body) = hcat
+    [ text "λ["
+    , ppr str
+    , text ", "
+    , ppr use
+    , text "]. "
+    , ppr body
+    ]
+    where
+      str
+        | isStrictDmd body_dmd = '1'
+        | otherwise = '0'
+      use
+        | isAbsDmd body_dmd = '0'
+        | isUsedOnce body_dmd = '1'
+        | otherwise = 'ω'
+
+instance Outputable BinderInfo where
+  ppr = ppr . binderInfoBndr
+
+instance OutputableBndr BinderInfo where
+  pprBndr b = pprBndr b . binderInfoBndr
+  pprPrefixOcc = pprPrefixOcc . binderInfoBndr
+  pprInfixOcc = pprInfixOcc . binderInfoBndr
+  bndrIsJoin_maybe = bndrIsJoin_maybe . binderInfoBndr
+
+mkArgOccs :: [StgArg] -> IdSet
+mkArgOccs = mkVarSet . mapMaybe stg_arg_var
+  where
+    stg_arg_var (StgVarArg occ) = Just occ
+    stg_arg_var _               = Nothing
+
+-- | Tags every binder with its 'BinderInfo' and let bindings with their
+-- 'Skeleton's.
+tagSkeletonTopBind :: CgStgBinding -> LlStgBinding
+-- NilSk is OK when tagging top-level bindings. Also, top-level things are never
+-- lambda-lifted, so no need to track their argument occurrences. They can also
+-- never be let-no-escapes (thus we pass False).
+tagSkeletonTopBind bind = bind'
+  where
+    (_, _, _, bind') = tagSkeletonBinding False NilSk emptyVarSet bind
+
+-- | Tags binders of an 'StgExpr' with its 'BinderInfo' and let bindings with
+-- their 'Skeleton's. Additionally, returns its 'Skeleton' and the set of binder
+-- occurrences in argument and nullary application position
+-- (cf. "GHC.Stg.Lift.Analysis#arg_occs").
+tagSkeletonExpr :: CgStgExpr -> (Skeleton, IdSet, LlStgExpr)
+tagSkeletonExpr (StgLit lit)
+  = (NilSk, emptyVarSet, StgLit lit)
+tagSkeletonExpr (StgConApp con args tys)
+  = (NilSk, mkArgOccs args, StgConApp con args tys)
+tagSkeletonExpr (StgOpApp op args ty)
+  = (NilSk, mkArgOccs args, StgOpApp op args ty)
+tagSkeletonExpr (StgApp f args)
+  = (NilSk, arg_occs, StgApp f args)
+  where
+    arg_occs
+      -- This checks for nullary applications, which we treat the same as
+      -- argument occurrences, see "GHC.Stg.Lift.Analysis#arg_occs".
+      | null args = unitVarSet f
+      | otherwise = mkArgOccs args
+tagSkeletonExpr (StgLam _ _) = pprPanic "stgLiftLams" (text "StgLam")
+tagSkeletonExpr (StgCase scrut bndr ty alts)
+  = (skel, arg_occs, StgCase scrut' bndr' ty alts')
+  where
+    (scrut_skel, scrut_arg_occs, scrut') = tagSkeletonExpr scrut
+    (alt_skels, alt_arg_occss, alts') = mapAndUnzip3 tagSkeletonAlt alts
+    skel = bothSk scrut_skel (foldr altSk NilSk alt_skels)
+    arg_occs = unionVarSets (scrut_arg_occs:alt_arg_occss) `delVarSet` bndr
+    bndr' = BoringBinder bndr
+tagSkeletonExpr (StgTick t e)
+  = (skel, arg_occs, StgTick t e')
+  where
+    (skel, arg_occs, e') = tagSkeletonExpr e
+tagSkeletonExpr (StgLet _ bind body) = tagSkeletonLet False body bind
+tagSkeletonExpr (StgLetNoEscape _ bind body) = tagSkeletonLet True body bind
+
+mkLet :: Bool -> Skeleton -> LlStgBinding -> LlStgExpr -> LlStgExpr
+mkLet True = StgLetNoEscape
+mkLet _    = StgLet
+
+tagSkeletonLet
+  :: Bool
+  -- ^ Is the binding a let-no-escape?
+  -> CgStgExpr
+  -- ^ Let body
+  -> CgStgBinding
+  -- ^ Binding group
+  -> (Skeleton, IdSet, LlStgExpr)
+  -- ^ RHS skeletons, argument occurrences and annotated binding
+tagSkeletonLet is_lne body bind
+  = (let_skel, arg_occs, mkLet is_lne scope bind' body')
+  where
+    (body_skel, body_arg_occs, body') = tagSkeletonExpr body
+    (let_skel, arg_occs, scope, bind')
+      = tagSkeletonBinding is_lne body_skel body_arg_occs bind
+
+tagSkeletonBinding
+  :: Bool
+  -- ^ Is the binding a let-no-escape?
+  -> Skeleton
+  -- ^ Let body skeleton
+  -> IdSet
+  -- ^ Argument occurrences in the body
+  -> CgStgBinding
+  -- ^ Binding group
+  -> (Skeleton, IdSet, Skeleton, LlStgBinding)
+  -- ^ Let skeleton, argument occurrences, scope skeleton of binding and
+  --   the annotated binding
+tagSkeletonBinding is_lne body_skel body_arg_occs (StgNonRec bndr rhs)
+  = (let_skel, arg_occs, scope, bind')
+  where
+    (rhs_skel, rhs_arg_occs, rhs') = tagSkeletonRhs bndr rhs
+    arg_occs = (body_arg_occs `unionVarSet` rhs_arg_occs) `delVarSet` bndr
+    bind_skel
+      | is_lne    = rhs_skel -- no closure is allocated for let-no-escapes
+      | otherwise = ClosureSk bndr (freeVarsOfRhs rhs) rhs_skel
+    let_skel = bothSk body_skel bind_skel
+    occurs_as_arg = bndr `elemVarSet` body_arg_occs
+    -- Compared to the recursive case, this exploits the fact that @bndr@ is
+    -- never free in @rhs@.
+    scope = body_skel
+    bind' = StgNonRec (BindsClosure bndr occurs_as_arg) rhs'
+tagSkeletonBinding is_lne body_skel body_arg_occs (StgRec pairs)
+  = (let_skel, arg_occs, scope, StgRec pairs')
+  where
+    (bndrs, _) = unzip pairs
+    -- Local recursive STG bindings also regard the defined binders as free
+    -- vars. We want to delete those for our cost model, as these are known
+    -- calls anyway when we add them to the same top-level recursive group as
+    -- the top-level binding currently being analysed.
+    skel_occs_rhss' = map (uncurry tagSkeletonRhs) pairs
+    rhss_arg_occs = map sndOf3 skel_occs_rhss'
+    scope_occs = unionVarSets (body_arg_occs:rhss_arg_occs)
+    arg_occs = scope_occs `delVarSetList` bndrs
+    -- @skel_rhss@ aren't yet wrapped in closures. We'll do that in a moment,
+    -- but we also need the un-wrapped skeletons for calculating the @scope@
+    -- of the group, as the outer closures don't contribute to closure growth
+    -- when we lift this specific binding.
+    scope = foldr (bothSk . fstOf3) body_skel skel_occs_rhss'
+    -- Now we can build the actual Skeleton for the expression just by
+    -- iterating over each bind pair.
+    (bind_skels, pairs') = unzip (zipWith single_bind bndrs skel_occs_rhss')
+    let_skel = foldr bothSk body_skel bind_skels
+    single_bind bndr (skel_rhs, _, rhs') = (bind_skel, (bndr', rhs'))
+      where
+        -- Here, we finally add the closure around each @skel_rhs@.
+        bind_skel
+          | is_lne    = skel_rhs -- no closure is allocated for let-no-escapes
+          | otherwise = ClosureSk bndr fvs skel_rhs
+        fvs = freeVarsOfRhs rhs' `dVarSetMinusVarSet` mkVarSet bndrs
+        bndr' = BindsClosure bndr (bndr `elemVarSet` scope_occs)
+
+tagSkeletonRhs :: Id -> CgStgRhs -> (Skeleton, IdSet, LlStgRhs)
+tagSkeletonRhs _ (StgRhsCon ccs dc args)
+  = (NilSk, mkArgOccs args, StgRhsCon ccs dc args)
+tagSkeletonRhs bndr (StgRhsClosure fvs ccs upd bndrs body)
+  = (rhs_skel, body_arg_occs, StgRhsClosure fvs ccs upd bndrs' body')
+  where
+    bndrs' = map BoringBinder bndrs
+    (body_skel, body_arg_occs, body') = tagSkeletonExpr body
+    rhs_skel = rhsSk (rhsDmdShell bndr) body_skel
+
+-- | How many times will the lambda body of the RHS bound to the given
+-- identifier be evaluated, relative to its defining context? This function
+-- computes the answer in form of a 'DmdShell'.
+rhsDmdShell :: Id -> DmdShell
+rhsDmdShell bndr
+  | is_thunk = oneifyDmd ds
+  | otherwise = peelManyCalls (idArity bndr) cd
+  where
+    is_thunk = idArity bndr == 0
+    -- Let's pray idDemandInfo is still OK after unarise...
+    (ds, cd) = toCleanDmd (idDemandInfo bndr)
+
+tagSkeletonAlt :: CgStgAlt -> (Skeleton, IdSet, LlStgAlt)
+tagSkeletonAlt (con, bndrs, rhs)
+  = (alt_skel, arg_occs, (con, map BoringBinder bndrs, rhs'))
+  where
+    (alt_skel, alt_arg_occs, rhs') = tagSkeletonExpr rhs
+    arg_occs = alt_arg_occs `delVarSetList` bndrs
+
+-- | Combines several heuristics to decide whether to lambda-lift a given
+-- @let@-binding to top-level. See "GHC.Stg.Lift.Analysis#when" for details.
+goodToLift
+  :: DynFlags
+  -> TopLevelFlag
+  -> RecFlag
+  -> (DIdSet -> DIdSet) -- ^ An expander function, turning 'InId's into
+                        -- 'OutId's. See 'GHC.Stg.Lift.Monad.liftedIdsExpander'.
+  -> [(BinderInfo, LlStgRhs)]
+  -> Skeleton
+  -> Maybe DIdSet       -- ^ @Just abs_ids@ <=> This binding is beneficial to
+                        -- lift and @abs_ids@ are the variables it would
+                        -- abstract over
+goodToLift dflags top_lvl rec_flag expander pairs scope = decide
+  [ ("top-level", isTopLevel top_lvl) -- keep in sync with Note [When to lift]
+  , ("memoized", any_memoized)
+  , ("argument occurrences", arg_occs)
+  , ("join point", is_join_point)
+  , ("abstracts join points", abstracts_join_ids)
+  , ("abstracts known local function", abstracts_known_local_fun)
+  , ("args spill on stack", args_spill_on_stack)
+  , ("increases allocation", inc_allocs)
+  ] where
+      decide deciders
+        | not (fancy_or deciders)
+        = llTrace "stgLiftLams:lifting"
+                  (ppr bndrs <+> ppr abs_ids $$
+                   ppr allocs $$
+                   ppr scope) $
+          Just abs_ids
+        | otherwise
+        = Nothing
+      ppr_deciders = vcat . map (text . fst) . filter snd
+      fancy_or deciders
+        = llTrace "stgLiftLams:goodToLift" (ppr bndrs $$ ppr_deciders deciders) $
+          any snd deciders
+
+      bndrs = map (binderInfoBndr . fst) pairs
+      bndrs_set = mkVarSet bndrs
+      rhss = map snd pairs
+
+      -- First objective: Calculate @abs_ids@, e.g. the former free variables
+      -- the lifted binding would abstract over. We have to merge the free
+      -- variables of all RHS to get the set of variables that will have to be
+      -- passed through parameters.
+      fvs = unionDVarSets (map freeVarsOfRhs rhss)
+      -- To lift the binding to top-level, we want to delete the lifted binders
+      -- themselves from the free var set. Local let bindings track recursive
+      -- occurrences in their free variable set. We neither want to apply our
+      -- cost model to them (see 'tagSkeletonRhs'), nor pass them as parameters
+      -- when lifted, as these are known calls. We call the resulting set the
+      -- identifiers we abstract over, thus @abs_ids@. These are all 'OutId's.
+      -- We will save the set in 'LiftM.e_expansions' for each of the variables
+      -- if we perform the lift.
+      abs_ids = expander (delDVarSetList fvs bndrs)
+
+      -- We don't lift updatable thunks or constructors
+      any_memoized = any is_memoized_rhs rhss
+      is_memoized_rhs StgRhsCon{} = True
+      is_memoized_rhs (StgRhsClosure _ _ upd _ _) = isUpdatable upd
+
+      -- Don't lift binders occurring as arguments. This would result in complex
+      -- argument expressions which would have to be given a name, reintroducing
+      -- the very allocation at each call site that we wanted to get rid off in
+      -- the first place.
+      arg_occs = or (mapMaybe (binderInfoOccursAsArg . fst) pairs)
+
+      -- These don't allocate anyway.
+      is_join_point = any isJoinId bndrs
+
+      -- Abstracting over join points/let-no-escapes spoils them.
+      abstracts_join_ids = any isJoinId (dVarSetElems abs_ids)
+
+      -- Abstracting over known local functions that aren't floated themselves
+      -- turns a known, fast call into an unknown, slow call:
+      --
+      --    let f x = ...
+      --        g y = ... f x ... -- this was a known call
+      --    in g 4
+      --
+      -- After lifting @g@, but not @f@:
+      --
+      --    l_g f y = ... f y ... -- this is now an unknown call
+      --    let f x = ...
+      --    in l_g f 4
+      --
+      -- We can abuse the results of arity analysis for this:
+      -- idArity f > 0 ==> known
+      known_fun id = idArity id > 0
+      abstracts_known_local_fun
+        = not (liftLamsKnown dflags) && any known_fun (dVarSetElems abs_ids)
+
+      -- Number of arguments of a RHS in the current binding group if we decide
+      -- to lift it
+      n_args
+        = length
+        . StgToCmm.Closure.nonVoidIds -- void parameters don't appear in Cmm
+        . (dVarSetElems abs_ids ++)
+        . rhsLambdaBndrs
+      max_n_args
+        | isRec rec_flag = liftLamsRecArgs dflags
+        | otherwise      = liftLamsNonRecArgs dflags
+      -- We have 5 hardware registers on x86_64 to pass arguments in. Any excess
+      -- args are passed on the stack, which means slow memory accesses
+      args_spill_on_stack
+        | Just n <- max_n_args = maximum (map n_args rhss) > n
+        | otherwise = False
+
+      -- We only perform the lift if allocations didn't increase.
+      -- Note that @clo_growth@ will be 'infinity' if there was positive growth
+      -- under a multi-shot lambda.
+      -- Also, abstracting over LNEs is unacceptable. LNEs might return
+      -- unlifted tuples, which idClosureFootprint can't cope with.
+      inc_allocs = abstracts_join_ids || allocs > 0
+      allocs = clo_growth + mkIntWithInf (negate closuresSize)
+      -- We calculate and then add up the size of each binding's closure.
+      -- GHC does not currently share closure environments, and we either lift
+      -- the entire recursive binding group or none of it.
+      closuresSize = sum $ flip map rhss $ \rhs ->
+        closureSize dflags
+        . dVarSetElems
+        . expander
+        . flip dVarSetMinusVarSet bndrs_set
+        $ freeVarsOfRhs rhs
+      clo_growth = closureGrowth expander (idClosureFootprint dflags) bndrs_set abs_ids scope
+
+rhsLambdaBndrs :: LlStgRhs -> [Id]
+rhsLambdaBndrs StgRhsCon{} = []
+rhsLambdaBndrs (StgRhsClosure _ _ _ bndrs _) = map binderInfoBndr bndrs
+
+-- | The size in words of a function closure closing over the given 'Id's,
+-- including the header.
+closureSize :: DynFlags -> [Id] -> WordOff
+closureSize dflags ids = words + sTD_HDR_SIZE dflags
+  -- We go through sTD_HDR_SIZE rather than fixedHdrSizeW so that we don't
+  -- optimise differently when profiling is enabled.
+  where
+    (words, _, _)
+      -- Functions have a StdHeader (as opposed to ThunkHeader).
+      = StgToCmm.Layout.mkVirtHeapOffsets dflags StgToCmm.Layout.StdHeader
+      . StgToCmm.Closure.addIdReps
+      . StgToCmm.Closure.nonVoidIds
+      $ ids
+
+-- | The number of words a single 'Id' adds to a closure's size.
+-- Note that this can't handle unboxed tuples (which may still be present in
+-- let-no-escapes, even after Unarise), in which case
+-- @'GHC.StgToCmm.Closure.idPrimRep'@ will crash.
+idClosureFootprint:: DynFlags -> Id -> WordOff
+idClosureFootprint dflags
+  = StgToCmm.ArgRep.argRepSizeW dflags
+  . StgToCmm.ArgRep.idArgRep
+
+-- | @closureGrowth expander sizer f fvs@ computes the closure growth in words
+-- as a result of lifting @f@ to top-level. If there was any growing closure
+-- under a multi-shot lambda, the result will be 'infinity'.
+-- Also see "GHC.Stg.Lift.Analysis#clogro".
+closureGrowth
+  :: (DIdSet -> DIdSet)
+  -- ^ Expands outer free ids that were lifted to their free vars
+  -> (Id -> Int)
+  -- ^ Computes the closure footprint of an identifier
+  -> IdSet
+  -- ^ Binding group for which lifting is to be decided
+  -> DIdSet
+  -- ^ Free vars of the whole binding group prior to lifting it. These must be
+  --   available at call sites if we decide to lift the binding group.
+  -> Skeleton
+  -- ^ Abstraction of the scope of the function
+  -> IntWithInf
+  -- ^ Closure growth. 'infinity' indicates there was growth under a
+  --   (multi-shot) lambda.
+closureGrowth expander sizer group abs_ids = go
+  where
+    go NilSk = 0
+    go (BothSk a b) = go a + go b
+    go (AltSk a b) = max (go a) (go b)
+    go (ClosureSk _ clo_fvs rhs)
+      -- If no binder of the @group@ occurs free in the closure, the lifting
+      -- won't have any effect on it and we can omit the recursive call.
+      | n_occs == 0 = 0
+      -- Otherwise, we account the cost of allocating the closure and add it to
+      -- the closure growth of its RHS.
+      | otherwise   = mkIntWithInf cost + go rhs
+      where
+        n_occs = sizeDVarSet (clo_fvs' `dVarSetIntersectVarSet` group)
+        -- What we close over considering prior lifting decisions
+        clo_fvs' = expander clo_fvs
+        -- Variables that would additionally occur free in the closure body if
+        -- we lift @f@
+        newbies = abs_ids `minusDVarSet` clo_fvs'
+        -- Lifting @f@ removes @f@ from the closure but adds all @newbies@
+        cost = foldDVarSet (\id size -> sizer id + size) 0 newbies - n_occs
+    go (RhsSk body_dmd body)
+      -- The conservative assumption would be that
+      --   1. Every RHS with positive growth would be called multiple times,
+      --      modulo thunks.
+      --   2. Every RHS with negative growth wouldn't be called at all.
+      --
+      -- In the first case, we'd have to return 'infinity', while in the
+      -- second case, we'd have to return 0. But we can do far better
+      -- considering information from the demand analyser, which provides us
+      -- with conservative estimates on minimum and maximum evaluation
+      -- cardinality. The @body_dmd@ part of 'RhsSk' is the result of
+      -- 'rhsDmdShell' and accurately captures the cardinality of the RHSs body
+      -- relative to its defining context.
+      | isAbsDmd body_dmd   = 0
+      | cg <= 0             = if isStrictDmd body_dmd then cg else 0
+      | isUsedOnce body_dmd = cg
+      | otherwise           = infinity
+      where
+        cg = go body
diff --git a/compiler/GHC/Stg/Lift/Monad.hs b/compiler/GHC/Stg/Lift/Monad.hs
new file mode 100644
index 0000000000..7d17e53cd9
--- /dev/null
+++ b/compiler/GHC/Stg/Lift/Monad.hs
@@ -0,0 +1,348 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE TypeFamilies #-}
+
+-- | Hides away distracting bookkeeping while lambda lifting into a 'LiftM'
+-- monad.
+module GHC.Stg.Lift.Monad (
+    decomposeStgBinding, mkStgBinding,
+    Env (..),
+    -- * #floats# Handling floats
+    -- $floats
+    FloatLang (..), collectFloats, -- Exported just for the docs
+    -- * Transformation monad
+    LiftM, runLiftM, withCaffyness,
+    -- ** Adding bindings
+    startBindingGroup, endBindingGroup, addTopStringLit, addLiftedBinding,
+    -- ** Substitution and binders
+    withSubstBndr, withSubstBndrs, withLiftedBndr, withLiftedBndrs,
+    -- ** Occurrences
+    substOcc, isLifted, formerFreeVars, liftedIdsExpander
+  ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import BasicTypes
+import CostCentre ( isCurrentCCS, dontCareCCS )
+import DynFlags
+import FastString
+import Id
+import IdInfo
+import Name
+import Outputable
+import OrdList
+import GHC.Stg.Subst
+import GHC.Stg.Syntax
+import Type
+import UniqSupply
+import Util
+import VarEnv
+import VarSet
+
+import Control.Arrow ( second )
+import Control.Monad.Trans.Class
+import Control.Monad.Trans.RWS.Strict ( RWST, runRWST )
+import qualified Control.Monad.Trans.RWS.Strict as RWS
+import Control.Monad.Trans.Cont ( ContT (..) )
+import Data.ByteString ( ByteString )
+
+-- | @uncurry 'mkStgBinding' . 'decomposeStgBinding' = id@
+decomposeStgBinding :: GenStgBinding pass -> (RecFlag, [(BinderP pass, GenStgRhs pass)])
+decomposeStgBinding (StgRec pairs) = (Recursive, pairs)
+decomposeStgBinding (StgNonRec bndr rhs) = (NonRecursive, [(bndr, rhs)])
+
+mkStgBinding :: RecFlag -> [(BinderP pass, GenStgRhs pass)] -> GenStgBinding pass
+mkStgBinding Recursive = StgRec
+mkStgBinding NonRecursive = uncurry StgNonRec . head
+
+-- | Environment threaded around in a scoped, @Reader@-like fashion.
+data Env
+  = Env
+  { e_dflags     :: !DynFlags
+  -- ^ Read-only.
+  , e_subst      :: !Subst
+  -- ^ We need to track the renamings of local 'InId's to their lifted 'OutId',
+  -- because shadowing might make a closure's free variables unavailable at its
+  -- call sites. Consider:
+  -- @
+  --    let f y = x + y in let x = 4 in f x
+  -- @
+  -- Here, @f@ can't be lifted to top-level, because its free variable @x@ isn't
+  -- available at its call site.
+  , e_expansions :: !(IdEnv DIdSet)
+  -- ^ Lifted 'Id's don't occur as free variables in any closure anymore, because
+  -- they are bound at the top-level. Every occurrence must supply the formerly
+  -- free variables of the lifted 'Id', so they in turn become free variables of
+  -- the call sites. This environment tracks this expansion from lifted 'Id's to
+  -- their free variables.
+  --
+  -- 'InId's to 'OutId's.
+  --
+  -- Invariant: 'Id's not present in this map won't be substituted.
+  , e_in_caffy_context :: !Bool
+  -- ^ Are we currently analysing within a caffy context (e.g. the containing
+  -- top-level binder's 'idCafInfo' is 'MayHaveCafRefs')? If not, we can safely
+  -- assume that functions we lift out aren't caffy either.
+  }
+
+emptyEnv :: DynFlags -> Env
+emptyEnv dflags = Env dflags emptySubst emptyVarEnv False
+
+
+-- Note [Handling floats]
+-- ~~~~~~~~~~~~~~~~~~~~~~
+-- $floats
+-- Consider the following expression:
+--
+-- @
+--     f x =
+--       let g y = ... f y ...
+--       in g x
+-- @
+--
+-- What happens when we want to lift @g@? Normally, we'd put the lifted @l_g@
+-- binding above the binding for @f@:
+--
+-- @
+--     g f y = ... f y ...
+--     f x = g f x
+-- @
+--
+-- But this very unnecessarily turns a known call to @f@ into an unknown one, in
+-- addition to complicating matters for the analysis.
+-- Instead, we'd really like to put both functions in the same recursive group,
+-- thereby preserving the known call:
+--
+-- @
+--     Rec {
+--       g y = ... f y ...
+--       f x = g x
+--     }
+-- @
+--
+-- But we don't want this to happen for just /any/ binding. That would create
+-- possibly huge recursive groups in the process, calling for an occurrence
+-- analyser on STG.
+-- So, we need to track when we lift a binding out of a recursive RHS and add
+-- the binding to the same recursive group as the enclosing recursive binding
+-- (which must have either already been at the top-level or decided to be
+-- lifted itself in order to preserve the known call).
+--
+-- This is done by expressing this kind of nesting structure as a 'Writer' over
+-- @['FloatLang']@ and flattening this expression in 'runLiftM' by a call to
+-- 'collectFloats'.
+-- API-wise, the analysis will not need to know about the whole 'FloatLang'
+-- business and will just manipulate it indirectly through actions in 'LiftM'.
+
+-- | We need to detect when we are lifting something out of the RHS of a
+-- recursive binding (c.f. "GHC.Stg.Lift.Monad#floats"), in which case that
+-- binding needs to be added to the same top-level recursive group. This
+-- requires we detect a certain nesting structure, which is encoded by
+-- 'StartBindingGroup' and 'EndBindingGroup'.
+--
+-- Although 'collectFloats' will only ever care if the current binding to be
+-- lifted (through 'LiftedBinding') will occur inside such a binding group or
+-- not, e.g. doesn't care about the nesting level as long as its greater than 0.
+data FloatLang
+  = StartBindingGroup
+  | EndBindingGroup
+  | PlainTopBinding OutStgTopBinding
+  | LiftedBinding OutStgBinding
+
+instance Outputable FloatLang where
+  ppr StartBindingGroup = char '('
+  ppr EndBindingGroup = char ')'
+  ppr (PlainTopBinding StgTopStringLit{}) = text "<str>"
+  ppr (PlainTopBinding (StgTopLifted b)) = ppr (LiftedBinding b)
+  ppr (LiftedBinding bind) = (if isRec rec then char 'r' else char 'n') <+> ppr (map fst pairs)
+    where
+      (rec, pairs) = decomposeStgBinding bind
+
+-- | Flattens an expression in @['FloatLang']@ into an STG program, see #floats.
+-- Important pre-conditions: The nesting of opening 'StartBindinGroup's and
+-- closing 'EndBindinGroup's is balanced. Also, it is crucial that every binding
+-- group has at least one recursive binding inside. Otherwise there's no point
+-- in announcing the binding group in the first place and an @ASSERT@ will
+-- trigger.
+collectFloats :: [FloatLang] -> [OutStgTopBinding]
+collectFloats = go (0 :: Int) []
+  where
+    go 0 [] [] = []
+    go _ _ [] = pprPanic "collectFloats" (text "unterminated group")
+    go n binds (f:rest) = case f of
+      StartBindingGroup -> go (n+1) binds rest
+      EndBindingGroup
+        | n == 0 -> pprPanic "collectFloats" (text "no group to end")
+        | n == 1 -> StgTopLifted (merge_binds binds) : go 0 [] rest
+        | otherwise -> go (n-1) binds rest
+      PlainTopBinding top_bind
+        | n == 0 -> top_bind : go n binds rest
+        | otherwise -> pprPanic "collectFloats" (text "plain top binding inside group")
+      LiftedBinding bind
+        | n == 0 -> StgTopLifted (rm_cccs bind) : go n binds rest
+        | otherwise -> go n (bind:binds) rest
+
+    map_rhss f = uncurry mkStgBinding . second (map (second f)) . decomposeStgBinding
+    rm_cccs = map_rhss removeRhsCCCS
+    merge_binds binds = ASSERT( any is_rec binds )
+                        StgRec (concatMap (snd . decomposeStgBinding . rm_cccs) binds)
+    is_rec StgRec{} = True
+    is_rec _ = False
+
+-- | Omitting this makes for strange closure allocation schemes that crash the
+-- GC.
+removeRhsCCCS :: GenStgRhs pass -> GenStgRhs pass
+removeRhsCCCS (StgRhsClosure ext ccs upd bndrs body)
+  | isCurrentCCS ccs
+  = StgRhsClosure ext dontCareCCS upd bndrs body
+removeRhsCCCS (StgRhsCon ccs con args)
+  | isCurrentCCS ccs
+  = StgRhsCon dontCareCCS con args
+removeRhsCCCS rhs = rhs
+
+-- | The analysis monad consists of the following 'RWST' components:
+--
+--     * 'Env': Reader-like context. Contains a substitution, info about how
+--       how lifted identifiers are to be expanded into applications and details
+--       such as 'DynFlags' and a flag helping with determining if a lifted
+--       binding is caffy.
+--
+--     * @'OrdList' 'FloatLang'@: Writer output for the resulting STG program.
+--
+--     * No pure state component
+--
+--     * But wrapping around 'UniqSM' for generating fresh lifted binders.
+--       (The @uniqAway@ approach could give the same name to two different
+--       lifted binders, so this is necessary.)
+newtype LiftM a
+  = LiftM { unwrapLiftM :: RWST Env (OrdList FloatLang) () UniqSM a }
+  deriving (Functor, Applicative, Monad)
+
+instance HasDynFlags LiftM where
+  getDynFlags = LiftM (RWS.asks e_dflags)
+
+instance MonadUnique LiftM where
+  getUniqueSupplyM = LiftM (lift getUniqueSupplyM)
+  getUniqueM = LiftM (lift getUniqueM)
+  getUniquesM = LiftM (lift getUniquesM)
+
+runLiftM :: DynFlags -> UniqSupply -> LiftM () -> [OutStgTopBinding]
+runLiftM dflags us (LiftM m) = collectFloats (fromOL floats)
+  where
+    (_, _, floats) = initUs_ us (runRWST m (emptyEnv dflags) ())
+
+-- | Assumes a given caffyness for the execution of the passed action, which
+-- influences the 'cafInfo' of lifted bindings.
+withCaffyness :: Bool -> LiftM a -> LiftM a
+withCaffyness caffy action
+  = LiftM (RWS.local (\e -> e { e_in_caffy_context = caffy }) (unwrapLiftM action))
+
+-- | Writes a plain 'StgTopStringLit' to the output.
+addTopStringLit :: OutId -> ByteString -> LiftM ()
+addTopStringLit id = LiftM . RWS.tell . unitOL . PlainTopBinding . StgTopStringLit id
+
+-- | Starts a recursive binding group. See #floats# and 'collectFloats'.
+startBindingGroup :: LiftM ()
+startBindingGroup = LiftM $ RWS.tell $ unitOL $ StartBindingGroup
+
+-- | Ends a recursive binding group. See #floats# and 'collectFloats'.
+endBindingGroup :: LiftM ()
+endBindingGroup = LiftM $ RWS.tell $ unitOL $ EndBindingGroup
+
+-- | Lifts a binding to top-level. Depending on whether it's declared inside
+-- a recursive RHS (see #floats# and 'collectFloats'), this might be added to
+-- an existing recursive top-level binding group.
+addLiftedBinding :: OutStgBinding -> LiftM ()
+addLiftedBinding = LiftM . RWS.tell . unitOL . LiftedBinding
+
+-- | Takes a binder and a continuation which is called with the substituted
+-- binder. The continuation will be evaluated in a 'LiftM' context in which that
+-- binder is deemed in scope. Think of it as a 'RWS.local' computation: After
+-- the continuation finishes, the new binding won't be in scope anymore.
+withSubstBndr :: Id -> (Id -> LiftM a) -> LiftM a
+withSubstBndr bndr inner = LiftM $ do
+  subst <- RWS.asks e_subst
+  let (bndr', subst') = substBndr bndr subst
+  RWS.local (\e -> e { e_subst = subst' }) (unwrapLiftM (inner bndr'))
+
+-- | See 'withSubstBndr'.
+withSubstBndrs :: Traversable f => f Id -> (f Id -> LiftM a) -> LiftM a
+withSubstBndrs = runContT . traverse (ContT . withSubstBndr)
+
+-- | Similarly to 'withSubstBndr', this function takes a set of variables to
+-- abstract over, the binder to lift (and generate a fresh, substituted name
+-- for) and a continuation in which that fresh, lifted binder is in scope.
+--
+-- It takes care of all the details involved with copying and adjusting the
+-- binder, fresh name generation and caffyness.
+withLiftedBndr :: DIdSet -> Id -> (Id -> LiftM a) -> LiftM a
+withLiftedBndr abs_ids bndr inner = do
+  uniq <- getUniqueM
+  let str = "$l" ++ occNameString (getOccName bndr)
+  let ty = mkLamTypes (dVarSetElems abs_ids) (idType bndr)
+  -- When the enclosing top-level binding is not caffy, then the lifted
+  -- binding will not be caffy either. If we don't recognize this, non-caffy
+  -- things call caffy things and then codegen screws up.
+  in_caffy_ctxt <- LiftM (RWS.asks e_in_caffy_context)
+  let caf_info = if in_caffy_ctxt then MayHaveCafRefs else NoCafRefs
+  let bndr'
+        -- See Note [transferPolyIdInfo] in Id.hs. We need to do this at least
+        -- for arity information.
+        = transferPolyIdInfo bndr (dVarSetElems abs_ids)
+        -- Otherwise we confuse code gen if bndr was not caffy: the new bndr is
+        -- assumed to be caffy and will need an SRT. Transitive call sites might
+        -- not be caffy themselves and subsequently will miss a static link
+        -- field in their closure. Chaos ensues.
+        . flip setIdCafInfo caf_info
+        . mkSysLocal (mkFastString str) uniq
+        $ ty
+  LiftM $ RWS.local
+    (\e -> e
+      { e_subst = extendSubst bndr bndr' $ extendInScope bndr' $ e_subst e
+      , e_expansions = extendVarEnv (e_expansions e) bndr abs_ids
+      })
+    (unwrapLiftM (inner bndr'))
+
+-- | See 'withLiftedBndr'.
+withLiftedBndrs :: Traversable f => DIdSet -> f Id -> (f Id -> LiftM a) -> LiftM a
+withLiftedBndrs abs_ids = runContT . traverse (ContT . withLiftedBndr abs_ids)
+
+-- | Substitutes a binder /occurrence/, which was brought in scope earlier by
+-- 'withSubstBndr'\/'withLiftedBndr'.
+substOcc :: Id -> LiftM Id
+substOcc id = LiftM (RWS.asks (lookupIdSubst id . e_subst))
+
+-- | Whether the given binding was decided to be lambda lifted.
+isLifted :: InId -> LiftM Bool
+isLifted bndr = LiftM (RWS.asks (elemVarEnv bndr . e_expansions))
+
+-- | Returns an empty list for a binding that was not lifted and the list of all
+-- local variables the binding abstracts over (so, exactly the additional
+-- arguments at adjusted call sites) otherwise.
+formerFreeVars :: InId -> LiftM [OutId]
+formerFreeVars f = LiftM $ do
+  expansions <- RWS.asks e_expansions
+  pure $ case lookupVarEnv expansions f of
+    Nothing -> []
+    Just fvs -> dVarSetElems fvs
+
+-- | Creates an /expander function/ for the current set of lifted binders.
+-- This expander function will replace any 'InId' by their corresponding 'OutId'
+-- and, in addition, will expand any lifted binders by the former free variables
+-- it abstracts over.
+liftedIdsExpander :: LiftM (DIdSet -> DIdSet)
+liftedIdsExpander = LiftM $ do
+  expansions <- RWS.asks e_expansions
+  subst <- RWS.asks e_subst
+  -- We use @noWarnLookupIdSubst@ here in order to suppress "not in scope"
+  -- warnings generated by 'lookupIdSubst' due to local bindings within RHS.
+  -- These are not in the InScopeSet of @subst@ and extending the InScopeSet in
+  -- @goodToLift@/@closureGrowth@ before passing it on to @expander@ is too much
+  -- trouble.
+  let go set fv = case lookupVarEnv expansions fv of
+        Nothing -> extendDVarSet set (noWarnLookupIdSubst fv subst) -- Not lifted
+        Just fvs' -> unionDVarSet set fvs'
+  let expander fvs = foldl' go emptyDVarSet (dVarSetElems fvs)
+  pure expander
diff --git a/compiler/GHC/Stg/Lint.hs b/compiler/GHC/Stg/Lint.hs
new file mode 100644
index 0000000000..e7044a89e0
--- /dev/null
+++ b/compiler/GHC/Stg/Lint.hs
@@ -0,0 +1,396 @@
+{- |
+(c) The GRASP/AQUA Project, Glasgow University, 1993-1998
+
+A lint pass to check basic STG invariants:
+
+- Variables should be defined before used.
+
+- Let bindings should not have unboxed types (unboxed bindings should only
+  appear in case), except when they're join points (see Note [CoreSyn let/app
+  invariant] and #14117).
+
+- If linting after unarisation, invariants listed in Note [Post-unarisation
+  invariants].
+
+Because we don't have types and coercions in STG we can't really check types
+here.
+
+Some history:
+
+StgLint used to check types, but it never worked and so it was disabled in 2000
+with this note:
+
+    WARNING:
+    ~~~~~~~~
+
+    This module has suffered bit-rot; it is likely to yield lint errors
+    for Stg code that is currently perfectly acceptable for code
+    generation.  Solution: don't use it!  (KSW 2000-05).
+
+Since then there were some attempts at enabling it again, as summarised in
+#14787. It's finally decided that we remove all type checking and only look for
+basic properties listed above.
+-}
+
+{-# LANGUAGE ScopedTypeVariables, FlexibleContexts, TypeFamilies,
+  DeriveFunctor #-}
+
+module GHC.Stg.Lint ( lintStgTopBindings ) where
+
+import GhcPrelude
+
+import GHC.Stg.Syntax
+
+import DynFlags
+import Bag              ( Bag, emptyBag, isEmptyBag, snocBag, bagToList )
+import BasicTypes       ( TopLevelFlag(..), isTopLevel )
+import CostCentre       ( isCurrentCCS )
+import Id               ( Id, idType, isJoinId, idName )
+import VarSet
+import DataCon
+import CoreSyn          ( AltCon(..) )
+import Name             ( getSrcLoc, nameIsLocalOrFrom )
+import ErrUtils         ( MsgDoc, Severity(..), mkLocMessage )
+import Type
+import GHC.Types.RepType
+import SrcLoc
+import Outputable
+import Module           ( Module )
+import qualified ErrUtils as Err
+import Control.Applicative ((<|>))
+import Control.Monad
+
+lintStgTopBindings :: forall a . (OutputablePass a, BinderP a ~ Id)
+                   => DynFlags
+                   -> Module -- ^ module being compiled
+                   -> Bool   -- ^ have we run Unarise yet?
+                   -> String -- ^ who produced the STG?
+                   -> [GenStgTopBinding a]
+                   -> IO ()
+
+lintStgTopBindings dflags this_mod unarised whodunnit binds
+  = {-# SCC "StgLint" #-}
+    case initL this_mod unarised top_level_binds (lint_binds binds) of
+      Nothing  ->
+        return ()
+      Just msg -> do
+        putLogMsg dflags NoReason Err.SevDump noSrcSpan
+          (defaultDumpStyle dflags)
+          (vcat [ text "*** Stg Lint ErrMsgs: in" <+>
+                        text whodunnit <+> text "***",
+                  msg,
+                  text "*** Offending Program ***",
+                  pprGenStgTopBindings binds,
+                  text "*** End of Offense ***"])
+        Err.ghcExit dflags 1
+  where
+    -- Bring all top-level binds into scope because CoreToStg does not generate
+    -- bindings in dependency order (so we may see a use before its definition).
+    top_level_binds = mkVarSet (bindersOfTopBinds binds)
+
+    lint_binds :: [GenStgTopBinding a] -> LintM ()
+
+    lint_binds [] = return ()
+    lint_binds (bind:binds) = do
+        binders <- lint_bind bind
+        addInScopeVars binders $
+            lint_binds binds
+
+    lint_bind (StgTopLifted bind) = lintStgBinds TopLevel bind
+    lint_bind (StgTopStringLit v _) = return [v]
+
+lintStgArg :: StgArg -> LintM ()
+lintStgArg (StgLitArg _) = return ()
+lintStgArg (StgVarArg v) = lintStgVar v
+
+lintStgVar :: Id -> LintM ()
+lintStgVar id = checkInScope id
+
+lintStgBinds
+    :: (OutputablePass a, BinderP a ~ Id)
+    => TopLevelFlag -> GenStgBinding a -> LintM [Id] -- Returns the binders
+lintStgBinds top_lvl (StgNonRec binder rhs) = do
+    lint_binds_help top_lvl (binder,rhs)
+    return [binder]
+
+lintStgBinds top_lvl (StgRec pairs)
+  = addInScopeVars binders $ do
+        mapM_ (lint_binds_help top_lvl) pairs
+        return binders
+  where
+    binders = [b | (b,_) <- pairs]
+
+lint_binds_help
+    :: (OutputablePass a, BinderP a ~ Id)
+    => TopLevelFlag
+    -> (Id, GenStgRhs a)
+    -> LintM ()
+lint_binds_help top_lvl (binder, rhs)
+  = addLoc (RhsOf binder) $ do
+        when (isTopLevel top_lvl) (checkNoCurrentCCS rhs)
+        lintStgRhs rhs
+        -- Check binder doesn't have unlifted type or it's a join point
+        checkL (isJoinId binder || not (isUnliftedType (idType binder)))
+               (mkUnliftedTyMsg binder rhs)
+
+-- | Top-level bindings can't inherit the cost centre stack from their
+-- (static) allocation site.
+checkNoCurrentCCS
+    :: (OutputablePass a, BinderP a ~ Id)
+    => GenStgRhs a
+    -> LintM ()
+checkNoCurrentCCS rhs@(StgRhsClosure _ ccs _ _ _)
+  | isCurrentCCS ccs
+  = addErrL (text "Top-level StgRhsClosure with CurrentCCS" $$ ppr rhs)
+checkNoCurrentCCS rhs@(StgRhsCon ccs _ _)
+  | isCurrentCCS ccs
+  = addErrL (text "Top-level StgRhsCon with CurrentCCS" $$ ppr rhs)
+checkNoCurrentCCS _
+  = return ()
+
+lintStgRhs :: (OutputablePass a, BinderP a ~ Id) => GenStgRhs a -> LintM ()
+
+lintStgRhs (StgRhsClosure _ _ _ [] expr)
+  = lintStgExpr expr
+
+lintStgRhs (StgRhsClosure _ _ _ binders expr)
+  = addLoc (LambdaBodyOf binders) $
+      addInScopeVars binders $
+        lintStgExpr expr
+
+lintStgRhs rhs@(StgRhsCon _ con args) = do
+    when (isUnboxedTupleCon con || isUnboxedSumCon con) $
+      addErrL (text "StgRhsCon is an unboxed tuple or sum application" $$
+               ppr rhs)
+    mapM_ lintStgArg args
+    mapM_ checkPostUnariseConArg args
+
+lintStgExpr :: (OutputablePass a, BinderP a ~ Id) => GenStgExpr a -> LintM ()
+
+lintStgExpr (StgLit _) = return ()
+
+lintStgExpr (StgApp fun args) = do
+    lintStgVar fun
+    mapM_ lintStgArg args
+
+lintStgExpr app@(StgConApp con args _arg_tys) = do
+    -- unboxed sums should vanish during unarise
+    lf <- getLintFlags
+    when (lf_unarised lf && isUnboxedSumCon con) $
+      addErrL (text "Unboxed sum after unarise:" $$
+               ppr app)
+    mapM_ lintStgArg args
+    mapM_ checkPostUnariseConArg args
+
+lintStgExpr (StgOpApp _ args _) =
+    mapM_ lintStgArg args
+
+lintStgExpr lam@(StgLam _ _) =
+    addErrL (text "Unexpected StgLam" <+> ppr lam)
+
+lintStgExpr (StgLet _ binds body) = do
+    binders <- lintStgBinds NotTopLevel binds
+    addLoc (BodyOfLetRec binders) $
+      addInScopeVars binders $
+        lintStgExpr body
+
+lintStgExpr (StgLetNoEscape _ binds body) = do
+    binders <- lintStgBinds NotTopLevel binds
+    addLoc (BodyOfLetRec binders) $
+      addInScopeVars binders $
+        lintStgExpr body
+
+lintStgExpr (StgTick _ expr) = lintStgExpr expr
+
+lintStgExpr (StgCase scrut bndr alts_type alts) = do
+    lintStgExpr scrut
+
+    lf <- getLintFlags
+    let in_scope = stgCaseBndrInScope alts_type (lf_unarised lf)
+
+    addInScopeVars [bndr | in_scope] (mapM_ lintAlt alts)
+
+lintAlt
+    :: (OutputablePass a, BinderP a ~ Id)
+    => (AltCon, [Id], GenStgExpr a) -> LintM ()
+
+lintAlt (DEFAULT, _, rhs) =
+    lintStgExpr rhs
+
+lintAlt (LitAlt _, _, rhs) =
+    lintStgExpr rhs
+
+lintAlt (DataAlt _, bndrs, rhs) = do
+    mapM_ checkPostUnariseBndr bndrs
+    addInScopeVars bndrs (lintStgExpr rhs)
+
+{-
+************************************************************************
+*                                                                      *
+Utilities
+*                                                                      *
+************************************************************************
+-}
+
+bindersOf :: BinderP a ~ Id => GenStgBinding a -> [Id]
+bindersOf (StgNonRec binder _) = [binder]
+bindersOf (StgRec pairs)       = [binder | (binder, _) <- pairs]
+
+bindersOfTop :: BinderP a ~ Id => GenStgTopBinding a -> [Id]
+bindersOfTop (StgTopLifted bind) = bindersOf bind
+bindersOfTop (StgTopStringLit binder _) = [binder]
+
+bindersOfTopBinds :: BinderP a ~ Id => [GenStgTopBinding a] -> [Id]
+bindersOfTopBinds = foldr ((++) . bindersOfTop) []
+
+{-
+************************************************************************
+*                                                                      *
+The Lint monad
+*                                                                      *
+************************************************************************
+-}
+
+newtype LintM a = LintM
+    { unLintM :: Module
+              -> LintFlags
+              -> [LintLocInfo]     -- Locations
+              -> IdSet             -- Local vars in scope
+              -> Bag MsgDoc        -- Error messages so far
+              -> (a, Bag MsgDoc)   -- Result and error messages (if any)
+    }
+    deriving (Functor)
+
+data LintFlags = LintFlags { lf_unarised :: !Bool
+                             -- ^ have we run the unariser yet?
+                           }
+
+data LintLocInfo
+  = RhsOf Id            -- The variable bound
+  | LambdaBodyOf [Id]   -- The lambda-binder
+  | BodyOfLetRec [Id]   -- One of the binders
+
+dumpLoc :: LintLocInfo -> (SrcSpan, SDoc)
+dumpLoc (RhsOf v) =
+  (srcLocSpan (getSrcLoc v), text " [RHS of " <> pp_binders [v] <> char ']' )
+dumpLoc (LambdaBodyOf bs) =
+  (srcLocSpan (getSrcLoc (head bs)), text " [in body of lambda with binders " <> pp_binders bs <> char ']' )
+
+dumpLoc (BodyOfLetRec bs) =
+  (srcLocSpan (getSrcLoc (head bs)), text " [in body of letrec with binders " <> pp_binders bs <> char ']' )
+
+
+pp_binders :: [Id] -> SDoc
+pp_binders bs
+  = sep (punctuate comma (map pp_binder bs))
+  where
+    pp_binder b
+      = hsep [ppr b, dcolon, ppr (idType b)]
+
+initL :: Module -> Bool -> IdSet -> LintM a -> Maybe MsgDoc
+initL this_mod unarised locals (LintM m) = do
+  let (_, errs) = m this_mod (LintFlags unarised) [] locals emptyBag
+  if isEmptyBag errs then
+      Nothing
+  else
+      Just (vcat (punctuate blankLine (bagToList errs)))
+
+instance Applicative LintM where
+      pure a = LintM $ \_mod _lf _loc _scope errs -> (a, errs)
+      (<*>) = ap
+      (*>)  = thenL_
+
+instance Monad LintM where
+    (>>=) = thenL
+    (>>)  = (*>)
+
+thenL :: LintM a -> (a -> LintM b) -> LintM b
+thenL m k = LintM $ \mod lf loc scope errs
+  -> case unLintM m mod lf loc scope errs of
+      (r, errs') -> unLintM (k r) mod lf loc scope errs'
+
+thenL_ :: LintM a -> LintM b -> LintM b
+thenL_ m k = LintM $ \mod lf loc scope errs
+  -> case unLintM m mod lf loc scope errs of
+      (_, errs') -> unLintM k mod lf loc scope errs'
+
+checkL :: Bool -> MsgDoc -> LintM ()
+checkL True  _   = return ()
+checkL False msg = addErrL msg
+
+-- Case alts shouldn't have unboxed sum, unboxed tuple, or void binders.
+checkPostUnariseBndr :: Id -> LintM ()
+checkPostUnariseBndr bndr = do
+    lf <- getLintFlags
+    when (lf_unarised lf) $
+      forM_ (checkPostUnariseId bndr) $ \unexpected ->
+        addErrL $
+          text "After unarisation, binder " <>
+          ppr bndr <> text " has " <> text unexpected <> text " type " <>
+          ppr (idType bndr)
+
+-- Arguments shouldn't have sum, tuple, or void types.
+checkPostUnariseConArg :: StgArg -> LintM ()
+checkPostUnariseConArg arg = case arg of
+    StgLitArg _ ->
+      return ()
+    StgVarArg id -> do
+      lf <- getLintFlags
+      when (lf_unarised lf) $
+        forM_ (checkPostUnariseId id) $ \unexpected ->
+          addErrL $
+            text "After unarisation, arg " <>
+            ppr id <> text " has " <> text unexpected <> text " type " <>
+            ppr (idType id)
+
+-- Post-unarisation args and case alt binders should not have unboxed tuple,
+-- unboxed sum, or void types. Return what the binder is if it is one of these.
+checkPostUnariseId :: Id -> Maybe String
+checkPostUnariseId id =
+    let
+      id_ty = idType id
+      is_sum, is_tuple, is_void :: Maybe String
+      is_sum = guard (isUnboxedSumType id_ty) >> return "unboxed sum"
+      is_tuple = guard (isUnboxedTupleType id_ty) >> return "unboxed tuple"
+      is_void = guard (isVoidTy id_ty) >> return "void"
+    in
+      is_sum <|> is_tuple <|> is_void
+
+addErrL :: MsgDoc -> LintM ()
+addErrL msg = LintM $ \_mod _lf loc _scope errs -> ((), addErr errs msg loc)
+
+addErr :: Bag MsgDoc -> MsgDoc -> [LintLocInfo] -> Bag MsgDoc
+addErr errs_so_far msg locs
+  = errs_so_far `snocBag` mk_msg locs
+  where
+    mk_msg (loc:_) = let (l,hdr) = dumpLoc loc
+                     in  mkLocMessage SevWarning l (hdr $$ msg)
+    mk_msg []      = msg
+
+addLoc :: LintLocInfo -> LintM a -> LintM a
+addLoc extra_loc m = LintM $ \mod lf loc scope errs
+   -> unLintM m mod lf (extra_loc:loc) scope errs
+
+addInScopeVars :: [Id] -> LintM a -> LintM a
+addInScopeVars ids m = LintM $ \mod lf loc scope errs
+ -> let
+        new_set = mkVarSet ids
+    in unLintM m mod lf loc (scope `unionVarSet` new_set) errs
+
+getLintFlags :: LintM LintFlags
+getLintFlags = LintM $ \_mod lf _loc _scope errs -> (lf, errs)
+
+checkInScope :: Id -> LintM ()
+checkInScope id = LintM $ \mod _lf loc scope errs
+ -> if nameIsLocalOrFrom mod (idName id) && not (id `elemVarSet` scope) then
+        ((), addErr errs (hsep [ppr id, dcolon, ppr (idType id),
+                                text "is out of scope"]) loc)
+    else
+        ((), errs)
+
+mkUnliftedTyMsg :: OutputablePass a => Id -> GenStgRhs a -> SDoc
+mkUnliftedTyMsg binder rhs
+  = (text "Let(rec) binder" <+> quotes (ppr binder) <+>
+     text "has unlifted type" <+> quotes (ppr (idType binder)))
+    $$
+    (text "RHS:" <+> ppr rhs)
diff --git a/compiler/GHC/Stg/Pipeline.hs b/compiler/GHC/Stg/Pipeline.hs
new file mode 100644
index 0000000000..13b403fc53
--- /dev/null
+++ b/compiler/GHC/Stg/Pipeline.hs
@@ -0,0 +1,141 @@
+{-
+(c) The GRASP/AQUA Project, Glasgow University, 1993-1998
+
+\section[SimplStg]{Driver for simplifying @STG@ programs}
+-}
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TypeFamilies #-}
+
+module GHC.Stg.Pipeline ( stg2stg ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import GHC.Stg.Syntax
+
+import GHC.Stg.Lint     ( lintStgTopBindings )
+import GHC.Stg.Stats    ( showStgStats )
+import GHC.Stg.Unarise  ( unarise )
+import GHC.Stg.CSE      ( stgCse )
+import GHC.Stg.Lift     ( stgLiftLams )
+import Module           ( Module )
+
+import DynFlags
+import ErrUtils
+import UniqSupply
+import Outputable
+import Control.Monad
+import Control.Monad.IO.Class
+import Control.Monad.Trans.State.Strict
+
+newtype StgM a = StgM { _unStgM :: StateT Char IO a }
+  deriving (Functor, Applicative, Monad, MonadIO)
+
+instance MonadUnique StgM where
+  getUniqueSupplyM = StgM $ do { mask <- get
+                               ; liftIO $! mkSplitUniqSupply mask}
+  getUniqueM = StgM $ do { mask <- get
+                         ; liftIO $! uniqFromMask mask}
+
+runStgM :: Char -> StgM a -> IO a
+runStgM mask (StgM m) = evalStateT m mask
+
+stg2stg :: DynFlags                  -- includes spec of what stg-to-stg passes to do
+        -> Module                    -- module being compiled
+        -> [StgTopBinding]           -- input program
+        -> IO [StgTopBinding]        -- output program
+
+stg2stg dflags this_mod binds
+  = do  { dump_when Opt_D_dump_stg "STG:" binds
+        ; showPass dflags "Stg2Stg"
+        -- Do the main business!
+        ; binds' <- runStgM 'g' $
+            foldM do_stg_pass binds (getStgToDo dflags)
+
+        ; dump_when Opt_D_dump_stg_final "Final STG:" binds'
+
+        ; return binds'
+   }
+
+  where
+    stg_linter unarised
+      | gopt Opt_DoStgLinting dflags
+      = lintStgTopBindings dflags this_mod unarised
+      | otherwise
+      = \ _whodunnit _binds -> return ()
+
+    -------------------------------------------
+    do_stg_pass :: [StgTopBinding] -> StgToDo -> StgM [StgTopBinding]
+    do_stg_pass binds to_do
+      = case to_do of
+          StgDoNothing ->
+            return binds
+
+          StgStats ->
+            trace (showStgStats binds) (return binds)
+
+          StgCSE -> do
+            let binds' = {-# SCC "StgCse" #-} stgCse binds
+            end_pass "StgCse" binds'
+
+          StgLiftLams -> do
+            us <- getUniqueSupplyM
+            let binds' = {-# SCC "StgLiftLams" #-} stgLiftLams dflags us binds
+            end_pass "StgLiftLams" binds'
+
+          StgUnarise -> do
+            us <- getUniqueSupplyM
+            liftIO (stg_linter False "Pre-unarise" binds)
+            let binds' = unarise us binds
+            liftIO (dump_when Opt_D_dump_stg_unarised "Unarised STG:" binds')
+            liftIO (stg_linter True "Unarise" binds')
+            return binds'
+
+    dump_when flag header binds
+      = dumpIfSet_dyn dflags flag header FormatSTG (pprStgTopBindings binds)
+
+    end_pass what binds2
+      = liftIO $ do -- report verbosely, if required
+          dumpIfSet_dyn dflags Opt_D_verbose_stg2stg what
+            FormatSTG (vcat (map ppr binds2))
+          stg_linter False what binds2
+          return binds2
+
+-- -----------------------------------------------------------------------------
+-- StgToDo:  abstraction of stg-to-stg passes to run.
+
+-- | Optional Stg-to-Stg passes.
+data StgToDo
+  = StgCSE
+  -- ^ Common subexpression elimination
+  | StgLiftLams
+  -- ^ Lambda lifting closure variables, trading stack/register allocation for
+  -- heap allocation
+  | StgStats
+  | StgUnarise
+  -- ^ Mandatory unarise pass, desugaring unboxed tuple and sum binders
+  | StgDoNothing
+  -- ^ Useful for building up 'getStgToDo'
+  deriving Eq
+
+-- | Which Stg-to-Stg passes to run. Depends on flags, ways etc.
+getStgToDo :: DynFlags -> [StgToDo]
+getStgToDo dflags =
+  filter (/= StgDoNothing)
+    [ mandatory StgUnarise
+    -- Important that unarisation comes first
+    -- See Note [StgCse after unarisation] in GHC.Stg.CSE
+    , optional Opt_StgCSE StgCSE
+    , optional Opt_StgLiftLams StgLiftLams
+    , optional Opt_StgStats StgStats
+    ] where
+      optional opt = runWhen (gopt opt dflags)
+      mandatory = id
+
+runWhen :: Bool -> StgToDo -> StgToDo
+runWhen True todo = todo
+runWhen _    _    = StgDoNothing
diff --git a/compiler/GHC/Stg/Stats.hs b/compiler/GHC/Stg/Stats.hs
new file mode 100644
index 0000000000..c70184e60b
--- /dev/null
+++ b/compiler/GHC/Stg/Stats.hs
@@ -0,0 +1,173 @@
+{-
+(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+
+\section[StgStats]{Gathers statistical information about programs}
+
+
+The program gather statistics about
+\begin{enumerate}
+\item number of boxed cases
+\item number of unboxed cases
+\item number of let-no-escapes
+\item number of non-updatable lets
+\item number of updatable lets
+\item number of applications
+\item number of primitive applications
+\item number of closures (does not include lets bound to constructors)
+\item number of free variables in closures
+%\item number of top-level functions
+%\item number of top-level CAFs
+\item number of constructors
+\end{enumerate}
+-}
+
+{-# LANGUAGE CPP #-}
+
+module GHC.Stg.Stats ( showStgStats ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import GHC.Stg.Syntax
+
+import Id (Id)
+import Panic
+
+import Data.Map (Map)
+import qualified Data.Map as Map
+
+data CounterType
+  = Literals
+  | Applications
+  | ConstructorApps
+  | PrimitiveApps
+  | LetNoEscapes
+  | StgCases
+  | FreeVariables
+  | ConstructorBinds Bool{-True<=>top-level-}
+  | ReEntrantBinds   Bool{-ditto-}
+  | SingleEntryBinds Bool{-ditto-}
+  | UpdatableBinds   Bool{-ditto-}
+  deriving (Eq, Ord)
+
+type Count      = Int
+type StatEnv    = Map CounterType Count
+
+emptySE :: StatEnv
+emptySE = Map.empty
+
+combineSE :: StatEnv -> StatEnv -> StatEnv
+combineSE = Map.unionWith (+)
+
+combineSEs :: [StatEnv] -> StatEnv
+combineSEs = foldr combineSE emptySE
+
+countOne :: CounterType -> StatEnv
+countOne c = Map.singleton c 1
+
+{-
+************************************************************************
+*                                                                      *
+\subsection{Top-level list of bindings (a ``program'')}
+*                                                                      *
+************************************************************************
+-}
+
+showStgStats :: [StgTopBinding] -> String
+
+showStgStats prog
+  = "STG Statistics:\n\n"
+    ++ concat (map showc (Map.toList (gatherStgStats prog)))
+  where
+    showc (x,n) = (showString (s x) . shows n) "\n"
+
+    s Literals                = "Literals                   "
+    s Applications            = "Applications               "
+    s ConstructorApps         = "ConstructorApps            "
+    s PrimitiveApps           = "PrimitiveApps              "
+    s LetNoEscapes            = "LetNoEscapes               "
+    s StgCases                = "StgCases                   "
+    s FreeVariables           = "FreeVariables              "
+    s (ConstructorBinds True) = "ConstructorBinds_Top       "
+    s (ReEntrantBinds True)   = "ReEntrantBinds_Top         "
+    s (SingleEntryBinds True) = "SingleEntryBinds_Top       "
+    s (UpdatableBinds True)   = "UpdatableBinds_Top         "
+    s (ConstructorBinds _)    = "ConstructorBinds_Nested    "
+    s (ReEntrantBinds _)      = "ReEntrantBindsBinds_Nested "
+    s (SingleEntryBinds _)    = "SingleEntryBinds_Nested    "
+    s (UpdatableBinds _)      = "UpdatableBinds_Nested      "
+
+gatherStgStats :: [StgTopBinding] -> StatEnv
+gatherStgStats binds = combineSEs (map statTopBinding binds)
+
+{-
+************************************************************************
+*                                                                      *
+\subsection{Bindings}
+*                                                                      *
+************************************************************************
+-}
+
+statTopBinding :: StgTopBinding -> StatEnv
+statTopBinding (StgTopStringLit _ _) = countOne Literals
+statTopBinding (StgTopLifted bind) = statBinding True bind
+
+statBinding :: Bool -- True <=> top-level; False <=> nested
+            -> StgBinding
+            -> StatEnv
+
+statBinding top (StgNonRec b rhs)
+  = statRhs top (b, rhs)
+
+statBinding top (StgRec pairs)
+  = combineSEs (map (statRhs top) pairs)
+
+statRhs :: Bool -> (Id, StgRhs) -> StatEnv
+
+statRhs top (_, StgRhsCon _ _ _)
+  = countOne (ConstructorBinds top)
+
+statRhs top (_, StgRhsClosure _ _ u _ body)
+  = statExpr body `combineSE`
+    countOne (
+      case u of
+        ReEntrant   -> ReEntrantBinds   top
+        Updatable   -> UpdatableBinds   top
+        SingleEntry -> SingleEntryBinds top
+    )
+
+{-
+************************************************************************
+*                                                                      *
+\subsection{Expressions}
+*                                                                      *
+************************************************************************
+-}
+
+statExpr :: StgExpr -> StatEnv
+
+statExpr (StgApp _ _)     = countOne Applications
+statExpr (StgLit _)       = countOne Literals
+statExpr (StgConApp _ _ _)= countOne ConstructorApps
+statExpr (StgOpApp _ _ _) = countOne PrimitiveApps
+statExpr (StgTick _ e)    = statExpr e
+
+statExpr (StgLetNoEscape _ binds body)
+  = statBinding False{-not top-level-} binds    `combineSE`
+    statExpr body                               `combineSE`
+    countOne LetNoEscapes
+
+statExpr (StgLet _ binds body)
+  = statBinding False{-not top-level-} binds    `combineSE`
+    statExpr body
+
+statExpr (StgCase expr _ _ alts)
+  = statExpr expr       `combineSE`
+    stat_alts alts      `combineSE`
+    countOne StgCases
+  where
+    stat_alts alts
+        = combineSEs (map statExpr [ e | (_,_,e) <- alts ])
+
+statExpr (StgLam {}) = panic "statExpr StgLam"
diff --git a/compiler/GHC/Stg/Subst.hs b/compiler/GHC/Stg/Subst.hs
new file mode 100644
index 0000000000..84b9f29c3c
--- /dev/null
+++ b/compiler/GHC/Stg/Subst.hs
@@ -0,0 +1,80 @@
+{-# LANGUAGE CPP #-}
+
+module GHC.Stg.Subst where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import Id
+import VarEnv
+import Control.Monad.Trans.State.Strict
+import Outputable
+import Util
+
+-- | A renaming substitution from 'Id's to 'Id's. Like 'RnEnv2', but not
+-- maintaining pairs of substitutions. Like @"CoreSubst".'CoreSubst.Subst'@, but
+-- with the domain being 'Id's instead of entire 'CoreExpr'.
+data Subst = Subst InScopeSet IdSubstEnv
+
+type IdSubstEnv = IdEnv Id
+
+-- | @emptySubst = 'mkEmptySubst' 'emptyInScopeSet'@
+emptySubst :: Subst
+emptySubst = mkEmptySubst emptyInScopeSet
+
+-- | Constructs a new 'Subst' assuming the variables in the given 'InScopeSet'
+-- are in scope.
+mkEmptySubst :: InScopeSet -> Subst
+mkEmptySubst in_scope = Subst in_scope emptyVarEnv
+
+-- | Substitutes an 'Id' for another one according to the 'Subst' given in a way
+-- that avoids shadowing the 'InScopeSet', returning the result and an updated
+-- 'Subst' that should be used by subsequent substitutions.
+substBndr :: Id -> Subst -> (Id, Subst)
+substBndr id (Subst in_scope env)
+  = (new_id, Subst new_in_scope new_env)
+  where
+    new_id = uniqAway in_scope id
+    no_change = new_id == id -- in case nothing shadowed
+    new_in_scope = in_scope `extendInScopeSet` new_id
+    new_env
+      | no_change = delVarEnv env id
+      | otherwise = extendVarEnv env id new_id
+
+-- | @substBndrs = runState . traverse (state . substBndr)@
+substBndrs :: Traversable f => f Id -> Subst -> (f Id, Subst)
+substBndrs = runState . traverse (state . substBndr)
+
+-- | Substitutes an occurrence of an identifier for its counterpart recorded
+-- in the 'Subst'.
+lookupIdSubst :: HasCallStack => Id -> Subst -> Id
+lookupIdSubst id (Subst in_scope env)
+  | not (isLocalId id) = id
+  | Just id' <- lookupVarEnv env id = id'
+  | Just id' <- lookupInScope in_scope id = id'
+  | otherwise = WARN( True, text "StgSubst.lookupIdSubst" <+> ppr id $$ ppr in_scope)
+                id
+
+-- | Substitutes an occurrence of an identifier for its counterpart recorded
+-- in the 'Subst'. Does not generate a debug warning if the identifier to
+-- to substitute wasn't in scope.
+noWarnLookupIdSubst :: HasCallStack => Id -> Subst -> Id
+noWarnLookupIdSubst id (Subst in_scope env)
+  | not (isLocalId id) = id
+  | Just id' <- lookupVarEnv env id = id'
+  | Just id' <- lookupInScope in_scope id = id'
+  | otherwise = id
+
+-- | Add the 'Id' to the in-scope set and remove any existing substitutions for
+-- it.
+extendInScope :: Id -> Subst -> Subst
+extendInScope id (Subst in_scope env) = Subst (in_scope `extendInScopeSet` id) env
+
+-- | Add a substitution for an 'Id' to the 'Subst': you must ensure that the
+-- in-scope set is such that TyCoSubst Note [The substitution invariant]
+-- holds after extending the substitution like this.
+extendSubst :: Id -> Id -> Subst -> Subst
+extendSubst id new_id (Subst in_scope env)
+  = ASSERT2( new_id `elemInScopeSet` in_scope, ppr id <+> ppr new_id $$ ppr in_scope )
+    Subst in_scope (extendVarEnv env id new_id)
diff --git a/compiler/GHC/Stg/Syntax.hs b/compiler/GHC/Stg/Syntax.hs
new file mode 100644
index 0000000000..b82fea5de2
--- /dev/null
+++ b/compiler/GHC/Stg/Syntax.hs
@@ -0,0 +1,871 @@
+{-
+(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+
+Shared term graph (STG) syntax for spineless-tagless code generation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This data type represents programs just before code generation (conversion to
+@Cmm@): basically, what we have is a stylised form of @CoreSyntax@, the style
+being one that happens to be ideally suited to spineless tagless code
+generation.
+-}
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE ConstraintKinds #-}
+
+module GHC.Stg.Syntax (
+        StgArg(..),
+
+        GenStgTopBinding(..), GenStgBinding(..), GenStgExpr(..), GenStgRhs(..),
+        GenStgAlt, AltType(..),
+
+        StgPass(..), BinderP, XRhsClosure, XLet, XLetNoEscape,
+        NoExtFieldSilent, noExtFieldSilent,
+        OutputablePass,
+
+        UpdateFlag(..), isUpdatable,
+
+        -- a set of synonyms for the vanilla parameterisation
+        StgTopBinding, StgBinding, StgExpr, StgRhs, StgAlt,
+
+        -- a set of synonyms for the code gen parameterisation
+        CgStgTopBinding, CgStgBinding, CgStgExpr, CgStgRhs, CgStgAlt,
+
+        -- a set of synonyms for the lambda lifting parameterisation
+        LlStgTopBinding, LlStgBinding, LlStgExpr, LlStgRhs, LlStgAlt,
+
+        -- a set of synonyms to distinguish in- and out variants
+        InStgArg,  InStgTopBinding,  InStgBinding,  InStgExpr,  InStgRhs,  InStgAlt,
+        OutStgArg, OutStgTopBinding, OutStgBinding, OutStgExpr, OutStgRhs, OutStgAlt,
+
+        -- StgOp
+        StgOp(..),
+
+        -- utils
+        topStgBindHasCafRefs, stgArgHasCafRefs, stgRhsArity,
+        isDllConApp,
+        stgArgType,
+        stripStgTicksTop, stripStgTicksTopE,
+        stgCaseBndrInScope,
+
+        pprStgBinding, pprGenStgTopBindings, pprStgTopBindings
+    ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import CoreSyn     ( AltCon, Tickish )
+import CostCentre  ( CostCentreStack )
+import Data.ByteString ( ByteString )
+import Data.Data   ( Data )
+import Data.List   ( intersperse )
+import DataCon
+import DynFlags
+import ForeignCall ( ForeignCall )
+import Id
+import IdInfo      ( mayHaveCafRefs )
+import VarSet
+import Literal     ( Literal, literalType )
+import Module      ( Module )
+import Outputable
+import Packages    ( isDllName )
+import GHC.Platform
+import PprCore     ( {- instances -} )
+import PrimOp      ( PrimOp, PrimCall )
+import TyCon       ( PrimRep(..), TyCon )
+import Type        ( Type )
+import GHC.Types.RepType     ( typePrimRep1 )
+import Util
+
+import Data.List.NonEmpty ( NonEmpty, toList )
+
+{-
+************************************************************************
+*                                                                      *
+GenStgBinding
+*                                                                      *
+************************************************************************
+
+As usual, expressions are interesting; other things are boring. Here are the
+boring things (except note the @GenStgRhs@), parameterised with respect to
+binder and occurrence information (just as in @CoreSyn@):
+-}
+
+-- | A top-level binding.
+data GenStgTopBinding pass
+-- See Note [CoreSyn top-level string literals]
+  = StgTopLifted (GenStgBinding pass)
+  | StgTopStringLit Id ByteString
+
+data GenStgBinding pass
+  = StgNonRec (BinderP pass) (GenStgRhs pass)
+  | StgRec    [(BinderP pass, GenStgRhs pass)]
+
+{-
+************************************************************************
+*                                                                      *
+StgArg
+*                                                                      *
+************************************************************************
+-}
+
+data StgArg
+  = StgVarArg  Id
+  | StgLitArg  Literal
+
+-- | Does this constructor application refer to anything in a different
+-- *Windows* DLL?
+-- If so, we can't allocate it statically
+isDllConApp :: DynFlags -> Module -> DataCon -> [StgArg] -> Bool
+isDllConApp dflags this_mod con args
+ | platformOS (targetPlatform dflags) == OSMinGW32
+    = isDllName dflags this_mod (dataConName con) || any is_dll_arg args
+ | otherwise = False
+  where
+    -- NB: typePrimRep1 is legit because any free variables won't have
+    -- unlifted type (there are no unlifted things at top level)
+    is_dll_arg :: StgArg -> Bool
+    is_dll_arg (StgVarArg v) =  isAddrRep (typePrimRep1 (idType v))
+                             && isDllName dflags this_mod (idName v)
+    is_dll_arg _             = False
+
+-- True of machine addresses; these are the things that don't work across DLLs.
+-- The key point here is that VoidRep comes out False, so that a top level
+-- nullary GADT constructor is False for isDllConApp
+--
+--    data T a where
+--      T1 :: T Int
+--
+-- gives
+--
+--    T1 :: forall a. (a~Int) -> T a
+--
+-- and hence the top-level binding
+--
+--    $WT1 :: T Int
+--    $WT1 = T1 Int (Coercion (Refl Int))
+--
+-- The coercion argument here gets VoidRep
+isAddrRep :: PrimRep -> Bool
+isAddrRep AddrRep     = True
+isAddrRep LiftedRep   = True
+isAddrRep UnliftedRep = True
+isAddrRep _           = False
+
+-- | Type of an @StgArg@
+--
+-- Very half baked because we have lost the type arguments.
+stgArgType :: StgArg -> Type
+stgArgType (StgVarArg v)   = idType v
+stgArgType (StgLitArg lit) = literalType lit
+
+
+-- | Strip ticks of a given type from an STG expression.
+stripStgTicksTop :: (Tickish Id -> Bool) -> GenStgExpr p -> ([Tickish Id], GenStgExpr p)
+stripStgTicksTop p = go []
+   where go ts (StgTick t e) | p t = go (t:ts) e
+         go ts other               = (reverse ts, other)
+
+-- | Strip ticks of a given type from an STG expression returning only the expression.
+stripStgTicksTopE :: (Tickish Id -> Bool) -> GenStgExpr p -> GenStgExpr p
+stripStgTicksTopE p = go
+   where go (StgTick t e) | p t = go e
+         go other               = other
+
+-- | Given an alt type and whether the program is unarised, return whether the
+-- case binder is in scope.
+--
+-- Case binders of unboxed tuple or unboxed sum type always dead after the
+-- unariser has run. See Note [Post-unarisation invariants].
+stgCaseBndrInScope :: AltType -> Bool {- ^ unarised? -} -> Bool
+stgCaseBndrInScope alt_ty unarised =
+    case alt_ty of
+      AlgAlt _      -> True
+      PrimAlt _     -> True
+      MultiValAlt _ -> not unarised
+      PolyAlt       -> True
+
+{-
+************************************************************************
+*                                                                      *
+STG expressions
+*                                                                      *
+************************************************************************
+
+The @GenStgExpr@ data type is parameterised on binder and occurrence info, as
+before.
+
+************************************************************************
+*                                                                      *
+GenStgExpr
+*                                                                      *
+************************************************************************
+
+An application is of a function to a list of atoms (not expressions).
+Operationally, we want to push the arguments on the stack and call the function.
+(If the arguments were expressions, we would have to build their closures
+first.)
+
+There is no constructor for a lone variable; it would appear as @StgApp var []@.
+-}
+
+data GenStgExpr pass
+  = StgApp
+        Id       -- function
+        [StgArg] -- arguments; may be empty
+
+{-
+************************************************************************
+*                                                                      *
+StgConApp and StgPrimApp --- saturated applications
+*                                                                      *
+************************************************************************
+
+There are specialised forms of application, for constructors, primitives, and
+literals.
+-}
+
+  | StgLit      Literal
+
+        -- StgConApp is vital for returning unboxed tuples or sums
+        -- which can't be let-bound
+  | StgConApp   DataCon
+                [StgArg] -- Saturated
+                [Type]   -- See Note [Types in StgConApp] in GHC.Stg.Unarise
+
+  | StgOpApp    StgOp    -- Primitive op or foreign call
+                [StgArg] -- Saturated.
+                Type     -- Result type
+                         -- We need to know this so that we can
+                         -- assign result registers
+
+{-
+************************************************************************
+*                                                                      *
+StgLam
+*                                                                      *
+************************************************************************
+
+StgLam is used *only* during CoreToStg's work. Before CoreToStg has finished it
+encodes (\x -> e) as (let f = \x -> e in f) TODO: Encode this via an extension
+to GenStgExpr à la TTG.
+-}
+
+  | StgLam
+        (NonEmpty (BinderP pass))
+        StgExpr    -- Body of lambda
+
+{-
+************************************************************************
+*                                                                      *
+GenStgExpr: case-expressions
+*                                                                      *
+************************************************************************
+
+This has the same boxed/unboxed business as Core case expressions.
+-}
+
+  | StgCase
+        (GenStgExpr pass) -- the thing to examine
+        (BinderP pass) -- binds the result of evaluating the scrutinee
+        AltType
+        [GenStgAlt pass]
+                    -- The DEFAULT case is always *first*
+                    -- if it is there at all
+
+{-
+************************************************************************
+*                                                                      *
+GenStgExpr: let(rec)-expressions
+*                                                                      *
+************************************************************************
+
+The various forms of let(rec)-expression encode most of the interesting things
+we want to do.
+
+-   let-closure x = [free-vars] [args] expr in e
+
+  is equivalent to
+
+    let x = (\free-vars -> \args -> expr) free-vars
+
+  @args@ may be empty (and is for most closures). It isn't under circumstances
+  like this:
+
+    let x = (\y -> y+z)
+
+  This gets mangled to
+
+    let-closure x = [z] [y] (y+z)
+
+  The idea is that we compile code for @(y+z)@ in an environment in which @z@ is
+  bound to an offset from Node, and `y` is bound to an offset from the stack
+  pointer.
+
+  (A let-closure is an @StgLet@ with a @StgRhsClosure@ RHS.)
+
+-   let-constructor x = Constructor [args] in e
+
+  (A let-constructor is an @StgLet@ with a @StgRhsCon@ RHS.)
+
+- Letrec-expressions are essentially the same deal as let-closure/
+  let-constructor, so we use a common structure and distinguish between them
+  with an @is_recursive@ boolean flag.
+
+-   let-unboxed u = <an arbitrary arithmetic expression in unboxed values> in e
+
+  All the stuff on the RHS must be fully evaluated. No function calls either!
+
+  (We've backed away from this toward case-expressions with suitably-magical
+  alts ...)
+
+- Advanced stuff here! Not to start with, but makes pattern matching generate
+  more efficient code.
+
+    let-escapes-not fail = expr
+    in e'
+
+  Here the idea is that @e'@ guarantees not to put @fail@ in a data structure,
+  or pass it to another function. All @e'@ will ever do is tail-call @fail@.
+  Rather than build a closure for @fail@, all we need do is to record the stack
+  level at the moment of the @let-escapes-not@; then entering @fail@ is just a
+  matter of adjusting the stack pointer back down to that point and entering the
+  code for it.
+
+  Another example:
+
+    f x y = let z = huge-expression in
+            if y==1 then z else
+            if y==2 then z else
+            1
+
+  (A let-escapes-not is an @StgLetNoEscape@.)
+
+- We may eventually want:
+
+    let-literal x = Literal in e
+
+And so the code for let(rec)-things:
+-}
+
+  | StgLet
+        (XLet pass)
+        (GenStgBinding pass)    -- right hand sides (see below)
+        (GenStgExpr pass)       -- body
+
+  | StgLetNoEscape
+        (XLetNoEscape pass)
+        (GenStgBinding pass)    -- right hand sides (see below)
+        (GenStgExpr pass)       -- body
+
+{-
+*************************************************************************
+*                                                                      *
+GenStgExpr: hpc, scc and other debug annotations
+*                                                                      *
+*************************************************************************
+
+Finally for @hpc@ expressions we introduce a new STG construct.
+-}
+
+  | StgTick
+    (Tickish Id)
+    (GenStgExpr pass)       -- sub expression
+
+-- END of GenStgExpr
+
+{-
+************************************************************************
+*                                                                      *
+STG right-hand sides
+*                                                                      *
+************************************************************************
+
+Here's the rest of the interesting stuff for @StgLet@s; the first flavour is for
+closures:
+-}
+
+data GenStgRhs pass
+  = StgRhsClosure
+        (XRhsClosure pass) -- ^ Extension point for non-global free var
+                           --   list just before 'CodeGen'.
+        CostCentreStack    -- ^ CCS to be attached (default is CurrentCCS)
+        !UpdateFlag        -- ^ 'ReEntrant' | 'Updatable' | 'SingleEntry'
+        [BinderP pass]     -- ^ arguments; if empty, then not a function;
+                           --   as above, order is important.
+        (GenStgExpr pass)  -- ^ body
+
+{-
+An example may be in order.  Consider:
+
+  let t = \x -> \y -> ... x ... y ... p ... q in e
+
+Pulling out the free vars and stylising somewhat, we get the equivalent:
+
+  let t = (\[p,q] -> \[x,y] -> ... x ... y ... p ...q) p q
+
+Stg-operationally, the @[x,y]@ are on the stack, the @[p,q]@ are offsets from
+@Node@ into the closure, and the code ptr for the closure will be exactly that
+in parentheses above.
+
+The second flavour of right-hand-side is for constructors (simple but
+important):
+-}
+
+  | StgRhsCon
+        CostCentreStack -- CCS to be attached (default is CurrentCCS).
+                        -- Top-level (static) ones will end up with
+                        -- DontCareCCS, because we don't count static
+                        -- data in heap profiles, and we don't set CCCS
+                        -- from static closure.
+        DataCon         -- Constructor. Never an unboxed tuple or sum, as those
+                        -- are not allocated.
+        [StgArg]        -- Args
+
+-- | Used as a data type index for the stgSyn AST
+data StgPass
+  = Vanilla
+  | LiftLams
+  | CodeGen
+
+-- | Like 'GHC.Hs.Extension.NoExtField', but with an 'Outputable' instance that
+-- returns 'empty'.
+data NoExtFieldSilent = NoExtFieldSilent
+  deriving (Data, Eq, Ord)
+
+instance Outputable NoExtFieldSilent where
+  ppr _ = empty
+
+-- | Used when constructing a term with an unused extension point that should
+-- not appear in pretty-printed output at all.
+noExtFieldSilent :: NoExtFieldSilent
+noExtFieldSilent = NoExtFieldSilent
+-- TODO: Maybe move this to GHC.Hs.Extension? I'm not sure about the
+-- implications on build time...
+
+-- TODO: Do we really want to the extension point type families to have a closed
+-- domain?
+type family BinderP (pass :: StgPass)
+type instance BinderP 'Vanilla = Id
+type instance BinderP 'CodeGen = Id
+
+type family XRhsClosure (pass :: StgPass)
+type instance XRhsClosure 'Vanilla = NoExtFieldSilent
+-- | Code gen needs to track non-global free vars
+type instance XRhsClosure 'CodeGen = DIdSet
+
+type family XLet (pass :: StgPass)
+type instance XLet 'Vanilla = NoExtFieldSilent
+type instance XLet 'CodeGen = NoExtFieldSilent
+
+type family XLetNoEscape (pass :: StgPass)
+type instance XLetNoEscape 'Vanilla = NoExtFieldSilent
+type instance XLetNoEscape 'CodeGen = NoExtFieldSilent
+
+stgRhsArity :: StgRhs -> Int
+stgRhsArity (StgRhsClosure _ _ _ bndrs _)
+  = ASSERT( all isId bndrs ) length bndrs
+  -- The arity never includes type parameters, but they should have gone by now
+stgRhsArity (StgRhsCon _ _ _) = 0
+
+-- Note [CAF consistency]
+-- ~~~~~~~~~~~~~~~~~~~~~~
+--
+-- `topStgBindHasCafRefs` is only used by an assert (`consistentCafInfo` in
+-- `CoreToStg`) to make sure CAF-ness predicted by `TidyPgm` is consistent with
+-- reality.
+--
+-- Specifically, if the RHS mentions any Id that itself is marked
+-- `MayHaveCafRefs`; or if the binding is a top-level updateable thunk; then the
+-- `Id` for the binding should be marked `MayHaveCafRefs`. The potential trouble
+-- is that `TidyPgm` computed the CAF info on the `Id` but some transformations
+-- have taken place since then.
+
+topStgBindHasCafRefs :: GenStgTopBinding pass -> Bool
+topStgBindHasCafRefs (StgTopLifted (StgNonRec _ rhs))
+  = topRhsHasCafRefs rhs
+topStgBindHasCafRefs (StgTopLifted (StgRec binds))
+  = any topRhsHasCafRefs (map snd binds)
+topStgBindHasCafRefs StgTopStringLit{}
+  = False
+
+topRhsHasCafRefs :: GenStgRhs pass -> Bool
+topRhsHasCafRefs (StgRhsClosure _ _ upd _ body)
+  = -- See Note [CAF consistency]
+    isUpdatable upd || exprHasCafRefs body
+topRhsHasCafRefs (StgRhsCon _ _ args)
+  = any stgArgHasCafRefs args
+
+exprHasCafRefs :: GenStgExpr pass -> Bool
+exprHasCafRefs (StgApp f args)
+  = stgIdHasCafRefs f || any stgArgHasCafRefs args
+exprHasCafRefs StgLit{}
+  = False
+exprHasCafRefs (StgConApp _ args _)
+  = any stgArgHasCafRefs args
+exprHasCafRefs (StgOpApp _ args _)
+  = any stgArgHasCafRefs args
+exprHasCafRefs (StgLam _ body)
+  = exprHasCafRefs body
+exprHasCafRefs (StgCase scrt _ _ alts)
+  = exprHasCafRefs scrt || any altHasCafRefs alts
+exprHasCafRefs (StgLet _ bind body)
+  = bindHasCafRefs bind || exprHasCafRefs body
+exprHasCafRefs (StgLetNoEscape _ bind body)
+  = bindHasCafRefs bind || exprHasCafRefs body
+exprHasCafRefs (StgTick _ expr)
+  = exprHasCafRefs expr
+
+bindHasCafRefs :: GenStgBinding pass -> Bool
+bindHasCafRefs (StgNonRec _ rhs)
+  = rhsHasCafRefs rhs
+bindHasCafRefs (StgRec binds)
+  = any rhsHasCafRefs (map snd binds)
+
+rhsHasCafRefs :: GenStgRhs pass -> Bool
+rhsHasCafRefs (StgRhsClosure _ _ _ _ body)
+  = exprHasCafRefs body
+rhsHasCafRefs (StgRhsCon _ _ args)
+  = any stgArgHasCafRefs args
+
+altHasCafRefs :: GenStgAlt pass -> Bool
+altHasCafRefs (_, _, rhs) = exprHasCafRefs rhs
+
+stgArgHasCafRefs :: StgArg -> Bool
+stgArgHasCafRefs (StgVarArg id)
+  = stgIdHasCafRefs id
+stgArgHasCafRefs _
+  = False
+
+stgIdHasCafRefs :: Id -> Bool
+stgIdHasCafRefs id =
+  -- We are looking for occurrences of an Id that is bound at top level, and may
+  -- have CAF refs. At this point (after TidyPgm) top-level Ids (whether
+  -- imported or defined in this module) are GlobalIds, so the test is easy.
+  isGlobalId id && mayHaveCafRefs (idCafInfo id)
+
+{-
+************************************************************************
+*                                                                      *
+STG case alternatives
+*                                                                      *
+************************************************************************
+
+Very like in @CoreSyntax@ (except no type-world stuff).
+
+The type constructor is guaranteed not to be abstract; that is, we can see its
+representation. This is important because the code generator uses it to
+determine return conventions etc. But it's not trivial where there's a module
+loop involved, because some versions of a type constructor might not have all
+the constructors visible. So mkStgAlgAlts (in CoreToStg) ensures that it gets
+the TyCon from the constructors or literals (which are guaranteed to have the
+Real McCoy) rather than from the scrutinee type.
+-}
+
+type GenStgAlt pass
+  = (AltCon,          -- alts: data constructor,
+     [BinderP pass],  -- constructor's parameters,
+     GenStgExpr pass) -- ...right-hand side.
+
+data AltType
+  = PolyAlt             -- Polymorphic (a lifted type variable)
+  | MultiValAlt Int     -- Multi value of this arity (unboxed tuple or sum)
+                        -- the arity could indeed be 1 for unary unboxed tuple
+                        -- or enum-like unboxed sums
+  | AlgAlt      TyCon   -- Algebraic data type; the AltCons will be DataAlts
+  | PrimAlt     PrimRep -- Primitive data type; the AltCons (if any) will be LitAlts
+
+{-
+************************************************************************
+*                                                                      *
+The Plain STG parameterisation
+*                                                                      *
+************************************************************************
+
+This happens to be the only one we use at the moment.
+-}
+
+type StgTopBinding = GenStgTopBinding 'Vanilla
+type StgBinding    = GenStgBinding    'Vanilla
+type StgExpr       = GenStgExpr       'Vanilla
+type StgRhs        = GenStgRhs        'Vanilla
+type StgAlt        = GenStgAlt        'Vanilla
+
+type LlStgTopBinding = GenStgTopBinding 'LiftLams
+type LlStgBinding    = GenStgBinding    'LiftLams
+type LlStgExpr       = GenStgExpr       'LiftLams
+type LlStgRhs        = GenStgRhs        'LiftLams
+type LlStgAlt        = GenStgAlt        'LiftLams
+
+type CgStgTopBinding = GenStgTopBinding 'CodeGen
+type CgStgBinding    = GenStgBinding    'CodeGen
+type CgStgExpr       = GenStgExpr       'CodeGen
+type CgStgRhs        = GenStgRhs        'CodeGen
+type CgStgAlt        = GenStgAlt        'CodeGen
+
+{- Many passes apply a substitution, and it's very handy to have type
+   synonyms to remind us whether or not the substitution has been applied.
+   See CoreSyn for precedence in Core land
+-}
+
+type InStgTopBinding  = StgTopBinding
+type InStgBinding     = StgBinding
+type InStgArg         = StgArg
+type InStgExpr        = StgExpr
+type InStgRhs         = StgRhs
+type InStgAlt         = StgAlt
+type OutStgTopBinding = StgTopBinding
+type OutStgBinding    = StgBinding
+type OutStgArg        = StgArg
+type OutStgExpr       = StgExpr
+type OutStgRhs        = StgRhs
+type OutStgAlt        = StgAlt
+
+{-
+
+************************************************************************
+*                                                                      *
+UpdateFlag
+*                                                                      *
+************************************************************************
+
+This is also used in @LambdaFormInfo@ in the @ClosureInfo@ module.
+
+A @ReEntrant@ closure may be entered multiple times, but should not be updated
+or blackholed. An @Updatable@ closure should be updated after evaluation (and
+may be blackholed during evaluation). A @SingleEntry@ closure will only be
+entered once, and so need not be updated but may safely be blackholed.
+-}
+
+data UpdateFlag = ReEntrant | Updatable | SingleEntry
+
+instance Outputable UpdateFlag where
+    ppr u = char $ case u of
+                       ReEntrant   -> 'r'
+                       Updatable   -> 'u'
+                       SingleEntry -> 's'
+
+isUpdatable :: UpdateFlag -> Bool
+isUpdatable ReEntrant   = False
+isUpdatable SingleEntry = False
+isUpdatable Updatable   = True
+
+{-
+************************************************************************
+*                                                                      *
+StgOp
+*                                                                      *
+************************************************************************
+
+An StgOp allows us to group together PrimOps and ForeignCalls. It's quite useful
+to move these around together, notably in StgOpApp and COpStmt.
+-}
+
+data StgOp
+  = StgPrimOp  PrimOp
+
+  | StgPrimCallOp PrimCall
+
+  | StgFCallOp ForeignCall Type
+        -- The Type, which is obtained from the foreign import declaration
+        -- itself, is needed by the stg-to-cmm pass to determine the offset to
+        -- apply to unlifted boxed arguments in GHC.StgToCmm.Foreign. See Note
+        -- [Unlifted boxed arguments to foreign calls]
+
+{-
+************************************************************************
+*                                                                      *
+Pretty-printing
+*                                                                      *
+************************************************************************
+
+Robin Popplestone asked for semi-colon separators on STG binds; here's hoping he
+likes terminators instead...  Ditto for case alternatives.
+-}
+
+type OutputablePass pass =
+  ( Outputable (XLet pass)
+  , Outputable (XLetNoEscape pass)
+  , Outputable (XRhsClosure pass)
+  , OutputableBndr (BinderP pass)
+  )
+
+pprGenStgTopBinding
+  :: OutputablePass pass => GenStgTopBinding pass -> SDoc
+pprGenStgTopBinding (StgTopStringLit bndr str)
+  = hang (hsep [pprBndr LetBind bndr, equals])
+        4 (pprHsBytes str <> semi)
+pprGenStgTopBinding (StgTopLifted bind)
+  = pprGenStgBinding bind
+
+pprGenStgBinding
+  :: OutputablePass pass => GenStgBinding pass -> SDoc
+
+pprGenStgBinding (StgNonRec bndr rhs)
+  = hang (hsep [pprBndr LetBind bndr, equals])
+        4 (ppr rhs <> semi)
+
+pprGenStgBinding (StgRec pairs)
+  = vcat [ text "Rec {"
+         , vcat (intersperse blankLine (map ppr_bind pairs))
+         , text "end Rec }" ]
+  where
+    ppr_bind (bndr, expr)
+      = hang (hsep [pprBndr LetBind bndr, equals])
+             4 (ppr expr <> semi)
+
+pprGenStgTopBindings
+  :: (OutputablePass pass) => [GenStgTopBinding pass] -> SDoc
+pprGenStgTopBindings binds
+  = vcat $ intersperse blankLine (map pprGenStgTopBinding binds)
+
+pprStgBinding :: StgBinding -> SDoc
+pprStgBinding = pprGenStgBinding
+
+pprStgTopBindings :: [StgTopBinding] -> SDoc
+pprStgTopBindings = pprGenStgTopBindings
+
+instance Outputable StgArg where
+    ppr = pprStgArg
+
+instance OutputablePass pass => Outputable (GenStgTopBinding pass) where
+    ppr = pprGenStgTopBinding
+
+instance OutputablePass pass => Outputable (GenStgBinding pass) where
+    ppr = pprGenStgBinding
+
+instance OutputablePass pass => Outputable (GenStgExpr pass) where
+    ppr = pprStgExpr
+
+instance OutputablePass pass => Outputable (GenStgRhs pass) where
+    ppr rhs = pprStgRhs rhs
+
+pprStgArg :: StgArg -> SDoc
+pprStgArg (StgVarArg var) = ppr var
+pprStgArg (StgLitArg con) = ppr con
+
+pprStgExpr :: OutputablePass pass => GenStgExpr pass -> SDoc
+-- special case
+pprStgExpr (StgLit lit)     = ppr lit
+
+-- general case
+pprStgExpr (StgApp func args)
+  = hang (ppr func) 4 (sep (map (ppr) args))
+
+pprStgExpr (StgConApp con args _)
+  = hsep [ ppr con, brackets (interppSP args) ]
+
+pprStgExpr (StgOpApp op args _)
+  = hsep [ pprStgOp op, brackets (interppSP args)]
+
+pprStgExpr (StgLam bndrs body)
+  = sep [ char '\\' <+> ppr_list (map (pprBndr LambdaBind) (toList bndrs))
+            <+> text "->",
+         pprStgExpr body ]
+  where ppr_list = brackets . fsep . punctuate comma
+
+-- special case: let v = <very specific thing>
+--               in
+--               let ...
+--               in
+--               ...
+--
+-- Very special!  Suspicious! (SLPJ)
+
+{-
+pprStgExpr (StgLet srt (StgNonRec bndr (StgRhsClosure cc bi free_vars upd_flag args rhs))
+                        expr@(StgLet _ _))
+  = ($$)
+      (hang (hcat [text "let { ", ppr bndr, ptext (sLit " = "),
+                          ppr cc,
+                          pp_binder_info bi,
+                          text " [", whenPprDebug (interppSP free_vars), ptext (sLit "] \\"),
+                          ppr upd_flag, text " [",
+                          interppSP args, char ']'])
+            8 (sep [hsep [ppr rhs, text "} in"]]))
+      (ppr expr)
+-}
+
+-- special case: let ... in let ...
+
+pprStgExpr (StgLet ext bind expr@StgLet{})
+  = ($$)
+      (sep [hang (text "let" <+> ppr ext <+> text "{")
+                2 (hsep [pprGenStgBinding bind, text "} in"])])
+      (ppr expr)
+
+-- general case
+pprStgExpr (StgLet ext bind expr)
+  = sep [hang (text "let" <+> ppr ext <+> text "{") 2 (pprGenStgBinding bind),
+           hang (text "} in ") 2 (ppr expr)]
+
+pprStgExpr (StgLetNoEscape ext bind expr)
+  = sep [hang (text "let-no-escape" <+> ppr ext <+> text "{")
+                2 (pprGenStgBinding bind),
+           hang (text "} in ")
+                2 (ppr expr)]
+
+pprStgExpr (StgTick tickish expr)
+  = sdocWithDynFlags $ \dflags ->
+    if gopt Opt_SuppressTicks dflags
+    then pprStgExpr expr
+    else sep [ ppr tickish, pprStgExpr expr ]
+
+
+-- Don't indent for a single case alternative.
+pprStgExpr (StgCase expr bndr alt_type [alt])
+  = sep [sep [text "case",
+           nest 4 (hsep [pprStgExpr expr,
+             whenPprDebug (dcolon <+> ppr alt_type)]),
+           text "of", pprBndr CaseBind bndr, char '{'],
+           pprStgAlt False alt,
+           char '}']
+
+pprStgExpr (StgCase expr bndr alt_type alts)
+  = sep [sep [text "case",
+           nest 4 (hsep [pprStgExpr expr,
+             whenPprDebug (dcolon <+> ppr alt_type)]),
+           text "of", pprBndr CaseBind bndr, char '{'],
+           nest 2 (vcat (map (pprStgAlt True) alts)),
+           char '}']
+
+
+pprStgAlt :: OutputablePass pass => Bool -> GenStgAlt pass -> SDoc
+pprStgAlt indent (con, params, expr)
+  | indent    = hang altPattern 4 (ppr expr <> semi)
+  | otherwise = sep [altPattern, ppr expr <> semi]
+    where
+      altPattern = (hsep [ppr con, sep (map (pprBndr CasePatBind) params), text "->"])
+
+
+pprStgOp :: StgOp -> SDoc
+pprStgOp (StgPrimOp  op)   = ppr op
+pprStgOp (StgPrimCallOp op)= ppr op
+pprStgOp (StgFCallOp op _) = ppr op
+
+instance Outputable AltType where
+  ppr PolyAlt         = text "Polymorphic"
+  ppr (MultiValAlt n) = text "MultiAlt" <+> ppr n
+  ppr (AlgAlt tc)     = text "Alg"    <+> ppr tc
+  ppr (PrimAlt tc)    = text "Prim"   <+> ppr tc
+
+pprStgRhs :: OutputablePass pass => GenStgRhs pass -> SDoc
+
+pprStgRhs (StgRhsClosure ext cc upd_flag args body)
+  = sdocWithDynFlags $ \dflags ->
+    hang (hsep [if gopt Opt_SccProfilingOn dflags then ppr cc else empty,
+                if not $ gopt Opt_SuppressStgExts dflags
+                  then ppr ext else empty,
+                char '\\' <> ppr upd_flag, brackets (interppSP args)])
+         4 (ppr body)
+
+pprStgRhs (StgRhsCon cc con args)
+  = hcat [ ppr cc,
+           space, ppr con, text "! ", brackets (interppSP args)]
diff --git a/compiler/GHC/Stg/Unarise.hs b/compiler/GHC/Stg/Unarise.hs
new file mode 100644
index 0000000000..bc2ce4cb87
--- /dev/null
+++ b/compiler/GHC/Stg/Unarise.hs
@@ -0,0 +1,769 @@
+{-
+(c) The GRASP/AQUA Project, Glasgow University, 1992-2012
+
+Note [Unarisation]
+~~~~~~~~~~~~~~~~~~
+The idea of this pass is to translate away *all* unboxed-tuple and unboxed-sum
+binders. So for example:
+
+  f (x :: (# Int, Bool #)) = f x + f (# 1, True #)
+
+  ==>
+
+  f (x1 :: Int) (x2 :: Bool) = f x1 x2 + f 1 True
+
+It is important that we do this at the STG level and NOT at the Core level
+because it would be very hard to make this pass Core-type-preserving. In this
+example the type of 'f' changes, for example.
+
+STG fed to the code generators *must* be unarised because the code generators do
+not support unboxed tuple and unboxed sum binders natively.
+
+In more detail: (see next note for unboxed sums)
+
+Suppose that a variable x : (# t1, t2 #).
+
+  * At the binding site for x, make up fresh vars  x1:t1, x2:t2
+
+  * Extend the UnariseEnv   x :-> MultiVal [x1,x2]
+
+  * Replace the binding with a curried binding for x1,x2
+
+       Lambda:   \x.e                ==>   \x1 x2. e
+       Case alt: MkT a b x c d -> e  ==>   MkT a b x1 x2 c d -> e
+
+  * Replace argument occurrences with a sequence of args via a lookup in
+    UnariseEnv
+
+       f a b x c d   ==>   f a b x1 x2 c d
+
+  * Replace tail-call occurrences with an unboxed tuple via a lookup in
+    UnariseEnv
+
+       x  ==>  (# x1, x2 #)
+
+    So, for example
+
+       f x = x    ==>   f x1 x2 = (# x1, x2 #)
+
+  * We /always/ eliminate a case expression when
+
+       - It scrutinises an unboxed tuple or unboxed sum
+
+       - The scrutinee is a variable (or when it is an explicit tuple, but the
+         simplifier eliminates those)
+
+    The case alternative (there can be only one) can be one of these two
+    things:
+
+      - An unboxed tuple pattern. e.g.
+
+          case v of x { (# x1, x2, x3 #) -> ... }
+
+        Scrutinee has to be in form `(# t1, t2, t3 #)` so we just extend the
+        environment with
+
+          x :-> MultiVal [t1,t2,t3]
+          x1 :-> UnaryVal t1, x2 :-> UnaryVal t2, x3 :-> UnaryVal t3
+
+      - A DEFAULT alternative. Just the same, without the bindings for x1,x2,x3
+
+By the end of this pass, we only have unboxed tuples in return positions.
+Unboxed sums are completely eliminated, see next note.
+
+Note [Translating unboxed sums to unboxed tuples]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Unarise also eliminates unboxed sum binders, and translates unboxed sums in
+return positions to unboxed tuples. We want to overlap fields of a sum when
+translating it to a tuple to have efficient memory layout. When translating a
+sum pattern to a tuple pattern, we need to translate it so that binders of sum
+alternatives will be mapped to right arguments after the term translation. So
+translation of sum DataCon applications to tuple DataCon applications and
+translation of sum patterns to tuple patterns need to be in sync.
+
+These translations work like this. Suppose we have
+
+  (# x1 | | ... #) :: (# t1 | t2 | ... #)
+
+remember that t1, t2 ... can be sums and tuples too. So we first generate
+layouts of those. Then we "merge" layouts of each alternative, which gives us a
+sum layout with best overlapping possible.
+
+Layout of a flat type 'ty1' is just [ty1].
+Layout of a tuple is just concatenation of layouts of its fields.
+
+For layout of a sum type,
+
+  - We first get layouts of all alternatives.
+  - We sort these layouts based on their "slot types".
+  - We merge all the alternatives.
+
+For example, say we have (# (# Int#, Char #) | (# Int#, Int# #) | Int# #)
+
+  - Layouts of alternatives: [ [Word, Ptr], [Word, Word], [Word] ]
+  - Sorted: [ [Ptr, Word], [Word, Word], [Word] ]
+  - Merge all alternatives together: [ Ptr, Word, Word ]
+
+We add a slot for the tag to the first position. So our tuple type is
+
+  (# Tag#, Any, Word#, Word# #)
+  (we use Any for pointer slots)
+
+Now, any term of this sum type needs to generate a tuple of this type instead.
+The translation works by simply putting arguments to first slots that they fit
+in. Suppose we had
+
+  (# (# 42#, 'c' #) | | #)
+
+42# fits in Word#, 'c' fits in Any, so we generate this application:
+
+  (# 1#, 'c', 42#, rubbish #)
+
+Another example using the same type: (# | (# 2#, 3# #) | #). 2# fits in Word#,
+3# fits in Word #, so we get:
+
+  (# 2#, rubbish, 2#, 3# #).
+
+Note [Types in StgConApp]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we have this unboxed sum term:
+
+  (# 123 | #)
+
+What will be the unboxed tuple representation? We can't tell without knowing the
+type of this term. For example, these are all valid tuples for this:
+
+  (# 1#, 123 #)          -- when type is (# Int | String #)
+  (# 1#, 123, rubbish #) -- when type is (# Int | Float# #)
+  (# 1#, 123, rubbish, rubbish #)
+                         -- when type is (# Int | (# Int, Int, Int #) #)
+
+So we pass type arguments of the DataCon's TyCon in StgConApp to decide what
+layout to use. Note that unlifted values can't be let-bound, so we don't need
+types in StgRhsCon.
+
+Note [UnariseEnv can map to literals]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+To avoid redundant case expressions when unarising unboxed sums, UnariseEnv
+needs to map variables to literals too. Suppose we have this Core:
+
+  f (# x | #)
+
+  ==> (CorePrep)
+
+  case (# x | #) of y {
+    _ -> f y
+  }
+
+  ==> (MultiVal)
+
+  case (# 1#, x #) of [x1, x2] {
+    _ -> f x1 x2
+  }
+
+To eliminate this case expression we need to map x1 to 1# in UnariseEnv:
+
+  x1 :-> UnaryVal 1#, x2 :-> UnaryVal x
+
+so that `f x1 x2` becomes `f 1# x`.
+
+Note [Unarisation and arity]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Because of unarisation, the arity that will be recorded in the generated info
+table for an Id may be larger than the idArity. Instead we record what we call
+the RepArity, which is the Arity taking into account any expanded arguments, and
+corresponds to the number of (possibly-void) *registers* arguments will arrive
+in.
+
+Note [Post-unarisation invariants]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+STG programs after unarisation have these invariants:
+
+  * No unboxed sums at all.
+
+  * No unboxed tuple binders. Tuples only appear in return position.
+
+  * DataCon applications (StgRhsCon and StgConApp) don't have void arguments.
+    This means that it's safe to wrap `StgArg`s of DataCon applications with
+    `GHC.StgToCmm.Env.NonVoid`, for example.
+
+  * Alt binders (binders in patterns) are always non-void.
+
+  * Binders always have zero (for void arguments) or one PrimRep.
+-}
+
+{-# LANGUAGE CPP, TupleSections #-}
+
+module GHC.Stg.Unarise (unarise) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import BasicTypes
+import CoreSyn
+import DataCon
+import FastString (FastString, mkFastString)
+import Id
+import Literal
+import MkCore (aBSENT_SUM_FIELD_ERROR_ID)
+import MkId (voidPrimId, voidArgId)
+import MonadUtils (mapAccumLM)
+import Outputable
+import GHC.Types.RepType
+import GHC.Stg.Syntax
+import Type
+import TysPrim (intPrimTy,wordPrimTy,word64PrimTy)
+import TysWiredIn
+import UniqSupply
+import Util
+import VarEnv
+
+import Data.Bifunctor (second)
+import Data.Maybe (mapMaybe)
+import qualified Data.IntMap as IM
+
+--------------------------------------------------------------------------------
+
+-- | A mapping from binders to the Ids they were expanded/renamed to.
+--
+--   x :-> MultiVal [a,b,c] in rho
+--
+-- iff  x's typePrimRep is not a singleton, or equivalently
+--      x's type is an unboxed tuple, sum or void.
+--
+--    x :-> UnaryVal x'
+--
+-- iff x's RepType is UnaryRep or equivalently
+--     x's type is not unboxed tuple, sum or void.
+--
+-- So
+--     x :-> MultiVal [a] in rho
+-- means x is represented by singleton tuple.
+--
+--     x :-> MultiVal [] in rho
+-- means x is void.
+--
+-- INVARIANT: OutStgArgs in the range only have NvUnaryTypes
+--            (i.e. no unboxed tuples, sums or voids)
+--
+type UnariseEnv = VarEnv UnariseVal
+
+data UnariseVal
+  = MultiVal [OutStgArg] -- MultiVal to tuple. Can be empty list (void).
+  | UnaryVal OutStgArg   -- See NOTE [Renaming during unarisation].
+
+instance Outputable UnariseVal where
+  ppr (MultiVal args) = text "MultiVal" <+> ppr args
+  ppr (UnaryVal arg)   = text "UnaryVal" <+> ppr arg
+
+-- | Extend the environment, checking the UnariseEnv invariant.
+extendRho :: UnariseEnv -> Id -> UnariseVal -> UnariseEnv
+extendRho rho x (MultiVal args)
+  = ASSERT(all (isNvUnaryType . stgArgType) args)
+    extendVarEnv rho x (MultiVal args)
+extendRho rho x (UnaryVal val)
+  = ASSERT(isNvUnaryType (stgArgType val))
+    extendVarEnv rho x (UnaryVal val)
+
+--------------------------------------------------------------------------------
+
+unarise :: UniqSupply -> [StgTopBinding] -> [StgTopBinding]
+unarise us binds = initUs_ us (mapM (unariseTopBinding emptyVarEnv) binds)
+
+unariseTopBinding :: UnariseEnv -> StgTopBinding -> UniqSM StgTopBinding
+unariseTopBinding rho (StgTopLifted bind)
+  = StgTopLifted <$> unariseBinding rho bind
+unariseTopBinding _ bind@StgTopStringLit{} = return bind
+
+unariseBinding :: UnariseEnv -> StgBinding -> UniqSM StgBinding
+unariseBinding rho (StgNonRec x rhs)
+  = StgNonRec x <$> unariseRhs rho rhs
+unariseBinding rho (StgRec xrhss)
+  = StgRec <$> mapM (\(x, rhs) -> (x,) <$> unariseRhs rho rhs) xrhss
+
+unariseRhs :: UnariseEnv -> StgRhs -> UniqSM StgRhs
+unariseRhs rho (StgRhsClosure ext ccs update_flag args expr)
+  = do (rho', args1) <- unariseFunArgBinders rho args
+       expr' <- unariseExpr rho' expr
+       return (StgRhsClosure ext ccs update_flag args1 expr')
+
+unariseRhs rho (StgRhsCon ccs con args)
+  = ASSERT(not (isUnboxedTupleCon con || isUnboxedSumCon con))
+    return (StgRhsCon ccs con (unariseConArgs rho args))
+
+--------------------------------------------------------------------------------
+
+unariseExpr :: UnariseEnv -> StgExpr -> UniqSM StgExpr
+
+unariseExpr rho e@(StgApp f [])
+  = case lookupVarEnv rho f of
+      Just (MultiVal args)  -- Including empty tuples
+        -> return (mkTuple args)
+      Just (UnaryVal (StgVarArg f'))
+        -> return (StgApp f' [])
+      Just (UnaryVal (StgLitArg f'))
+        -> return (StgLit f')
+      Nothing
+        -> return e
+
+unariseExpr rho e@(StgApp f args)
+  = return (StgApp f' (unariseFunArgs rho args))
+  where
+    f' = case lookupVarEnv rho f of
+           Just (UnaryVal (StgVarArg f')) -> f'
+           Nothing -> f
+           err -> pprPanic "unariseExpr - app2" (ppr e $$ ppr err)
+               -- Can't happen because 'args' is non-empty, and
+               -- a tuple or sum cannot be applied to anything
+
+unariseExpr _ (StgLit l)
+  = return (StgLit l)
+
+unariseExpr rho (StgConApp dc args ty_args)
+  | Just args' <- unariseMulti_maybe rho dc args ty_args
+  = return (mkTuple args')
+
+  | otherwise
+  , let args' = unariseConArgs rho args
+  = return (StgConApp dc args' (map stgArgType args'))
+
+unariseExpr rho (StgOpApp op args ty)
+  = return (StgOpApp op (unariseFunArgs rho args) ty)
+
+unariseExpr _ e@StgLam{}
+  = pprPanic "unariseExpr: found lambda" (ppr e)
+
+unariseExpr rho (StgCase scrut bndr alt_ty alts)
+  -- tuple/sum binders in the scrutinee can always be eliminated
+  | StgApp v [] <- scrut
+  , Just (MultiVal xs) <- lookupVarEnv rho v
+  = elimCase rho xs bndr alt_ty alts
+
+  -- Handle strict lets for tuples and sums:
+  --   case (# a,b #) of r -> rhs
+  -- and analogously for sums
+  | StgConApp dc args ty_args <- scrut
+  , Just args' <- unariseMulti_maybe rho dc args ty_args
+  = elimCase rho args' bndr alt_ty alts
+
+  -- general case
+  | otherwise
+  = do scrut' <- unariseExpr rho scrut
+       alts'  <- unariseAlts rho alt_ty bndr alts
+       return (StgCase scrut' bndr alt_ty alts')
+                       -- bndr may have a unboxed sum/tuple type but it will be
+                       -- dead after unarise (checked in GHC.Stg.Lint)
+
+unariseExpr rho (StgLet ext bind e)
+  = StgLet ext <$> unariseBinding rho bind <*> unariseExpr rho e
+
+unariseExpr rho (StgLetNoEscape ext bind e)
+  = StgLetNoEscape ext <$> unariseBinding rho bind <*> unariseExpr rho e
+
+unariseExpr rho (StgTick tick e)
+  = StgTick tick <$> unariseExpr rho e
+
+-- Doesn't return void args.
+unariseMulti_maybe :: UnariseEnv -> DataCon -> [InStgArg] -> [Type] -> Maybe [OutStgArg]
+unariseMulti_maybe rho dc args ty_args
+  | isUnboxedTupleCon dc
+  = Just (unariseConArgs rho args)
+
+  | isUnboxedSumCon dc
+  , let args1 = ASSERT(isSingleton args) (unariseConArgs rho args)
+  = Just (mkUbxSum dc ty_args args1)
+
+  | otherwise
+  = Nothing
+
+--------------------------------------------------------------------------------
+
+elimCase :: UnariseEnv
+         -> [OutStgArg] -- non-void args
+         -> InId -> AltType -> [InStgAlt] -> UniqSM OutStgExpr
+
+elimCase rho args bndr (MultiValAlt _) [(_, bndrs, rhs)]
+  = do let rho1 = extendRho rho bndr (MultiVal args)
+           rho2
+             | isUnboxedTupleBndr bndr
+             = mapTupleIdBinders bndrs args rho1
+             | otherwise
+             = ASSERT(isUnboxedSumBndr bndr)
+               if null bndrs then rho1
+                             else mapSumIdBinders bndrs args rho1
+
+       unariseExpr rho2 rhs
+
+elimCase rho args bndr (MultiValAlt _) alts
+  | isUnboxedSumBndr bndr
+  = do let (tag_arg : real_args) = args
+       tag_bndr <- mkId (mkFastString "tag") tagTy
+          -- this won't be used but we need a binder anyway
+       let rho1 = extendRho rho bndr (MultiVal args)
+           scrut' = case tag_arg of
+                      StgVarArg v     -> StgApp v []
+                      StgLitArg l     -> StgLit l
+
+       alts' <- unariseSumAlts rho1 real_args alts
+       return (StgCase scrut' tag_bndr tagAltTy alts')
+
+elimCase _ args bndr alt_ty alts
+  = pprPanic "elimCase - unhandled case"
+      (ppr args <+> ppr bndr <+> ppr alt_ty $$ ppr alts)
+
+--------------------------------------------------------------------------------
+
+unariseAlts :: UnariseEnv -> AltType -> InId -> [StgAlt] -> UniqSM [StgAlt]
+unariseAlts rho (MultiValAlt n) bndr [(DEFAULT, [], e)]
+  | isUnboxedTupleBndr bndr
+  = do (rho', ys) <- unariseConArgBinder rho bndr
+       e' <- unariseExpr rho' e
+       return [(DataAlt (tupleDataCon Unboxed n), ys, e')]
+
+unariseAlts rho (MultiValAlt n) bndr [(DataAlt _, ys, e)]
+  | isUnboxedTupleBndr bndr
+  = do (rho', ys1) <- unariseConArgBinders rho ys
+       MASSERT(ys1 `lengthIs` n)
+       let rho'' = extendRho rho' bndr (MultiVal (map StgVarArg ys1))
+       e' <- unariseExpr rho'' e
+       return [(DataAlt (tupleDataCon Unboxed n), ys1, e')]
+
+unariseAlts _ (MultiValAlt _) bndr alts
+  | isUnboxedTupleBndr bndr
+  = pprPanic "unariseExpr: strange multi val alts" (ppr alts)
+
+-- In this case we don't need to scrutinize the tag bit
+unariseAlts rho (MultiValAlt _) bndr [(DEFAULT, _, rhs)]
+  | isUnboxedSumBndr bndr
+  = do (rho_sum_bndrs, sum_bndrs) <- unariseConArgBinder rho bndr
+       rhs' <- unariseExpr rho_sum_bndrs rhs
+       return [(DataAlt (tupleDataCon Unboxed (length sum_bndrs)), sum_bndrs, rhs')]
+
+unariseAlts rho (MultiValAlt _) bndr alts
+  | isUnboxedSumBndr bndr
+  = do (rho_sum_bndrs, scrt_bndrs@(tag_bndr : real_bndrs)) <- unariseConArgBinder rho bndr
+       alts' <- unariseSumAlts rho_sum_bndrs (map StgVarArg real_bndrs) alts
+       let inner_case = StgCase (StgApp tag_bndr []) tag_bndr tagAltTy alts'
+       return [ (DataAlt (tupleDataCon Unboxed (length scrt_bndrs)),
+                 scrt_bndrs,
+                 inner_case) ]
+
+unariseAlts rho _ _ alts
+  = mapM (\alt -> unariseAlt rho alt) alts
+
+unariseAlt :: UnariseEnv -> StgAlt -> UniqSM StgAlt
+unariseAlt rho (con, xs, e)
+  = do (rho', xs') <- unariseConArgBinders rho xs
+       (con, xs',) <$> unariseExpr rho' e
+
+--------------------------------------------------------------------------------
+
+-- | Make alternatives that match on the tag of a sum
+-- (i.e. generate LitAlts for the tag)
+unariseSumAlts :: UnariseEnv
+               -> [StgArg] -- sum components _excluding_ the tag bit.
+               -> [StgAlt] -- original alternative with sum LHS
+               -> UniqSM [StgAlt]
+unariseSumAlts env args alts
+  = do alts' <- mapM (unariseSumAlt env args) alts
+       return (mkDefaultLitAlt alts')
+
+unariseSumAlt :: UnariseEnv
+              -> [StgArg] -- sum components _excluding_ the tag bit.
+              -> StgAlt   -- original alternative with sum LHS
+              -> UniqSM StgAlt
+unariseSumAlt rho _ (DEFAULT, _, e)
+  = ( DEFAULT, [], ) <$> unariseExpr rho e
+
+unariseSumAlt rho args (DataAlt sumCon, bs, e)
+  = do let rho' = mapSumIdBinders bs args rho
+       e' <- unariseExpr rho' e
+       return ( LitAlt (LitNumber LitNumInt (fromIntegral (dataConTag sumCon)) intPrimTy), [], e' )
+
+unariseSumAlt _ scrt alt
+  = pprPanic "unariseSumAlt" (ppr scrt $$ ppr alt)
+
+--------------------------------------------------------------------------------
+
+mapTupleIdBinders
+  :: [InId]       -- Un-processed binders of a tuple alternative.
+                  -- Can have void binders.
+  -> [OutStgArg]  -- Arguments that form the tuple (after unarisation).
+                  -- Can't have void args.
+  -> UnariseEnv
+  -> UnariseEnv
+mapTupleIdBinders ids args0 rho0
+  = ASSERT(not (any (isVoidTy . stgArgType) args0))
+    let
+      ids_unarised :: [(Id, [PrimRep])]
+      ids_unarised = map (\id -> (id, typePrimRep (idType id))) ids
+
+      map_ids :: UnariseEnv -> [(Id, [PrimRep])] -> [StgArg] -> UnariseEnv
+      map_ids rho [] _  = rho
+      map_ids rho ((x, x_reps) : xs) args =
+        let
+          x_arity = length x_reps
+          (x_args, args') =
+            ASSERT(args `lengthAtLeast` x_arity)
+            splitAt x_arity args
+
+          rho'
+            | x_arity == 1
+            = ASSERT(x_args `lengthIs` 1)
+              extendRho rho x (UnaryVal (head x_args))
+            | otherwise
+            = extendRho rho x (MultiVal x_args)
+        in
+          map_ids rho' xs args'
+    in
+      map_ids rho0 ids_unarised args0
+
+mapSumIdBinders
+  :: [InId]      -- Binder of a sum alternative (remember that sum patterns
+                 -- only have one binder, so this list should be a singleton)
+  -> [OutStgArg] -- Arguments that form the sum (NOT including the tag).
+                 -- Can't have void args.
+  -> UnariseEnv
+  -> UnariseEnv
+
+mapSumIdBinders [id] args rho0
+  = ASSERT(not (any (isVoidTy . stgArgType) args))
+    let
+      arg_slots = map primRepSlot $ concatMap (typePrimRep . stgArgType) args
+      id_slots  = map primRepSlot $ typePrimRep (idType id)
+      layout1   = layoutUbxSum arg_slots id_slots
+    in
+      if isMultiValBndr id
+        then extendRho rho0 id (MultiVal [ args !! i | i <- layout1 ])
+        else ASSERT(layout1 `lengthIs` 1)
+             extendRho rho0 id (UnaryVal (args !! head layout1))
+
+mapSumIdBinders ids sum_args _
+  = pprPanic "mapSumIdBinders" (ppr ids $$ ppr sum_args)
+
+-- | Build a unboxed sum term from arguments of an alternative.
+--
+-- Example, for (# x | #) :: (# (# #) | Int #) we call
+--
+--   mkUbxSum (# _ | #) [ (# #), Int ] [ voidPrimId ]
+--
+-- which returns
+--
+--   [ 1#, rubbish ]
+--
+mkUbxSum
+  :: DataCon      -- Sum data con
+  -> [Type]       -- Type arguments of the sum data con
+  -> [OutStgArg]  -- Actual arguments of the alternative.
+  -> [OutStgArg]  -- Final tuple arguments
+mkUbxSum dc ty_args args0
+  = let
+      (_ : sum_slots) = ubxSumRepType (map typePrimRep ty_args)
+        -- drop tag slot
+
+      tag = dataConTag dc
+
+      layout'  = layoutUbxSum sum_slots (mapMaybe (typeSlotTy . stgArgType) args0)
+      tag_arg  = StgLitArg (LitNumber LitNumInt (fromIntegral tag) intPrimTy)
+      arg_idxs = IM.fromList (zipEqual "mkUbxSum" layout' args0)
+
+      mkTupArgs :: Int -> [SlotTy] -> IM.IntMap StgArg -> [StgArg]
+      mkTupArgs _ [] _
+        = []
+      mkTupArgs arg_idx (slot : slots_left) arg_map
+        | Just stg_arg <- IM.lookup arg_idx arg_map
+        = stg_arg : mkTupArgs (arg_idx + 1) slots_left arg_map
+        | otherwise
+        = slotRubbishArg slot : mkTupArgs (arg_idx + 1) slots_left arg_map
+
+      slotRubbishArg :: SlotTy -> StgArg
+      slotRubbishArg PtrSlot    = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
+                         -- See Note [aBSENT_SUM_FIELD_ERROR_ID] in MkCore
+      slotRubbishArg WordSlot   = StgLitArg (LitNumber LitNumWord 0 wordPrimTy)
+      slotRubbishArg Word64Slot = StgLitArg (LitNumber LitNumWord64 0 word64PrimTy)
+      slotRubbishArg FloatSlot  = StgLitArg (LitFloat 0)
+      slotRubbishArg DoubleSlot = StgLitArg (LitDouble 0)
+    in
+      tag_arg : mkTupArgs 0 sum_slots arg_idxs
+
+--------------------------------------------------------------------------------
+
+{-
+For arguments (StgArg) and binders (Id) we have two kind of unarisation:
+
+  - When unarising function arg binders and arguments, we don't want to remove
+    void binders and arguments. For example,
+
+      f :: (# (# #), (# #) #) -> Void# -> RealWorld# -> ...
+      f x y z = <body>
+
+    Here after unarise we should still get a function with arity 3. Similarly
+    in the call site we shouldn't remove void arguments:
+
+      f (# (# #), (# #) #) voidId rw
+
+    When unarising <body>, we extend the environment with these binders:
+
+      x :-> MultiVal [], y :-> MultiVal [], z :-> MultiVal []
+
+    Because their rep types are `MultiRep []` (aka. void). This means that when
+    we see `x` in a function argument position, we actually replace it with a
+    void argument. When we see it in a DataCon argument position, we just get
+    rid of it, because DataCon applications in STG are always saturated.
+
+  - When unarising case alternative binders we remove void binders, but we
+    still update the environment the same way, because those binders may be
+    used in the RHS. Example:
+
+      case x of y {
+        (# x1, x2, x3 #) -> <RHS>
+      }
+
+    We know that y can't be void, because we don't scrutinize voids, so x will
+    be unarised to some number of arguments, and those arguments will have at
+    least one non-void thing. So in the rho we will have something like:
+
+      x :-> MultiVal [xu1, xu2]
+
+    Now, after we eliminate void binders in the pattern, we get exactly the same
+    number of binders, and extend rho again with these:
+
+      x1 :-> UnaryVal xu1
+      x2 :-> MultiVal [] -- x2 is void
+      x3 :-> UnaryVal xu2
+
+    Now when we see x2 in a function argument position or in return position, we
+    generate void#. In constructor argument position, we just remove it.
+
+So in short, when we have a void id,
+
+  - We keep it if it's a lambda argument binder or
+                       in argument position of an application.
+
+  - We remove it if it's a DataCon field binder or
+                         in argument position of a DataCon application.
+-}
+
+unariseArgBinder
+    :: Bool -- data con arg?
+    -> UnariseEnv -> Id -> UniqSM (UnariseEnv, [Id])
+unariseArgBinder is_con_arg rho x =
+  case typePrimRep (idType x) of
+    []
+      | is_con_arg
+      -> return (extendRho rho x (MultiVal []), [])
+      | otherwise -- fun arg, do not remove void binders
+      -> return (extendRho rho x (MultiVal []), [voidArgId])
+
+    [rep]
+      -- Arg represented as single variable, but original type may still be an
+      -- unboxed sum/tuple, e.g. (# Void# | Void# #).
+      --
+      -- While not unarising the binder in this case does not break any programs
+      -- (because it unarises to a single variable), it triggers StgLint as we
+      -- break the the post-unarisation invariant that says unboxed tuple/sum
+      -- binders should vanish. See Note [Post-unarisation invariants].
+      | isUnboxedSumType (idType x) || isUnboxedTupleType (idType x)
+      -> do x' <- mkId (mkFastString "us") (primRepToType rep)
+            return (extendRho rho x (MultiVal [StgVarArg x']), [x'])
+      | otherwise
+      -> return (rho, [x])
+
+    reps -> do
+      xs <- mkIds (mkFastString "us") (map primRepToType reps)
+      return (extendRho rho x (MultiVal (map StgVarArg xs)), xs)
+
+--------------------------------------------------------------------------------
+
+-- | MultiVal a function argument. Never returns an empty list.
+unariseFunArg :: UnariseEnv -> StgArg -> [StgArg]
+unariseFunArg rho (StgVarArg x) =
+  case lookupVarEnv rho x of
+    Just (MultiVal [])  -> [voidArg]   -- NB: do not remove void args
+    Just (MultiVal as)  -> as
+    Just (UnaryVal arg) -> [arg]
+    Nothing             -> [StgVarArg x]
+unariseFunArg _ arg = [arg]
+
+unariseFunArgs :: UnariseEnv -> [StgArg] -> [StgArg]
+unariseFunArgs = concatMap . unariseFunArg
+
+unariseFunArgBinders :: UnariseEnv -> [Id] -> UniqSM (UnariseEnv, [Id])
+unariseFunArgBinders rho xs = second concat <$> mapAccumLM unariseFunArgBinder rho xs
+
+-- Result list of binders is never empty
+unariseFunArgBinder :: UnariseEnv -> Id -> UniqSM (UnariseEnv, [Id])
+unariseFunArgBinder = unariseArgBinder False
+
+--------------------------------------------------------------------------------
+
+-- | MultiVal a DataCon argument. Returns an empty list when argument is void.
+unariseConArg :: UnariseEnv -> InStgArg -> [OutStgArg]
+unariseConArg rho (StgVarArg x) =
+  case lookupVarEnv rho x of
+    Just (UnaryVal arg) -> [arg]
+    Just (MultiVal as) -> as      -- 'as' can be empty
+    Nothing
+      | isVoidTy (idType x) -> [] -- e.g. C realWorld#
+                                  -- Here realWorld# is not in the envt, but
+                                  -- is a void, and so should be eliminated
+      | otherwise -> [StgVarArg x]
+unariseConArg _ arg@(StgLitArg lit) =
+    ASSERT(not (isVoidTy (literalType lit)))  -- We have no void literals
+    [arg]
+
+unariseConArgs :: UnariseEnv -> [InStgArg] -> [OutStgArg]
+unariseConArgs = concatMap . unariseConArg
+
+unariseConArgBinders :: UnariseEnv -> [Id] -> UniqSM (UnariseEnv, [Id])
+unariseConArgBinders rho xs = second concat <$> mapAccumLM unariseConArgBinder rho xs
+
+-- Different from `unariseFunArgBinder`: result list of binders may be empty.
+-- See DataCon applications case in Note [Post-unarisation invariants].
+unariseConArgBinder :: UnariseEnv -> Id -> UniqSM (UnariseEnv, [Id])
+unariseConArgBinder = unariseArgBinder True
+
+--------------------------------------------------------------------------------
+
+mkIds :: FastString -> [UnaryType] -> UniqSM [Id]
+mkIds fs tys = mapM (mkId fs) tys
+
+mkId :: FastString -> UnaryType -> UniqSM Id
+mkId = mkSysLocalM
+
+isMultiValBndr :: Id -> Bool
+isMultiValBndr id
+  | [_] <- typePrimRep (idType id)
+  = False
+  | otherwise
+  = True
+
+isUnboxedSumBndr :: Id -> Bool
+isUnboxedSumBndr = isUnboxedSumType . idType
+
+isUnboxedTupleBndr :: Id -> Bool
+isUnboxedTupleBndr = isUnboxedTupleType . idType
+
+mkTuple :: [StgArg] -> StgExpr
+mkTuple args = StgConApp (tupleDataCon Unboxed (length args)) args (map stgArgType args)
+
+tagAltTy :: AltType
+tagAltTy = PrimAlt IntRep
+
+tagTy :: Type
+tagTy = intPrimTy
+
+voidArg :: StgArg
+voidArg = StgVarArg voidPrimId
+
+mkDefaultLitAlt :: [StgAlt] -> [StgAlt]
+-- We have an exhauseive list of literal alternatives
+--    1# -> e1
+--    2# -> e2
+-- Since they are exhaustive, we can replace one with DEFAULT, to avoid
+-- generating a final test. Remember, the DEFAULT comes first if it exists.
+mkDefaultLitAlt [] = pprPanic "elimUbxSumExpr.mkDefaultAlt" (text "Empty alts")
+mkDefaultLitAlt alts@((DEFAULT, _, _) : _) = alts
+mkDefaultLitAlt ((LitAlt{}, [], rhs) : alts) = (DEFAULT, [], rhs) : alts
+mkDefaultLitAlt alts = pprPanic "mkDefaultLitAlt" (text "Not a lit alt:" <+> ppr alts)
diff --git a/compiler/GHC/StgToCmm.hs b/compiler/GHC/StgToCmm.hs
index f02d361fa4..10a9dc2c6a 100644
--- a/compiler/GHC/StgToCmm.hs
+++ b/compiler/GHC/StgToCmm.hs
@@ -30,7 +30,7 @@ import Cmm
 import CmmUtils
 import CLabel
 
-import StgSyn
+import GHC.Stg.Syntax
 import DynFlags
 import ErrUtils
 
@@ -38,7 +38,7 @@ import HscTypes
 import CostCentre
 import Id
 import IdInfo
-import RepType
+import GHC.Types.RepType
 import DataCon
 import TyCon
 import Module
@@ -147,7 +147,7 @@ cgTopRhs :: DynFlags -> RecFlag -> Id -> CgStgRhs -> (CgIdInfo, FCode ())
 cgTopRhs dflags _rec bndr (StgRhsCon _cc con args)
   = cgTopRhsCon dflags bndr con (assertNonVoidStgArgs args)
       -- con args are always non-void,
-      -- see Note [Post-unarisation invariants] in UnariseStg
+      -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
 
 cgTopRhs dflags rec bndr (StgRhsClosure fvs cc upd_flag args body)
   = ASSERT(isEmptyDVarSet fvs)    -- There should be no free variables
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs
index d373b79d0c..3eeb575da7 100644
--- a/compiler/GHC/StgToCmm/Bind.hs
+++ b/compiler/GHC/StgToCmm/Bind.hs
@@ -36,7 +36,7 @@ import Cmm
 import CmmInfo
 import CmmUtils
 import CLabel
-import StgSyn
+import GHC.Stg.Syntax
 import CostCentre
 import Id
 import IdInfo
@@ -206,7 +206,7 @@ cgRhs id (StgRhsCon cc con args)
   = withNewTickyCounterCon (idName id) $
     buildDynCon id True cc con (assertNonVoidStgArgs args)
       -- con args are always non-void,
-      -- see Note [Post-unarisation invariants] in UnariseStg
+      -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
 
 {- See Note [GC recovery] in compiler/GHC.StgToCmm/Closure.hs -}
 cgRhs id (StgRhsClosure fvs cc upd_flag args body)
@@ -275,7 +275,7 @@ mkRhsClosure    dflags bndr _cc
 
   , let (_, _, params_w_offsets) = mkVirtConstrOffsets dflags (addIdReps (assertNonVoidIds params))
                                    -- pattern binders are always non-void,
-                                   -- see Note [Post-unarisation invariants] in UnariseStg
+                                   -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
   , Just the_offset <- assocMaybe params_w_offsets (NonVoid selectee)
 
   , let offset_into_int = bytesToWordsRoundUp dflags the_offset
diff --git a/compiler/GHC/StgToCmm/Bind.hs-boot b/compiler/GHC/StgToCmm/Bind.hs-boot
index d16c34ebd3..6e8b2bdf7a 100644
--- a/compiler/GHC/StgToCmm/Bind.hs-boot
+++ b/compiler/GHC/StgToCmm/Bind.hs-boot
@@ -1,6 +1,6 @@
 module GHC.StgToCmm.Bind where
 
 import GHC.StgToCmm.Monad( FCode )
-import StgSyn( CgStgBinding )
+import GHC.Stg.Syntax( CgStgBinding )
 
 cgBind :: CgStgBinding -> FCode ()
diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs
index 07d3edab9a..df8cb046c4 100644
--- a/compiler/GHC/StgToCmm/Closure.hs
+++ b/compiler/GHC/StgToCmm/Closure.hs
@@ -66,7 +66,7 @@ module GHC.StgToCmm.Closure (
 
 import GhcPrelude
 
-import StgSyn
+import GHC.Stg.Syntax
 import SMRep
 import Cmm
 import PprCmmExpr() -- For Outputable instances
@@ -82,7 +82,7 @@ import Type
 import TyCoRep
 import TcType
 import TyCon
-import RepType
+import GHC.Types.RepType
 import BasicTypes
 import Outputable
 import DynFlags
@@ -142,7 +142,7 @@ nonVoidIds ids = [NonVoid id | id <- ids, not (isVoidTy (idType id))]
 
 -- | Used in places where some invariant ensures that all these Ids are
 -- non-void; e.g. constructor field binders in case expressions.
--- See Note [Post-unarisation invariants] in UnariseStg.
+-- See Note [Post-unarisation invariants] in GHC.Stg.Unarise.
 assertNonVoidIds :: [Id] -> [NonVoid Id]
 assertNonVoidIds ids = ASSERT(not (any (isVoidTy . idType) ids))
                        coerce ids
@@ -152,7 +152,7 @@ nonVoidStgArgs args = [NonVoid arg | arg <- args, not (isVoidTy (stgArgType arg)
 
 -- | Used in places where some invariant ensures that all these arguments are
 -- non-void; e.g. constructor arguments.
--- See Note [Post-unarisation invariants] in UnariseStg.
+-- See Note [Post-unarisation invariants] in GHC.Stg.Unarise.
 assertNonVoidStgArgs :: [StgArg] -> [NonVoid StgArg]
 assertNonVoidStgArgs args = ASSERT(not (any (isVoidTy . stgArgType) args))
                             coerce args
@@ -169,7 +169,7 @@ assertNonVoidStgArgs args = ASSERT(not (any (isVoidTy . stgArgType) args))
 -- See Note [Post-unarisation invariants]
 idPrimRep :: Id -> PrimRep
 idPrimRep id = typePrimRep1 (idType id)
-    -- See also Note [VoidRep] in RepType
+    -- See also Note [VoidRep] in GHC.Types.RepType
 
 -- | Assumes that Ids have one PrimRep, which holds after unarisation.
 -- See Note [Post-unarisation invariants]
diff --git a/compiler/GHC/StgToCmm/DataCon.hs b/compiler/GHC/StgToCmm/DataCon.hs
index c7ad444e41..1e929663df 100644
--- a/compiler/GHC/StgToCmm/DataCon.hs
+++ b/compiler/GHC/StgToCmm/DataCon.hs
@@ -19,7 +19,7 @@ module GHC.StgToCmm.DataCon (
 
 import GhcPrelude
 
-import StgSyn
+import GHC.Stg.Syntax
 import CoreSyn  ( AltCon(..) )
 
 import GHC.StgToCmm.Monad
@@ -40,7 +40,7 @@ import DataCon
 import DynFlags
 import FastString
 import Id
-import RepType (countConRepArgs)
+import GHC.Types.RepType (countConRepArgs)
 import Literal
 import PrelInfo
 import Outputable
diff --git a/compiler/GHC/StgToCmm/Env.hs b/compiler/GHC/StgToCmm/Env.hs
index e32c6a1ecb..45b09a3d26 100644
--- a/compiler/GHC/StgToCmm/Env.hs
+++ b/compiler/GHC/StgToCmm/Env.hs
@@ -41,7 +41,7 @@ import Id
 import MkGraph
 import Name
 import Outputable
-import StgSyn
+import GHC.Stg.Syntax
 import Type
 import TysPrim
 import UniqFM
diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs
index 38d35fc031..f39d02839c 100644
--- a/compiler/GHC/StgToCmm/Expr.hs
+++ b/compiler/GHC/StgToCmm/Expr.hs
@@ -28,7 +28,7 @@ import GHC.StgToCmm.Ticky
 import GHC.StgToCmm.Utils
 import GHC.StgToCmm.Closure
 
-import StgSyn
+import GHC.Stg.Syntax
 
 import MkGraph
 import BlockId
@@ -42,7 +42,7 @@ import Id
 import PrimOp
 import TyCon
 import Type             ( isUnliftedType )
-import RepType          ( isVoidTy, countConRepArgs )
+import GHC.Types.RepType          ( isVoidTy, countConRepArgs )
 import CostCentre       ( CostCentreStack, currentCCS )
 import Maybes
 import Util
@@ -585,7 +585,7 @@ isSimpleOp (StgPrimCallOp _) _                           = return False
 chooseReturnBndrs :: Id -> AltType -> [CgStgAlt] -> [NonVoid Id]
 -- These are the binders of a case that are assigned by the evaluation of the
 -- scrutinee.
--- They're non-void, see Note [Post-unarisation invariants] in UnariseStg.
+-- They're non-void, see Note [Post-unarisation invariants] in GHC.Stg.Unarise.
 chooseReturnBndrs bndr (PrimAlt _) _alts
   = assertNonVoidIds [bndr]
 
@@ -882,7 +882,7 @@ cgAltRhss gc_plan bndr alts = do
         maybeAltHeapCheck gc_plan $
         do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
                     -- alt binders are always non-void,
-                    -- see Note [Post-unarisation invariants] in UnariseStg
+                    -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
            ; _ <- cgExpr rhs
            ; return con }
   forkAlts (map cg_alt alts)
@@ -910,7 +910,7 @@ cgConApp con stg_args
     do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
                                      currentCCS con (assertNonVoidStgArgs stg_args)
                                      -- con args are always non-void,
-                                     -- see Note [Post-unarisation invariants] in UnariseStg
+                                     -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
                 -- The first "con" says that the name bound to this
                 -- closure is "con", which is a bit of a fudge, but
                 -- it only affects profiling (hence the False)
diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs
index dacaff41ba..3ef0872c2e 100644
--- a/compiler/GHC/StgToCmm/Foreign.hs
+++ b/compiler/GHC/StgToCmm/Foreign.hs
@@ -20,7 +20,7 @@ module GHC.StgToCmm.Foreign (
 
 import GhcPrelude hiding( succ, (<*>) )
 
-import StgSyn
+import GHC.Stg.Syntax
 import GHC.StgToCmm.Prof (storeCurCCS, ccsType)
 import GHC.StgToCmm.Env
 import GHC.StgToCmm.Monad
@@ -33,7 +33,7 @@ import Cmm
 import CmmUtils
 import MkGraph
 import Type
-import RepType
+import GHC.Types.RepType
 import CLabel
 import SMRep
 import ForeignCall
diff --git a/compiler/GHC/StgToCmm/Heap.hs b/compiler/GHC/StgToCmm/Heap.hs
index a1f016c13c..d36cad5788 100644
--- a/compiler/GHC/StgToCmm/Heap.hs
+++ b/compiler/GHC/StgToCmm/Heap.hs
@@ -22,7 +22,7 @@ module GHC.StgToCmm.Heap (
 
 import GhcPrelude hiding ((<*>))
 
-import StgSyn
+import GHC.Stg.Syntax
 import CLabel
 import GHC.StgToCmm.Layout
 import GHC.StgToCmm.Utils
diff --git a/compiler/GHC/StgToCmm/Layout.hs b/compiler/GHC/StgToCmm/Layout.hs
index f4834376ed..6d7825eb93 100644
--- a/compiler/GHC/StgToCmm/Layout.hs
+++ b/compiler/GHC/StgToCmm/Layout.hs
@@ -48,7 +48,7 @@ import Cmm
 import CmmUtils
 import CmmInfo
 import CLabel
-import StgSyn
+import GHC.Stg.Syntax
 import Id
 import TyCon             ( PrimRep(..), primRepSizeB )
 import BasicTypes        ( RepArity )
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index 3728c0cac2..85924d984e 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -40,7 +40,7 @@ import GHC.Platform
 import BasicTypes
 import BlockId
 import MkGraph
-import StgSyn
+import GHC.Stg.Syntax
 import Cmm
 import Module   ( rtsUnitId )
 import Type     ( Type, tyConAppTyCon )
diff --git a/compiler/GHC/StgToCmm/Ticky.hs b/compiler/GHC/StgToCmm/Ticky.hs
index 7548f3de13..9eeb134cc9 100644
--- a/compiler/GHC/StgToCmm/Ticky.hs
+++ b/compiler/GHC/StgToCmm/Ticky.hs
@@ -111,7 +111,7 @@ import GHC.StgToCmm.Closure
 import GHC.StgToCmm.Utils
 import GHC.StgToCmm.Monad
 
-import StgSyn
+import GHC.Stg.Syntax
 import CmmExpr
 import MkGraph
 import CmmUtils
diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs
index 0b3a8d8b08..34fb93468c 100644
--- a/compiler/GHC/StgToCmm/Utils.hs
+++ b/compiler/GHC/StgToCmm/Utils.hs
@@ -75,7 +75,7 @@ import UniqSupply (MonadUnique(..))
 import DynFlags
 import FastString
 import Outputable
-import RepType
+import GHC.Types.RepType
 
 import Data.ByteString (ByteString)
 import qualified Data.ByteString.Char8 as BS8
diff --git a/compiler/GHC/Types/RepType.hs b/compiler/GHC/Types/RepType.hs
new file mode 100644
index 0000000000..cdda659688
--- /dev/null
+++ b/compiler/GHC/Types/RepType.hs
@@ -0,0 +1,533 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE FlexibleContexts #-}
+
+module GHC.Types.RepType
+  (
+    -- * Code generator views onto Types
+    UnaryType, NvUnaryType, isNvUnaryType,
+    unwrapType,
+
+    -- * Predicates on types
+    isVoidTy,
+
+    -- * Type representation for the code generator
+    typePrimRep, typePrimRep1,
+    runtimeRepPrimRep, typePrimRepArgs,
+    PrimRep(..), primRepToType,
+    countFunRepArgs, countConRepArgs, tyConPrimRep, tyConPrimRep1,
+
+    -- * Unboxed sum representation type
+    ubxSumRepType, layoutUbxSum, typeSlotTy, SlotTy (..),
+    slotPrimRep, primRepSlot
+  ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import BasicTypes (Arity, RepArity)
+import DataCon
+import Outputable
+import PrelNames
+import Coercion
+import TyCon
+import TyCoRep
+import Type
+import Util
+import TysPrim
+import {-# SOURCE #-} TysWiredIn ( anyTypeOfKind )
+
+import Data.List (sort)
+import qualified Data.IntSet as IS
+
+{- **********************************************************************
+*                                                                       *
+                Representation types
+*                                                                       *
+********************************************************************** -}
+
+type NvUnaryType = Type
+type UnaryType   = Type
+     -- Both are always a value type; i.e. its kind is TYPE rr
+     -- for some rr; moreover the rr is never a variable.
+     --
+     --   NvUnaryType : never an unboxed tuple or sum, or void
+     --
+     --   UnaryType   : never an unboxed tuple or sum;
+     --                 can be Void# or (# #)
+
+isNvUnaryType :: Type -> Bool
+isNvUnaryType ty
+  | [_] <- typePrimRep ty
+  = True
+  | otherwise
+  = False
+
+-- INVARIANT: the result list is never empty.
+typePrimRepArgs :: HasDebugCallStack => Type -> [PrimRep]
+typePrimRepArgs ty
+  | [] <- reps
+  = [VoidRep]
+  | otherwise
+  = reps
+  where
+    reps = typePrimRep ty
+
+-- | Gets rid of the stuff that prevents us from understanding the
+-- runtime representation of a type. Including:
+--   1. Casts
+--   2. Newtypes
+--   3. Foralls
+--   4. Synonyms
+-- But not type/data families, because we don't have the envs to hand.
+unwrapType :: Type -> Type
+unwrapType ty
+  | Just (_, unwrapped)
+      <- topNormaliseTypeX stepper mappend inner_ty
+  = unwrapped
+  | otherwise
+  = inner_ty
+  where
+    inner_ty = go ty
+
+    go t | Just t' <- coreView t = go t'
+    go (ForAllTy _ t)            = go t
+    go (CastTy t _)              = go t
+    go t                         = t
+
+     -- cf. Coercion.unwrapNewTypeStepper
+    stepper rec_nts tc tys
+      | Just (ty', _) <- instNewTyCon_maybe tc tys
+      = case checkRecTc rec_nts tc of
+          Just rec_nts' -> NS_Step rec_nts' (go ty') ()
+          Nothing       -> NS_Abort   -- infinite newtypes
+      | otherwise
+      = NS_Done
+
+countFunRepArgs :: Arity -> Type -> RepArity
+countFunRepArgs 0 _
+  = 0
+countFunRepArgs n ty
+  | FunTy _ arg res <- unwrapType ty
+  = length (typePrimRepArgs arg) + countFunRepArgs (n - 1) res
+  | otherwise
+  = pprPanic "countFunRepArgs: arity greater than type can handle" (ppr (n, ty, typePrimRep ty))
+
+countConRepArgs :: DataCon -> RepArity
+countConRepArgs dc = go (dataConRepArity dc) (dataConRepType dc)
+  where
+    go :: Arity -> Type -> RepArity
+    go 0 _
+      = 0
+    go n ty
+      | FunTy _ arg res <- unwrapType ty
+      = length (typePrimRep arg) + go (n - 1) res
+      | otherwise
+      = pprPanic "countConRepArgs: arity greater than type can handle" (ppr (n, ty, typePrimRep ty))
+
+-- | True if the type has zero width.
+isVoidTy :: Type -> Bool
+isVoidTy = null . typePrimRep
+
+
+{- **********************************************************************
+*                                                                       *
+                Unboxed sums
+ See Note [Translating unboxed sums to unboxed tuples] in GHC.Stg.Unarise
+*                                                                       *
+********************************************************************** -}
+
+type SortedSlotTys = [SlotTy]
+
+-- | Given the arguments of a sum type constructor application,
+--   return the unboxed sum rep type.
+--
+-- E.g.
+--
+--   (# Int# | Maybe Int | (# Int#, Float# #) #)
+--
+-- We call `ubxSumRepType [ [IntRep], [LiftedRep], [IntRep, FloatRep] ]`,
+-- which returns [WordSlot, PtrSlot, WordSlot, FloatSlot]
+--
+-- INVARIANT: Result slots are sorted (via Ord SlotTy), except that at the head
+-- of the list we have the slot for the tag.
+ubxSumRepType :: [[PrimRep]] -> [SlotTy]
+ubxSumRepType constrs0
+  -- These first two cases never classify an actual unboxed sum, which always
+  -- has at least two disjuncts. But it could happen if a user writes, e.g.,
+  -- forall (a :: TYPE (SumRep [IntRep])). ...
+  -- which could never be instantiated. We still don't want to panic.
+  | constrs0 `lengthLessThan` 2
+  = [WordSlot]
+
+  | otherwise
+  = let
+      combine_alts :: [SortedSlotTys]  -- slots of constructors
+                   -> SortedSlotTys    -- final slots
+      combine_alts constrs = foldl' merge [] constrs
+
+      merge :: SortedSlotTys -> SortedSlotTys -> SortedSlotTys
+      merge existing_slots []
+        = existing_slots
+      merge [] needed_slots
+        = needed_slots
+      merge (es : ess) (s : ss)
+        | Just s' <- s `fitsIn` es
+        = -- found a slot, use it
+          s' : merge ess ss
+        | s < es
+        = -- we need a new slot and this is the right place for it
+          s : merge (es : ess) ss
+        | otherwise
+        = -- keep searching for a slot
+          es : merge ess (s : ss)
+
+      -- Nesting unboxed tuples and sums is OK, so we need to flatten first.
+      rep :: [PrimRep] -> SortedSlotTys
+      rep ty = sort (map primRepSlot ty)
+
+      sumRep = WordSlot : combine_alts (map rep constrs0)
+               -- WordSlot: for the tag of the sum
+    in
+      sumRep
+
+layoutUbxSum :: SortedSlotTys -- Layout of sum. Does not include tag.
+                              -- We assume that they are in increasing order
+             -> [SlotTy]      -- Slot types of things we want to map to locations in the
+                              -- sum layout
+             -> [Int]         -- Where to map 'things' in the sum layout
+layoutUbxSum sum_slots0 arg_slots0 =
+    go arg_slots0 IS.empty
+  where
+    go :: [SlotTy] -> IS.IntSet -> [Int]
+    go [] _
+      = []
+    go (arg : args) used
+      = let slot_idx = findSlot arg 0 sum_slots0 used
+         in slot_idx : go args (IS.insert slot_idx used)
+
+    findSlot :: SlotTy -> Int -> SortedSlotTys -> IS.IntSet -> Int
+    findSlot arg slot_idx (slot : slots) useds
+      | not (IS.member slot_idx useds)
+      , Just slot == arg `fitsIn` slot
+      = slot_idx
+      | otherwise
+      = findSlot arg (slot_idx + 1) slots useds
+    findSlot _ _ [] _
+      = pprPanic "findSlot" (text "Can't find slot" $$ ppr sum_slots0 $$ ppr arg_slots0)
+
+--------------------------------------------------------------------------------
+
+-- We have 3 kinds of slots:
+--
+--   - Pointer slot: Only shared between actual pointers to Haskell heap (i.e.
+--     boxed objects)
+--
+--   - Word slots: Shared between IntRep, WordRep, Int64Rep, Word64Rep, AddrRep.
+--
+--   - Float slots: Shared between floating point types.
+--
+--   - Void slots: Shared between void types. Not used in sums.
+--
+-- TODO(michalt): We should probably introduce `SlotTy`s for 8-/16-/32-bit
+-- values, so that we can pack things more tightly.
+data SlotTy = PtrSlot | WordSlot | Word64Slot | FloatSlot | DoubleSlot
+  deriving (Eq, Ord)
+    -- Constructor order is important! If slot A could fit into slot B
+    -- then slot A must occur first.  E.g.  FloatSlot before DoubleSlot
+    --
+    -- We are assuming that WordSlot is smaller than or equal to Word64Slot
+    -- (would not be true on a 128-bit machine)
+
+instance Outputable SlotTy where
+  ppr PtrSlot    = text "PtrSlot"
+  ppr Word64Slot = text "Word64Slot"
+  ppr WordSlot   = text "WordSlot"
+  ppr DoubleSlot = text "DoubleSlot"
+  ppr FloatSlot  = text "FloatSlot"
+
+typeSlotTy :: UnaryType -> Maybe SlotTy
+typeSlotTy ty
+  | isVoidTy ty
+  = Nothing
+  | otherwise
+  = Just (primRepSlot (typePrimRep1 ty))
+
+primRepSlot :: PrimRep -> SlotTy
+primRepSlot VoidRep     = pprPanic "primRepSlot" (text "No slot for VoidRep")
+primRepSlot LiftedRep   = PtrSlot
+primRepSlot UnliftedRep = PtrSlot
+primRepSlot IntRep      = WordSlot
+primRepSlot Int8Rep     = WordSlot
+primRepSlot Int16Rep    = WordSlot
+primRepSlot Int32Rep    = WordSlot
+primRepSlot Int64Rep    = Word64Slot
+primRepSlot WordRep     = WordSlot
+primRepSlot Word8Rep    = WordSlot
+primRepSlot Word16Rep   = WordSlot
+primRepSlot Word32Rep   = WordSlot
+primRepSlot Word64Rep   = Word64Slot
+primRepSlot AddrRep     = WordSlot
+primRepSlot FloatRep    = FloatSlot
+primRepSlot DoubleRep   = DoubleSlot
+primRepSlot VecRep{}    = pprPanic "primRepSlot" (text "No slot for VecRep")
+
+slotPrimRep :: SlotTy -> PrimRep
+slotPrimRep PtrSlot     = LiftedRep   -- choice between lifted & unlifted seems arbitrary
+slotPrimRep Word64Slot  = Word64Rep
+slotPrimRep WordSlot    = WordRep
+slotPrimRep DoubleSlot  = DoubleRep
+slotPrimRep FloatSlot   = FloatRep
+
+-- | Returns the bigger type if one fits into the other. (commutative)
+fitsIn :: SlotTy -> SlotTy -> Maybe SlotTy
+fitsIn ty1 ty2
+  | isWordSlot ty1 && isWordSlot ty2
+  = Just (max ty1 ty2)
+  | isFloatSlot ty1 && isFloatSlot ty2
+  = Just (max ty1 ty2)
+  | isPtrSlot ty1 && isPtrSlot ty2
+  = Just PtrSlot
+  | otherwise
+  = Nothing
+  where
+    isPtrSlot PtrSlot = True
+    isPtrSlot _       = False
+
+    isWordSlot Word64Slot = True
+    isWordSlot WordSlot   = True
+    isWordSlot _          = False
+
+    isFloatSlot DoubleSlot = True
+    isFloatSlot FloatSlot  = True
+    isFloatSlot _          = False
+
+
+{- **********************************************************************
+*                                                                       *
+                   PrimRep
+*                                                                       *
+*************************************************************************
+
+Note [RuntimeRep and PrimRep]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+This Note describes the relationship between GHC.Types.RuntimeRep
+(of levity-polymorphism fame) and TyCon.PrimRep, as these types
+are closely related.
+
+A "primitive entity" is one that can be
+ * stored in one register
+ * manipulated with one machine instruction
+
+
+Examples include:
+ * a 32-bit integer
+ * a 32-bit float
+ * a 64-bit float
+ * a machine address (heap pointer), etc.
+ * a quad-float (on a machine with SIMD register and instructions)
+ * ...etc...
+
+The "representation or a primitive entity" specifies what kind of register is
+needed and how many bits are required. The data type TyCon.PrimRep
+enumerates all the possibilities.
+
+data PrimRep
+  = VoidRep
+  | LiftedRep     -- ^ Lifted pointer
+  | UnliftedRep   -- ^ Unlifted pointer
+  | Int8Rep       -- ^ Signed, 8-bit value
+  | Int16Rep      -- ^ Signed, 16-bit value
+  ...etc...
+  | VecRep Int PrimElemRep  -- ^ SIMD fixed-width vector
+
+The Haskell source language is a bit more flexible: a single value may need multiple PrimReps.
+For example
+
+  utup :: (# Int, Int #) -> Bool
+  utup x = ...
+
+Here x :: (# Int, Int #), and that takes two registers, and two instructions to move around.
+Unboxed sums are similar.
+
+Every Haskell expression e has a type ty, whose kind is of form TYPE rep
+   e :: ty :: TYPE rep
+where rep :: RuntimeRep. Here rep describes the runtime representation for e's value,
+but RuntimeRep has some extra cases:
+
+data RuntimeRep = VecRep VecCount VecElem   -- ^ a SIMD vector type
+                | TupleRep [RuntimeRep]     -- ^ An unboxed tuple of the given reps
+                | SumRep [RuntimeRep]       -- ^ An unboxed sum of the given reps
+                | LiftedRep       -- ^ lifted; represented by a pointer
+                | UnliftedRep     -- ^ unlifted; represented by a pointer
+                | IntRep          -- ^ signed, word-sized value
+                ...etc...
+
+It's all in 1-1 correspondence with PrimRep except for TupleRep and SumRep,
+which describe unboxed products and sums respectively. RuntimeRep is defined
+in the library ghc-prim:GHC.Types. It is also "wired-in" to GHC: see
+TysWiredIn.runtimeRepTyCon. The unarisation pass, in GHC.Stg.Unarise, transforms the
+program, so that that every variable has a type that has a PrimRep. For
+example, unarisation transforms our utup function above, to take two Int
+arguments instead of one (# Int, Int #) argument.
+
+See also Note [Getting from RuntimeRep to PrimRep] and Note [VoidRep].
+
+Note [VoidRep]
+~~~~~~~~~~~~~~
+PrimRep contains a constructor VoidRep, while RuntimeRep does
+not. Yet representations are often characterised by a list of PrimReps,
+where a void would be denoted as []. (See also Note [RuntimeRep and PrimRep].)
+
+However, after the unariser, all identifiers have exactly one PrimRep, but
+void arguments still exist. Thus, PrimRep includes VoidRep to describe these
+binders. Perhaps post-unariser representations (which need VoidRep) should be
+a different type than pre-unariser representations (which use a list and do
+not need VoidRep), but we have what we have.
+
+RuntimeRep instead uses TupleRep '[] to denote a void argument. When
+converting a TupleRep '[] into a list of PrimReps, we get an empty list.
+
+Note [Getting from RuntimeRep to PrimRep]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+General info on RuntimeRep and PrimRep is in Note [RuntimeRep and PrimRep].
+
+How do we get from an Id to the the list or PrimReps used to store it? We get
+the Id's type ty (using idType), then ty's kind ki (using typeKind), then
+pattern-match on ki to extract rep (in kindPrimRep), then extract the PrimRep
+from the RuntimeRep (in runtimeRepPrimRep).
+
+We now must convert the RuntimeRep to a list of PrimReps. Let's look at two
+examples:
+
+  1. x :: Int#
+  2. y :: (# Int, Word# #)
+
+With these types, we can extract these kinds:
+
+  1. Int# :: TYPE IntRep
+  2. (# Int, Word# #) :: TYPE (TupleRep [LiftedRep, WordRep])
+
+In the end, we will get these PrimReps:
+
+  1. [IntRep]
+  2. [LiftedRep, WordRep]
+
+It would thus seem that we should have a function somewhere of
+type `RuntimeRep -> [PrimRep]`. This doesn't work though: when we
+look at the argument of TYPE, we get something of type Type (of course).
+RuntimeRep exists in the user's program, but not in GHC as such.
+Instead, we must decompose the Type of kind RuntimeRep into tycons and
+extract the PrimReps from the TyCons. This is what runtimeRepPrimRep does:
+it takes a Type and returns a [PrimRep]
+
+runtimeRepPrimRep works by using tyConRuntimeRepInfo. That function
+should be passed the TyCon produced by promoting one of the constructors
+of RuntimeRep into type-level data. The RuntimeRep promoted datacons are
+associated with a RuntimeRepInfo (stored directly in the PromotedDataCon
+constructor of TyCon). This pairing happens in TysWiredIn. A RuntimeRepInfo
+usually(*) contains a function from [Type] to [PrimRep]: the [Type] are
+the arguments to the promoted datacon. These arguments are necessary
+for the TupleRep and SumRep constructors, so that this process can recur,
+producing a flattened list of PrimReps. Calling this extracted function
+happens in runtimeRepPrimRep; the functions themselves are defined in
+tupleRepDataCon and sumRepDataCon, both in TysWiredIn.
+
+The (*) above is to support vector representations. RuntimeRep refers
+to VecCount and VecElem, whose promoted datacons have nuggets of information
+related to vectors; these form the other alternatives for RuntimeRepInfo.
+
+Returning to our examples, the Types we get (after stripping off TYPE) are
+
+  1. TyConApp (PromotedDataCon "IntRep") []
+  2. TyConApp (PromotedDataCon "TupleRep")
+              [TyConApp (PromotedDataCon ":")
+                        [ TyConApp (AlgTyCon "RuntimeRep") []
+                        , TyConApp (PromotedDataCon "LiftedRep") []
+                        , TyConApp (PromotedDataCon ":")
+                                   [ TyConApp (AlgTyCon "RuntimeRep") []
+                                   , TyConApp (PromotedDataCon "WordRep") []
+                                   , TyConApp (PromotedDataCon "'[]")
+                                              [TyConApp (AlgTyCon "RuntimeRep") []]]]]
+
+runtimeRepPrimRep calls tyConRuntimeRepInfo on (PromotedDataCon "IntRep"), resp.
+(PromotedDataCon "TupleRep"), extracting a function that will produce the PrimReps.
+In example 1, this function is passed an empty list (the empty list of args to IntRep)
+and returns the PrimRep IntRep. (See the definition of runtimeRepSimpleDataCons in
+TysWiredIn and its helper function mk_runtime_rep_dc.) Example 2 passes the promoted
+list as the one argument to the extracted function. The extracted function is defined
+as prim_rep_fun within tupleRepDataCon in TysWiredIn. It takes one argument, decomposes
+the promoted list (with extractPromotedList), and then recurs back to runtimeRepPrimRep
+to process the LiftedRep and WordRep, concatentating the results.
+
+-}
+
+-- | Discovers the primitive representation of a 'Type'. Returns
+-- a list of 'PrimRep': it's a list because of the possibility of
+-- no runtime representation (void) or multiple (unboxed tuple/sum)
+-- See also Note [Getting from RuntimeRep to PrimRep]
+typePrimRep :: HasDebugCallStack => Type -> [PrimRep]
+typePrimRep ty = kindPrimRep (text "typePrimRep" <+>
+                              parens (ppr ty <+> dcolon <+> ppr (typeKind ty)))
+                             (typeKind ty)
+
+-- | Like 'typePrimRep', but assumes that there is precisely one 'PrimRep' output;
+-- an empty list of PrimReps becomes a VoidRep.
+-- This assumption holds after unarise, see Note [Post-unarisation invariants].
+-- Before unarise it may or may not hold.
+-- See also Note [RuntimeRep and PrimRep] and Note [VoidRep]
+typePrimRep1 :: HasDebugCallStack => UnaryType -> PrimRep
+typePrimRep1 ty = case typePrimRep ty of
+  []    -> VoidRep
+  [rep] -> rep
+  _     -> pprPanic "typePrimRep1" (ppr ty $$ ppr (typePrimRep ty))
+
+-- | Find the runtime representation of a 'TyCon'. Defined here to
+-- avoid module loops. Returns a list of the register shapes necessary.
+-- See also Note [Getting from RuntimeRep to PrimRep]
+tyConPrimRep :: HasDebugCallStack => TyCon -> [PrimRep]
+tyConPrimRep tc
+  = kindPrimRep (text "kindRep tc" <+> ppr tc $$ ppr res_kind)
+                res_kind
+  where
+    res_kind = tyConResKind tc
+
+-- | Like 'tyConPrimRep', but assumed that there is precisely zero or
+-- one 'PrimRep' output
+-- See also Note [Getting from RuntimeRep to PrimRep] and Note [VoidRep]
+tyConPrimRep1 :: HasDebugCallStack => TyCon -> PrimRep
+tyConPrimRep1 tc = case tyConPrimRep tc of
+  []    -> VoidRep
+  [rep] -> rep
+  _     -> pprPanic "tyConPrimRep1" (ppr tc $$ ppr (tyConPrimRep tc))
+
+-- | Take a kind (of shape @TYPE rr@) and produce the 'PrimRep's
+-- of values of types of this kind.
+-- See also Note [Getting from RuntimeRep to PrimRep]
+kindPrimRep :: HasDebugCallStack => SDoc -> Kind -> [PrimRep]
+kindPrimRep doc ki
+  | Just ki' <- coreView ki
+  = kindPrimRep doc ki'
+kindPrimRep doc (TyConApp typ [runtime_rep])
+  = ASSERT( typ `hasKey` tYPETyConKey )
+    runtimeRepPrimRep doc runtime_rep
+kindPrimRep doc ki
+  = pprPanic "kindPrimRep" (ppr ki $$ doc)
+
+-- | Take a type of kind RuntimeRep and extract the list of 'PrimRep' that
+-- it encodes. See also Note [Getting from RuntimeRep to PrimRep]
+runtimeRepPrimRep :: HasDebugCallStack => SDoc -> Type -> [PrimRep]
+runtimeRepPrimRep doc rr_ty
+  | Just rr_ty' <- coreView rr_ty
+  = runtimeRepPrimRep doc rr_ty'
+  | TyConApp rr_dc args <- rr_ty
+  , RuntimeRep fun <- tyConRuntimeRepInfo rr_dc
+  = fun args
+  | otherwise
+  = pprPanic "runtimeRepPrimRep" (doc $$ ppr rr_ty)
+
+-- | Convert a PrimRep back to a Type. Used only in the unariser to give types
+-- to fresh Ids. Really, only the type's representation matters.
+-- See also Note [RuntimeRep and PrimRep]
+primRepToType :: PrimRep -> Type
+primRepToType = anyTypeOfKind . tYPE . primRepToRuntimeRep