Tag inference work.

This does three major things: * Enforce the invariant that all strict fields must contain tagged pointers. * Try to predict the tag on bindings in order to omit tag checks. * Allows functions to pass arguments unlifted (call-by-value). The former is "simply" achieved by wrapping any constructor allocations with a case which will evaluate the respective strict bindings. The prediction is done by a new data flow analysis based on the STG representation of a program. This also helps us to avoid generating redudant cases for the above invariant. StrictWorkers are created by W/W directly and SpecConstr indirectly. See the Note [Strict Worker Ids] Other minor changes: * Add StgUtil module containing a few functions needed by, but not specific to the tag analysis. ------------------------- Metric Decrease: T12545 T18698b T18140 T18923 LargeRecord Metric Increase: LargeRecord ManyAlternatives ManyConstructors T10421 T12425 T12707 T13035 T13056 T13253 T13253-spj T13379 T15164 T18282 T18304 T18698a T1969 T20049 T3294 T4801 T5321FD T5321Fun T783 T9233 T9675 T9961 T19695 WWRec -------------------------
author: Andreas Klebinger <klebinger.andreas@gmx.at> 2021-05-10 22:06:51 +0200
committer: Matthew Pickering <matthewtpickering@gmail.com> 2022-02-12 13:59:41 +0000
commit: 0e93023eef174262310737004d398bc7a606939a (patch)
tree: 091a34f78b7911d8b38f414ff8eab90796581c47
parent: 90a26f8b0dd99129d3fd7fe28127cb69abd46328 (diff)
download: haskell-0e93023eef174262310737004d398bc7a606939a.tar.gz
88 files changed, 3093 insertions, 310 deletions
diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs
index 3acace8be2..dd7e1f14f5 100644
--- a/compiler/GHC/Cmm/CLabel.hs
+++ b/compiler/GHC/Cmm/CLabel.hs
@@ -25,6 +25,7 @@ module GHC.Cmm.CLabel (
         mkInfoTableLabel,
         mkEntryLabel,
         mkRednCountsLabel,
+        mkTagHitLabel,
         mkConInfoTableLabel,
         mkApEntryLabel,
         mkApInfoTableLabel,
@@ -301,7 +302,7 @@ isIdLabel _ = False
 -- Used in SRT analysis. See Note [Ticky labels in SRT analysis] in
 -- GHC.Cmm.Info.Build.
 isTickyLabel :: CLabel -> Bool
-isTickyLabel (IdLabel _ _ RednCounts) = True
+isTickyLabel (IdLabel _ _ IdTickyInfo{}) = True
 isTickyLabel _ = False
 
 -- | Indicate if "GHC.CmmToC" has to generate an extern declaration for the
@@ -447,6 +448,26 @@ pprDebugCLabel platform lbl = pprCLabel platform AsmStyle lbl <> parens extra
 
          _  -> text "other CLabel"
 
+-- Dynamic ticky info for the id.
+data TickyIdInfo
+  = TickyRednCounts           -- ^ Used for dynamic allocations
+  | TickyInferedTag !Unique    -- ^ Used to track dynamic hits of tag inference.
+  deriving (Eq,Show)
+
+instance Outputable TickyIdInfo where
+    ppr TickyRednCounts = text "ct_rdn"
+    ppr (TickyInferedTag unique) = text "ct_tag[" <> ppr unique <> char ']'
+
+-- | Don't depend on this if you need determinism.
+-- No determinism in the ncg backend, so we use the unique for Ord.
+-- Even if it pains me slightly.
+instance Ord TickyIdInfo where
+    compare TickyRednCounts TickyRednCounts = EQ
+    compare TickyRednCounts _ = LT
+    compare _ TickyRednCounts = GT
+    compare (TickyInferedTag unique1) (TickyInferedTag unique2) =
+      nonDetCmpUnique unique1 unique2
+
 
 data IdLabelInfo
   = Closure             -- ^ Label for closure
@@ -457,7 +478,7 @@ data IdLabelInfo
   | LocalInfoTable      -- ^ Like InfoTable but not externally visible
   | LocalEntry          -- ^ Like Entry but not externally visible
 
-  | RednCounts          -- ^ Label of place to keep Ticky-ticky  info for this Id
+  | IdTickyInfo !TickyIdInfo -- ^ Label of place to keep Ticky-ticky hit info for this Id
 
   | ConEntry ConInfoTableLocation
   -- ^ Constructor entry point, when `-fdistinct-info-tables` is enabled then
@@ -504,12 +525,12 @@ instance Outputable IdLabelInfo where
   ppr LocalInfoTable  = text "LocalInfoTable"
   ppr LocalEntry      = text "LocalEntry"
 
-  ppr RednCounts      = text "RednCounts"
   ppr (ConEntry mn) = text "ConEntry" <+> ppr mn
   ppr (ConInfoTable mn) = text "ConInfoTable" <+> ppr mn
   ppr ClosureTable = text "ClosureTable"
   ppr Bytes        = text "Bytes"
   ppr BlockInfoTable  = text "BlockInfoTable"
+  ppr (IdTickyInfo info) = text "IdTickyInfo" <+> ppr info
 
 
 data RtsLabelInfo
@@ -559,8 +580,12 @@ data DynamicLinkerLabelInfo
 mkSRTLabel     :: Unique -> CLabel
 mkSRTLabel u = SRTLabel u
 
+-- See Note [ticky for LNE]
 mkRednCountsLabel :: Name -> CLabel
-mkRednCountsLabel name = IdLabel name NoCafRefs RednCounts  -- Note [ticky for LNE]
+mkRednCountsLabel name = IdLabel name NoCafRefs (IdTickyInfo TickyRednCounts)
+
+mkTagHitLabel :: Name -> Unique -> CLabel
+mkTagHitLabel name !uniq = IdLabel name NoCafRefs (IdTickyInfo (TickyInferedTag uniq))
 
 -- These have local & (possibly) external variants:
 mkLocalClosureLabel      :: Name -> CafInfo -> CLabel
@@ -892,7 +917,7 @@ hasIdLabelInfo _ = Nothing
 -- -----------------------------------------------------------------------------
 -- Does a CLabel's referent itself refer to a CAF?
 hasCAF :: CLabel -> Bool
-hasCAF (IdLabel _ _ RednCounts) = False -- Note [ticky for LNE]
+hasCAF (IdLabel _ _ (IdTickyInfo TickyRednCounts)) = False -- See Note [ticky for LNE]
 hasCAF (IdLabel _ MayHaveCafRefs _) = True
 hasCAF _                            = False
 
@@ -1146,7 +1171,7 @@ idInfoLabelType info =
     Closure       -> GcPtrLabel
     ConInfoTable {} -> DataLabel
     ClosureTable  -> DataLabel
-    RednCounts    -> DataLabel
+    IdTickyInfo{} -> DataLabel
     Bytes         -> DataLabel
     _             -> CodeLabel
 
@@ -1503,7 +1528,10 @@ ppIdFlavor x = pp_cSEP <> case x of
    Entry            -> text "entry"
    LocalEntry       -> text "entry"
    Slow             -> text "slow"
-   RednCounts       -> text "ct"
+   IdTickyInfo TickyRednCounts
+      -> text "ct"
+   IdTickyInfo (TickyInferedTag unique)
+      -> text "ct_inf_tag" <> char '_' <> ppr unique
    ConEntry loc      ->
       case loc of
         DefinitionSite -> text "con_entry"
diff --git a/compiler/GHC/Cmm/Liveness.hs b/compiler/GHC/Cmm/Liveness.hs
index f047ea4367..a1526be099 100644
--- a/compiler/GHC/Cmm/Liveness.hs
+++ b/compiler/GHC/Cmm/Liveness.hs
@@ -71,6 +71,8 @@ cmmGlobalLiveness platform graph =
     analyzeCmmBwd liveLattice (xferLive platform) graph mapEmpty
 
 -- | On entry to the procedure, there had better not be any LocalReg's live-in.
+-- If you see this error it most likely means you are trying to use a variable
+-- without it being defined in the given scope.
 noLiveOnEntry :: BlockId -> CmmLive LocalReg -> a -> a
 noLiveOnEntry bid in_fact x =
   if nullRegSet in_fact then x
diff --git a/compiler/GHC/Cmm/Utils.hs b/compiler/GHC/Cmm/Utils.hs
index 9e9566b334..2060be5bda 100644
--- a/compiler/GHC/Cmm/Utils.hs
+++ b/compiler/GHC/Cmm/Utils.hs
@@ -48,7 +48,7 @@ module GHC.Cmm.Utils(
         currentTSOExpr, currentNurseryExpr, cccsExpr,
 
         -- Tagging
-        cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged,
+        cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged, cmmIsNotTagged,
         cmmConstrTag1, mAX_PTR_TAG, tAG_MASK,
 
         -- Overlap and usage
@@ -447,13 +447,14 @@ cmmPointerMask platform = mkIntExpr platform (complement (tAG_MASK platform))
 
 -- Used to untag a possibly tagged pointer
 -- A static label need not be untagged
-cmmUntag, cmmIsTagged, cmmConstrTag1 :: Platform -> CmmExpr -> CmmExpr
+cmmUntag, cmmIsTagged, cmmIsNotTagged, cmmConstrTag1 :: Platform -> CmmExpr -> CmmExpr
 cmmUntag _ e@(CmmLit (CmmLabel _)) = e
 -- Default case
 cmmUntag platform e = cmmAndWord platform e (cmmPointerMask platform)
 
--- Test if a closure pointer is untagged
+-- Test if a closure pointer is untagged/tagged.
 cmmIsTagged platform e = cmmNeWord platform (cmmAndWord platform e (cmmTagMask platform)) (zeroExpr platform)
+cmmIsNotTagged platform e = cmmEqWord platform (cmmAndWord platform e (cmmTagMask platform)) (zeroExpr platform)
 
 -- Get constructor tag, but one based.
 cmmConstrTag1 platform e = cmmAndWord platform e (cmmTagMask platform)
diff --git a/compiler/GHC/Core/DataCon.hs b/compiler/GHC/Core/DataCon.hs
index feca5d3754..07419b9c5c 100644
--- a/compiler/GHC/Core/DataCon.hs
+++ b/compiler/GHC/Core/DataCon.hs
@@ -58,7 +58,7 @@ module GHC.Core.DataCon (
         isUnboxedSumDataCon,
         isVanillaDataCon, isNewDataCon, classDataCon, dataConCannotMatch,
         dataConUserTyVarsArePermuted,
-        isBanged, isMarkedStrict, eqHsBang, isSrcStrict, isSrcUnpacked,
+        isBanged, isMarkedStrict, cbvFromStrictMark, eqHsBang, isSrcStrict, isSrcUnpacked,
         specialPromotedDc,
 
         -- ** Promotion related functions
@@ -745,9 +745,10 @@ data SrcUnpackedness = SrcUnpack -- ^ {-# UNPACK #-} specified
 
 
 -------------------------
--- StrictnessMark is internal only, used to indicate strictness
+-- StrictnessMark is used to indicate strictness
 -- of the DataCon *worker* fields
 data StrictnessMark = MarkedStrict | NotMarkedStrict
+    deriving Eq
 
 -- | An 'EqSpec' is a tyvar/type pair representing an equality made in
 -- rejigging a GADT constructor
@@ -944,6 +945,16 @@ instance Outputable StrictnessMark where
     ppr MarkedStrict    = text "!"
     ppr NotMarkedStrict = empty
 
+instance Binary StrictnessMark where
+    put_ bh NotMarkedStrict = putByte bh 0
+    put_ bh MarkedStrict    = putByte bh 1
+    get bh =
+      do h <- getByte bh
+         case h of
+           0 -> return NotMarkedStrict
+           1 -> return MarkedStrict
+           _ -> panic "Invalid binary format"
+
 instance Binary SrcStrictness where
     put_ bh SrcLazy     = putByte bh 0
     put_ bh SrcStrict   = putByte bh 1
@@ -994,6 +1005,11 @@ isMarkedStrict :: StrictnessMark -> Bool
 isMarkedStrict NotMarkedStrict = False
 isMarkedStrict _               = True   -- All others are strict
 
+cbvFromStrictMark :: StrictnessMark -> CbvMark
+cbvFromStrictMark NotMarkedStrict = NotMarkedCbv
+cbvFromStrictMark MarkedStrict = MarkedCbv
+
+
 {- *********************************************************************
 *                                                                      *
 \subsection{Construction}
diff --git a/compiler/GHC/Core/Opt/CprAnal.hs b/compiler/GHC/Core/Opt/CprAnal.hs
index 65468cd037..64551f9498 100644
--- a/compiler/GHC/Core/Opt/CprAnal.hs
+++ b/compiler/GHC/Core/Opt/CprAnal.hs
@@ -179,7 +179,7 @@ cprAnalProgram logger fam_envs binds = do
   let env            = emptyAnalEnv fam_envs
   let binds_plus_cpr = snd $ mapAccumL cprAnalTopBind env binds
   putDumpFileMaybe logger Opt_D_dump_cpr_signatures "Cpr signatures" FormatText $
-    dumpIdInfoOfProgram (ppr . cprSigInfo) binds_plus_cpr
+    dumpIdInfoOfProgram False (ppr . cprSigInfo) binds_plus_cpr
   -- See Note [Stamp out space leaks in demand analysis] in GHC.Core.Opt.DmdAnal
   seqBinds binds_plus_cpr `seq` return binds_plus_cpr
 
diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs
index e6b404ff61..cd58ff89d7 100644
--- a/compiler/GHC/Core/Opt/DmdAnal.hs
+++ b/compiler/GHC/Core/Opt/DmdAnal.hs
@@ -1466,7 +1466,7 @@ finaliseArgBoxities env fn arity rhs
 
     go_arg :: Budgets -> (Type,StrictnessMark,Demand) -> (Budgets, Demand)
     go_arg bg@(MkB bg_top bg_inner) (ty, str_mark, dmd@(n :* _))
-      = case wantToUnboxArg fam_envs ty dmd of
+      = case wantToUnboxArg False fam_envs ty dmd of
           DropAbsent   -> (bg,                      dmd)
           StopUnboxing -> (MkB (bg_top-1) bg_inner, trimBoxity dmd)
 
@@ -1487,6 +1487,7 @@ finaliseArgBoxities env fn arity rhs
                   = (bg_inner', dmd')
                   | otherwise
                   = (bg_inner, trimBoxity dmd)
+          Unlift -> panic "No unlifting in DmdAnal"
 
     add_demands :: [Demand] -> CoreExpr -> CoreExpr
     -- Attach the demands to the outer lambdas of this expression
@@ -1509,7 +1510,7 @@ finaliseLetBoxity env ty dmd
   = go ty NotMarkedStrict dmd
   where
     go ty mark dmd@(n :* _) =
-      case wantToUnboxArg env ty dmd of
+      case wantToUnboxArg False env ty dmd of
         DropAbsent   -> dmd
         StopUnboxing -> trimBoxity dmd
         Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} dmds
@@ -1520,6 +1521,7 @@ finaliseLetBoxity env ty dmd
           -> n :* (mkProd Unboxed $! dmds')
           | otherwise
           -> trimBoxity dmd
+        Unlift -> panic "No unlifting in DmdAnal"
 
 
 {- *********************************************************************
diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs
index 741552f815..276bfee45d 100644
--- a/compiler/GHC/Core/Opt/Pipeline.hs
+++ b/compiler/GHC/Core/Opt/Pipeline.hs
@@ -1079,6 +1079,6 @@ dmdAnal logger dflags fam_envs rules binds = do
                }
       binds_plus_dmds = dmdAnalProgram opts fam_envs rules binds
   Logger.putDumpFileMaybe logger Opt_D_dump_str_signatures "Strictness signatures" FormatText $
-    dumpIdInfoOfProgram (ppr . zapDmdEnvSig . dmdSigInfo) binds_plus_dmds
+    dumpIdInfoOfProgram (hasPprDebug dflags) (ppr . zapDmdEnvSig . dmdSigInfo) binds_plus_dmds
   -- See Note [Stamp out space leaks in demand analysis] in GHC.Core.Opt.DmdAnal
   seqBinds binds_plus_dmds `seq` return binds_plus_dmds
diff --git a/compiler/GHC/Core/Opt/Simplify/Monad.hs b/compiler/GHC/Core/Opt/Simplify/Monad.hs
index c730a3e981..83d27f4fe5 100644
--- a/compiler/GHC/Core/Opt/Simplify/Monad.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Monad.hs
@@ -215,6 +215,7 @@ newId :: FastString -> Mult -> Type -> SimplM Id
 newId fs w ty = do uniq <- getUniqueM
                    return (mkSysLocalOrCoVar fs uniq w ty)
 
+-- | Make a join id with given type and arity but without call-by-value annotations.
 newJoinId :: [Var] -> Type -> SimplM Id
 newJoinId bndrs body_ty
   = do { uniq <- getUniqueM
@@ -223,7 +224,7 @@ newJoinId bndrs body_ty
              arity      = count isId bndrs
              -- arity: See Note [Invariants on join points] invariant 2b, in GHC.Core
              join_arity = length bndrs
-             details    = JoinId join_arity
+             details    = JoinId join_arity Nothing
              id_info    = vanillaIdInfo `setArityInfo` arity
 --                                        `setOccInfo` strongLoopBreaker
 
diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs
index afd8afc5ea..14fe9bec00 100644
--- a/compiler/GHC/Core/Opt/SpecConstr.hs
+++ b/compiler/GHC/Core/Opt/SpecConstr.hs
@@ -656,6 +656,11 @@ containers package we have a merge function with this specialization:
 
 We give sc_s5lZ and sc_s5m0 a evaluated unfolding since they come out of
 strict field fields in the Bin constructor.
+This is especially important since tag inference can then use this
+information to adjust the calling convention of
+`$wmerge0_s4UK` to enforce arguments being passed fully evaluated+tagged.
+See Note [Tag Inference], Note [Strict Worker Ids] for more information on
+how we can take advantage of this.
 
 -----------------------------------------------------
                 Stuff not yet handled
@@ -1771,11 +1776,15 @@ spec_one env fn arg_bndrs body (call_pat, rule_number)
                   -- Annotate the variables with the strictness information from
                   -- the function (see Note [Strictness information in worker binders])
 
-              (spec_lam_args, spec_call_args) = mkWorkerArgs fn False
-                                                    spec_lam_args1
+              (spec_lam_args, spec_call_args,_) = mkWorkerArgs fn False
+                                                    spec_lam_args1 []
                                                     spec_body_ty
                 -- mkWorkerArgs: usual w/w hack to avoid generating
                 -- a spec_rhs of unlifted type and no args.
+                -- Unlike W/W we don't turn functions into strict workers
+                -- immediately here instead letting tidy handle this.
+                -- For this reason we can ignore the cbv marks.
+                -- See Note [Strict Worker Ids]. See Note [Tag Inference].
 
               spec_id    = mkLocalId spec_name Many
                                      (mkLamTypes spec_lam_args spec_body_ty)
diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs
index 3e4770a997..f07e8dde37 100644
--- a/compiler/GHC/Core/Opt/WorkWrap.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap.hs
@@ -763,11 +763,12 @@ splitFun ww_opts fn_id rhs
 
 mkWWBindPair :: WwOpts -> Id -> IdInfo
              -> [Var] -> CoreExpr -> Unique -> Divergence
-             -> ([Demand], JoinArity, Id -> CoreExpr, Expr CoreBndr -> CoreExpr)
+             -> ([Demand],[CbvMark], JoinArity, Id -> CoreExpr, Expr CoreBndr -> CoreExpr)
              -> [(Id, CoreExpr)]
 mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div
-             (work_demands, join_arity, wrap_fn, work_fn)
-  = [(work_id, work_rhs), (wrap_id, wrap_rhs)]
+             (work_demands, cbv_marks :: [CbvMark], join_arity, wrap_fn, work_fn)
+  = -- pprTrace "mkWWBindPair" (ppr fn_id <+> ppr wrap_id <+> ppr work_id $$ ppr wrap_rhs) $
+    [(work_id, work_rhs), (wrap_id, wrap_rhs)]
      -- Worker first, because wrapper mentions it
   where
     arity = arityInfo fn_info
@@ -819,9 +820,13 @@ mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div
                         -- Set the arity so that the Core Lint check that the
                         -- arity is consistent with the demand type goes
                         -- through
+
+                `setIdCbvMarks` cbv_marks
+
                 `asJoinId_maybe` work_join_arity
+                -- `setIdThing` (undefined cbv_marks)
 
-    work_arity = length work_demands
+    work_arity = length work_demands :: Int
 
     -- See Note [Demand on the worker]
     single_call = saturatedByOneShots arity (demandInfo fn_info)
@@ -1014,7 +1019,8 @@ splitThunk :: WwOpts -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)]
 splitThunk ww_opts is_rec x rhs
   = assert (not (isJoinId x)) $
     do { let x' = localiseId x -- See comment above
-       ; (useful,_, wrap_fn, fn_arg) <- mkWWstr_one ww_opts x'
+       ; (useful,_args, wrap_fn, fn_arg)
+           <- mkWWstr_one ww_opts x' NotMarkedCbv
        ; let res = [ (x, Let (NonRec x' rhs) (wrap_fn fn_arg)) ]
        ; if useful then assertPpr (isNonRec is_rec) (ppr x) -- The thunk must be non-recursive
                    return res
diff --git a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
index 1b2d3ca1ba..8936ccdfe5 100644
--- a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
@@ -6,6 +6,7 @@ A library for the ``worker\/wrapper'' back-end to the strictness analyser
 
 
 {-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
 
 module GHC.Core.Opt.WorkWrap.Utils
    ( WwOpts(..), initWwOpts, mkWwBodies, mkWWstr, mkWWstr_one, mkWorkerArgs
@@ -14,6 +15,7 @@ module GHC.Core.Opt.WorkWrap.Utils
    , findTypeShape, IsRecDataConResult(..), isRecDataCon
    , mkAbsentFiller
    , isWorkerSmallEnough, dubiousDataConInstArgTys
+   , isGoodWorker, badWorker , goodWorker
    )
 where
 
@@ -144,6 +146,9 @@ data WwOpts
 
   -- Used for absent argument error message
   , wo_module            :: !Module
+  , wo_unlift_strict     :: !Bool -- Generate workers even if the only effect is some args
+                                  -- get passed unlifted.
+                                  -- See Note [WW for calling convention]
   }
 
 initWwOpts :: Module -> DynFlags -> FamInstEnvs -> WwOpts
@@ -153,10 +158,12 @@ initWwOpts this_mod dflags fam_envs = MkWwOpts
   , wo_cpr_anal          = gopt Opt_CprAnal dflags
   , wo_fun_to_thunk      = gopt Opt_FunToThunk dflags
   , wo_module            = this_mod
+  , wo_unlift_strict     = gopt Opt_WorkerWrapperUnlift dflags
   }
 
 type WwResult
   = ([Demand],              -- Demands for worker (value) args
+     [CbvMark],             -- Cbv semantics for worker (value) args
      JoinArity,             -- Number of worker (type OR value) args
      Id -> CoreExpr,        -- Wrapper body, lacking only the worker Id
      CoreExpr -> CoreExpr)  -- Worker body, lacking the original function rhs
@@ -226,25 +233,29 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr
               zapped_arg_vars = map zap_var arg_vars
               (subst, cloned_arg_vars) = cloneBndrs empty_subst uniq_supply zapped_arg_vars
               res_ty' = GHC.Core.Subst.substTy subst res_ty
+              init_cbv_marks = map (const NotMarkedCbv) cloned_arg_vars
 
-        ; (useful1, work_args, wrap_fn_str, fn_args)
-             <- mkWWstr opts cloned_arg_vars
+        ; (useful1, work_args_cbv, wrap_fn_str, fn_args)
+             <- mkWWstr opts cloned_arg_vars init_cbv_marks
+
+        ; let (work_args, work_marks) = unzip work_args_cbv
 
         -- Do CPR w/w.  See Note [Always do CPR w/w]
         ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty)
               <- mkWWcpr_entry opts res_ty' res_cpr
 
-        ; let (work_lam_args, work_call_args) = mkWorkerArgs fun_id (wo_fun_to_thunk opts)
-                                                             work_args cpr_res_ty
+        ; let (work_lam_args, work_call_args, work_call_cbv) = mkWorkerArgs fun_id (wo_fun_to_thunk opts)
+                                                                 work_args work_marks cpr_res_ty
+
               call_work work_fn  = mkVarApps (Var work_fn) work_call_args
               call_rhs fn_rhs = mkAppsBeta fn_rhs fn_args
                                   -- See Note [Join points and beta-redexes]
               wrapper_body = mkLams cloned_arg_vars . wrap_fn_cpr . wrap_fn_str . call_work
               worker_body = mkLams work_lam_args . work_fn_cpr . call_rhs
-              worker_args_dmds = [idDemandInfo v | v <- work_call_args, isId v]
+              (worker_args_dmds, work_val_cbvs)= unzip [(idDemandInfo v,cbv) | (v,cbv) <- zipEqual "mkWwBodies" work_call_args work_call_cbv, isId v]
 
         ; if ((useful1 && not only_one_void_argument) || useful2)
-          then return (Just (worker_args_dmds, length work_call_args,
+          then return (Just (worker_args_dmds, work_val_cbvs, length work_call_args,
                        wrapper_body, worker_body))
           else return Nothing
         }
@@ -359,20 +370,25 @@ add a void argument.  E.g.
 We use the state-token type which generates no code.
 -}
 
+-- | Prevent a function from becoming a thunk by adding a void argument if
+-- required.
 mkWorkerArgs :: Id      -- The wrapper Id
-             -> Bool
+             -> Bool    -- Allow fun->thunk conversion.
              -> [Var]
+             -> [CbvMark]
              -> Type    -- Type of body
              -> ([Var], -- Lambda bound args
-                 [Var]) -- Args at call site
-mkWorkerArgs wrap_id fun_to_thunk args res_ty
+                 [Var], -- Args at call site
+                 [CbvMark] -- cbv semantics for the worker args.
+                 )
+mkWorkerArgs wrap_id fun_to_thunk args cbv_marks res_ty
     | not (isJoinId wrap_id) -- Join Ids never need an extra arg
     , not (any isId args)    -- No existing value lambdas
     , needs_a_value_lambda   -- and we need to add one
-    = (args ++ [voidArgId], args ++ [voidPrimId])
+    = (args ++ [voidArgId], args ++ [voidPrimId], cbv_marks ++ [NotMarkedCbv])
 
     | otherwise
-    = (args, args)
+    = (args, args, cbv_marks)
     where
       -- If fun_to_thunk is False we always keep at least one value
       --   argument: see Note [Protecting the last value argument]
@@ -512,17 +528,38 @@ data UnboxingDecision s
   -- instantiation with 'dataConRepInstPat'.
   -- The @[s]@ carries the bits of information with which we can continue
   -- unboxing, e.g. @s@ will be 'Demand' or 'Cpr'.
+  | Unlift
+  -- ^ The argument can't be unboxed, but we want it to be passed evaluated to the worker.
+
+-- Do we want to create workers just for unlifting?
+wwForUnlifting :: WwOpts -> Bool
+wwForUnlifting !opts
+    -- Always unlift if possible
+    | wo_unlift_strict opts = goodWorker
+    -- Don't unlift  it would cause additional W/W splits.
+    | otherwise = badWorker
+
+badWorker :: Bool
+badWorker = False
+
+goodWorker :: Bool
+goodWorker = True
+
+isGoodWorker :: Bool -> Bool
+isGoodWorker = id
+
 
 -- | Unwraps the 'Boxity' decision encoded in the given 'SubDemand' and returns
 -- a 'DataConPatContext' as well the nested demands on fields of the 'DataCon'
 -- to unbox.
 wantToUnboxArg
-  :: FamInstEnvs
+  :: Bool                -- ^ Consider unlifting
+  -> FamInstEnvs
   -> Type                -- ^ Type of the argument
   -> Demand              -- ^ How the arg was used
   -> UnboxingDecision Demand
 -- See Note [Which types are unboxed?]
-wantToUnboxArg fam_envs ty (n :* sd)
+wantToUnboxArg do_unlifting fam_envs ty dmd@(n :* sd)
   | isAbs n
   = DropAbsent
 
@@ -530,10 +567,17 @@ wantToUnboxArg fam_envs ty (n :* sd)
   , Just dc <- tyConSingleAlgDataCon_maybe tc
   , let arity = dataConRepArity dc
   , Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity analysis]
-  -- NB: No strictness or evaluatedness checks here.
+  -- NB: No strictness or evaluatedness checks for unboxing here.
   -- That is done by 'finaliseArgBoxities'!
   = Unbox (DataConPatContext dc tc_args co) ds
 
+  -- See Note [Strict Worker Ids]
+  | do_unlifting
+  , isStrUsedDmd dmd
+  , not (isFunTy ty)
+  , not (isUnliftedType ty) -- Already unlifted!
+  = Unlift
+
   | otherwise
   = StopUnboxing
 
@@ -637,6 +681,65 @@ other cases where something went avoidably wrong.
 This warning also triggers for the stream fusion library within `text`.
 We can'easily W/W constructed results like `Stream` because we have no simple
 way to express existential types in the worker's type signature.
+
+Note [WW for calling convention]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we know a function f will always evaluate a particular argument
+we might decide that it should rather get evaluated by the caller.
+We call this "unlifting" the argument.
+Sometimes the caller knows that the argument is already evaluated,
+so we won't generate any code to enter/evaluate the argument.
+This evaluation avoidance can be quite beneficial.
+Especially for recursive functions who pass the same lifted argument
+along on each iteration or walk over strict data structures.
+
+One way to achieve this is to do a W/W split, where the wrapper does
+the evaluation, and the worker can treat its arguments as unlifted.
+The wrapper is small and will be inlined at almost all call sites and
+the evaluation code in the wrapper can then cancel out with evaluation
+done by the calling context if the argument is evaluated there.
+Same idea as W/W to avoid allocation really, just for a different kind
+of work.
+
+Performing W/W might not always be a win. In particular it's easy to break
+(badly written, but common) rule frameworks by doing additional W/W splits.
+See #20364 for a more detailed explaination.
+
+Hence we have the following strategies with different trade-offs:
+A) Never do W/W *just* for unlifting of arguments.
+  + Very conservative - doesn't break any rules
+  - Lot's of performance left on the table
+B) Do W/W on just about anything where it might be
+  beneficial.
+  + Exploits pretty much every oppertunity for unlifting.
+  - A bit of compile time/code size cost for all the wrappers.
+  - Can break rules which would otherwise fire. See #20364.
+C) Unlift *any* (non-boot exported) functions arguments if they are strict.
+  That is instead of creating a Worker with the new calling convention we
+  change the calling convention of the binding itself.
+  + Exploits every opportunity for unlifting.
+  + Maybe less bad interactions with rules.
+  - Requires tracking of boot-exported definitions.
+  - Requires either:
+    ~ Eta-expansion at *all* call sites in order to generate
+      an impedance matcher function. Leading to massive code bloat.
+      Essentially we end up creating a imprompto wrapper function
+      wherever we wouldn't inline the wrapper with a W/W approach.
+    ~ There is the option of achieving this without eta-expansion if we instead expand
+      the partial application code to check for demands on the calling convention and
+      for it to evaluate the arguments. The main downsides there would be the complexity
+      of the implementation and that it carries a certain overhead even for functions who
+      don't take advantage of this functionality. I haven't tried this approach because it's
+      not trivial to implement and doing W/W splits seems to work well enough.
+
+Currently we use the first approach A) by default, with a flag that allows users to fall back to the
+more aggressive approach B).
+I also tried the third approach C) using eta-expansion at call sites to avoid modifying the PAP-handling
+code which wasn't fruitful. See https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5614#note_389903.
+We could still try to do C) in the future by having PAP calls which will evaluate the required arguments
+before calling the partially applied function. But this would be neither a small nor simple change so we
+stick with A) and a flag for B) for now.
+See also Note [Tag Inference] and Note [Strict Worker Ids]
 -}
 
 {-
@@ -650,26 +753,29 @@ way to express existential types in the worker's type signature.
 mkWWstr :: WwOpts
         -> [Var]                         -- Wrapper args; have their demand info on them
                                          --  *Includes type variables*
-        -> UniqSM (Bool,                 -- Is this useful
-                   [Var],                -- Worker args
+        -> [CbvMark]                     -- cbv info for arguments
+        -> UniqSM (Bool,                 -- Will this result in a useful worker
+                   [(Var,CbvMark)],      -- Worker args/their call-by-value semantics.
                    CoreExpr -> CoreExpr, -- Wrapper body, lacking the worker call
                                          -- and without its lambdas
                                          -- This fn adds the unboxing
                    [CoreExpr])           -- Reboxed args for the call to the
                                          -- original RHS. Corresponds one-to-one
                                          -- with the wrapper arg vars
-mkWWstr opts args
-  = go args
+mkWWstr opts args cbv_info
+  = go args cbv_info
   where
-    go_one arg = mkWWstr_one opts arg
+    go_one arg cbv = mkWWstr_one opts arg cbv
 
-    go []           = return (False, [], nop_fn, [])
-    go (arg : args) = do { (useful1, args1, wrap_fn1, wrap_arg)  <- go_one arg
-                         ; (useful2, args2, wrap_fn2, wrap_args) <- go args
+    go []           _ = return (badWorker, [], nop_fn, [])
+    go (arg : args) (cbv:cbvs)
+      =               do { (useful1, args1, wrap_fn1, wrap_arg)  <- go_one arg cbv
+                         ; (useful2, args2, wrap_fn2, wrap_args) <- go args cbvs
                          ; return ( useful1 || useful2
                                   , args1 ++ args2
                                   , wrap_fn1 . wrap_fn2
                                   , wrap_arg:wrap_args ) }
+    go _ _ = panic "mkWWstr: Impossible - cbv/arg length missmatch"
 
 ----------------------
 -- mkWWstr_one wrap_var = (useful, work_args, wrap_fn, wrap_arg)
@@ -678,19 +784,28 @@ mkWWstr opts args
 --   * wrap_arg assumes work_args are in scope, and builds a ConApp that
 --        reconstructs the RHS of wrap_var that we pass to the original RHS
 -- See Note [Worker/wrapper for Strictness and Absence]
-mkWWstr_one :: WwOpts -> Var -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr)
-mkWWstr_one opts arg =
-  case wantToUnboxArg fam_envs arg_ty arg_dmd of
+mkWWstr_one :: WwOpts
+            -> Var
+            -> CbvMark
+            -> UniqSM (Bool, [(Var,CbvMark)], CoreExpr -> CoreExpr, CoreExpr)
+mkWWstr_one opts arg marked_cbv =
+  case wantToUnboxArg True fam_envs arg_ty arg_dmd of
     _ | isTyVar arg -> do_nothing
 
     DropAbsent
       | Just absent_filler <- mkAbsentFiller opts arg
-         -- Absent case.  We can't always handle absence for arbitrary
+         -- Absent case.  Dropt the argument from the worker.
+         -- We can't always handle absence for arbitrary
          -- unlifted types, so we need to choose just the cases we can
          -- (that's what mkAbsentFiller does)
-      -> return (True, [], nop_fn, absent_filler)
+      -> return (goodWorker, [], nop_fn, absent_filler)
+
+    Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc marked_cbv
 
-    Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc
+    Unlift -> return  ( wwForUnlifting opts
+                      , [(setIdUnfolding arg evaldUnfolding, MarkedCbv)]
+                      , nop_fn
+                      , varToCoreExpr arg)
 
     _ -> do_nothing -- Other cases, like StopUnboxing
 
@@ -698,27 +813,44 @@ mkWWstr_one opts arg =
     fam_envs   = wo_fam_envs opts
     arg_ty     = idType arg
     arg_dmd    = idDemandInfo arg
-    do_nothing = return (False, [arg], nop_fn, varToCoreExpr arg)
+    -- Type args don't get cbv marks
+    arg_cbv    = if isTyVar arg then NotMarkedCbv else marked_cbv
+    do_nothing = return (badWorker, [(arg,arg_cbv)], nop_fn, varToCoreExpr arg)
 
 unbox_one_arg :: WwOpts
           -> Var
           -> [Demand]
           -> DataConPatContext
-          -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr)
+          -> CbvMark
+          -> UniqSM (Bool, [(Var,CbvMark)], CoreExpr -> CoreExpr, CoreExpr)
 unbox_one_arg opts arg_var ds
           DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args
                             , dcpc_co = co }
+          _marked_cbv
   = do { pat_bndrs_uniqs <- getUniquesM
        ; let ex_name_fss = map getOccFS $ dataConExTyCoVars dc
+             -- Create new arguments we get when unboxing dc
              (ex_tvs', arg_ids) =
                dataConRepFSInstPat (ex_name_fss ++ repeat ww_prefix) pat_bndrs_uniqs (idMult arg_var) dc tc_args
+             -- Apply str info to new args.
              arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids ds
              unbox_fn = mkUnpackCase (Var arg_var) co (idMult arg_var)
                                      dc (ex_tvs' ++ arg_ids')
-       ; (_, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ arg_ids')
+             -- Mark arguments coming out of strict fields as evaluated and give them cbv semantics. See Note [Strict Worker Ids]
+             cbv_arg_marks = zipWithEqual "unbox_one_arg" bangToMark (dataConRepStrictness dc) arg_ids'
+             unf_args = zipWith setEvald arg_ids' cbv_arg_marks
+             cbv_marks = (map (const NotMarkedCbv) ex_tvs') ++ cbv_arg_marks
+       ; (_sub_args_quality, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ unf_args) cbv_marks
        ; let wrap_arg = mkConApp dc (map Type tc_args ++ wrap_args) `mkCast` mkSymCo co
-       ; return (True, worker_args, unbox_fn . wrap_fn, wrap_arg) }
+       ; return (goodWorker, worker_args, unbox_fn . wrap_fn, wrap_arg) }
                           -- Don't pass the arg, rebox instead
+    where bangToMark :: StrictnessMark -> Id -> CbvMark
+          bangToMark NotMarkedStrict _ = NotMarkedCbv
+          bangToMark MarkedStrict v
+            | isUnliftedType (idType v) = NotMarkedCbv
+            | otherwise                 = MarkedCbv
+          setEvald var NotMarkedCbv  = var
+          setEvald var MarkedCbv     = setIdUnfolding var evaldUnfolding
 
 -- | Tries to find a suitable absent filler to bind the given absent identifier
 -- to. See Note [Absent fillers].
@@ -795,7 +927,7 @@ function is worthy for splitting:
      g c p = case p of (a,b) -> $gw c a b
      $gw c a b = if c then a else b
 
-2a But do /not/ split if Boxity Analysis said "Boxed".
+2a But do /not/ unbox if Boxity Analysis said "Boxed".
    In this case, 'wantToUnboxArg' returns 'StopUnboxing'.
    Otherwise we risk decomposing and reboxing a massive
    tuple which is barely used. Example:
@@ -809,6 +941,11 @@ function is worthy for splitting:
    Imagine that it had millions of fields. This actually happened
    in GHC itself where the tuple was DynFlags
 
+2b But if e.g. a large tuple or product type is always demanded we might
+   decide to "unlift" it. That is tighten the calling convention for that
+   argument to require it to be passed as a pointer to the value itself.
+   See Note [WW for calling convention].
+
 3. In all other cases (e.g., lazy, used demand and not eval'd),
    'finaliseArgBoxities' will have cleared the Boxity flag to 'Boxed'
    (see Note [Finalising boxity for demand signatures] in GHC.Core.Opt.DmdAnal)
@@ -1163,14 +1300,14 @@ mkWWcpr_entry
   :: WwOpts
   -> Type                              -- function body
   -> Cpr                               -- CPR analysis results
-  -> UniqSM (Bool,                     -- Is w/w'ing useful?
+  -> UniqSM (Bool,            -- Is w/w'ing useful?
              CoreExpr -> CoreExpr,     -- New wrapper. 'nop_fn' if not useful
              CoreExpr -> CoreExpr,     -- New worker.  'nop_fn' if not useful
              Type)                     -- Type of worker's body.
                                        -- Just the input body_ty if not useful
 -- ^ Entrypoint to CPR W/W. See Note [Worker/wrapper for CPR] for an overview.
 mkWWcpr_entry opts body_ty body_cpr
-  | not (wo_cpr_anal opts) = return (False, nop_fn, nop_fn, body_ty)
+  | not (wo_cpr_anal opts) = return (badWorker, nop_fn, nop_fn, body_ty)
   | otherwise = do
     -- Part (1)
     res_bndr <- mk_res_bndr body_ty
@@ -1188,8 +1325,8 @@ mkWWcpr_entry opts body_ty body_cpr
         work_fn body = bind_res_bndr body (work_unpack_res transit_tup) -- 1 2 3
         work_body_ty = exprType transit_tup
     return $ if not useful
-                then (False, nop_fn, nop_fn, body_ty)
-                else (True, wrap_fn, work_fn, work_body_ty)
+                then (badWorker, nop_fn, nop_fn, body_ty)
+                else (goodWorker, wrap_fn, work_fn, work_body_ty)
 
 -- | Part (1) of Note [Worker/wrapper for CPR].
 mk_res_bndr :: Type -> UniqSM Id
@@ -1212,7 +1349,7 @@ mkWWcpr :: WwOpts -> [Id] -> [Cpr] -> UniqSM CprWwResultMany
 mkWWcpr _opts vars []   =
   -- special case: No CPRs means all top (for example from FlatConCpr),
   -- hence stop WW.
-  return (False, toOL vars, map varToCoreExpr vars, nop_fn)
+  return (badWorker, toOL vars, map varToCoreExpr vars, nop_fn)
 mkWWcpr opts  vars cprs = do
   -- No existentials in 'vars'. 'wantToUnboxResult' should have checked that.
   massertPpr (not (any isTyVar vars)) (ppr vars $$ ppr cprs)
@@ -1231,7 +1368,7 @@ mkWWcpr_one opts res_bndr cpr
   , Unbox dcpc arg_cprs <- wantToUnboxResult (wo_fam_envs opts) (idType res_bndr) cpr
   = unbox_one_result opts res_bndr arg_cprs dcpc
   | otherwise
-  = return (False, unitOL res_bndr, varToCoreExpr res_bndr, nop_fn)
+  = return (badWorker, unitOL res_bndr, varToCoreExpr res_bndr, nop_fn)
 
 unbox_one_result
   :: WwOpts -> Id -> [Cpr] -> DataConPatContext -> UniqSM CprWwResultOne
@@ -1260,8 +1397,8 @@ unbox_one_result opts res_bndr arg_cprs
   -- Don't try to WW an unboxed tuple return type when there's nothing inside
   -- to unbox further.
   return $ if isUnboxedTupleDataCon dc && not nested_useful
-              then ( False, unitOL res_bndr, Var res_bndr, nop_fn )
-              else ( True
+              then ( badWorker, unitOL res_bndr, Var res_bndr, nop_fn )
+              else ( goodWorker
                    , transit_vars
                    , rebuilt_result
                    , this_work_unbox_res . work_unbox_res
diff --git a/compiler/GHC/Core/Rules.hs b/compiler/GHC/Core/Rules.hs
index 2eabc1db0c..14bcd4564f 100644
--- a/compiler/GHC/Core/Rules.hs
+++ b/compiler/GHC/Core/Rules.hs
@@ -387,8 +387,10 @@ pprRuleBase rules = pprUFM rules $ \rss ->
 -- successful.
 lookupRule :: RuleOpts -> InScopeEnv
            -> (Activation -> Bool)      -- When rule is active
-           -> Id -> [CoreExpr]
-           -> [CoreRule] -> Maybe (CoreRule, CoreExpr)
+           -> Id -- Function head
+           -> [CoreExpr] -- Args
+           -> [CoreRule] -- Rules
+           -> Maybe (CoreRule, CoreExpr)
 
 -- See Note [Extra args in the target]
 -- See comments on matchRule
diff --git a/compiler/GHC/Core/Tidy.hs b/compiler/GHC/Core/Tidy.hs
index aaf42eafd2..63473ca68a 100644
--- a/compiler/GHC/Core/Tidy.hs
+++ b/compiler/GHC/Core/Tidy.hs
@@ -10,26 +10,30 @@ The code for *top-level* bindings is in GHC.Iface.Tidy.
 
 {-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
 module GHC.Core.Tidy (
-        tidyExpr, tidyRules, tidyUnfolding
+        tidyExpr, tidyRules, tidyUnfolding, tidyCbvInfoTop
     ) where
 
 import GHC.Prelude
 
 import GHC.Core
+import GHC.Core.Type
+
 import GHC.Core.Seq ( seqUnfolding )
+import GHC.Core.Utils ( computeCbvInfo )
 import GHC.Types.Id
 import GHC.Types.Id.Info
 import GHC.Types.Demand ( zapDmdEnvSig )
-import GHC.Core.Type     ( tidyType, tidyVarBndr )
 import GHC.Core.Coercion ( tidyCo )
 import GHC.Types.Var
 import GHC.Types.Var.Env
 import GHC.Types.Unique (getUnique)
 import GHC.Types.Unique.FM
 import GHC.Types.Name hiding (tidyNameOcc)
+import GHC.Types.Name.Set
 import GHC.Types.SrcLoc
 import GHC.Types.Tickish
 import GHC.Data.Maybe
+import GHC.Utils.Misc
 import Data.List (mapAccumL)
 
 {-
@@ -45,18 +49,67 @@ tidyBind :: TidyEnv
          ->  (TidyEnv, CoreBind)
 
 tidyBind env (NonRec bndr rhs)
-  = tidyLetBndr env env bndr =: \ (env', bndr') ->
-    (env', NonRec bndr' (tidyExpr env' rhs))
+  = -- pprTrace "tidyBindNonRec" (ppr bndr) $
+    let cbv_bndr = (tidyCbvInfoLocal bndr rhs)
+        (env', bndr') = tidyLetBndr env env cbv_bndr
+        tidy_rhs = (tidyExpr env' rhs)
+    in (env', NonRec bndr' tidy_rhs)
 
 tidyBind env (Rec prs)
-  = let
-       (bndrs, rhss)  = unzip prs
-       (env', bndrs') = mapAccumL (tidyLetBndr env') env bndrs
+  = -- pprTrace "tidyBindRec" (ppr $ map fst prs) $
+    let
+       cbv_bndrs = map ((\(bnd,rhs) -> tidyCbvInfoLocal bnd rhs)) prs
+       (_bndrs, rhss)  = unzip prs
+       (env', bndrs') = mapAccumL (tidyLetBndr env') env cbv_bndrs
     in
     map (tidyExpr env') rhss =: \ rhss' ->
     (env', Rec (zip bndrs' rhss'))
 
 
+-- Note [Attaching CBV Marks to ids]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Before tidy, function arguments which have a call-by-value semantics are identified
+-- by having an `OtherCon[]` unfolding. During tidy, we transform this information into CBV (call-by-value)
+-- marks. The marks themselves then are put onto the function id itself.
+-- This means the code generator can get the full calling convention by only looking at the function
+-- itself without having to inspect the RHS for potential argument unfoldings.
+--
+-- The actual logic is in tidyCbvInfo and takes:
+-- * The function id
+-- * The functions rhs
+-- And gives us back the function annotated with the marks.
+-- We call it in:
+-- * tidyTopPair for top level bindings
+-- * tidyBind for local bindings.
+--
+-- Not that we *have* to look at the untidied rhs.
+-- During tidying some knot-tying occurs which can blow up
+-- if we look at the types of the arguments, but here we dont:
+-- we only check if the manifest lambdas have OtherCon unfoldings
+-- and these remain valid post tidy.
+--
+-- If the id is boot-exported we don't use a cbv calling convention via marks,
+-- as the boot file won't contain them. Which means code calling boot-exported
+-- ids might expect these ids to have a vanilla calling convention even if we
+-- determine a different one here.
+-- To be able to avoid this we pass a set of boot exported ids for this module around.
+-- For non top level ids we can skip this. Local ids are never boot-exported
+-- as boot files don't have unfoldings. So there this isn't a concern.
+-- See also Note [Strict Worker Ids]
+
+
+-- See Note [Attaching CBV Marks to ids]
+tidyCbvInfoTop :: HasDebugCallStack => NameSet -> Id -> CoreExpr -> Id
+tidyCbvInfoTop boot_exports id rhs
+  -- Can't change calling convention for boot exported things
+  | elemNameSet (idName id) boot_exports = id
+  | otherwise = computeCbvInfo id rhs
+
+-- See Note [Attaching CBV Marks to ids]
+tidyCbvInfoLocal :: HasDebugCallStack => Id -> CoreExpr -> Id
+tidyCbvInfoLocal id rhs
+  | otherwise = computeCbvInfo id rhs
+
 ------------  Expressions  --------------
 tidyExpr :: TidyEnv -> CoreExpr -> CoreExpr
 tidyExpr env (Var v)       = Var (tidyVarOcc env v)
diff --git a/compiler/GHC/Core/Type.hs b/compiler/GHC/Core/Type.hs
index bd3b0122d5..1cacfca468 100644
--- a/compiler/GHC/Core/Type.hs
+++ b/compiler/GHC/Core/Type.hs
@@ -126,6 +126,7 @@ module GHC.Core.Type (
         isBoxedRuntimeRep,
         isLiftedLevity, isUnliftedLevity,
         isUnliftedType, isBoxedType, mightBeUnliftedType, isUnboxedTupleType, isUnboxedSumType,
+        isStateType,
         isAlgType, isDataFamilyAppType,
         isPrimitiveType, isStrictType,
         isLevityTy, isLevityVar,
@@ -321,6 +322,8 @@ import Control.Monad    ( guard )
 --
 -- [Primitive]          Iff it is a built-in type that can't be expressed in Haskell.
 --
+-- [Unlifted]           Anything that isn't lifted is considered unlifted.
+--
 -- Currently, all primitive types are unlifted, but that's not necessarily
 -- the case: for example, @Int@ could be primitive.
 --
@@ -2424,6 +2427,7 @@ buildSynTyCon name binders res_kind roles rhs
 isLiftedType_maybe :: HasDebugCallStack => Type -> Maybe Bool
 isLiftedType_maybe ty = case coreFullView (getRuntimeRep ty) of
   ty' | isLiftedRuntimeRep ty'  -> Just True
+      | isUnliftedRuntimeRep ty' -> Just False
   TyConApp {}                   -> Just False  -- Everything else is unlifted
   _                             -> Nothing     -- representation-polymorphic
 
@@ -2441,6 +2445,13 @@ isUnliftedType ty
   = not (isLiftedType_maybe ty `orElse`
          pprPanic "isUnliftedType" (ppr ty <+> dcolon <+> ppr (typeKind ty)))
 
+-- | State token type.
+isStateType :: Type -> Bool
+isStateType ty
+  = case tyConAppTyCon_maybe ty of
+        Just tycon -> tycon == statePrimTyCon
+        _          -> False
+
 -- | Returns:
 --
 -- * 'False' if the type is /guaranteed/ lifted or
diff --git a/compiler/GHC/Core/Utils.hs b/compiler/GHC/Core/Utils.hs
index 411628c261..3116b6bd04 100644
--- a/compiler/GHC/Core/Utils.hs
+++ b/compiler/GHC/Core/Utils.hs
@@ -56,6 +56,9 @@ module GHC.Core.Utils (
         -- * Join points
         isJoinBind,
 
+        -- * Tag inference
+        computeCbvInfo,
+
         -- * unsafeEqualityProof
         isUnsafeEqualityProof,
 
@@ -93,7 +96,7 @@ import GHC.Types.Tickish
 import GHC.Types.Id
 import GHC.Types.Id.Info
 import GHC.Types.Unique
-import GHC.Types.Basic     ( Arity )
+import GHC.Types.Basic     ( Arity, CbvMark(..), isMarkedCbv )
 import GHC.Types.Unique.Set
 
 import GHC.Data.FastString
@@ -2443,7 +2446,11 @@ tryEtaReduce bndrs body
                     -- We always want args for join points so
                     -- we should never eta-reduce to a trivial expression.
                     -- See Note [Invariants on join points] in GHC.Core, and #20599
-                    not (isJoinId fun)
+                    not (isJoinId fun) &&
+                    -- And the function doesn't require visible arguments as part of
+                    -- it's calling convention. See Note [Strict Worker Ids]
+                    idCbvMarkArity fun == 0
+
 
     ---------------
     fun_arity fun             -- See Note [Arity care]
@@ -2591,15 +2598,89 @@ isJoinBind (NonRec b _)       = isJoinId b
 isJoinBind (Rec ((b, _) : _)) = isJoinId b
 isJoinBind _                  = False
 
-dumpIdInfoOfProgram :: (IdInfo -> SDoc) -> CoreProgram -> SDoc
-dumpIdInfoOfProgram ppr_id_info binds = vcat (map printId ids)
+dumpIdInfoOfProgram :: Bool -> (IdInfo -> SDoc) -> CoreProgram -> SDoc
+dumpIdInfoOfProgram dump_locals ppr_id_info binds = vcat (map printId ids)
   where
   ids = sortBy (stableNameCmp `on` getName) (concatMap getIds binds)
   getIds (NonRec i _) = [ i ]
   getIds (Rec bs)     = map fst bs
-  printId id | isExportedId id = ppr id <> colon <+> (ppr_id_info (idInfo id))
+  -- By default only include full info for exported ids, unless we run in the verbose
+  -- pprDebug mode.
+  printId id | isExportedId id || dump_locals = ppr id <> colon <+> (ppr_id_info (idInfo id))
              | otherwise       = empty
 
+{-
+************************************************************************
+*                                                                      *
+\subsection{Tag inference things}
+*                                                                      *
+************************************************************************
+-}
+
+-- | For a binding we:
+-- * Look at the args
+-- * Mark any with Unf=OtherCon[] as call-by-value, unless it's an unlifted type already.
+-- * Potentially combine it with existing call-by-value marks (from ww)
+-- * Update the id
+-- See Note [Attaching CBV Marks to ids].
+computeCbvInfo :: HasCallStack
+               => Id            -- The function
+               -> CoreExpr      -- It's RHS
+               -> Id
+computeCbvInfo id rhs =
+  -- pprTrace "computeCbv" (hang (ppr id) 2 (ppr dmd $$ ppr dmds)) $
+  -- TODO: For perf reasons we could skip looking at non VanillaId/StrictWorkerId/JoinId bindings
+  cbv_bndr
+  where
+    (_,val_args,_body) = collectTyAndValBinders rhs
+    new_marks = mkCbvMarks val_args
+    cbv_marks = assertPpr (checkMarks id new_marks)
+        (ppr id <+> ppr (idType id) $$ text "old:" <> ppr (idCbvMarks_maybe id) $$ text "new:" <> ppr new_marks $$ text "rhs:" <> ppr rhs)
+        new_marks
+    cbv_bndr
+        | valid_unlifted_worker val_args
+        -- Avoid retaining the original rhs
+        = cbv_marks `seqList` setIdCbvMarks id cbv_marks
+        | otherwise =
+          -- pprTraceDebug "tidyCbvInfo: Worker seems to take unboxed tuple/sum types!" (ppr id <+> ppr rhs)
+          id
+    -- We don't set CBV marks on workers which take unboxed tuples or sums as arguments.
+    -- Doing so would require us to compute the result of unarise here in order to properly determine
+    -- argument positions at runtime.
+    -- In practice this doesn't matter much. Most "interesting" functions will get a W/W split which will eliminate
+    -- unboxed tuple arguments, and unboxed sums are rarely used.
+    valid_unlifted_worker args =
+      -- pprTrace "valid_unlifted" (ppr id $$ ppr args) $
+      not $ (any (\arg -> isMultiValArg arg) args)
+    isMultiValArg id =
+      let ty = idType id
+      in not (isStateType ty) && (isUnboxedTupleType ty || isUnboxedSumType ty)
+    -- Only keep relevant marks. We *don't* have to cover all arguments. Only these
+    -- that we might want to pass call-by-value.
+    trimMarks :: [CbvMark] -> [Id] -> [CbvMark]
+    trimMarks marks val_args =
+        map fst .
+          -- Starting at the end, drop all non-cbv marks, and marks applied to unlifted types
+          dropWhileEndLE (\(m,v) -> not (isMarkedCbv m) || isUnliftedType (idType v)) $
+          zip marks val_args
+
+    mkCbvMarks :: ([Id]) -> [CbvMark]
+    mkCbvMarks = map mkMark
+      where
+        cbv_arg arg = isEvaldUnfolding (idUnfolding arg)
+        mkMark arg = if cbv_arg arg && (not $ isUnliftedType (idType arg))
+          then MarkedCbv
+          else NotMarkedCbv
+    -- If we determined earlier one an argument should be passed cbv it should
+    -- still be so here.
+    checkMarks id new_marks
+      | Just old_marks <- idCbvMarks_maybe id
+      = length (trimMarks old_marks val_args) <= length new_marks &&
+        and (zipWith checkNewMark old_marks new_marks)
+      | otherwise = True
+    checkNewMark old new =
+      isMarkedCbv new || (not $ isMarkedCbv old)
+
 
 {- *********************************************************************
 *                                                                      *
diff --git a/compiler/GHC/CoreToIface.hs b/compiler/GHC/CoreToIface.hs
index 426dcae9cf..11fd63e0bc 100644
--- a/compiler/GHC/CoreToIface.hs
+++ b/compiler/GHC/CoreToIface.hs
@@ -441,6 +441,7 @@ toIfaceLetBndr id  = IfLetBndr (occNameFS (getOccName id))
 
 toIfaceIdDetails :: IdDetails -> IfaceIdDetails
 toIfaceIdDetails VanillaId                      = IfVanillaId
+toIfaceIdDetails (StrictWorkerId dmds)          = IfStrictWorkerId dmds
 toIfaceIdDetails (DFunId {})                    = IfDFunId
 toIfaceIdDetails (RecSelId { sel_naughty = n
                            , sel_tycon = tc })  =
diff --git a/compiler/GHC/CoreToStg.hs b/compiler/GHC/CoreToStg.hs
index 431d831125..6e0e4600dd 100644
--- a/compiler/GHC/CoreToStg.hs
+++ b/compiler/GHC/CoreToStg.hs
@@ -30,6 +30,7 @@ import GHC.Core.DataCon
 
 import GHC.Stg.Syntax
 import GHC.Stg.Debug
+import GHC.Stg.Utils
 
 import GHC.Types.RepType
 import GHC.Types.Id.Make ( coercionTokenId )
diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs
index 99c4cd4e8c..ac626931cd 100644
--- a/compiler/GHC/CoreToStg/Prep.hs
+++ b/compiler/GHC/CoreToStg/Prep.hs
@@ -1024,7 +1024,7 @@ cpeApp top_env expr
         go terminal as = (terminal, as)
 
     cpe_app :: CorePrepEnv
-            -> CoreExpr
+            -> CoreExpr -- The thing we are calling
             -> [ArgInfo]
             -> UniqSM (Floats, CpeRhs)
     cpe_app env (Var f) (CpeApp Type{} : CpeApp arg : args)
@@ -1170,7 +1170,7 @@ cpeApp top_env expr
     rebuild_app
         :: CorePrepEnv
         -> [ArgInfo]                  -- The arguments (inner to outer)
-        -> CpeApp
+        -> CpeApp                     -- The function
         -> Floats
         -> [Demand]
         -> Maybe Arity
@@ -1519,7 +1519,8 @@ Note [Eta expansion of hasNoBinding things in CorePrep]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 maybeSaturate deals with eta expanding to saturate things that can't deal with
 unsaturated applications (identified by 'hasNoBinding', currently just
-foreign calls and unboxed tuple/sum constructors).
+foreign calls, unboxed tuple/sum constructors and strict workers).
+See Note [Strict Worker Ids]
 
 Historical Note: Note that eta expansion in CorePrep used to be very fragile
 due to the "prediction" of CAFfyness that we used to make during tidying.
@@ -1533,14 +1534,27 @@ maybeSaturate fn expr n_args unsat_ticks
   | hasNoBinding fn        -- There's no binding
   = return $ wrapLamBody (\body -> foldr mkTick body unsat_ticks) sat_expr
 
+  | mark_arity > 0 -- A strict worker. See Note [Strict Worker Ids]
+  , not applied_marks
+  = assertPpr
+      ( not (isJoinId fn)) -- See Note [Do not eta-expand join points]
+      ( ppr fn $$ text "expr:" <+> ppr expr $$ text "n_args:" <+> ppr n_args $$
+          text "marks:" <+> ppr (idCbvMarks_maybe fn) $$
+          text "join_arity" <+> ppr (idJoinArity fn)
+       ) $
+    return sat_expr
+
   | otherwise
   = assert (null unsat_ticks) $
     return expr
   where
-    fn_arity     = idArity fn
-    excess_arity = fn_arity - n_args
-    sat_expr     = cpeEtaExpand excess_arity expr
-
+    mark_arity    = idCbvMarkArity fn
+    fn_arity      = idArity fn
+    excess_arity  = (max fn_arity mark_arity) - n_args
+    sat_expr      = cpeEtaExpand excess_arity expr
+    applied_marks = n_args >= (length . dropWhile (not . isMarkedCbv) . reverse . expectJust "maybeSaturate" $ (idCbvMarks_maybe fn))
+    -- For join points we never eta-expand (See Note [Do not eta-expand join points])
+    -- so we assert all arguments that need to be passed cbv are visible so that the backend can evalaute them if required..
 {-
 ************************************************************************
 *                                                                      *
@@ -1620,13 +1634,19 @@ tryEtaReducePrep bndrs expr@(App _ _)
   , not (any (`elemVarSet` fvs_remaining) bndrs)
   , exprIsHNF remaining_expr   -- Don't turn value into a non-value
                                -- else the behaviour with 'seq' changes
-  = Just remaining_expr
+  =
+    -- pprTrace "prep-reduce" (
+    --   text "reduced:" <> ppr remaining_expr $$
+    --   ppr (remaining_args)
+    --   ) $
+    Just remaining_expr
   where
     (f, args) = collectArgs expr
     remaining_expr = mkApps f remaining_args
     fvs_remaining = exprFreeVars remaining_expr
     (remaining_args, last_args) = splitAt n_remaining args
     n_remaining = length args - length bndrs
+    n_remaining_vals = length $ filter isRuntimeArg remaining_args
 
     ok bndr (Var arg) = bndr == arg
     ok _    _         = False
@@ -1634,9 +1654,11 @@ tryEtaReducePrep bndrs expr@(App _ _)
     -- We can't eta reduce something which must be saturated.
     ok_to_eta_reduce (Var f) =  not (hasNoBinding f) &&
                                 not (isLinearType (idType f)) && -- Unsure why this is unsafe.
-                                (not (isJoinId f) || idJoinArity f <= n_remaining)
+                                (not (isJoinId f) || idJoinArity f <= n_remaining) &&
                                 -- Don't undersaturate join points.
                                 -- See Note [Invariants on join points] in GHC.Core, and #20599
+                                (idCbvMarkArity f <= n_remaining_vals)
+                                -- Similar for StrictWorkerIds. See Note [Strict Worker Ids]
 
 
     ok_to_eta_reduce _       = False -- Safe. ToDo: generalise
diff --git a/compiler/GHC/Driver/Config/StgToCmm.hs b/compiler/GHC/Driver/Config/StgToCmm.hs
index ae59e41fdf..ab64076a79 100644
--- a/compiler/GHC/Driver/Config/StgToCmm.hs
+++ b/compiler/GHC/Driver/Config/StgToCmm.hs
@@ -43,6 +43,7 @@ initStgToCmmConfig dflags mod = StgToCmmConfig
   , stgToCmmPIE           = gopt Opt_PIE                   dflags
   , stgToCmmExtDynRefs    = gopt Opt_ExternalDynamicRefs   dflags
   , stgToCmmDoBoundsCheck = gopt Opt_DoBoundsChecking      dflags
+  , stgToCmmDoTagCheck    = gopt Opt_DoTagInferenceChecks  dflags
   -- backend flags
   , stgToCmmAllowBigArith             = not ncg
   , stgToCmmAllowQuotRemInstr         = ncg  && (x86ish || ppc)
diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs
index e95b181743..759d8b48c0 100644
--- a/compiler/GHC/Driver/Flags.hs
+++ b/compiler/GHC/Driver/Flags.hs
@@ -92,7 +92,9 @@ data DumpFlag
    | Opt_D_dump_prep
    | Opt_D_dump_stg_from_core -- ^ Initial STG (CoreToStg output)
    | Opt_D_dump_stg_unarised  -- ^ STG after unarise
-   | Opt_D_dump_stg_final     -- ^ Final STG (after stg2stg)
+   | Opt_D_dump_stg_cg        -- ^ STG (after stg2stg)
+   | Opt_D_dump_stg_tags      -- ^ Result of tag inference analysis.
+   | Opt_D_dump_stg_final     -- ^ Final STG (before cmm gen)
    | Opt_D_dump_call_arity
    | Opt_D_dump_exitify
    | Opt_D_dump_stranal
@@ -224,6 +226,7 @@ data GeneralFlag
    | Opt_WeightlessBlocklayout         -- ^ Layout based on last instruction per block.
    | Opt_CprAnal
    | Opt_WorkerWrapper
+   | Opt_WorkerWrapperUnlift  -- ^ Do W/W split for unlifting even if we won't unbox anything.
    | Opt_SolveConstantDicts
    | Opt_AlignmentSanitisation
    | Opt_CatchNonexhaustiveCases
@@ -231,6 +234,9 @@ data GeneralFlag
    | Opt_CoreConstantFolding
    | Opt_FastPAPCalls                  -- #6084
 
+   -- Inference flags
+   | Opt_DoTagInferenceChecks
+
    -- PreInlining is on by default. The option is there just to see how
    -- bad things get if you turn it off!
    | Opt_SimplPreInlining
@@ -443,6 +449,7 @@ optimisationFlags = EnumSet.fromList
    , Opt_WeightlessBlocklayout
    , Opt_CprAnal
    , Opt_WorkerWrapper
+   , Opt_WorkerWrapperUnlift
    , Opt_SolveConstantDicts
    , Opt_CatchNonexhaustiveCases
    , Opt_IgnoreAsserts
diff --git a/compiler/GHC/Driver/GenerateCgIPEStub.hs b/compiler/GHC/Driver/GenerateCgIPEStub.hs
index e1a751e762..96bf352e51 100644
--- a/compiler/GHC/Driver/GenerateCgIPEStub.hs
+++ b/compiler/GHC/Driver/GenerateCgIPEStub.hs
@@ -27,8 +27,10 @@ import GHC.Settings (Platform, platformUnregisterised)
 import GHC.StgToCmm.Monad (getCmm, initC, runC, initFCodeState)
 import GHC.StgToCmm.Prof (initInfoTableProv)
 import GHC.StgToCmm.Types (CgInfos (..), ModuleLFInfos)
+import GHC.Stg.InferTags.TagSig (TagSig)
 import GHC.Types.IPE (InfoTableProvMap (provInfoTables), IpeSourceLocation)
 import GHC.Types.Name.Set (NonCaffySet)
+import GHC.Types.Name.Env (NameEnv)
 import GHC.Types.Tickish (GenTickish (SourceNote))
 import GHC.Unit.Types (Module)
 import GHC.Utils.Misc
@@ -178,8 +180,8 @@ The find the tick:
     remembered in a `Maybe`.
 -}
 
-generateCgIPEStub :: HscEnv -> Module -> InfoTableProvMap -> Stream IO CmmGroupSRTs (NonCaffySet, ModuleLFInfos) -> Stream IO CmmGroupSRTs CgInfos
-generateCgIPEStub hsc_env this_mod denv s = do
+generateCgIPEStub :: HscEnv -> Module -> InfoTableProvMap -> NameEnv TagSig -> Stream IO CmmGroupSRTs (NonCaffySet, ModuleLFInfos) -> Stream IO CmmGroupSRTs CgInfos
+generateCgIPEStub hsc_env this_mod denv tag_sigs s = do
   let dflags   = hsc_dflags hsc_env
       platform = targetPlatform dflags
       fstate   = initFCodeState platform
@@ -196,7 +198,7 @@ generateCgIPEStub hsc_env this_mod denv s = do
   (_, ipeCmmGroupSRTs) <- liftIO $ cmmPipeline hsc_env (emptySRT this_mod) ipeCmmGroup
   Stream.yield ipeCmmGroupSRTs
 
-  return CgInfos {cgNonCafs = nonCaffySet, cgLFInfos = moduleLFInfos, cgIPEStub = ipeStub}
+  return CgInfos {cgNonCafs = nonCaffySet, cgLFInfos = moduleLFInfos, cgIPEStub = ipeStub, cgTagSigs = tag_sigs}
   where
     collect :: Platform -> [(Label, CmmInfoTable, Maybe IpeSourceLocation)] -> CmmGroupSRTs -> IO ([(Label, CmmInfoTable, Maybe IpeSourceLocation)], CmmGroupSRTs)
     collect platform acc cmmGroupSRTs = do
diff --git a/compiler/GHC/Driver/Main.hs b/compiler/GHC/Driver/Main.hs
index 4c8e494321..b07d566a5b 100644
--- a/compiler/GHC/Driver/Main.hs
+++ b/compiler/GHC/Driver/Main.hs
@@ -246,6 +246,8 @@ import GHC.Driver.GenerateCgIPEStub (generateCgIPEStub)
 import Data.List.NonEmpty (NonEmpty ((:|)))
 
 
+import GHC.Stg.InferTags
+
 {- **********************************************************************
 %*                                                                      *
                 Initialisation
@@ -1801,7 +1803,15 @@ doCodeGen hsc_env this_mod denv data_tycons
         tmpfs      = hsc_tmpfs  hsc_env
         platform   = targetPlatform dflags
 
-    putDumpFileMaybe logger Opt_D_dump_stg_final "Final STG:" FormatSTG (pprGenStgTopBindings (initStgPprOpts dflags) stg_binds_w_fvs)
+    putDumpFileMaybe logger Opt_D_dump_stg_cg "CodeGenInput STG:" FormatSTG
+        (pprGenStgTopBindings (initStgPprOpts dflags) stg_binds_w_fvs)
+
+    -- Do tag inference on optimized STG
+    (!stg_post_infer,export_tag_info) <-
+        {-# SCC "StgTagFields" #-} inferTags dflags logger this_mod stg_binds_w_fvs
+
+    putDumpFileMaybe logger Opt_D_dump_stg_final "Final STG:" FormatSTG
+        (pprGenStgTopBindings (initStgPprOpts dflags) stg_post_infer)
 
     let stg_to_cmm dflags mod = case stgToCmmHook hooks of
                         Nothing -> StgToCmm.codeGen logger tmpfs (initStgToCmmConfig dflags mod)
@@ -1809,8 +1819,8 @@ doCodeGen hsc_env this_mod denv data_tycons
 
     let cmm_stream :: Stream IO CmmGroup ModuleLFInfos
         -- See Note [Forcing of stg_binds]
-        cmm_stream = stg_binds_w_fvs `seqList` {-# SCC "StgToCmm" #-}
-            stg_to_cmm dflags this_mod denv data_tycons cost_centre_info stg_binds_w_fvs hpc_info
+        cmm_stream = stg_post_infer `seqList` {-# SCC "StgToCmm" #-}
+            stg_to_cmm dflags this_mod denv data_tycons cost_centre_info stg_post_infer hpc_info
 
         -- codegen consumes a stream of CmmGroup, and produces a new
         -- stream of CmmGroup (not necessarily synchronised: one
@@ -1839,7 +1849,7 @@ doCodeGen hsc_env this_mod denv data_tycons
             putDumpFileMaybe logger Opt_D_dump_cmm "Output Cmm" FormatCMM (pdoc platform a)
           return a
 
-    return $ Stream.mapM dump2 $ generateCgIPEStub hsc_env this_mod denv pipeline_stream
+    return $ Stream.mapM dump2 $ generateCgIPEStub hsc_env this_mod denv export_tag_info pipeline_stream
 
 myCoreToStgExpr :: Logger -> DynFlags -> InteractiveContext
                 -> Bool
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index 1f5626cec8..91bf2adf39 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -1611,7 +1611,7 @@ setSafeHaskell s = updM f
               safeM <- combineSafeFlags sf s
               case s of
                 Sf_Safe -> return $ dfs { safeHaskell = safeM, safeInfer = False }
-                -- leave safe inferrence on in Trustworthy mode so we can warn
+                -- leave safe inference on in Trustworthy mode so we can warn
                 -- if it could have been inferred safe.
                 Sf_Trustworthy -> do
                   l <- getCurLoc
@@ -2485,9 +2485,13 @@ dynamic_flags_deps = [
         (setDumpFlag Opt_D_dump_stg_unarised)
   , make_ord_flag defGhcFlag "ddump-stg-final"
         (setDumpFlag Opt_D_dump_stg_final)
+  , make_ord_flag defGhcFlag "ddump-stg-cg"
+        (setDumpFlag Opt_D_dump_stg_cg)
   , make_dep_flag defGhcFlag "ddump-stg"
         (setDumpFlag Opt_D_dump_stg_from_core)
         "Use `-ddump-stg-from-core` or `-ddump-stg-final` instead"
+  , make_ord_flag defGhcFlag "ddump-stg-tags"
+        (setDumpFlag Opt_D_dump_stg_tags)
   , make_ord_flag defGhcFlag "ddump-call-arity"
         (setDumpFlag Opt_D_dump_call_arity)
   , make_ord_flag defGhcFlag "ddump-exitify"
@@ -2580,6 +2584,8 @@ dynamic_flags_deps = [
         (NoArg (setGeneralFlag Opt_DoAsmLinting))
   , make_ord_flag defGhcFlag "dannot-lint"
         (NoArg (setGeneralFlag Opt_DoAnnotationLinting))
+  , make_ord_flag defGhcFlag "dtag-inference-checks"
+        (NoArg (setGeneralFlag Opt_DoTagInferenceChecks))
   , make_ord_flag defGhcFlag "dshow-passes"
         (NoArg $ forceRecompile >> (setVerbosity $ Just 2))
   , make_ord_flag defGhcFlag "dfaststring-stats"
@@ -3434,6 +3440,7 @@ fFlagsDeps = [
   flagSpec "unbox-strict-fields"              Opt_UnboxStrictFields,
   flagSpec "version-macros"                   Opt_VersionMacros,
   flagSpec "worker-wrapper"                   Opt_WorkerWrapper,
+  flagSpec "worker-wrapper-cbv"               Opt_WorkerWrapperUnlift,
   flagSpec "solve-constant-dicts"             Opt_SolveConstantDicts,
   flagSpec "catch-nonexhaustive-cases"        Opt_CatchNonexhaustiveCases,
   flagSpec "alignment-sanitisation"           Opt_AlignmentSanitisation,
diff --git a/compiler/GHC/HsToCore.hs b/compiler/GHC/HsToCore.hs
index c32f03eb97..0a210e0871 100644
--- a/compiler/GHC/HsToCore.hs
+++ b/compiler/GHC/HsToCore.hs
@@ -133,7 +133,8 @@ deSugar hsc_env
                             tcg_insts        = insts,
                             tcg_fam_insts    = fam_insts,
                             tcg_hpc          = other_hpc_info,
-                            tcg_complete_matches = complete_matches
+                            tcg_complete_matches = complete_matches,
+                            tcg_self_boot    = self_boot
                             })
 
   = do { let dflags = hsc_dflags hsc_env
@@ -242,6 +243,7 @@ deSugar hsc_env
                 mg_fam_insts    = fam_insts,
                 mg_inst_env     = inst_env,
                 mg_fam_inst_env = fam_inst_env,
+                mg_boot_exports = bootExports self_boot,
                 mg_patsyns      = patsyns,
                 mg_rules        = ds_rules_for_imps,
                 mg_binds        = ds_binds,
diff --git a/compiler/GHC/Iface/Make.hs b/compiler/GHC/Iface/Make.hs
index 28ddcac422..c0471cd413 100644
--- a/compiler/GHC/Iface/Make.hs
+++ b/compiler/GHC/Iface/Make.hs
@@ -159,16 +159,19 @@ mkFullIface hsc_env partial_iface mb_cg_infos = do
 
 updateDecl :: [IfaceDecl] -> Maybe CgInfos -> [IfaceDecl]
 updateDecl decls Nothing = decls
-updateDecl decls (Just CgInfos{ cgNonCafs = NonCaffySet non_cafs, cgLFInfos = lf_infos }) = map update_decl decls
+updateDecl decls (Just CgInfos{ cgNonCafs = NonCaffySet non_cafs, cgLFInfos = lf_infos, cgTagSigs = tag_sigs })
+  = map update_decl decls
   where
     update_decl (IfaceId nm ty details infos)
       | let not_caffy = elemNameSet nm non_cafs
       , let mb_lf_info = lookupNameEnv lf_infos nm
-      , warnPprTrace (isNothing mb_lf_info) "Name without LFInfo" (ppr nm) True
+      , let sig = lookupNameEnv tag_sigs nm
+      , warnPprTrace (isNothing mb_lf_info) "updateDecl" (text "Name without LFInfo:" <+> ppr nm) True
         -- Only allocate a new IfaceId if we're going to update the infos
-      , isJust mb_lf_info || not_caffy
+      , isJust mb_lf_info || not_caffy || isJust sig
       = IfaceId nm ty details $
-          (if not_caffy then (HsNoCafRefs :) else id)
+          (if not_caffy then (HsNoCafRefs :) else id) $
+          (if isJust sig then (HsTagSig (fromJust sig):) else id) $
           (case mb_lf_info of
              Nothing -> infos -- LFInfos not available when building .cmm files
              Just lf_info -> HsLFInfo (toIfaceLFInfo nm lf_info) : infos)
diff --git a/compiler/GHC/Iface/Syntax.hs b/compiler/GHC/Iface/Syntax.hs
index 39f0bd5336..c735a2f94f 100644
--- a/compiler/GHC/Iface/Syntax.hs
+++ b/compiler/GHC/Iface/Syntax.hs
@@ -67,6 +67,7 @@ import GHC.Types.Var( VarBndr(..), binderVar, tyVarSpecToBinders )
 import GHC.Core.TyCon ( Role (..), Injectivity(..), tyConBndrVisArgFlag )
 import GHC.Core.DataCon (SrcStrictness(..), SrcUnpackedness(..))
 import GHC.Builtin.Types ( constraintKindTyConName )
+import GHC.Stg.InferTags.TagSig
 
 import GHC.Utils.Lexeme (isLexSym)
 import GHC.Utils.Fingerprint
@@ -352,6 +353,7 @@ data IfaceInfoItem
   | HsNoCafRefs
   | HsLevity                         -- Present <=> never representation-polymorphic
   | HsLFInfo        IfaceLFInfo
+  | HsTagSig        TagSig
 
 -- NB: Specialisations and rules come in separately and are
 -- only later attached to the Id.  Partial reason: some are orphans.
@@ -380,6 +382,7 @@ data IfaceUnfolding
 
 data IfaceIdDetails
   = IfVanillaId
+  | IfStrictWorkerId [CbvMark]
   | IfRecSelId (Either IfaceTyCon IfaceDecl) Bool
   | IfDFunId
 
@@ -1459,6 +1462,7 @@ instance Outputable IfaceConAlt where
 ------------------
 instance Outputable IfaceIdDetails where
   ppr IfVanillaId       = Outputable.empty
+  ppr (IfStrictWorkerId dmd) = text "StrWork" <> parens (ppr dmd)
   ppr (IfRecSelId tc b) = text "RecSel" <+> ppr tc
                           <+> if b
                                 then text "<naughty>"
@@ -1476,6 +1480,7 @@ instance Outputable IfaceInfoItem where
   ppr HsNoCafRefs           = text "HasNoCafRefs"
   ppr HsLevity              = text "Never levity-polymorphic"
   ppr (HsLFInfo lf_info)    = text "LambdaFormInfo:" <+> ppr lf_info
+  ppr (HsTagSig tag_sig)    = text "TagSig:" <+> ppr tag_sig
 
 instance Outputable IfaceJoinInfo where
   ppr IfaceNotJoinPoint   = empty
@@ -2223,12 +2228,14 @@ instance Binary IfaceAnnotation where
 instance Binary IfaceIdDetails where
     put_ bh IfVanillaId      = putByte bh 0
     put_ bh (IfRecSelId a b) = putByte bh 1 >> put_ bh a >> put_ bh b
-    put_ bh IfDFunId         = putByte bh 2
+    put_ bh (IfStrictWorkerId dmds) = putByte bh 2 >> put_ bh dmds
+    put_ bh IfDFunId         = putByte bh 3
     get bh = do
         h <- getByte bh
         case h of
             0 -> return IfVanillaId
             1 -> do { a <- get bh; b <- get bh; return (IfRecSelId a b) }
+            2 -> do { dmds <- get bh; return (IfStrictWorkerId dmds) }
             _ -> return IfDFunId
 
 instance Binary IfaceInfoItem where
@@ -2240,6 +2247,7 @@ instance Binary IfaceInfoItem where
     put_ bh HsLevity              = putByte bh 5
     put_ bh (HsCprSig cpr)        = putByte bh 6 >> put_ bh cpr
     put_ bh (HsLFInfo lf_info)    = putByte bh 7 >> put_ bh lf_info
+    put_ bh (HsTagSig sig)        = putByte bh 8 >> put_ bh sig
 
     get bh = do
         h <- getByte bh
@@ -2253,7 +2261,8 @@ instance Binary IfaceInfoItem where
             4 -> return HsNoCafRefs
             5 -> return HsLevity
             6 -> HsCprSig <$> get bh
-            _ -> HsLFInfo <$> get bh
+            7 -> HsLFInfo <$> get bh
+            _ -> HsTagSig <$> get bh
 
 instance Binary IfaceUnfolding where
     put_ bh (IfCoreUnfold s e) = do
@@ -2586,6 +2595,7 @@ instance NFData IfaceBang where
 instance NFData IfaceIdDetails where
   rnf = \case
     IfVanillaId -> ()
+    IfStrictWorkerId dmds -> dmds `seqList` ()
     IfRecSelId (Left tycon) b -> rnf tycon `seq` rnf b
     IfRecSelId (Right decl) b -> rnf decl `seq` rnf b
     IfDFunId -> ()
@@ -2600,6 +2610,7 @@ instance NFData IfaceInfoItem where
     HsLevity -> ()
     HsCprSig cpr -> cpr `seq` ()
     HsLFInfo lf_info -> lf_info `seq` () -- TODO: seq further?
+    HsTagSig sig -> sig `seq` ()
 
 instance NFData IfaceUnfolding where
   rnf = \case
diff --git a/compiler/GHC/Iface/Tidy.hs b/compiler/GHC/Iface/Tidy.hs
index b1a079205e..c453cc5336 100644
--- a/compiler/GHC/Iface/Tidy.hs
+++ b/compiler/GHC/Iface/Tidy.hs
@@ -365,6 +365,7 @@ tidyProgram hsc_env  (ModGuts { mg_module           = mod
                               , mg_foreign_files    = foreign_files
                               , mg_hpc_info         = hpc_info
                               , mg_modBreaks        = modBreaks
+                              , mg_boot_exports     = boot_exports
                               })
 
   = Err.withTiming logger
@@ -384,7 +385,7 @@ tidyProgram hsc_env  (ModGuts { mg_module           = mod
 
         ; let uf_opts = unfoldingOpts dflags
         ; (tidy_env, tidy_binds)
-                 <- tidyTopBinds uf_opts unfold_env tidy_occ_env trimmed_binds
+                 <- tidyTopBinds uf_opts unfold_env boot_exports tidy_occ_env trimmed_binds
 
           -- See Note [Grand plan for static forms] in GHC.Iface.Tidy.StaticPtrTable.
         ; (spt_entries, tidy_binds') <-
@@ -1180,39 +1181,41 @@ tidyTopName mod name_cache maybe_ref occ_env id
 
 tidyTopBinds :: UnfoldingOpts
              -> UnfoldEnv
+             -> NameSet
              -> TidyOccEnv
              -> CoreProgram
              -> IO (TidyEnv, CoreProgram)
 
-tidyTopBinds uf_opts unfold_env init_occ_env binds
+tidyTopBinds uf_opts unfold_env boot_exports init_occ_env binds
   = do let result = tidy init_env binds
        seqBinds (snd result) `seq` return result
        -- This seqBinds avoids a spike in space usage (see #13564)
   where
     init_env = (init_occ_env, emptyVarEnv)
 
-    tidy = mapAccumL (tidyTopBind uf_opts unfold_env)
+    tidy = mapAccumL (tidyTopBind uf_opts unfold_env boot_exports)
 
 ------------------------
 tidyTopBind  :: UnfoldingOpts
              -> UnfoldEnv
+             -> NameSet
              -> TidyEnv
              -> CoreBind
              -> (TidyEnv, CoreBind)
 
-tidyTopBind uf_opts unfold_env
+tidyTopBind uf_opts unfold_env boot_exports
             (occ_env,subst1) (NonRec bndr rhs)
   = (tidy_env2,  NonRec bndr' rhs')
   where
     Just (name',show_unfold) = lookupVarEnv unfold_env bndr
-    (bndr', rhs') = tidyTopPair uf_opts show_unfold tidy_env2 name' (bndr, rhs)
+    (bndr', rhs') = tidyTopPair uf_opts show_unfold boot_exports tidy_env2 name' (bndr, rhs)
     subst2        = extendVarEnv subst1 bndr bndr'
     tidy_env2     = (occ_env, subst2)
 
-tidyTopBind uf_opts unfold_env (occ_env, subst1) (Rec prs)
+tidyTopBind uf_opts unfold_env boot_exports (occ_env, subst1) (Rec prs)
   = (tidy_env2, Rec prs')
   where
-    prs' = [ tidyTopPair uf_opts show_unfold tidy_env2 name' (id,rhs)
+    prs' = [ tidyTopPair uf_opts show_unfold boot_exports tidy_env2 name' (id,rhs)
            | (id,rhs) <- prs,
              let (name',show_unfold) =
                     expectJust "tidyTopBind" $ lookupVarEnv unfold_env id
@@ -1226,6 +1229,7 @@ tidyTopBind uf_opts unfold_env (occ_env, subst1) (Rec prs)
 -----------------------------------------------------------
 tidyTopPair :: UnfoldingOpts
             -> Bool  -- show unfolding
+            -> NameSet
             -> TidyEnv  -- The TidyEnv is used to tidy the IdInfo
                         -- It is knot-tied: don't look at it!
             -> Name             -- New name
@@ -1237,14 +1241,17 @@ tidyTopPair :: UnfoldingOpts
         -- group, a variable late in the group might be mentioned
         -- in the IdInfo of one early in the group
 
-tidyTopPair uf_opts show_unfold rhs_tidy_env name' (bndr, rhs)
-  = (bndr1, rhs1)
+tidyTopPair uf_opts show_unfold boot_exports rhs_tidy_env name' (bndr, rhs)
+  = -- pprTrace "tidyTop" (ppr name' <+> ppr details <+> ppr rhs) $
+    (bndr1, rhs1)
+
   where
+    !cbv_bndr = tidyCbvInfoTop boot_exports bndr rhs
     bndr1    = mkGlobalId details name' ty' idinfo'
-    details  = idDetails bndr   -- Preserve the IdDetails
-    ty'      = tidyTopType (idType bndr)
+    details  = idDetails cbv_bndr -- Preserve the IdDetails
+    ty'      = tidyTopType (idType cbv_bndr)
     rhs1     = tidyExpr rhs_tidy_env rhs
-    idinfo'  = tidyTopIdInfo uf_opts rhs_tidy_env name' rhs rhs1 (idInfo bndr)
+    idinfo'  = tidyTopIdInfo uf_opts rhs_tidy_env name' rhs rhs1 (idInfo cbv_bndr)
                              show_unfold
 
 -- tidyTopIdInfo creates the final IdInfo for top-level
diff --git a/compiler/GHC/IfaceToCore.hs b/compiler/GHC/IfaceToCore.hs
index 3a11c30e79..2982df668d 100644
--- a/compiler/GHC/IfaceToCore.hs
+++ b/compiler/GHC/IfaceToCore.hs
@@ -1618,6 +1618,7 @@ tcIfaceDataAlt mult con inst_tys arg_strs rhs
 
 tcIdDetails :: Type -> IfaceIdDetails -> IfL IdDetails
 tcIdDetails _  IfVanillaId = return VanillaId
+tcIdDetails _  (IfStrictWorkerId dmds) = return $ StrictWorkerId dmds
 tcIdDetails ty IfDFunId
   = return (DFunId (isNewTyCon (classTyCon cls)))
   where
@@ -1665,6 +1666,9 @@ tcIdInfo ignore_prags toplvl name ty info = do
       lf_info <- tcLFInfo lf_info
       return (info `setLFInfo` lf_info)
 
+    tcPrag info (HsTagSig sig) = do
+      return (info `setTagSig` sig)
+
         -- The next two are lazy, so they don't transitively suck stuff in
     tcPrag info (HsUnfold lb if_unf)
       = do { unf <- tcUnfolding toplvl name ty info if_unf
diff --git a/compiler/GHC/Prelude.hs b/compiler/GHC/Prelude.hs
index f61dad9517..6a810ac200 100644
--- a/compiler/GHC/Prelude.hs
+++ b/compiler/GHC/Prelude.hs
@@ -1,5 +1,6 @@
 {-# LANGUAGE CPP #-}
 {-# OPTIONS_HADDOCK not-home #-}
+{-# OPTIONS_GHC -O2 #-} -- See Note [-O2 Prelude]
 
 -- | Custom GHC "Prelude"
 --
@@ -18,6 +19,17 @@ module GHC.Prelude
   ) where
 
 
+{- Note [-O2 Prelude]
+~~~~~~~~~~~~~~~~~~~~~
+There is some code in GHC that is *always* compiled with -O[2] because
+of it's impact on compile time performance. Some of this code might depend
+on the definitions like shiftL being defined here being performant.
+
+So we always compile this module with -O2. It's (currently) tiny so I
+have little reason to suspect this impacts overall GHC compile times
+negatively.
+
+-}
 -- We export the 'Semigroup' class but w/o the (<>) operator to avoid
 -- clashing with the (Outputable.<>) operator which is heavily used
 -- through GHC's code-base.
diff --git a/compiler/GHC/Stg/FVs.hs b/compiler/GHC/Stg/FVs.hs
index 00b4bdcc0b..636ec5eb61 100644
--- a/compiler/GHC/Stg/FVs.hs
+++ b/compiler/GHC/Stg/FVs.hs
@@ -47,6 +47,7 @@ module GHC.Stg.FVs (
 import GHC.Prelude hiding (mod)
 
 import GHC.Stg.Syntax
+import GHC.Stg.Utils (bindersOf)
 import GHC.Types.Id
 import GHC.Types.Name (Name, nameIsLocalOrFrom)
 import GHC.Types.Tickish ( GenTickish(Breakpoint) )
diff --git a/compiler/GHC/Stg/InferTags.hs b/compiler/GHC/Stg/InferTags.hs
new file mode 100644
index 0000000000..477650da3c
--- /dev/null
+++ b/compiler/GHC/Stg/InferTags.hs
@@ -0,0 +1,631 @@
+{-# LANGUAGE TypeFamilies, DataKinds, GADTs, FlexibleInstances #-}
+{-# LANGUAGE ConstraintKinds #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE UndecidableInstances #-}
+ -- To permit: type instance XLet 'InferTaggedBinders = XLet 'SomePass
+
+{-# OPTIONS_GHC -Wname-shadowing #-}
+module GHC.Stg.InferTags ( inferTags ) where
+
+import GHC.Prelude hiding (id)
+
+import GHC.Core.DataCon
+import GHC.Core.Type
+import GHC.Types.Id
+import GHC.Types.Id.Info (tagSigInfo)
+import GHC.Types.Name
+import GHC.Stg.Syntax
+import GHC.Types.Basic ( CbvMark (..) )
+import GHC.Types.Unique.Supply (mkSplitUniqSupply)
+import GHC.Types.RepType (dataConRuntimeRepStrictness)
+import GHC.Core (AltCon(..))
+import Data.List (mapAccumL)
+import GHC.Utils.Outputable
+import GHC.Utils.Misc( zipWithEqual, zipEqual, notNull )
+
+import GHC.Stg.InferTags.Types
+import GHC.Stg.InferTags.Rewrite (rewriteTopBinds)
+import Data.Maybe
+import GHC.Types.Name.Env (mkNameEnv, NameEnv)
+import GHC.Driver.Config.Stg.Ppr
+import GHC.Driver.Session
+import GHC.Utils.Logger
+import qualified GHC.Unit.Types
+
+{- Note [Tag Inference]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The purpose of this pass is to attach to every binder a flag
+to indicate whether or not it is "properly tagged".  A binder
+is properly tagged if it is guaranteed:
+ - to point to a heap-allocated *value*
+ - and to have the tag of the value encoded in the pointer
+
+For example
+  let x = Just y in ...
+
+Here x will be properly tagged: it will point to the heap-allocated
+values for (Just y), and the tag-bits of the pointer will encode
+the tag for Just so there is no need to re-enter the closure or even
+check for the presence of tag bits. The impacts of this can be very large.
+
+For containers the reduction in runtimes with this optimization was as follows:
+
+intmap-benchmarks:    89.30%
+intset-benchmarks:    90.87%
+map-benchmarks:       88.00%
+sequence-benchmarks:  99.84%
+set-benchmarks:       85.00%
+set-operations-intmap:88.64%
+set-operations-map:   74.23%
+set-operations-set:   76.50%
+lookupge-intmap:      89.57%
+lookupge-map:         70.95%
+
+With nofib being ~0.3% faster as well.
+
+See Note [Tag inference passes] for how we proceed to generate and use this information.
+
+Note [Strict Field Invariant]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As part of tag inference we introduce the Strict Field Invariant.
+Which consists of us saying that:
+
+* Pointers in strict fields must (a) point directly to the value, and
+  (b) must be properly tagged.
+
+For example, given
+  data T = MkT ![Int]
+
+the Strict Field Invariant guarantees that the first field of any `MkT` constructor
+will either point directly to nil, or directly to a cons cell;
+and will be tagged with `001` or `010` respectively.
+It will never point to a thunk, nor will it be tagged `000` (meaning "might be a thunk").
+NB: Note that the proper tag for some objects is indeed `000`. Currently this is the case for PAPs.
+
+Why do we care? Because if we have code like:
+
+case strictPair of
+  SP x y ->
+    case x of ...
+
+It allows us to safely omit the code to enter x and the check
+for the presence of a tag that goes along with it.
+However we might still branch on the tag as usual.
+See Note [Tag Inference] for how much impact this can have for
+some code.
+
+This is enforced by the code GHC.Stg.InferTags.Rewrite
+where we:
+
+* Look at all constructor allocations.
+* Check if arguments to their strict fields are known to be properly tagged
+* If not we convert `StrictJust x` into `case x of x' -> StrictJust x'`
+
+This is usually very beneficial but can cause regressions in rare edge cases where
+we fail to proof that x is properly tagged, or where it simply isn't.
+See Note [How untagged pointers can end up in strict fields] for how the second case
+can arise.
+
+For a full example of the worst case consider this code:
+
+foo ... = ...
+  let c = StrictJust x
+  in ...
+
+Here we would rewrite `let c = StrictJust x` into `let c = case x of x' -> StrictJust x'`
+However that is horrible! We end up allocating a thunk for `c` first, which only when
+evaluated will allocate the constructor.
+
+So we do our best to establish that `x` is already tagged (which it almost always is)
+to avoid this cost. In my benchmarks I haven't seen any cases where this causes regressions.
+
+Note [How untagged pointers can end up in strict fields]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+  data Set a = Tip | Bin !a (Set a) (Set a)
+
+We make a wrapper for Bin that evaluates its arguments
+  $WBin x a b = case x of xv -> Bin xv a b
+Here `xv` will always be evaluated and properly tagged, just as the
+Strict Field Invariant requires.
+
+But alas the Simplifier can destroy the invariant: see #15696.
+We start with
+  thk = f ()
+  g x = ...(case thk of xv -> Bin xv Tip Tip)...
+
+So far so good; the argument to Bin (which is strict) is evaluated.
+Now we do float-out. And in doing so we do a reverse binder-swap (see
+Note [Binder-swap during float-out] in SetLevels) thus
+
+  g x = ...(case thk of xv -> Bin thk Nil Nil)...
+
+The goal of the reverse binder-swap is to allow more floating -- and
+indeed it does! We float the Bin to top level:
+
+  lvl = Bin thk Tip Tip
+  g x = ...(case thk of xv -> lvl)...
+
+Now you can see that the argument of Bin, namely thk, points to the
+thunk, not to the value as it did before.
+
+In short, although it may be rare, the output of optimisation passes
+cannot guarantee to obey the Strict Field Invariant. For this reason
+we run tag inference. See Note [Tag inference passes].
+
+Note [Tag inference passes]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Tag inference proceeds in two passes:
+* The first pass is an analysis to compute which binders are properly tagged.
+  The result is then attached to /binders/.
+  This is implemented by `inferTagsAnal` in GHC.Stg.InferTags
+* The second pass walks over the AST checking if the Strict Field Invariant is upheld.
+  See Note [Strict Field Invariant].
+  If required this pass modifies the program to uphold this invariant.
+  Tag information is also moved from /binders/ to /occurrences/ during this pass.
+  This is done by `GHC.Stg.InferTags.Rewrite (rewriteTopBinds)`.
+* Finally the code generation uses this information to skip the thunk check when branching on
+  values. This is done by `cgExpr`/`cgCase` in the backend.
+
+Last but not least we also export the tag sigs of top level bindings to allow this optimization
+ to work across module boundaries.
+
+Note [TagInfo of functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The purpose of tag inference is really to figure out when we don't have to enter
+value closures. There the meaning of the tag is fairly obvious.
+For functions we never make use of the tag info so we have two choices:
+* Treat them as TagDunno
+* Treat them as TagProper (as they *are* tagged with their arity) and be really
+  careful to make sure we still enter them when needed.
+As it makes little difference for runtime performance I've treated functions as TagDunno in a few places where
+it made the code simpler. But besides implementation complexity there isn't any reason
+why we couldn't be more rigourous in dealing with functions.
+
+Note [Tag inference debugging]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+There is a flag -dtag-inference-checks which inserts various
+compile/runtime checks in order to ensure the Strict Field Invariant
+holds. It should cover all places
+where tags matter and disable optimizations which interfere with checking
+the invariant like generation of AP-Thunks.
+
+Note [Polymorphic StgPass for inferTagExpr]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In order to reach a fixpoint we sometimes have to re-analyse an expression
+multiple times. But after the initial run the Ast will be parameterized by
+a different StgPass! To handle this a large part of the analysis is polymorphic
+over the exact StgPass we are using. Which allows us to run the analysis on
+the output of itself.
+
+-}
+
+{- *********************************************************************
+*                                                                      *
+                         Tag inference pass
+*                                                                      *
+********************************************************************* -}
+
+-- doCodeGen :: HscEnv -> Module -> InfoTableProvMap -> [TyCon]
+--           -> CollectedCCs
+--           -> [CgStgTopBinding] -- ^ Bindings come already annotated with fvs
+--           -> HpcInfo
+--           -> IO (Stream IO CmmGroupSRTs CgInfos)
+--          -- Note we produce a 'Stream' of CmmGroups, so that the
+--          -- backend can be run incrementally.  Otherwise it generates all
+--          -- the C-- up front, which has a significant space cost.
+inferTags :: DynFlags -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig)
+inferTags dflags logger this_mod stg_binds = do
+
+    -- Annotate binders with tag information.
+    let (!stg_binds_w_tags) = {-# SCC "StgTagFields" #-}
+                                        inferTagsAnal stg_binds
+    putDumpFileMaybe logger Opt_D_dump_stg_tags "CodeGenAnal STG:" FormatSTG (pprGenStgTopBindings (initStgPprOpts dflags) stg_binds_w_tags)
+
+    let export_tag_info = collectExportInfo stg_binds_w_tags
+
+    -- Rewrite STG to uphold the strict field invariant
+    us_t <- mkSplitUniqSupply 't'
+    let rewritten_binds = {-# SCC "StgTagRewrite" #-} rewriteTopBinds this_mod us_t stg_binds_w_tags :: [TgStgTopBinding]
+
+    return (rewritten_binds,export_tag_info)
+
+{- *********************************************************************
+*                                                                      *
+                         Main inference algorithm
+*                                                                      *
+********************************************************************* -}
+
+type OutputableInferPass p = (Outputable (TagEnv p)
+                              , Outputable (GenStgExpr p)
+                              , Outputable (BinderP p)
+                              , Outputable (GenStgRhs p))
+
+-- | This constraint encodes the fact that no matter what pass
+-- we use the Let/Closure extension points are the same as these for
+-- 'InferTaggedBinders.
+type InferExtEq i = ( XLet i ~ XLet 'InferTaggedBinders
+                    , XLetNoEscape i ~ XLetNoEscape 'InferTaggedBinders
+                    , XRhsClosure i ~ XRhsClosure 'InferTaggedBinders)
+
+inferTagsAnal :: [GenStgTopBinding 'CodeGen] -> [GenStgTopBinding 'InferTaggedBinders]
+inferTagsAnal binds =
+  -- pprTrace "Binds" (pprGenStgTopBindings shortStgPprOpts $ binds) $
+  snd (mapAccumL inferTagTopBind initEnv binds)
+
+-----------------------
+inferTagTopBind :: TagEnv 'CodeGen -> GenStgTopBinding 'CodeGen
+                -> (TagEnv 'CodeGen, GenStgTopBinding 'InferTaggedBinders)
+inferTagTopBind env (StgTopStringLit id bs)
+  = (env, StgTopStringLit id bs)
+inferTagTopBind env (StgTopLifted bind)
+  = (env', StgTopLifted bind')
+  where
+    (env', bind') = inferTagBind env bind
+
+
+-- Why is this polymorphic over the StgPass? See Note [Polymorphic StgPass for inferTagExpr]
+-----------------------
+inferTagExpr :: forall p. (OutputableInferPass p, InferExtEq p)
+  => TagEnv p -> GenStgExpr p -> (TagInfo, GenStgExpr 'InferTaggedBinders)
+inferTagExpr env (StgApp fun args)
+  =  --pprTrace "inferTagExpr1"
+      -- (ppr fun <+> ppr args $$ ppr info $$
+      --  text "deadEndInfo:" <> ppr (isDeadEndId fun, idArity fun, length args)
+      -- )
+    (info, StgApp fun args)
+  where
+    !fun_arity = idArity fun
+    info | fun_arity == 0 -- Unknown arity => Thunk or unknown call
+         = TagDunno
+
+         | isDeadEndId fun
+         , fun_arity == length args -- Implies we will simply call the function.
+         = TagTagged -- See Note [Bottom functions are TagTagged]
+
+         | Just (TagSig res_info) <- tagSigInfo (idInfo fun)
+         , fun_arity == length args  -- Saturated
+         = res_info
+
+         | Just (TagSig res_info) <- lookupSig env fun
+         , fun_arity == length args  -- Saturated
+         = res_info
+
+         | otherwise
+         = --pprTrace "inferAppUnknown" (ppr fun) $
+           TagDunno
+-- TODO:
+-- If we have something like:
+--   let x = thunk in
+--   f g = case g of g' -> (# x, g' #)
+-- then we *do* know that g' will be properly tagged,
+-- so we should return TagTagged [TagDunno,TagProper] but currently we infer
+-- TagTagged [TagDunno,TagDunno] because of the unknown arity case in inferTagExpr.
+-- Seems not to matter much but should be changed eventually.
+
+inferTagExpr env (StgConApp con cn args tys)
+  = (inferConTag env con args, StgConApp con cn args tys)
+
+inferTagExpr _ (StgLit l)
+  = (TagTagged, StgLit l)
+
+inferTagExpr env (StgTick tick body)
+  = (info, StgTick tick body')
+  where
+    (info, body') = inferTagExpr env body
+
+inferTagExpr _ (StgOpApp op args ty)
+  = -- Do any primops guarantee to return a properly tagged value?
+    -- I think not.  Ditto foreign calls.
+    (TagDunno, StgOpApp op args ty)
+
+inferTagExpr env (StgLet ext bind body)
+  = (info, StgLet ext bind' body')
+  where
+    (env', bind') = inferTagBind env bind
+    (info, body') = inferTagExpr env' body
+
+inferTagExpr env (StgLetNoEscape ext bind body)
+  = (info, StgLetNoEscape ext bind' body')
+  where
+    (env', bind') = inferTagBind env bind
+    (info, body') = inferTagExpr env' body
+
+inferTagExpr in_env (StgCase scrut bndr ty alts)
+  -- Unboxed tuples get their info from the expression we scrutinise if any
+  | [(DataAlt con, bndrs, rhs)] <- alts
+  , isUnboxedTupleDataCon con
+  , Just infos <- scrut_infos bndrs
+  , let bndrs' = zipWithEqual "inferTagExpr" mk_bndr bndrs infos
+        mk_bndr :: BinderP p -> TagInfo -> (Id, TagSig)
+        mk_bndr tup_bndr tup_info =
+            --  pprTrace "mk_ubx_bndr_info" ( ppr bndr <+> ppr info ) $
+            (getBinderId in_env tup_bndr, TagSig tup_info)
+        -- no case binder in alt_env here, unboxed tuple binders are dead after unarise
+        alt_env = extendSigEnv in_env bndrs'
+        (info, rhs') = inferTagExpr alt_env rhs
+  =
+    -- pprTrace "inferCase1" (
+    --   text "scrut:" <> ppr scrut $$
+    --   text "bndr:" <> ppr bndr $$
+    --   text "infos" <> ppr infos $$
+    --   text "out_bndrs" <> ppr bndrs') $
+    (info, StgCase scrut' (noSig in_env bndr) ty [(DataAlt con, bndrs', rhs')])
+
+  | null alts -- Empty case, but I might just be paranoid.
+  = -- pprTrace "inferCase2" empty $
+    (TagDunno, StgCase scrut' bndr' ty [])
+  -- More than one alternative OR non-TagTuple single alternative.
+  | otherwise
+  =
+    let
+        case_env = extendSigEnv in_env [bndr']
+
+        (infos, alts')
+          = unzip [ (info, (con, bndrs', rhs'))
+                  | (con, bndrs, rhs) <- alts
+                  , let (alt_env,bndrs') = addAltBndrInfo case_env con bndrs
+                        (info, rhs') = inferTagExpr alt_env rhs
+                         ]
+        alt_info = foldr combineAltInfo TagTagged infos
+    in ( alt_info, StgCase scrut' bndr' ty alts')
+  where
+    -- Single unboxed tuple alternative
+    scrut_infos bndrs = case scrut_info of
+      TagTagged -> Just $ replicate (length bndrs) TagProper
+      TagTuple infos -> Just infos
+      _ -> Nothing
+    (scrut_info, scrut') = inferTagExpr in_env scrut
+    bndr' = (getBinderId in_env bndr, TagSig TagProper)
+
+-- Compute binder sigs based on the constructors strict fields.
+-- NB: Not used if we have tuple info from the scrutinee.
+addAltBndrInfo :: forall p. TagEnv p -> AltCon -> [BinderP p] -> (TagEnv p, [BinderP 'InferTaggedBinders])
+addAltBndrInfo env (DataAlt con) bndrs
+  | not (isUnboxedTupleDataCon con || isUnboxedSumDataCon con)
+  = (out_env, out_bndrs)
+  where
+    marks = dataConRuntimeRepStrictness con :: [StrictnessMark]
+    out_bndrs = zipWith mk_bndr bndrs marks
+    out_env = extendSigEnv env out_bndrs
+
+    mk_bndr :: (BinderP p -> StrictnessMark -> (Id, TagSig))
+    mk_bndr bndr mark
+      | isUnliftedType (idType id) || isMarkedStrict mark
+      = (id, TagSig TagProper)
+      | otherwise
+      = noSig env bndr
+        where
+          id = getBinderId env bndr
+
+addAltBndrInfo env _ bndrs = (env, map (noSig env) bndrs)
+
+-----------------------------
+inferTagBind :: (OutputableInferPass p, InferExtEq p)
+  => TagEnv p -> GenStgBinding p -> (TagEnv p, GenStgBinding 'InferTaggedBinders)
+inferTagBind in_env (StgNonRec bndr rhs)
+  =
+    -- pprTrace "inferBindNonRec" (
+    --   ppr bndr $$
+    --   ppr (isDeadEndId id) $$
+    --   ppr sig)
+    (env', StgNonRec (id, sig) rhs')
+  where
+    id   = getBinderId in_env bndr
+    env' = extendSigEnv in_env [(id, sig)]
+    (sig,rhs') = inferTagRhs id in_env rhs
+
+inferTagBind in_env (StgRec pairs)
+  = -- pprTrace "rec" (ppr (map fst pairs) $$ ppr (in_env { te_env = out_env }, StgRec pairs')) $
+    (in_env { te_env = out_env }, StgRec pairs')
+  where
+    (bndrs, rhss)     = unzip pairs
+    in_ids            = map (getBinderId in_env) bndrs
+    init_sigs         = map (initSig) $ zip in_ids rhss
+    (out_env, pairs') = go in_env init_sigs rhss
+
+    go :: forall q. (OutputableInferPass q , InferExtEq q) => TagEnv q -> [TagSig] -> [GenStgRhs q]
+                 -> (TagSigEnv, [((Id,TagSig), GenStgRhs 'InferTaggedBinders)])
+    go go_env in_sigs go_rhss
+      --   | pprTrace "go" (ppr in_ids $$ ppr in_sigs $$ ppr out_sigs $$ ppr rhss') False
+      --  = undefined
+       | in_sigs == out_sigs = (te_env rhs_env, out_bndrs `zip` rhss')
+       | otherwise     = go env' out_sigs rhss'
+       where
+         out_bndrs = map updateBndr in_bndrs -- TODO: Keeps in_ids alive
+         in_bndrs = in_ids `zip` in_sigs
+         rhs_env = extendSigEnv go_env in_bndrs
+         (out_sigs, rhss') = unzip (zipWithEqual "inferTagBind" anaRhs in_ids go_rhss)
+         env' = makeTagged go_env
+
+         anaRhs :: Id -> GenStgRhs q -> (TagSig, GenStgRhs 'InferTaggedBinders)
+         anaRhs bnd rhs = inferTagRhs bnd rhs_env rhs
+
+         updateBndr :: (Id,TagSig) -> (Id,TagSig)
+         updateBndr (v,sig) = (setIdTagSig v sig, sig)
+
+initSig :: forall p. (Id, GenStgRhs p) -> TagSig
+-- Initial signature for the fixpoint loop
+initSig (_bndr, StgRhsCon {})               = TagSig TagTagged
+initSig (bndr, StgRhsClosure _ _ _ _ _) =
+  fromMaybe defaultSig (idTagSig_maybe bndr)
+  where defaultSig = (TagSig TagTagged)
+
+{- Note [Bottom functions are TagTagged]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have a function with two branches with one
+being bottom, and the other returning a tagged
+unboxed tuple what is the result? We give it TagTagged!
+To answer why consider this function:
+
+foo :: Bool -> (# Bool, Bool #)
+foo x = case x of
+    True -> (# True,True #)
+    False -> undefined
+
+The true branch is obviously tagged. The other branch isn't.
+We want to treat the *result* of foo as tagged as well so that
+the combination of the branches also is tagged if all non-bottom
+branches are tagged.
+This is safe because the function is still always called/entered as long
+as it's applied to arguments. Since the function will never return we can give
+it safely any tag sig we like.
+So we give it TagTagged, as it allows the combined tag sig of the case expression
+to be the combination of all non-bottoming branches.
+
+-}
+
+-----------------------------
+inferTagRhs :: forall p.
+     (OutputableInferPass p, InferExtEq p)
+  => Id -- ^ Id we are binding to.
+  -> TagEnv p -- ^
+  -> GenStgRhs p -- ^
+  -> (TagSig, GenStgRhs 'InferTaggedBinders)
+inferTagRhs bnd_id in_env (StgRhsClosure ext cc upd bndrs body)
+  | isDeadEndId bnd_id && (notNull) bndrs
+  -- See Note [Bottom functions are TagTagged]
+  = (TagSig TagTagged, StgRhsClosure ext cc upd out_bndrs body')
+  | otherwise
+  = --pprTrace "inferTagRhsClosure" (ppr (_top, _grp_ids, env,info')) $
+    (TagSig info', StgRhsClosure ext cc upd out_bndrs body')
+  where
+    out_bndrs
+      | Just marks <- idCbvMarks_maybe bnd_id
+      -- Sometimes an we eta-expand foo with additional arguments after ww, and we also trim
+      -- the list of marks to the last strict entry. So we can conservatively
+      -- assume these are not strict
+      = zipWith (mkArgSig) bndrs (marks ++ repeat NotMarkedCbv)
+      | otherwise = map (noSig env') bndrs :: [(Id,TagSig)]
+
+    env' = extendSigEnv in_env out_bndrs
+    (info, body') = inferTagExpr env' body
+    info'
+      -- It's a thunk
+      | null bndrs
+      = TagDunno
+      -- TODO: We could preserve tuple fields for thunks
+      -- as well. But likely not worth the complexity.
+
+      | otherwise  = info
+
+    mkArgSig :: BinderP p -> CbvMark -> (Id,TagSig)
+    mkArgSig bndp mark =
+      let id = getBinderId in_env bndp
+          tag = case mark of
+            MarkedCbv -> TagProper
+            _
+              | isUnliftedType (idType id) -> TagProper
+              | otherwise -> TagDunno
+      in (id, TagSig tag)
+
+inferTagRhs _ env _rhs@(StgRhsCon cc con cn ticks args)
+-- Constructors, which have untagged arguments to strict fields
+-- become thunks. We encode this by giving changing RhsCon nodes the info TagDunno
+  = --pprTrace "inferTagRhsCon" (ppr grp_ids) $
+    (TagSig (inferConTag env con args), StgRhsCon cc con cn ticks args)
+
+{- Note [Constructor TagSigs]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@inferConTag@ will infer the proper tag signature for a binding who's RHS is a constructor
+or a StgConApp expression.
+Usually these will simply be TagProper. But there are exceptions.
+If any of the fields in the constructor are strict, but any argument to these
+fields is not tagged then we will have to case on the argument before storing
+in the constructor. Which means for let bindings the RHS turns into a thunk
+which obviously is no longer properly tagged.
+For example we might start with:
+
+    let x<TagDunno> = f ...
+    let c<TagProper> = StrictPair x True
+
+But we know during the rewrite stage x will need to be evaluated in the RHS
+of `c` so we will infer:
+
+    let x<TagDunno> = f ...
+    let c<TagDunno> = StrictPair x True
+
+Which in the rewrite stage will then be rewritten into:
+
+    let x<TagDunno> = f ...
+    let c<TagDunno> = case x of x' -> StrictPair x' True
+
+The other exception is unboxed tuples. These will get a TagTuple
+signature with a list of TagInfo about their individual binders
+as argument. As example:
+
+    let c<TagProper> = True
+    let x<TagDunno> = ...
+    let f<?> z = case z of z'<TagProper> -> (# c, x #)
+
+Here we will infer for f the Signature <TagTuple[TagProper,TagDunno]>.
+This information will be used if we scrutinze a saturated application of
+`f` in order to determine the taggedness of the result.
+That is for `case f x of (# r1,r2 #) -> rhs` we can infer
+r1<TagProper> and r2<TagDunno> which allows us to skip all tag checks on `r1`
+in `rhs`.
+
+Things get a bit more complicated with nesting:
+
+    let closeFd<TagTuple[...]> = ...
+    let f x = ...
+        case x of
+          _ -> Solo# closeFd
+
+The "natural" signature for the Solo# branch in `f` would be <TagTuple[TagTuple[...]]>.
+But we flatten this out to <TagTuple[TagDunno]> for the time being as it improves compile
+time and there doesn't seem to huge benefit to doing differently.
+
+  -}
+
+-- See Note [Constructor TagSigs]
+inferConTag :: TagEnv p -> DataCon -> [StgArg] -> TagInfo
+inferConTag env con args
+  | isUnboxedTupleDataCon con
+  = TagTuple $ map (flatten_arg_tag . lookupInfo env) args
+  | otherwise =
+    -- pprTrace "inferConTag"
+    --   ( text "con:" <> ppr con $$
+    --     text "args:" <> ppr args $$
+    --     text "marks:" <> ppr (dataConRuntimeRepStrictness con) $$
+    --     text "arg_info:" <> ppr (map (lookupInfo env) args) $$
+    --     text "info:" <> ppr info) $
+    info
+  where
+    info = if any arg_needs_eval strictArgs then TagDunno else TagProper
+    strictArgs = zipEqual "inferTagRhs" args (dataConRuntimeRepStrictness con) :: ([(StgArg, StrictnessMark)])
+    arg_needs_eval (arg,strict)
+      -- lazy args
+      | not (isMarkedStrict strict) = False
+      | tag <- (lookupInfo env arg)
+      -- banged args need to be tagged, or require eval
+      = not (isTaggedInfo tag)
+
+    flatten_arg_tag (TagTagged) = TagProper
+    flatten_arg_tag (TagProper ) = TagProper
+    flatten_arg_tag (TagTuple _) = TagDunno -- See Note [Constructor TagSigs]
+    flatten_arg_tag (TagDunno) = TagDunno
+
+
+collectExportInfo :: [GenStgTopBinding 'InferTaggedBinders] -> NameEnv TagSig
+collectExportInfo binds =
+  mkNameEnv bndr_info
+  where
+    bndr_info = concatMap collect binds :: [(Name,TagSig)]
+
+    collect (StgTopStringLit {}) = []
+    collect (StgTopLifted bnd) =
+      case bnd of
+        StgNonRec (id,sig) _rhs
+          | TagSig TagDunno <- sig -> []
+          | otherwise -> [(idName id,sig)]
+        StgRec bnds -> collectRec bnds
+
+    collectRec :: [(BinderP 'InferTaggedBinders, rhs)] -> [(Name,TagSig)]
+    collectRec [] = []
+    collectRec (bnd:bnds)
+      | (p,_rhs)  <- bnd
+      , (id,sig) <- p
+      , TagSig TagDunno <- sig
+      = (idName id,sig) : collectRec bnds
+      | otherwise = collectRec bnds
diff --git a/compiler/GHC/Stg/InferTags/Rewrite.hs b/compiler/GHC/Stg/InferTags/Rewrite.hs
new file mode 100644
index 0000000000..8b2c24cbdb
--- /dev/null
+++ b/compiler/GHC/Stg/InferTags/Rewrite.hs
@@ -0,0 +1,495 @@
+--
+-- Copyright (c) 2019 Andreas Klebinger
+--
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE GeneralisedNewtypeDeriving #-}
+
+module GHC.Stg.InferTags.Rewrite (rewriteTopBinds)
+where
+
+import GHC.Prelude
+
+import GHC.Types.Id
+import GHC.Types.Name
+import GHC.Types.Unique.Supply
+import GHC.Types.Unique.FM
+import GHC.Types.RepType
+import GHC.Unit.Types (Module)
+
+import GHC.Core.DataCon
+import GHC.Core (AltCon(..) )
+import GHC.Core.Type
+
+import GHC.StgToCmm.Types
+
+import GHC.Stg.Utils
+import GHC.Stg.Syntax as StgSyn
+
+import GHC.Data.Maybe
+import GHC.Utils.Panic
+import GHC.Utils.Panic.Plain
+
+import GHC.Utils.Outputable
+import GHC.Utils.Monad.State.Strict
+import GHC.Utils.Misc
+
+import GHC.Stg.InferTags.Types
+
+import Control.Monad
+import GHC.Types.Basic (CbvMark (NotMarkedCbv, MarkedCbv), isMarkedCbv, TopLevelFlag(..), isTopLevel)
+import GHC.Types.Var.Set
+
+-- import GHC.Utils.Trace
+-- import GHC.Driver.Ppr
+
+newtype RM a = RM { unRM :: (State (UniqFM Id TagSig, UniqSupply, Module, IdSet) a) }
+    deriving (Functor, Monad, Applicative)
+
+------------------------------------------------------------
+-- Add cases around strict fields where required.
+------------------------------------------------------------
+{-
+The work of this pass is simple:
+* We traverse the STG AST looking for constructor allocations.
+* For all allocations we check if there are strict fields in the constructor.
+* For any strict field we check if the argument is known to be properly tagged.
+* If it's not known to be properly tagged, we wrap the whole thing in a case,
+  which will force the argument before allocation.
+This is described in detail in Note [Strict Field Invariant].
+
+The only slight complication is that we have to make sure not to invalidate free
+variable analysis in the process.
+
+Note [Partially applied workers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Sometimes we will get a function f of the form
+    -- Arity 1
+    f :: Dict a -> a -> b -> (c -> d)
+    f dict a b = case dict of
+        C m1 m2 -> m1 a b
+
+Which will result in a W/W split along the lines of
+    -- Arity 1
+    f :: Dict a -> a -> b -> (c -> d)
+    f dict a = case dict of
+        C m1 m2 -> $wf m1 a b
+
+    -- Arity 4
+    $wf :: (a -> b -> d -> c) -> a -> b -> c -> d
+    $wf m1 a b c = m1 a b c
+
+It's notable that the worker is called *undersatured* in the wrapper.
+At runtime what happens is that the wrapper will allocate a PAP which
+once fully applied will call the worker. And all is fine.
+
+But what about a strict worker! Well the function returned by `f` would
+be a unknown call, so we lose the ability to enfore the invariant that
+cbv marked arguments from StictWorkerId's are actually properly tagged
+as the annotations would be unavailable at the (unknown) call site.
+
+The fix is easy. We eta-expand all calls to functions taking call-by-value
+arguments during CorePrep just like we do with constructor allocations.
+
+Note [Upholding free variable annotations]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The code generator requires us to maintain exact information
+about free variables about closures. Since we convert some
+RHSs from constructor allocations to closures we have to provide
+fvs of these closures. Not all constructor arguments will become
+free variables. Only these which are not bound at the top level
+have to be captured.
+To facilitate this we keep track of a set of locally bound variables in
+the current context which we then use to filter constructor arguments
+when building the free variable list.
+-}
+
+--------------------------------
+-- Utilities
+--------------------------------
+
+instance MonadUnique RM where
+    getUniqueSupplyM = RM $ do
+        (m, us, mod,lcls) <- get
+        let (us1, us2) = splitUniqSupply us
+        (put) (m,us2,mod,lcls)
+        return us1
+
+getMap :: RM (UniqFM Id TagSig)
+getMap = RM $ ((\(fst,_,_,_) -> fst) <$> get)
+
+setMap :: (UniqFM Id TagSig) -> RM ()
+setMap m = RM $ do
+    (_,us,mod,lcls) <- get
+    put (m, us,mod,lcls)
+
+getMod :: RM Module
+getMod = RM $ ( (\(_,_,thrd,_) -> thrd) <$> get)
+
+getFVs :: RM IdSet
+getFVs = RM $ ((\(_,_,_,lcls) -> lcls) <$> get)
+
+setFVs :: IdSet -> RM ()
+setFVs fvs = RM $ do
+    (tag_map,us,mod,_lcls) <- get
+    put (tag_map, us,mod,fvs)
+
+-- Rewrite the RHS(s) while making the id and it's sig available
+-- to determine if things are tagged/need to be captured as FV.
+withBind :: TopLevelFlag -> GenStgBinding 'InferTaggedBinders -> RM a -> RM a
+withBind top_flag (StgNonRec bnd _) cont = withBinder top_flag bnd cont
+withBind top_flag (StgRec binds) cont = do
+    let (bnds,_rhss) = unzip binds :: ([(Id, TagSig)], [GenStgRhs 'InferTaggedBinders])
+    withBinders top_flag bnds cont
+
+addTopBind :: GenStgBinding 'InferTaggedBinders -> RM ()
+addTopBind (StgNonRec (id, tag) _) = do
+    s <- getMap
+    -- pprTraceM "AddBind" (ppr id)
+    setMap $ addToUFM s id tag
+    return ()
+addTopBind (StgRec binds) = do
+    let (bnds,_rhss) = unzip binds
+    !s <- getMap
+    -- pprTraceM "AddBinds" (ppr $ map fst bnds)
+    setMap $! addListToUFM s bnds
+
+withBinder :: TopLevelFlag ->  (Id, TagSig) -> RM a -> RM a
+withBinder top_flag (id,sig) cont = do
+    oldMap <- getMap
+    setMap $ addToUFM oldMap id sig
+    a <- if isTopLevel top_flag
+            then cont
+            else withLcl id cont
+    setMap oldMap
+    return a
+
+withBinders :: TopLevelFlag -> [(Id, TagSig)] -> RM a -> RM a
+withBinders TopLevel sigs cont = do
+    oldMap <- getMap
+    setMap $ addListToUFM oldMap sigs
+    a <- cont
+    setMap oldMap
+    return a
+withBinders NotTopLevel sigs cont = do
+    oldMap <- getMap
+    oldFvs <- getFVs
+    setMap $ addListToUFM oldMap sigs
+    setFVs $ extendVarSetList oldFvs (map fst sigs)
+    a <- cont
+    setMap oldMap
+    setFVs oldFvs
+    return a
+
+-- | Compute the argument with the given set of ids treated as requiring capture
+-- as free variables.
+withClosureLcls :: DIdSet -> RM a -> RM a
+withClosureLcls fvs act = do
+    old_fvs <- getFVs
+    let fvs' = nonDetStrictFoldDVarSet (flip extendVarSet) old_fvs fvs
+    setFVs fvs'
+    r <- act
+    setFVs old_fvs
+    return r
+
+-- | Compute the argument with the given id treated as requiring capture
+-- as free variables in closures.
+withLcl :: Id -> RM a -> RM a
+withLcl fv act = do
+    old_fvs <- getFVs
+    let fvs' = extendVarSet old_fvs fv
+    setFVs fvs'
+    r <- act
+    setFVs old_fvs
+    return r
+
+isTagged :: Id -> RM Bool
+isTagged v = do
+    this_mod <- getMod
+    case nameIsLocalOrFrom this_mod (idName v) of
+        True
+            | isUnliftedType (idType v)
+            -> return True
+            | otherwise -> do -- Local binding
+                !s <- getMap
+                let !sig = lookupWithDefaultUFM s (pprPanic "unknown Id:" (ppr v)) v
+                return $ case sig of
+                    TagSig info ->
+                        case info of
+                            TagDunno -> False
+                            TagProper -> True
+                            TagTagged -> True
+                            TagTuple _ -> True -- Consider unboxed tuples tagged.
+        False -- Imported
+            | Just con <- (isDataConWorkId_maybe v)
+            , isNullaryRepDataCon con
+            -> return True
+            | Just lf_info <- idLFInfo_maybe v
+            -> return $
+                -- Can we treat the thing as tagged based on it's LFInfo?
+                case lf_info of
+                    -- Function, applied not entered.
+                    LFReEntrant {}
+                        -> True
+                    -- Thunks need to be entered.
+                    LFThunk {}
+                        -> False
+                    -- LFCon means we already know the tag, and it's tagged.
+                    LFCon {}
+                        -> True
+                    LFUnknown {}
+                        -> False
+                    LFUnlifted {}
+                        -> True
+                    LFLetNoEscape {}
+                    -- Shouldn't be possible. I don't think we can export letNoEscapes
+                        -> True
+
+            | otherwise
+            -> return False
+
+
+isArgTagged :: StgArg -> RM Bool
+isArgTagged (StgLitArg _) = return True
+isArgTagged (StgVarArg v) = isTagged v
+
+mkLocalArgId :: Id -> RM Id
+mkLocalArgId id = do
+    !u <- getUniqueM
+    return $! setIdUnique (localiseId id) u
+
+---------------------------
+-- Actual rewrite pass
+---------------------------
+
+
+rewriteTopBinds :: Module -> UniqSupply -> [GenStgTopBinding 'InferTaggedBinders] -> [TgStgTopBinding]
+rewriteTopBinds mod us binds =
+    let doBinds = mapM rewriteTop binds
+
+    in evalState (unRM doBinds) (mempty, us, mod, mempty)
+
+rewriteTop :: InferStgTopBinding -> RM TgStgTopBinding
+rewriteTop (StgTopStringLit v s) = return $! (StgTopStringLit v s)
+rewriteTop (StgTopLifted bind)   = do
+    -- Top level bindings can, and must remain in scope
+    addTopBind bind
+    (StgTopLifted) <$!> (rewriteBinds TopLevel bind)
+
+-- For top level binds, the wrapper is guaranteed to be `id`
+rewriteBinds :: TopLevelFlag -> InferStgBinding -> RM (TgStgBinding)
+rewriteBinds _top_flag (StgNonRec v rhs) = do
+        (!rhs) <-  rewriteRhs v rhs
+        return $! (StgNonRec (fst v) rhs)
+rewriteBinds top_flag b@(StgRec binds) =
+    -- Bring sigs of binds into scope for all rhss
+    withBind top_flag b $ do
+        (rhss) <- mapM (uncurry rewriteRhs) binds
+        return $! (mkRec rhss)
+        where
+            mkRec :: [TgStgRhs] -> TgStgBinding
+            mkRec rhss = StgRec (zip (map (fst . fst) binds) rhss)
+
+-- Rewrite a RHS
+rewriteRhs :: (Id,TagSig) -> InferStgRhs
+           -> RM (TgStgRhs)
+rewriteRhs (_id, _tagSig) (StgRhsCon ccs con cn ticks args) = {-# SCC rewriteRhs_ #-} do
+    -- pprTraceM "rewriteRhs" (ppr _id)
+
+    -- Look up the nodes representing the constructor arguments.
+    fieldInfos <- mapM isArgTagged args
+
+    -- Filter out non-strict fields.
+    let strictFields =
+            getStrictConArgs con (zip args fieldInfos) :: [(StgArg,Bool)] -- (nth-argument, tagInfo)
+    -- Filter out already tagged arguments.
+    let needsEval = map fst . --get the actual argument
+                        filter (not . snd) $ -- Keep untagged (False) elements.
+                        strictFields :: [StgArg]
+    let evalArgs = [v | StgVarArg v <- needsEval] :: [Id]
+
+    if (null evalArgs)
+        then return $! (StgRhsCon ccs con cn ticks args)
+        else do
+            --assert not (isTaggedSig tagSig)
+            -- pprTraceM "CreatingSeqs for " $ ppr _id <+> ppr node_id
+
+            -- At this point iff we have  possibly untagged arguments to strict fields
+            -- we convert the RHS into a RhsClosure which will evaluate the arguments
+            -- before allocating the constructor.
+            let ty_stub = panic "mkSeqs shouldn't use the type arg"
+            conExpr <- mkSeqs args evalArgs (\taggedArgs -> StgConApp con cn taggedArgs ty_stub)
+
+            fvs <- fvArgs args
+            -- lcls <- getFVs
+            -- pprTraceM "RhsClosureConversion" (ppr (StgRhsClosure fvs ccs ReEntrant [] $! conExpr) $$ text "lcls:" <> ppr lcls)
+            return $! (StgRhsClosure fvs ccs ReEntrant [] $! conExpr)
+rewriteRhs _binding (StgRhsClosure fvs ccs flag args body) = do
+    withBinders NotTopLevel args $
+        withClosureLcls fvs $
+            StgRhsClosure fvs ccs flag (map fst args) <$> rewriteExpr False body
+        -- return (closure)
+
+fvArgs :: [StgArg] -> RM DVarSet
+fvArgs args = do
+    fv_lcls <- getFVs
+    -- pprTraceM "fvArgs" (text "args:" <> ppr args $$ text "lcls:" <> pprVarSet (fv_lcls) (braces . fsep . map ppr) )
+    return $ mkDVarSet [ v | StgVarArg v <- args, elemVarSet v fv_lcls]
+
+type IsScrut = Bool
+
+rewriteExpr :: IsScrut -> InferStgExpr -> RM TgStgExpr
+rewriteExpr _ (e@StgCase {})          = rewriteCase e
+rewriteExpr _ (e@StgLet {})           = rewriteLet e
+rewriteExpr _ (e@StgLetNoEscape {})   = rewriteLetNoEscape e
+rewriteExpr isScrut (StgTick t e)     = StgTick t <$!> rewriteExpr isScrut e
+rewriteExpr _ e@(StgConApp {})        = rewriteConApp e
+
+rewriteExpr isScrut e@(StgApp {})     = rewriteApp isScrut e
+rewriteExpr _ (StgLit lit)           = return $! (StgLit lit)
+rewriteExpr _ (StgOpApp op args res_ty) = return $! (StgOpApp op args res_ty)
+
+rewriteCase :: InferStgExpr -> RM TgStgExpr
+rewriteCase (StgCase scrut bndr alt_type alts) =
+    withBinder NotTopLevel bndr $
+        pure StgCase <*>
+            rewriteExpr True scrut <*>
+            pure (fst bndr) <*>
+            pure alt_type <*>
+            mapM rewriteAlt alts
+
+rewriteCase _ = panic "Impossible: nodeCase"
+
+rewriteAlt :: InferStgAlt -> RM TgStgAlt
+rewriteAlt (altCon, bndrs, rhs) = do
+    withBinders NotTopLevel bndrs $ do
+        !rhs' <- rewriteExpr False rhs
+        return $! (altCon, map fst bndrs, rhs')
+
+rewriteLet :: InferStgExpr -> RM TgStgExpr
+rewriteLet (StgLet xt bind expr) = do
+    (!bind') <- rewriteBinds NotTopLevel bind
+    withBind NotTopLevel bind $ do
+        -- pprTraceM "withBindLet" (ppr $ bindersOfX bind)
+        !expr' <- rewriteExpr False expr
+        return $! (StgLet xt bind' expr')
+rewriteLet _ = panic "Impossible"
+
+rewriteLetNoEscape :: InferStgExpr -> RM TgStgExpr
+rewriteLetNoEscape (StgLetNoEscape xt bind expr) = do
+    (!bind') <- rewriteBinds NotTopLevel bind
+    withBind NotTopLevel bind $ do
+        !expr' <- rewriteExpr False expr
+        return $! (StgLetNoEscape xt bind' expr')
+rewriteLetNoEscape _ = panic "Impossible"
+
+rewriteConApp :: InferStgExpr -> RM TgStgExpr
+rewriteConApp (StgConApp con cn args tys) = do
+    -- We check if the strict field arguments are already known to be tagged.
+    -- If not we evaluate them first.
+    fieldInfos <- mapM isArgTagged args
+    let strictIndices = getStrictConArgs con (zip fieldInfos args) :: [(Bool, StgArg)]
+    let needsEval = map snd . filter (not . fst) $ strictIndices :: [StgArg]
+    let evalArgs = [v | StgVarArg v <- needsEval] :: [Id]
+    if (not $ null evalArgs)
+        then do
+            -- pprTraceM "Creating conAppSeqs for " $ ppr nodeId <+> parens ( ppr evalArgs ) -- <+> parens ( ppr fieldInfos )
+            mkSeqs args evalArgs (\taggedArgs -> StgConApp con cn taggedArgs tys)
+        else return $! (StgConApp con cn args tys)
+
+rewriteConApp _ = panic "Impossible"
+
+-- Special case: Expressions like `case x of { ... }`
+rewriteApp :: IsScrut -> InferStgExpr -> RM TgStgExpr
+rewriteApp True (StgApp f []) = do
+    -- pprTraceM "rewriteAppScrut" (ppr f)
+    f_tagged <- isTagged f
+    -- isTagged looks at more than the result of our analysis.
+    -- So always update here if useful.
+    let f' = if f_tagged
+                then setIdTagSig f (TagSig TagProper)
+                else f
+    return $! StgApp f' []
+  where
+rewriteApp _ (StgApp f args)
+    -- | pprTrace "rewriteAppOther" (ppr f <+> ppr args) False
+    -- = undefined
+    | Just marks <- idCbvMarks_maybe f
+    , relevant_marks <- dropWhileEndLE (not . isMarkedCbv) marks
+    , any isMarkedCbv relevant_marks
+    = assert (length relevant_marks <= length args)
+      unliftArg relevant_marks
+
+    where
+      -- If the function expects any argument to be call-by-value ensure the argument is already
+      -- evaluated.
+      unliftArg relevant_marks = do
+        argTags <- mapM isArgTagged args
+        let argInfo = zipWith3 ((,,)) args (relevant_marks++repeat NotMarkedCbv)  argTags :: [(StgArg, CbvMark, Bool)]
+
+            -- untagged cbv argument positions
+            cbvArgInfo = filter (\x -> sndOf3 x == MarkedCbv && thdOf3 x == False) argInfo
+            cbvArgIds = [x | StgVarArg x <- map fstOf3 cbvArgInfo] :: [Id]
+        mkSeqs args cbvArgIds (\cbv_args -> StgApp f cbv_args)
+
+rewriteApp _ (StgApp f args) = return $ StgApp f args
+rewriteApp _ _ = panic "Impossible"
+
+-- `mkSeq` x x' e generates `case x of x' -> e`
+-- We could also substitute x' for x in e but that's so rarely beneficial
+-- that we don't bother.
+mkSeq :: Id -> Id -> TgStgExpr -> TgStgExpr
+mkSeq id bndr !expr =
+    -- pprTrace "mkSeq" (ppr (id,bndr)) $
+    let altTy = mkStgAltTypeFromStgAlts bndr [(DEFAULT, [], panic "Not used")]
+    in
+    StgCase (StgApp id []) bndr altTy [(DEFAULT, [], expr)]
+
+-- `mkSeqs args vs mkExpr` will force all vs, and construct
+-- an argument list args' where each v is replaced by it's evaluated
+-- counterpart v'.
+-- That is if we call `mkSeqs [StgVar x, StgLit l] [x] mkExpr` then
+-- the result will be (case x of x' { _DEFAULT -> <mkExpr [StgVar x', StgLit l]>}
+{-# INLINE mkSeqs #-} -- We inline to avoid allocating mkExpr
+mkSeqs  :: [StgArg] -- ^ Original arguments
+        -> [Id]     -- ^ var args to be evaluated ahead of time
+        -> ([StgArg] -> TgStgExpr)
+                    -- ^ Function that reconstructs the expressions when passed
+                    -- the list of evaluated arguments.
+        -> RM TgStgExpr
+mkSeqs args untaggedIds mkExpr = do
+    argMap <- mapM (\arg -> (arg,) <$> mkLocalArgId arg ) untaggedIds :: RM [(InId, OutId)]
+    -- mapM_ (pprTraceM "Forcing strict args before allocation:" . ppr) argMap
+    let taggedArgs :: [StgArg]
+            = map   (\v -> case v of
+                        StgVarArg v' -> StgVarArg $ fromMaybe v' $ lookup v' argMap
+                        lit -> lit)
+                    args
+
+    let conBody = mkExpr taggedArgs
+    let body = foldr (\(v,bndr) expr -> mkSeq v bndr expr) conBody argMap
+    return $! body
+
+-- Out of all arguments passed at runtime only return these ending up in a
+-- strict field
+getStrictConArgs :: DataCon -> [a] -> [a]
+getStrictConArgs con args
+    -- These are always lazy in their arguments.
+    | isUnboxedTupleDataCon con = []
+    | isUnboxedSumDataCon con = []
+    -- For proper data cons we have to check.
+    | otherwise =
+        [ arg | (arg,MarkedStrict)
+                    <- zipEqual "getStrictConArgs"
+                                args
+                                (dataConRuntimeRepStrictness con)]
diff --git a/compiler/GHC/Stg/InferTags/TagSig.hs b/compiler/GHC/Stg/InferTags/TagSig.hs
new file mode 100644
index 0000000000..a1381881f1
--- /dev/null
+++ b/compiler/GHC/Stg/InferTags/TagSig.hs
@@ -0,0 +1,66 @@
+{-# LANGUAGE TypeFamilies, DataKinds, GADTs, FlexibleInstances #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ConstraintKinds #-}
+
+-- We export this type from this module instead of GHC.Stg.InferTags.Types
+-- because it's used by more than the analysis itself. For example in interface
+-- files where we record a tag signature for bindings.
+-- By putting the sig into it's own module we can avoid module loops.
+module GHC.Stg.InferTags.TagSig
+
+where
+
+import GHC.Prelude
+
+import GHC.Types.Var
+import GHC.Utils.Outputable
+import GHC.Utils.Binary
+import GHC.Utils.Panic.Plain
+
+data TagInfo
+  = TagDunno            -- We don't know anything about the tag.
+  | TagTuple [TagInfo]  -- Represents a function/thunk which when evaluated
+                        -- will return a Unboxed tuple whos components have
+                        -- the given TagInfos.
+  | TagProper           -- Heap pointer to properly-tagged value
+  | TagTagged           -- Bottom of the domain.
+  deriving (Eq)
+
+instance Outputable TagInfo where
+  ppr TagTagged      = text "TagTagged"
+  ppr TagDunno       = text "TagDunno"
+  ppr TagProper      = text "TagProper"
+  ppr (TagTuple tis) = text "TagTuple" <> brackets (pprWithCommas ppr tis)
+
+instance Binary TagInfo where
+  put_ bh TagDunno  = putByte bh 1
+  put_ bh (TagTuple flds) = putByte bh 2 >> put_ bh flds
+  put_ bh TagProper = putByte bh 3
+  put_ bh TagTagged = putByte bh 4
+
+  get bh = do tag <- getByte bh
+              case tag of 1 -> return TagDunno
+                          2 -> TagTuple <$> get bh
+                          3 -> return TagProper
+                          4 -> return TagTagged
+                          _ -> panic ("get TagInfo " ++ show tag)
+
+newtype TagSig  -- The signature for each binding, this is a newtype as we might
+                -- want to track more information in the future.
+  = TagSig TagInfo
+  deriving (Eq)
+
+instance Outputable TagSig where
+  ppr (TagSig ti) = char '<' <> ppr ti <> char '>'
+instance OutputableBndr (Id,TagSig) where
+  pprInfixOcc  = ppr
+  pprPrefixOcc = ppr
+
+instance Binary TagSig where
+  put_ bh (TagSig sig) = put_ bh sig
+  get bh = pure TagSig <*> get bh
+
+isTaggedSig :: TagSig -> Bool
+isTaggedSig (TagSig TagProper) = True
+isTaggedSig (TagSig TagTagged) = True
+isTaggedSig _ = False
diff --git a/compiler/GHC/Stg/InferTags/Types.hs b/compiler/GHC/Stg/InferTags/Types.hs
new file mode 100644
index 0000000000..bcb1f3300b
--- /dev/null
+++ b/compiler/GHC/Stg/InferTags/Types.hs
@@ -0,0 +1,137 @@
+{-# LANGUAGE TypeFamilies, DataKinds, GADTs, FlexibleInstances #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ConstraintKinds #-}
+
+{-# LANGUAGE UndecidableInstances #-}
+ -- To permit: type instance XLet 'InferTaggedBinders = XLet 'CodeGen
+
+module GHC.Stg.InferTags.Types
+    ( module GHC.Stg.InferTags.Types
+    , module TagSig)
+where
+
+import GHC.Prelude
+
+import GHC.Core.DataCon
+import GHC.Core.Type (isUnliftedType)
+import GHC.Types.Id
+import GHC.Stg.Syntax
+import GHC.Stg.InferTags.TagSig as TagSig
+import GHC.Types.Var.Env
+import GHC.Utils.Outputable
+import GHC.Utils.Misc( zipWithEqual )
+import GHC.Utils.Panic
+
+import GHC.StgToCmm.Types
+
+{- *********************************************************************
+*                                                                      *
+                         Supporting data types
+*                                                                      *
+********************************************************************* -}
+
+type instance BinderP      'InferTaggedBinders = (Id, TagSig)
+type instance XLet         'InferTaggedBinders = XLet         'CodeGen
+type instance XLetNoEscape 'InferTaggedBinders = XLetNoEscape 'CodeGen
+type instance XRhsClosure  'InferTaggedBinders = XRhsClosure  'CodeGen
+
+type InferStgTopBinding = GenStgTopBinding 'InferTaggedBinders
+type InferStgBinding    = GenStgBinding    'InferTaggedBinders
+type InferStgExpr       = GenStgExpr       'InferTaggedBinders
+type InferStgRhs        = GenStgRhs        'InferTaggedBinders
+type InferStgAlt        = GenStgAlt        'InferTaggedBinders
+
+combineAltInfo :: TagInfo -> TagInfo -> TagInfo
+combineAltInfo TagDunno         _              = TagDunno
+combineAltInfo _                TagDunno       = TagDunno
+combineAltInfo (TagTuple {})    TagProper      = panic "Combining unboxed tuple with non-tuple result"
+combineAltInfo TagProper       (TagTuple {})   = panic "Combining unboxed tuple with non-tuple result"
+combineAltInfo TagProper        TagProper      = TagProper
+combineAltInfo (TagTuple is1)  (TagTuple is2)  = TagTuple (zipWithEqual "combineAltInfo" combineAltInfo is1 is2)
+combineAltInfo (TagTagged)      ti             = ti
+combineAltInfo ti               TagTagged      = ti
+
+type TagSigEnv = IdEnv TagSig
+data TagEnv p = TE { te_env :: TagSigEnv
+                   , te_get :: BinderP p -> Id
+                   }
+
+instance Outputable (TagEnv p) where
+    ppr te = ppr (te_env te)
+
+
+getBinderId :: TagEnv p -> BinderP p -> Id
+getBinderId = te_get
+
+initEnv :: TagEnv 'CodeGen
+initEnv = TE { te_env = emptyVarEnv
+             , te_get = \x -> x}
+
+-- | Simple convert env to a env of the 'InferTaggedBinders pass
+-- with no other changes.
+makeTagged :: TagEnv p -> TagEnv 'InferTaggedBinders
+makeTagged env = TE { te_env = te_env env
+                    , te_get = fst }
+
+noSig :: TagEnv p -> BinderP p -> (Id, TagSig)
+noSig env bndr
+  | isUnliftedType (idType var) = (var, TagSig TagProper)
+  | otherwise = (var, TagSig TagDunno)
+  where
+    var = getBinderId env bndr
+
+lookupSig :: TagEnv p -> Id -> Maybe TagSig
+lookupSig env fun = lookupVarEnv (te_env env) fun
+
+lookupInfo :: TagEnv p -> StgArg -> TagInfo
+lookupInfo env (StgVarArg var)
+  -- Nullary data constructors like True, False
+  | Just dc <- isDataConWorkId_maybe var
+  , isNullaryRepDataCon dc
+  = TagProper
+
+  | isUnliftedType (idType var)
+  = TagProper
+
+  -- Variables in the environment.
+  | Just (TagSig info) <- lookupVarEnv (te_env env) var
+  = info
+
+  | Just lf_info <- idLFInfo_maybe var
+  =   case lf_info of
+          -- Function, tagged (with arity)
+          LFReEntrant {}
+              -> TagProper
+          -- Thunks need to be entered.
+          LFThunk {}
+              -> TagDunno
+          -- Constructors, already tagged.
+          LFCon {}
+              -> TagProper
+          LFUnknown {}
+              -> TagDunno
+          LFUnlifted {}
+              -> TagProper
+          -- Shouldn't be possible. I don't think we can export letNoEscapes
+          LFLetNoEscape {} -> panic "LFLetNoEscape exported"
+
+  | otherwise
+  = TagDunno
+
+lookupInfo _ (StgLitArg {})
+  = TagProper
+
+isDunnoSig :: TagSig -> Bool
+isDunnoSig (TagSig TagDunno) = True
+isDunnoSig (TagSig TagProper) = False
+isDunnoSig (TagSig TagTuple{}) = False
+isDunnoSig (TagSig TagTagged{}) = False
+
+isTaggedInfo :: TagInfo -> Bool
+isTaggedInfo TagProper = True
+isTaggedInfo TagTagged = True
+isTaggedInfo _         = False
+
+extendSigEnv :: TagEnv p -> [(Id,TagSig)] -> TagEnv p
+extendSigEnv env@(TE { te_env = sig_env }) bndrs
+  = env { te_env = extendVarEnvList sig_env bndrs }
diff --git a/compiler/GHC/Stg/Lint.hs b/compiler/GHC/Stg/Lint.hs
index aa7d03cacc..405abdd1f4 100644
--- a/compiler/GHC/Stg/Lint.hs
+++ b/compiler/GHC/Stg/Lint.hs
@@ -40,6 +40,7 @@ module GHC.Stg.Lint ( lintStgTopBindings ) where
 import GHC.Prelude
 
 import GHC.Stg.Syntax
+import GHC.Stg.Utils
 
 import GHC.Core.Lint        ( interactiveInScope )
 import GHC.Core.DataCon
@@ -67,6 +68,7 @@ import GHC.Data.Bag         ( Bag, emptyBag, isEmptyBag, snocBag, bagToList )
 
 import Control.Applicative ((<|>))
 import Control.Monad
+import Data.Maybe
 
 lintStgTopBindings :: forall a . (OutputablePass a, BinderP a ~ Id)
                    => Logger
@@ -188,10 +190,24 @@ lintStgExpr :: (OutputablePass a, BinderP a ~ Id) => GenStgExpr a -> LintM ()
 
 lintStgExpr (StgLit _) = return ()
 
-lintStgExpr (StgApp fun args) = do
+lintStgExpr e@(StgApp fun args) = do
     lintStgVar fun
     mapM_ lintStgArg args
 
+    lf <- getLintFlags
+    when (lf_unarised lf) $ do
+      -- A function which expects a unlifted argument as n'th argument
+      -- always needs to be applied to n arguments.
+      -- See Note [Strict Worker Ids].
+      let marks = fromMaybe [] $ idCbvMarks_maybe fun
+      if length marks > length args
+        then addErrL $ hang (text "Undersatured cbv marked ID in App" <+> ppr e ) 2 $
+          (text "marks" <> ppr marks $$
+          text "args" <> ppr args $$
+          text "arity" <> ppr (idArity fun) $$
+          text "join_arity" <> ppr (isJoinId_maybe fun))
+        else return ()
+
 lintStgExpr app@(StgConApp con _n args _arg_tys) = do
     -- unboxed sums should vanish during unarise
     lf <- getLintFlags
diff --git a/compiler/GHC/Stg/Stats.hs b/compiler/GHC/Stg/Stats.hs
index a4d36136bf..7c7df34f27 100644
--- a/compiler/GHC/Stg/Stats.hs
+++ b/compiler/GHC/Stg/Stats.hs
@@ -146,7 +146,7 @@ statExpr :: StgExpr -> StatEnv
 
 statExpr (StgApp _ _)     = countOne Applications
 statExpr (StgLit _)       = countOne Literals
-statExpr (StgConApp _ _ _ _)= countOne ConstructorApps
+statExpr (StgConApp {})   = countOne ConstructorApps
 statExpr (StgOpApp _ _ _) = countOne PrimitiveApps
 statExpr (StgTick _ e)    = statExpr e
 
diff --git a/compiler/GHC/Stg/Syntax.hs b/compiler/GHC/Stg/Syntax.hs
index bf6bac3853..6726bbe526 100644
--- a/compiler/GHC/Stg/Syntax.hs
+++ b/compiler/GHC/Stg/Syntax.hs
@@ -39,6 +39,9 @@ module GHC.Stg.Syntax (
         -- a set of synonyms for the code gen parameterisation
         CgStgTopBinding, CgStgBinding, CgStgExpr, CgStgRhs, CgStgAlt,
 
+        -- Same for taggedness
+        TgStgTopBinding, TgStgBinding, TgStgExpr, TgStgRhs, TgStgAlt,
+
         -- a set of synonyms for the lambda lifting parameterisation
         LlStgTopBinding, LlStgBinding, LlStgExpr, LlStgRhs, LlStgAlt,
 
@@ -53,9 +56,7 @@ module GHC.Stg.Syntax (
         stgRhsArity, freeVarsOfRhs,
         isDllConApp,
         stgArgType,
-        stripStgTicksTop, stripStgTicksTopE,
         stgCaseBndrInScope,
-        bindersOf, bindersOfTop, bindersOfTopBinds,
 
         -- ppr
         StgPprOpts(..),
@@ -176,21 +177,6 @@ stgArgType :: StgArg -> Type
 stgArgType (StgVarArg v)   = idType v
 stgArgType (StgLitArg lit) = literalType lit
 
-
--- | Strip ticks of a given type from an STG expression.
-stripStgTicksTop :: (StgTickish -> Bool) -> GenStgExpr p -> ([StgTickish], GenStgExpr p)
-stripStgTicksTop p = go []
-   where go ts (StgTick t e) | p t = go (t:ts) e
-         -- This special case avoid building a thunk for "reverse ts" when there are no ticks
-         go [] other               = ([], other)
-         go ts other               = (reverse ts, other)
-
--- | Strip ticks of a given type from an STG expression returning only the expression.
-stripStgTicksTopE :: (StgTickish -> Bool) -> GenStgExpr p -> GenStgExpr p
-stripStgTicksTopE p = go
-   where go (StgTick t e) | p t = go e
-         go other               = other
-
 -- | Given an alt type and whether the program is unarised, return whether the
 -- case binder is in scope.
 --
@@ -428,36 +414,6 @@ important):
         [StgTickish]
         [StgArg]        -- Args
 
-{-
-Note Stg Passes
-~~~~~~~~~~~~~~~
-Here is a short summary of the STG pipeline and where we use the different
-StgPass data type indexes:
-
-  1. CoreToStg.Prep performs several transformations that prepare the desugared
-     and simplified core to be converted to STG. One of these transformations is
-     making it so that value lambdas only exist as the RHS of a binding.
-
-  2. CoreToStg converts the prepared core to STG, specifically GenStg*
-     parameterised by 'Vanilla.
-
-  3. Stg.Pipeline does a number of passes on the generated STG. One of these is
-     the lambda-lifting pass, which internally uses the 'LiftLams
-     parameterisation to store information for deciding whether or not to lift
-     each binding.
-
-  4. Stg.FVs annotates closures with their free variables. To store these
-     annotations we use the 'CodeGen parameterisation.
-
-  5. Stg.StgToCmm generates Cmm from the annotated STG.
--}
-
--- | Used as a data type index for the stgSyn AST
-data StgPass
-  = Vanilla
-  | LiftLams
-  | CodeGen
-
 -- | Like 'GHC.Hs.Extension.NoExtField', but with an 'Outputable' instance that
 -- returns 'empty'.
 data NoExtFieldSilent = NoExtFieldSilent
@@ -473,40 +429,11 @@ noExtFieldSilent = NoExtFieldSilent
 -- TODO: Maybe move this to GHC.Hs.Extension? I'm not sure about the
 -- implications on build time...
 
--- TODO: Do we really want to the extension point type families to have a closed
--- domain?
-type family BinderP (pass :: StgPass)
-type instance BinderP 'Vanilla = Id
-type instance BinderP 'CodeGen = Id
-
-type family XRhsClosure (pass :: StgPass)
-type instance XRhsClosure 'Vanilla = NoExtFieldSilent
--- | Code gen needs to track non-global free vars
-type instance XRhsClosure 'CodeGen = DIdSet
-
-type family XLet (pass :: StgPass)
-type instance XLet 'Vanilla = NoExtFieldSilent
-type instance XLet 'CodeGen = NoExtFieldSilent
-
--- | When `-fdistinct-constructor-tables` is turned on then
--- each usage of a constructor is given an unique number and
--- an info table is generated for each different constructor.
-data ConstructorNumber =
-      NoNumber | Numbered Int
-
-instance Outputable ConstructorNumber where
-  ppr NoNumber = empty
-  ppr (Numbered n) = text "#" <> ppr n
-
-type family XLetNoEscape (pass :: StgPass)
-type instance XLetNoEscape 'Vanilla = NoExtFieldSilent
-type instance XLetNoEscape 'CodeGen = NoExtFieldSilent
-
 stgRhsArity :: StgRhs -> Int
 stgRhsArity (StgRhsClosure _ _ _ bndrs _)
   = assert (all isId bndrs) $ length bndrs
   -- The arity never includes type parameters, but they should have gone by now
-stgRhsArity (StgRhsCon _ _ _ _ _) = 0
+stgRhsArity (StgRhsCon {}) = 0
 
 freeVarsOfRhs :: (XRhsClosure pass ~ DIdSet) => GenStgRhs pass -> DIdSet
 freeVarsOfRhs (StgRhsCon _ _ _ _ args) = mkDVarSet [ id | StgVarArg id <- args ]
@@ -550,7 +477,31 @@ The Plain STG parameterisation
 *                                                                      *
 ************************************************************************
 
-This happens to be the only one we use at the moment.
+  Note [STG Extension points]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  We now make use of extension points in STG for different passes which want
+  to associate information with AST nodes.
+
+  Currently the pipeline is roughly:
+
+  CoreToStg: Core -> Stg
+  StgSimpl: Stg -> Stg
+  CodeGen: Stg -> Cmm
+
+    As part of StgSimpl we run late lambda lifting (Ll).
+    Late lambda lift:
+    Stg -> FvStg -> LlStg -> Stg
+
+  CodeGen:
+    As part of CodeGen we run tag inference.
+    Tag Inference:
+      Stg -> Stg 'InferTaggedBinders` -> Stg
+
+    And at a last step we add the free Variables:
+      Stg -> CgStg
+
+  Which finally CgStg being used to generate Cmm.
+
 -}
 
 type StgTopBinding = GenStgTopBinding 'Vanilla
@@ -571,6 +522,12 @@ type CgStgExpr       = GenStgExpr       'CodeGen
 type CgStgRhs        = GenStgRhs        'CodeGen
 type CgStgAlt        = GenStgAlt        'CodeGen
 
+type TgStgTopBinding = GenStgTopBinding 'CodeGen
+type TgStgBinding    = GenStgBinding    'CodeGen
+type TgStgExpr       = GenStgExpr       'CodeGen
+type TgStgRhs        = GenStgRhs        'CodeGen
+type TgStgAlt        = GenStgAlt        'CodeGen
+
 {- Many passes apply a substitution, and it's very handy to have type
    synonyms to remind us whether or not the substitution has been applied.
    See GHC.Core for precedence in Core land
@@ -589,6 +546,79 @@ type OutStgExpr       = StgExpr
 type OutStgRhs        = StgRhs
 type OutStgAlt        = StgAlt
 
+-- | When `-fdistinct-constructor-tables` is turned on then
+-- each usage of a constructor is given an unique number and
+-- an info table is generated for each different constructor.
+data ConstructorNumber =
+      NoNumber | Numbered Int
+
+instance Outputable ConstructorNumber where
+  ppr NoNumber = empty
+  ppr (Numbered n) = text "#" <> ppr n
+
+{-
+Note Stg Passes
+~~~~~~~~~~~~~~~
+Here is a short summary of the STG pipeline and where we use the different
+StgPass data type indexes:
+
+  1. CoreToStg.Prep performs several transformations that prepare the desugared
+     and simplified core to be converted to STG. One of these transformations is
+     making it so that value lambdas only exist as the RHS of a binding.
+     See Note [CorePrep Overview].
+
+  2. CoreToStg converts the prepared core to STG, specifically GenStg*
+     parameterised by 'Vanilla. See the GHC.CoreToStg Module.
+
+  3. Stg.Pipeline does a number of passes on the generated STG. One of these is
+     the lambda-lifting pass, which internally uses the 'LiftLams
+     parameterisation to store information for deciding whether or not to lift
+     each binding.
+     See Note [Late lambda lifting in STG].
+
+  4. Tag inference takes in 'Vanilla and produces 'InferTagged STG, while using
+     the InferTaggedBinders annotated AST internally.
+     See Note [Tag Inference].
+
+  5. Stg.FVs annotates closures with their free variables. To store these
+     annotations we use the 'CodeGen parameterisation.
+     See the GHC.Stg.FVs module.
+
+  6. The Module Stg.StgToCmm generates Cmm from the CodeGen annotated STG.
+-}
+
+
+-- | Used as a data type index for the stgSyn AST
+data StgPass
+  = Vanilla
+  | LiftLams -- ^ Use internally by the lambda lifting pass
+  | InferTaggedBinders -- ^ Tag inference information on binders.
+                       -- See Note [Tag inference passes] in GHC.Stg.InferTags
+  | InferTagged -- ^ Tag inference information put on relevant StgApp nodes
+                -- See Note [Tag inference passes] in GHC.Stg.InferTags
+  | CodeGen
+
+type family BinderP (pass :: StgPass)
+type instance BinderP 'Vanilla = Id
+type instance BinderP 'CodeGen = Id
+type instance BinderP 'InferTagged = Id
+
+type family XRhsClosure (pass :: StgPass)
+type instance XRhsClosure 'Vanilla = NoExtFieldSilent
+type instance XRhsClosure 'InferTagged = NoExtFieldSilent
+-- | Code gen needs to track non-global free vars
+type instance XRhsClosure 'CodeGen = DIdSet
+
+type family XLet (pass :: StgPass)
+type instance XLet 'Vanilla = NoExtFieldSilent
+type instance XLet 'InferTagged = NoExtFieldSilent
+type instance XLet 'CodeGen = NoExtFieldSilent
+
+type family XLetNoEscape (pass :: StgPass)
+type instance XLetNoEscape 'Vanilla = NoExtFieldSilent
+type instance XLetNoEscape 'InferTagged = NoExtFieldSilent
+type instance XLetNoEscape 'CodeGen = NoExtFieldSilent
+
 {-
 
 ************************************************************************
@@ -643,25 +673,6 @@ data StgOp
 {-
 ************************************************************************
 *                                                                      *
-Utilities
-*                                                                      *
-************************************************************************
--}
-
-bindersOf :: BinderP a ~ Id => GenStgBinding a -> [Id]
-bindersOf (StgNonRec binder _) = [binder]
-bindersOf (StgRec pairs)       = [binder | (binder, _) <- pairs]
-
-bindersOfTop :: BinderP a ~ Id => GenStgTopBinding a -> [Id]
-bindersOfTop (StgTopLifted bind) = bindersOf bind
-bindersOfTop (StgTopStringLit binder _) = [binder]
-
-bindersOfTopBinds :: BinderP a ~ Id => [GenStgTopBinding a] -> [Id]
-bindersOfTopBinds = foldr ((++) . bindersOfTop) []
-
-{-
-************************************************************************
-*                                                                      *
 Pretty-printing
 *                                                                      *
 ************************************************************************
@@ -712,6 +723,9 @@ pprGenStgBinding opts b = case b of
                              = hang (hsep [pprBndr LetBind bndr, equals])
                                     4 (pprStgRhs opts expr <> semi)
 
+instance OutputablePass pass => Outputable  (GenStgBinding pass) where
+  ppr = pprGenStgBinding panicStgPprOpts
+
 pprGenStgTopBindings :: (OutputablePass pass) => StgPprOpts -> [GenStgTopBinding pass] -> SDoc
 pprGenStgTopBindings opts binds
   = vcat $ intersperse blankLine (map (pprGenStgTopBinding opts) binds)
@@ -732,12 +746,19 @@ pprStgArg :: StgArg -> SDoc
 pprStgArg (StgVarArg var) = ppr var
 pprStgArg (StgLitArg con) = ppr con
 
+instance OutputablePass pass => Outputable  (GenStgExpr pass) where
+  ppr = pprStgExpr panicStgPprOpts
+
 pprStgExpr :: OutputablePass pass => StgPprOpts -> GenStgExpr pass -> SDoc
 pprStgExpr opts e = case e of
                            -- special case
    StgLit lit           -> ppr lit
                            -- general case
-   StgApp func args     -> hang (ppr func) 4 (interppSP args)
+   StgApp func args
+      | null args
+      , Just sig <- idTagSig_maybe func
+      -> ppr func <> ppr sig
+      | otherwise -> hang (ppr func) 4 (interppSP args) -- TODO: Print taggedness
    StgConApp con n args _ -> hsep [ ppr con, ppr n, brackets (interppSP args) ]
    StgOpApp op args _   -> hsep [ pprStgOp op, brackets (interppSP args)]
 
@@ -850,4 +871,8 @@ pprStgRhs opts rhs = case rhs of
               , case mid of
                   NoNumber -> empty
                   Numbered n -> hcat [ppr n, space]
+              -- The bang indicates this is an StgRhsCon instead of an StgConApp.
               , ppr con, text "! ", brackets (sep (map pprStgArg args))]
+
+instance OutputablePass pass => Outputable  (GenStgRhs pass) where
+  ppr = pprStgRhs panicStgPprOpts
diff --git a/compiler/GHC/Stg/Unarise.hs b/compiler/GHC/Stg/Unarise.hs
index 15635e754a..25b9c5e582 100644
--- a/compiler/GHC/Stg/Unarise.hs
+++ b/compiler/GHC/Stg/Unarise.hs
@@ -258,6 +258,7 @@ import GHC.Utils.Panic
 import GHC.Utils.Panic.Plain
 import GHC.Types.RepType
 import GHC.Stg.Syntax
+import GHC.Stg.Utils
 import GHC.Core.Type
 import GHC.Builtin.Types.Prim (intPrimTy)
 import GHC.Builtin.Types
@@ -802,10 +803,10 @@ unariseConArgBinder = unariseArgBinder True
 --------------------------------------------------------------------------------
 
 mkIds :: FastString -> [UnaryType] -> UniqSM [Id]
-mkIds fs tys = mapM (mkId fs) tys
+mkIds fs tys = mkUnarisedIds fs tys
 
 mkId :: FastString -> UnaryType -> UniqSM Id
-mkId s t = mkSysLocalM s Many t
+mkId s t = mkUnarisedId s t
 
 isMultiValBndr :: Id -> Bool
 isMultiValBndr id
diff --git a/compiler/GHC/Stg/Utils.hs b/compiler/GHC/Stg/Utils.hs
new file mode 100644
index 0000000000..4561e25765
--- /dev/null
+++ b/compiler/GHC/Stg/Utils.hs
@@ -0,0 +1,124 @@
+{-# LANGUAGE CPP, ScopedTypeVariables, TypeFamilies #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DataKinds #-}
+
+module GHC.Stg.Utils
+    ( mkStgAltTypeFromStgAlts
+    , bindersOf, bindersOfX, bindersOfTop, bindersOfTopBinds
+
+    , stripStgTicksTop, stripStgTicksTopE
+    , idArgs
+
+    , mkUnarisedId, mkUnarisedIds
+    ) where
+
+import GHC.Prelude
+
+import GHC.Types.Id
+import GHC.Core.Type
+import GHC.Core.TyCon
+import GHC.Core.DataCon
+import GHC.Core ( AltCon(..) )
+import GHC.Types.Tickish
+import GHC.Types.Unique.Supply
+
+import GHC.Types.RepType
+import GHC.Stg.Syntax
+
+import GHC.Utils.Outputable
+
+import GHC.Utils.Panic.Plain
+import GHC.Utils.Panic
+
+import GHC.Data.FastString
+
+mkUnarisedIds :: MonadUnique m => FastString -> [UnaryType] -> m [Id]
+mkUnarisedIds fs tys = mapM (mkUnarisedId fs) tys
+
+mkUnarisedId :: MonadUnique m => FastString -> UnaryType -> m Id
+mkUnarisedId s t = mkSysLocalM s Many t
+
+-- Checks if id is a top level error application.
+-- isErrorAp_maybe :: Id ->
+
+-- | Extract the default case alternative
+-- findDefaultStg :: [Alt b] -> ([Alt b], Maybe (Expr b))
+findDefaultStg :: [GenStgAlt p] -> ([(AltCon, [BinderP p], GenStgExpr p)],
+                       Maybe (GenStgExpr p))
+findDefaultStg ((DEFAULT, args, rhs) : alts) = assert( null args ) (alts, Just rhs)
+findDefaultStg alts                        =                     (alts, Nothing)
+
+mkStgAltTypeFromStgAlts :: forall p. Id -> [GenStgAlt p] -> AltType
+mkStgAltTypeFromStgAlts bndr alts
+  | isUnboxedTupleType bndr_ty || isUnboxedSumType bndr_ty
+  = MultiValAlt (length prim_reps)  -- always use MultiValAlt for unboxed tuples
+
+  | otherwise
+  = case prim_reps of
+      [rep] | isGcPtrRep rep ->
+        case tyConAppTyCon_maybe (unwrapType bndr_ty) of
+          Just tc
+            | isAbstractTyCon tc -> look_for_better_tycon
+            | isAlgTyCon tc      -> AlgAlt tc
+            | otherwise          -> assertPpr ( _is_poly_alt_tycon tc) (ppr tc)
+                                    PolyAlt
+          Nothing                -> PolyAlt
+      [non_gcd] -> PrimAlt non_gcd
+      not_unary -> MultiValAlt (length not_unary)
+  where
+   bndr_ty   = idType bndr
+   prim_reps = typePrimRep bndr_ty
+
+   _is_poly_alt_tycon tc
+        =  isFunTyCon tc
+        || isPrimTyCon tc   -- "Any" is lifted but primitive
+        || isFamilyTyCon tc -- Type family; e.g. Any, or arising from strict
+                            -- function application where argument has a
+                            -- type-family type
+
+   -- Sometimes, the TyCon is a AbstractTyCon which may not have any
+   -- constructors inside it.  Then we may get a better TyCon by
+   -- grabbing the one from a constructor alternative
+   -- if one exists.
+   look_for_better_tycon
+        | (((DataAlt con) ,_, _) : _) <- data_alts =
+                AlgAlt (dataConTyCon con)
+        | otherwise =
+                assert(null data_alts)
+                PolyAlt
+        where
+                (data_alts, _deflt) = findDefaultStg alts
+
+bindersOf :: BinderP a ~ Id => GenStgBinding a -> [Id]
+bindersOf (StgNonRec binder _) = [binder]
+bindersOf (StgRec pairs)       = [binder | (binder, _) <- pairs]
+
+bindersOfX :: GenStgBinding a -> [BinderP a]
+bindersOfX (StgNonRec binder _) = [binder]
+bindersOfX (StgRec pairs)       = [binder | (binder, _) <- pairs]
+
+bindersOfTop :: BinderP a ~ Id => GenStgTopBinding a -> [Id]
+bindersOfTop (StgTopLifted bind) = bindersOf bind
+bindersOfTop (StgTopStringLit binder _) = [binder]
+
+-- All ids we bind something to on the top level.
+bindersOfTopBinds :: BinderP a ~ Id => [GenStgTopBinding a] -> [Id]
+-- bindersOfTopBinds binds = mapUnionVarSet (mkVarSet . bindersOfTop) binds
+bindersOfTopBinds binds = foldr ((++) . bindersOfTop) [] binds
+
+idArgs :: [StgArg] -> [Id]
+idArgs args = [v | StgVarArg v <- args]
+
+-- | Strip ticks of a given type from an STG expression.
+stripStgTicksTop :: (StgTickish -> Bool) -> GenStgExpr p -> ([StgTickish], GenStgExpr p)
+stripStgTicksTop p = go []
+   where go ts (StgTick t e) | p t = go (t:ts) e
+         -- This special case avoid building a thunk for "reverse ts" when there are no ticks
+         go [] other               = ([], other)
+         go ts other               = (reverse ts, other)
+
+-- | Strip ticks of a given type from an STG expression returning only the expression.
+stripStgTicksTopE :: (StgTickish -> Bool) -> GenStgExpr p -> GenStgExpr p
+stripStgTicksTopE p = go
+   where go (StgTick t e) | p t = go e
+         go other               = other
diff --git a/compiler/GHC/StgToByteCode.hs b/compiler/GHC/StgToByteCode.hs
index c574327665..885af12944 100644
--- a/compiler/GHC/StgToByteCode.hs
+++ b/compiler/GHC/StgToByteCode.hs
@@ -661,7 +661,7 @@ schemeT _d _s _p (StgOpApp StgPrimCallOp{} _args _ty)
    = unsupportedCConvException
 
    -- Case 2: Unboxed tuple
-schemeT d s p (StgConApp con _ext args _tys)
+schemeT d s p (StgConApp con _cn args _tys)
    | isUnboxedTupleDataCon con || isUnboxedSumDataCon con
    = returnUnboxedTuple d s p args
 
diff --git a/compiler/GHC/StgToCmm.hs b/compiler/GHC/StgToCmm.hs
index 9931b81e6e..eea77198aa 100644
--- a/compiler/GHC/StgToCmm.hs
+++ b/compiler/GHC/StgToCmm.hs
@@ -70,6 +70,7 @@ import GHC.Utils.Misc
 import System.IO.Unsafe
 import qualified Data.ByteString as BS
 import Data.IORef
+import GHC.Utils.Panic (assertPpr)
 
 codeGen :: Logger
         -> TmpFs
@@ -206,7 +207,7 @@ cgTopRhs cfg _rec bndr (StgRhsCon _cc con mn _ts args)
       -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
 
 cgTopRhs cfg rec bndr (StgRhsClosure fvs cc upd_flag args body)
-  = assert (isEmptyDVarSet fvs)    -- There should be no free variables
+  = assertPpr (isEmptyDVarSet fvs) (text "fvs:" <> ppr fvs) $   -- There should be no free variables
     cgTopRhsClosure (stgToCmmPlatform cfg) rec bndr cc upd_flag args body
 
 
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs
index 4fb3fd1fbe..25d04b323c 100644
--- a/compiler/GHC/StgToCmm/Bind.hs
+++ b/compiler/GHC/StgToCmm/Bind.hs
@@ -32,6 +32,7 @@ import GHC.StgToCmm.DataCon
 import GHC.StgToCmm.Heap
 import GHC.StgToCmm.Prof (ldvEnterClosure, enterCostCentreFun, enterCostCentreThunk,
                    initUpdFrameProf)
+import GHC.StgToCmm.TagCheck
 import GHC.StgToCmm.Ticky
 import GHC.StgToCmm.Layout
 import GHC.StgToCmm.Utils
@@ -45,6 +46,7 @@ import GHC.Cmm.Info
 import GHC.Cmm.Utils
 import GHC.Cmm.CLabel
 
+import GHC.Stg.Utils
 import GHC.Types.CostCentre
 import GHC.Types.Id
 import GHC.Types.Id.Info
@@ -217,14 +219,19 @@ cgRhs id (StgRhsCon cc con mn _ts args)
 
 {- See Note [GC recovery] in "GHC.StgToCmm.Closure" -}
 cgRhs id (StgRhsClosure fvs cc upd_flag args body)
-  = do profile <- getProfile
-       mkRhsClosure profile id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body
+  = do
+    checkFunctionArgTags (text "TagCheck Failed: Rhs of" <> ppr id) id args
+    profile <- getProfile
+    check_tags <- stgToCmmDoTagCheck <$> getStgToCmmConfig
+    mkRhsClosure profile check_tags id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body
+
 
 ------------------------------------------------------------------------
 --              Non-constructor right hand sides
 ------------------------------------------------------------------------
 
-mkRhsClosure :: Profile -> Id -> CostCentreStack
+mkRhsClosure :: Profile -> Bool
+             -> Id -> CostCentreStack
              -> [NonVoid Id]                    -- Free vars
              -> UpdateFlag
              -> [Id]                            -- Args
@@ -267,7 +274,7 @@ for semi-obvious reasons.
 -}
 
 ---------- See Note [Selectors] ------------------
-mkRhsClosure    profile bndr _cc
+mkRhsClosure    profile _check_tags bndr _cc
                 [NonVoid the_fv]                -- Just one free var
                 upd_flag                -- Updatable thunk
                 []                      -- A thunk
@@ -300,7 +307,7 @@ mkRhsClosure    profile bndr _cc
     in cgRhsStdThunk bndr lf_info [StgVarArg the_fv]
 
 ---------- See Note [Ap thunks] ------------------
-mkRhsClosure    profile bndr _cc
+mkRhsClosure    profile check_tags bndr _cc
                 fvs
                 upd_flag
                 []                      -- No args; a thunk
@@ -321,8 +328,8 @@ mkRhsClosure    profile bndr _cc
                          -- lose information about this particular
                          -- thunk (e.g. its type) (#949)
   , idArity fun_id == unknownArity -- don't spoil a known call
-
           -- Ha! an Ap thunk
+  , not check_tags -- See Note [Tag inference debugging]
   = cgRhsStdThunk bndr lf_info payload
 
   where
@@ -333,7 +340,7 @@ mkRhsClosure    profile bndr _cc
     payload = StgVarArg fun_id : args
 
 ---------- Default case ------------------
-mkRhsClosure profile bndr cc fvs upd_flag args body
+mkRhsClosure profile _check_tags bndr cc fvs upd_flag args body
   = do  { let lf_info = mkClosureLFInfo (profilePlatform profile) bndr NotTopLevel fvs upd_flag args
         ; (id_info, reg) <- rhsIdInfo bndr lf_info
         ; return (id_info, gen_code lf_info reg) }
@@ -517,6 +524,7 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details
                 -- Load free vars out of closure *after*
                 -- heap check, to reduce live vars over check
                 ; when node_points $ load_fvs node lf_info fv_bindings
+                ; checkFunctionArgTags (text "TagCheck failed - Argument to local function:" <> ppr bndr) bndr (map fromNonVoid nv_args)
                 ; void $ cgExpr body
                 }}}
 
diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs
index b2f51c60fd..0d048a6be8 100644
--- a/compiler/GHC/StgToCmm/Closure.hs
+++ b/compiler/GHC/StgToCmm/Closure.hs
@@ -4,7 +4,6 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE FlexibleInstances #-}
 
-
 -----------------------------------------------------------------------------
 --
 -- Stg to C-- code generation:
@@ -101,6 +100,7 @@ import GHC.Utils.Misc
 import Data.Coerce (coerce)
 import qualified Data.ByteString.Char8 as BS8
 import GHC.StgToCmm.Config
+import GHC.Stg.InferTags.TagSig (isTaggedSig)
 
 -----------------------------------------------------------------------------
 --                Data types and synonyms
@@ -478,20 +478,38 @@ When black-holing, single-entry closures could also be entered via node
 (rather than directly) to catch double-entry. -}
 
 data CallMethod
-  = EnterIt             -- No args, not a function
+  = EnterIt             -- ^ No args, not a function
 
   | JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop
 
   | ReturnIt            -- It's a value (function, unboxed value,
                         -- or constructor), so just return it.
 
-  | SlowCall                -- Unknown fun, or known fun with
+  | InferedReturnIt     -- A properly tagged value, as determined by tag inference.
+                        -- See Note [Tag Inference] and Note [Tag inference passes] in
+                        -- GHC.Stg.InferTags.
+                        -- It behaves /precisely/ like `ReturnIt`, except that when debugging is
+                        -- enabled we emit an extra assertion to check that the returned value is
+                        -- properly tagged.  We can use this as a check that tag inference is working
+                        -- correctly.
+                        -- TODO: SPJ suggested we could combine this with EnterIt, but for now I decided
+                        -- not to do so.
+
+  | SlowCall            -- Unknown fun, or known fun with
                         -- too few args.
 
   | DirectEntry         -- Jump directly, with args in regs
         CLabel          --   The code label
         RepArity        --   Its arity
 
+instance Outputable CallMethod where
+  ppr (EnterIt) = text "Enter"
+  ppr (JumpToIt {}) = text "JumpToIt"
+  ppr (ReturnIt ) = text "ReturnIt"
+  ppr (InferedReturnIt) = text "InferedReturnIt"
+  ppr (SlowCall ) = text "SlowCall"
+  ppr (DirectEntry {}) = text "DirectEntry"
+
 getCallMethod :: StgToCmmConfig
               -> Name           -- Function being applied
               -> Id             -- Function Id used to chech if it can refer to
@@ -538,6 +556,12 @@ getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info
 
 getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun)
               n_args _v_args _cg_loc _self_loop_info
+
+  | Just sig <- idTagSig_maybe id
+  , isTaggedSig sig -- Infered to be already evaluated by Tag Inference
+  , n_args == 0     -- See Note [Tag Inference]
+  = InferedReturnIt
+
   | is_fun      -- it *might* be a function, so we must "call" it (which is always safe)
   = SlowCall    -- We cannot just enter it [in eval/apply, the entry code
                 -- is the fast-entry code]
@@ -568,12 +592,27 @@ getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun)
     DirectEntry (thunkEntryLabel (stgToCmmPlatform cfg) name (idCafInfo id) std_form_info
                 updatable) 0
 
-getCallMethod _ _name _ (LFUnknown True) _n_arg _v_args _cg_locs _self_loop_info
-  = SlowCall -- might be a function
-
-getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info
-  = assertPpr (n_args == 0) (ppr name <+> ppr n_args)
-    EnterIt -- Not a function
+-- Imported(Unknown) Ids
+getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_locs _self_loop_info
+  | n_args == 0
+  , Just sig <- idTagSig_maybe id
+  , isTaggedSig sig -- Infered to be already evaluated by Tag Inference
+  -- When profiling we enter functions to update the SCC so we
+  -- can't use the infered enterInfo here.
+  -- See Note [Evaluating functions with profiling] in rts/Apply.cmm
+  , not (profileIsProfiling (stgToCmmProfile cfg) && might_be_a_function)
+  = InferedReturnIt -- See Note [Tag Inference]
+
+  | might_be_a_function = SlowCall
+
+  | otherwise =
+      assertPpr ( n_args == 0) ( ppr name <+> ppr n_args )
+      EnterIt   -- Not a function
+
+-- TODO: Redundant with above match?
+-- getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info
+--   = assertPpr (n_args == 0) (ppr name <+> ppr n_args)
+--     EnterIt -- Not a function
 
 getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs) _self_loop_info
   = JumpToIt blk_id lne_regs
diff --git a/compiler/GHC/StgToCmm/Config.hs b/compiler/GHC/StgToCmm/Config.hs
index 8751f5ffe5..28c11e1321 100644
--- a/compiler/GHC/StgToCmm/Config.hs
+++ b/compiler/GHC/StgToCmm/Config.hs
@@ -52,6 +52,7 @@ data StgToCmmConfig = StgToCmmConfig
                                                  -- code for linking against dynamic libraries
   , stgToCmmDoBoundsCheck  :: !Bool              -- ^ decides whether to check array bounds in StgToCmm.Prim
                                                  -- or not
+  , stgToCmmDoTagCheck     :: !Bool              -- ^ Verify tag inference predictions.
   ------------------------------ Backend Flags ----------------------------------
   , stgToCmmAllowBigArith             :: !Bool   -- ^ Allowed to emit larger than native size arithmetic (only LLVM and C backends)
   , stgToCmmAllowQuotRemInstr         :: !Bool   -- ^ Allowed to generate QuotRem instructions
diff --git a/compiler/GHC/StgToCmm/DataCon.hs b/compiler/GHC/StgToCmm/DataCon.hs
index bed2f164eb..2a7203e101 100644
--- a/compiler/GHC/StgToCmm/DataCon.hs
+++ b/compiler/GHC/StgToCmm/DataCon.hs
@@ -52,6 +52,8 @@ import GHC.Utils.Monad (mapMaybeM)
 import Control.Monad
 import Data.Char
 import GHC.StgToCmm.Config (stgToCmmPlatform)
+import GHC.StgToCmm.TagCheck (checkConArgsStatic, checkConArgsDyn)
+import GHC.Utils.Outputable
 
 ---------------------------------------------------------------
 --      Top-level constructors
@@ -93,7 +95,7 @@ cgTopRhsCon cfg id con mn args
               -- Windows DLLs have a problem with static cross-DLL refs.
               massert (not (isDllConApp platform (stgToCmmExtDynRefs cfg) this_mod con (map fromNonVoid args)))
         ; assert (args `lengthIs` countConRepArgs con ) return ()
-
+        ; checkConArgsStatic (text "TagCheck failed - Top level con") con (map fromNonVoid args)
         -- LAY IT OUT
         ; let
             (tot_wds, --  #ptr_wds + #nonptr_wds
@@ -210,6 +212,7 @@ buildDynCon' binder mn actually_bound ccs con args
           ; let ticky_name | actually_bound = Just binder
                            | otherwise = Nothing
 
+          ; checkConArgsDyn (text "TagCheck failed - con_alloc:" <> ppr binder) con (map fromNonVoid args)
           ; hp_plus_n <- allocDynClosure ticky_name info_tbl lf_info
                                           use_cc blame_cc args_w_offsets
           ; return (mkRhsInit platform reg lf_info hp_plus_n) }
diff --git a/compiler/GHC/StgToCmm/Env.hs b/compiler/GHC/StgToCmm/Env.hs
index 8f82c02e8e..eef1420a72 100644
--- a/compiler/GHC/StgToCmm/Env.hs
+++ b/compiler/GHC/StgToCmm/Env.hs
@@ -19,7 +19,7 @@ module GHC.StgToCmm.Env (
         bindArgToReg, idToReg,
         getCgIdInfo,
         maybeLetNoEscape,
-    ) where
+        ) where
 
 import GHC.Prelude
 
@@ -44,7 +44,6 @@ import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Utils.Panic.Plain
 
-
 -------------------------------------
 --        Manipulating CgIdInfo
 -------------------------------------
@@ -116,12 +115,20 @@ addBindsC new_bindings = do
                                new_bindings
         setBinds new_binds
 
+-- Inside GHC the average module creates 385 external references
+-- with noteable cgIdInfo (so not generated by mkLFArgument).
+-- On average 200 of these are covered by True/False/[]
+-- and nullary constructors make up ~80.
+-- One would think it would be worthwhile to cache these.
+-- Sadly it's not. See #16937
+
 getCgIdInfo :: Id -> FCode CgIdInfo
 getCgIdInfo id
   = do  { platform <- getPlatform
         ; local_binds <- getBinds -- Try local bindings first
         ; case lookupVarEnv local_binds id of {
-            Just info -> return info ;
+            Just info -> -- pprTrace "getCgIdInfoLocal" (ppr id) $
+              return info ;
             Nothing   -> do {
 
                 -- Should be imported; make up a CgIdInfo for it
diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs
index 3a3c1db647..5129a45b1c 100644
--- a/compiler/GHC/StgToCmm/Expr.hs
+++ b/compiler/GHC/StgToCmm/Expr.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE BangPatterns #-}
-
 {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE TypeFamilies #-}
 
 -----------------------------------------------------------------------------
 --
@@ -25,6 +26,7 @@ import GHC.StgToCmm.Layout
 import GHC.StgToCmm.Lit
 import GHC.StgToCmm.Prim
 import GHC.StgToCmm.Hpc
+import GHC.StgToCmm.TagCheck
 import GHC.StgToCmm.Ticky
 import GHC.StgToCmm.Utils
 import GHC.StgToCmm.Closure
@@ -36,6 +38,7 @@ import GHC.Cmm.BlockId
 import GHC.Cmm hiding ( succ )
 import GHC.Cmm.Info
 import GHC.Cmm.Utils ( zeroExpr, cmmTagMask, mkWordCLit, mAX_PTR_TAG )
+import GHC.Cmm.Ppr
 import GHC.Core
 import GHC.Core.DataCon
 import GHC.Types.ForeignCall
@@ -56,6 +59,7 @@ import GHC.Utils.Panic.Plain
 import Control.Monad ( unless, void )
 import Control.Arrow ( first )
 import Data.List     ( partition )
+import GHC.Stg.InferTags.TagSig (isTaggedSig)
 
 ------------------------------------------------------------------------
 --              cgExpr: the main function
@@ -291,7 +295,7 @@ We adopt (b) because that is more likely to put the heap check at the
 entry to a function, when not many things are live.  After a bunch of
 single-branch cases, we may have lots of things live
 
-Hence: two basic plans for
+Hence: Two basic plans for
 
         case e of r { alts }
 
@@ -306,6 +310,13 @@ Hence: two basic plans for
         ...code for alts...
         ...alts do their own heap checks
 
+   When using GcInAlts the return point for heap checks and evaluating
+   the scrutinee is shared. This does mean we might execute the actual
+   branching code twice but it's rare enough to not matter.
+   The huge advantage of this pattern is that we do not require multiple
+   info tables for returning from gc as they can be shared between all
+   cases. Reducing code size nicely.
+
 ------ Plan B: special case when ---------
   (i)  e does not allocate or call GC
   (ii) either upstream code performs allocation
@@ -320,6 +331,80 @@ Hence: two basic plans for
 
         ...code for alts...
         ...no heap check...
+
+   There is a variant B.2 which we use if:
+
+  (i)   e is already evaluated+tagged
+  (ii)  We have multiple alternatives
+  (iii) and there is no upstream allocation.
+
+  Here we also place one heap check before the `case` which
+  branches on `e`. Hopefully to be absorbed by an already existing
+  heap check further up. However the big difference in this case is that
+  there is no code for e. So we are not guaranteed that the heap
+  checks of the alts will be combined with an heap check further up.
+
+  Very common example: Casing on strict fields.
+
+        ...heap check...
+        ...assign bindings...
+
+        ...code for alts...
+        ...no heap check...
+
+  -- Reasoning for Plan B.2:
+   Since the scrutinee is already evaluated there is no evaluation
+   call which would force a info table that we can use as a shared
+   return point.
+   This means currently if we were to do GcInAlts like in Plan A then
+   we would end up with one info table per alternative.
+
+   To avoid this we unconditionally do gc outside of the alts with all
+   the pros and cons described in Note [Compiling case expressions].
+   Rewriting the logic to generate a shared return point before the case
+   expression while keeping the heap checks in the alternatives would be
+   possible. But it's unclear to me that this would actually be an improvement.
+
+   This means if we have code along these lines:
+
+      g x y = case x of
+         True -> Left $ (y + 1,y,y-1)
+         False -> Right $! y - (2 :: Int)
+
+   We get these potential heap check placements:
+
+   f = ...
+      !max(L,R)!; -- Might be absorbed upstream.
+      case x of
+         True  -> !L!; ...L...
+         False -> !R!; ...R...
+
+   And we place a heap check at !max(L,R)!
+
+   The downsides of using !max(L,R)! are:
+
+   * If f is recursive, and the hot loop wouldn't allocate, but the exit branch does then we do
+   a redundant heap check.
+   * We use one more instruction to de-allocate the unused heap in the branch using less heap. (Neglible)
+   * A small risk of running gc slightly more often than needed especially if one branch allocates a lot.
+
+   The upsides are:
+   * May save a heap overflow test if there is an upstream check already.
+   * If the heap check is absorbed upstream we can also eliminate its info table.
+   * We generate at most one heap check (versus one per alt otherwise).
+   * No need to save volatile vars etc across heap checks in !L!, !R!
+   * We can use relative addressing from a single Hp to get at all the closures so allocated. (seems neglible)
+   * It fits neatly in the logic we already have for handling A/B
+
+   For containers:Data/Sequence/Internal/Sorting.o the difference is
+   about 10% in terms of code size compared to using Plan A for this case.
+   The main downside is we might put heap checks into loops, even if we
+   could avoid it (See Note [Compiling case expressions]).
+
+   Potential improvement: Investigate if heap checks in alts would be an
+   improvement if we generate and use a shared return point that is placed
+   in the common path for all alts.
+
 -}
 
 
@@ -461,11 +546,13 @@ cgCase scrut bndr alt_type alts
        ; up_hp_usg <- getVirtHp        -- Upstream heap usage
        ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
              alt_regs  = map (idToReg platform) ret_bndrs
+
        ; simple_scrut <- isSimpleScrut scrut alt_type
        ; let do_gc  | is_cmp_op scrut  = False  -- See Note [GC for conditionals]
                     | not simple_scrut = True
                     | isSingleton alts = False
                     | up_hp_usg > 0    = False
+                    | evaluatedScrut   = False
                     | otherwise        = True
                -- cf Note [Compiling case expressions]
              gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts
@@ -481,6 +568,13 @@ cgCase scrut bndr alt_type alts
   where
     is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op
     is_cmp_op _                             = False
+    evaluatedScrut
+      | (StgApp v []) <- scrut
+      , Just sig <- idTagSig_maybe v
+      , isTaggedSig sig = True
+      | otherwise = False
+
+
 
 {- Note [GC for conditionals]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -530,10 +624,13 @@ isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool
 -- heap usage from alternatives into the stuff before the case
 -- NB: if you get this wrong, and claim that the expression doesn't allocate
 --     when it does, you'll deeply mess up allocation
-isSimpleScrut (StgOpApp op args _) _       = isSimpleOp op args
-isSimpleScrut (StgLit _)       _           = return True       -- case 1# of { 0# -> ..; ... }
-isSimpleScrut (StgApp _ [])    (PrimAlt _) = return True       -- case x# of { 0# -> ..; ... }
-isSimpleScrut _                _           = return False
+isSimpleScrut (StgOpApp op args _) _         = isSimpleOp op args
+isSimpleScrut (StgLit _)           _         = return True       -- case 1# of { 0# -> ..; ... }
+isSimpleScrut (StgApp _ [])    (PrimAlt _)   = return True       -- case x# of { 0# -> ..; ... }
+isSimpleScrut (StgApp f [])   _
+  | Just sig <- idTagSig_maybe f
+  = return $! isTaggedSig sig       -- case !x of { ... }
+isSimpleScrut _                    _         = return False
 
 isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
 -- True iff the op cannot block or allocate
@@ -890,6 +987,7 @@ cgConApp con mn stg_args
 
 cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
 cgIdApp fun_id args = do
+    platform       <- getPlatform
     fun_info       <- getCgIdInfo fun_id
     cfg            <- getStgToCmmConfig
     self_loop      <- getSelfLoop
@@ -904,8 +1002,26 @@ cgIdApp fun_id args = do
             -- A value in WHNF, so we can just return it.
         ReturnIt
           | isZeroBitTy (idType fun_id) -> emitReturn []
-          | otherwise                   -> emitReturn [fun]
-          -- ToDo: does ReturnIt guarantee tagged?
+          | otherwise                -> emitReturn [fun]
+
+        -- A value infered to be in WHNF, so we can just return it.
+        InferedReturnIt
+          | isZeroBitTy (idType fun_id) -> trace >> emitReturn []
+          | otherwise                   -> trace >> assertTag >>
+                                                    emitReturn [fun]
+            where
+              trace = do
+                tickyTagged
+                use_id <- newUnique
+                _lbl <- emitTickyCounterTag use_id (NonVoid fun_id)
+                tickyTagSkip use_id fun_id
+
+                -- pprTraceM "WHNF:" (ppr fun_id <+> ppr args )
+              assertTag = whenCheckTags $ do
+                  mod <- getModuleName
+                  emitTagAssertion (showPprUnsafe
+                      (text "TagCheck failed on entry in" <+> ppr mod <+> text "- value:" <> ppr fun_id <+> pprExpr platform fun))
+                      fun
 
         EnterIt -> assert (null args) $  -- Discarding arguments
                    emitEnter fun
diff --git a/compiler/GHC/StgToCmm/TagCheck.hs b/compiler/GHC/StgToCmm/TagCheck.hs
new file mode 100644
index 0000000000..4c9fb23cc5
--- /dev/null
+++ b/compiler/GHC/StgToCmm/TagCheck.hs
@@ -0,0 +1,156 @@
+{-# LANGUAGE CPP #-}
+
+-----------------------------------------------------------------------------
+--
+-- Code generator utilities; mostly monadic
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.TagCheck
+  ( emitTagAssertion, emitArgTagCheck, checkArg, whenCheckTags,
+    checkArgStatic, checkFunctionArgTags,checkConArgsStatic,checkConArgsDyn) where
+
+#include "ClosureTypes.h"
+
+import GHC.Prelude
+
+import GHC.StgToCmm.Env
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Utils
+import GHC.Cmm
+import GHC.Cmm.BlockId
+import GHC.Cmm.Graph as CmmGraph
+
+import GHC.Core.Type
+import GHC.Types.Id
+import GHC.Utils.Misc
+import GHC.Utils.Outputable
+
+import GHC.Core.DataCon
+import Control.Monad
+import GHC.StgToCmm.Types
+import GHC.Utils.Panic (pprPanic)
+import GHC.Utils.Panic.Plain (panic)
+import GHC.Stg.Syntax
+import GHC.StgToCmm.Closure
+import GHC.Types.RepType (dataConRuntimeRepStrictness)
+import GHC.Types.Basic
+import GHC.Data.FastString (mkFastString)
+import GHC.Cmm.Info (cmmGetClosureType)
+import GHC.Cmm.Utils (mkWordCLit)
+
+-- | Check all arguments marked as already tagged for a function
+-- are tagged by inserting runtime checks.
+checkFunctionArgTags :: SDoc -> Id -> [Id] -> FCode ()
+checkFunctionArgTags msg f args = whenCheckTags $ do
+  onJust (return ()) (idCbvMarks_maybe f) $ \marks -> do
+    -- Only check args marked as strict, and only lifted ones.
+    let cbv_args = filter (isLiftedRuntimeRep . idType) $ filterByList (map isMarkedCbv marks) args
+    -- Get their (cmm) address
+    arg_infos <- mapM getCgIdInfo cbv_args
+    let arg_cmms = map idInfoToAmode arg_infos
+    mapM_ (emitTagAssertion (showPprUnsafe msg))  (arg_cmms)
+
+-- | Check all required-tagged arguments of a constructor are tagged *at compile time*.
+checkConArgsStatic :: SDoc -> DataCon -> [StgArg] -> FCode ()
+checkConArgsStatic msg con args = whenCheckTags $ do
+  let marks = dataConRuntimeRepStrictness con
+  zipWithM_ (checkArgStatic msg) marks args
+
+-- Check all required arguments of a constructor are tagged.
+-- Possible by emitting checks at runtime.
+checkConArgsDyn :: SDoc -> DataCon -> [StgArg] -> FCode ()
+checkConArgsDyn msg con args = whenCheckTags $ do
+  let marks = dataConRuntimeRepStrictness con
+  zipWithM_ (checkArg msg) (map cbvFromStrictMark marks) args
+
+whenCheckTags :: FCode () -> FCode ()
+whenCheckTags act = do
+  check_tags <- stgToCmmDoTagCheck <$> getStgToCmmConfig
+  when check_tags act
+
+-- | Call barf if we failed to predict a tag correctly.
+-- This is immensly useful when debugging issues in tag inference
+-- as it will result in a program abort when we encounter an invalid
+-- call/heap object, rather than leaving it be and segfaulting arbitrary
+-- or producing invalid results.
+-- We check if either:
+-- * A tag is present
+-- * Or the object is a PAP (for which zero is the proper tag)
+emitTagAssertion :: String -> CmmExpr -> FCode ()
+emitTagAssertion onWhat fun = do
+  { platform <- getPlatform
+  ; lret <- newBlockId
+  ; lno_tag <- newBlockId
+  ; lbarf <- newBlockId
+  -- Check for presence of any tag.
+  ; emit $ mkCbranch (cmmIsTagged platform fun)
+                     lret lno_tag (Just True)
+  -- If there is no tag check if we are dealing with a PAP
+  ; emitLabel lno_tag
+  ; emitComment (mkFastString "closereTypeCheck")
+  ; align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
+  ; profile <- getProfile
+  ; let closure_ty = cmmGetClosureType profile align_check fun
+  ; ty_reg <- newTemp (bWord platform)
+  ; emitAssign (CmmLocal ty_reg) closure_ty
+  ; emit $ mkCbranch (cmmEqWord platform
+                        (CmmReg $ CmmLocal ty_reg)
+                        (CmmLit $ mkWordCLit platform PAP))
+                      lret lbarf (Just True)
+
+  ; emitLabel lbarf
+  ; emitBarf ("Tag inference failed on:" ++ onWhat)
+  ; emitLabel lret
+  }
+
+emitArgTagCheck :: SDoc -> [CbvMark] -> [Id] -> FCode ()
+emitArgTagCheck info marks args = whenCheckTags $ do
+  mod <- getModuleName
+  let cbv_args = filter (isLiftedRuntimeRep . idType) $ filterByList (map isMarkedCbv marks) args
+  arg_infos <- mapM getCgIdInfo cbv_args
+  let arg_cmms = map idInfoToAmode arg_infos
+      mk_msg arg = showPprUnsafe (text "Untagged arg:" <> (ppr mod) <> char ':' <> info <+> ppr arg)
+  zipWithM_ emitTagAssertion (map mk_msg args) (arg_cmms)
+
+taggedCgInfo :: CgIdInfo -> Bool
+taggedCgInfo cg_info
+  = case lf of
+      LFCon {} -> True
+      LFReEntrant {} -> True
+      LFUnlifted {} -> True
+      LFThunk {} -> False
+      LFUnknown {} -> False
+      LFLetNoEscape -> panic "Let no escape binding passed to top level con"
+  where
+    lf = cg_lf cg_info
+
+-- Check that one argument is properly tagged.
+checkArg :: SDoc -> CbvMark -> StgArg -> FCode ()
+checkArg _ NotMarkedCbv _ = return ()
+checkArg msg MarkedCbv arg = whenCheckTags $
+  case arg of
+    StgLitArg _ -> return ()
+    StgVarArg v -> do
+      info <- getCgIdInfo v
+      if taggedCgInfo info
+          then return ()
+          else case (cg_loc info) of
+            CmmLoc loc -> emitTagAssertion (showPprUnsafe msg) loc
+            LneLoc {} -> panic "LNE-arg"
+
+-- Check that argument is properly tagged.
+checkArgStatic :: SDoc -> StrictnessMark  -> StgArg -> FCode ()
+checkArgStatic _   NotMarkedStrict _ = return ()
+checkArgStatic msg MarkedStrict arg = whenCheckTags $
+  case arg of
+    StgLitArg _ -> return ()
+    StgVarArg v -> do
+      info <- getCgIdInfo v
+      if taggedCgInfo info
+          then return ()
+          else pprPanic "Arg not tagged as expectd" (ppr msg <+> ppr arg)
+
+
diff --git a/compiler/GHC/StgToCmm/Ticky.hs b/compiler/GHC/StgToCmm/Ticky.hs
index 118c05d920..57bf1a97b6 100644
--- a/compiler/GHC/StgToCmm/Ticky.hs
+++ b/compiler/GHC/StgToCmm/Ticky.hs
@@ -64,6 +64,15 @@ the code generator as well as the RTS because:
 
   * someone else might know how to repair it!
 
+
+Note [Ticky counters are static]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Currently GHC only supports static ticky events. That is -ticky emits
+code containing labels containing counters which then get bumped at runtime.
+
+There are currently only *static* ticky counters. Either we bump one of the
+static counters included in the RTS. Or we emit StgEntCounter structures in
+the object code and bump these.
 -}
 
 module GHC.StgToCmm.Ticky (
@@ -72,6 +81,7 @@ module GHC.StgToCmm.Ticky (
   withNewTickyCounterThunk,
   withNewTickyCounterStdThunk,
   withNewTickyCounterCon,
+  emitTickyCounterTag,
 
   tickyDynAlloc,
   tickyAllocHeap,
@@ -97,7 +107,10 @@ module GHC.StgToCmm.Ticky (
   tickyUnboxedTupleReturn,
   tickyReturnOldCon, tickyReturnNewCon,
 
-  tickySlowCall
+  tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs,
+  tickySlowCall, tickySlowCallPat,
+
+  tickyTagged, tickyUntagged, tickyTagSkip
   ) where
 
 import GHC.Prelude
@@ -260,6 +273,93 @@ emitTickyCounter cloType name args
             ]
         }
 
+{- Note [TagSkip ticky counters]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+These counters keep track how often we execute code where we
+would have performed a tag check if we hadn't run tag inference.
+
+If we have some code of the form:
+    case v[tagged] of ...
+and we want to record how often we avoid a tag check on v
+through tag inference we have to emit a new StgEntCounter for
+each such case statement in order to record how often it's executed.
+
+In theory we could emit one per *binding*. But then we
+would have to either keep track of the bindings which
+already have a StgEntCounter associated with them in the
+code gen state or preallocate such a structure for each binding
+in the code unconditionally (since ticky-code can call non-ticky code)
+
+The first makes the compiler slower, even when ticky is not
+used (a big no no). The later is fairly complex but increases code size
+unconditionally. See also Note [Ticky counters are static].
+
+So instead we emit a new StgEntCounter for each use site of a binding
+where we infered a tag to be present. And increment the counter whenever
+this use site is executed.
+
+We use the fields as follows:
+
+entry_count: Entries avoided.
+str:       : Name of the id.
+
+We use emitTickyCounterTag to emit the counter.
+
+Unlike the closure counters each *use* site of v has it's own
+counter. So there is no need to keep track of the closure/case we are
+in.
+
+We also have to pass a unique for the counter. An Id might be
+scrutinized in more than one place, so the ID alone isn't enough
+to distinguish between use sites.
+-}
+
+emitTickyCounterTag :: Unique -> NonVoid Id -> FCode CLabel
+emitTickyCounterTag unique (NonVoid id) =
+  let name = idName id
+      ctr_lbl = mkTagHitLabel name unique in
+    (>> return ctr_lbl) $
+    ifTicky $ do
+        { platform <- getPlatform
+        ; parent <- getTickyCtrLabel
+        ; mod_name <- getModuleName
+
+          -- When printing the name of a thing in a ticky file, we
+          -- want to give the module name even for *local* things.  We
+          -- print just "x (M)" rather that "M.x" to distinguish them
+          -- from the global kind.
+        ; let ppr_for_ticky_name :: SDoc
+              ppr_for_ticky_name =
+                let n = ppr name
+                    ext = empty -- parens (text "tagged")
+                    p = case hasHaskellName parent of
+                            -- NB the default "top" ticky ctr does not
+                            -- have a Haskell name
+                          Just pname -> text "at" <+> ppr (nameSrcLoc pname) <+>
+                                          text "in" <+> pprNameUnqualified name
+                          _ -> empty
+                in if isInternalName name
+                   then n <+> parens (ppr mod_name) <+> ext <+> p
+                   else n <+> ext <+> p
+        ; sdoc_context <- stgToCmmContext <$> getStgToCmmConfig
+        ; fun_descr_lit <- newStringCLit $ renderWithContext sdoc_context ppr_for_ticky_name
+        ; arg_descr_lit <- newStringCLit $ "infer"
+        ; emitDataLits ctr_lbl
+        -- Must match layout of includes/rts/Ticky.h's StgEntCounter
+        --
+        -- krc: note that all the fields are I32 now; some were I16
+        -- before, but the code generator wasn't handling that
+        -- properly and it led to chaos, panic and disorder.
+            [ mkIntCLit platform 0,               -- registered?
+              mkIntCLit platform 0,   -- Arity
+              mkIntCLit platform 0,               -- Heap allocated for this thing
+              fun_descr_lit,
+              arg_descr_lit,
+              zeroCLit platform,          -- Entries into this thing
+              zeroCLit platform,          -- Heap allocated by this thing
+              zeroCLit platform           -- Link to next StgEntCounter
+            ]
+        }
 -- -----------------------------------------------------------------------------
 -- Ticky stack frames
 
@@ -560,6 +660,27 @@ tickyStackCheck :: FCode ()
 tickyStackCheck = ifTicky $ bumpTickyCounter (fsLit "STK_CHK_ctr")
 
 -- -----------------------------------------------------------------------------
+-- Ticky for tag inference characterisation
+
+-- | Predicted a pointer would be tagged correctly (GHC will crash if not so no miss case)
+tickyTagged :: FCode ()
+tickyTagged         = ifTicky $ bumpTickyCounter (fsLit "TAG_TAGGED_pred")
+
+-- | Pass a boolean expr indicating if tag was present.
+tickyUntagged :: CmmExpr -> FCode ()
+tickyUntagged e     = do
+    ifTicky $ bumpTickyCounter (fsLit "TAG_UNTAGGED_pred")
+    ifTicky $ bumpTickyCounterByE (fsLit "TAG_UNTAGGED_miss") e
+
+-- | Called when for `case v of ...` we can avoid entering v based on
+-- tag inference information.
+tickyTagSkip :: Unique -> Id -> FCode ()
+tickyTagSkip unique id = ifTicky $ do
+  let ctr_lbl = mkTagHitLabel (idName id) unique
+  registerTickyCtr ctr_lbl
+  bumpTickyTagSkip ctr_lbl
+
+-- -----------------------------------------------------------------------------
 -- Ticky utils
 
 isEnabled :: (StgToCmmConfig -> Bool) -> FCode Bool
@@ -597,7 +718,12 @@ bumpTickyEntryCount lbl = do
 bumpTickyAllocd :: CLabel -> Int -> FCode ()
 bumpTickyAllocd lbl bytes = do
   platform <- getPlatform
-  bumpTickyLitBy (cmmLabelOffB lbl (pc_OFFSET_StgEntCounter_allocd (platformConstants platform))) bytes
+  bumpTickyLitBy (cmmLabelOffB lbl (pc_OFFSET_StgEntCounter_entry_count (platformConstants platform))) bytes
+
+bumpTickyTagSkip :: CLabel -> FCode ()
+bumpTickyTagSkip lbl = do
+  platform <- getPlatform
+  bumpTickyLitBy (cmmLabelOffB lbl (pc_OFFSET_StgEntCounter_entry_count (platformConstants platform))) 1
 
 bumpTickyLbl :: CLabel -> FCode ()
 bumpTickyLbl lhs = bumpTickyLitBy (cmmLabelOffB lhs 0) 1
diff --git a/compiler/GHC/StgToCmm/Types.hs b/compiler/GHC/StgToCmm/Types.hs
index 1d50f0ae70..1a4d06ed9b 100644
--- a/compiler/GHC/StgToCmm/Types.hs
+++ b/compiler/GHC/StgToCmm/Types.hs
@@ -19,6 +19,7 @@ import GHC.Types.ForeignStubs
 import GHC.Core.DataCon
 import GHC.Types.Name.Env
 import GHC.Types.Name.Set
+import GHC.Stg.InferTags.TagSig
 import GHC.Utils.Outputable
 
 
@@ -90,6 +91,7 @@ data CgInfos = CgInfos
       -- ^ LambdaFormInfos of exported closures in the current module.
   , cgIPEStub :: !CStub
       -- ^ The C stub which is used for IPE information
+  , cgTagSigs :: !(NameEnv TagSig)
   }
 
 --------------------------------------------------------------------------------
diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs
index 270699b9ed..ddda97ab2a 100644
--- a/compiler/GHC/StgToCmm/Utils.hs
+++ b/compiler/GHC/StgToCmm/Utils.hs
@@ -12,7 +12,8 @@ module GHC.StgToCmm.Utils (
         emitDataLits, emitRODataLits,
         emitDataCon,
         emitRtsCall, emitRtsCallWithResult, emitRtsCallGen,
-        assignTemp,
+        emitBarf,
+        assignTemp, newTemp,
 
         newUnboxedTupleRegs,
 
@@ -50,7 +51,7 @@ import GHC.Prelude
 import GHC.Platform
 import GHC.StgToCmm.Monad
 import GHC.StgToCmm.Closure
-import GHC.StgToCmm.Lit (mkSimpleLit)
+import GHC.StgToCmm.Lit (mkSimpleLit, newStringCLit)
 import GHC.Cmm
 import GHC.Cmm.BlockId
 import GHC.Cmm.Graph as CmmGraph
@@ -157,6 +158,11 @@ tagToClosure platform tycon tag
 --
 -------------------------------------------------------------------------
 
+emitBarf :: String -> FCode ()
+emitBarf msg = do
+  strLbl <- newStringCLit msg
+  emitRtsCall rtsUnitId (fsLit "barf") [(CmmLit strLbl,AddrHint)] False
+
 emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
 emitRtsCall pkg fun = emitRtsCallGen [] (mkCmmCodeLabel pkg fun)
 
@@ -299,8 +305,6 @@ newUnboxedTupleRegs res_ty
     choose_regs _ (AssignTo regs _) = return regs
     choose_regs platform _          = mapM (newTemp . primRepCmmType platform) reps
 
-
-
 -------------------------------------------------------------------------
 --      emitMultiAssign
 -------------------------------------------------------------------------
diff --git a/compiler/GHC/Tc/Module.hs b/compiler/GHC/Tc/Module.hs
index 026686b8cc..d9b59b4fd8 100644
--- a/compiler/GHC/Tc/Module.hs
+++ b/compiler/GHC/Tc/Module.hs
@@ -2975,6 +2975,7 @@ ppr_types debug type_env
       | otherwise = hasTopUserName id
                     && case idDetails id of
                          VanillaId    -> True
+                         StrictWorkerId{} -> True
                          RecSelId {}  -> True
                          ClassOpId {} -> True
                          FCallId {}   -> True
diff --git a/compiler/GHC/Tc/Solver.hs b/compiler/GHC/Tc/Solver.hs
index 6a1f2d3315..8b6ac9928d 100644
--- a/compiler/GHC/Tc/Solver.hs
+++ b/compiler/GHC/Tc/Solver.hs
@@ -677,7 +677,7 @@ type-class or type defined in N.
 Secondly, when should these heuristics be enforced? We enforced them when the
 type-class method call site is in a module marked `-XSafe` or `-XTrustworthy`.
 This allows `-XUnsafe` modules to operate without restriction, and for Safe
-Haskell inferrence to infer modules with unsafe overlaps as unsafe.
+Haskell inference to infer modules with unsafe overlaps as unsafe.
 
 One alternative design would be to also consider if an instance was imported as
 a `safe` import or not and only apply the restriction to instances imported
@@ -745,7 +745,7 @@ How is this implemented? It's complicated! So we'll step through it all:
       IORefs called `tcg_safe_infer` and `tcg_safe_infer_reason`.
 
  7) `GHC.Driver.Main.tcRnModule'` -- Reads `tcg_safe_infer` after type-checking, calling
-    `GHC.Driver.Main.markUnsafeInfer` (passing the reason along) when safe-inferrence
+    `GHC.Driver.Main.markUnsafeInfer` (passing the reason along) when safe-inference
     failed.
 
 Note [No defaulting in the ambiguity check]
diff --git a/compiler/GHC/Tc/Types.hs b/compiler/GHC/Tc/Types.hs
index 97f11c8a0b..e1f0400e44 100644
--- a/compiler/GHC/Tc/Types.hs
+++ b/compiler/GHC/Tc/Types.hs
@@ -50,7 +50,7 @@ module GHC.Tc.Types(
         PromotionErr(..),
         IdBindingInfo(..), ClosedTypeId, RhsNames,
         IsGroupClosed(..),
-        SelfBootInfo(..),
+        SelfBootInfo(..), bootExports,
         tcTyThingCategory, pprTcTyThingCategory,
         peCategory, pprPECategory,
         CompleteMatch, CompleteMatches,
@@ -696,6 +696,15 @@ data SelfBootInfo
 -- What is sb_tcs used for?  See Note [Extra dependencies from .hs-boot files]
 -- in GHC.Rename.Module
 
+bootExports :: SelfBootInfo -> NameSet
+bootExports boot =
+  case boot of
+    NoSelfBoot -> emptyNameSet
+    SelfBoot { sb_mds = mds} ->
+      let exports = md_exports mds
+      in availsToNameSet exports
+
+
 
 {- Note [Tracking unused binding and imports]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/GHC/Tc/Types.hs-boot b/compiler/GHC/Tc/Types.hs-boot
index c6302adb57..9f107936a5 100644
--- a/compiler/GHC/Tc/Types.hs-boot
+++ b/compiler/GHC/Tc/Types.hs-boot
@@ -6,6 +6,8 @@ import GHC.Utils.Outputable
 
 data TcLclEnv
 
+data SelfBootInfo
+
 data TcIdSigInfo
 instance Outputable TcIdSigInfo
 
diff --git a/compiler/GHC/Types/Basic.hs b/compiler/GHC/Types/Basic.hs
index e7c384d0a4..2ba185c25e 100644
--- a/compiler/GHC/Types/Basic.hs
+++ b/compiler/GHC/Types/Basic.hs
@@ -45,6 +45,8 @@ module GHC.Types.Basic (
 
         Boxity(..), isBoxed,
 
+        CbvMark(..), isMarkedCbv,
+
         PprPrec(..), topPrec, sigPrec, opPrec, funPrec, starPrec, appPrec,
         maybeParen,
 
@@ -517,6 +519,37 @@ instance Binary Boxity where -- implemented via isBoxed-isomorphism to Bool
 {-
 ************************************************************************
 *                                                                      *
+                Call by value flag
+*                                                                      *
+************************************************************************
+-}
+
+-- | Should an argument be passed evaluated *and* tagged.
+data CbvMark = MarkedCbv | NotMarkedCbv
+    deriving Eq
+
+instance Outputable CbvMark where
+  ppr MarkedCbv    = text "!"
+  ppr NotMarkedCbv = text "~"
+
+instance Binary CbvMark where
+    put_ bh NotMarkedCbv = putByte bh 0
+    put_ bh MarkedCbv    = putByte bh 1
+    get bh =
+      do h <- getByte bh
+         case h of
+           0 -> return NotMarkedCbv
+           1 -> return MarkedCbv
+           _ -> panic "Invalid binary format"
+
+isMarkedCbv :: CbvMark -> Bool
+isMarkedCbv MarkedCbv = True
+isMarkedCbv NotMarkedCbv = False
+
+
+{-
+************************************************************************
+*                                                                      *
                 Recursive/Non-Recursive flag
 *                                                                      *
 ************************************************************************
diff --git a/compiler/GHC/Types/Id.hs b/compiler/GHC/Types/Id.hs
index 1245b372af..76dc4d0721 100644
--- a/compiler/GHC/Types/Id.hs
+++ b/compiler/GHC/Types/Id.hs
@@ -114,17 +114,22 @@ module GHC.Types.Id (
         setIdDemandInfo,
         setIdDmdSig,
         setIdCprSig,
+        setIdCbvMarks,
+        idCbvMarks_maybe,
+        idCbvMarkArity,
 
         idDemandInfo,
         idDmdSig,
         idCprSig,
 
+        idTagSig_maybe,
+        setIdTagSig
     ) where
 
 import GHC.Prelude
 
 import GHC.Core ( CoreRule, isStableUnfolding, evaldUnfolding,
-                 isCompulsoryUnfolding, Unfolding( NoUnfolding ) )
+                 isCompulsoryUnfolding, Unfolding( NoUnfolding ), isEvaldUnfolding )
 
 import GHC.Types.Id.Info
 import GHC.Types.Basic
@@ -163,6 +168,7 @@ import GHC.Utils.Panic
 import GHC.Utils.Panic.Plain
 import GHC.Utils.GlobalVars
 import GHC.Utils.Trace
+import GHC.Stg.InferTags.TagSig
 
 -- infixl so you can say (id `set` a `set` b)
 infixl  1 `setIdUnfolding`,
@@ -181,7 +187,8 @@ infixl  1 `setIdUnfolding`,
           `setIdCprSig`,
 
           `asJoinId`,
-          `asJoinId_maybe`
+          `asJoinId_maybe`,
+          `setIdCbvMarks`
 
 {-
 ************************************************************************
@@ -259,6 +266,11 @@ maybeModifyIdInfo :: Maybe IdInfo -> Id -> Id
 maybeModifyIdInfo (Just new_info) id = lazySetIdInfo id new_info
 maybeModifyIdInfo Nothing         id = id
 
+-- maybeModifyIdInfo tries to avoid unnecessary thrashing
+maybeModifyIdDetails :: Maybe IdDetails  -> Id -> Id
+maybeModifyIdDetails (Just new_details) id = setIdDetails id new_details
+maybeModifyIdDetails Nothing         id = id
+
 {-
 ************************************************************************
 *                                                                      *
@@ -540,11 +552,12 @@ isJoinId id
                 _         -> False
   | otherwise = False
 
+-- | Doesn't return strictness marks
 isJoinId_maybe :: Var -> Maybe JoinArity
 isJoinId_maybe id
  | isId id  = assertPpr (isId id) (ppr id) $
               case Var.idDetails id of
-                JoinId arity -> Just arity
+                JoinId arity _marks -> Just arity
                 _            -> Nothing
  | otherwise = Nothing
 
@@ -609,10 +622,12 @@ asJoinId id arity = warnPprTrace (not (isLocalId id))
                     warnPprTrace (not (is_vanilla_or_join id))
                          "asJoinId"
                          (ppr id <+> pprIdDetails (idDetails id)) $
-                    id `setIdDetails` JoinId arity
+                    id `setIdDetails` JoinId arity (idCbvMarks_maybe id)
   where
     is_vanilla_or_join id = case Var.idDetails id of
                               VanillaId -> True
+                              -- Can workers become join ids? Yes!
+                              StrictWorkerId {} -> pprTraceDebug "asJoinId (strict worker)" (ppr id) True
                               JoinId {} -> True
                               _         -> False
 
@@ -689,6 +704,9 @@ isStrictId id
                   isStrUsedDmd (idDemandInfo id)
                   -- Take the best of both strictnesses - old and new
 
+idTagSig_maybe :: Id -> Maybe TagSig
+idTagSig_maybe = tagSig . idInfo
+
 ---------------------------------
 -- UNFOLDING
 
@@ -712,6 +730,49 @@ idDemandInfo       id = demandInfo (idInfo id)
 setIdDemandInfo :: Id -> Demand -> Id
 setIdDemandInfo id dmd = modifyIdInfo (`setDemandInfo` dmd) id
 
+setIdTagSig :: Id -> TagSig -> Id
+setIdTagSig id sig = modifyIdInfo (`setTagSig` sig) id
+
+-- | If all marks are NotMarkedStrict we just set nothing.
+setIdCbvMarks :: Id -> [CbvMark] -> Id
+setIdCbvMarks id marks
+  | not (any isMarkedCbv marks) = maybeModifyIdDetails (removeMarks $ idDetails id) id
+  | otherwise =
+      -- pprTrace "setMarks:" (ppr id <> text ":" <> ppr marks) $
+      case idDetails id of
+        -- good ol (likely worker) function
+        VanillaId ->      id `setIdDetails` (StrictWorkerId trimmedMarks)
+        JoinId arity _ -> id `setIdDetails` (JoinId arity (Just trimmedMarks))
+        -- Updating an existing strict worker.
+        StrictWorkerId _ -> id `setIdDetails` (StrictWorkerId trimmedMarks)
+        -- Do nothing for these
+        RecSelId{} -> id
+        DFunId{} -> id
+        _ -> pprTrace "setIdCbvMarks: Unable to set cbv marks for" (ppr id $$
+              text "marks:" <> ppr marks $$
+              text "idDetails:" <> ppr (idDetails id)) id
+
+    where
+      -- (Currently) no point in passing args beyond the arity unlifted.
+      -- We would have to eta expand all call sites to (length marks).
+      -- Perhaps that's sensible but for now be conservative.
+      trimmedMarks = take (idArity id) marks
+      removeMarks details = case details of
+        JoinId arity (Just _) -> Just $ JoinId arity Nothing
+        StrictWorkerId _ -> Just VanillaId
+        _ -> Nothing
+
+idCbvMarks_maybe :: Id -> Maybe [CbvMark]
+idCbvMarks_maybe id = case idDetails id of
+  StrictWorkerId marks -> Just marks
+  JoinId _arity marks  -> marks
+  _                    -> Nothing
+
+-- Id must be called with at least this arity in order to allow arguments to
+-- be passed unlifted.
+idCbvMarkArity :: Id -> Arity
+idCbvMarkArity fn = maybe 0 length (idCbvMarks_maybe fn)
+
 setCaseBndrEvald :: StrictnessMark -> Id -> Id
 -- Used for variables bound by a case expressions, both the case-binder
 -- itself, and any pattern-bound variables that are argument of a
@@ -884,6 +945,7 @@ updOneShotInfo id one_shot
 --      f = \x -> e
 -- If we change the one-shot-ness of x, f's type changes
 
+-- Replaces the id info if the zapper returns @Just idinfo@
 zapInfo :: (IdInfo -> Maybe IdInfo) -> Id -> Id
 zapInfo zapper id = maybeModifyIdInfo (zapper (idInfo id)) id
 
@@ -969,7 +1031,7 @@ transferPolyIdInfo :: Id        -- Original Id
                    -> Id        -- New Id
                    -> Id
 transferPolyIdInfo old_id abstract_wrt new_id
-  = modifyIdInfo transfer new_id
+  = modifyIdInfo transfer new_id `setIdCbvMarks` new_cbv_marks
   where
     arity_increase = count isId abstract_wrt    -- Arity increases by the
                                                 -- number of value binders
@@ -985,6 +1047,18 @@ transferPolyIdInfo old_id abstract_wrt new_id
     new_strictness  = prependArgsDmdSig arity_increase old_strictness
     old_cpr         = cprSigInfo old_info
 
+    old_cbv_marks   = fromMaybe (replicate old_arity NotMarkedCbv) (idCbvMarks_maybe old_id)
+    abstr_cbv_marks = mapMaybe getMark abstract_wrt
+    new_cbv_marks   = abstr_cbv_marks ++ old_cbv_marks
+
+    getMark v
+      | not (isId v)
+      = Nothing
+      | isId v
+      , isEvaldUnfolding (idUnfolding v)
+      , not (isUnliftedType $ idType v)
+      = Just MarkedCbv
+      | otherwise = Just NotMarkedCbv
     transfer new_info = new_info `setArityInfo` new_arity
                                  `setInlinePragInfo` old_inline_prag
                                  `setOccInfo` new_occ_info
diff --git a/compiler/GHC/Types/Id/Info.hs b/compiler/GHC/Types/Id/Info.hs
index ec5607d40f..73e7169181 100644
--- a/compiler/GHC/Types/Id/Info.hs
+++ b/compiler/GHC/Types/Id/Info.hs
@@ -68,7 +68,7 @@ module GHC.Types.Id.Info (
         emptyRuleInfo,
         isEmptyRuleInfo, ruleInfoFreeVars,
         ruleInfoRules, setRuleInfoHead,
-        ruleInfo, setRuleInfo,
+        ruleInfo, setRuleInfo, tagSigInfo,
 
         -- ** The CAFInfo type
         CafInfo(..),
@@ -76,8 +76,10 @@ module GHC.Types.Id.Info (
         cafInfo, setCafInfo,
 
         -- ** The LambdaFormInfo type
-        LambdaFormInfo(..),
-        lfInfo, setLFInfo,
+        LambdaFormInfo,
+        lfInfo, setLFInfo, setTagSig,
+
+        tagSig,
 
         -- ** Tick-box Info
         TickBoxOp(..), TickBoxId,
@@ -108,10 +110,11 @@ import GHC.Utils.Misc
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Utils.Panic.Plain
+import GHC.Stg.InferTags.TagSig
 
 import Data.Word
 
-import GHC.StgToCmm.Types (LambdaFormInfo (..))
+import GHC.StgToCmm.Types (LambdaFormInfo)
 
 -- infixl so you can say (id `set` a `set` b)
 infixl  1 `setRuleInfo`,
@@ -126,7 +129,6 @@ infixl  1 `setRuleInfo`,
           `setDemandInfo`,
           `setNeverRepPoly`,
           `setLevityInfoWithType`
-
 {-
 ************************************************************************
 *                                                                      *
@@ -173,8 +175,59 @@ data IdDetails
   | CoVarId    -- ^ A coercion variable
                -- This only covers /un-lifted/ coercions, of type
                -- (t1 ~# t2) or (t1 ~R# t2), not their lifted variants
-  | JoinId JoinArity           -- ^ An 'Id' for a join point taking n arguments
-       -- Note [Join points] in "GHC.Core"
+  | JoinId JoinArity (Maybe [CbvMark])
+        -- ^ An 'Id' for a join point taking n arguments
+        -- Note [Join points] in "GHC.Core"
+  | StrictWorkerId [CbvMark]
+        -- ^ An 'Id' for a worker function, which expects some arguments to be
+        -- passed both evaluated and tagged.
+        -- See Note [Strict Worker Ids]
+        -- See Note [Tag Inference]
+
+{- Note [Strict Worker Ids]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+StrictWorkerId essentially constrains the calling convention for the given Id.
+It requires arguments marked as tagged to be passed properly evaluated+*tagged*.
+
+While we were always able to express the fact that an argument is evaluated
+via attaching a evaldUnfolding to the functions arguments there used to be
+no way to express that an lifted argument is already properly tagged once we jump
+into the RHS.
+This means when branching on a value the RHS always needed to perform
+a tag check to ensure the argument wasn't an indirection (the evaldUnfolding
+already ruling out thunks).
+
+StrictWorkerIds give us this additional expressiveness which we use to improve
+runtime. This is all part of the TagInference work. See also Note [Tag Inference].
+
+What we do is:
+* If we think a function might benefit from passing certain arguments unlifted
+  for performance reasons we attach an evaldUnfolding to these arguments.
+* Either during W/W, but at latest during Tidy VanillaIds with arguments that
+  have evaldUnfoldings are turned into StrictWorkerIds.
+* During CorePrep calls to StrictWorkerIds are eta expanded.
+* During Stg CodeGen:
+  * When we call a binding that is a StrictWorkerId:
+    * We check if all arguments marked to be passed unlifted are already tagged.
+    * If they aren't we will wrap the call in case expressions which will evaluate+tag
+      these arguments before jumping to the function.
+* During Cmm codeGen:
+  * When generating code for the RHS of a StrictWorker binding
+    we omit tag checks when using arguments marked as tagged.
+
+We primarily use this for workers where we mark strictly demanded arguments
+and arguments representing strict fields as call-by-value during W/W. But we
+also check other functions during tidy and potentially turn some of them into
+strict workers and mark some of their arguments as call-by-value by looking at
+argument unfoldings.
+
+NB: I choose to put the information into a new Id constructor since these are loaded
+at all optimization levels. This makes it trivial to ensure the additional
+calling convention demands are available at all call sites. Putting it into
+IdInfo would require us at the very least to always decode the IdInfo
+just to decide if we need to throw it away or not after.
+
+-}
 
 -- | Recursive Selector Parent
 data RecSelParent = RecSelData TyCon | RecSelPatSyn PatSyn deriving Eq
@@ -198,8 +251,8 @@ isCoVarDetails :: IdDetails -> Bool
 isCoVarDetails CoVarId = True
 isCoVarDetails _       = False
 
-isJoinIdDetails_maybe :: IdDetails -> Maybe JoinArity
-isJoinIdDetails_maybe (JoinId join_arity) = Just join_arity
+isJoinIdDetails_maybe :: IdDetails -> Maybe (JoinArity, (Maybe [CbvMark]))
+isJoinIdDetails_maybe (JoinId join_arity marks) = Just (join_arity, marks)
 isJoinIdDetails_maybe _                   = Nothing
 
 instance Outputable IdDetails where
@@ -210,6 +263,7 @@ pprIdDetails VanillaId = empty
 pprIdDetails other     = brackets (pp other)
  where
    pp VanillaId               = panic "pprIdDetails"
+   pp (StrictWorkerId dmds)   = text "StrictWorker" <> parens (ppr dmds)
    pp (DataConWorkId _)       = text "DataCon"
    pp (DataConWrapId _)       = text "DataConWrapper"
    pp (ClassOpId {})          = text "ClassOp"
@@ -221,7 +275,7 @@ pprIdDetails other     = brackets (pp other)
                               = brackets $ text "RecSel" <>
                                            ppWhen is_naughty (text "(naughty)")
    pp CoVarId                 = text "CoVarId"
-   pp (JoinId arity)          = text "JoinId" <> parens (int arity)
+   pp (JoinId arity marks)    = text "JoinId" <> parens (int arity) <> parens (ppr marks)
 
 {-
 ************************************************************************
@@ -274,7 +328,10 @@ data IdInfo
         -- 4% in some programs. See #17497 and associated MR.
         --
         -- See documentation of the getters for what these packed fields mean.
-        lfInfo          :: !(Maybe LambdaFormInfo)
+        lfInfo          :: !(Maybe LambdaFormInfo),
+
+        -- See documentation of the getters for what these packed fields mean.
+        tagSig          :: !(Maybe TagSig)
     }
 
 -- | Encodes arities, OneShotInfo, CafInfo and LevityInfo.
@@ -365,6 +422,9 @@ cafInfo = bitfieldGetCafInfo . bitfield
 callArityInfo :: IdInfo -> ArityInfo
 callArityInfo = bitfieldGetCallArityInfo . bitfield
 
+tagSigInfo :: IdInfo -> Maybe TagSig
+tagSigInfo = tagSig
+
 -- Setters
 
 setRuleInfo :: IdInfo -> RuleInfo -> IdInfo
@@ -414,6 +474,9 @@ setCafInfo info caf =
 setLFInfo :: IdInfo -> LambdaFormInfo -> IdInfo
 setLFInfo info lf = info { lfInfo = Just lf }
 
+setTagSig :: IdInfo -> TagSig -> IdInfo
+setTagSig info sig = info { tagSig = Just sig }
+
 setOneShotInfo :: IdInfo -> OneShotInfo -> IdInfo
 setOneShotInfo info lb =
     info { bitfield = bitfieldSetOneShotInfo lb (bitfield info) }
@@ -444,7 +507,8 @@ vanillaIdInfo
                              bitfieldSetOneShotInfo NoOneShotInfo $
                              bitfieldSetLevityInfo NoLevityInfo $
                              emptyBitField,
-            lfInfo         = Nothing
+            lfInfo         = Nothing,
+            tagSig         = Nothing
            }
 
 -- | More informative 'IdInfo' we can use when we know the 'Id' has no CAF references
diff --git a/compiler/GHC/Types/Id/Make.hs b/compiler/GHC/Types/Id/Make.hs
index 6fe9f0dafe..67f7e405a3 100644
--- a/compiler/GHC/Types/Id/Make.hs
+++ b/compiler/GHC/Types/Id/Make.hs
@@ -491,6 +491,10 @@ mkDictSelId name clas
 
          | otherwise
          = base_info `setRuleInfo` mkRuleInfo [rule]
+                     `setInlinePragInfo` neverInlinePragma
+                     `setUnfoldingInfo`  mkInlineUnfoldingWithArity 1
+                                           defaultSimpleOpts
+                                           (mkDictSelRhs clas val_index)
                    -- Add a magic BuiltinRule, but no unfolding
                    -- so that the rule is always available to fire.
                    -- See Note [ClassOp/DFun selection] in GHC.Tc.TyCl.Instance
diff --git a/compiler/GHC/Types/RepType.hs b/compiler/GHC/Types/RepType.hs
index b2e8a1c3b8..cf6517bd39 100644
--- a/compiler/GHC/Types/RepType.hs
+++ b/compiler/GHC/Types/RepType.hs
@@ -14,7 +14,8 @@ module GHC.Types.RepType
     typePrimRep, typePrimRep1, typeMonoPrimRep_maybe,
     runtimeRepPrimRep, typePrimRepArgs,
     PrimRep(..), primRepToType,
-    countFunRepArgs, countConRepArgs, tyConPrimRep, tyConPrimRep1,
+    countFunRepArgs, countConRepArgs, dataConRuntimeRepStrictness,
+    tyConPrimRep, tyConPrimRep1,
 
     -- * Unboxed sum representation type
     ubxSumRepType, layoutUbxSum, typeSlotTy, SlotTy (..),
@@ -127,8 +128,41 @@ countConRepArgs dc = go (dataConRepArity dc) (dataConRepType dc)
       | otherwise
       = pprPanic "countConRepArgs: arity greater than type can handle" (ppr (n, ty, typePrimRep ty))
 
+dataConRuntimeRepStrictness :: HasDebugCallStack => DataCon -> [StrictnessMark]
+-- ^ Give the demands on the arguments of a
+-- Core constructor application (Con dc args) at runtime.
+-- Assumes the constructor is not levity polymorphic. For example
+-- unboxed tuples won't work.
+dataConRuntimeRepStrictness dc =
+
+  -- pprTrace "dataConRuntimeRepStrictness" (ppr dc $$ ppr (dataConRepArgTys dc)) $
+
+  let repMarks = dataConRepStrictness dc
+      repTys = map irrelevantMult $ dataConRepArgTys dc
+  in -- todo: assert dc != unboxedTuple/unboxedSum
+     go repMarks repTys []
+  where
+    go (mark:marks) (ty:types) out_marks
+      -- Zero-width argument, mark is irrelevant at runtime.
+      |  -- pprTrace "VoidTy" (ppr ty) $
+        (isZeroBitTy ty)
+      = go marks types out_marks
+      -- Single rep argument, e.g. Int
+      -- Keep mark as-is
+      | [_] <- reps
+      = go marks types (mark:out_marks)
+      -- Multi-rep argument, e.g. (# Int, Bool #) or (# Int | Bool #)
+      -- Make up one non-strict mark per runtime argument.
+      | otherwise -- TODO: Assert real_reps /= null
+      = go marks types ((replicate (length real_reps) NotMarkedStrict)++out_marks)
+      where
+        reps = typePrimRep ty
+        real_reps = filter (not . isVoidRep) $ reps
+    go [] [] out_marks = reverse out_marks
+    go _m _t _o = pprPanic "dataConRuntimeRepStrictness2" (ppr dc $$ ppr _m $$ ppr _t $$ ppr _o)
+
 -- | True if the type has zero width.
-isZeroBitTy :: Type -> Bool
+isZeroBitTy :: HasDebugCallStack => Type -> Bool
 isZeroBitTy = null . typePrimRep
 
 
diff --git a/compiler/GHC/Unit/Module/ModGuts.hs b/compiler/GHC/Unit/Module/ModGuts.hs
index 1558e5944a..4fc683d844 100644
--- a/compiler/GHC/Unit/Module/ModGuts.hs
+++ b/compiler/GHC/Unit/Module/ModGuts.hs
@@ -31,6 +31,7 @@ import GHC.Types.Fixity.Env
 import GHC.Types.ForeignStubs
 import GHC.Types.HpcInfo
 import GHC.Types.Name.Reader
+import GHC.Types.Name.Set (NameSet)
 import GHC.Types.SafeHaskell
 import GHC.Types.SourceFile ( HscSource(..), hscSourceToIsBoot )
 import GHC.Types.SrcLoc
@@ -88,6 +89,7 @@ data ModGuts
         mg_fam_inst_env :: FamInstEnv,          -- ^ Type-family instance environment for
                                                 -- /home-package/ modules (including this
                                                 -- one); c.f. 'tcg_fam_inst_env'
+        mg_boot_exports :: !NameSet,             -- Things that are also export via hs-boot file
 
         mg_safe_haskell :: SafeHaskellMode,     -- ^ Safe Haskell mode
         mg_trust_pkg    :: Bool,                -- ^ Do we need to trust our
diff --git a/compiler/GHC/Utils/Misc.hs b/compiler/GHC/Utils/Misc.hs
index 8f33944b86..3bc4e6adb8 100644
--- a/compiler/GHC/Utils/Misc.hs
+++ b/compiler/GHC/Utils/Misc.hs
@@ -30,7 +30,7 @@ module GHC.Utils.Misc (
         mapAndUnzip, mapAndUnzip3,
         filterOut, partitionWith,
 
-        dropWhileEndLE, spanEnd, last2, lastMaybe,
+        dropWhileEndLE, spanEnd, last2, lastMaybe, onJust,
 
         List.foldl1', foldl2, count, countWhile, all2,
 
@@ -764,6 +764,10 @@ lastMaybe :: [a] -> Maybe a
 lastMaybe [] = Nothing
 lastMaybe xs = Just $ last xs
 
+-- | @onJust x m f@ applies f to the value inside the Just or returns the default.
+onJust :: b -> Maybe a -> (a->b) -> b
+onJust dflt = flip (maybe dflt)
+
 -- | Split a list into its last element and the initial part of the list.
 -- @snocView xs = Just (init xs, last xs)@ for non-empty lists.
 -- @snocView xs = Nothing@ otherwise.
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index efc4639b96..068ded4e9c 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -546,10 +546,15 @@ Library
         GHC.Stg.Lift.Config
         GHC.Stg.Lift.Monad
         GHC.Stg.Lint
+        GHC.Stg.InferTags
+        GHC.Stg.InferTags.Rewrite
+        GHC.Stg.InferTags.TagSig
+        GHC.Stg.InferTags.Types
         GHC.Stg.Pipeline
         GHC.Stg.Stats
         GHC.Stg.Subst
         GHC.Stg.Syntax
+        GHC.Stg.Utils
         GHC.StgToByteCode
         GHC.StgToCmm
         GHC.StgToCmm.ArgRep
@@ -570,6 +575,7 @@ Library
         GHC.StgToCmm.Prim
         GHC.StgToCmm.Prof
         GHC.StgToCmm.Sequel
+        GHC.StgToCmm.TagCheck
         GHC.StgToCmm.Ticky
         GHC.StgToCmm.Types
         GHC.StgToCmm.Utils
diff --git a/docs/users_guide/9.4.1-notes.rst b/docs/users_guide/9.4.1-notes.rst
index 75e7268cc1..c29d00e9d7 100644
--- a/docs/users_guide/9.4.1-notes.rst
+++ b/docs/users_guide/9.4.1-notes.rst
@@ -65,6 +65,30 @@ Compiler
   that is used boxed in a hot path). Do post a bug report with your example!
   Then wrap the uses of the parameter in ``GHC.Exts.lazy`` for a short-term fix.
 
+- Tag inference has been implemented.
+
+  It's a new backend optimization pass aimed at avoiding
+  redundant evaluatedness checks. The basic pass is always enabled and not optional.
+  When using :ghc-flag:`-fworker-wrapper-cbv` it additionally will generate workers for functions
+  with strict arguments, pushing the evaluation+tagging of the arguments into the wrapper
+  and allowing the worker to simply assume all arguments are fully evaluated and properly
+  tagged. Usually the wrapper will then inline, and if the argument is known to be properly
+  tagged at the call site the wrapper will become a no-op. Giving us a more efficient
+  worker without adding any overhead. If the argument *isn't* known to be evaluated we
+  perform the same amount of work, but do it at call sites instead of inside the called
+  function.
+
+  In general :ghc-flag:`-fworker-wrapper-cbv` is very beneficial and can be safely enabled.
+  However sadly there are two exceptions. It can break rules for code which made assumptions about
+  which functions get a W/W split which now no longer hold.
+  See #20364 for the details. For this reason it isn't enabled by default.
+  For code which has the proper ``INLINABLE`` (:ref:`inlinable-pragma`) and ``INLINE`` (:ref:`inline-pragma`)
+  or that doesn't define any rule-relevant functions this shouldn't happen. The longterm fix here is to
+  apply the proper pragmas.
+  There is also a known issue where a function taking multiple unlifted arguments can cause excessive
+  spilling (#20334). This seems to be an edge case. But if you think you are hitting this case please
+  comment on the ticket so that we can prioritize it accordingly.
+
 - Support for Sun SPARC architecture has been dropped (:ghc-ticket:`16883`).
 
 - A fix for GHC's handling of the XDG Base Directory Specification
diff --git a/docs/users_guide/debugging.rst b/docs/users_guide/debugging.rst
index 30f31a64ef..6802fa71fb 100644
--- a/docs/users_guide/debugging.rst
+++ b/docs/users_guide/debugging.rst
@@ -457,6 +457,19 @@ These flags dump various phases of GHC's STG pipeline.
 
     Show the output of the unarise pass.
 
+.. ghc-flag:: -ddump-stg-cg
+    :shortdesc: Show output after Stg2Stg
+    :type: dynamic
+
+    Show the output of the STG after Stg2Stg. This is the result after
+    applying the Stg2Stg optimization passes.
+
+.. ghc-flag:: -ddump-stg-tags
+    :shortdesc: Show output of the tag inference pass.
+    :type: dynamic
+
+    Show the output of the tag inference pass.
+
 .. ghc-flag:: -ddump-stg-final
     :shortdesc: Show output of last STG pass.
     :type: dynamic
@@ -1060,3 +1073,13 @@ Other
     is useful when debugging because it gives smaller modules and dumps, but the
     compiler will panic if you try to use Typeable instances of things that you
     built with this flag.
+
+.. ghc-flag:: -dtag-inference-checks
+    :shortdesc: Affirm tag inference results are correct at runtime.
+    :type: dynamic
+
+    When tag inference tells as a specific value is supposed to be tagged then
+    generate code to check this at runtime. If the check fails the program will
+    be terminated. This helps narrowing down if an issue is due to tag inference
+    if things go wrong. Which would otherwise be quite difficult.
+
diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst
index b368544696..390b179f33 100644
--- a/docs/users_guide/using-optimisation.rst
+++ b/docs/users_guide/using-optimisation.rst
@@ -1611,6 +1611,32 @@ by saying ``-fno-wombat``.
     while demand analysis is disabled (by :ghc-flag:`-fno-strictness`)
     has no effect.
 
+.. ghc-flag:: -fworker-wrapper-cbv
+    :shortdesc: Enable w/w splits for wrappers whos sole purpose is evaluating arguments.
+    :type: dynamic
+    :category: optimization
+
+    Disabling this flag prevents a W/W split if the only benefit would be call-by-value
+    for some arguments.
+
+    Otherwise this exploits strictness information by passing strict value arguments
+    call-by-value to the functions worker. Even for functions who would
+    otherwise not get a worker.
+
+    This avoids (potentially repeated) checks for evaluatedness of arguments in
+    the rhs of the worker by pushing this check to the call site.
+    If the argument is statically visible to be a value at the call site the
+    overhead for the check disappears completely.
+
+    This can cause slight codesize increases. It will also cause many more functions
+    to get a worker/wrapper split which can play badly with rules (see Ticket #20364)
+    which is why it's currently disabled by default.
+    In particular if you depend on rules firing on functions marked as NOINLINE without
+    marking use sites of these functions as INLINE or INLINEABLE then things will break
+    unless this flag is disabled.
+
+    While WorkerWrapper is disabled this has no effect.
+
 .. ghc-flag:: -fbinary-blob-threshold=⟨n⟩
     :shortdesc: *default: 500K.* Tweak assembly generator for binary blobs.
     :type: dynamic
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index e186830b4e..5f97568b62 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -541,6 +541,12 @@ extern char **environ;
       SymI_HasProto(RET_OLD_ctr)                        \
       SymI_HasProto(RET_UNBOXED_TUP_ctr)                \
       SymI_HasProto(RET_SEMI_loads_avoided)             \
+                                                        \
+      SymI_HasProto(TAG_UNTAGGED_pred)                  \
+      SymI_HasProto(TAG_UNTAGGED_miss)                  \
+      SymI_HasProto(TAG_TAGGED_pred)                    \
+      SymI_HasProto(TAG_TAGGED_miss)                    \
+                                                        \
       SymI_HasProto(RET_NEW_hst)                        \
       SymI_HasProto(RET_OLD_hst)                        \
       SymI_HasProto(RET_UNBOXED_TUP_hst)
diff --git a/rts/Ticky.c b/rts/Ticky.c
index c045f43f56..2ce4aab658 100644
--- a/rts/Ticky.c
+++ b/rts/Ticky.c
@@ -88,6 +88,10 @@ PrintTickyInfo(void)
   unsigned long tot_old_updates   = UPD_OLD_IND_ctr + UPD_OLD_PERM_IND_ctr;
   unsigned long tot_gengc_updates = tot_new_updates + tot_old_updates;
 
+  // Number of times tag inference predicted tagged/untagged correctly
+  // allowing us to skip a tag check (when ticky is disabled)
+  unsigned long tot_tag_preds = TAG_UNTAGGED_pred + TAG_TAGGED_pred;
+
   FILE *tf = RtsFlags.TickyFlags.tickyFile;
 
   /* If tf = NULL, that means the user passed in stderr for the ticky stats
@@ -196,6 +200,17 @@ PrintTickyInfo(void)
               PC(INTAVG(tot_old_updates,tot_gengc_updates)));
   }
 
+  if (tot_tag_preds != 0) {
+      fprintf(tf, "\nTOTAL TAG PREDICTIONS MADE: %9" FMT_Word64 " \n",
+              (StgWord64) tot_tag_preds);
+      fprintf(tf, "TAGGED PREDICTIONS HIT:     %9" FMT_Word64 " \n",
+              (StgWord64) TAG_TAGGED_pred);
+      fprintf(tf, "UNTAGGED PREDICTIONS HIT:   %9" FMT_Word64 " \n",
+              (StgWord64) (TAG_UNTAGGED_pred - TAG_UNTAGGED_miss));
+      fprintf(tf, "UNTAGGED PREDICTIONS MISS:  %9" FMT_Word64 " \n",
+              (StgWord64) TAG_UNTAGGED_miss);
+  }
+
   printRegisteredCounterInfo(tf);
 
   fprintf(tf,"\n**************************************************\n");
diff --git a/rts/include/stg/Ticky.h b/rts/include/stg/Ticky.h
index 3d9d4aee4f..3353bb2f1a 100644
--- a/rts/include/stg/Ticky.h
+++ b/rts/include/stg/Ticky.h
@@ -178,6 +178,13 @@ EXTERN StgInt RET_OLD_ctr INIT(0);
 EXTERN StgInt RET_UNBOXED_TUP_ctr INIT(0);
 
 EXTERN StgInt RET_SEMI_loads_avoided INIT(0);
+
+/* Performance characterization for tag inference */
+EXTERN StgInt TAG_UNTAGGED_pred INIT(0);
+EXTERN StgInt TAG_UNTAGGED_miss INIT(0);
+EXTERN StgInt TAG_TAGGED_pred INIT(0);
+EXTERN StgInt TAG_TAGGED_miss INIT(0);
+
 /* End of counter declarations. */
 
 /* How many bins in ticky's histograms */
diff --git a/testsuite/tests/arityanal/should_compile/T18793.stderr b/testsuite/tests/arityanal/should_compile/T18793.stderr
index 39ce9b9a54..6de3dea08a 100644
--- a/testsuite/tests/arityanal/should_compile/T18793.stderr
+++ b/testsuite/tests/arityanal/should_compile/T18793.stderr
@@ -10,9 +10,12 @@ stuff = \ (i :: Int) -> case i of i1 { GHC.Types.I# ipv -> GHC.Types.: @Int i1 (
 Rec {
 -- RHS size: {terms: 23, types: 11, coercions: 0, joins: 0/0}
 T18793.$wgo1 [InlPrag=[2], Occ=LoopBreaker] :: [Int] -> GHC.Prim.Int# -> GHC.Prim.Int#
-[GblId, Arity=2, Str=<1L><L>, Unf=OtherCon []]
+[GblId[StrictWorker([!, ~])], Arity=2, Str=<1L><L>, Unf=OtherCon []]
 T18793.$wgo1
-  = \ (ds :: [Int]) (ww :: GHC.Prim.Int#) ->
+  = \ (ds
+         :: [Int]
+         Unf=OtherCon [])
+      (ww :: GHC.Prim.Int#) ->
       case ds of {
         [] -> ww;
         : y ys ->
diff --git a/testsuite/tests/codeGen/should_compile/Makefile b/testsuite/tests/codeGen/should_compile/Makefile
index 2da161c879..90cf78fb4b 100644
--- a/testsuite/tests/codeGen/should_compile/Makefile
+++ b/testsuite/tests/codeGen/should_compile/Makefile
@@ -68,10 +68,10 @@ T17648:
 	#  NoCafRefs) to the interface files.
 	'$(TEST_HC)' $(TEST_HC_OPTS) -dno-typeable-binds -O T17648.hs -v0
 	'$(TEST_HC)' --show-iface T17648.hi | tr -d '\n\r' | \
-		grep -F 'f :: T GHC.Types.Int -> ()  [HasNoCafRefs, LambdaFormInfo' >/dev/null
+		grep -F 'f :: T GHC.Types.Int -> ()  [HasNoCafRefs' >/dev/null
 
 	# Second compilation with -fcatch-nonexhaustive-cases, f should be CAFFY
 	'$(TEST_HC)' $(TEST_HC_OPTS) -dno-typeable-binds -O \
 		-fcatch-nonexhaustive-cases T17648.hs -v0 -fforce-recomp
 	'$(TEST_HC)' --show-iface T17648.hi | tr -d '\n\r' | \
-		grep -F 'f :: T GHC.Types.Int -> ()  [LambdaFormInfo' >/dev/null
+		grep -F 'f :: T GHC.Types.Int -> ()  [TagSig' >/dev/null
diff --git a/testsuite/tests/count-deps/CountDepsAst.stdout b/testsuite/tests/count-deps/CountDepsAst.stdout
index 3963c5eda9..056b797342 100644
--- a/testsuite/tests/count-deps/CountDepsAst.stdout
+++ b/testsuite/tests/count-deps/CountDepsAst.stdout
@@ -1,4 +1,4 @@
-Found 280 Language.Haskell.Syntax module dependencies
+Found 281 Language.Haskell.Syntax module dependencies
 GHC.Builtin.Names
 GHC.Builtin.PrimOps
 GHC.Builtin.PrimOps.Ids
@@ -154,6 +154,7 @@ GHC.Runtime.Interpreter.Types
 GHC.Settings
 GHC.Settings.Config
 GHC.Settings.Constants
+GHC.Stg.InferTags.TagSig
 GHC.Stg.Syntax
 GHC.StgToCmm.Config
 GHC.StgToCmm.Types
diff --git a/testsuite/tests/count-deps/CountDepsParser.stdout b/testsuite/tests/count-deps/CountDepsParser.stdout
index c86fb84f86..aa5af3c8c5 100644
--- a/testsuite/tests/count-deps/CountDepsParser.stdout
+++ b/testsuite/tests/count-deps/CountDepsParser.stdout
@@ -1,4 +1,4 @@
-Found 286 GHC.Parser module dependencies
+Found 287 GHC.Parser module dependencies
 GHC.Builtin.Names
 GHC.Builtin.PrimOps
 GHC.Builtin.PrimOps.Ids
@@ -160,6 +160,7 @@ GHC.Runtime.Interpreter.Types
 GHC.Settings
 GHC.Settings.Config
 GHC.Settings.Constants
+GHC.Stg.InferTags.TagSig
 GHC.Stg.Syntax
 GHC.StgToCmm.Config
 GHC.StgToCmm.Types
diff --git a/testsuite/tests/ghci/scripts/T7627.stdout b/testsuite/tests/ghci/scripts/T7627.stdout
index a304546e0f..1a390d152f 100644
--- a/testsuite/tests/ghci/scripts/T7627.stdout
+++ b/testsuite/tests/ghci/scripts/T7627.stdout
@@ -27,13 +27,13 @@ instance (Semigroup a, Semigroup b) => Semigroup (a, b)
 instance Foldable ((,) a) -- Defined in ‘Data.Foldable’
 instance (Bounded a, Bounded b) => Bounded (a, b)
   -- Defined in ‘GHC.Enum’
-instance (Eq a, Eq b) => Eq (a, b) -- Defined in ‘GHC.Classes’
-instance (Ord a, Ord b) => Ord (a, b) -- Defined in ‘GHC.Classes’
 instance (Read a, Read b) => Read (a, b) -- Defined in ‘GHC.Read’
-instance (Show a, Show b) => Show (a, b) -- Defined in ‘GHC.Show’
 instance Monoid a => Applicative ((,) a) -- Defined in ‘GHC.Base’
 instance Functor ((,) a) -- Defined in ‘GHC.Base’
 instance Monoid a => Monad ((,) a) -- Defined in ‘GHC.Base’
+instance (Eq a, Eq b) => Eq (a, b) -- Defined in ‘GHC.Classes’
+instance (Ord a, Ord b) => Ord (a, b) -- Defined in ‘GHC.Classes’
+instance (Show a, Show b) => Show (a, b) -- Defined in ‘GHC.Show’
 type (#,#) :: *
               -> *
               -> TYPE
diff --git a/testsuite/tests/ghci/scripts/ghci011.stdout b/testsuite/tests/ghci/scripts/ghci011.stdout
index 10fd9bc264..d03977e3b5 100644
--- a/testsuite/tests/ghci/scripts/ghci011.stdout
+++ b/testsuite/tests/ghci/scripts/ghci011.stdout
@@ -35,9 +35,9 @@ instance (Semigroup a, Semigroup b) => Semigroup (a, b)
 instance Foldable ((,) a) -- Defined in ‘Data.Foldable’
 instance (Bounded a, Bounded b) => Bounded (a, b)
   -- Defined in ‘GHC.Enum’
+instance (Read a, Read b) => Read (a, b) -- Defined in ‘GHC.Read’
 instance (Eq a, Eq b) => Eq (a, b) -- Defined in ‘GHC.Classes’
 instance (Ord a, Ord b) => Ord (a, b) -- Defined in ‘GHC.Classes’
-instance (Read a, Read b) => Read (a, b) -- Defined in ‘GHC.Read’
 instance (Show a, Show b) => Show (a, b) -- Defined in ‘GHC.Show’
 instance Monoid a => Applicative ((,) a) -- Defined in ‘GHC.Base’
 instance Functor ((,) a) -- Defined in ‘GHC.Base’
diff --git a/testsuite/tests/simplCore/should_compile/T13143.stderr b/testsuite/tests/simplCore/should_compile/T13143.stderr
index 6123f91292..a2c77f86ea 100644
--- a/testsuite/tests/simplCore/should_compile/T13143.stderr
+++ b/testsuite/tests/simplCore/should_compile/T13143.stderr
@@ -68,9 +68,18 @@ Rec {
 -- RHS size: {terms: 28, types: 7, coercions: 0, joins: 0/0}
 T13143.$wg [InlPrag=[2], Occ=LoopBreaker]
   :: Bool -> Bool -> GHC.Prim.Int# -> GHC.Prim.Int#
-[GblId, Arity=3, Str=<1L><1L><L>, Unf=OtherCon []]
+[GblId[StrictWorker([!, !, ~])],
+ Arity=3,
+ Str=<1L><1L><L>,
+ Unf=OtherCon []]
 T13143.$wg
-  = \ (ds :: Bool) (ds1 :: Bool) (ww :: GHC.Prim.Int#) ->
+  = \ (ds
+         :: Bool
+         Unf=OtherCon [])
+      (ds1
+         :: Bool
+         Unf=OtherCon [])
+      (ww :: GHC.Prim.Int#) ->
       case ds of {
         False ->
           case ds1 of {
diff --git a/testsuite/tests/simplCore/should_compile/T20103.hs b/testsuite/tests/simplCore/should_compile/T20103.hs
index e19ca5d127..2e2d6f0df4 100644
--- a/testsuite/tests/simplCore/should_compile/T20103.hs
+++ b/testsuite/tests/simplCore/should_compile/T20103.hs
@@ -2,6 +2,7 @@ module T20103 where
 
 import GHC.Stack
 
+-- foo should receive Arity=2
 foo :: HasCallStack => Int -> Int
 foo 0 = length . fst . head $ getCallStack callStack
 foo n = foo (n-1)
diff --git a/testsuite/tests/simplCore/should_compile/T20103.stderr b/testsuite/tests/simplCore/should_compile/T20103.stderr
index 485d2d4617..f89fe3a8f0 100644
--- a/testsuite/tests/simplCore/should_compile/T20103.stderr
+++ b/testsuite/tests/simplCore/should_compile/T20103.stderr
@@ -11,7 +11,7 @@ lvl = GHC.Types.I# 28#
 -- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
 lvl1 :: Int
 [GblId, Unf=OtherCon []]
-lvl1 = GHC.Types.I# 6#
+lvl1 = GHC.Types.I# 8#
 
 -- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
 lvl2 :: Int
@@ -113,9 +113,15 @@ Rec {
 -- RHS size: {terms: 44, types: 41, coercions: 21, joins: 0/0}
 T20103.$wfoo [InlPrag=[2], Occ=LoopBreaker]
   :: HasCallStack => GHC.Prim.Int# -> GHC.Prim.Int#
-[GblId, Arity=2, Str=<SL><1L>, Unf=OtherCon []]
+[GblId[StrictWorker([!, ~])],
+ Arity=2,
+ Str=<1L><1L>,
+ Unf=OtherCon []]
 T20103.$wfoo
-  = \ ($dIP :: HasCallStack) (ww :: GHC.Prim.Int#) ->
+  = \ ($dIP
+         :: HasCallStack
+         Unf=OtherCon [])
+      (ww :: GHC.Prim.Int#) ->
       case ww of ds {
         __DEFAULT ->
           case $dIP
diff --git a/testsuite/tests/simplCore/should_compile/T4201.stdout b/testsuite/tests/simplCore/should_compile/T4201.stdout
index 1e338e43fd..baeee0b0af 100644
--- a/testsuite/tests/simplCore/should_compile/T4201.stdout
+++ b/testsuite/tests/simplCore/should_compile/T4201.stdout
@@ -1,4 +1,4 @@
   lift :: Foo -> T
-  [HasNoCafRefs, LambdaFormInfo: LFReEntrant 1, Arity: 1,
-   Strictness: <1!A>, CPR: 1,
+  [HasNoCafRefs, TagSig: <TagProper>, LambdaFormInfo: LFReEntrant 1,
+   Arity: 1, Strictness: <1!A>, CPR: 1,
    Unfolding: (bof `cast` (Sym (N:Foo[0]) %<'Many>_N ->_R <T>_R))]
diff --git a/testsuite/tests/simplCore/should_compile/T7360.stderr b/testsuite/tests/simplCore/should_compile/T7360.stderr
index d4b68ee22e..ce0f5117bb 100644
--- a/testsuite/tests/simplCore/should_compile/T7360.stderr
+++ b/testsuite/tests/simplCore/should_compile/T7360.stderr
@@ -1,7 +1,7 @@
 
 ==================== Tidy Core ====================
 Result size of Tidy Core
-  = {terms: 116, types: 50, coercions: 0, joins: 0/0}
+  = {terms: 113, types: 49, coercions: 0, joins: 0/0}
 
 -- RHS size: {terms: 6, types: 3, coercions: 0, joins: 0/0}
 T7360.$WFoo3 [InlPrag=INLINE[final] CONLIKE] :: Int %1 -> Foo
@@ -20,17 +20,16 @@ T7360.$WFoo3
   = \ (conrep [Occ=Once1!] :: Int) ->
       case conrep of { GHC.Types.I# unbx [Occ=Once1] -> T7360.Foo3 unbx }
 
--- RHS size: {terms: 5, types: 2, coercions: 0, joins: 0/0}
+-- RHS size: {terms: 2, types: 1, coercions: 0, joins: 0/0}
 T7360.$wfun1 [InlPrag=NOINLINE] :: Foo -> (# #)
-[GblId, Arity=1, Str=<1A>, Unf=OtherCon []]
-T7360.$wfun1
-  = \ (x :: Foo) -> case x of { __DEFAULT -> GHC.Prim.(##) }
+[GblId[StrictWorker([!])], Arity=1, Str=<A>, Unf=OtherCon []]
+T7360.$wfun1 = \ _ [Occ=Dead] -> GHC.Prim.(##)
 
 -- RHS size: {terms: 6, types: 2, coercions: 0, joins: 0/0}
 fun1 [InlPrag=[final]] :: Foo -> ()
 [GblId,
  Arity=1,
- Str=<1A>,
+ Str=<A>,
  Cpr=1,
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
diff --git a/testsuite/tests/simplCore/should_compile/spec-inline.stderr b/testsuite/tests/simplCore/should_compile/spec-inline.stderr
index 40746ff717..6ecc7340a2 100644
--- a/testsuite/tests/simplCore/should_compile/spec-inline.stderr
+++ b/testsuite/tests/simplCore/should_compile/spec-inline.stderr
@@ -74,13 +74,17 @@ end Rec }
 
 -- RHS size: {terms: 61, types: 18, coercions: 0, joins: 0/0}
 Roman.$wgo [InlPrag=[2]] :: Maybe Int -> Maybe Int -> GHC.Prim.Int#
-[GblId,
+[GblId[StrictWorker([!, !])],
  Arity=2,
  Str=<1L><1L>,
  Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True, Guidance=IF_ARGS [61 30] 249 0}]
 Roman.$wgo
-  = \ (u :: Maybe Int) (ds :: Maybe Int) ->
+  = \ (u :: Maybe Int
+         Unf=OtherCon [])
+      (ds
+         :: Maybe Int
+         Unf=OtherCon []) ->
       case ds of {
         Nothing -> case Roman.foo3 of wild1 { };
         Just x ->
diff --git a/testsuite/tests/simplStg/should_run/all.T b/testsuite/tests/simplStg/should_run/all.T
index 2f7c69f5db..1d017043d6 100644
--- a/testsuite/tests/simplStg/should_run/all.T
+++ b/testsuite/tests/simplStg/should_run/all.T
@@ -18,4 +18,4 @@ test('T13536a',
     compile_and_run,
     [''])
 
-
+test('inferTags001', normal, multimod_compile_and_run, ['inferTags001', 'inferTags001_a'])
diff --git a/testsuite/tests/simplStg/should_run/inferTags001.hs b/testsuite/tests/simplStg/should_run/inferTags001.hs
new file mode 100644
index 0000000000..ba6facaf7e
--- /dev/null
+++ b/testsuite/tests/simplStg/should_run/inferTags001.hs
@@ -0,0 +1,7 @@
+module Main (main, foo) where
+
+import A
+
+main = return ()
+
+foo = g
diff --git a/testsuite/tests/simplStg/should_run/inferTags001_a.hs b/testsuite/tests/simplStg/should_run/inferTags001_a.hs
new file mode 100644
index 0000000000..9fdd07c98c
--- /dev/null
+++ b/testsuite/tests/simplStg/should_run/inferTags001_a.hs
@@ -0,0 +1,9 @@
+{-# OPTIONS_GHC -fno-worker-wrapper #-}
+
+module A where
+
+data T = T !Int !Bool
+
+{-# NOINLINE g #-}
+g :: T -> Int
+g (T x _) = x
diff --git a/testsuite/tests/simplStg/should_run/inferTags001_b.hs b/testsuite/tests/simplStg/should_run/inferTags001_b.hs
new file mode 100644
index 0000000000..6b89406625
--- /dev/null
+++ b/testsuite/tests/simplStg/should_run/inferTags001_b.hs
@@ -0,0 +1,9 @@
+{-# OPTIONS_GHC -fno-worker-wrapper #-}
+
+module B where
+
+-- data T = T !Int !Bool
+
+{-# NOINLINE g #-}
+g :: T -> Int
+g (T x _) = x
diff --git a/testsuite/tests/stranal/should_compile/T20817.stderr b/testsuite/tests/stranal/should_compile/T20817.stderr
index 49fa683b42..ba364234db 100644
--- a/testsuite/tests/stranal/should_compile/T20817.stderr
+++ b/testsuite/tests/stranal/should_compile/T20817.stderr
@@ -237,7 +237,7 @@ $wg [InlPrag=[2],
      Dmd=LCL(C1(C1(C1(C1(C1(C1(!L)))))))]
   :: forall {a} {b} {c} {d} {e} {t} {t} {t}.
      Bool -> a -> b -> c -> t -> t -> t -> (# a, b, c, t, t, t #)
-[LclId,
+[LclId[StrictWorker([!, ~, ~, ~, ~, ~, ~])],
  Arity=7,
  Str=<1L><L><L><L><L><L><L>,
  Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
@@ -252,7 +252,9 @@ $wg
       (@t)
       (@t)
       (@t)
-      (ds [Dmd=1L] :: Bool)
+      (ds [Dmd=1L]
+         :: Bool
+         Unf=OtherCon [])
       (ww :: a)
       (ww :: b)
       (ww :: c)
diff --git a/testsuite/tests/stranal/sigs/T16859.stderr b/testsuite/tests/stranal/sigs/T16859.stderr
index 56c711f807..4e32752e35 100644
--- a/testsuite/tests/stranal/sigs/T16859.stderr
+++ b/testsuite/tests/stranal/sigs/T16859.stderr
@@ -45,10 +45,10 @@ T16859.$tcName:
 T16859.$tcNameSort:
 T16859.$trModule:
 T16859.bar: <1!A><L>
-T16859.baz: <1L><1!L><1C1(L)>
+T16859.baz: <L><1!L><1C1(L)>
 T16859.buz: <1!L>
-T16859.foo: <1L><L>
-T16859.mkInternalName: <1!L><1L><1L>
+T16859.foo: <L><L>
+T16859.mkInternalName: <1!L><L><L>
 T16859.n_loc: <1!P(A,A,A,1L)>
 T16859.n_occ: <1!P(A,1!L,A,A)>
 T16859.n_sort: <1!P(1L,A,A,A)>
diff --git a/testsuite/tests/typecheck/should_compile/T14774.stdout b/testsuite/tests/typecheck/should_compile/T14774.stdout
index e98489e760..f958cd8215 100644
--- a/testsuite/tests/typecheck/should_compile/T14774.stdout
+++ b/testsuite/tests/typecheck/should_compile/T14774.stdout
@@ -1,3 +1,3 @@
-T14774.$p1D :: forall a. D a => C a
+T14774.$p1D [InlPrag=[~]] :: forall a. D a => C a
  RULES: Built in rule for T14774.$p1D: "Class op $p1D"]
 T14774.$p1D
author	Andreas Klebinger <klebinger.andreas@gmx.at>	2021-05-10 22:06:51 +0200
committer	Matthew Pickering <matthewtpickering@gmail.com>	2022-02-12 13:59:41 +0000
commit	0e93023eef174262310737004d398bc7a606939a (patch)
tree	091a34f78b7911d8b38f414ff8eab90796581c47
parent	90a26f8b0dd99129d3fd7fe28127cb69abd46328 (diff)
download	haskell-0e93023eef174262310737004d398bc7a606939a.tar.gz