8 files changed, 522 insertions, 274 deletions
diff --git a/compiler/GHC/Core/Lint.hs b/compiler/GHC/Core/Lint.hs
index 6390d83b1e..040af373ec 100644
--- a/compiler/GHC/Core/Lint.hs
+++ b/compiler/GHC/Core/Lint.hs
@@ -1652,6 +1652,11 @@ lintIdBndr top_lvl bind_site id thing_inside
        ; lintL (not (isCoVarType id_ty))
                (text "Non-CoVar has coercion type" <+> ppr id <+> dcolon <+> ppr id_ty)
 
+       -- Check that the lambda binder has no value or OtherCon unfolding.
+       -- See #21496
+       ; lintL (not (bind_site == LambdaBind && isEvaldUnfolding (idUnfolding id)))
+                (text "Lambda binder with value or OtherCon unfolding.")
+
        ; linted_ty <- addLoc (IdTy id) (lintValueType id_ty)
 
        ; addInScopeId id linted_ty $
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index e1b54cf5da..cb25d5aec3 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -74,7 +74,6 @@ import GHC.Utils.Logger
 
 import Control.Monad
 
-
 {-
 The guts of the simplifier is in this module, but the driver loop for
 the simplifier is in GHC.Core.Opt.Pipeline
@@ -1675,8 +1674,9 @@ simplLamBndr :: SimplEnv -> InBndr -> SimplM (SimplEnv, OutBndr)
 -- Historically this had a special case for when a lambda-binder
 -- could have a stable unfolding;
 -- see Historical Note [Case binders and join points]
--- But now it is much simpler!
-simplLamBndr env bndr = simplBinder env bndr
+-- But now it is much simpler! We now only remove unfoldings.
+-- See Note [Never put `OtherCon` unfoldings on lambda binders]
+simplLamBndr env bndr = simplBinder env (zapIdUnfolding bndr)
 
 simplLamBndrs :: SimplEnv -> [InBndr] -> SimplM (SimplEnv, [OutBndr])
 simplLamBndrs env bndrs = mapAccumLM simplLamBndr env bndrs
@@ -3092,7 +3092,7 @@ simplAlt env scrut' _ case_bndr' cont' (Alt (LitAlt lit) bndrs rhs)
 simplAlt env scrut' _ case_bndr' cont' (Alt (DataAlt con) vs rhs)
   = do  { -- See Note [Adding evaluatedness info to pattern-bound variables]
           let vs_with_evals = addEvals scrut' con vs
-        ; (env', vs') <- simplLamBndrs env vs_with_evals
+        ; (env', vs') <- simplBinders env vs_with_evals
 
                 -- Bind the case-binder to (con args)
         ; let inst_tys' = tyConAppArgs (idType case_bndr')
@@ -3616,37 +3616,59 @@ mkDupableStrictBind env arg_bndr join_rhs res_ty
 mkDupableAlt :: Platform -> OutId
              -> JoinFloats -> OutAlt
              -> SimplM (JoinFloats, OutAlt)
-mkDupableAlt _platform case_bndr jfloats (Alt con bndrs' rhs')
-  | exprIsTrivial rhs'   -- See point (2) of Note [Duplicating join points]
-  = return (jfloats, Alt con bndrs' rhs')
+mkDupableAlt _platform case_bndr jfloats (Alt con alt_bndrs alt_rhs_in)
+  | exprIsTrivial alt_rhs_in   -- See point (2) of Note [Duplicating join points]
+  = return (jfloats, Alt con alt_bndrs alt_rhs_in)
 
   | otherwise
-  = do  { let rhs_ty'  = exprType rhs'
-
-              final_bndrs'
-                | isDeadBinder case_bndr = filter abstract_over bndrs'
-                | otherwise              = bndrs' ++ [case_bndr]
-
-              abstract_over bndr
-                  | isTyVar bndr = True -- Abstract over all type variables just in case
-                  | otherwise    = not (isDeadBinder bndr)
-                        -- The deadness info on the new Ids is preserved by simplBinders
-              final_args = varsToCoreExprs final_bndrs'
+  = do  { let rhs_ty'  = exprType alt_rhs_in
+
+              bangs
+                | DataAlt c <- con
+                = dataConRepStrictness c
+                | otherwise = []
+
+              abstracted_binders = abstract_binders alt_bndrs bangs
+
+              abstract_binders :: [Var] -> [StrictnessMark] -> [(Id,StrictnessMark)]
+              abstract_binders [] []
+                -- Abstract over the case binder too if it's used.
+                | isDeadBinder case_bndr  = []
+                | otherwise               = [(case_bndr,MarkedStrict)]
+              abstract_binders (alt_bndr:alt_bndrs) marks
+                -- Abstract over all type variables just in case
+                | isTyVar alt_bndr        = (alt_bndr,NotMarkedStrict) : abstract_binders alt_bndrs marks
+              abstract_binders (alt_bndr:alt_bndrs) (mark:marks)
+                -- The deadness info on the new Ids is preserved by simplBinders
+                -- We don't abstract over dead ids here.
+                | isDeadBinder alt_bndr   = abstract_binders alt_bndrs marks
+                | otherwise               = (alt_bndr,mark) : abstract_binders alt_bndrs marks
+              abstract_binders _ _ = pprPanic "abstrict_binders - failed to abstract" (ppr $ Alt con alt_bndrs alt_rhs_in)
+
+              filtered_binders = map fst abstracted_binders
+              -- We want to make any binder with an evaldUnfolding strict in the rhs.
+              -- See Note [Call-by-value for worker args] (which also applies to join points)
+              (rhs_with_seqs) = mkStrictFieldSeqs abstracted_binders alt_rhs_in
+
+              final_args = varsToCoreExprs filtered_binders
                            -- Note [Join point abstraction]
 
                 -- We make the lambdas into one-shot-lambdas.  The
                 -- join point is sure to be applied at most once, and doing so
                 -- prevents the body of the join point being floated out by
                 -- the full laziness pass
-              really_final_bndrs     = map one_shot final_bndrs'
+              final_bndrs     = map one_shot filtered_binders
               one_shot v | isId v    = setOneShotLambda v
                          | otherwise = v
-              join_rhs   = mkLams really_final_bndrs rhs'
 
-        ; join_bndr <- newJoinId final_bndrs' rhs_ty'
+              -- No lambda binder has an unfolding, but (currently) case binders can,
+              -- so we must zap them here.
+              join_rhs   = mkLams (map zapIdUnfolding final_bndrs) rhs_with_seqs
+
+        ; join_bndr <- newJoinId filtered_binders rhs_ty'
 
         ; let join_call = mkApps (Var join_bndr) final_args
-              alt'      = Alt con bndrs' join_call
+              alt'      = Alt con alt_bndrs join_call
 
         ; return ( jfloats `addJoinFlts` unitJoinFloat (NonRec join_bndr join_rhs)
                  , alt') }
diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs
index 90f492ffea..f0c361dd54 100644
--- a/compiler/GHC/Core/Opt/SpecConstr.hs
+++ b/compiler/GHC/Core/Opt/SpecConstr.hs
@@ -626,41 +626,12 @@ regardless of size; and then we needed a way to turn that *off*.  Now
 that we have ForceSpecConstr, this NoSpecConstr is probably redundant.
 (Used only for PArray, TODO: remove?)
 
-Note [SpecConstr and evaluated unfoldings]
+Note [SpecConstr and strict fields]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-SpecConstr will attach evaldUnfolding unfoldings to function
-arguments representing things that should be fully evaluated
-by the time we execute the RHS.
-
-This primarily concerns strict fields. To give an example in the
-containers package we have a merge function with this specialization:
-
-  "SC:$wmerge01" [2]
-      forall (sc_s5lX :: ghc-prim:GHC.Prim.Int#)
-              (sc_s5lY :: ghc-prim:GHC.Prim.Int#)
-              (sc_s5lZ
-                :: IntMap a_s4UX
-                Unf=OtherCon [])
-              (sc_s5m0
-                :: IntMap a_s4UX
-                Unf=OtherCon [])
-              (sc_s5lW :: ghc-prim:GHC.Prim.Int#)
-              (sc_s5lU :: ghc-prim:GHC.Prim.Int#)
-              (sc_s5lV :: a_s4UX).
-        $wmerge0_s4UK (Data.IntMap.Internal.Tip @a_s4UX sc_s5lU sc_s5lV)
-                      (ghc-prim:GHC.Types.I# sc_s5lW)
-                      (Data.IntMap.Internal.Bin
-                          @a_s4UX sc_s5lX sc_s5lY sc_s5lZ sc_s5m0)
-        = $s$wmerge0_s5m2
-            sc_s5lX sc_s5lY sc_s5lZ sc_s5m0 sc_s5lW sc_s5lU sc_s5lV]
-
-We give sc_s5lZ and sc_s5m0 a evaluated unfolding since they come out of
-strict field fields in the Bin constructor.
-This is especially important since tag inference can then use this
-information to adjust the calling convention of
-`$wmerge0_s4UK` to enforce arguments being passed fully evaluated+tagged.
-See Note [Tag Inference], Note [Strict Worker Ids] for more information on
-how we can take advantage of this.
+We treat strict fields in SpecConstr the same way we do in W/W.
+That is we make the specialized function strict in arguments
+representing strict fields. See Note [Call-by-value for worker args]
+for why we do this.
 
 -----------------------------------------------------
                 Stuff not yet handled
@@ -751,6 +722,7 @@ specConstrProgram guts
       us     <- getUniqueSupplyM
       (_, annos) <- getFirstAnnotations deserializeWithData guts
       this_mod <- getModule
+      -- pprTraceM "specConstrInput" (ppr $ mg_binds guts)
       let binds' = reverse $ fst $ initUs us $ do
                     -- Note [Top-level recursive groups]
                     (env, binds) <- goEnv (initScEnv dflags this_mod annos)
@@ -1738,7 +1710,7 @@ spec_one :: ScEnv
 -}
 
 spec_one env fn arg_bndrs body (call_pat, rule_number)
-  | CP { cp_qvars = qvars, cp_args = pats } <- call_pat
+  | CP { cp_qvars = qvars, cp_args = pats, cp_strict_args = cbv_args } <- call_pat
   = do  { spec_uniq <- getUniqueM
         ; let env1 = extendScSubstList (extendScInScope env qvars)
                                        (arg_bndrs `zip` pats)
@@ -1788,17 +1760,14 @@ spec_one env fn arg_bndrs body (call_pat, rule_number)
                   , (spec_lam_args, spec_call_args, _) <- addVoidWorkerArg spec_lam_args1 []
                       -- needsVoidWorkerArg: usual w/w hack to avoid generating
                       -- a spec_rhs of unlifted type and no args.
-                      -- Unlike W/W we don't turn functions into strict workers
-                      -- immediately here instead letting tidy handle this.
-                      -- For this reason we can ignore the cbv marks.
-                      -- See Note [Strict Worker Ids]. See Note [Tag Inference].
                   , !spec_arity      <- spec_arity1 + 1
                   , !spec_join_arity <- fmap (+ 1) spec_join_arity1
                   = (spec_lam_args,  spec_call_args, spec_arity,  spec_join_arity)
                   | otherwise
                   = (spec_lam_args1, spec_lam_args1, spec_arity1, spec_join_arity1)
 
-              spec_id    = mkLocalId spec_name Many
+              spec_id    = asWorkerLikeId $
+                           mkLocalId spec_name Many
                                      (mkLamTypes spec_lam_args spec_body_ty)
                              -- See Note [Transfer strictness]
                              `setIdDmdSig`    spec_sig
@@ -1807,7 +1776,7 @@ spec_one env fn arg_bndrs body (call_pat, rule_number)
                              `asJoinId_maybe` spec_join_arity
 
                 -- Conditionally use result of new worker-wrapper transform
-              spec_rhs   = mkLams spec_lam_args spec_body
+              spec_rhs   = mkLams spec_lam_args (mkSeqs cbv_args spec_body_ty spec_body)
               rule_rhs   = mkVarApps (Var spec_id) $
                            dropTail (length extra_bndrs) spec_call_args
               inline_act = idInlineActivation fn
@@ -1819,6 +1788,47 @@ spec_one env fn arg_bndrs body (call_pat, rule_number)
                                , os_id = spec_id
                                , os_rhs = spec_rhs }) }
 
+-- See Note [SpecConstr and strict fields]
+mkSeqs :: [Var] -> Type -> CoreExpr -> CoreExpr
+mkSeqs seqees res_ty rhs =
+  foldr addEval rhs seqees
+    where
+      addEval :: Var -> CoreExpr -> CoreExpr
+      addEval arg_id rhs
+        -- Argument representing strict field and it's worth passing via cbv
+        | shouldStrictifyIdForCbv arg_id
+        = Case (Var arg_id) arg_id res_ty ([Alt DEFAULT [] rhs])
+        | otherwise
+        = rhs
+
+
+{- Note [SpecConst needs to add void args first]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider a function
+    f start @t = e
+We want to specialize for a partially applied call `f True`.
+See also Note [SpecConstr call patterns], second Wrinkle.
+Naively we would expect to get
+    $sf @t = $se
+    RULE: f True = $sf
+The specialized function only takes a single type argument
+so we add a void argument to prevent it from turning into
+a thunk. See Note [Protecting the last value argument] for details
+why. Normally we would add the void argument after the
+type argument giving us:
+    $sf :: forall t. Void# -> bla
+    $sf @t void = $se
+    RULE: f True = $sf void# (wrong)
+But if you look closely this wouldn't typecheck!
+If we substitute `f True` with `$sf void#` we expect the type argument to be applied first
+but we apply void# first.
+The easist fix seems to be just to add the void argument to the front of the arguments.
+Now we get:
+    $sf :: Void# -> forall t. bla
+    $sf void @t = $se
+    RULE: f True = $sf void#
+And now we can substitute `f True` with `$sf void#` with everything working out nicely!
+-}
 
 calcSpecInfo :: Id                     -- The original function
              -> CallPat                -- Call pattern
@@ -2081,13 +2091,15 @@ only in kind-casts, but I'm doing the simple thing for now.
 -}
 
 data CallPat = CP { cp_qvars :: [Var]           -- Quantified variables
-                  , cp_args  :: [CoreExpr] }    -- Arguments
+                  , cp_args  :: [CoreExpr]      -- Arguments
+                  , cp_strict_args :: [Var] }   -- Arguments we want to pass unlifted even if they are boxed
      -- See Note [SpecConstr call patterns]
 
 instance Outputable CallPat where
-  ppr (CP { cp_qvars = qvars, cp_args = args })
+  ppr (CP { cp_qvars = qvars, cp_args = args, cp_strict_args =  strict })
     = text "CP" <> braces (sep [ text "cp_qvars =" <+> ppr qvars <> comma
-                               , text "cp_args =" <+> ppr args ])
+                               , text "cp_args =" <+> ppr args
+                               , text "cp_strict_args = " <> ppr strict ])
 
 callsToNewPats :: ScEnv -> Id
                -> SpecInfo
@@ -2200,16 +2212,16 @@ callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat)
 callToPats env bndr_occs call@(Call fn args con_env)
   = do  { let in_scope = substInScope (sc_subst env)
 
-        ; pairs <- zipWith3M (argToPat env in_scope con_env) args bndr_occs (map (const NotMarkedStrict) args)
+        ; arg_tripples <- zipWith3M (argToPat env in_scope con_env) args bndr_occs (map (const NotMarkedStrict) args)
                    -- This zip trims the args to be no longer than
                    -- the lambdas in the function definition (bndr_occs)
 
           -- Drop boring patterns from the end
           -- See Note [SpecConstr call patterns]
-        ; let pairs' | isJoinId fn = pairs
-                     | otherwise   = dropWhileEnd is_boring pairs
-              is_boring (interesting, _) = not interesting
-              (interesting_s, pats) = unzip pairs'
+        ; let arg_tripples' | isJoinId fn = arg_tripples
+                            | otherwise   = dropWhileEnd is_boring arg_tripples
+              is_boring (interesting, _,_) = not interesting
+              (interesting_s, pats, cbv_ids) = unzip3 arg_tripples'
               interesting           = or interesting_s
 
         ; let pat_fvs = exprsFreeVarsList pats
@@ -2254,7 +2266,7 @@ callToPats env bndr_occs call@(Call fn args con_env)
               -- pprTraceM "callToPatsOut" (
               --   text "fun" <> ppr fn $$
               --   ppr (CP { cp_qvars = qvars', cp_args = pats })) >>
-                return (Just (CP { cp_qvars = qvars', cp_args = pats }))
+              return (Just (CP { cp_qvars = qvars', cp_args = pats, cp_strict_args = concat cbv_ids }))
           else return Nothing }
 
     -- argToPat takes an actual argument, and returns an abstracted
@@ -2269,8 +2281,8 @@ argToPat :: ScEnv
          -> CoreArg                     -- A call arg (or component thereof)
          -> ArgOcc
          -> StrictnessMark              -- Tells us if this argument is a strict field of a data constructor
-                                        -- See Note [SpecConstr and evaluated unfoldings]
-         -> UniqSM (Bool, CoreArg)
+                                        -- See Note [SpecConstr and strict fields]
+         -> UniqSM (Bool, CoreArg, [Id])
 
 -- Returns (interesting, pat),
 -- where pat is the pattern derived from the argument
@@ -2295,12 +2307,12 @@ argToPat1 :: ScEnv
   -> Expr CoreBndr
   -> ArgOcc
   -> StrictnessMark
-  -> UniqSM (Bool, Expr CoreBndr)
+  -> UniqSM (Bool, Expr CoreBndr, [Id])
 argToPat1 _env _in_scope _val_env arg@(Type {}) _arg_occ _arg_str
-  = return (False, arg)
+  = return (False, arg, [])
 
-argToPat1 env in_scope val_env (Tick _ arg) arg_occ _arg_str
-  = argToPat env in_scope val_env arg arg_occ _arg_str
+argToPat1 env in_scope val_env (Tick _ arg) arg_occ arg_str
+  = argToPat env in_scope val_env arg arg_occ arg_str
         -- Note [Tick annotations in call patterns]
         -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         -- Ignore Notes.  In particular, we want to ignore any InlineMe notes
@@ -2324,7 +2336,7 @@ argToPat env in_scope val_env (Case scrut _ _ [(_, _, rhs)]) arg_occ
 
 argToPat1 env in_scope val_env (Cast arg co) arg_occ arg_str
   | not (ignoreType env ty2)
-  = do  { (interesting, arg') <- argToPat env in_scope val_env arg arg_occ arg_str
+  = do  { (interesting, arg', strict_args) <- argToPat env in_scope val_env arg arg_occ arg_str
         ; if not interesting then
                 wildCardPat ty2 arg_str
           else do
@@ -2332,7 +2344,7 @@ argToPat1 env in_scope val_env (Cast arg co) arg_occ arg_str
           uniq <- getUniqueM
         ; let co_name = mkSysTvName uniq (fsLit "sg")
               co_var  = mkCoVar co_name (mkCoercionType Representational ty1 ty2)
-        ; return (interesting, Cast arg' (mkCoVarCo co_var)) } }
+        ; return (interesting, Cast arg' (mkCoVarCo co_var), strict_args) } }
   where
     Pair ty1 ty2 = coercionKind co
 
@@ -2368,17 +2380,12 @@ argToPat1 env in_scope val_env arg arg_occ _arg_str
       --       ppr rest_args $$
       --       ppr (map isTypeArg rest_args))
        ; prs <- zipWith3M (argToPat env in_scope val_env) rest_args arg_occs matched_str
-       ; let args' = map snd prs :: [CoreArg]
+       ; let args' = map sndOf3 prs :: [CoreArg]
        ; assertPpr (length con_str == length (filter isRuntimeArg rest_args))
             ( ppr con_str $$ ppr rest_args $$
               ppr (length con_str) $$ ppr (length rest_args)
             ) $ return ()
-      --  ; assert (length con_str == length rest_args) $
-      --    pprTraceM "argToPat"
-      --       ( parens (int $ length con_str) <> ppr con_str  $$
-      --         ppr rest_args $$
-      --         ppr prs)
-       ; return (True, mkConApp dc (ty_args ++ args')) }
+       ; return (True, mkConApp dc (ty_args ++ args'), concat (map thdOf3 prs)) }
   where
     mb_scrut dc = case arg_occ of
                 ScrutOcc bs | Just occs <- lookupUFM bs dc
@@ -2406,7 +2413,7 @@ argToPat1 env in_scope val_env arg arg_occ _arg_str
   --         business of absence analysis, not SpecConstr.)
   --    (b) we know what its value is
   -- In that case it counts as "interesting"
-argToPat1 env in_scope val_env (Var v) arg_occ _arg_str
+argToPat1 env in_scope val_env (Var v) arg_occ arg_str
   | sc_force env || case arg_occ of { ScrutOcc {} -> True
                                     ; UnkOcc      -> False
                                     ; NoOcc       -> False } -- (a)
@@ -2415,7 +2422,8 @@ argToPat1 env in_scope val_env (Var v) arg_occ _arg_str
        -- So sc_keen focused just on f (I# x), where we have freshly-allocated
        -- box that we can eliminate in the caller
   , not (ignoreType env (varType v))
-  = return (True, Var (setStrUnfolding v MarkedStrict))
+  -- See Note [SpecConstr and strict fields]
+  = return (True, Var v, if isMarkedStrict arg_str then [v] else mempty)
   where
     is_value
         | isLocalId v = v `elemInScopeSet` in_scope
@@ -2447,40 +2455,12 @@ argToPat1 env in_scope val_env (Var v) arg_occ _arg_str
 argToPat1 _env _in_scope _val_env arg _arg_occ arg_str
   = wildCardPat (exprType arg) arg_str
 
--- We want the given id to be passed call-by-value if it's MarkedCbv.
--- For some, but not all ids this can be achieved by giving them an OtherCon unfolding.
--- Doesn't touch existing value unfoldings.
--- See Note [SpecConstr and evaluated unfoldings]
-setStrUnfolding :: Id -> StrictnessMark  -> Id
--- setStrUnfolding id str = id
-setStrUnfolding id str
-  -- pprTrace "setStrUnfolding"
-  --   (ppr id <+> ppr (isMarkedCbv str) $$
-  --    ppr (idType id) $$
-  --    text "boxed:" <> ppr (isBoxedType (idType id)) $$
-  --    text "unlifted:" <> ppr (isUnliftedType (idType id))
-  --    )
-  --   False
-  -- = undefined
-  | not (isId id) || isEvaldUnfolding (idUnfolding id)
-  = id
-  | isMarkedStrict str
-  , not $ isUnliftedType (idType id) -- Pointless to stick an evald unfolding on unlifted types
-  = -- trace "setStrUnfolding2" $
-    assert (isId id) $
-    assert (not $ hasCoreUnfolding $ idUnfolding id) $
-    id `setIdUnfolding` evaldUnfolding
-  | otherwise
-  = -- trace "setStrUnfolding3"
-    id
-
-wildCardPat :: Type -> StrictnessMark -> UniqSM (Bool, CoreArg)
+-- | wildCardPats are always boring
+wildCardPat :: Type -> StrictnessMark -> UniqSM (Bool, CoreArg, [Id])
 wildCardPat ty str
-  = do { uniq <- getUniqueM
-       ; let id = mkSysLocalOrCoVar (fsLit "sc") uniq Many ty `setStrUnfolding` str
-       -- See Note [SpecConstr and evaluated unfoldings]
-       -- ; pprTraceM "wildCardPat" (ppr id <+> ppr (idUnfolding id))
-       ; return (False, varToCoreExpr id) }
+  = do { id <- mkSysLocalOrCoVarM (fsLit "sc") Many ty
+       -- ; pprTraceM "wildCardPat" (ppr id' <+> ppr (idUnfolding id'))
+       ; return (False, varToCoreExpr id, if isMarkedStrict str then [id] else []) }
 
 isValue :: ValueEnv -> CoreExpr -> Maybe Value
 isValue _env (Lit lit)
@@ -2536,6 +2516,7 @@ samePat (CP { cp_qvars = vs1, cp_args = as1 })
         (CP { cp_qvars = vs2, cp_args = as2 })
   = all2 same as1 as2
   where
+    -- If the args are the same, their strictness marks will be too so we don't compare those.
     same (Var v1) (Var v2)
         | v1 `elem` vs1 = v2 `elem` vs2
         | v2 `elem` vs2 = False
diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs
index 93c4c31995..ebbd766b71 100644
--- a/compiler/GHC/Core/Opt/WorkWrap.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap.hs
@@ -36,6 +36,7 @@ import GHC.Utils.Panic.Plain
 import GHC.Utils.Monad
 import GHC.Utils.Trace
 import GHC.Unit.Types
+import GHC.Core.DataCon
 
 {-
 We take Core bindings whose binders have:
@@ -537,7 +538,7 @@ tryWW ww_opts is_rec fn_id rhs
   -- See Note [Drop absent bindings]
   | isAbsDmd (demandInfo fn_info)
   , not (isJoinId fn_id)
-  , Just filler <- mkAbsentFiller ww_opts fn_id
+  , Just filler <- mkAbsentFiller ww_opts fn_id NotMarkedStrict
   = return [(new_fn_id, filler)]
 
   -- See Note [Don't w/w INLINE things]
@@ -789,10 +790,10 @@ splitFun ww_opts fn_id rhs
 
 mkWWBindPair :: WwOpts -> Id -> IdInfo
              -> [Var] -> CoreExpr -> Unique -> Divergence
-             -> ([Demand],[CbvMark], JoinArity, Id -> CoreExpr, Expr CoreBndr -> CoreExpr)
+             -> ([Demand],JoinArity, Id -> CoreExpr, Expr CoreBndr -> CoreExpr)
              -> [(Id, CoreExpr)]
 mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div
-             (work_demands, cbv_marks :: [CbvMark], join_arity, wrap_fn, work_fn)
+             (work_demands, join_arity, wrap_fn, work_fn)
   = -- pprTrace "mkWWBindPair" (ppr fn_id <+> ppr wrap_id <+> ppr work_id $$ ppr wrap_rhs) $
     [(work_id, work_rhs), (wrap_id, wrap_rhs)]
      -- Worker first, because wrapper mentions it
@@ -822,7 +823,8 @@ mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div
       -- worker is join point iff wrapper is join point
       -- (see Note [Don't w/w join points for CPR])
 
-    work_id  = mkWorkerId work_uniq fn_id (exprType work_rhs)
+    work_id  = asWorkerLikeId $
+               mkWorkerId work_uniq fn_id (exprType work_rhs)
                 `setIdOccInfo` occInfo fn_info
                         -- Copy over occurrence info from parent
                         -- Notably whether it's a loop breaker
@@ -847,10 +849,7 @@ mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div
                         -- arity is consistent with the demand type goes
                         -- through
 
-                `setIdCbvMarks` cbv_marks
-
                 `asJoinId_maybe` work_join_arity
-                -- `setIdThing` (undefined cbv_marks)
 
     work_arity = length work_demands :: Int
 
@@ -1034,7 +1033,7 @@ splitThunk ww_opts is_rec x rhs
   = assert (not (isJoinId x)) $
     do { let x' = localiseId x -- See comment above
        ; (useful,_args, wrap_fn, fn_arg)
-           <- mkWWstr_one ww_opts x' NotMarkedCbv
+           <- mkWWstr_one ww_opts x' NotMarkedStrict
        ; let res = [ (x, Let (NonRec x' rhs) (wrap_fn fn_arg)) ]
        ; if useful then assertPpr (isNonRec is_rec) (ppr x) -- The thunk must be non-recursive
                    return res
diff --git a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
index 63ac670418..3968a939c4 100644
--- a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
@@ -161,7 +161,6 @@ initWwOpts this_mod dflags fam_envs = MkWwOpts
 
 type WwResult
   = ([Demand],              -- Demands for worker (value) args
-     [CbvMark],             -- Cbv semantics for worker (value) args
      JoinArity,             -- Number of worker (type OR value) args
      Id -> CoreExpr,        -- Wrapper body, lacking only the worker Id
      CoreExpr -> CoreExpr)  -- Worker body, lacking the original function rhs
@@ -231,7 +230,7 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr
               zapped_arg_vars = map zap_var arg_vars
               (subst, cloned_arg_vars) = cloneBndrs empty_subst uniq_supply zapped_arg_vars
               res_ty' = GHC.Core.Subst.substTy subst res_ty
-              init_cbv_marks = map (const NotMarkedCbv) cloned_arg_vars
+              init_cbv_marks = map (const NotMarkedStrict) cloned_arg_vars
 
         ; (useful1, work_args_cbv, wrap_fn_str, fn_args)
              <- mkWWstr opts cloned_arg_vars init_cbv_marks
@@ -252,11 +251,13 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr
               call_rhs fn_rhs = mkAppsBeta fn_rhs fn_args
                                   -- See Note [Join points and beta-redexes]
               wrapper_body = mkLams cloned_arg_vars . wrap_fn_cpr . wrap_fn_str . call_work
-              worker_body = mkLams work_lam_args . work_fn_cpr . call_rhs
-              (worker_args_dmds, work_val_cbvs)= unzip [(idDemandInfo v,cbv) | (v,cbv) <- zipEqual "mkWwBodies" work_call_args work_call_cbv, isId v]
+                                  -- See Note [Call-by-value for worker args]
+              work_seq_str_flds = mkStrictFieldSeqs (zip work_lam_args work_call_cbv)
+              worker_body = mkLams work_lam_args . work_seq_str_flds . work_fn_cpr . call_rhs
+              worker_args_dmds= [(idDemandInfo v) | v <- work_call_args, isId v]
 
         ; if ((useful1 && not only_one_void_argument) || useful2)
-          then return (Just (worker_args_dmds, work_val_cbvs, length work_call_args,
+          then return (Just (worker_args_dmds, length work_call_args,
                        wrapper_body, worker_body))
           else return Nothing
         }
@@ -395,15 +396,16 @@ needsVoidWorkerArg fn_id wrap_args work_args
     work_has_barrier    = any is_float_barrier work_args
     needs_float_barrier = wrap_had_barrier && not work_has_barrier
 
--- | Inserts a `Void#` arg before the first value argument (but after leading type args).
-addVoidWorkerArg :: [Var] -> [CbvMark]
+-- | Inserts a `Void#` arg before the first argument.
+--
+-- Why as the first argument? See Note [SpecConst needs to add void args first]
+-- in SpecConstr.
+addVoidWorkerArg :: [Var] -> [StrictnessMark]
                  -> ([Var],     -- Lambda bound args
                      [Var],     -- Args at call site
-                     [CbvMark]) -- cbv semantics for the worker args.
+                     [StrictnessMark]) -- cbv semantics for the worker args.
 addVoidWorkerArg work_args cbv_marks
-  = (ty_args ++ voidArgId:rest, ty_args ++ voidPrimId:rest, NotMarkedCbv:cbv_marks)
-  where
-    (ty_args, rest) = break isId work_args
+  = (voidArgId : work_args, voidPrimId:work_args, NotMarkedStrict:cbv_marks)
 
 {-
 Note [Protecting the last value argument]
@@ -625,7 +627,7 @@ wantToUnboxArg do_unlifting fam_envs ty dmd@(n :* sd)
   -- That is done by 'finaliseArgBoxities'!
   = Unbox (DataConPatContext dc tc_args co) ds
 
-  -- See Note [Strict Worker Ids]
+  -- See Note [CBV Function Ids]
   | do_unlifting
   , isStrUsedDmd dmd
   , not (isFunTy ty)
@@ -796,7 +798,7 @@ code which wasn't fruitful. See https://gitlab.haskell.org/ghc/ghc/-/merge_reque
 We could still try to do C) in the future by having PAP calls which will evaluate the required arguments
 before calling the partially applied function. But this would be neither a small nor simple change so we
 stick with A) and a flag for B) for now.
-See also Note [Tag Inference] and Note [Strict Worker Ids]
+See also Note [Tag Inference] and Note [CBV Function Ids]
 -}
 
 {-
@@ -810,9 +812,9 @@ See also Note [Tag Inference] and Note [Strict Worker Ids]
 mkWWstr :: WwOpts
         -> [Var]                         -- Wrapper args; have their demand info on them
                                          --  *Includes type variables*
-        -> [CbvMark]                     -- cbv info for arguments
+        -> [StrictnessMark]                     -- cbv info for arguments
         -> UniqSM (Bool,                 -- Will this result in a useful worker
-                   [(Var,CbvMark)],      -- Worker args/their call-by-value semantics.
+                   [(Var,StrictnessMark)],      -- Worker args/their call-by-value semantics.
                    CoreExpr -> CoreExpr, -- Wrapper body, lacking the worker call
                                          -- and without its lambdas
                                          -- This fn adds the unboxing
@@ -843,24 +845,24 @@ mkWWstr opts args cbv_info
 -- See Note [Worker/wrapper for Strictness and Absence]
 mkWWstr_one :: WwOpts
             -> Var
-            -> CbvMark
-            -> UniqSM (Bool, [(Var,CbvMark)], CoreExpr -> CoreExpr, CoreExpr)
-mkWWstr_one opts arg marked_cbv =
+            -> StrictnessMark
+            -> UniqSM (Bool, [(Var,StrictnessMark)], CoreExpr -> CoreExpr, CoreExpr)
+mkWWstr_one opts arg banged =
   case wantToUnboxArg True fam_envs arg_ty arg_dmd of
     _ | isTyVar arg -> do_nothing
 
     DropAbsent
-      | Just absent_filler <- mkAbsentFiller opts arg
+      | Just absent_filler <- mkAbsentFiller opts arg banged
          -- Absent case.  Dropt the argument from the worker.
          -- We can't always handle absence for arbitrary
          -- unlifted types, so we need to choose just the cases we can
          -- (that's what mkAbsentFiller does)
       -> return (goodWorker, [], nop_fn, absent_filler)
 
-    Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc marked_cbv
+    Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc banged
 
     Unlift -> return  ( wwForUnlifting opts
-                      , [(setIdUnfolding arg evaldUnfolding, MarkedCbv)]
+                      , [(arg, MarkedStrict)]
                       , nop_fn
                       , varToCoreExpr arg)
 
@@ -871,15 +873,16 @@ mkWWstr_one opts arg marked_cbv =
     arg_ty     = idType arg
     arg_dmd    = idDemandInfo arg
     -- Type args don't get cbv marks
-    arg_cbv    = if isTyVar arg then NotMarkedCbv else marked_cbv
+    arg_cbv    = if isTyVar arg then NotMarkedStrict else banged
+
     do_nothing = return (badWorker, [(arg,arg_cbv)], nop_fn, varToCoreExpr arg)
 
 unbox_one_arg :: WwOpts
           -> Var
           -> [Demand]
           -> DataConPatContext
-          -> CbvMark
-          -> UniqSM (Bool, [(Var,CbvMark)], CoreExpr -> CoreExpr, CoreExpr)
+          -> StrictnessMark
+          -> UniqSM (Bool, [(Var,StrictnessMark)], CoreExpr -> CoreExpr, CoreExpr)
 unbox_one_arg opts arg_var ds
           DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args
                             , dcpc_co = co }
@@ -889,37 +892,32 @@ unbox_one_arg opts arg_var ds
              -- Create new arguments we get when unboxing dc
              (ex_tvs', arg_ids) =
                dataConRepFSInstPat (ex_name_fss ++ repeat ww_prefix) pat_bndrs_uniqs (idMult arg_var) dc tc_args
-             -- Apply str info to new args.
-             arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids ds
+             con_str_marks = dataConRepStrictness dc
+             -- Apply str info to new args. Also remove OtherCon unfoldings so they don't end up in lambda binders
+             -- of the worker. See Note [Never put `OtherCon` unfoldings on lambda binders]
+             arg_ids' = map zapIdUnfolding $ zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids ds
              unbox_fn = mkUnpackCase (Var arg_var) co (idMult arg_var)
                                      dc (ex_tvs' ++ arg_ids')
-             -- Mark arguments coming out of strict fields as evaluated and give them cbv semantics. See Note [Strict Worker Ids]
-             cbv_arg_marks = zipWithEqual "unbox_one_arg" bangToMark (dataConRepStrictness dc) arg_ids'
-             unf_args = zipWith setEvald arg_ids' cbv_arg_marks
-             cbv_marks = (map (const NotMarkedCbv) ex_tvs') ++ cbv_arg_marks
-       ; (_sub_args_quality, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ unf_args) cbv_marks
+             -- Mark arguments coming out of strict fields so we can make the worker strict on those
+             -- argumnets later. seq them later. See Note [Call-by-value for worker args]
+             strict_marks = (map (const NotMarkedStrict) ex_tvs') ++ con_str_marks
+       ; (_sub_args_quality, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ arg_ids') strict_marks
        ; let wrap_arg = mkConApp dc (map Type tc_args ++ wrap_args) `mkCast` mkSymCo co
        ; return (goodWorker, worker_args, unbox_fn . wrap_fn, wrap_arg) }
                           -- Don't pass the arg, rebox instead
-    where bangToMark :: StrictnessMark -> Id -> CbvMark
-          bangToMark NotMarkedStrict _ = NotMarkedCbv
-          bangToMark MarkedStrict v
-            | isUnliftedType (idType v) = NotMarkedCbv
-            | otherwise                 = MarkedCbv
-          setEvald var NotMarkedCbv  = var
-          setEvald var MarkedCbv     = setIdUnfolding var evaldUnfolding
 
 -- | Tries to find a suitable absent filler to bind the given absent identifier
 -- to. See Note [Absent fillers].
 --
 -- If @mkAbsentFiller _ id == Just e@, then @e@ is an absent filler with the
 -- same type as @id@. Otherwise, no suitable filler could be found.
-mkAbsentFiller :: WwOpts -> Id -> Maybe CoreExpr
-mkAbsentFiller opts arg
+mkAbsentFiller :: WwOpts -> Id -> StrictnessMark -> Maybe CoreExpr
+mkAbsentFiller opts arg str
   -- The lifted case: Bind 'absentError' for a nice panic message if we are
   -- wrong (like we were in #11126). See (1) in Note [Absent fillers]
   | mightBeLiftedType arg_ty
-  , not is_strict, not is_evald -- See (2) in Note [Absent fillers]
+  , not is_strict
+  , not (isMarkedStrict str) -- See (2) in Note [Absent fillers]
   = Just (mkAbsentErrorApp arg_ty msg)
 
   -- The default case for mono rep: Bind `RUBBISH[rr] arg_ty`
@@ -930,7 +928,6 @@ mkAbsentFiller opts arg
   where
     arg_ty    = idType arg
     is_strict = isStrictDmd (idDemandInfo arg)
-    is_evald  = isEvaldUnfolding $ idUnfolding arg
 
     msg = renderWithContext
             (defaultSDocContext { sdocSuppressUniques = True })
@@ -1103,29 +1100,28 @@ Needless to say, there are some wrinkles:
 
   2. We also mustn't put an error-thunk (that fills in for an absent value of
      lifted rep) in a strict field, because #16970 establishes the invariant
-     that strict fields are always evaluated, by (re-)evaluating what is put in
+     that strict fields are always evaluated, by possibly (re-)evaluating what is put in
      a strict field. That's the reason why 'zs' binds a rubbish literal instead
      of an error-thunk, see #19133.
 
      How do we detect when we are about to put an error-thunk in a strict field?
-     Ideally, we'd just look at the 'StrictnessMark' of the DataCon's field, but
-     it's quite nasty to thread the marks though 'mkWWstr' and 'mkWWstr_one'.
-     So we rather look out for a necessary condition for strict fields:
+     Ideally, we'd just look at the 'StrictnessMark' of the DataCon's field. So that's
+     what we do!
+
+     There are other necessary conditions for strict fields:
      Note [Unboxing evaluated arguments] in DmdAnal makes it so that the demand on
      'zs' is absent and /strict/: It will get cardinality 'C_10', the empty
-     interval, rather than 'C_00'. Hence the 'isStrictDmd' check: It guarantees
-     we never fill in an error-thunk for an absent strict field.
+     interval, rather than 'C_00'. Hence the 'isStrictDmd' check: It further
+     guarantees e never fill in an error-thunk for an absent strict field.
      But that also means we emit a rubbish lit for other args that have
      cardinality 'C_10' (say, the arg to a bottoming function) where we could've
-     used an error-thunk, but that's a small price to pay for simplicity.
-
-     In #19766, we discovered that even if the binder has eval cardinality
-     'C_00', it may end up in a strict field, with no surrounding seq
-     whatsoever! That happens if the calling code has already evaluated
-     said lambda binder, which will then have an evaluated unfolding
-     ('isEvaldUnfolding'). That in turn tells the Simplifier it is free to drop
-     the seq. So we better don't fill in an error-thunk for eval'd arguments
-     either, just in case it ends up in a strict field!
+     used an error-thunk.
+     NB from Andreas: But I think using an error thunk there would be dodgy no matter what
+     for example if we decide to pass the argument to the bottoming function cbv.
+     As we might do if the function in question is a worker.
+     See Note [CBV Function Ids] in GHC.CoreToStg.Prep. So I just left the strictness check
+     in place on top of threading through the marks from the constructor. It's a *really* cheap
+     and easy check to make anyway.
 
   3. We can only emit a LitRubbish if the arg's type @arg_ty@ is mono-rep, e.g.
      of the form @TYPE rep@ where @rep@ is not (and doesn't contain) a variable.
diff --git a/compiler/GHC/Core/Tidy.hs b/compiler/GHC/Core/Tidy.hs
index 63473ca68a..3f6c212f49 100644
--- a/compiler/GHC/Core/Tidy.hs
+++ b/compiler/GHC/Core/Tidy.hs
@@ -19,10 +19,9 @@ import GHC.Core
 import GHC.Core.Type
 
 import GHC.Core.Seq ( seqUnfolding )
-import GHC.Core.Utils ( computeCbvInfo )
 import GHC.Types.Id
 import GHC.Types.Id.Info
-import GHC.Types.Demand ( zapDmdEnvSig )
+import GHC.Types.Demand ( zapDmdEnvSig, isStrUsedDmd )
 import GHC.Core.Coercion ( tidyCo )
 import GHC.Types.Var
 import GHC.Types.Var.Env
@@ -35,6 +34,12 @@ import GHC.Types.Tickish
 import GHC.Data.Maybe
 import GHC.Utils.Misc
 import Data.List (mapAccumL)
+-- import GHC.Utils.Trace
+import GHC.Utils.Outputable
+import GHC.Types.RepType (typePrimRep)
+import GHC.Utils.Panic
+import GHC.Types.Basic (isMarkedCbv, CbvMark (..))
+import GHC.Core.Utils (shouldUseCbvForId)
 
 {-
 ************************************************************************
@@ -68,11 +73,16 @@ tidyBind env (Rec prs)
 
 -- Note [Attaching CBV Marks to ids]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- Before tidy, function arguments which have a call-by-value semantics are identified
--- by having an `OtherCon[]` unfolding. During tidy, we transform this information into CBV (call-by-value)
--- marks. The marks themselves then are put onto the function id itself.
+-- See Note [CBV Function Ids] for the *why*.
+-- Before tidy, we turn all worker functions into worker like ids.
+-- This way we can later tell if we can assume the existence of a wrapper. This also applies to
+-- specialized versions of functions generated by SpecConstr for which we, in a sense,
+-- consider the unspecialized version to be the wrapper.
+-- During tidy we take the demands on the arguments for these ids and compute
+-- CBV (call-by-value) semantics for each individual argument.
+-- The marks themselves then are put onto the function id itself.
 -- This means the code generator can get the full calling convention by only looking at the function
--- itself without having to inspect the RHS for potential argument unfoldings.
+-- itself without having to inspect the RHS.
 --
 -- The actual logic is in tidyCbvInfo and takes:
 -- * The function id
@@ -84,9 +94,9 @@ tidyBind env (Rec prs)
 --
 -- Not that we *have* to look at the untidied rhs.
 -- During tidying some knot-tying occurs which can blow up
--- if we look at the types of the arguments, but here we dont:
--- we only check if the manifest lambdas have OtherCon unfoldings
--- and these remain valid post tidy.
+-- if we look at the post-tidy types of the arguments here.
+-- However we only care if the types are unlifted and that doesn't change during tidy.
+-- so we can just look at the untidied types.
 --
 -- If the id is boot-exported we don't use a cbv calling convention via marks,
 -- as the boot file won't contain them. Which means code calling boot-exported
@@ -95,21 +105,91 @@ tidyBind env (Rec prs)
 -- To be able to avoid this we pass a set of boot exported ids for this module around.
 -- For non top level ids we can skip this. Local ids are never boot-exported
 -- as boot files don't have unfoldings. So there this isn't a concern.
--- See also Note [Strict Worker Ids]
+-- See also Note [CBV Function Ids]
 
 
--- See Note [Attaching CBV Marks to ids]
+-- See Note [CBV Function Ids]
 tidyCbvInfoTop :: HasDebugCallStack => NameSet -> Id -> CoreExpr -> Id
 tidyCbvInfoTop boot_exports id rhs
   -- Can't change calling convention for boot exported things
   | elemNameSet (idName id) boot_exports = id
   | otherwise = computeCbvInfo id rhs
 
--- See Note [Attaching CBV Marks to ids]
+-- See Note [CBV Function Ids]
 tidyCbvInfoLocal :: HasDebugCallStack => Id -> CoreExpr -> Id
 tidyCbvInfoLocal id rhs
   | otherwise = computeCbvInfo id rhs
 
+-- | For a binding we:
+-- * Look at the args
+-- * Mark any argument as call-by-value if:
+--   - It's argument to a worker and demanded strictly
+--   - Unless it's an unlifted type already
+-- * Update the id
+-- See Note [CBV Function Ids]
+-- See Note [Attaching CBV Marks to ids]
+
+computeCbvInfo :: HasCallStack
+               => Id            -- The function
+               -> CoreExpr      -- It's RHS
+               -> Id
+-- computeCbvInfo fun_id rhs = fun_id
+computeCbvInfo fun_id rhs
+  | (isWorkerLike || isJoinId fun_id) &&  (valid_unlifted_worker val_args)
+  =
+    -- pprTrace "computeCbvInfo"
+    --   (text "fun" <+> ppr fun_id $$
+    --     text "arg_tys" <+> ppr (map idType val_args) $$
+
+    --     text "prim_rep" <+> ppr (map typePrimRep_maybe $ map idType val_args) $$
+    --     text "rrarg" <+> ppr (map isRuntimeVar val_args) $$
+    --     text "cbv_marks" <+> ppr cbv_marks $$
+    --     text "out_id" <+> ppr cbv_bndr $$
+    --     ppr rhs)
+      cbv_bndr
+  | otherwise = fun_id
+  where
+    val_args = filter isId . fst $ collectBinders rhs
+    cbv_marks =
+      -- CBV marks are only set during tidy so none should be present already.
+      assertPpr (maybe True null $ idCbvMarks_maybe fun_id) (ppr fun_id <+> (ppr $ idCbvMarks_maybe fun_id) $$ ppr rhs) $
+      map mkMark val_args
+    cbv_bndr
+        | valid_unlifted_worker val_args
+        , any isMarkedCbv cbv_marks
+        -- seqList to avoid retaining the original rhs
+        = cbv_marks `seqList` setIdCbvMarks fun_id cbv_marks
+        | otherwise =
+          -- pprTraceDebug "tidyCbvInfo: Worker seems to take unboxed tuple/sum types!" (ppr fun_id <+> ppr rhs)
+          asNonWorkerLikeId fun_id
+    -- We don't set CBV marks on functions which take unboxed tuples or sums as arguments.
+    -- Doing so would require us to compute the result of unarise here in order to properly determine
+    -- argument positions at runtime.
+    -- In practice this doesn't matter much. Most "interesting" functions will get a W/W split which will eliminate
+    -- unboxed tuple arguments, and unboxed sums are rarely used. But we could change this in the future and support
+    -- unboxed sums/tuples as well.
+    valid_unlifted_worker args =
+      -- pprTrace "valid_unlifted" (ppr fun_id $$ ppr args) $
+      all isSingleUnarisedArg args
+    isSingleUnarisedArg v
+      | isUnboxedSumType ty = False
+      | isUnboxedTupleType ty = isSimplePrimRep (typePrimRep ty)
+      | otherwise = isSimplePrimRep (typePrimRep ty)
+      where
+        ty = idType v
+        isSimplePrimRep []  = True
+        isSimplePrimRep [_] = True
+        isSimplePrimRep _   = False
+
+    mkMark arg
+      | not $ shouldUseCbvForId arg = NotMarkedCbv
+      -- We can only safely use cbv for strict arguments
+      | (isStrUsedDmd (idDemandInfo arg))
+      , not (isDeadEndId fun_id) = MarkedCbv
+      | otherwise = NotMarkedCbv
+
+    isWorkerLike = isWorkerLikeId fun_id
+
 ------------  Expressions  --------------
 tidyExpr :: TidyEnv -> CoreExpr -> CoreExpr
 tidyExpr env (Var v)       = Var (tidyVarOcc env v)
@@ -215,7 +295,7 @@ tidyIdBndr env@(tidy_env, var_env) id
                                  `setOneShotInfo` oneShotInfo old_info
         old_info = idInfo id
         old_unf  = realUnfoldingInfo old_info
-        new_unf  = zapUnfolding old_unf  -- See Note [Preserve evaluatedness]
+        new_unf  = trimUnfolding old_unf  -- See Note [Preserve evaluatedness]
     in
     ((tidy_env', var_env'), id')
    }
@@ -266,7 +346,7 @@ tidyLetBndr rec_tidy_env env@(tidy_env, var_env) id
 
         old_unf = realUnfoldingInfo old_info
         new_unf | isStableUnfolding old_unf = tidyUnfolding rec_tidy_env old_unf old_unf
-                | otherwise                 = zapUnfolding old_unf
+                | otherwise                 = trimUnfolding old_unf
                                               -- See Note [Preserve evaluatedness]
 
     in
diff --git a/compiler/GHC/Core/Type.hs b/compiler/GHC/Core/Type.hs
index 927b7be88f..6eb247db16 100644
--- a/compiler/GHC/Core/Type.hs
+++ b/compiler/GHC/Core/Type.hs
@@ -1502,7 +1502,7 @@ tyConAppArgs_maybe ty = case coreFullView ty of
     -> Just [w, rep1, rep2, arg, res]
   _ -> Nothing
 
-tyConAppArgs :: Type -> [Type]
+tyConAppArgs :: HasCallStack => Type -> [Type]
 tyConAppArgs ty = tyConAppArgs_maybe ty `orElse` pprPanic "tyConAppArgs" (ppr ty)
 
 tyConAppArgN :: Int -> Type -> Type
diff --git a/compiler/GHC/Core/Utils.hs b/compiler/GHC/Core/Utils.hs
index 90f8f3f032..4fa3e84bb2 100644
--- a/compiler/GHC/Core/Utils.hs
+++ b/compiler/GHC/Core/Utils.hs
@@ -57,7 +57,7 @@ module GHC.Core.Utils (
         isJoinBind,
 
         -- * Tag inference
-        computeCbvInfo,
+        mkStrictFieldSeqs, shouldStrictifyIdForCbv, shouldUseCbvForId,
 
         -- * unsafeEqualityProof
         isUnsafeEqualityProof,
@@ -95,9 +95,9 @@ import GHC.Types.Literal
 import GHC.Types.Tickish
 import GHC.Types.Id
 import GHC.Types.Id.Info
+import GHC.Types.Basic( Arity, Levity(..)
+                       )
 import GHC.Types.Unique
-import GHC.Types.Basic     ( Arity, CbvMark(..), Levity(..)
-                           , isMarkedCbv )
 import GHC.Types.Unique.Set
 
 import GHC.Data.FastString
@@ -118,6 +118,8 @@ import Data.Function       ( on )
 import Data.List           ( sort, sortBy, partition, zipWith4, mapAccumL )
 import Data.Ord            ( comparing )
 import qualified Data.Set as Set
+import GHC.Types.RepType (isZeroBitTy)
+import GHC.Types.Demand (isStrictDmd, isAbsDmd)
 
 {-
 ************************************************************************
@@ -2517,7 +2519,7 @@ canEtaReduceToArity fun dest_join_arity dest_arity =
 
     || ( dest_arity < idCbvMarkArity fun )
        -- Don't undersaturate StrictWorkerIds.
-       -- See Note [Strict Worker Ids] in GHC.CoreToStg.Prep.
+       -- See Note [CBV Function Ids]  in GHC.CoreToStg.Prep.
 
     ||  isLinearType (idType fun)
        -- Don't perform eta reduction on linear types.
@@ -2644,75 +2646,238 @@ dumpIdInfoOfProgram dump_locals ppr_id_info binds = vcat (map printId ids)
 ************************************************************************
 -}
 
--- | For a binding we:
--- * Look at the args
--- * Mark any with Unf=OtherCon[] as call-by-value, unless it's an unlifted type already.
--- * Potentially combine it with existing call-by-value marks (from ww)
--- * Update the id
--- See Note [Attaching CBV Marks to ids].
-computeCbvInfo :: HasCallStack
-               => Id            -- The function
-               -> CoreExpr      -- It's RHS
-               -> Id
-computeCbvInfo id rhs =
-  -- pprTrace "computeCbv" (hang (ppr id) 2 (ppr dmd $$ ppr dmds)) $
-  -- TODO: For perf reasons we could skip looking at non VanillaId/StrictWorkerId/JoinId bindings
-  cbv_bndr
-  where
-    (_,val_args,_body) = collectTyAndValBinders rhs
-    new_marks = mkCbvMarks val_args
-    cbv_marks = assertPpr (checkMarks id new_marks)
-        (ppr id <+> ppr (idType id) $$ text "old:" <> ppr (idCbvMarks_maybe id) $$ text "new:" <> ppr new_marks $$ text "rhs:" <> ppr rhs)
-        new_marks
-    cbv_bndr
-        | valid_unlifted_worker val_args
-        -- Avoid retaining the original rhs
-        = cbv_marks `seqList` setIdCbvMarks id cbv_marks
-        | otherwise =
-          -- pprTraceDebug "tidyCbvInfo: Worker seems to take unboxed tuple/sum types!" (ppr id <+> ppr rhs)
-          id
-    -- We don't set CBV marks on workers which take unboxed tuples or sums as arguments.
-    -- Doing so would require us to compute the result of unarise here in order to properly determine
-    -- argument positions at runtime.
-    -- In practice this doesn't matter much. Most "interesting" functions will get a W/W split which will eliminate
-    -- unboxed tuple arguments, and unboxed sums are rarely used.
-    valid_unlifted_worker args =
-      -- pprTrace "valid_unlifted" (ppr id $$ ppr args) $
-      not $ (any (\arg -> isMultiValArg arg) args)
-    isMultiValArg id =
-      let ty = idType id
-      in not (isStateType ty) && (isUnboxedTupleType ty || isUnboxedSumType ty)
-    -- Only keep relevant marks. We *don't* have to cover all arguments. Only these
-    -- that we might want to pass call-by-value.
-    trimMarks :: [CbvMark] -> [Id] -> [CbvMark]
-    trimMarks marks val_args =
-        map fst .
-          -- Starting at the end, drop all non-cbv marks, and marks applied to unlifted types
-          dropWhileEndLE (\(m,v) -> not (isMarkedCbv m) || isUnliftedType (idType v)) $
-            -- NB: function arguments must have a fixed RuntimeRep, so isUnliftedType can't crash.
-          zip marks val_args
-
-    mkCbvMarks :: ([Id]) -> [CbvMark]
-    mkCbvMarks = map mkMark
-      where
-        cbv_arg arg = isEvaldUnfolding (idUnfolding arg)
-        mkMark arg
-          | cbv_arg arg
-          , not $ isUnliftedType (idType arg)
-            -- NB: isUnliftedType can't crash here as function arguments have a fixed RuntimeRep
-          = MarkedCbv
-          | otherwise
-          = NotMarkedCbv
-    -- If we determined earlier one an argument should be passed cbv it should
-    -- still be so here.
-    checkMarks id new_marks
-      | Just old_marks <- idCbvMarks_maybe id
-      = length (trimMarks old_marks val_args) <= length new_marks &&
-        and (zipWith checkNewMark old_marks new_marks)
-      | otherwise = True
-    checkNewMark old new =
-      isMarkedCbv new || (not $ isMarkedCbv old)
+{- Note [Call-by-value for worker args]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we unbox a constructor with strict fields we want to
+preserve the information that some of the arguments came
+out of strict fields and therefore should be already properly
+tagged, however we can't express this directly in core.
+
+Instead what we do is generate a worker like this:
+
+  data T = MkT A !B
+
+  foo = case T of MkT a b -> $wfoo a b
+
+  $wfoo a b = case b of b' -> rhs[b/b']
+
+This makes the worker strict in b causing us to use a more efficient
+calling convention for `b` where the caller needs to ensure `b` is
+properly tagged and evaluated before it's passed to $wfoo. See Note [CBV Function Ids].
+
+Usually the argument will be known to be properly tagged at the call site so there is
+no additional work for the caller and the worker can be more efficient since it can
+assume the presence of a tag.
+
+This is especially true for recursive functions like this:
+    -- myPred expect it's argument properly tagged
+    myPred !x = ...
+
+    loop :: MyPair -> Int
+    loop (MyPair !x !y) =
+        case x of
+            A -> 1
+            B -> 2
+            _ -> loop (MyPair (myPred x) (myPred y))
+
+Here we would ordinarily not be strict in y after unboxing.
+However if we pass it as a regular argument then this means on
+every iteration of loop we will incur an extra seq on y before
+we can pass it to `myPred` which isn't great! That is in STG after
+tag inference we get:
+
+    Rec {
+    Find.$wloop [InlPrag=[2], Occ=LoopBreaker]
+      :: Find.MyEnum -> Find.MyEnum -> GHC.Prim.Int#
+    [GblId[StrictWorker([!, ~])],
+    Arity=2,
+    Str=<1L><ML>,
+    Unf=OtherCon []] =
+        {} \r [x y]
+            case x<TagProper> of x' [Occ=Once1] {
+              __DEFAULT ->
+                  case y of y' [Occ=Once1] {
+                  __DEFAULT ->
+                  case Find.$wmyPred y' of pred_y [Occ=Once1] {
+                  __DEFAULT ->
+                  case Find.$wmyPred x' of pred_x [Occ=Once1] {
+                  __DEFAULT -> Find.$wloop pred_x pred_y;
+                  };
+                  };
+              Find.A -> 1#;
+              Find.B -> 2#;
+            };
+    end Rec }
+
+Here comes the tricky part: If we make $wloop strict in both x/y and we get:
+
+    Rec {
+    Find.$wloop [InlPrag=[2], Occ=LoopBreaker]
+      :: Find.MyEnum -> Find.MyEnum -> GHC.Prim.Int#
+    [GblId[StrictWorker([!, !])],
+    Arity=2,
+    Str=<1L><!L>,
+    Unf=OtherCon []] =
+        {} \r [x y]
+            case y<TagProper> of y' [Occ=Once1] { __DEFAULT ->
+            case x<TagProper> of x' [Occ=Once1] {
+              __DEFAULT ->
+                  case Find.$wmyPred y' of pred_y [Occ=Once1] {
+                  __DEFAULT ->
+                  case Find.$wmyPred x' of pred_x [Occ=Once1] {
+                  __DEFAULT -> Find.$wloop pred_x pred_y;
+                  };
+                  };
+              Find.A -> 1#;
+              Find.B -> 2#;
+            };
+    end Rec }
+
+Here both x and y are known to be tagged in the function body since we pass strict worker args using unlifted cbv.
+This means the seqs on x and y both become no-ops and compared to the first version the seq on `y` disappears at runtime.
+
+The downside is that the caller of $wfoo potentially has to evaluate `y` once if we can't prove it isn't already evaluated.
+But y coming out of a strict field is in WHNF so safe to evaluated. And most of the time it will be properly tagged+evaluated
+already at the call site because of the Strict Field Invariant! See Note [Strict Field Invariant] for more in this.
+This makes GHC itself around 1% faster despite doing slightly more work! So this is generally quite good.
+
+We only apply this when we think there is a benefit in doing so however. There are a number of cases in which
+it would be useless to insert an extra seq. ShouldStrictifyIdForCbv tries to identify these to avoid churn in the
+simplifier. See Note [Which Ids should be strictified] for details on this.
+-}
+mkStrictFieldSeqs :: [(Id,StrictnessMark)] -> CoreExpr -> (CoreExpr)
+mkStrictFieldSeqs args rhs =
+  foldr addEval rhs args
+    where
+      case_ty = exprType rhs
+      addEval :: (Id,StrictnessMark) -> (CoreExpr) -> (CoreExpr)
+      addEval (arg_id,arg_cbv) (rhs)
+        -- Argument representing strict field.
+        | isMarkedStrict arg_cbv
+        , shouldStrictifyIdForCbv arg_id
+        -- Make sure to remove unfoldings here to avoid the simplifier dropping those for OtherCon[] unfoldings.
+        = Case (Var $! zapIdUnfolding arg_id) arg_id case_ty ([Alt DEFAULT [] rhs])
+        -- Normal argument
+        | otherwise = do
+          rhs
+
+{- Note [Which Ids should be strictified]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For some arguments we would like to convince GHC to pass them call by value.
+One way to achieve this is described in see Note [Call-by-value for worker args].
+
+We separate the concerns of "should we pass this argument using cbv" and
+"should we do so by making the rhs strict in this argument".
+This note deals with the second part.
+
+There are multiple reasons why we might not want to insert a seq in the rhs to
+strictify a functions argument:
+
+1) The argument doesn't exist at runtime.
+
+For zero width types (like Types) there is no benefit as we don't operate on them
+at runtime at all. This includes things like void#, coercions and state tokens.
+
+2) The argument is a unlifted type.
+
+If the argument is a unlifted type the calling convention already is explicitly
+cbv. This means inserting a seq on this argument wouldn't do anything as the seq
+would be a no-op *and* it wouldn't affect the calling convention.
+
+3) The argument is absent.
+
+If the argument is absent in the body there is no advantage to it being passed as
+cbv to the function. The function won't ever look at it so we don't safe any work.
+
+This mostly happens for join point. For example we might have:
+
+    data T = MkT ![Int] [Char]
+    f t = case t of MkT xs{strict} ys-> snd (xs,ys)
+
+and abstract the case alternative to:
+
+    f t = join j1 = \xs ys -> snd (xs,ys)
+          in case t of MkT xs{strict} ys-> j1 xs xy
+
+While we "use" xs inside `j1` it's not used inside the function `snd` we pass it to.
+In short a absent demand means neither our RHS, nor any function we pass the argument
+to will inspect it. So there is no work to be saved by forcing `xs` early.
 
+NB: There is an edge case where if we rebox we *can* end up seqing an absent value.
+Note [Absent fillers] has an example of this. However this is so rare it's not worth
+caring about here.
+
+4) The argument is already strict.
+
+Consider this code:
+
+    data T = MkT ![Int]
+    f t = case t of MkT xs{strict} -> reverse xs
+
+The `xs{strict}` indicates that `xs` is used strictly by the `reverse xs`.
+If we do a w/w split, and add the extra eval on `xs`, we'll get
+
+    $wf xs =
+        case xs of xs1 ->
+            let t = MkT xs1 in
+            case t of MkT xs2 -> reverse xs2
+
+That's not wrong; but the w/w body will simplify to
+
+    $wf xs = case xs of xs1 -> reverse xs1
+
+and now we'll drop the `case xs` because `xs1` is used strictly in its scope.
+Adding that eval was a waste of time.  So don't add it for strictly-demanded Ids.
+
+5) Functions
+
+Functions are tricky (see Note [TagInfo of functions] in InferTags).
+But the gist of it even if we make a higher order function argument strict
+we can't avoid the tag check when it's used later in the body.
+So there is no benefit.
+
+-}
+-- | Do we expect there to be any benefit if we make this var strict
+-- in order for it to get treated as as cbv argument?
+-- See Note [Which Ids should be strictified]
+-- See Note [CBV Function Ids] for more background.
+shouldStrictifyIdForCbv :: Var -> Bool
+shouldStrictifyIdForCbv = wantCbvForId False
+
+-- Like shouldStrictifyIdForCbv but also wants to use cbv for strict args.
+shouldUseCbvForId :: Var -> Bool
+shouldUseCbvForId = wantCbvForId True
+
+-- When we strictify we want to skip strict args otherwise the logic is the same
+-- as for shouldUseCbvForId so we common up the logic here.
+-- Basically returns true if it would be benefitial for runtime to pass this argument
+-- as CBV independent of weither or not it's correct. E.g. it might return true for lazy args
+-- we are not allowed to force.
+wantCbvForId :: Bool -> Var -> Bool
+wantCbvForId cbv_for_strict v
+  -- Must be a runtime var.
+  -- See Note [Which Ids should be strictified] point 1)
+  | isId v
+  , not $ isZeroBitTy ty
+  -- Unlifted things don't need special measures to be treated as cbv
+  -- See Note [Which Ids should be strictified] point 2)
+  , mightBeLiftedType ty
+  -- Functions sometimes get a zero tag so we can't eliminate the tag check.
+  -- See Note [TagInfo of functions] in InferTags.
+  -- See Note [Which Ids should be strictified] point 5)
+  , not $ isFunTy ty
+  -- If the var is strict already a seq is redundant.
+  -- See Note [Which Ids should be strictified] point 4)
+  , not (isStrictDmd dmd) || cbv_for_strict
+  -- If the var is absent a seq is almost always useless.
+  -- See Note [Which Ids should be strictified] point 3)
+  , not (isAbsDmd dmd)
+  = True
+  | otherwise
+  = False
+  where
+    ty = idType v
+    dmd = idDemandInfo v
 
 {- *********************************************************************
 *                                                                      *