summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/GHC/Builtin/PrimOps.hs2
-rw-r--r--compiler/GHC/Core/Make.hs1
-rw-r--r--compiler/GHC/Core/Opt/CprAnal.hs36
-rw-r--r--compiler/GHC/Core/Opt/DmdAnal.hs271
-rw-r--r--compiler/GHC/Core/Opt/Pipeline.hs1
-rw-r--r--compiler/GHC/Core/Opt/SpecConstr.hs2
-rw-r--r--compiler/GHC/Core/Opt/WorkWrap.hs5
-rw-r--r--compiler/GHC/Core/Opt/WorkWrap/Utils.hs385
-rw-r--r--compiler/GHC/Driver/Session.hs6
-rw-r--r--compiler/GHC/Types/Basic.hs6
-rw-r--r--compiler/GHC/Types/Demand.hs755
-rw-r--r--compiler/GHC/Types/Id/Make.hs9
-rw-r--r--compiler/GHC/Utils/Misc.hs23
13 files changed, 977 insertions, 525 deletions
diff --git a/compiler/GHC/Builtin/PrimOps.hs b/compiler/GHC/Builtin/PrimOps.hs
index 40f5e65605..164d1e2ea7 100644
--- a/compiler/GHC/Builtin/PrimOps.hs
+++ b/compiler/GHC/Builtin/PrimOps.hs
@@ -38,7 +38,7 @@ import GHC.Builtin.Names ( gHC_PRIMOPWRAPPERS )
import GHC.Core.TyCon ( TyCon, isPrimTyCon, PrimRep(..) )
import GHC.Core.Type
import GHC.Types.RepType ( tyConPrimRep1 )
-import GHC.Types.Basic ( Arity, Boxity(..) )
+import GHC.Types.Basic ( Arity )
import GHC.Types.Fixity ( Fixity(..), FixityDirection(..) )
import GHC.Types.SrcLoc ( wiredInSrcSpan )
import GHC.Types.ForeignCall ( CLabelString )
diff --git a/compiler/GHC/Core/Make.hs b/compiler/GHC/Core/Make.hs
index b43344a92c..3387523e20 100644
--- a/compiler/GHC/Core/Make.hs
+++ b/compiler/GHC/Core/Make.hs
@@ -65,7 +65,6 @@ import GHC.Types.Demand
import GHC.Types.Name hiding ( varName )
import GHC.Types.Literal
import GHC.Types.Unique.Supply
-import GHC.Types.Basic
import GHC.Core
import GHC.Core.Utils ( exprType, needsCaseBinding, mkSingleAltCase, bindNonRec )
diff --git a/compiler/GHC/Core/Opt/CprAnal.hs b/compiler/GHC/Core/Opt/CprAnal.hs
index d3f6a248ce..65468cd037 100644
--- a/compiler/GHC/Core/Opt/CprAnal.hs
+++ b/compiler/GHC/Core/Opt/CprAnal.hs
@@ -21,14 +21,13 @@ import GHC.Types.Id.Info
import GHC.Types.Demand
import GHC.Types.Cpr
-import GHC.Core.DataCon
import GHC.Core.FamInstEnv
-import GHC.Core.Multiplicity
-import GHC.Core.Opt.WorkWrap.Utils
+import GHC.Core.DataCon
import GHC.Core.Type
import GHC.Core.Utils
import GHC.Core
import GHC.Core.Seq
+import GHC.Core.Opt.WorkWrap.Utils
import GHC.Data.Graph.UnVar -- for UnVarSet
@@ -639,30 +638,23 @@ nonVirgin env = env { ae_virgin = False }
-- See Note [CPR for binders that will be unboxed].
extendSigEnvForArg :: AnalEnv -> Id -> AnalEnv
extendSigEnvForArg env id
- = extendSigEnv env id (CprSig (argCprType env (idType id) (idDemandInfo id)))
+ = extendSigEnv env id (CprSig (argCprType (idDemandInfo id)))
-- | Produces a 'CprType' according to how a strict argument will be unboxed.
-- Examples:
--
--- * A head-strict demand @1L@ on @Int@ would translate to @1@
--- * A product demand @1P(1L,L)@ on @(Int, Bool)@ would translate to @1(1,)@
--- * A product demand @1P(1L,L)@ on @(a , Bool)@ would translate to @1(,)@,
--- because the unboxing strategy would not unbox the @a@.
-argCprType :: AnalEnv -> Type -> Demand -> CprType
-argCprType env arg_ty dmd = CprType 0 (go arg_ty dmd)
+-- * A head-strict demand @1!L@ would translate to @1@
+-- * A product demand @1!P(1!L,L)@ would translate to @1(1,)@
+-- * A product demand @1!P(1L,L)@ would translate to @1(,)@,
+-- because the first field will not be unboxed.
+argCprType :: Demand -> CprType
+argCprType dmd = CprType 0 (go dmd)
where
- go ty dmd
- | Unbox (DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args }) ds
- <- wantToUnboxArg (ae_fam_envs env) MaybeArgOfInlineableFun ty dmd
- -- No existentials; see Note [Which types are unboxed?])
- -- Otherwise we'd need to call dataConRepInstPat here and thread a
- -- UniqSupply. So argCprType is a bit less aggressive than it could
- -- be, for the sake of coding convenience.
- , null (dataConExTyCoVars dc)
- , let arg_tys = map scaledThing (dataConInstArgTys dc tc_args)
- = ConCpr (dataConTag dc) (zipWith go arg_tys ds)
- | otherwise
- = topCpr
+ go (n :* sd)
+ | isAbs n = topCpr
+ | Prod Unboxed ds <- sd = ConCpr fIRST_TAG (strictMap go ds)
+ | Poly Unboxed _ <- sd = ConCpr fIRST_TAG []
+ | otherwise = topCpr
{- Note [Safe abortion in the fixed-point iteration]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs
index 5f209701a9..fa4bed48f0 100644
--- a/compiler/GHC/Core/Opt/DmdAnal.hs
+++ b/compiler/GHC/Core/Opt/DmdAnal.hs
@@ -32,7 +32,8 @@ import GHC.Core.Utils
import GHC.Core.TyCon
import GHC.Core.Type
import GHC.Core.FVs ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds )
-import GHC.Core.Coercion ( Coercion, coVarsOfCo )
+import GHC.Core.Coercion ( Coercion )
+import GHC.Core.TyCo.FVs ( coVarsOfCos )
import GHC.Core.FamInstEnv
import GHC.Core.Opt.Arity ( typeArity )
import GHC.Utils.Misc
@@ -55,8 +56,9 @@ _ = pprTrace -- Tired of commenting out the import all the time
-}
-- | Options for the demand analysis
-newtype DmdAnalOpts = DmdAnalOpts
- { dmd_strict_dicts :: Bool -- ^ Use strict dictionaries
+data DmdAnalOpts = DmdAnalOpts
+ { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries
+ , dmd_unbox_width :: !Int -- ^ Use strict dictionaries
}
-- This is a strict alternative to (,)
@@ -276,8 +278,10 @@ dmdAnalBindLetUp top_lvl env id rhs anal_body = WithDmdType final_ty (R (NonRec
where
WithDmdType body_ty body' = anal_body env
WithDmdType body_ty' id_dmd = findBndrDmd env body_ty id
- !id' = setBindIdDemandInfo top_lvl id id_dmd
- (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd) rhs
+ -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils"
+ id_dmd' = finaliseBoxity (ae_fam_envs env) NotInsideInlineableFun (idType id) id_dmd
+ !id' = setBindIdDemandInfo top_lvl id id_dmd'
+ (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd') rhs
-- See Note [Absence analysis for stable unfoldings and RULES]
rule_fvs = bndrRuleAndUnfoldingIds id
@@ -425,21 +429,24 @@ dmdAnal' env dmd (Case scrut case_bndr ty [Alt alt bndrs rhs])
| is_single_data_alt alt
= let
WithDmdType rhs_ty rhs' = dmdAnal env dmd rhs
- WithDmdType alt_ty1 dmds = findBndrsDmds env rhs_ty bndrs
+ WithDmdType alt_ty1 fld_dmds = findBndrsDmds env rhs_ty bndrs
WithDmdType alt_ty2 case_bndr_dmd = findBndrDmd env alt_ty1 case_bndr
+ !case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd
-- Evaluation cardinality on the case binder is irrelevant and a no-op.
-- What matters is its nested sub-demand!
+ -- NB: If case_bndr_dmd is absDmd, boxity will say Unboxed, which is
+ -- what we want, because then `seq` will put a `seqDmd` on its scrut.
(_ :* case_bndr_sd) = case_bndr_dmd
-- Compute demand on the scrutinee
-- FORCE the result, otherwise thunks will end up retaining the
-- whole DmdEnv
!(!bndrs', !scrut_sd)
| DataAlt _ <- alt
- , id_dmds <- addCaseBndrDmd case_bndr_sd dmds
- -- See Note [Demand on scrutinee of a product case]
- = let !new_info = setBndrsDemandInfo bndrs id_dmds
- !new_prod = mkProd id_dmds
- in (new_info, new_prod)
+ -- See Note [Demand on the scrutinee of a product case]
+ -- See Note [Demand on case-alternative binders]
+ , (!scrut_sd, fld_dmds') <- addCaseBndrDmd case_bndr_sd fld_dmds
+ , let !bndrs' = setBndrsDemandInfo bndrs fld_dmds'
+ = (bndrs', scrut_sd)
| otherwise
-- __DEFAULT and literal alts. Simply add demands and discard the
-- evaluation cardinality, as we evaluate the scrutinee exactly once.
@@ -454,7 +461,6 @@ dmdAnal' env dmd (Case scrut case_bndr ty [Alt alt bndrs rhs])
WithDmdType scrut_ty scrut' = dmdAnal env scrut_sd scrut
res_ty = alt_ty3 `plusDmdType` toPlusDmdArg scrut_ty
- !case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd
in
-- pprTrace "dmdAnal:Case1" (vcat [ text "scrut" <+> ppr scrut
-- , text "dmd" <+> ppr dmd
@@ -482,8 +488,9 @@ dmdAnal' env dmd (Case scrut case_bndr ty alts)
WithDmdType rest_ty as' = combineAltDmds as
in WithDmdType (lubDmdType cur_ty rest_ty) (a':as')
- WithDmdType scrut_ty scrut' = dmdAnal env topSubDmd scrut
- WithDmdType alt_ty1 case_bndr' = annotateBndr env alt_ty case_bndr
+ WithDmdType alt_ty1 case_bndr_dmd = findBndrDmd env alt_ty case_bndr
+ !case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd
+ WithDmdType scrut_ty scrut' = dmdAnal env topSubDmd scrut
-- NB: Base case is botDmdType, for empty case alternatives
-- This is a unit for lubDmdType, and the right result
-- when there really are no alternatives
@@ -549,12 +556,30 @@ dmdAnalSumAlt env dmd case_bndr (Alt con bndrs rhs)
| WithDmdType rhs_ty rhs' <- dmdAnal env dmd rhs
, WithDmdType alt_ty dmds <- findBndrsDmds env rhs_ty bndrs
, let (_ :* case_bndr_sd) = findIdDemand alt_ty case_bndr
- -- See Note [Demand on scrutinee of a product case]
- id_dmds = addCaseBndrDmd case_bndr_sd dmds
+ -- See Note [Demand on case-alternative binders]
+ -- we can't use the scrut_sd, because it says 'Prod' and we'll use
+ -- topSubDmd anyway for scrutinees of sum types.
+ (!_scrut_sd, dmds') = addCaseBndrDmd case_bndr_sd dmds
-- Do not put a thunk into the Alt
- !new_ids = setBndrsDemandInfo bndrs id_dmds
+ !new_ids = setBndrsDemandInfo bndrs dmds'
= WithDmdType alt_ty (Alt con new_ids rhs')
+-- Precondition: The SubDemand is not a Call
+-- See Note [Demand on the scrutinee of a product case]
+-- and Note [Demand on case-alternative binders]
+addCaseBndrDmd :: SubDemand -- On the case binder
+ -> [Demand] -- On the fields of the constructor
+ -> (SubDemand, [Demand])
+ -- SubDemand on the case binder incl. field demands
+ -- and final demands for the components of the constructor
+addCaseBndrDmd case_sd fld_dmds
+ | Just (_, ds) <- viewProd (length fld_dmds) scrut_sd
+ = (scrut_sd, ds)
+ | otherwise
+ = pprPanic "was a call demand" (ppr case_sd $$ ppr fld_dmds) -- See the Precondition
+ where
+ scrut_sd = case_sd `plusSubDmd` mkProd Unboxed fld_dmds
+
{-
Note [Analysing with absent demand]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -674,6 +699,51 @@ worker, so the worker will rebuild
x = (a, absent-error)
and that'll crash.
+Note [Demand on case-alternative binders]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The demand on a binder in a case alternative comes
+ (a) From the demand on the binder itself
+ (b) From the demand on the case binder
+Forgetting (b) led directly to #10148.
+
+Example. Source code:
+ f x@(p,_) = if p then foo x else True
+
+ foo (p,True) = True
+ foo (p,q) = foo (q,p)
+
+After strictness analysis, forgetting (b):
+ f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) ->
+ case x_an1
+ of wild_X7 [Dmd=MP(ML,ML)]
+ { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) ->
+ case p_an2 of _ {
+ False -> GHC.Types.True;
+ True -> foo wild_X7 }
+
+Note that ds_dnz is syntactically dead, but the expression bound to it is
+reachable through the case binder wild_X7. Now watch what happens if we inline
+foo's wrapper:
+ f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) ->
+ case x_an1
+ of _ [Dmd=MP(ML,ML)]
+ { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) ->
+ case p_an2 of _ {
+ False -> GHC.Types.True;
+ True -> $wfoo_soq GHC.Types.True ds_dnz }
+
+Look at that! ds_dnz has come back to life in the call to $wfoo_soq! A second
+run of demand analysis would no longer infer ds_dnz to be absent.
+But unlike occurrence analysis, which infers properties of the *syntactic*
+shape of the program, the results of demand analysis describe expressions
+*semantically* and are supposed to be mostly stable across Simplification.
+That's why we should better account for (b).
+In #10148, we ended up emitting a single-entry thunk instead of an updateable
+thunk for a let binder that was an an absent case-alt binder during DmdAnal.
+
+This is needed even for non-product types, in case the case-binder
+is used but the components of the case alternative are not.
+
Note [Aggregated demand for cardinality]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
FIXME: This Note should be named [LetUp vs. LetDown] and probably predates
@@ -725,43 +795,42 @@ strict in |y|.
************************************************************************
-}
-dmdTransform :: AnalEnv -- ^ The strictness environment
- -> Id -- ^ The function
- -> SubDemand -- ^ The demand on the function
- -> DmdType -- ^ The demand type of the function in this context
- -- Returned DmdEnv includes the demand on
- -- this function plus demand on its free variables
-
+dmdTransform :: AnalEnv -- ^ The analysis environment
+ -> Id -- ^ The variable
+ -> SubDemand -- ^ The evaluation context of the var
+ -> DmdType -- ^ The demand type unleashed by the variable in this
+ -- context. The returned DmdEnv includes the demand on
+ -- this function plus demand on its free variables
-- See Note [What are demand signatures?] in "GHC.Types.Demand"
-dmdTransform env var dmd
+dmdTransform env var sd
-- Data constructors
| isDataConWorkId var
- = dmdTransformDataConSig (idArity var) dmd
+ = dmdTransformDataConSig (idArity var) sd
-- Dictionary component selectors
-- Used to be controlled by a flag.
-- See #18429 for some perf measurements.
| Just _ <- isClassOpId_maybe var
- = -- pprTrace "dmdTransform:DictSel" (ppr var $$ ppr dmd) $
- dmdTransformDictSelSig (idDmdSig var) dmd
+ = -- pprTrace "dmdTransform:DictSel" (ppr var $$ ppr (idDmdSig var) $$ ppr sd) $
+ dmdTransformDictSelSig (idDmdSig var) sd
-- Imported functions
| isGlobalId var
- , let res = dmdTransformSig (idDmdSig var) dmd
- = -- pprTrace "dmdTransform:import" (vcat [ppr var, ppr (idDmdSig var), ppr dmd, ppr res])
+ , let res = dmdTransformSig (idDmdSig var) sd
+ = -- pprTrace "dmdTransform:import" (vcat [ppr var, ppr (idDmdSig var), ppr sd, ppr res])
res
-- Top-level or local let-bound thing for which we use LetDown ('useLetUp').
-- In that case, we have a strictness signature to unleash in our AnalEnv.
| Just (sig, top_lvl) <- lookupSigEnv env var
- , let fn_ty = dmdTransformSig sig dmd
- = -- pprTrace "dmdTransform:LetDown" (vcat [ppr var, ppr sig, ppr dmd, ppr fn_ty]) $
+ , let fn_ty = dmdTransformSig sig sd
+ = -- pprTrace "dmdTransform:LetDown" (vcat [ppr var, ppr sig, ppr sd, ppr fn_ty]) $
case top_lvl of
- NotTopLevel -> addVarDmd fn_ty var (C_11 :* dmd)
+ NotTopLevel -> addVarDmd fn_ty var (C_11 :* sd)
TopLevel
| isInterestingTopLevelFn var
-- Top-level things will be used multiple times or not at
-- all anyway, hence the multDmd below: It means we don't
-- have to track whether @var@ is used strictly or at most
-- once, because ultimately it never will.
- -> addVarDmd fn_ty var (C_0N `multDmd` (C_11 :* dmd)) -- discard strictness
+ -> addVarDmd fn_ty var (C_0N `multDmd` (C_11 :* sd)) -- discard strictness
| otherwise
-> fn_ty -- don't bother tracking; just annotate with 'topDmd' later
-- Everything else:
@@ -769,8 +838,8 @@ dmdTransform env var dmd
-- * Lambda binders
-- * Case and constructor field binders
| otherwise
- = -- pprTrace "dmdTransform:other" (vcat [ppr var, ppr sig, ppr dmd, ppr res]) $
- unitDmdType (unitVarEnv var (C_11 :* dmd))
+ = -- pprTrace "dmdTransform:other" (vcat [ppr var, ppr boxity, ppr sd]) $
+ unitDmdType (unitVarEnv var (C_11 :* sd))
{- *********************************************************************
* *
@@ -802,15 +871,21 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs
where
rhs_arity = idArity id
-- See Note [Demand signatures are computed for a threshold demand based on idArity]
- rhs_dmd -- See Note [Demand analysis for join points]
- -- See Note [Invariants on join points] invariant 2b, in GHC.Core
- -- rhs_arity matches the join arity of the join point
- | isJoinId id
- = mkCalledOnceDmds rhs_arity let_dmd
- | otherwise
- = mkCalledOnceDmds rhs_arity topSubDmd
-
- WithDmdType rhs_dmd_ty rhs' = dmdAnal env rhs_dmd rhs
+
+ rhs_dmd = mkCalledOnceDmds rhs_arity body_dmd
+
+ body_dmd
+ | isJoinId id
+ -- See Note [Demand analysis for join points]
+ -- See Note [Invariants on join points] invariant 2b, in GHC.Core
+ -- rhs_arity matches the join arity of the join point
+ = let_dmd
+ | otherwise
+ -- See Note [Unboxed demand on function bodies returning small products]
+ = unboxedWhenSmall (ae_opts env) (unboxableResultWidth env id) topSubDmd
+
+ -- See Note [Do not unbox class dictionaries]
+ WithDmdType rhs_dmd_ty rhs' = dmdAnal (adjustInlFun id env) rhs_dmd rhs
DmdType rhs_fv rhs_dmds rhs_div = rhs_dmd_ty
sig = mkDmdSigForArity rhs_arity (DmdType sig_fv rhs_dmds rhs_div)
@@ -829,6 +904,7 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs
-- might turn into used-many even if the signature was stable and
-- we'd have to do an additional iteration. reuseEnv makes sure that
-- we never get used-once info for FVs of recursive functions.
+ -- See #14816 where we try to get rid of reuseEnv.
rhs_fv1 = case rec_flag of
Recursive -> reuseEnv rhs_fv
NonRecursive -> rhs_fv
@@ -839,6 +915,26 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs
-- See Note [Lazy and unleashable free variables]
!(!lazy_fv, !sig_fv) = partitionVarEnv isWeakDmd rhs_fv2
+unboxableResultWidth :: AnalEnv -> Id -> Maybe Arity
+unboxableResultWidth env id
+ | (pis,ret_ty) <- splitPiTys (idType id)
+ , count (not . isNamedBinder) pis == idArity id
+ , Just (tc, _tc_args, _co) <- normSplitTyConApp_maybe (ae_fam_envs env) ret_ty
+ , Just dc <- tyConSingleAlgDataCon_maybe tc
+ , null (dataConExTyCoVars dc) -- Can't unbox results with existentials
+ = Just (dataConRepArity dc)
+ | otherwise
+ = Nothing
+
+unboxedWhenSmall :: DmdAnalOpts -> Maybe Arity -> SubDemand -> SubDemand
+-- See Note [Unboxed demand on function bodies returning small products]
+unboxedWhenSmall opts mb_n sd
+ | Just n <- mb_n
+ , n <= dmd_unbox_width opts
+ = unboxSubDemand sd
+ | otherwise
+ = sd
+
-- | If given the (local, non-recursive) let-bound 'Id', 'useLetUp' determines
-- whether we should process the binding up (body before rhs) or down (rhs
-- before body).
@@ -1056,34 +1152,6 @@ Now f's optimised RHS will be \x.a, but if we change g to (error "..")
(since it is apparently Absent) and then inline (\x. fst g) we get
disaster. But regardless, #18638 was a more complicated version of
this, that actually happened in practice.
-
-Historical Note [Product demands for function body]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-In 2013 I spotted this example, in shootout/binary_trees:
-
- Main.check' = \ b z ds. case z of z' { I# ip ->
- case ds_d13s of
- Main.Nil -> z'
- Main.Node s14k s14l s14m ->
- Main.check' (not b)
- (Main.check' b
- (case b {
- False -> I# (-# s14h s14k);
- True -> I# (+# s14h s14k)
- })
- s14l)
- s14m } } }
-
-Here we *really* want to unbox z, even though it appears to be used boxed in
-the Nil case. Partly the Nil case is not a hot path. But more specifically,
-the whole function gets the CPR property if we do.
-
-That motivated using a demand of C1(C1(C1(P(L,L)))) for the RHS, where
-(solely because the result was a product) we used a product demand
-(albeit with lazy components) for the body. But that gives very silly
-behaviour -- see #17932. Happily it turns out now to be entirely
-unnecessary: we get good results with C1(C1(C1(L))). So I simply
-deleted the special case.
-}
{- *********************************************************************
@@ -1159,7 +1227,6 @@ dmdFix top_lvl env let_dmd orig_pairs
{- Note [Safe abortion in the fixed-point iteration]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
Fixed-point iteration may fail to terminate. But we cannot simply give up and
return the environment and code unchanged! We still need to do one additional
round, for two reasons:
@@ -1231,8 +1298,11 @@ unitDmdType :: DmdEnv -> DmdType
unitDmdType dmd_env = DmdType dmd_env [] topDiv
coercionDmdEnv :: Coercion -> DmdEnv
-coercionDmdEnv co = mapVarEnv (const topDmd) (getUniqSet $ coVarsOfCo co)
- -- The VarSet from coVarsOfCo is really a VarEnv Var
+coercionDmdEnv co = coercionsDmdEnv [co]
+
+coercionsDmdEnv :: [Coercion] -> DmdEnv
+coercionsDmdEnv cos = mapVarEnv (const topDmd) (getUniqSet $ coVarsOfCos cos)
+ -- The VarSet from coVarsOfCos is really a VarEnv Var
addVarDmd :: DmdType -> Var -> Demand -> DmdType
addVarDmd (DmdType fv ds res) var dmd
@@ -1283,18 +1353,6 @@ setBndrsDemandInfo (b:bs) (d:ds) =
setBndrsDemandInfo [] ds = assert (null ds) []
setBndrsDemandInfo bs _ = pprPanic "setBndrsDemandInfo" (ppr bs)
-annotateBndr :: AnalEnv -> DmdType -> Var -> WithDmdType Var
--- The returned env has the var deleted
--- The returned var is annotated with demand info
--- according to the result demand of the provided demand type
--- No effect on the argument demands
-annotateBndr env dmd_ty var
- | isId var = WithDmdType dmd_ty' new_id
- | otherwise = WithDmdType dmd_ty var
- where
- new_id = setIdDemandInfo var dmd
- WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty var
-
annotateLamIdBndr :: AnalEnv
-> DmdType -- Demand type of body
-> Id -- Lambda binder
@@ -1308,8 +1366,11 @@ annotateLamIdBndr env dmd_ty id
-- pprTrace "annLamBndr" (vcat [ppr id, ppr dmd_ty, ppr final_ty]) $
WithDmdType main_ty new_id
where
- new_id = setIdDemandInfo id dmd
- main_ty = addDemand dmd dmd_ty'
+ -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils"
+ -- and Note [Do not unbox class dictionaries]
+ dmd' = finaliseBoxity (ae_fam_envs env) (ae_inl_fun env) (idType id) dmd
+ new_id = setIdDemandInfo id dmd'
+ main_ty = addDemand dmd' dmd_ty'
WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty id
{- Note [NOINLINE and strictness]
@@ -1389,11 +1450,14 @@ demand put on them (topDmd), and add that to the "lazy_fv" returned by "dmdFix".
data AnalEnv = AE
- { ae_strict_dicts :: !Bool -- ^ Enable strict dict
- , ae_sigs :: !SigEnv
- , ae_virgin :: !Bool -- ^ True on first iteration only
- -- See Note [Initialising strictness]
- , ae_fam_envs :: !FamInstEnvs
+ { ae_opts :: !DmdAnalOpts -- ^ Analysis options
+ , ae_sigs :: !SigEnv
+ , ae_virgin :: !Bool -- ^ True on first iteration only
+ -- See Note [Initialising strictness]
+ , ae_fam_envs :: !FamInstEnvs
+ , ae_inl_fun :: !InsideInlineableFun
+ -- ^ Whether we analyse the body of an inlineable fun.
+ -- See Note [Do not unbox class dictionaries].
}
-- We use the se_env to tell us whether to
@@ -1408,16 +1472,16 @@ type SigEnv = VarEnv (DmdSig, TopLevelFlag)
instance Outputable AnalEnv where
ppr env = text "AE" <+> braces (vcat
[ text "ae_virgin =" <+> ppr (ae_virgin env)
- , text "ae_strict_dicts =" <+> ppr (ae_strict_dicts env)
, text "ae_sigs =" <+> ppr (ae_sigs env)
])
emptyAnalEnv :: DmdAnalOpts -> FamInstEnvs -> AnalEnv
emptyAnalEnv opts fam_envs
- = AE { ae_strict_dicts = dmd_strict_dicts opts
+ = AE { ae_opts = opts
, ae_sigs = emptySigEnv
, ae_virgin = True
, ae_fam_envs = fam_envs
+ , ae_inl_fun = NotInsideInlineableFun
}
emptySigEnv :: SigEnv
@@ -1445,6 +1509,13 @@ lookupSigEnv env id = lookupVarEnv (ae_sigs env) id
nonVirgin :: AnalEnv -> AnalEnv
nonVirgin env = env { ae_virgin = False }
+-- | Sets 'ae_inl_fun' according to whether the given 'Id' has an inlineable
+-- unfolding. See Note [Do not unbox class dictionaries].
+adjustInlFun :: Id -> AnalEnv -> AnalEnv
+adjustInlFun id env
+ | isStableUnfolding (realIdUnfolding id) = env { ae_inl_fun = InsideInlineableFun }
+ | otherwise = env { ae_inl_fun = NotInsideInlineableFun }
+
findBndrsDmds :: AnalEnv -> DmdType -> [Var] -> WithDmdType [Demand]
-- Return the demands on the Ids in the [Var]
findBndrsDmds env dmd_ty bndrs
@@ -1472,9 +1543,9 @@ findBndrDmd env dmd_ty id
strictify dmd
-- See Note [Making dictionaries strict]
- | ae_strict_dicts env
+ | dmd_strict_dicts (ae_opts env)
-- We never want to strictify a recursive let. At the moment
- -- annotateBndr is only call for non-recursive lets; if that
+ -- findBndrDmd is never called for recursive lets; if that
-- changes, we need a RecFlag parameter and another guard here.
= strictifyDictDmd id_ty dmd
| otherwise
@@ -1522,7 +1593,7 @@ to inline one applied to a function. Sometimes this makes just enough
of a difference to stop a function from inlining. This is documented in
#18421.
-It's somewhat similar to Note [Do not unpack class dictionaries] although
+It's somewhat similar to Note [Do not unbox class dictionaries] although
here our problem is with the inliner, not the specializer.
Note [Initialising strictness]
diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs
index 18ac910d15..ee79e28b60 100644
--- a/compiler/GHC/Core/Opt/Pipeline.hs
+++ b/compiler/GHC/Core/Opt/Pipeline.hs
@@ -1067,6 +1067,7 @@ dmdAnal :: Logger -> DynFlags -> FamInstEnvs -> [CoreRule] -> CoreProgram -> IO
dmdAnal logger dflags fam_envs rules binds = do
let !opts = DmdAnalOpts
{ dmd_strict_dicts = gopt Opt_DictsStrict dflags
+ , dmd_unbox_width = dmdUnboxWidth dflags
}
binds_plus_dmds = dmdAnalProgram opts fam_envs rules binds
Logger.putDumpFileMaybe logger Opt_D_dump_str_signatures "Strictness signatures" FormatText $
diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs
index 718c840c96..966e86a344 100644
--- a/compiler/GHC/Core/Opt/SpecConstr.hs
+++ b/compiler/GHC/Core/Opt/SpecConstr.hs
@@ -1806,7 +1806,7 @@ calcSpecInfo fn (CP { cp_qvars = qvars, cp_args = pats }) extra_bndrs
go_one env d (Var v) = extendVarEnv_C plusDmd env v d
go_one env (_n :* cd) e -- NB: _n does not have to be strict
| (Var _, args) <- collectArgs e
- , Just ds <- viewProd (length args) cd
+ , Just (_b, ds) <- viewProd (length args) cd -- TODO: We may want to look at boxity _b, though...
= go env ds args
go_one env _ _ = env
diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs
index 976dcd5fe5..9becea0c18 100644
--- a/compiler/GHC/Core/Opt/WorkWrap.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap.hs
@@ -734,8 +734,6 @@ splitFun ww_opts fn_id rhs
uf_opts = so_uf_opts (wo_simple_opts ww_opts)
fn_info = idInfo fn_id
(arg_vars, body) = collectBinders rhs
- -- collectBinders was not enough for GHC.Event.IntTable.insertWith
- -- last time I checked, where manifest lambdas were wrapped in casts
(wrap_dmds, div) = splitDmdSig (dmdSigInfo fn_info)
@@ -978,8 +976,7 @@ splitThunk :: WwOpts -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)]
splitThunk ww_opts is_rec x rhs
= assert (not (isJoinId x)) $
do { let x' = localiseId x -- See comment above
- ; (useful,_, wrap_fn, fn_arg)
- <- mkWWstr_one ww_opts NotArgOfInlineableFun x'
+ ; (useful,_, wrap_fn, fn_arg) <- mkWWstr_one ww_opts x'
; let res = [ (x, Let (NonRec x' rhs) (wrap_fn fn_arg)) ]
; if useful then assertPpr (isNonRec is_rec) (ppr x) -- The thunk must be non-recursive
return res
diff --git a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
index 38c103d866..c2287916db 100644
--- a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
@@ -10,8 +10,9 @@ A library for the ``worker\/wrapper'' back-end to the strictness analyser
module GHC.Core.Opt.WorkWrap.Utils
( WwOpts(..), initWwOpts, mkWwBodies, mkWWstr, mkWWstr_one, mkWorkerArgs
, DataConPatContext(..)
- , UnboxingDecision(..), ArgOfInlineableFun(..), wantToUnboxArg
- , findTypeShape, mkAbsentFiller, IsRecDataConResult(..), isRecDataCon
+ , UnboxingDecision(..), InsideInlineableFun(..), wantToUnboxArg
+ , findTypeShape, IsRecDataConResult(..), isRecDataCon, finaliseBoxity
+ , mkAbsentFiller
, isWorkerSmallEnough
)
where
@@ -229,7 +230,7 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr
res_ty' = GHC.Core.Subst.substTy subst res_ty
; (useful1, work_args, wrap_fn_str, fn_args)
- <- mkWWstr opts inlineable_flag cloned_arg_vars
+ <- mkWWstr opts cloned_arg_vars
-- Do CPR w/w. See Note [Always do CPR w/w]
; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty)
@@ -265,9 +266,6 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr
= info `setOccInfo` noOccInfo
mb_join_arity = isJoinId_maybe fun_id
- inlineable_flag -- See Note [Do not unpack class dictionaries]
- | isStableUnfolding (realIdUnfolding fun_id) = MaybeArgOfInlineableFun
- | otherwise = NotArgOfInlineableFun
-- Note [Do not split void functions]
only_one_void_argument
@@ -562,59 +560,30 @@ data UnboxingDecision s
-- The @[s]@ carries the bits of information with which we can continue
-- unboxing, e.g. @s@ will be 'Demand' or 'Cpr'.
--- | A specialised Bool for an argument to 'wantToUnboxArg'.
--- See Note [Do not unpack class dictionaries].
-data ArgOfInlineableFun
- = NotArgOfInlineableFun -- ^ Definitely not in an inlineable fun.
- | MaybeArgOfInlineableFun -- ^ We might be in an inlineable fun, so we won't
- -- unbox dictionary args.
- deriving Eq
-
--- | Unboxing strategy for strict arguments.
-wantToUnboxArg :: FamInstEnvs -> ArgOfInlineableFun -> Type -> Demand -> UnboxingDecision Demand
+-- | Unwraps the 'Boxity' decision encoded in the given 'SubDemand' and returns
+-- a 'DataConPatContext' as well the nested demands on fields of the 'DataCon'
+-- to unbox.
+wantToUnboxArg
+ :: FamInstEnvs
+ -> Type -- ^ Type of the argument
+ -> Demand -- ^ How the arg was used
+ -> UnboxingDecision Demand
-- See Note [Which types are unboxed?]
-wantToUnboxArg fam_envs inlineable_flag ty dmd
- | isAbsDmd dmd
+wantToUnboxArg fam_envs ty (n :* sd)
+ | isAbs n
= DropAbsent
- | isStrUsedDmd dmd
- , Just (tc, tc_args, co) <- normSplitTyConApp_maybe fam_envs ty
+ | Just (tc, tc_args, co) <- normSplitTyConApp_maybe fam_envs ty
, Just dc <- tyConSingleAlgDataCon_maybe tc
, let arity = dataConRepArity dc
- -- See Note [Unpacking arguments with product and polymorphic demands]
- , Just cs <- split_prod_dmd_arity dmd arity
- -- See Note [Do not unpack class dictionaries]
- , inlineable_flag == NotArgOfInlineableFun || not (isClassPred ty)
- -- See Note [mkWWstr and unsafeCoerce]
- , cs `lengthIs` arity
- -- See Note [Add demands for strict constructors]
- , let cs' = addDataConStrictness dc cs
- = Unbox (DataConPatContext dc tc_args co) cs'
+ , Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity Analysis]
+ -- NB: No strictness or evaluatedness checks here. That is done by
+ -- 'finaliseBoxity'!
+ = Unbox (DataConPatContext dc tc_args co) ds
| otherwise
= StopUnboxing
- where
- split_prod_dmd_arity dmd arity
- -- For seqDmd, it should behave like <S(AAAA)>, for some
- -- suitable arity
- | isSeqDmd dmd = Just (replicate arity absDmd)
- | _ :* Prod ds <- dmd = Just ds
- | otherwise = Nothing
-
-addDataConStrictness :: DataCon -> [Demand] -> [Demand]
--- See Note [Add demands for strict constructors]
-addDataConStrictness con ds
- | Nothing <- dataConWrapId_maybe con
- -- DataCon worker=wrapper. Implies no strict fields, so nothing to do
- = ds
-addDataConStrictness con ds
- = zipWithEqual "addDataConStrictness" add ds strs
- where
- strs = dataConRepStrictness con
- add dmd str | isMarkedStrict str = strictifyDmd dmd
- | otherwise = dmd
-
-- | Unboxing strategy for constructed results.
wantToUnboxResult :: FamInstEnvs -> Type -> Cpr -> UnboxingDecision Cpr
@@ -688,35 +657,8 @@ Note that the data constructor /can/ have evidence arguments: equality
constraints, type classes etc. So it can be GADT. These evidence
arguments are simply value arguments, and should not get in the way.
-Note [Unpacking arguments with product and polymorphic demands]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The argument is unpacked in a case if it has a product type and has a
-strict *and* used demand put on it. I.e., arguments, with demands such
-as the following ones:
-
- <S,U(U, L)>
- <S(L,S),U>
-
-will be unpacked, but
-
- <S,U> or <B,U>
-
-will not, because the pieces aren't used. This is quite important otherwise
-we end up unpacking massive tuples passed to the bottoming function. Example:
-
- f :: ((Int,Int) -> String) -> (Int,Int) -> a
- f g pr = error (g pr)
-
- main = print (f fst (1, error "no"))
-
-Does 'main' print "error 1" or "error no"? We don't really want 'f'
-to unbox its second argument. This actually happened in GHC's onwn
-source code, in Packages.applyPackageFlag, which ended up un-boxing
-the enormous DynFlags tuple, and being strict in the
-as-yet-un-filled-in unitState files.
-
-Note [Do not unpack class dictionaries]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [Do not unbox class dictionaries]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If we have
f :: Ord a => [a] -> Int -> a
{-# INLINABLE f #-}
@@ -729,12 +671,19 @@ BUT if f is strict in the Ord dictionary, we might unpack it, to get
fw :: (a->a->Bool) -> [a] -> Int# -> a
and the type-class specialiser can't specialise that. An example is #6056.
-But in any other situation a dictionary is just an ordinary value,
-and can be unpacked. So we track the INLINABLE pragma, and switch
-off the unpacking in mkWWstr_one (see the isClassPred test).
+But in any other situation, a dictionary is just an ordinary value,
+and can be unpacked. So we track the INLINABLE pragma, and discard the boxity
+flag in finaliseBoxity (see the isClassPred test).
Historical note: #14955 describes how I got this fix wrong the first time.
+Note that the simplicity of this fix implies that INLINE functions (such as
+wrapper functions after the WW run) will never say that they unbox class
+dictionaries. That's not ideal, but not worth losing sleep over, as INLINE
+functions will have been inlined by the time we run demand analysis so we'll
+see the unboxing around the worker in client modules. I got aware of the issue
+in T5075 by the change in boxity of loop between demand analysis runs.
+
Note [mkWWstr and unsafeCoerce]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
By using unsafeCoerce, it is possible to make the number of demands fail to
@@ -742,14 +691,14 @@ match the number of constructor arguments; this happened in #8037.
If so, the worker/wrapper split doesn't work right and we get a Core Lint
bug. The fix here is simply to decline to do w/w if that happens.
-Note [Add demands for strict constructors]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [Unboxing evaluated arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider this program (due to Roman):
data X a = X !a
foo :: X Int -> Int -> Int
- foo (X a) n = go 0
+ foo x@(X a) n = go 0
where
go i | i < n = a + go (i+1)
| otherwise = 0
@@ -758,12 +707,12 @@ We want the worker for 'foo' too look like this:
$wfoo :: Int# -> Int# -> Int#
-with the first argument unboxed, so that it is not eval'd each time
-around the 'go' loop (which would otherwise happen, since 'foo' is not
-strict in 'a'). It is sound for the wrapper to pass an unboxed arg
-because X is strict, so its argument must be evaluated. And if we
-*don't* pass an unboxed argument, we can't even repair it by adding a
-`seq` thus:
+with the first argument unboxed, so that it is not eval'd each time around the
+'go' loop (which would otherwise happen, since 'foo' is not strict in 'a'). It
+is sound for the wrapper to pass an unboxed arg because X is strict
+(see Note [Strictness and Unboxing] in "GHC.Core.Opt.DmdAnal"), so its argument
+must be evaluated. And if we *don't* pass an unboxed argument, we can't even
+repair it by adding a `seq` thus:
foo (X a) n = a `seq` go 0
@@ -771,34 +720,38 @@ because the seq is discarded (very early) since X is strict!
So here's what we do
-* We leave the demand-analysis alone. The demand on 'a' in the
- definition of 'foo' is <L, U(U)>; the strictness info is Lazy
- because foo's body may or may not evaluate 'a'; but the usage info
- says that 'a' is unpacked and its content is used.
+* Since this has nothing to do with how 'foo' uses 'a', we leave demand analysis
+ alone, but account for the additional evaluatedness when annotating the binder
+ in 'annotateLamIdBndr' via 'finaliseBoxity', which will retain the Unboxed boxity
+ on 'a' in the definition of 'foo' in the demand 'L!P(L)'; meaning it's used
+ lazily but unboxed nonetheless. This seems to contradict
+ Note [No lazy, Unboxed demands in demand signature], but we know that 'a' is
+ evaluated and thus can be unboxed.
-* During worker/wrapper, if we unpack a strict constructor (as we do
- for 'foo'), we use 'addDataConStrictness' to bump up the strictness on
- the strict arguments of the data constructor.
+* When 'finaliseBoxity' decides to unbox a record, it will zip the field demands
+ together with the respective 'StrictnessMark'. In case of 'x', it will pair
+ up the lazy field demand 'L!P(L)' on 'a' with 'MarkedStrict' to account for
+ the strict field.
-* That in turn means that, if the usage info supports doing so
- (i.e. splitProdDmd_maybe returns Just), we will unpack that argument
- -- even though the original demand (e.g. on 'a') was lazy.
+* Said 'StrictnessMark' is passed to the recursive invocation of
+ 'finaliseBoxity' when deciding whether to unbox 'a'. 'a' was used lazily, but
+ since it also says 'MarkedStrict', we'll retain the 'Unboxed' boxity on 'a'.
-* What does "bump up the strictness" mean? Just add a head-strict
- demand to the strictness! Even for a demand like <L,A> we can
- safely turn it into <S,A>; remember case (1) of
- Note [Worker/wrapper for Strictness and Absence].
+* Worker/wrapper will consult 'wantToUnboxArg' for its unboxing decision. It will
+ /not/ look at the strictness bits of the demand, only at Boxity flags. As such,
+ it will happily unbox 'a' despite the lazy demand on it.
-The net effect is that the w/w transformation is more aggressive about
-unpacking the strict arguments of a data constructor, when that
-eagerness is supported by the usage info.
+The net effect is that boxity analysis and the w/w transformation are more
+aggressive about unboxing the strict arguments of a data constructor than when
+looking at strictness info exclusively. It is very much like (Nested) CPR, which
+needs its nested fields to be evaluated in order for it to unbox nestedly.
There is the usual danger of reboxing, which as usual we ignore. But
if X is monomorphic, and has an UNPACK pragma, then this optimisation
is even more important. We don't want the wrapper to rebox an unboxed
argument, and pass an Int to $wfoo!
-This works in nested situations like
+This works in nested situations like T10482
data family Bar a
data instance Bar (a, b) = BarPair !(Bar a) !(Bar b)
@@ -863,6 +816,68 @@ applying the strictness demands to the final result of DmdAnal. The result is
that we get the strict demand signature we wanted even if we can't float
the case on `x` up through the case on `burble`.
+Note [No nested Unboxed inside Boxed in demand signature]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+```
+f p@(x,y)
+ | even (x+y) = []
+ | otherwise = [p]
+```
+Demand analysis will infer that the function body puts a demand of `1P(1!L,1!L)`
+on 'p', e.g., Boxed on the outside but Unboxed on the inside. But worker/wrapper
+can't unbox the pair components without unboxing the pair! So we better say
+`1P(1L,1L)` in the demand signature in order not to spread wrong Boxity info.
+That happens in 'finaliseBoxity'.
+
+Note [No lazy, Unboxed demands in demand signature]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider T19407:
+
+ data Huge = Huge Bool () ... () -- think: DynFlags
+ data T = T { h :: Huge, n :: Int }
+ f t@(T h _) = g h t
+ g (H b _ ... _) t = if b then 1 else n t
+
+The body of `g` puts (approx.) demand `L!P(A,1)` on `t`. But we better
+not put that demand in `g`'s demand signature, because worker/wrapper will not
+in general unbox a lazy-and-unboxed demand like `L!P(..)`.
+(The exception are known-to-be-evaluated arguments like strict fields,
+see Note [Unboxing evaluated arguments].)
+
+The program above is an example where spreading misinformed boxity through the
+signature is particularly egregious. If we give `g` that signature, then `f`
+puts demand `S!P(1!P(1L,A,..),ML)` on `t`. Now we will unbox `t` in `f` it and
+we get
+
+ f (T (H b _ ... _) n) = $wf b n
+ $wf b n = $wg b (T (H b x ... x) n)
+ $wg = ...
+
+Massive reboxing in `$wf`! Solution: Trim boxity on lazy demands in
+'finaliseBoxity', modulo Note [Unboxing evaluated arguments].
+
+Note [Finalising boxity for demand signature]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The worker/wrapper pass must strictly adhere to the boxity decisions encoded
+in the demand signature, because that is the information that demand analysis
+propagates throughout the program. Failing to implement the strategy laid out
+in the signature can result in reboxing in unexpected places. Hence, we must
+completely anticipate unboxing decisions during demand analysis and reflect
+these decicions in demand annotations. That is the job of 'finaliseBoxity',
+which is defined here and called from demand analysis.
+
+Here is a list of different Notes it has to take care of:
+
+ * Note [No lazy, Unboxed demands in demand signature] such as `L!P(L)` in
+ general, but still allow Note [Unboxing evaluated arguments]
+ * Note [No nested Unboxed inside Boxed in demand signature] such as `1P(1!L)`
+ * Implement fixes for corner cases Note [Do not unbox class dictionaries]
+ and Note [mkWWstr and unsafeCoerce]
+
+Then, in worker/wrapper blindly trusts the boxity info in the demand signature
+and will not look at strictness info *at all*, in 'wantToUnboxArg'.
+
Note [non-algebraic or open body type warning]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are a few cases where the W/W transformation is told that something
@@ -894,7 +909,6 @@ way to express existential types in the worker's type signature.
-}
mkWWstr :: WwOpts
- -> ArgOfInlineableFun -- See Note [Do not unpack class dictionaries]
-> [Var] -- Wrapper args; have their demand info on them
-- *Includes type variables*
-> UniqSM (Bool, -- Is this useful
@@ -905,10 +919,10 @@ mkWWstr :: WwOpts
[CoreExpr]) -- Reboxed args for the call to the
-- original RHS. Corresponds one-to-one
-- with the wrapper arg vars
-mkWWstr opts inlineable_flag args
+mkWWstr opts args
= go args
where
- go_one arg = mkWWstr_one opts inlineable_flag arg
+ go_one arg = mkWWstr_one opts arg
go [] = return (False, [], nop_fn, [])
go (arg : args) = do { (useful1, args1, wrap_fn1, wrap_arg) <- go_one arg
@@ -925,12 +939,9 @@ mkWWstr opts inlineable_flag args
-- * wrap_arg assumes work_args are in scope, and builds a ConApp that
-- reconstructs the RHS of wrap_var that we pass to the original RHS
-- See Note [Worker/wrapper for Strictness and Absence]
-mkWWstr_one :: WwOpts
- -> ArgOfInlineableFun -- See Note [Do not unpack class dictionaries]
- -> Var
- -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr)
-mkWWstr_one opts inlineable_flag arg =
- case wantToUnboxArg fam_envs inlineable_flag arg_ty arg_dmd of
+mkWWstr_one :: WwOpts -> Var -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr)
+mkWWstr_one opts arg =
+ case wantToUnboxArg fam_envs arg_ty arg_dmd of
_ | isTyVar arg -> do_nothing
DropAbsent
@@ -940,7 +951,7 @@ mkWWstr_one opts inlineable_flag arg =
-- (that's what mkAbsentFiller does)
-> return (True, [], nop_fn, absent_filler)
- Unbox dcpc cs -> unbox_one_arg opts arg cs dcpc
+ Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc
_ -> do_nothing -- Other cases, like StopUnboxing
@@ -955,17 +966,17 @@ unbox_one_arg :: WwOpts
-> [Demand]
-> DataConPatContext
-> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr)
-unbox_one_arg opts arg_var cs
+unbox_one_arg opts arg_var ds
DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args
, dcpc_co = co }
= do { pat_bndrs_uniqs <- getUniquesM
; let ex_name_fss = map getOccFS $ dataConExTyCoVars dc
(ex_tvs', arg_ids) =
dataConRepFSInstPat (ex_name_fss ++ repeat ww_prefix) pat_bndrs_uniqs (idMult arg_var) dc tc_args
- arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids cs
+ arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids ds
unbox_fn = mkUnpackCase (Var arg_var) co (idMult arg_var)
dc (ex_tvs' ++ arg_ids')
- ; (_, worker_args, wrap_fn, wrap_args) <- mkWWstr opts NotArgOfInlineableFun (ex_tvs' ++ arg_ids')
+ ; (_, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ arg_ids')
; let wrap_arg = mkConApp dc (map Type tc_args ++ wrap_args) `mkCast` mkSymCo co
; return (True, worker_args, unbox_fn . wrap_fn, wrap_arg) }
-- Don't pass the arg, rebox instead
@@ -1009,42 +1020,45 @@ mkAbsentFiller opts arg
{- Note [Worker/wrapper for Strictness and Absence]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The worker/wrapper transformation, mkWWstr_one, takes into account
-several possibilities to decide if the function is worthy for
-splitting:
+The worker/wrapper transformation, mkWWstr_one, takes concrete action
+based on the 'UnboxingDescision' returned by 'wantToUnboxArg'.
+The latter takes into account several possibilities to decide if the
+function is worthy for splitting:
1. If an argument is absent, it would be silly to pass it to
- the worker. Hence the isAbsDmd case. This case must come
- first because a demand like <S,A> or <B,A> is possible.
- E.g. <B,A> comes from a function like
+ the worker. Hence the DropAbsent case. This case must come
+ first because the bottom demand B is also strict.
+ E.g. B comes from a function like
f x = error "urk"
- and <S,A> can come from Note [Add demands for strict constructors]
-
-2. If the argument is evaluated strictly, and we can split the
- product demand (splitProdDmd_maybe), then unbox it and w/w its
- pieces. For example
-
- f :: (Int, Int) -> Int
- f p = (case p of (a,b) -> a) + 1
- is split to
- f :: (Int, Int) -> Int
- f p = case p of (a,b) -> $wf a
-
- $wf :: Int -> Int
- $wf a = a + 1
-
- and
- g :: Bool -> (Int, Int) -> Int
- g c p = case p of (a,b) ->
- if c then a else b
- is split to
- g c p = case p of (a,b) -> $gw c a b
- $gw c a b = if c then a else b
-
-2a But do /not/ split if the components are not used; that is, the
- usage is just 'Used' rather than 'UProd'. In this case
- splitProdDmd_maybe returns Nothing. Otherwise we risk decomposing
- a massive tuple which is barely used. Example:
+ and the absent demand A can come from Note [Unboxing evaluated arguments]
+
+2. If the argument is evaluated strictly (or known to be eval'd),
+ we can take a view into the product demand ('viewProd'). In accordance
+ with Note [Boxity analysis], 'wantToUnboxArg' will say 'Unbox'.
+ 'mkWWstr_one' then follows suit it and recurses into the fields of the
+ product demand. For example
+
+ f :: (Int, Int) -> Int
+ f p = (case p of (a,b) -> a) + 1
+ is split to
+ f :: (Int, Int) -> Int
+ f p = case p of (a,b) -> $wf a
+
+ $wf :: Int -> Int
+ $wf a = a + 1
+
+ and
+ g :: Bool -> (Int, Int) -> Int
+ g c p = case p of (a,b) ->
+ if c then a else b
+ is split to
+ g c p = case p of (a,b) -> $gw c a b
+ $gw c a b = if c then a else b
+
+2a But do /not/ split if Boxity Analysis said "Boxed".
+ In this case, 'wantToUnboxArg' returns 'StopUnboxing'.
+ Otherwise we risk decomposing and reboxing a massive
+ tuple which is barely used. Example:
f :: ((Int,Int) -> String) -> (Int,Int) -> a
f g pr = error (g pr)
@@ -1055,10 +1069,11 @@ splitting:
Imagine that it had millions of fields. This actually happened
in GHC itself where the tuple was DynFlags
-3. A plain 'seqDmd', which is head-strict with usage UHead, can't
- be split by splitProdDmd_maybe. But we want it to behave just
- like U(AAAA) for suitable number of absent demands. So we have
- a special case for it, with arity coming from the data constructor.
+3. In all other cases (e.g., lazy, used demand and not eval'd),
+ 'finaliseBoxity' will have cleared the Boxity flag to 'Boxed'
+ (see Note [Finalising boxity for demand signature]) and
+ 'wantToUnboxArg' returns 'StopUnboxing' so that 'mkWWstr_one'
+ stops unboxing.
Note [Worker/wrapper for bottoming functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1162,7 +1177,7 @@ Needless to say, there are some wrinkles:
Ideally, we'd just look at the 'StrictnessMark' of the DataCon's field, but
it's quite nasty to thread the marks though 'mkWWstr' and 'mkWWstr_one'.
So we rather look out for a necessary condition for strict fields:
- Note [Add demands for strict constructors] makes it so that the demand on
+ Note [Unboxing evaluated arguments] makes it so that the demand on
'zs' is absent and /strict/: It will get cardinality 'C_10', the empty
interval, rather than 'C_00'. Hence the 'isStrictDmd' check: It guarantees
we never fill in an error-thunk for an absent strict field.
@@ -1395,6 +1410,56 @@ isRecDataCon fam_envs fuel dc
-> combineIRDCRs (map (\dc -> go_dc (subWithInf fuel 1) rec_tc' dc) dcs)
-- See Note [Detecting recursive data constructors], point (4)
+-- | A specialised Bool for an argument to 'finaliseBoxity'.
+-- See Note [Do not unbox class dictionaries].
+data InsideInlineableFun
+ = NotInsideInlineableFun -- ^ Not in an inlineable fun.
+ | InsideInlineableFun -- ^ We are in an inlineable fun, so we won't
+ -- unbox dictionary args.
+ deriving Eq
+
+-- | This function makes sure that the demand only says 'Unboxed' where
+-- worker/wrapper should actually unbox and trims any boxity beyond that.
+-- Called for every demand annotation during DmdAnal.
+--
+-- > data T a = T !a
+-- > f :: (T (Int,Int), Int) -> ()
+-- > f p = ... -- demand on p: 1!P(L!P(L!P(L), L!P(L)), L!P(L))
+--
+-- 'finaliseBoxity' will trim the demand on 'p' to 1!P(L!P(LP(L), LP(L)), LP(L)).
+-- This is done when annotating lambdas and thunk bindings.
+-- See Note [Finalising boxity for demand signature]
+finaliseBoxity
+ :: FamInstEnvs
+ -> InsideInlineableFun -- ^ See the haddocks on 'InsideInlineableFun'
+ -> Type -- ^ Type of the argument
+ -> Demand -- ^ How the arg was used
+ -> Demand
+finaliseBoxity env in_inl_fun ty dmd = go NotMarkedStrict ty dmd
+ where
+ go mark ty dmd@(n :* _) =
+ case wantToUnboxArg env ty dmd of
+ DropAbsent -> dmd
+ Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} ds
+ -- See Note [No lazy, Unboxed demands in demand signature]
+ -- See Note [Unboxing evaluated arguments]
+ | isStrict n || isMarkedStrict mark
+ -- See Note [Do not unbox class dictionaries]
+ , in_inl_fun == NotInsideInlineableFun || not (isClassPred ty)
+ -- See Note [mkWWstr and unsafeCoerce]
+ , ds `lengthIs` dataConRepArity dc
+ , let arg_tys = dubiousDataConInstArgTys dc tc_args
+ -> -- pprTrace "finaliseBoxity:Unbox" (ppr ty $$ ppr dmd $$ ppr ds) $
+ n :* (mkProd Unboxed $! zip_go_with_marks dc arg_tys ds)
+ -- See Note [No nested Unboxed inside Boxed in demand signature]
+ _ -> trimBoxity dmd
+
+ -- See Note [Unboxing evaluated arguments]
+ zip_go_with_marks dc arg_tys ds = case dataConWrapId_maybe dc of
+ Nothing -> strictZipWith (go NotMarkedStrict) arg_tys ds
+ -- Shortcut when DataCon worker=wrapper
+ Just _ -> strictZipWith3 go (dataConRepStrictness dc) arg_tys ds
+
{-
************************************************************************
* *
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index aaedf4f2d7..478a0f7737 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -479,6 +479,9 @@ data DynFlags = DynFlags {
-- a pattern against. A safe guard
-- against exponential blow-up.
simplTickFactor :: Int, -- ^ Multiplier for simplifier ticks
+ dmdUnboxWidth :: !Int, -- ^ Whether DmdAnal should optimistically put an
+ -- Unboxed demand on returned products with at most
+ -- this number of fields
specConstrThreshold :: Maybe Int, -- ^ Threshold for SpecConstr
specConstrCount :: Maybe Int, -- ^ Max number of specialisations for any one function
specConstrRecursive :: Int, -- ^ Max number of specialisations for recursive types
@@ -1101,6 +1104,7 @@ defaultDynFlags mySettings llvmConfig =
maxUncoveredPatterns = 4,
maxPmCheckModels = 30,
simplTickFactor = 100,
+ dmdUnboxWidth = 3, -- Default: Assume an unboxed demand on function bodies returning a triple
specConstrThreshold = Just 2000,
specConstrCount = Just 3,
specConstrRecursive = 3,
@@ -2657,6 +2661,8 @@ dynamic_flags_deps = [
; return d })))
, make_ord_flag defFlag "fsimpl-tick-factor"
(intSuffix (\n d -> d { simplTickFactor = n }))
+ , make_ord_flag defFlag "fdmd-unbox-width"
+ (intSuffix (\n d -> d { dmdUnboxWidth = n }))
, make_ord_flag defFlag "fspec-constr-threshold"
(intSuffix (\n d -> d { specConstrThreshold = Just n }))
, make_ord_flag defFlag "fno-spec-constr-threshold"
diff --git a/compiler/GHC/Types/Basic.hs b/compiler/GHC/Types/Basic.hs
index db4b8f9e23..342d9d3688 100644
--- a/compiler/GHC/Types/Basic.hs
+++ b/compiler/GHC/Types/Basic.hs
@@ -503,6 +503,12 @@ instance Outputable Boxity where
ppr Boxed = text "Boxed"
ppr Unboxed = text "Unboxed"
+instance Binary Boxity where -- implemented via isBoxed-isomorphism to Bool
+ put_ bh = put_ bh . isBoxed
+ get bh = do
+ b <- get bh
+ pure $ if b then Boxed else Unboxed
+
{-
************************************************************************
* *
diff --git a/compiler/GHC/Types/Demand.hs b/compiler/GHC/Types/Demand.hs
index 96b5b21bf7..ed7ef25aa8 100644
--- a/compiler/GHC/Types/Demand.hs
+++ b/compiler/GHC/Types/Demand.hs
@@ -16,9 +16,10 @@
-- Lays out the abstract domain for "GHC.Core.Opt.DmdAnal".
module GHC.Types.Demand (
-- * Demands
+ Boxity(..),
Card(C_00, C_01, C_0N, C_10, C_11, C_1N), CardNonAbs, CardNonOnce,
Demand(AbsDmd, BotDmd, (:*)),
- SubDemand(Prod), mkProd, viewProd,
+ SubDemand(Prod, Poly), mkProd, viewProd, unboxSubDemand,
-- ** Algebra
absDmd, topDmd, botDmd, seqDmd, topSubDmd,
-- *** Least upper bound
@@ -30,7 +31,7 @@ module GHC.Types.Demand (
-- ** Predicates on @Card@inalities and @Demand@s
isAbs, isUsedOnce, isStrict,
isAbsDmd, isUsedOnceDmd, isStrUsedDmd, isStrictDmd,
- isTopDmd, isSeqDmd, isWeakDmd,
+ isTopDmd, isWeakDmd,
-- ** Special demands
evalDmd,
-- *** Demands used in PrimOp signatures
@@ -38,7 +39,6 @@ module GHC.Types.Demand (
-- ** Other @Demand@ operations
oneifyCard, oneifyDmd, strictifyDmd, strictifyDictDmd, mkWorkerDemand,
peelCallDmd, peelManyCalls, mkCalledOnceDmd, mkCalledOnceDmds,
- addCaseBndrDmd,
-- ** Extracting one-shot information
argOneShots, argsOneShots, saturatedByOneShots,
@@ -71,7 +71,7 @@ module GHC.Types.Demand (
DmdTransformer, dmdTransformSig, dmdTransformDataConSig, dmdTransformDictSelSig,
-- * Trim to a type shape
- TypeShape(..), trimToType,
+ TypeShape(..), trimToType, trimBoxity,
-- * @seq@ing stuff
seqDemand, seqDemandList, seqDmdType, seqDmdSig,
@@ -108,6 +108,260 @@ _ = pprTrace -- Tired of commenting out the import all the time
{-
************************************************************************
* *
+ Boxity: Whether the box of something is used
+* *
+************************************************************************
+-}
+
+{- Note [Strictness and Unboxing]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If an argument is used strictly by the function body, we may use use
+call-by-value instead of call-by-need for that argument. What's more, we may
+unbox an argument that is used strictly, discarding the box at the call site.
+This can reduce allocations of the program drastically if the box really isn't
+needed in the function body. Here's an example:
+```
+even :: Int -> Bool
+even (I# 0) = True
+even (I# 1) = False
+even (I# n) = even (I# (n -# 2))
+```
+All three code paths of 'even' are (a) strict in the argument, and (b)
+immediately discard the boxed 'Int'. Now if we have a call site like
+`even (I# 42)`, then it would be terrible to allocate the 'I#' box for the
+argument only to tear it apart immediately in the body of 'even'! Hence,
+worker/wrapper will allocate a wrapper for 'even' that not only uses
+call-by-value for the argument (e.g., `case I# 42 of b { $weven b }`), but also
+*unboxes* the argument, resulting in
+```
+even :: Int -> Bool
+even (I# n) = $weven n
+$weven :: Int# -> Bool
+$weven 0 = True
+$weven 1 = False
+$weven n = $weven (n -# 2)
+```
+And now the box in `even (I# 42)` will cancel away after inlining the wrapper.
+
+As far as the permission to unbox is concerned, *evaluatedness* of the argument
+is the important trait. Unboxing implies eager evaluation of an argument and
+we don't want to change the termination properties of the function. One way
+to ensure that is to unbox strict arguments only, but strictness is only a
+sufficient condition for evaluatedness.
+See Note [Unboxing evaluated arguments] in "GHC.Core.Opt.WorkWrap.Utils", where
+we manage to unbox *strict fields* of unboxed arguments that the function is not
+actually strict in, simply by realising that those fields have to be evaluated.
+
+Note [Boxity analysis]
+~~~~~~~~~~~~~~~~~~~~~~
+Alas, we don't want to unbox *every* strict argument
+(as Note [Strictness and Unboxing] might suggest).
+Here's an example (from T19871):
+```
+data Huge = H Bool Bool ... Bool
+ann :: Huge -> (Bool, Huge)
+ann h@(Huge True _ ... _) = (False, h)
+ann h = (True, h)
+```
+Unboxing 'h' yields
+```
+$wann :: Bool -> Bool -> ... -> Bool -> (Bool, Huge)
+$wann True b2 ... bn = (False, Huge True b2 ... bn)
+$wann b1 b2 ... bn = (True, Huge b1 b2 ... bn)
+```
+The pair constructor really needs its fields boxed. But '$wann' doesn't get
+passed 'h' anymore, only its components! Ergo it has to reallocate the 'Huge'
+box, in a process called "reboxing". After w/w, call sites like
+`case ... of Just h -> ann h` pay for the allocation of the additional box.
+In earlier versions of GHC we simply accepted that reboxing would sometimes
+happen, but we found some cases where it made a big difference: #19407, for
+example.
+
+We therefore perform a simple syntactic boxity analysis that piggy-backs on
+demand analysis in order to determine whether the box of a strict argument is
+always discarded in the function body, in which case we can pass it unboxed
+without risking regressions such as in 'ann' above. But as soon as one use needs
+the box, we want Boxed to win over any Unboxed uses.
+(We don't adhere to that in 'lubBoxity', see Note [lubBoxity and plusBoxity].)
+
+The demand signature (cf. Note [Demand notation]) will say whether it uses
+its arguments boxed or unboxed. Indeed it does so for every sub-component of
+the argument demand. Here's an example:
+```
+f :: (Int, Int) -> Bool
+f (a, b) = even (a + b) -- demand signature: <1!P(1!L,1!L)>
+```
+The '!' indicates places where we want to unbox, the lack thereof indicates the
+box is used by the function. Boxity flags are part of the 'Poly' and 'Prod'
+'SubDemand's, see Note [Why Boxity in SubDemand and not in Demand?].
+The given demand signature says "Unbox the pair and then nestedly unbox its
+two fields". By contrast, the demand signature of 'ann' above would look like
+<1P(1L,L,...,L)>, lacking any '!'.
+
+A demand signature like <1P(1!L)> -- Boxed outside but Unboxed in the field --
+doesn't make a lot of sense, as we can never unbox the field without unboxing
+the containing record. See Note [Finalising boxity for demand signature] in
+"GHC.Core.Opt.WorkWrap.Utils" for how we avoid to spread this and other kinds of
+misinformed boxities.
+
+Due to various practical reasons, Boxity Analysis is not conservative at times.
+Here are reasons for too much optimism:
+
+ * Note [Function body boxity and call sites] is an observation about when it is
+ beneficial to unbox a parameter that is returned from a function.
+ Note [Unboxed demand on function bodies returning small products] derives
+ a heuristic from the former Note, pretending that all call sites of a
+ function need returned small products Unboxed.
+ * Note [lubBoxity and plusBoxity] describes why we optimistically let Unboxed
+ win when combining different case alternatives.
+
+Boxity analysis fixes a number of issues:
+#19871, #19407, #4267, #16859, #18907, #13331
+
+Note [Function body boxity and call sites]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider (from T5949)
+```
+f n p = case n of
+ 0 -> p :: (a, b)
+ _ -> f (n-1) p
+-- Worker/wrapper split if we decide to unbox:
+$wf n x y = case n of
+ 0 -> (# x, y #)
+ _ -> $wf (n-1) x y
+f n (x,y) = case $wf n x y of (# r, s #) -> (r,s)
+```
+When is it better to /not/ to unbox 'p'? That depends on the callers of 'f'!
+If all call sites
+
+ 1. Wouldn't need to allocate fresh boxes for 'p', and
+ 2. Needed the result pair of 'f' boxed
+
+Only then we'd see an increase in allocation resulting from unboxing. But as
+soon as only one of (1) or (2) holds, it really doesn't matter if 'f' unboxes
+'p' (and its result, it's important that CPR follows suit). For example
+```
+res = ... case f m (field t) of (r1,r2) -> ... -- (1) holds
+arg = ... [ f m (x,y) ] ... -- (2) holds
+```
+Because one of the boxes in the call site can cancel away:
+```
+res = ... case field1 t of (x1,x2) ->
+ case field2 t of (y1,y2) ->
+ case $wf x1 x2 y1 y2 of (#r1,r2#) -> ...
+arg = ... [ case $wf x1 x2 y1 y2 of (#r1,r2#) -> (r1,r2) ] ...
+```
+And when call sites neither have arg boxes (1) nor need the result boxed (2),
+then hesitating to unbox means /more/ allocation in the call site because of the
+need for fresh argument boxes.
+
+Summary: If call sites that satisfy both (1) and (2) occur more often than call
+sites that satisfy neither condition, then it's best /not/ to unbox 'p'.
+
+Note [Unboxed demand on function bodies returning small products]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [Boxity analysis] achieves its biggest wins when we avoid reboxing huge
+records. But when we return small products from a function, we often get faster
+programs by pretending that the caller unboxes the result. Long version:
+
+Observation: Big record arguments (e.g., DynFlags) tend to be modified much less
+ frequently than small records (e.g., Int).
+Result: Big records tend to be passed around boxed (unmodified) much more
+ frequently than small records.
+Consequnce: The larger the record, the more likely conditions (1) and (2) from
+ Note [Function body boxity and call sites] are met, in which case
+ unboxing returned parameters leads to reboxing.
+
+So we put an Unboxed demand on function bodies returning small products and a
+Boxed demand on the others. What is regarded a small product is controlled by
+the -fdmd-unbox-width flag.
+
+This also manages to unbox functions like
+```
+sum z [] = z
+sum (I# n) ((I# x):xs) = sum (I# (n +# x)) xs
+```
+where we can unbox 'z' on the grounds that it's but a small box anyway. That in
+turn means that the I# allocation in the recursive call site can cancel away and
+we get a non-allocating loop, nice and tight.
+Note that this is the typical case in "Observation" above: A small box is
+unboxed, modified, the result reboxed for the recursive call.
+
+Originally, this came up in binary-trees' check' function and #4267 which
+(similarly) features a strict fold over a tree. We'd also regress in join004 and
+join007 if we didn't assume an optimistic Unboxed demand on the function body.
+T17932 features a (non-recursive) function that returns a large record, e.g.,
+```
+flags (Options f x) = <huge> `seq` f
+```
+and here we won't unbox 'f' because it has 5 fields (which is larger than the
+default -fdmd-unbox-width threshold).
+
+Why not focus on putting Unboxed demands on all recursive function?
+Then we'd unbox
+```
+flags 0 (Options f x) = <huge> `seq` f
+flags n o = flags (n-1) o
+```
+and that seems hardly useful.
+(NB: Similar to 'f' from Note [Preserving Boxity of results is rarely a win],
+but there we only had 2 fields.)
+
+Note [lubBoxity and plusBoxity]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Should 'Boxed' win in 'lubBoxity' and 'plusBoxity'?
+The first intuition is Yes, because that would be the conservative choice:
+Responding 'Boxed' when there's the slightest chance we might need the box means
+we'll never need to rebox a value.
+
+For 'plusBoxity' the choice of 'boxedWins' is clear: When we need a value to be
+Boxed and Unboxed /in the same trace/, then we clearly need it to be Boxed.
+
+But if we chose 'boxedWins' for 'lubBoxity', we'd regress T3586. Smaller example
+```
+sumIO :: Int -> Int -> IO Int
+sumIO 0 !z = return z
+sumIO n !z = sumIO (n-1) (z+n)
+```
+We really want 'z' to unbox here. Yet its use in the returned unboxed pair
+is fundamentally a Boxed one! CPR would manage to unbox it, but DmdAnal runs
+before that. There is an Unboxed use in the recursive call to 'go' though.
+So we choose 'unboxedWins' for 'lubBoxity' to collect this win.
+
+Choosing 'unboxedWins' is not conservative. There clearly is ample room for
+examples that get worse by our choice. Here's a simple one (from T19871):
+```
+data Huge = H { f1 :: Bool, ... many fields ... }
+update :: Huge -> (Bool, Huge)
+update h@(Huge{f1=True}) = (False, h{f1=False})
+update h = (True, h)
+```
+Here, we decide to unbox 'h' because it's used Unboxed in the first branch.
+
+Note that this is fundamentally working around a phase problem, namely that the
+results of boxity analysis depend on CPR analysis (and vice versa, of course).
+-}
+
+boxedWins :: Boxity -> Boxity -> Boxity
+boxedWins Unboxed Unboxed = Unboxed
+boxedWins _ !_ = Boxed
+
+unboxedWins :: Boxity -> Boxity -> Boxity
+unboxedWins Boxed Boxed = Boxed
+unboxedWins _ !_ = Unboxed
+
+lubBoxity :: Boxity -> Boxity -> Boxity
+-- See Note [Boxity analysis] for the lattice.
+-- See Note [lubBoxity and plusBoxity].
+lubBoxity = unboxedWins
+
+plusBoxity :: Boxity -> Boxity -> Boxity
+-- See Note [lubBoxity and plusBoxity].
+plusBoxity = boxedWins
+
+{-
+************************************************************************
+* *
Card: Combining Strictness and Usage
* *
************************************************************************
@@ -406,20 +660,26 @@ pattern n :* sd <- (viewDmdPair -> (n, sd)) where
--
-- See Note [Call demands are relative]
-- and Note [Demand notation].
+-- See also Note [Why Boxity in SubDemand and not in Demand?].
data SubDemand
- = Poly !CardNonOnce
+ = Poly !Boxity !CardNonOnce
-- ^ Polymorphic demand, the denoted thing is evaluated arbitrarily deep,
- -- with the specified cardinality at every level.
+ -- with the specified cardinality at every level. The 'Boxity' applies only
+ -- to the outer evaluation context; inner evaluation context can be regarded
+ -- as 'Boxed'. See Note [Boxity in Poly] for why we want it to carry 'Boxity'.
-- Expands to 'Call' via 'viewCall' and to 'Prod' via 'viewProd'.
--
- -- @Poly n@ is semantically equivalent to @Prod [n :* Poly n, ...]@ or
- -- @Call n (Poly n)@. 'mkCall' and 'mkProd' do these rewrites.
+ -- @Poly b n@ is semantically equivalent to @Prod b [n :* Poly Boxed n, ...]@
+ -- or @Call n (Poly Boxed n)@. 'viewCall' and 'viewProd' do these rewrites.
--
- -- In Note [Demand notation]: @L === P(L,L,...)@ and @L === CL(L)@,
- -- @B === P(B,B,...)@ and @B === CB(B)@, and so on.
+ -- In Note [Demand notation]: @L === P(L,L,...)@ and @L === CL(L)@,
+ -- @B === P(B,B,...)@ and @B === CB(B)@,
+ -- @!A === !P(A,A,...)@ and @!A === !CA(A)@,
+ -- and so on.
--
- -- We only really use 'Poly' with 'C_10' (B), 'C_00' (A), 'C_0N' (L) and
- -- sometimes 'C_1N' (S), hence 'CardNonOnce'.
+ -- We'll only see 'Poly' with 'C_10' (B), 'C_00' (A), 'C_0N' (L) and sometimes
+ -- 'C_1N' (S) through 'plusSubDmd', never 'C_01' (M) or 'C_11' (1) (grep the
+ -- source code). Hence 'CardNonOnce', which is closed under 'lub' and 'plus'.
| Call !CardNonAbs !SubDemand
-- ^ @Call n sd@ describes the evaluation context of @n@ function
-- applications, where every individual result is evaluated according to @sd@.
@@ -429,53 +689,68 @@ data SubDemand
-- expressed with 'Poly'.
-- Used only for values of function type. Use the smart constructor 'mkCall'
-- whenever possible!
- | Prod ![Demand]
- -- ^ @Prod ds@ describes the evaluation context of a case scrutinisation
+ | Prod !Boxity ![Demand]
+ -- ^ @Prod b ds@ describes the evaluation context of a case scrutinisation
-- on an expression of product type, where the product components are
- -- evaluated according to @ds@.
- deriving Eq
+ -- evaluated according to @ds@. The 'Boxity' @b@ says whether or not the box
+ -- of the product was used.
+
+-- | We have to respect Poly rewrites through 'viewCall' and 'viewProd'.
+instance Eq SubDemand where
+ d1 == d2 = case d1 of
+ Prod b1 ds1
+ | Just (b2, ds2) <- viewProd (length ds1) d2 -> b1 == b2 && ds1 == ds2
+ Call n1 sd1
+ | Just (n2, sd2) <- viewCall d2 -> n1 == n2 && sd1 == sd2
+ Poly b1 n1
+ | Poly b2 n2 <- d2 -> b1 == b2 && n1 == n2
+ _ -> False
-poly00, poly0N, poly1N, poly10 :: SubDemand
topSubDmd, botSubDmd, seqSubDmd :: SubDemand
-poly00 = Poly C_00
-poly0N = Poly C_0N
-poly1N = Poly C_1N
-poly10 = Poly C_10
-topSubDmd = poly0N
-botSubDmd = poly10
-seqSubDmd = poly00
-
-polyDmd :: CardNonOnce -> Demand
-polyDmd C_00 = AbsDmd
-polyDmd C_10 = BotDmd
-polyDmd C_0N = C_0N :* poly0N
-polyDmd C_1N = C_1N :* poly1N
-polyDmd c = pprPanic "non-once Card" (ppr c)
+topSubDmd = Poly Boxed C_0N
+botSubDmd = Poly Unboxed C_10
+seqSubDmd = Poly Unboxed C_00
+
+-- | The uniform field demand when viewing a 'Poly' as a 'Prod', as in
+-- 'viewProd'.
+polyFieldDmd :: CardNonOnce -> Demand
+polyFieldDmd C_00 = AbsDmd
+polyFieldDmd C_10 = BotDmd
+polyFieldDmd C_0N = topDmd
+polyFieldDmd n = C_1N :* Poly Boxed C_1N & assertPpr (isCardNonOnce n) (ppr n)
-- | A smart constructor for 'Prod', applying rewrite rules along the semantic
--- equality @Prod [polyDmd n, ...] === polyDmd n@, simplifying to 'Poly'
--- 'SubDemand's when possible. Note that this degrades boxity information! E.g. a
--- polymorphic demand will never unbox.
-mkProd :: [Demand] -> SubDemand
-mkProd [] = seqSubDmd
--- We only want to simplify absent and bottom demands and unbox the others.
--- See also Note [L should win] and Note [Don't optimise LP(L,L,...) to L].
-mkProd ds
- | all (== AbsDmd) ds = seqSubDmd
- | all (== BotDmd) ds = botSubDmd
- | otherwise = Prod ds
+-- equality @Prod b [n :* Poly Boxed n, ...] === Poly b n@, simplifying to
+-- 'Poly' 'SubDemand's when possible. Examples:
+--
+-- * Rewrites @P(L,L)@ (e.g., arguments @Boxed@, @[L,L]@) to @L@
+-- * Rewrites @!P(L,L)@ (e.g., arguments @Unboxed@, @[L,L]@) to @!L@
+-- * Does not rewrite @P(1L)@, @P(L!L)@ or @P(L,A)@
+--
+mkProd :: Boxity -> [Demand] -> SubDemand
+mkProd b ds
+ | all (== AbsDmd) ds = Poly b C_00
+ | all (== BotDmd) ds = Poly b C_10
+ | dmd@(n :* Poly Boxed m):_ <- ds -- don't rewrite P(L!L)
+ , n == m -- don't rewrite P(1L)
+ , all (== dmd) ds -- don't rewrite P(L,A)
+ = Poly b n
+ | otherwise = Prod b ds
-- | @viewProd n sd@ interprets @sd@ as a 'Prod' of arity @n@, expanding 'Poly'
-- demands as necessary.
-viewProd :: Arity -> SubDemand -> Maybe [Demand]
+viewProd :: Arity -> SubDemand -> Maybe (Boxity, [Demand])
-- It's quite important that this function is optimised well;
--- it is used by lubSubDmd and plusSubDmd. Note the strict
--- application to 'polyDmd':
-viewProd n (Prod ds) | ds `lengthIs` n = Just ds
+-- it is used by lubSubDmd and plusSubDmd.
+viewProd n (Prod b ds)
+ | ds `lengthIs` n = Just (b, ds)
-- Note the strict application to replicate: This makes sure we don't allocate
-- a thunk for it, inlines it and lets case-of-case fire at call sites.
-viewProd n (Poly card) = Just $! (replicate n $! polyDmd card)
-viewProd _ _ = Nothing
+viewProd n (Poly b card)
+ | let !ds = replicate n $! polyFieldDmd card
+ = Just (b, ds)
+viewProd _ _
+ = Nothing
{-# INLINE viewProd #-} -- we want to fuse away the replicate and the allocation
-- for Arity. Otherwise, #18304 bites us.
@@ -483,38 +758,54 @@ viewProd _ _ = Nothing
-- equality @Call n (Poly n) === Poly n@, simplifying to 'Poly' 'SubDemand's
-- when possible.
mkCall :: CardNonAbs -> SubDemand -> SubDemand
-mkCall C_1N sd@(Poly C_1N) = sd
-mkCall C_0N sd@(Poly C_0N) = sd
-mkCall n cd = assertPpr (isCardNonAbs n) (ppr n $$ ppr cd) $
- Call n cd
+mkCall C_1N sd@(Poly Boxed C_1N) = sd
+mkCall C_0N sd@(Poly Boxed C_0N) = sd
+mkCall n cd = assertPpr (isCardNonAbs n) (ppr n $$ ppr cd) $
+ Call n cd
+
+-- | @viewCall sd@ interprets @sd@ as a 'Call', expanding 'Poly' subdemands as
+-- necessary.
+viewCall :: SubDemand -> Maybe (Card, SubDemand)
+viewCall (Call n sd) = Just (n :: Card, sd)
+viewCall (Poly _ n) = Just (n :: Card, Poly Boxed n)
+viewCall _ = Nothing
topDmd, absDmd, botDmd, seqDmd :: Demand
-topDmd = polyDmd C_0N
+topDmd = C_0N :* topSubDmd
absDmd = AbsDmd
botDmd = BotDmd
seqDmd = C_11 :* seqSubDmd
+-- | Sets 'Boxity' to 'Unboxed' for non-'Call' sub-demands.
+unboxSubDemand :: SubDemand -> SubDemand
+unboxSubDemand (Poly _ n) = Poly Unboxed n
+unboxSubDemand (Prod _ ds) = mkProd Unboxed ds
+unboxSubDemand sd@Call{} = sd
+
-- | Denotes '∪' on 'SubDemand'.
lubSubDmd :: SubDemand -> SubDemand -> SubDemand
-- Handle botSubDmd (just an optimisation, the general case would do the same)
-lubSubDmd (Poly C_10) d2 = d2
-lubSubDmd d1 (Poly C_10) = d1
+lubSubDmd (Poly Unboxed C_10) d2 = d2
+lubSubDmd d1 (Poly Unboxed C_10) = d1
-- Handle Prod
-lubSubDmd (Prod ds1) (Poly n2) = Prod $ strictMap (lubDmd (polyDmd n2)) ds1
-lubSubDmd (Prod ds1) (Prod ds2)
- | equalLength ds1 ds2 = Prod $ strictZipWith lubDmd ds1 ds2
+lubSubDmd (Prod b1 ds1) (Poly b2 n2)
+ | let !d = polyFieldDmd n2
+ = mkProd (lubBoxity b1 b2) (strictMap (lubDmd d) ds1)
+lubSubDmd (Prod b1 ds1) (Prod b2 ds2)
+ | equalLength ds1 ds2
+ = mkProd (lubBoxity b1 b2) (strictZipWith lubDmd ds1 ds2)
-- Handle Call
-lubSubDmd (Call n1 sd1) (Poly n2)
+lubSubDmd (Call n1 sd1) sd2@(Poly _ n2)
-- See Note [Call demands are relative]
| isAbs n2 = mkCall (lubCard n2 n1) sd1
- | otherwise = mkCall (lubCard n2 n1) (lubSubDmd sd1 (Poly n2))
+ | otherwise = mkCall (lubCard n2 n1) (lubSubDmd sd1 sd2)
lubSubDmd (Call n1 d1) (Call n2 d2)
| otherwise = mkCall (lubCard n1 n2) (lubSubDmd d1 d2)
-- Handle Poly. Exploit reflexivity (so we'll match the Prod or Call cases again).
-lubSubDmd (Poly n1) (Poly n2) = Poly (lubCard n1 n2)
-lubSubDmd sd1@Poly{} sd2 = lubSubDmd sd2 sd1
+lubSubDmd (Poly b1 n1) (Poly b2 n2) = Poly (lubBoxity b1 b2) (lubCard n1 n2)
+lubSubDmd sd1@Poly{} sd2 = lubSubDmd sd2 sd1
-- Otherwise (Call `lub` Prod) return Top
-lubSubDmd _ _ = topSubDmd
+lubSubDmd _ _ = topSubDmd
-- | Denotes '∪' on 'Demand'.
lubDmd :: Demand -> Demand -> Demand
@@ -523,24 +814,27 @@ lubDmd (n1 :* sd1) (n2 :* sd2) = lubCard n1 n2 :* lubSubDmd sd1 sd2
-- | Denotes '+' on 'SubDemand'.
plusSubDmd :: SubDemand -> SubDemand -> SubDemand
-- Handle seqSubDmd (just an optimisation, the general case would do the same)
-plusSubDmd (Poly C_00) d2 = d2
-plusSubDmd d1 (Poly C_00) = d1
+plusSubDmd (Poly Unboxed C_00) d2 = d2
+plusSubDmd d1 (Poly Unboxed C_00) = d1
-- Handle Prod
-plusSubDmd (Prod ds1) (Poly n2) = Prod $ strictMap (plusDmd (polyDmd n2)) ds1
-plusSubDmd (Prod ds1) (Prod ds2)
- | equalLength ds1 ds2 = Prod $ strictZipWith plusDmd ds1 ds2
+plusSubDmd (Prod b1 ds1) (Poly b2 n2)
+ | let !d = polyFieldDmd n2
+ = mkProd (plusBoxity b1 b2) (strictMap (plusDmd d) ds1)
+plusSubDmd (Prod b1 ds1) (Prod b2 ds2)
+ | equalLength ds1 ds2
+ = mkProd (plusBoxity b1 b2) (strictZipWith plusDmd ds1 ds2)
-- Handle Call
-plusSubDmd (Call n1 d1) (Poly n2)
+plusSubDmd (Call n1 sd1) sd2@(Poly _ n2)
-- See Note [Call demands are relative]
- | isAbs n2 = mkCall (plusCard n2 n1) d1
- | otherwise = mkCall (plusCard n2 n1) (lubSubDmd d1 (Poly n2))
-plusSubDmd (Call n1 d1) (Call n2 d2)
- | otherwise = mkCall (plusCard n1 n2) (lubSubDmd d1 d2)
--- Handle Poly. Exploit (so we'll match the Prod or Call cases again).
-plusSubDmd (Poly n1) (Poly n2) = Poly (plusCard n1 n2)
-plusSubDmd sd1@Poly{} sd2 = plusSubDmd sd2 sd1
+ | isAbs n2 = mkCall (plusCard n2 n1) sd1
+ | otherwise = mkCall (plusCard n2 n1) (lubSubDmd sd1 sd2)
+plusSubDmd (Call n1 sd1) (Call n2 sd2)
+ | otherwise = mkCall (plusCard n1 n2) (lubSubDmd sd1 sd2)
+-- Handle Poly. Exploit reflexivity (so we'll match the Prod or Call cases again).
+plusSubDmd (Poly b1 n1) (Poly b2 n2) = Poly (plusBoxity b1 b2) (plusCard n1 n2)
+plusSubDmd sd1@Poly{} sd2 = plusSubDmd sd2 sd1
-- Otherwise (Call `lub` Prod) return Top
-plusSubDmd _ _ = topSubDmd
+plusSubDmd _ _ = topSubDmd
-- | Denotes '+' on 'Demand'.
plusDmd :: Demand -> Demand -> Demand
@@ -549,11 +843,11 @@ plusDmd (n1 :* sd1) (n2 :* sd2) = plusCard n1 n2 :* plusSubDmd sd1 sd2
multSubDmd :: Card -> SubDemand -> SubDemand
multSubDmd C_11 sd = sd
multSubDmd C_00 _ = seqSubDmd
-multSubDmd C_10 (Poly n) = if isStrict n then botSubDmd else seqSubDmd
+multSubDmd C_10 (Poly _ n) = if isStrict n then botSubDmd else seqSubDmd
multSubDmd C_10 (Call n _) = if isStrict n then botSubDmd else seqSubDmd
-multSubDmd n (Poly m) = Poly (multCard n m)
+multSubDmd n (Poly b m) = Poly b (multCard n m)
multSubDmd n (Call n' sd) = mkCall (multCard n n') sd -- See Note [Call demands are relative]
-multSubDmd n (Prod ds) = Prod (strictMap (multDmd n) ds)
+multSubDmd n (Prod b ds) = mkProd b (strictMap (multDmd n) ds)
multDmd :: Card -> Demand -> Demand
-- The first two lines compute the same result as the last line, but won't
@@ -578,11 +872,6 @@ isStrictDmd (n :* _) = isStrict n
isStrUsedDmd :: Demand -> Bool
isStrUsedDmd (n :* _) = isStrict n && not (isAbs n)
-isSeqDmd :: Demand -> Bool
-isSeqDmd (C_11 :* sd) = sd == seqSubDmd
-isSeqDmd (C_1N :* sd) = sd == seqSubDmd
-isSeqDmd _ = False
-
-- | Is the value used at most once?
isUsedOnceDmd :: Demand -> Bool
isUsedOnceDmd (n :* _) = isUsedOnce n
@@ -602,9 +891,9 @@ isWeakDmd dmd@(n :* _) = not (isStrict n) && is_plus_idem_dmd dmd
is_plus_idem_dmd BotDmd = True
is_plus_idem_dmd (n :* sd) = is_plus_idem_card n && is_plus_idem_sub_dmd sd
-- is_plus_idem_sub_dmd sd = plusSubDmd sd sd == sd
- is_plus_idem_sub_dmd (Poly n) = assert (isCardNonOnce n) True
- is_plus_idem_sub_dmd (Prod ds) = all is_plus_idem_dmd ds
- is_plus_idem_sub_dmd (Call n _) = is_plus_idem_card n -- See Note [Call demands are relative]
+ is_plus_idem_sub_dmd (Poly _ n) = assert (isCardNonOnce n) True
+ is_plus_idem_sub_dmd (Prod _ ds) = all is_plus_idem_dmd ds
+ is_plus_idem_sub_dmd (Call n _) = is_plus_idem_card n -- See Note [Call demands are relative]
evalDmd :: Demand
evalDmd = C_1N :* topSubDmd
@@ -637,7 +926,7 @@ oneifyDmd (n :* sd) = oneifyCard n :* sd
-- | Make a 'Demand' evaluated at-least-once (e.g. strict).
strictifyDmd :: Demand -> Demand
-strictifyDmd AbsDmd = BotDmd
+strictifyDmd AbsDmd = seqDmd
strictifyDmd BotDmd = BotDmd
strictifyDmd (n :* sd) = plusCard C_10 n :* sd
@@ -646,17 +935,11 @@ strictifyDmd (n :* sd) = plusCard C_10 n :* sd
-- strictify the argument's contained used non-newtype superclass dictionaries.
-- We use the demand as our recursive measure to guarantee termination.
strictifyDictDmd :: Type -> Demand -> Demand
-strictifyDictDmd ty (n :* Prod ds)
+strictifyDictDmd ty (n :* Prod b ds)
| not (isAbs n)
, Just field_tys <- as_non_newtype_dict ty
- = C_1N :* -- main idea: ensure it's strict
- if all (not . isAbsDmd) ds
- then topSubDmd -- abstract to strict w/ arbitrary component use,
- -- since this smells like reboxing; results in CBV
- -- boxed
- --
- -- TODO revisit this if we ever do boxity analysis
- else Prod (zipWith strictifyDictDmd field_tys ds)
+ = C_1N :* mkProd b (zipWith strictifyDictDmd field_tys ds)
+ -- main idea: ensure it's strict
where
-- | Return a TyCon and a list of field types if the given
-- type is a non-newtype dictionary type
@@ -678,13 +961,6 @@ mkCalledOnceDmd sd = mkCall C_11 sd
mkCalledOnceDmds :: Arity -> SubDemand -> SubDemand
mkCalledOnceDmds arity sd = iterate mkCalledOnceDmd sd !! arity
--- | @viewCall sd@ interprets @sd@ as a 'Call', expanding 'TopSubDmd' and
--- 'SeqSubDmd' as necessary.
-viewCall :: SubDemand -> Maybe (Card, SubDemand)
-viewCall (Call n sd) = Just (n :: Card, sd)
-viewCall (Poly n) = Just (n :: Card, Poly n)
-viewCall _ = Nothing
-
-- | Peels one call level from the sub-demand, and also returns how many
-- times we entered the lambda body.
peelCallDmd :: SubDemand -> (Card, SubDemand)
@@ -706,15 +982,6 @@ mkWorkerDemand n = C_01 :* go n
where go 0 = topSubDmd
go n = Call C_01 $ go (n-1)
--- | Precondition: The SubDemand is not a Call
-addCaseBndrDmd :: SubDemand -- On the case binder
- -> [Demand] -- On the components of the constructor
- -> [Demand] -- Final demands for the components of the constructor
-addCaseBndrDmd sd alt_dmds
- -- See Note [Demand on case-alternative binders]
- | Prod ds <- plusSubDmd sd (Prod alt_dmds) = ds
- | otherwise = alt_dmds
-
argsOneShots :: DmdSig -> Arity -> [[OneShotInfo]]
-- ^ See Note [Computing one-shot info]
argsOneShots (DmdSig (DmdType _ arg_ds _)) n_val_args
@@ -810,77 +1077,6 @@ is hurt and we can assume that the nested demand is 'botSubDmd'. That ensures
that @g@ above actually gets the @1P(L)@ demand on its second pair component,
rather than the lazy @MP(L)@ if we 'lub'bed with an absent demand.
-Note [Demand on case-alternative binders]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The demand on a binder in a case alternative comes
- (a) From the demand on the binder itself
- (b) From the demand on the case binder
-Forgetting (b) led directly to #10148.
-
-Example. Source code:
- f x@(p,_) = if p then foo x else True
-
- foo (p,True) = True
- foo (p,q) = foo (q,p)
-
-After strictness analysis:
- f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) ->
- case x_an1
- of wild_X7 [Dmd=MP(ML,ML)]
- { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) ->
- case p_an2 of _ {
- False -> GHC.Types.True;
- True -> foo wild_X7 }
-
-It's true that ds_dnz is *itself* absent, but the use of wild_X7 means
-that it is very much alive and demanded. See #10148 for how the
-consequences play out.
-
-This is needed even for non-product types, in case the case-binder
-is used but the components of the case alternative are not.
-
-Note [Don't optimise LP(L,L,...) to L]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-These two SubDemands:
- LP(L,L) (@Prod [topDmd, topDmd]@) and L (@topSubDmd@)
-are semantically equivalent, but we do not turn the former into
-the latter, for a regrettable-subtle reason. Consider
- f p1@(x,y) = (y,x)
- g h p2@(_,_) = h p
-We want to unbox @p1@ of @f@, but not @p2@ of @g@, because @g@ only uses
-@p2@ boxed and we'd have to rebox. So we give @p1@ demand LP(L,L) and @p2@
-demand @L@ to inform 'GHC.Core.Opt.WorkWrap.Utils.wantToUnboxArg', which will
-say "unbox" for @p1@ and "don't unbox" for @p2@.
-
-So the solution is: don't aggressively collapse @Prod [topDmd, topDmd]@ to
-@topSubDmd@; instead leave it as-is. In effect we are using the UseDmd to do a
-little bit of boxity analysis. Not very nice.
-
-Note [L should win]
-~~~~~~~~~~~~~~~~~~~
-Both in 'lubSubDmd' and 'plusSubDmd' we want @L `plusSubDmd` LP(..))@ to be @L@.
-Why? Because U carries the implication the whole thing is used, box and all,
-so we don't want to w/w it, cf. Note [Don't optimise LP(L,L,...) to L].
-If we use it both boxed and unboxed, then we are definitely using the box,
-and so we are quite likely to pay a reboxing cost. So we make U win here.
-TODO: Investigate why since 2013, we don't.
-
-Example is in the Buffer argument of GHC.IO.Handle.Internals.writeCharBuffer
-
-Baseline: (A) Not making Used win (LP(..) wins)
-Compare with: (B) making Used win for lub and both
-
- Min -0.3% -5.6% -10.7% -11.0% -33.3%
- Max +0.3% +45.6% +11.5% +11.5% +6.9%
- Geometric Mean -0.0% +0.5% +0.3% +0.2% -0.8%
-
-Baseline: (B) Making L win for both lub and both
-Compare with: (C) making L win for plus, but LP(..) win for lub
-
- Min -0.1% -0.3% -7.9% -8.0% -6.5%
- Max +0.1% +1.0% +21.0% +21.0% +0.5%
- Geometric Mean +0.0% +0.0% -0.0% -0.1% -0.1%
-
Note [Computing one-shot info]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider a call
@@ -892,6 +1088,57 @@ Then argsOneShots returns a [[OneShotInfo]] of
The occurrence analyser propagates this one-shot infor to the
binders \pqr and \xyz;
see Note [Use one-shot information] in "GHC.Core.Opt.OccurAnal".
+
+Note [Boxity in Poly]
+~~~~~~~~~~~~~~~~~~~~~
+To support Note [Boxity analysis], it makes sense that 'Prod' carries a
+'Boxity'. But why does 'Poly' have to carry a 'Boxity', too? Shouldn't all
+'Poly's be 'Boxed'? Couldn't we simply use 'Prod Unboxed' when we need to
+express an unboxing demand?
+
+'botSubDmd' (B) needs to be the bottom of the lattice, so it needs to be an
+Unboxed demand. Similarly, 'seqSubDmd' (A) is an Unboxed demand.
+So why not say that Polys with absent cardinalities have Unboxed boxity?
+That doesn't work, because we also need the boxed equivalents. Here's an example
+for A (function 'absent' in T19871):
+```
+f _ True = 1
+f a False = a `seq` 2
+ -- demand on a: MA, the A is short for `Poly Boxed C_00`
+
+g a = a `seq` f a True
+ -- demand on a: SA, which is `Poly Boxed C_00`
+
+h True p = g p -- SA on p (inherited from g)
+h False p@(x,y) = x+y -- S!P(1!L,1!L) on p
+```
+(Caveat: Since Unboxed wins in lubBoxity, we'll unbox here anyway.)
+If A is treated as Unboxed, we get reboxing in the call site to 'g'.
+So we obviously would need a Boxed variant of A. Rather than introducing a lot
+of special cases, we just carry the Boxity in 'Poly'. Plus, we could most likely
+find examples like the above for any other cardinality.
+
+Note [Why Boxity in SubDemand and not in Demand?]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In #19871, we started out by storing 'Boxity' in 'SubDemand', in the 'Prod'
+constructor only. But then we found that we weren't able to express the unboxing
+'seqSubDmd', because that one really is a `Poly C_00` sub-demand.
+We then tried to store the Boxity in 'Demand' instead, for these reasons:
+
+ 1. The whole boxity-of-seq business comes to a satisfying conclusion
+ 2. Putting Boxity in the SubDemand is weird to begin with, because it
+ describes the box and not its fields, just as the evaluation cardinality
+ of a Demand describes how often the box is used. It makes more sense that
+ Card and Boxity travel together. Also the alternative would have been to
+ store Boxity with Poly, which is even weirder and more redundant.
+
+But then we regressed in T7837 (grep #19871 for boring specifics), which needed
+to transfer an ambient unboxed *demand* on a dictionary selector to its argument
+dictionary, via a 'Call' sub-demand `C1(sd)`, as
+Note [Demand transformer for a dictionary selector] explains. Annoyingly,
+the boxity info has to be stored in the *sub-demand* `sd`! There's no demand
+to store the boxity in. So we bit the bullet and now we store Boxity in
+'SubDemand', both in 'Prod' *and* 'Poly'. See also Note [Boxity in Poly].
-}
{- *********************************************************************
@@ -1289,6 +1536,9 @@ plusDmdType (DmdType fv1 ds1 r1) (fv2, t2)
-- See Note [Asymmetry of 'plus*']
-- 'plus' takes the argument/result info from its *first* arg,
-- using its second arg just for its free-var info.
+ | isEmptyVarEnv fv2, defaultFvDmd t2 == absDmd
+ = DmdType fv1 ds1 (r1 `plusDivergence` t2) -- a very common case that is much more efficient
+ | otherwise
= DmdType (plusVarEnv_CD plusDmd fv1 (defaultFvDmd r1) fv2 (defaultFvDmd t2))
ds1
(r1 `plusDivergence` t2)
@@ -1566,10 +1816,10 @@ newtype DmdSig
deriving Eq
-- | Turns a 'DmdType' computed for the particular 'Arity' into a 'DmdSig'
--- unleashable at that arity. See Note [Understanding DmdType and DmdSig]
+-- unleashable at that arity. See Note [Understanding DmdType and DmdSig].
mkDmdSigForArity :: Arity -> DmdType -> DmdSig
mkDmdSigForArity arity dmd_ty@(DmdType fvs args div)
- | arity < dmdTypeDepth dmd_ty = DmdSig (DmdType fvs (take arity args) div)
+ | arity < dmdTypeDepth dmd_ty = DmdSig $ DmdType fvs (take arity args) div
| otherwise = DmdSig (etaExpandDmdType arity dmd_ty)
mkClosedDmdSig :: [Demand] -> Divergence -> DmdSig
@@ -1671,26 +1921,28 @@ dmdTransformDataConSig arity sd = case go arity sd of
Just dmds -> DmdType emptyDmdEnv dmds topDiv
Nothing -> nopDmdType -- Not saturated
where
- go 0 sd = viewProd arity sd
+ go 0 sd = snd <$> viewProd arity sd
go n (Call C_11 sd) = go (n-1) sd -- strict calls only!
go _ _ = Nothing
-- | A special 'DmdTransformer' for dictionary selectors that feeds the demand
-- on the result into the indicated dictionary component (if saturated).
+-- See Note [Demand transformer for a dictionary selector].
dmdTransformDictSelSig :: DmdSig -> DmdTransformer
--- NB: This currently doesn't handle newtype dictionaries and it's unclear how
--- it could without additional parameters.
-dmdTransformDictSelSig (DmdSig (DmdType _ [(_ :* sig_sd)] _)) call_sd
+-- NB: This currently doesn't handle newtype dictionaries.
+-- It should simply apply call_sd directly to the dictionary, I suppose.
+dmdTransformDictSelSig (DmdSig (DmdType _ [_ :* prod] _)) call_sd
| (n, sd') <- peelCallDmd call_sd
- , Prod sig_ds <- sig_sd
+ , Prod _ sig_ds <- prod
= multDmdType n $
- DmdType emptyDmdEnv [C_11 :* Prod (map (enhance sd') sig_ds)] topDiv
+ DmdType emptyDmdEnv [C_11 :* mkProd Unboxed (map (enhance sd') sig_ds)] topDiv
| otherwise
= nopDmdType -- See Note [Demand transformer for a dictionary selector]
where
- enhance sd old | isAbsDmd old = old
- | otherwise = C_11 :* sd -- This is the one!
-
+ enhance _ AbsDmd = AbsDmd
+ enhance _ BotDmd = BotDmd
+ enhance sd _dmd_var = C_11 :* sd -- This is the one!
+ -- C_11, because we multiply with n above
dmdTransformDictSelSig sig sd = pprPanic "dmdTransformDictSelSig: no args" (ppr sig $$ ppr sd)
{-
@@ -1751,16 +2003,49 @@ is computed by calling @peelManyCalls n@, which corresponds to α above.
Note [Demand transformer for a dictionary selector]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we have a superclass selector 'sc_sel' and a class method
+selector 'op_sel', and a function that uses both, like this
+
+-- Strictness sig: 1P(1,A)
+sc_sel (x,y) = x
+
+-- Strictness sig: 1P(A,1)
+op_sel (p,q)= q
+
+f d v = op_sel (sc_sel d) v
+
+What do we learn about the demand on 'd'? Alas, we see only the
+demand from 'sc_sel', namely '1P(1,A)'. We /don't/ see that 'd' really has a nested
+demand '1P(1P(A,1C1(1)),A)'. On the other hand, if we inlined the two selectors
+we'd have
+
+f d x = case d of (x,_) ->
+ case x of (_,q) ->
+ q v
+
+If we analyse that, we'll get a richer, nested demand on 'd'.
+
+We want to behave /as if/ we'd inlined 'op_sel' and 'sc_sel'. We can do this
+easily by building a richer demand transformer for dictionary selectors than
+is expressible by a regular demand signature.
+And that is what 'dmdTransformDictSelSig' does: it transforms the demand on the
+result to a demand on the (single) argument.
+
+How does it do that?
If we evaluate (op dict-expr) under demand 'd', then we can push the demand 'd'
into the appropriate field of the dictionary. What *is* the appropriate field?
We just look at the strictness signature of the class op, which will be
-something like: P(AAA1AAAAA). Then replace the '1' by the demand 'd'.
+something like: P(AAA1AAAAA). Then replace the '1' (or any other non-absent
+demand, really) by the demand 'd'. The '1' acts as if it was a demand variable,
+the whole signature really means `\d. P(AAAdAAAAA)` for any incoming
+demand 'd'.
For single-method classes, which are represented by newtypes the signature
of 'op' won't look like P(...), so matching on Prod will fail.
That's fine: if we are doing strictness analysis we are also doing inlining,
so we'll have inlined 'op' into a cast. So we can bale out in a conservative
-way, returning nopDmdType.
+way, returning nopDmdType. SG: Although we then probably want to apply the eval
+demand 'd' directly to 'op' rather than turning it into 'topSubDmd'...
It is (just.. #8329) possible to be running strictness analysis *without*
having inlined class ops from single-method classes. Suppose you are using
@@ -1818,10 +2103,10 @@ kill_usage kfs (n :* sd) = kill_usage_card kfs n :* kill_usage_sd kfs sd
kill_usage_sd :: KillFlags -> SubDemand -> SubDemand
kill_usage_sd kfs (Call n sd)
- | kf_called_once kfs = mkCall (lubCard C_1N n) (kill_usage_sd kfs sd)
- | otherwise = mkCall n (kill_usage_sd kfs sd)
-kill_usage_sd kfs (Prod ds) = Prod (map (kill_usage kfs) ds)
-kill_usage_sd _ sd = sd
+ | kf_called_once kfs = mkCall (lubCard C_1N n) (kill_usage_sd kfs sd)
+ | otherwise = mkCall n (kill_usage_sd kfs sd)
+kill_usage_sd kfs (Prod b ds) = mkProd b (map (kill_usage kfs) ds)
+kill_usage_sd _ sd = sd
{- *********************************************************************
* *
@@ -1843,12 +2128,22 @@ trimToType BotDmd _ = BotDmd
trimToType (n :* sd) ts
= n :* go sd ts
where
- go (Prod ds) (TsProd tss)
- | equalLength ds tss = Prod (zipWith trimToType ds tss)
+ go (Prod b ds) (TsProd tss)
+ | equalLength ds tss = mkProd b (zipWith trimToType ds tss)
go (Call n sd) (TsFun ts) = mkCall n (go sd ts)
go sd@Poly{} _ = sd
go _ _ = topSubDmd
+-- | Drop all boxity
+trimBoxity :: Demand -> Demand
+trimBoxity AbsDmd = AbsDmd
+trimBoxity BotDmd = BotDmd
+trimBoxity (n :* sd) = n :* go sd
+ where
+ go (Poly _ n) = Poly Boxed n
+ go (Prod _ ds) = mkProd Boxed (map trimBoxity ds)
+ go (Call n sd) = mkCall n $ go sd
+
{-
************************************************************************
* *
@@ -1863,9 +2158,9 @@ seqDemand BotDmd = ()
seqDemand (_ :* sd) = seqSubDemand sd
seqSubDemand :: SubDemand -> ()
-seqSubDemand (Prod ds) = seqDemandList ds
+seqSubDemand (Prod _ ds) = seqDemandList ds
seqSubDemand (Call _ sd) = seqSubDemand sd
-seqSubDemand (Poly _) = ()
+seqSubDemand (Poly _ _) = ()
seqDemandList :: [Demand] -> ()
seqDemandList = foldr (seq . seqDemand) ()
@@ -1905,19 +2200,21 @@ in the user's guide.
For pretty-printing demands, we use quite a compact notation with some
abbreviations. Here's the BNF:
- card ::= B {}
- | A {0}
- | M {0,1}
- | L {0,1,n}
- | 1 {1}
- | S {1,n}
+ card ::= B {}
+ | A {0}
+ | M {0,1}
+ | L {0,1,n}
+ | 1 {1}
+ | S {1,n}
+
+ box ::= ! Unboxed
+ | <empty> Boxed
d ::= card sd The :* constructor, just juxtaposition
- | card abbreviation: Same as "card card",
- in code @polyDmd card@
+ | card abbreviation: Same as "card card"
- sd ::= card @Poly card@
- | P(d,d,..) @Prod [d1,d2,..]@
+ sd ::= box card @Poly box card@
+ | box P(d,d,..) @Prod box [d1,d2,..]@
| Ccard(sd) @Call card sd@
So, L can denote a 'Card', polymorphic 'SubDemand' or polymorphic 'Demand',
@@ -1957,22 +2254,26 @@ instance Outputable Card where
-- | See Note [Demand notation]
instance Outputable Demand where
- ppr AbsDmd = char 'A'
- ppr BotDmd = char 'B'
- ppr (C_0N :* Poly C_0N) = char 'L' -- Print LL as just L
- ppr (C_1N :* Poly C_1N) = char 'S' -- Dito SS
- ppr (n :* sd) = ppr n <> ppr sd
+ ppr AbsDmd = char 'A'
+ ppr BotDmd = char 'B'
+ ppr (C_0N :* Poly Boxed C_0N) = char 'L' -- Print LL as just L
+ ppr (C_1N :* Poly Boxed C_1N) = char 'S' -- Dito SS
+ ppr (n :* sd) = ppr n <> ppr sd
-- | See Note [Demand notation]
instance Outputable SubDemand where
- ppr (Poly sd) = ppr sd
+ ppr (Poly b sd) = pp_boxity b <> ppr sd
ppr (Call n sd) = char 'C' <> ppr n <> parens (ppr sd)
- ppr (Prod ds) = char 'P' <> parens (fields ds)
+ ppr (Prod b ds) = pp_boxity b <> char 'P' <> parens (fields ds)
where
fields [] = empty
fields [x] = ppr x
fields (x:xs) = ppr x <> char ',' <> fields xs
+pp_boxity :: Boxity -> SDoc
+pp_boxity Unboxed = char '!'
+pp_boxity _ = empty
+
instance Outputable Divergence where
ppr Diverges = char 'b' -- for (b)ottom
ppr ExnOrDiv = char 'x' -- for e(x)ception
@@ -2026,15 +2327,15 @@ instance Binary Demand where
_ -> (n :*) <$> get bh
instance Binary SubDemand where
- put_ bh (Poly sd) = putByte bh 0 *> put_ bh sd
+ put_ bh (Poly b sd) = putByte bh 0 *> put_ bh b *> put_ bh sd
put_ bh (Call n sd) = putByte bh 1 *> put_ bh n *> put_ bh sd
- put_ bh (Prod ds) = putByte bh 2 *> put_ bh ds
+ put_ bh (Prod b ds) = putByte bh 2 *> put_ bh b *> put_ bh ds
get bh = do
h <- getByte bh
case h of
- 0 -> Poly <$> get bh
+ 0 -> Poly <$> get bh <*> get bh
1 -> mkCall <$> get bh <*> get bh
- 2 -> Prod <$> get bh
+ 2 -> Prod <$> get bh <*> get bh
_ -> pprPanic "Binary:SubDemand" (ppr (fromIntegral h :: Int))
instance Binary DmdSig where
diff --git a/compiler/GHC/Types/Id/Make.hs b/compiler/GHC/Types/Id/Make.hs
index e52c12ce71..9955a025ea 100644
--- a/compiler/GHC/Types/Id/Make.hs
+++ b/compiler/GHC/Types/Id/Make.hs
@@ -516,9 +516,12 @@ mkDictSelId name clas
strict_sig = mkClosedDmdSig [arg_dmd] topDiv
arg_dmd | new_tycon = evalDmd
- | otherwise = C_1N :*
- Prod [ if name == sel_name then evalDmd else absDmd
- | sel_name <- sel_names ]
+ | otherwise = C_1N :* mkProd Unboxed dict_field_dmds
+ where
+ -- The evalDmd below is just a placeholder and will be replaced in
+ -- GHC.Types.Demand.dmdTransformDictSel
+ dict_field_dmds = [ if name == sel_name then evalDmd else absDmd
+ | sel_name <- sel_names ]
mkDictSelRhs :: Class
-> Int -- 0-indexed selector among (superclasses ++ methods)
diff --git a/compiler/GHC/Utils/Misc.hs b/compiler/GHC/Utils/Misc.hs
index 05e8365745..fbf52d1bdb 100644
--- a/compiler/GHC/Utils/Misc.hs
+++ b/compiler/GHC/Utils/Misc.hs
@@ -79,7 +79,7 @@ module GHC.Utils.Misc (
transitiveClosure,
-- * Strictness
- seqList, strictMap, strictZipWith,
+ seqList, strictMap, strictZipWith, strictZipWith3,
-- * Module names
looksLikeModuleName,
@@ -991,8 +991,8 @@ seqList [] b = b
seqList (x:xs) b = x `seq` seqList xs b
strictMap :: (a -> b) -> [a] -> [b]
-strictMap _ [] = []
-strictMap f (x : xs) =
+strictMap _ [] = []
+strictMap f (x:xs) =
let
!x' = f x
!xs' = strictMap f xs
@@ -1000,15 +1000,26 @@ strictMap f (x : xs) =
x' : xs'
strictZipWith :: (a -> b -> c) -> [a] -> [b] -> [c]
-strictZipWith _ [] _ = []
-strictZipWith _ _ [] = []
-strictZipWith f (x : xs) (y: ys) =
+strictZipWith _ [] _ = []
+strictZipWith _ _ [] = []
+strictZipWith f (x:xs) (y:ys) =
let
!x' = f x y
!xs' = strictZipWith f xs ys
in
x' : xs'
+strictZipWith3 :: (a -> b -> c -> d) -> [a] -> [b] -> [c] -> [d]
+strictZipWith3 _ [] _ _ = []
+strictZipWith3 _ _ [] _ = []
+strictZipWith3 _ _ _ [] = []
+strictZipWith3 f (x:xs) (y:ys) (z:zs) =
+ let
+ !x' = f x y z
+ !xs' = strictZipWith3 f xs ys zs
+ in
+ x' : xs'
+
-- Module names: