diff options
author | Sebastian Graf <sebastian.graf@kit.edu> | 2021-04-28 14:55:26 +0200 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2021-10-24 01:26:46 -0400 |
commit | 3bab222c585343f8febe2a627d280b7be9401e92 (patch) | |
tree | bb95653710d6ac277a88f8011c4e491a73531a64 /compiler | |
parent | 8300ca2e3bcc3e74f7524116f85688da6167bb2f (diff) | |
download | haskell-3bab222c585343f8febe2a627d280b7be9401e92.tar.gz |
DmdAnal: Implement Boxity Analysis (#19871)
This patch fixes some abundant reboxing of `DynFlags` in
`GHC.HsToCore.Match.Literal.warnAboutOverflowedLit` (which was the topic
of #19407) by introducing a Boxity analysis to GHC, done as part of demand
analysis. This allows to accurately capture ad-hoc unboxing decisions previously
made in worker/wrapper in demand analysis now, where the boxity info can
propagate through demand signatures.
See the new `Note [Boxity analysis]`. The actual fix for #19407 is described in
`Note [No lazy, Unboxed demand in demand signature]`, but
`Note [Finalising boxity for demand signature]` is probably a better entry-point.
To support the fix for #19407, I had to change (what was)
`Note [Add demands for strict constructors]` a bit
(now `Note [Unboxing evaluated arguments]`). In particular, we now take care of
it in `finaliseBoxity` (which is only called from demand analaysis) instead of
`wantToUnboxArg`.
I also had to resurrect `Note [Product demands for function body]` and rename
it to `Note [Unboxed demand on function bodies returning small products]` to
avoid huge regressions in `join004` and `join007`, thereby fixing #4267 again.
See the updated Note for details.
A nice side-effect is that the worker/wrapper transformation no longer needs to
look at strictness info and other bits such as `InsideInlineableFun` flags
(needed for `Note [Do not unbox class dictionaries]`) at all. It simply collects
boxity info from argument demands and interprets them with a severely simplified
`wantToUnboxArg`. All the smartness is in `finaliseBoxity`, which could be moved
to DmdAnal completely, if it wasn't for the call to `dubiousDataConInstArgTys`
which would be awkward to export.
I spent some time figuring out the reason for why `T16197` failed prior to my
amendments to `Note [Unboxing evaluated arguments]`. After having it figured
out, I minimised it a bit and added `T16197b`, which simply compares computed
strictness signatures and thus should be far simpler to eyeball.
The 12% ghc/alloc regression in T11545 is because of the additional `Boxity`
field in `Poly` and `Prod` that results in more allocation during `lubSubDmd`
and `plusSubDmd`. I made sure in the ticky profiles that the number of calls
to those functions stayed the same. We can bear such an increase here, as we
recently improved it by -68% (in b760c1f).
T18698* regress slightly because there is more unboxing of dictionaries
happening and that causes Lint (mostly) to allocate more.
Fixes #19871, #19407, #4267, #16859, #18907 and #13331.
Metric Increase:
T11545
T18698a
T18698b
Metric Decrease:
T12425
T16577
T18223
T18282
T4267
T9961
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/GHC/Builtin/PrimOps.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/Core/Make.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/CprAnal.hs | 36 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/DmdAnal.hs | 271 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Pipeline.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/SpecConstr.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/WorkWrap.hs | 5 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/WorkWrap/Utils.hs | 385 | ||||
-rw-r--r-- | compiler/GHC/Driver/Session.hs | 6 | ||||
-rw-r--r-- | compiler/GHC/Types/Basic.hs | 6 | ||||
-rw-r--r-- | compiler/GHC/Types/Demand.hs | 755 | ||||
-rw-r--r-- | compiler/GHC/Types/Id/Make.hs | 9 | ||||
-rw-r--r-- | compiler/GHC/Utils/Misc.hs | 23 |
13 files changed, 977 insertions, 525 deletions
diff --git a/compiler/GHC/Builtin/PrimOps.hs b/compiler/GHC/Builtin/PrimOps.hs index 40f5e65605..164d1e2ea7 100644 --- a/compiler/GHC/Builtin/PrimOps.hs +++ b/compiler/GHC/Builtin/PrimOps.hs @@ -38,7 +38,7 @@ import GHC.Builtin.Names ( gHC_PRIMOPWRAPPERS ) import GHC.Core.TyCon ( TyCon, isPrimTyCon, PrimRep(..) ) import GHC.Core.Type import GHC.Types.RepType ( tyConPrimRep1 ) -import GHC.Types.Basic ( Arity, Boxity(..) ) +import GHC.Types.Basic ( Arity ) import GHC.Types.Fixity ( Fixity(..), FixityDirection(..) ) import GHC.Types.SrcLoc ( wiredInSrcSpan ) import GHC.Types.ForeignCall ( CLabelString ) diff --git a/compiler/GHC/Core/Make.hs b/compiler/GHC/Core/Make.hs index b43344a92c..3387523e20 100644 --- a/compiler/GHC/Core/Make.hs +++ b/compiler/GHC/Core/Make.hs @@ -65,7 +65,6 @@ import GHC.Types.Demand import GHC.Types.Name hiding ( varName ) import GHC.Types.Literal import GHC.Types.Unique.Supply -import GHC.Types.Basic import GHC.Core import GHC.Core.Utils ( exprType, needsCaseBinding, mkSingleAltCase, bindNonRec ) diff --git a/compiler/GHC/Core/Opt/CprAnal.hs b/compiler/GHC/Core/Opt/CprAnal.hs index d3f6a248ce..65468cd037 100644 --- a/compiler/GHC/Core/Opt/CprAnal.hs +++ b/compiler/GHC/Core/Opt/CprAnal.hs @@ -21,14 +21,13 @@ import GHC.Types.Id.Info import GHC.Types.Demand import GHC.Types.Cpr -import GHC.Core.DataCon import GHC.Core.FamInstEnv -import GHC.Core.Multiplicity -import GHC.Core.Opt.WorkWrap.Utils +import GHC.Core.DataCon import GHC.Core.Type import GHC.Core.Utils import GHC.Core import GHC.Core.Seq +import GHC.Core.Opt.WorkWrap.Utils import GHC.Data.Graph.UnVar -- for UnVarSet @@ -639,30 +638,23 @@ nonVirgin env = env { ae_virgin = False } -- See Note [CPR for binders that will be unboxed]. extendSigEnvForArg :: AnalEnv -> Id -> AnalEnv extendSigEnvForArg env id - = extendSigEnv env id (CprSig (argCprType env (idType id) (idDemandInfo id))) + = extendSigEnv env id (CprSig (argCprType (idDemandInfo id))) -- | Produces a 'CprType' according to how a strict argument will be unboxed. -- Examples: -- --- * A head-strict demand @1L@ on @Int@ would translate to @1@ --- * A product demand @1P(1L,L)@ on @(Int, Bool)@ would translate to @1(1,)@ --- * A product demand @1P(1L,L)@ on @(a , Bool)@ would translate to @1(,)@, --- because the unboxing strategy would not unbox the @a@. -argCprType :: AnalEnv -> Type -> Demand -> CprType -argCprType env arg_ty dmd = CprType 0 (go arg_ty dmd) +-- * A head-strict demand @1!L@ would translate to @1@ +-- * A product demand @1!P(1!L,L)@ would translate to @1(1,)@ +-- * A product demand @1!P(1L,L)@ would translate to @1(,)@, +-- because the first field will not be unboxed. +argCprType :: Demand -> CprType +argCprType dmd = CprType 0 (go dmd) where - go ty dmd - | Unbox (DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args }) ds - <- wantToUnboxArg (ae_fam_envs env) MaybeArgOfInlineableFun ty dmd - -- No existentials; see Note [Which types are unboxed?]) - -- Otherwise we'd need to call dataConRepInstPat here and thread a - -- UniqSupply. So argCprType is a bit less aggressive than it could - -- be, for the sake of coding convenience. - , null (dataConExTyCoVars dc) - , let arg_tys = map scaledThing (dataConInstArgTys dc tc_args) - = ConCpr (dataConTag dc) (zipWith go arg_tys ds) - | otherwise - = topCpr + go (n :* sd) + | isAbs n = topCpr + | Prod Unboxed ds <- sd = ConCpr fIRST_TAG (strictMap go ds) + | Poly Unboxed _ <- sd = ConCpr fIRST_TAG [] + | otherwise = topCpr {- Note [Safe abortion in the fixed-point iteration] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs index 5f209701a9..fa4bed48f0 100644 --- a/compiler/GHC/Core/Opt/DmdAnal.hs +++ b/compiler/GHC/Core/Opt/DmdAnal.hs @@ -32,7 +32,8 @@ import GHC.Core.Utils import GHC.Core.TyCon import GHC.Core.Type import GHC.Core.FVs ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds ) -import GHC.Core.Coercion ( Coercion, coVarsOfCo ) +import GHC.Core.Coercion ( Coercion ) +import GHC.Core.TyCo.FVs ( coVarsOfCos ) import GHC.Core.FamInstEnv import GHC.Core.Opt.Arity ( typeArity ) import GHC.Utils.Misc @@ -55,8 +56,9 @@ _ = pprTrace -- Tired of commenting out the import all the time -} -- | Options for the demand analysis -newtype DmdAnalOpts = DmdAnalOpts - { dmd_strict_dicts :: Bool -- ^ Use strict dictionaries +data DmdAnalOpts = DmdAnalOpts + { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries + , dmd_unbox_width :: !Int -- ^ Use strict dictionaries } -- This is a strict alternative to (,) @@ -276,8 +278,10 @@ dmdAnalBindLetUp top_lvl env id rhs anal_body = WithDmdType final_ty (R (NonRec where WithDmdType body_ty body' = anal_body env WithDmdType body_ty' id_dmd = findBndrDmd env body_ty id - !id' = setBindIdDemandInfo top_lvl id id_dmd - (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd) rhs + -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils" + id_dmd' = finaliseBoxity (ae_fam_envs env) NotInsideInlineableFun (idType id) id_dmd + !id' = setBindIdDemandInfo top_lvl id id_dmd' + (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd') rhs -- See Note [Absence analysis for stable unfoldings and RULES] rule_fvs = bndrRuleAndUnfoldingIds id @@ -425,21 +429,24 @@ dmdAnal' env dmd (Case scrut case_bndr ty [Alt alt bndrs rhs]) | is_single_data_alt alt = let WithDmdType rhs_ty rhs' = dmdAnal env dmd rhs - WithDmdType alt_ty1 dmds = findBndrsDmds env rhs_ty bndrs + WithDmdType alt_ty1 fld_dmds = findBndrsDmds env rhs_ty bndrs WithDmdType alt_ty2 case_bndr_dmd = findBndrDmd env alt_ty1 case_bndr + !case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd -- Evaluation cardinality on the case binder is irrelevant and a no-op. -- What matters is its nested sub-demand! + -- NB: If case_bndr_dmd is absDmd, boxity will say Unboxed, which is + -- what we want, because then `seq` will put a `seqDmd` on its scrut. (_ :* case_bndr_sd) = case_bndr_dmd -- Compute demand on the scrutinee -- FORCE the result, otherwise thunks will end up retaining the -- whole DmdEnv !(!bndrs', !scrut_sd) | DataAlt _ <- alt - , id_dmds <- addCaseBndrDmd case_bndr_sd dmds - -- See Note [Demand on scrutinee of a product case] - = let !new_info = setBndrsDemandInfo bndrs id_dmds - !new_prod = mkProd id_dmds - in (new_info, new_prod) + -- See Note [Demand on the scrutinee of a product case] + -- See Note [Demand on case-alternative binders] + , (!scrut_sd, fld_dmds') <- addCaseBndrDmd case_bndr_sd fld_dmds + , let !bndrs' = setBndrsDemandInfo bndrs fld_dmds' + = (bndrs', scrut_sd) | otherwise -- __DEFAULT and literal alts. Simply add demands and discard the -- evaluation cardinality, as we evaluate the scrutinee exactly once. @@ -454,7 +461,6 @@ dmdAnal' env dmd (Case scrut case_bndr ty [Alt alt bndrs rhs]) WithDmdType scrut_ty scrut' = dmdAnal env scrut_sd scrut res_ty = alt_ty3 `plusDmdType` toPlusDmdArg scrut_ty - !case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd in -- pprTrace "dmdAnal:Case1" (vcat [ text "scrut" <+> ppr scrut -- , text "dmd" <+> ppr dmd @@ -482,8 +488,9 @@ dmdAnal' env dmd (Case scrut case_bndr ty alts) WithDmdType rest_ty as' = combineAltDmds as in WithDmdType (lubDmdType cur_ty rest_ty) (a':as') - WithDmdType scrut_ty scrut' = dmdAnal env topSubDmd scrut - WithDmdType alt_ty1 case_bndr' = annotateBndr env alt_ty case_bndr + WithDmdType alt_ty1 case_bndr_dmd = findBndrDmd env alt_ty case_bndr + !case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd + WithDmdType scrut_ty scrut' = dmdAnal env topSubDmd scrut -- NB: Base case is botDmdType, for empty case alternatives -- This is a unit for lubDmdType, and the right result -- when there really are no alternatives @@ -549,12 +556,30 @@ dmdAnalSumAlt env dmd case_bndr (Alt con bndrs rhs) | WithDmdType rhs_ty rhs' <- dmdAnal env dmd rhs , WithDmdType alt_ty dmds <- findBndrsDmds env rhs_ty bndrs , let (_ :* case_bndr_sd) = findIdDemand alt_ty case_bndr - -- See Note [Demand on scrutinee of a product case] - id_dmds = addCaseBndrDmd case_bndr_sd dmds + -- See Note [Demand on case-alternative binders] + -- we can't use the scrut_sd, because it says 'Prod' and we'll use + -- topSubDmd anyway for scrutinees of sum types. + (!_scrut_sd, dmds') = addCaseBndrDmd case_bndr_sd dmds -- Do not put a thunk into the Alt - !new_ids = setBndrsDemandInfo bndrs id_dmds + !new_ids = setBndrsDemandInfo bndrs dmds' = WithDmdType alt_ty (Alt con new_ids rhs') +-- Precondition: The SubDemand is not a Call +-- See Note [Demand on the scrutinee of a product case] +-- and Note [Demand on case-alternative binders] +addCaseBndrDmd :: SubDemand -- On the case binder + -> [Demand] -- On the fields of the constructor + -> (SubDemand, [Demand]) + -- SubDemand on the case binder incl. field demands + -- and final demands for the components of the constructor +addCaseBndrDmd case_sd fld_dmds + | Just (_, ds) <- viewProd (length fld_dmds) scrut_sd + = (scrut_sd, ds) + | otherwise + = pprPanic "was a call demand" (ppr case_sd $$ ppr fld_dmds) -- See the Precondition + where + scrut_sd = case_sd `plusSubDmd` mkProd Unboxed fld_dmds + {- Note [Analysing with absent demand] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -674,6 +699,51 @@ worker, so the worker will rebuild x = (a, absent-error) and that'll crash. +Note [Demand on case-alternative binders] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The demand on a binder in a case alternative comes + (a) From the demand on the binder itself + (b) From the demand on the case binder +Forgetting (b) led directly to #10148. + +Example. Source code: + f x@(p,_) = if p then foo x else True + + foo (p,True) = True + foo (p,q) = foo (q,p) + +After strictness analysis, forgetting (b): + f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) -> + case x_an1 + of wild_X7 [Dmd=MP(ML,ML)] + { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) -> + case p_an2 of _ { + False -> GHC.Types.True; + True -> foo wild_X7 } + +Note that ds_dnz is syntactically dead, but the expression bound to it is +reachable through the case binder wild_X7. Now watch what happens if we inline +foo's wrapper: + f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) -> + case x_an1 + of _ [Dmd=MP(ML,ML)] + { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) -> + case p_an2 of _ { + False -> GHC.Types.True; + True -> $wfoo_soq GHC.Types.True ds_dnz } + +Look at that! ds_dnz has come back to life in the call to $wfoo_soq! A second +run of demand analysis would no longer infer ds_dnz to be absent. +But unlike occurrence analysis, which infers properties of the *syntactic* +shape of the program, the results of demand analysis describe expressions +*semantically* and are supposed to be mostly stable across Simplification. +That's why we should better account for (b). +In #10148, we ended up emitting a single-entry thunk instead of an updateable +thunk for a let binder that was an an absent case-alt binder during DmdAnal. + +This is needed even for non-product types, in case the case-binder +is used but the components of the case alternative are not. + Note [Aggregated demand for cardinality] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FIXME: This Note should be named [LetUp vs. LetDown] and probably predates @@ -725,43 +795,42 @@ strict in |y|. ************************************************************************ -} -dmdTransform :: AnalEnv -- ^ The strictness environment - -> Id -- ^ The function - -> SubDemand -- ^ The demand on the function - -> DmdType -- ^ The demand type of the function in this context - -- Returned DmdEnv includes the demand on - -- this function plus demand on its free variables - +dmdTransform :: AnalEnv -- ^ The analysis environment + -> Id -- ^ The variable + -> SubDemand -- ^ The evaluation context of the var + -> DmdType -- ^ The demand type unleashed by the variable in this + -- context. The returned DmdEnv includes the demand on + -- this function plus demand on its free variables -- See Note [What are demand signatures?] in "GHC.Types.Demand" -dmdTransform env var dmd +dmdTransform env var sd -- Data constructors | isDataConWorkId var - = dmdTransformDataConSig (idArity var) dmd + = dmdTransformDataConSig (idArity var) sd -- Dictionary component selectors -- Used to be controlled by a flag. -- See #18429 for some perf measurements. | Just _ <- isClassOpId_maybe var - = -- pprTrace "dmdTransform:DictSel" (ppr var $$ ppr dmd) $ - dmdTransformDictSelSig (idDmdSig var) dmd + = -- pprTrace "dmdTransform:DictSel" (ppr var $$ ppr (idDmdSig var) $$ ppr sd) $ + dmdTransformDictSelSig (idDmdSig var) sd -- Imported functions | isGlobalId var - , let res = dmdTransformSig (idDmdSig var) dmd - = -- pprTrace "dmdTransform:import" (vcat [ppr var, ppr (idDmdSig var), ppr dmd, ppr res]) + , let res = dmdTransformSig (idDmdSig var) sd + = -- pprTrace "dmdTransform:import" (vcat [ppr var, ppr (idDmdSig var), ppr sd, ppr res]) res -- Top-level or local let-bound thing for which we use LetDown ('useLetUp'). -- In that case, we have a strictness signature to unleash in our AnalEnv. | Just (sig, top_lvl) <- lookupSigEnv env var - , let fn_ty = dmdTransformSig sig dmd - = -- pprTrace "dmdTransform:LetDown" (vcat [ppr var, ppr sig, ppr dmd, ppr fn_ty]) $ + , let fn_ty = dmdTransformSig sig sd + = -- pprTrace "dmdTransform:LetDown" (vcat [ppr var, ppr sig, ppr sd, ppr fn_ty]) $ case top_lvl of - NotTopLevel -> addVarDmd fn_ty var (C_11 :* dmd) + NotTopLevel -> addVarDmd fn_ty var (C_11 :* sd) TopLevel | isInterestingTopLevelFn var -- Top-level things will be used multiple times or not at -- all anyway, hence the multDmd below: It means we don't -- have to track whether @var@ is used strictly or at most -- once, because ultimately it never will. - -> addVarDmd fn_ty var (C_0N `multDmd` (C_11 :* dmd)) -- discard strictness + -> addVarDmd fn_ty var (C_0N `multDmd` (C_11 :* sd)) -- discard strictness | otherwise -> fn_ty -- don't bother tracking; just annotate with 'topDmd' later -- Everything else: @@ -769,8 +838,8 @@ dmdTransform env var dmd -- * Lambda binders -- * Case and constructor field binders | otherwise - = -- pprTrace "dmdTransform:other" (vcat [ppr var, ppr sig, ppr dmd, ppr res]) $ - unitDmdType (unitVarEnv var (C_11 :* dmd)) + = -- pprTrace "dmdTransform:other" (vcat [ppr var, ppr boxity, ppr sd]) $ + unitDmdType (unitVarEnv var (C_11 :* sd)) {- ********************************************************************* * * @@ -802,15 +871,21 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs where rhs_arity = idArity id -- See Note [Demand signatures are computed for a threshold demand based on idArity] - rhs_dmd -- See Note [Demand analysis for join points] - -- See Note [Invariants on join points] invariant 2b, in GHC.Core - -- rhs_arity matches the join arity of the join point - | isJoinId id - = mkCalledOnceDmds rhs_arity let_dmd - | otherwise - = mkCalledOnceDmds rhs_arity topSubDmd - - WithDmdType rhs_dmd_ty rhs' = dmdAnal env rhs_dmd rhs + + rhs_dmd = mkCalledOnceDmds rhs_arity body_dmd + + body_dmd + | isJoinId id + -- See Note [Demand analysis for join points] + -- See Note [Invariants on join points] invariant 2b, in GHC.Core + -- rhs_arity matches the join arity of the join point + = let_dmd + | otherwise + -- See Note [Unboxed demand on function bodies returning small products] + = unboxedWhenSmall (ae_opts env) (unboxableResultWidth env id) topSubDmd + + -- See Note [Do not unbox class dictionaries] + WithDmdType rhs_dmd_ty rhs' = dmdAnal (adjustInlFun id env) rhs_dmd rhs DmdType rhs_fv rhs_dmds rhs_div = rhs_dmd_ty sig = mkDmdSigForArity rhs_arity (DmdType sig_fv rhs_dmds rhs_div) @@ -829,6 +904,7 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs -- might turn into used-many even if the signature was stable and -- we'd have to do an additional iteration. reuseEnv makes sure that -- we never get used-once info for FVs of recursive functions. + -- See #14816 where we try to get rid of reuseEnv. rhs_fv1 = case rec_flag of Recursive -> reuseEnv rhs_fv NonRecursive -> rhs_fv @@ -839,6 +915,26 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs -- See Note [Lazy and unleashable free variables] !(!lazy_fv, !sig_fv) = partitionVarEnv isWeakDmd rhs_fv2 +unboxableResultWidth :: AnalEnv -> Id -> Maybe Arity +unboxableResultWidth env id + | (pis,ret_ty) <- splitPiTys (idType id) + , count (not . isNamedBinder) pis == idArity id + , Just (tc, _tc_args, _co) <- normSplitTyConApp_maybe (ae_fam_envs env) ret_ty + , Just dc <- tyConSingleAlgDataCon_maybe tc + , null (dataConExTyCoVars dc) -- Can't unbox results with existentials + = Just (dataConRepArity dc) + | otherwise + = Nothing + +unboxedWhenSmall :: DmdAnalOpts -> Maybe Arity -> SubDemand -> SubDemand +-- See Note [Unboxed demand on function bodies returning small products] +unboxedWhenSmall opts mb_n sd + | Just n <- mb_n + , n <= dmd_unbox_width opts + = unboxSubDemand sd + | otherwise + = sd + -- | If given the (local, non-recursive) let-bound 'Id', 'useLetUp' determines -- whether we should process the binding up (body before rhs) or down (rhs -- before body). @@ -1056,34 +1152,6 @@ Now f's optimised RHS will be \x.a, but if we change g to (error "..") (since it is apparently Absent) and then inline (\x. fst g) we get disaster. But regardless, #18638 was a more complicated version of this, that actually happened in practice. - -Historical Note [Product demands for function body] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In 2013 I spotted this example, in shootout/binary_trees: - - Main.check' = \ b z ds. case z of z' { I# ip -> - case ds_d13s of - Main.Nil -> z' - Main.Node s14k s14l s14m -> - Main.check' (not b) - (Main.check' b - (case b { - False -> I# (-# s14h s14k); - True -> I# (+# s14h s14k) - }) - s14l) - s14m } } } - -Here we *really* want to unbox z, even though it appears to be used boxed in -the Nil case. Partly the Nil case is not a hot path. But more specifically, -the whole function gets the CPR property if we do. - -That motivated using a demand of C1(C1(C1(P(L,L)))) for the RHS, where -(solely because the result was a product) we used a product demand -(albeit with lazy components) for the body. But that gives very silly -behaviour -- see #17932. Happily it turns out now to be entirely -unnecessary: we get good results with C1(C1(C1(L))). So I simply -deleted the special case. -} {- ********************************************************************* @@ -1159,7 +1227,6 @@ dmdFix top_lvl env let_dmd orig_pairs {- Note [Safe abortion in the fixed-point iteration] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Fixed-point iteration may fail to terminate. But we cannot simply give up and return the environment and code unchanged! We still need to do one additional round, for two reasons: @@ -1231,8 +1298,11 @@ unitDmdType :: DmdEnv -> DmdType unitDmdType dmd_env = DmdType dmd_env [] topDiv coercionDmdEnv :: Coercion -> DmdEnv -coercionDmdEnv co = mapVarEnv (const topDmd) (getUniqSet $ coVarsOfCo co) - -- The VarSet from coVarsOfCo is really a VarEnv Var +coercionDmdEnv co = coercionsDmdEnv [co] + +coercionsDmdEnv :: [Coercion] -> DmdEnv +coercionsDmdEnv cos = mapVarEnv (const topDmd) (getUniqSet $ coVarsOfCos cos) + -- The VarSet from coVarsOfCos is really a VarEnv Var addVarDmd :: DmdType -> Var -> Demand -> DmdType addVarDmd (DmdType fv ds res) var dmd @@ -1283,18 +1353,6 @@ setBndrsDemandInfo (b:bs) (d:ds) = setBndrsDemandInfo [] ds = assert (null ds) [] setBndrsDemandInfo bs _ = pprPanic "setBndrsDemandInfo" (ppr bs) -annotateBndr :: AnalEnv -> DmdType -> Var -> WithDmdType Var --- The returned env has the var deleted --- The returned var is annotated with demand info --- according to the result demand of the provided demand type --- No effect on the argument demands -annotateBndr env dmd_ty var - | isId var = WithDmdType dmd_ty' new_id - | otherwise = WithDmdType dmd_ty var - where - new_id = setIdDemandInfo var dmd - WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty var - annotateLamIdBndr :: AnalEnv -> DmdType -- Demand type of body -> Id -- Lambda binder @@ -1308,8 +1366,11 @@ annotateLamIdBndr env dmd_ty id -- pprTrace "annLamBndr" (vcat [ppr id, ppr dmd_ty, ppr final_ty]) $ WithDmdType main_ty new_id where - new_id = setIdDemandInfo id dmd - main_ty = addDemand dmd dmd_ty' + -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils" + -- and Note [Do not unbox class dictionaries] + dmd' = finaliseBoxity (ae_fam_envs env) (ae_inl_fun env) (idType id) dmd + new_id = setIdDemandInfo id dmd' + main_ty = addDemand dmd' dmd_ty' WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty id {- Note [NOINLINE and strictness] @@ -1389,11 +1450,14 @@ demand put on them (topDmd), and add that to the "lazy_fv" returned by "dmdFix". data AnalEnv = AE - { ae_strict_dicts :: !Bool -- ^ Enable strict dict - , ae_sigs :: !SigEnv - , ae_virgin :: !Bool -- ^ True on first iteration only - -- See Note [Initialising strictness] - , ae_fam_envs :: !FamInstEnvs + { ae_opts :: !DmdAnalOpts -- ^ Analysis options + , ae_sigs :: !SigEnv + , ae_virgin :: !Bool -- ^ True on first iteration only + -- See Note [Initialising strictness] + , ae_fam_envs :: !FamInstEnvs + , ae_inl_fun :: !InsideInlineableFun + -- ^ Whether we analyse the body of an inlineable fun. + -- See Note [Do not unbox class dictionaries]. } -- We use the se_env to tell us whether to @@ -1408,16 +1472,16 @@ type SigEnv = VarEnv (DmdSig, TopLevelFlag) instance Outputable AnalEnv where ppr env = text "AE" <+> braces (vcat [ text "ae_virgin =" <+> ppr (ae_virgin env) - , text "ae_strict_dicts =" <+> ppr (ae_strict_dicts env) , text "ae_sigs =" <+> ppr (ae_sigs env) ]) emptyAnalEnv :: DmdAnalOpts -> FamInstEnvs -> AnalEnv emptyAnalEnv opts fam_envs - = AE { ae_strict_dicts = dmd_strict_dicts opts + = AE { ae_opts = opts , ae_sigs = emptySigEnv , ae_virgin = True , ae_fam_envs = fam_envs + , ae_inl_fun = NotInsideInlineableFun } emptySigEnv :: SigEnv @@ -1445,6 +1509,13 @@ lookupSigEnv env id = lookupVarEnv (ae_sigs env) id nonVirgin :: AnalEnv -> AnalEnv nonVirgin env = env { ae_virgin = False } +-- | Sets 'ae_inl_fun' according to whether the given 'Id' has an inlineable +-- unfolding. See Note [Do not unbox class dictionaries]. +adjustInlFun :: Id -> AnalEnv -> AnalEnv +adjustInlFun id env + | isStableUnfolding (realIdUnfolding id) = env { ae_inl_fun = InsideInlineableFun } + | otherwise = env { ae_inl_fun = NotInsideInlineableFun } + findBndrsDmds :: AnalEnv -> DmdType -> [Var] -> WithDmdType [Demand] -- Return the demands on the Ids in the [Var] findBndrsDmds env dmd_ty bndrs @@ -1472,9 +1543,9 @@ findBndrDmd env dmd_ty id strictify dmd -- See Note [Making dictionaries strict] - | ae_strict_dicts env + | dmd_strict_dicts (ae_opts env) -- We never want to strictify a recursive let. At the moment - -- annotateBndr is only call for non-recursive lets; if that + -- findBndrDmd is never called for recursive lets; if that -- changes, we need a RecFlag parameter and another guard here. = strictifyDictDmd id_ty dmd | otherwise @@ -1522,7 +1593,7 @@ to inline one applied to a function. Sometimes this makes just enough of a difference to stop a function from inlining. This is documented in #18421. -It's somewhat similar to Note [Do not unpack class dictionaries] although +It's somewhat similar to Note [Do not unbox class dictionaries] although here our problem is with the inliner, not the specializer. Note [Initialising strictness] diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs index 18ac910d15..ee79e28b60 100644 --- a/compiler/GHC/Core/Opt/Pipeline.hs +++ b/compiler/GHC/Core/Opt/Pipeline.hs @@ -1067,6 +1067,7 @@ dmdAnal :: Logger -> DynFlags -> FamInstEnvs -> [CoreRule] -> CoreProgram -> IO dmdAnal logger dflags fam_envs rules binds = do let !opts = DmdAnalOpts { dmd_strict_dicts = gopt Opt_DictsStrict dflags + , dmd_unbox_width = dmdUnboxWidth dflags } binds_plus_dmds = dmdAnalProgram opts fam_envs rules binds Logger.putDumpFileMaybe logger Opt_D_dump_str_signatures "Strictness signatures" FormatText $ diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs index 718c840c96..966e86a344 100644 --- a/compiler/GHC/Core/Opt/SpecConstr.hs +++ b/compiler/GHC/Core/Opt/SpecConstr.hs @@ -1806,7 +1806,7 @@ calcSpecInfo fn (CP { cp_qvars = qvars, cp_args = pats }) extra_bndrs go_one env d (Var v) = extendVarEnv_C plusDmd env v d go_one env (_n :* cd) e -- NB: _n does not have to be strict | (Var _, args) <- collectArgs e - , Just ds <- viewProd (length args) cd + , Just (_b, ds) <- viewProd (length args) cd -- TODO: We may want to look at boxity _b, though... = go env ds args go_one env _ _ = env diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs index 976dcd5fe5..9becea0c18 100644 --- a/compiler/GHC/Core/Opt/WorkWrap.hs +++ b/compiler/GHC/Core/Opt/WorkWrap.hs @@ -734,8 +734,6 @@ splitFun ww_opts fn_id rhs uf_opts = so_uf_opts (wo_simple_opts ww_opts) fn_info = idInfo fn_id (arg_vars, body) = collectBinders rhs - -- collectBinders was not enough for GHC.Event.IntTable.insertWith - -- last time I checked, where manifest lambdas were wrapped in casts (wrap_dmds, div) = splitDmdSig (dmdSigInfo fn_info) @@ -978,8 +976,7 @@ splitThunk :: WwOpts -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)] splitThunk ww_opts is_rec x rhs = assert (not (isJoinId x)) $ do { let x' = localiseId x -- See comment above - ; (useful,_, wrap_fn, fn_arg) - <- mkWWstr_one ww_opts NotArgOfInlineableFun x' + ; (useful,_, wrap_fn, fn_arg) <- mkWWstr_one ww_opts x' ; let res = [ (x, Let (NonRec x' rhs) (wrap_fn fn_arg)) ] ; if useful then assertPpr (isNonRec is_rec) (ppr x) -- The thunk must be non-recursive return res diff --git a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs index 38c103d866..c2287916db 100644 --- a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs +++ b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs @@ -10,8 +10,9 @@ A library for the ``worker\/wrapper'' back-end to the strictness analyser module GHC.Core.Opt.WorkWrap.Utils ( WwOpts(..), initWwOpts, mkWwBodies, mkWWstr, mkWWstr_one, mkWorkerArgs , DataConPatContext(..) - , UnboxingDecision(..), ArgOfInlineableFun(..), wantToUnboxArg - , findTypeShape, mkAbsentFiller, IsRecDataConResult(..), isRecDataCon + , UnboxingDecision(..), InsideInlineableFun(..), wantToUnboxArg + , findTypeShape, IsRecDataConResult(..), isRecDataCon, finaliseBoxity + , mkAbsentFiller , isWorkerSmallEnough ) where @@ -229,7 +230,7 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr res_ty' = GHC.Core.Subst.substTy subst res_ty ; (useful1, work_args, wrap_fn_str, fn_args) - <- mkWWstr opts inlineable_flag cloned_arg_vars + <- mkWWstr opts cloned_arg_vars -- Do CPR w/w. See Note [Always do CPR w/w] ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty) @@ -265,9 +266,6 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr = info `setOccInfo` noOccInfo mb_join_arity = isJoinId_maybe fun_id - inlineable_flag -- See Note [Do not unpack class dictionaries] - | isStableUnfolding (realIdUnfolding fun_id) = MaybeArgOfInlineableFun - | otherwise = NotArgOfInlineableFun -- Note [Do not split void functions] only_one_void_argument @@ -562,59 +560,30 @@ data UnboxingDecision s -- The @[s]@ carries the bits of information with which we can continue -- unboxing, e.g. @s@ will be 'Demand' or 'Cpr'. --- | A specialised Bool for an argument to 'wantToUnboxArg'. --- See Note [Do not unpack class dictionaries]. -data ArgOfInlineableFun - = NotArgOfInlineableFun -- ^ Definitely not in an inlineable fun. - | MaybeArgOfInlineableFun -- ^ We might be in an inlineable fun, so we won't - -- unbox dictionary args. - deriving Eq - --- | Unboxing strategy for strict arguments. -wantToUnboxArg :: FamInstEnvs -> ArgOfInlineableFun -> Type -> Demand -> UnboxingDecision Demand +-- | Unwraps the 'Boxity' decision encoded in the given 'SubDemand' and returns +-- a 'DataConPatContext' as well the nested demands on fields of the 'DataCon' +-- to unbox. +wantToUnboxArg + :: FamInstEnvs + -> Type -- ^ Type of the argument + -> Demand -- ^ How the arg was used + -> UnboxingDecision Demand -- See Note [Which types are unboxed?] -wantToUnboxArg fam_envs inlineable_flag ty dmd - | isAbsDmd dmd +wantToUnboxArg fam_envs ty (n :* sd) + | isAbs n = DropAbsent - | isStrUsedDmd dmd - , Just (tc, tc_args, co) <- normSplitTyConApp_maybe fam_envs ty + | Just (tc, tc_args, co) <- normSplitTyConApp_maybe fam_envs ty , Just dc <- tyConSingleAlgDataCon_maybe tc , let arity = dataConRepArity dc - -- See Note [Unpacking arguments with product and polymorphic demands] - , Just cs <- split_prod_dmd_arity dmd arity - -- See Note [Do not unpack class dictionaries] - , inlineable_flag == NotArgOfInlineableFun || not (isClassPred ty) - -- See Note [mkWWstr and unsafeCoerce] - , cs `lengthIs` arity - -- See Note [Add demands for strict constructors] - , let cs' = addDataConStrictness dc cs - = Unbox (DataConPatContext dc tc_args co) cs' + , Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity Analysis] + -- NB: No strictness or evaluatedness checks here. That is done by + -- 'finaliseBoxity'! + = Unbox (DataConPatContext dc tc_args co) ds | otherwise = StopUnboxing - where - split_prod_dmd_arity dmd arity - -- For seqDmd, it should behave like <S(AAAA)>, for some - -- suitable arity - | isSeqDmd dmd = Just (replicate arity absDmd) - | _ :* Prod ds <- dmd = Just ds - | otherwise = Nothing - -addDataConStrictness :: DataCon -> [Demand] -> [Demand] --- See Note [Add demands for strict constructors] -addDataConStrictness con ds - | Nothing <- dataConWrapId_maybe con - -- DataCon worker=wrapper. Implies no strict fields, so nothing to do - = ds -addDataConStrictness con ds - = zipWithEqual "addDataConStrictness" add ds strs - where - strs = dataConRepStrictness con - add dmd str | isMarkedStrict str = strictifyDmd dmd - | otherwise = dmd - -- | Unboxing strategy for constructed results. wantToUnboxResult :: FamInstEnvs -> Type -> Cpr -> UnboxingDecision Cpr @@ -688,35 +657,8 @@ Note that the data constructor /can/ have evidence arguments: equality constraints, type classes etc. So it can be GADT. These evidence arguments are simply value arguments, and should not get in the way. -Note [Unpacking arguments with product and polymorphic demands] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The argument is unpacked in a case if it has a product type and has a -strict *and* used demand put on it. I.e., arguments, with demands such -as the following ones: - - <S,U(U, L)> - <S(L,S),U> - -will be unpacked, but - - <S,U> or <B,U> - -will not, because the pieces aren't used. This is quite important otherwise -we end up unpacking massive tuples passed to the bottoming function. Example: - - f :: ((Int,Int) -> String) -> (Int,Int) -> a - f g pr = error (g pr) - - main = print (f fst (1, error "no")) - -Does 'main' print "error 1" or "error no"? We don't really want 'f' -to unbox its second argument. This actually happened in GHC's onwn -source code, in Packages.applyPackageFlag, which ended up un-boxing -the enormous DynFlags tuple, and being strict in the -as-yet-un-filled-in unitState files. - -Note [Do not unpack class dictionaries] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note [Do not unbox class dictionaries] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If we have f :: Ord a => [a] -> Int -> a {-# INLINABLE f #-} @@ -729,12 +671,19 @@ BUT if f is strict in the Ord dictionary, we might unpack it, to get fw :: (a->a->Bool) -> [a] -> Int# -> a and the type-class specialiser can't specialise that. An example is #6056. -But in any other situation a dictionary is just an ordinary value, -and can be unpacked. So we track the INLINABLE pragma, and switch -off the unpacking in mkWWstr_one (see the isClassPred test). +But in any other situation, a dictionary is just an ordinary value, +and can be unpacked. So we track the INLINABLE pragma, and discard the boxity +flag in finaliseBoxity (see the isClassPred test). Historical note: #14955 describes how I got this fix wrong the first time. +Note that the simplicity of this fix implies that INLINE functions (such as +wrapper functions after the WW run) will never say that they unbox class +dictionaries. That's not ideal, but not worth losing sleep over, as INLINE +functions will have been inlined by the time we run demand analysis so we'll +see the unboxing around the worker in client modules. I got aware of the issue +in T5075 by the change in boxity of loop between demand analysis runs. + Note [mkWWstr and unsafeCoerce] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By using unsafeCoerce, it is possible to make the number of demands fail to @@ -742,14 +691,14 @@ match the number of constructor arguments; this happened in #8037. If so, the worker/wrapper split doesn't work right and we get a Core Lint bug. The fix here is simply to decline to do w/w if that happens. -Note [Add demands for strict constructors] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note [Unboxing evaluated arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Consider this program (due to Roman): data X a = X !a foo :: X Int -> Int -> Int - foo (X a) n = go 0 + foo x@(X a) n = go 0 where go i | i < n = a + go (i+1) | otherwise = 0 @@ -758,12 +707,12 @@ We want the worker for 'foo' too look like this: $wfoo :: Int# -> Int# -> Int# -with the first argument unboxed, so that it is not eval'd each time -around the 'go' loop (which would otherwise happen, since 'foo' is not -strict in 'a'). It is sound for the wrapper to pass an unboxed arg -because X is strict, so its argument must be evaluated. And if we -*don't* pass an unboxed argument, we can't even repair it by adding a -`seq` thus: +with the first argument unboxed, so that it is not eval'd each time around the +'go' loop (which would otherwise happen, since 'foo' is not strict in 'a'). It +is sound for the wrapper to pass an unboxed arg because X is strict +(see Note [Strictness and Unboxing] in "GHC.Core.Opt.DmdAnal"), so its argument +must be evaluated. And if we *don't* pass an unboxed argument, we can't even +repair it by adding a `seq` thus: foo (X a) n = a `seq` go 0 @@ -771,34 +720,38 @@ because the seq is discarded (very early) since X is strict! So here's what we do -* We leave the demand-analysis alone. The demand on 'a' in the - definition of 'foo' is <L, U(U)>; the strictness info is Lazy - because foo's body may or may not evaluate 'a'; but the usage info - says that 'a' is unpacked and its content is used. +* Since this has nothing to do with how 'foo' uses 'a', we leave demand analysis + alone, but account for the additional evaluatedness when annotating the binder + in 'annotateLamIdBndr' via 'finaliseBoxity', which will retain the Unboxed boxity + on 'a' in the definition of 'foo' in the demand 'L!P(L)'; meaning it's used + lazily but unboxed nonetheless. This seems to contradict + Note [No lazy, Unboxed demands in demand signature], but we know that 'a' is + evaluated and thus can be unboxed. -* During worker/wrapper, if we unpack a strict constructor (as we do - for 'foo'), we use 'addDataConStrictness' to bump up the strictness on - the strict arguments of the data constructor. +* When 'finaliseBoxity' decides to unbox a record, it will zip the field demands + together with the respective 'StrictnessMark'. In case of 'x', it will pair + up the lazy field demand 'L!P(L)' on 'a' with 'MarkedStrict' to account for + the strict field. -* That in turn means that, if the usage info supports doing so - (i.e. splitProdDmd_maybe returns Just), we will unpack that argument - -- even though the original demand (e.g. on 'a') was lazy. +* Said 'StrictnessMark' is passed to the recursive invocation of + 'finaliseBoxity' when deciding whether to unbox 'a'. 'a' was used lazily, but + since it also says 'MarkedStrict', we'll retain the 'Unboxed' boxity on 'a'. -* What does "bump up the strictness" mean? Just add a head-strict - demand to the strictness! Even for a demand like <L,A> we can - safely turn it into <S,A>; remember case (1) of - Note [Worker/wrapper for Strictness and Absence]. +* Worker/wrapper will consult 'wantToUnboxArg' for its unboxing decision. It will + /not/ look at the strictness bits of the demand, only at Boxity flags. As such, + it will happily unbox 'a' despite the lazy demand on it. -The net effect is that the w/w transformation is more aggressive about -unpacking the strict arguments of a data constructor, when that -eagerness is supported by the usage info. +The net effect is that boxity analysis and the w/w transformation are more +aggressive about unboxing the strict arguments of a data constructor than when +looking at strictness info exclusively. It is very much like (Nested) CPR, which +needs its nested fields to be evaluated in order for it to unbox nestedly. There is the usual danger of reboxing, which as usual we ignore. But if X is monomorphic, and has an UNPACK pragma, then this optimisation is even more important. We don't want the wrapper to rebox an unboxed argument, and pass an Int to $wfoo! -This works in nested situations like +This works in nested situations like T10482 data family Bar a data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) @@ -863,6 +816,68 @@ applying the strictness demands to the final result of DmdAnal. The result is that we get the strict demand signature we wanted even if we can't float the case on `x` up through the case on `burble`. +Note [No nested Unboxed inside Boxed in demand signature] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider +``` +f p@(x,y) + | even (x+y) = [] + | otherwise = [p] +``` +Demand analysis will infer that the function body puts a demand of `1P(1!L,1!L)` +on 'p', e.g., Boxed on the outside but Unboxed on the inside. But worker/wrapper +can't unbox the pair components without unboxing the pair! So we better say +`1P(1L,1L)` in the demand signature in order not to spread wrong Boxity info. +That happens in 'finaliseBoxity'. + +Note [No lazy, Unboxed demands in demand signature] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider T19407: + + data Huge = Huge Bool () ... () -- think: DynFlags + data T = T { h :: Huge, n :: Int } + f t@(T h _) = g h t + g (H b _ ... _) t = if b then 1 else n t + +The body of `g` puts (approx.) demand `L!P(A,1)` on `t`. But we better +not put that demand in `g`'s demand signature, because worker/wrapper will not +in general unbox a lazy-and-unboxed demand like `L!P(..)`. +(The exception are known-to-be-evaluated arguments like strict fields, +see Note [Unboxing evaluated arguments].) + +The program above is an example where spreading misinformed boxity through the +signature is particularly egregious. If we give `g` that signature, then `f` +puts demand `S!P(1!P(1L,A,..),ML)` on `t`. Now we will unbox `t` in `f` it and +we get + + f (T (H b _ ... _) n) = $wf b n + $wf b n = $wg b (T (H b x ... x) n) + $wg = ... + +Massive reboxing in `$wf`! Solution: Trim boxity on lazy demands in +'finaliseBoxity', modulo Note [Unboxing evaluated arguments]. + +Note [Finalising boxity for demand signature] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The worker/wrapper pass must strictly adhere to the boxity decisions encoded +in the demand signature, because that is the information that demand analysis +propagates throughout the program. Failing to implement the strategy laid out +in the signature can result in reboxing in unexpected places. Hence, we must +completely anticipate unboxing decisions during demand analysis and reflect +these decicions in demand annotations. That is the job of 'finaliseBoxity', +which is defined here and called from demand analysis. + +Here is a list of different Notes it has to take care of: + + * Note [No lazy, Unboxed demands in demand signature] such as `L!P(L)` in + general, but still allow Note [Unboxing evaluated arguments] + * Note [No nested Unboxed inside Boxed in demand signature] such as `1P(1!L)` + * Implement fixes for corner cases Note [Do not unbox class dictionaries] + and Note [mkWWstr and unsafeCoerce] + +Then, in worker/wrapper blindly trusts the boxity info in the demand signature +and will not look at strictness info *at all*, in 'wantToUnboxArg'. + Note [non-algebraic or open body type warning] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are a few cases where the W/W transformation is told that something @@ -894,7 +909,6 @@ way to express existential types in the worker's type signature. -} mkWWstr :: WwOpts - -> ArgOfInlineableFun -- See Note [Do not unpack class dictionaries] -> [Var] -- Wrapper args; have their demand info on them -- *Includes type variables* -> UniqSM (Bool, -- Is this useful @@ -905,10 +919,10 @@ mkWWstr :: WwOpts [CoreExpr]) -- Reboxed args for the call to the -- original RHS. Corresponds one-to-one -- with the wrapper arg vars -mkWWstr opts inlineable_flag args +mkWWstr opts args = go args where - go_one arg = mkWWstr_one opts inlineable_flag arg + go_one arg = mkWWstr_one opts arg go [] = return (False, [], nop_fn, []) go (arg : args) = do { (useful1, args1, wrap_fn1, wrap_arg) <- go_one arg @@ -925,12 +939,9 @@ mkWWstr opts inlineable_flag args -- * wrap_arg assumes work_args are in scope, and builds a ConApp that -- reconstructs the RHS of wrap_var that we pass to the original RHS -- See Note [Worker/wrapper for Strictness and Absence] -mkWWstr_one :: WwOpts - -> ArgOfInlineableFun -- See Note [Do not unpack class dictionaries] - -> Var - -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr) -mkWWstr_one opts inlineable_flag arg = - case wantToUnboxArg fam_envs inlineable_flag arg_ty arg_dmd of +mkWWstr_one :: WwOpts -> Var -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr) +mkWWstr_one opts arg = + case wantToUnboxArg fam_envs arg_ty arg_dmd of _ | isTyVar arg -> do_nothing DropAbsent @@ -940,7 +951,7 @@ mkWWstr_one opts inlineable_flag arg = -- (that's what mkAbsentFiller does) -> return (True, [], nop_fn, absent_filler) - Unbox dcpc cs -> unbox_one_arg opts arg cs dcpc + Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc _ -> do_nothing -- Other cases, like StopUnboxing @@ -955,17 +966,17 @@ unbox_one_arg :: WwOpts -> [Demand] -> DataConPatContext -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr) -unbox_one_arg opts arg_var cs +unbox_one_arg opts arg_var ds DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args , dcpc_co = co } = do { pat_bndrs_uniqs <- getUniquesM ; let ex_name_fss = map getOccFS $ dataConExTyCoVars dc (ex_tvs', arg_ids) = dataConRepFSInstPat (ex_name_fss ++ repeat ww_prefix) pat_bndrs_uniqs (idMult arg_var) dc tc_args - arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids cs + arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids ds unbox_fn = mkUnpackCase (Var arg_var) co (idMult arg_var) dc (ex_tvs' ++ arg_ids') - ; (_, worker_args, wrap_fn, wrap_args) <- mkWWstr opts NotArgOfInlineableFun (ex_tvs' ++ arg_ids') + ; (_, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ arg_ids') ; let wrap_arg = mkConApp dc (map Type tc_args ++ wrap_args) `mkCast` mkSymCo co ; return (True, worker_args, unbox_fn . wrap_fn, wrap_arg) } -- Don't pass the arg, rebox instead @@ -1009,42 +1020,45 @@ mkAbsentFiller opts arg {- Note [Worker/wrapper for Strictness and Absence] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The worker/wrapper transformation, mkWWstr_one, takes into account -several possibilities to decide if the function is worthy for -splitting: +The worker/wrapper transformation, mkWWstr_one, takes concrete action +based on the 'UnboxingDescision' returned by 'wantToUnboxArg'. +The latter takes into account several possibilities to decide if the +function is worthy for splitting: 1. If an argument is absent, it would be silly to pass it to - the worker. Hence the isAbsDmd case. This case must come - first because a demand like <S,A> or <B,A> is possible. - E.g. <B,A> comes from a function like + the worker. Hence the DropAbsent case. This case must come + first because the bottom demand B is also strict. + E.g. B comes from a function like f x = error "urk" - and <S,A> can come from Note [Add demands for strict constructors] - -2. If the argument is evaluated strictly, and we can split the - product demand (splitProdDmd_maybe), then unbox it and w/w its - pieces. For example - - f :: (Int, Int) -> Int - f p = (case p of (a,b) -> a) + 1 - is split to - f :: (Int, Int) -> Int - f p = case p of (a,b) -> $wf a - - $wf :: Int -> Int - $wf a = a + 1 - - and - g :: Bool -> (Int, Int) -> Int - g c p = case p of (a,b) -> - if c then a else b - is split to - g c p = case p of (a,b) -> $gw c a b - $gw c a b = if c then a else b - -2a But do /not/ split if the components are not used; that is, the - usage is just 'Used' rather than 'UProd'. In this case - splitProdDmd_maybe returns Nothing. Otherwise we risk decomposing - a massive tuple which is barely used. Example: + and the absent demand A can come from Note [Unboxing evaluated arguments] + +2. If the argument is evaluated strictly (or known to be eval'd), + we can take a view into the product demand ('viewProd'). In accordance + with Note [Boxity analysis], 'wantToUnboxArg' will say 'Unbox'. + 'mkWWstr_one' then follows suit it and recurses into the fields of the + product demand. For example + + f :: (Int, Int) -> Int + f p = (case p of (a,b) -> a) + 1 + is split to + f :: (Int, Int) -> Int + f p = case p of (a,b) -> $wf a + + $wf :: Int -> Int + $wf a = a + 1 + + and + g :: Bool -> (Int, Int) -> Int + g c p = case p of (a,b) -> + if c then a else b + is split to + g c p = case p of (a,b) -> $gw c a b + $gw c a b = if c then a else b + +2a But do /not/ split if Boxity Analysis said "Boxed". + In this case, 'wantToUnboxArg' returns 'StopUnboxing'. + Otherwise we risk decomposing and reboxing a massive + tuple which is barely used. Example: f :: ((Int,Int) -> String) -> (Int,Int) -> a f g pr = error (g pr) @@ -1055,10 +1069,11 @@ splitting: Imagine that it had millions of fields. This actually happened in GHC itself where the tuple was DynFlags -3. A plain 'seqDmd', which is head-strict with usage UHead, can't - be split by splitProdDmd_maybe. But we want it to behave just - like U(AAAA) for suitable number of absent demands. So we have - a special case for it, with arity coming from the data constructor. +3. In all other cases (e.g., lazy, used demand and not eval'd), + 'finaliseBoxity' will have cleared the Boxity flag to 'Boxed' + (see Note [Finalising boxity for demand signature]) and + 'wantToUnboxArg' returns 'StopUnboxing' so that 'mkWWstr_one' + stops unboxing. Note [Worker/wrapper for bottoming functions] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1162,7 +1177,7 @@ Needless to say, there are some wrinkles: Ideally, we'd just look at the 'StrictnessMark' of the DataCon's field, but it's quite nasty to thread the marks though 'mkWWstr' and 'mkWWstr_one'. So we rather look out for a necessary condition for strict fields: - Note [Add demands for strict constructors] makes it so that the demand on + Note [Unboxing evaluated arguments] makes it so that the demand on 'zs' is absent and /strict/: It will get cardinality 'C_10', the empty interval, rather than 'C_00'. Hence the 'isStrictDmd' check: It guarantees we never fill in an error-thunk for an absent strict field. @@ -1395,6 +1410,56 @@ isRecDataCon fam_envs fuel dc -> combineIRDCRs (map (\dc -> go_dc (subWithInf fuel 1) rec_tc' dc) dcs) -- See Note [Detecting recursive data constructors], point (4) +-- | A specialised Bool for an argument to 'finaliseBoxity'. +-- See Note [Do not unbox class dictionaries]. +data InsideInlineableFun + = NotInsideInlineableFun -- ^ Not in an inlineable fun. + | InsideInlineableFun -- ^ We are in an inlineable fun, so we won't + -- unbox dictionary args. + deriving Eq + +-- | This function makes sure that the demand only says 'Unboxed' where +-- worker/wrapper should actually unbox and trims any boxity beyond that. +-- Called for every demand annotation during DmdAnal. +-- +-- > data T a = T !a +-- > f :: (T (Int,Int), Int) -> () +-- > f p = ... -- demand on p: 1!P(L!P(L!P(L), L!P(L)), L!P(L)) +-- +-- 'finaliseBoxity' will trim the demand on 'p' to 1!P(L!P(LP(L), LP(L)), LP(L)). +-- This is done when annotating lambdas and thunk bindings. +-- See Note [Finalising boxity for demand signature] +finaliseBoxity + :: FamInstEnvs + -> InsideInlineableFun -- ^ See the haddocks on 'InsideInlineableFun' + -> Type -- ^ Type of the argument + -> Demand -- ^ How the arg was used + -> Demand +finaliseBoxity env in_inl_fun ty dmd = go NotMarkedStrict ty dmd + where + go mark ty dmd@(n :* _) = + case wantToUnboxArg env ty dmd of + DropAbsent -> dmd + Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} ds + -- See Note [No lazy, Unboxed demands in demand signature] + -- See Note [Unboxing evaluated arguments] + | isStrict n || isMarkedStrict mark + -- See Note [Do not unbox class dictionaries] + , in_inl_fun == NotInsideInlineableFun || not (isClassPred ty) + -- See Note [mkWWstr and unsafeCoerce] + , ds `lengthIs` dataConRepArity dc + , let arg_tys = dubiousDataConInstArgTys dc tc_args + -> -- pprTrace "finaliseBoxity:Unbox" (ppr ty $$ ppr dmd $$ ppr ds) $ + n :* (mkProd Unboxed $! zip_go_with_marks dc arg_tys ds) + -- See Note [No nested Unboxed inside Boxed in demand signature] + _ -> trimBoxity dmd + + -- See Note [Unboxing evaluated arguments] + zip_go_with_marks dc arg_tys ds = case dataConWrapId_maybe dc of + Nothing -> strictZipWith (go NotMarkedStrict) arg_tys ds + -- Shortcut when DataCon worker=wrapper + Just _ -> strictZipWith3 go (dataConRepStrictness dc) arg_tys ds + {- ************************************************************************ * * diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs index aaedf4f2d7..478a0f7737 100644 --- a/compiler/GHC/Driver/Session.hs +++ b/compiler/GHC/Driver/Session.hs @@ -479,6 +479,9 @@ data DynFlags = DynFlags { -- a pattern against. A safe guard -- against exponential blow-up. simplTickFactor :: Int, -- ^ Multiplier for simplifier ticks + dmdUnboxWidth :: !Int, -- ^ Whether DmdAnal should optimistically put an + -- Unboxed demand on returned products with at most + -- this number of fields specConstrThreshold :: Maybe Int, -- ^ Threshold for SpecConstr specConstrCount :: Maybe Int, -- ^ Max number of specialisations for any one function specConstrRecursive :: Int, -- ^ Max number of specialisations for recursive types @@ -1101,6 +1104,7 @@ defaultDynFlags mySettings llvmConfig = maxUncoveredPatterns = 4, maxPmCheckModels = 30, simplTickFactor = 100, + dmdUnboxWidth = 3, -- Default: Assume an unboxed demand on function bodies returning a triple specConstrThreshold = Just 2000, specConstrCount = Just 3, specConstrRecursive = 3, @@ -2657,6 +2661,8 @@ dynamic_flags_deps = [ ; return d }))) , make_ord_flag defFlag "fsimpl-tick-factor" (intSuffix (\n d -> d { simplTickFactor = n })) + , make_ord_flag defFlag "fdmd-unbox-width" + (intSuffix (\n d -> d { dmdUnboxWidth = n })) , make_ord_flag defFlag "fspec-constr-threshold" (intSuffix (\n d -> d { specConstrThreshold = Just n })) , make_ord_flag defFlag "fno-spec-constr-threshold" diff --git a/compiler/GHC/Types/Basic.hs b/compiler/GHC/Types/Basic.hs index db4b8f9e23..342d9d3688 100644 --- a/compiler/GHC/Types/Basic.hs +++ b/compiler/GHC/Types/Basic.hs @@ -503,6 +503,12 @@ instance Outputable Boxity where ppr Boxed = text "Boxed" ppr Unboxed = text "Unboxed" +instance Binary Boxity where -- implemented via isBoxed-isomorphism to Bool + put_ bh = put_ bh . isBoxed + get bh = do + b <- get bh + pure $ if b then Boxed else Unboxed + {- ************************************************************************ * * diff --git a/compiler/GHC/Types/Demand.hs b/compiler/GHC/Types/Demand.hs index 96b5b21bf7..ed7ef25aa8 100644 --- a/compiler/GHC/Types/Demand.hs +++ b/compiler/GHC/Types/Demand.hs @@ -16,9 +16,10 @@ -- Lays out the abstract domain for "GHC.Core.Opt.DmdAnal". module GHC.Types.Demand ( -- * Demands + Boxity(..), Card(C_00, C_01, C_0N, C_10, C_11, C_1N), CardNonAbs, CardNonOnce, Demand(AbsDmd, BotDmd, (:*)), - SubDemand(Prod), mkProd, viewProd, + SubDemand(Prod, Poly), mkProd, viewProd, unboxSubDemand, -- ** Algebra absDmd, topDmd, botDmd, seqDmd, topSubDmd, -- *** Least upper bound @@ -30,7 +31,7 @@ module GHC.Types.Demand ( -- ** Predicates on @Card@inalities and @Demand@s isAbs, isUsedOnce, isStrict, isAbsDmd, isUsedOnceDmd, isStrUsedDmd, isStrictDmd, - isTopDmd, isSeqDmd, isWeakDmd, + isTopDmd, isWeakDmd, -- ** Special demands evalDmd, -- *** Demands used in PrimOp signatures @@ -38,7 +39,6 @@ module GHC.Types.Demand ( -- ** Other @Demand@ operations oneifyCard, oneifyDmd, strictifyDmd, strictifyDictDmd, mkWorkerDemand, peelCallDmd, peelManyCalls, mkCalledOnceDmd, mkCalledOnceDmds, - addCaseBndrDmd, -- ** Extracting one-shot information argOneShots, argsOneShots, saturatedByOneShots, @@ -71,7 +71,7 @@ module GHC.Types.Demand ( DmdTransformer, dmdTransformSig, dmdTransformDataConSig, dmdTransformDictSelSig, -- * Trim to a type shape - TypeShape(..), trimToType, + TypeShape(..), trimToType, trimBoxity, -- * @seq@ing stuff seqDemand, seqDemandList, seqDmdType, seqDmdSig, @@ -108,6 +108,260 @@ _ = pprTrace -- Tired of commenting out the import all the time {- ************************************************************************ * * + Boxity: Whether the box of something is used +* * +************************************************************************ +-} + +{- Note [Strictness and Unboxing] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If an argument is used strictly by the function body, we may use use +call-by-value instead of call-by-need for that argument. What's more, we may +unbox an argument that is used strictly, discarding the box at the call site. +This can reduce allocations of the program drastically if the box really isn't +needed in the function body. Here's an example: +``` +even :: Int -> Bool +even (I# 0) = True +even (I# 1) = False +even (I# n) = even (I# (n -# 2)) +``` +All three code paths of 'even' are (a) strict in the argument, and (b) +immediately discard the boxed 'Int'. Now if we have a call site like +`even (I# 42)`, then it would be terrible to allocate the 'I#' box for the +argument only to tear it apart immediately in the body of 'even'! Hence, +worker/wrapper will allocate a wrapper for 'even' that not only uses +call-by-value for the argument (e.g., `case I# 42 of b { $weven b }`), but also +*unboxes* the argument, resulting in +``` +even :: Int -> Bool +even (I# n) = $weven n +$weven :: Int# -> Bool +$weven 0 = True +$weven 1 = False +$weven n = $weven (n -# 2) +``` +And now the box in `even (I# 42)` will cancel away after inlining the wrapper. + +As far as the permission to unbox is concerned, *evaluatedness* of the argument +is the important trait. Unboxing implies eager evaluation of an argument and +we don't want to change the termination properties of the function. One way +to ensure that is to unbox strict arguments only, but strictness is only a +sufficient condition for evaluatedness. +See Note [Unboxing evaluated arguments] in "GHC.Core.Opt.WorkWrap.Utils", where +we manage to unbox *strict fields* of unboxed arguments that the function is not +actually strict in, simply by realising that those fields have to be evaluated. + +Note [Boxity analysis] +~~~~~~~~~~~~~~~~~~~~~~ +Alas, we don't want to unbox *every* strict argument +(as Note [Strictness and Unboxing] might suggest). +Here's an example (from T19871): +``` +data Huge = H Bool Bool ... Bool +ann :: Huge -> (Bool, Huge) +ann h@(Huge True _ ... _) = (False, h) +ann h = (True, h) +``` +Unboxing 'h' yields +``` +$wann :: Bool -> Bool -> ... -> Bool -> (Bool, Huge) +$wann True b2 ... bn = (False, Huge True b2 ... bn) +$wann b1 b2 ... bn = (True, Huge b1 b2 ... bn) +``` +The pair constructor really needs its fields boxed. But '$wann' doesn't get +passed 'h' anymore, only its components! Ergo it has to reallocate the 'Huge' +box, in a process called "reboxing". After w/w, call sites like +`case ... of Just h -> ann h` pay for the allocation of the additional box. +In earlier versions of GHC we simply accepted that reboxing would sometimes +happen, but we found some cases where it made a big difference: #19407, for +example. + +We therefore perform a simple syntactic boxity analysis that piggy-backs on +demand analysis in order to determine whether the box of a strict argument is +always discarded in the function body, in which case we can pass it unboxed +without risking regressions such as in 'ann' above. But as soon as one use needs +the box, we want Boxed to win over any Unboxed uses. +(We don't adhere to that in 'lubBoxity', see Note [lubBoxity and plusBoxity].) + +The demand signature (cf. Note [Demand notation]) will say whether it uses +its arguments boxed or unboxed. Indeed it does so for every sub-component of +the argument demand. Here's an example: +``` +f :: (Int, Int) -> Bool +f (a, b) = even (a + b) -- demand signature: <1!P(1!L,1!L)> +``` +The '!' indicates places where we want to unbox, the lack thereof indicates the +box is used by the function. Boxity flags are part of the 'Poly' and 'Prod' +'SubDemand's, see Note [Why Boxity in SubDemand and not in Demand?]. +The given demand signature says "Unbox the pair and then nestedly unbox its +two fields". By contrast, the demand signature of 'ann' above would look like +<1P(1L,L,...,L)>, lacking any '!'. + +A demand signature like <1P(1!L)> -- Boxed outside but Unboxed in the field -- +doesn't make a lot of sense, as we can never unbox the field without unboxing +the containing record. See Note [Finalising boxity for demand signature] in +"GHC.Core.Opt.WorkWrap.Utils" for how we avoid to spread this and other kinds of +misinformed boxities. + +Due to various practical reasons, Boxity Analysis is not conservative at times. +Here are reasons for too much optimism: + + * Note [Function body boxity and call sites] is an observation about when it is + beneficial to unbox a parameter that is returned from a function. + Note [Unboxed demand on function bodies returning small products] derives + a heuristic from the former Note, pretending that all call sites of a + function need returned small products Unboxed. + * Note [lubBoxity and plusBoxity] describes why we optimistically let Unboxed + win when combining different case alternatives. + +Boxity analysis fixes a number of issues: +#19871, #19407, #4267, #16859, #18907, #13331 + +Note [Function body boxity and call sites] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider (from T5949) +``` +f n p = case n of + 0 -> p :: (a, b) + _ -> f (n-1) p +-- Worker/wrapper split if we decide to unbox: +$wf n x y = case n of + 0 -> (# x, y #) + _ -> $wf (n-1) x y +f n (x,y) = case $wf n x y of (# r, s #) -> (r,s) +``` +When is it better to /not/ to unbox 'p'? That depends on the callers of 'f'! +If all call sites + + 1. Wouldn't need to allocate fresh boxes for 'p', and + 2. Needed the result pair of 'f' boxed + +Only then we'd see an increase in allocation resulting from unboxing. But as +soon as only one of (1) or (2) holds, it really doesn't matter if 'f' unboxes +'p' (and its result, it's important that CPR follows suit). For example +``` +res = ... case f m (field t) of (r1,r2) -> ... -- (1) holds +arg = ... [ f m (x,y) ] ... -- (2) holds +``` +Because one of the boxes in the call site can cancel away: +``` +res = ... case field1 t of (x1,x2) -> + case field2 t of (y1,y2) -> + case $wf x1 x2 y1 y2 of (#r1,r2#) -> ... +arg = ... [ case $wf x1 x2 y1 y2 of (#r1,r2#) -> (r1,r2) ] ... +``` +And when call sites neither have arg boxes (1) nor need the result boxed (2), +then hesitating to unbox means /more/ allocation in the call site because of the +need for fresh argument boxes. + +Summary: If call sites that satisfy both (1) and (2) occur more often than call +sites that satisfy neither condition, then it's best /not/ to unbox 'p'. + +Note [Unboxed demand on function bodies returning small products] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note [Boxity analysis] achieves its biggest wins when we avoid reboxing huge +records. But when we return small products from a function, we often get faster +programs by pretending that the caller unboxes the result. Long version: + +Observation: Big record arguments (e.g., DynFlags) tend to be modified much less + frequently than small records (e.g., Int). +Result: Big records tend to be passed around boxed (unmodified) much more + frequently than small records. +Consequnce: The larger the record, the more likely conditions (1) and (2) from + Note [Function body boxity and call sites] are met, in which case + unboxing returned parameters leads to reboxing. + +So we put an Unboxed demand on function bodies returning small products and a +Boxed demand on the others. What is regarded a small product is controlled by +the -fdmd-unbox-width flag. + +This also manages to unbox functions like +``` +sum z [] = z +sum (I# n) ((I# x):xs) = sum (I# (n +# x)) xs +``` +where we can unbox 'z' on the grounds that it's but a small box anyway. That in +turn means that the I# allocation in the recursive call site can cancel away and +we get a non-allocating loop, nice and tight. +Note that this is the typical case in "Observation" above: A small box is +unboxed, modified, the result reboxed for the recursive call. + +Originally, this came up in binary-trees' check' function and #4267 which +(similarly) features a strict fold over a tree. We'd also regress in join004 and +join007 if we didn't assume an optimistic Unboxed demand on the function body. +T17932 features a (non-recursive) function that returns a large record, e.g., +``` +flags (Options f x) = <huge> `seq` f +``` +and here we won't unbox 'f' because it has 5 fields (which is larger than the +default -fdmd-unbox-width threshold). + +Why not focus on putting Unboxed demands on all recursive function? +Then we'd unbox +``` +flags 0 (Options f x) = <huge> `seq` f +flags n o = flags (n-1) o +``` +and that seems hardly useful. +(NB: Similar to 'f' from Note [Preserving Boxity of results is rarely a win], +but there we only had 2 fields.) + +Note [lubBoxity and plusBoxity] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Should 'Boxed' win in 'lubBoxity' and 'plusBoxity'? +The first intuition is Yes, because that would be the conservative choice: +Responding 'Boxed' when there's the slightest chance we might need the box means +we'll never need to rebox a value. + +For 'plusBoxity' the choice of 'boxedWins' is clear: When we need a value to be +Boxed and Unboxed /in the same trace/, then we clearly need it to be Boxed. + +But if we chose 'boxedWins' for 'lubBoxity', we'd regress T3586. Smaller example +``` +sumIO :: Int -> Int -> IO Int +sumIO 0 !z = return z +sumIO n !z = sumIO (n-1) (z+n) +``` +We really want 'z' to unbox here. Yet its use in the returned unboxed pair +is fundamentally a Boxed one! CPR would manage to unbox it, but DmdAnal runs +before that. There is an Unboxed use in the recursive call to 'go' though. +So we choose 'unboxedWins' for 'lubBoxity' to collect this win. + +Choosing 'unboxedWins' is not conservative. There clearly is ample room for +examples that get worse by our choice. Here's a simple one (from T19871): +``` +data Huge = H { f1 :: Bool, ... many fields ... } +update :: Huge -> (Bool, Huge) +update h@(Huge{f1=True}) = (False, h{f1=False}) +update h = (True, h) +``` +Here, we decide to unbox 'h' because it's used Unboxed in the first branch. + +Note that this is fundamentally working around a phase problem, namely that the +results of boxity analysis depend on CPR analysis (and vice versa, of course). +-} + +boxedWins :: Boxity -> Boxity -> Boxity +boxedWins Unboxed Unboxed = Unboxed +boxedWins _ !_ = Boxed + +unboxedWins :: Boxity -> Boxity -> Boxity +unboxedWins Boxed Boxed = Boxed +unboxedWins _ !_ = Unboxed + +lubBoxity :: Boxity -> Boxity -> Boxity +-- See Note [Boxity analysis] for the lattice. +-- See Note [lubBoxity and plusBoxity]. +lubBoxity = unboxedWins + +plusBoxity :: Boxity -> Boxity -> Boxity +-- See Note [lubBoxity and plusBoxity]. +plusBoxity = boxedWins + +{- +************************************************************************ +* * Card: Combining Strictness and Usage * * ************************************************************************ @@ -406,20 +660,26 @@ pattern n :* sd <- (viewDmdPair -> (n, sd)) where -- -- See Note [Call demands are relative] -- and Note [Demand notation]. +-- See also Note [Why Boxity in SubDemand and not in Demand?]. data SubDemand - = Poly !CardNonOnce + = Poly !Boxity !CardNonOnce -- ^ Polymorphic demand, the denoted thing is evaluated arbitrarily deep, - -- with the specified cardinality at every level. + -- with the specified cardinality at every level. The 'Boxity' applies only + -- to the outer evaluation context; inner evaluation context can be regarded + -- as 'Boxed'. See Note [Boxity in Poly] for why we want it to carry 'Boxity'. -- Expands to 'Call' via 'viewCall' and to 'Prod' via 'viewProd'. -- - -- @Poly n@ is semantically equivalent to @Prod [n :* Poly n, ...]@ or - -- @Call n (Poly n)@. 'mkCall' and 'mkProd' do these rewrites. + -- @Poly b n@ is semantically equivalent to @Prod b [n :* Poly Boxed n, ...]@ + -- or @Call n (Poly Boxed n)@. 'viewCall' and 'viewProd' do these rewrites. -- - -- In Note [Demand notation]: @L === P(L,L,...)@ and @L === CL(L)@, - -- @B === P(B,B,...)@ and @B === CB(B)@, and so on. + -- In Note [Demand notation]: @L === P(L,L,...)@ and @L === CL(L)@, + -- @B === P(B,B,...)@ and @B === CB(B)@, + -- @!A === !P(A,A,...)@ and @!A === !CA(A)@, + -- and so on. -- - -- We only really use 'Poly' with 'C_10' (B), 'C_00' (A), 'C_0N' (L) and - -- sometimes 'C_1N' (S), hence 'CardNonOnce'. + -- We'll only see 'Poly' with 'C_10' (B), 'C_00' (A), 'C_0N' (L) and sometimes + -- 'C_1N' (S) through 'plusSubDmd', never 'C_01' (M) or 'C_11' (1) (grep the + -- source code). Hence 'CardNonOnce', which is closed under 'lub' and 'plus'. | Call !CardNonAbs !SubDemand -- ^ @Call n sd@ describes the evaluation context of @n@ function -- applications, where every individual result is evaluated according to @sd@. @@ -429,53 +689,68 @@ data SubDemand -- expressed with 'Poly'. -- Used only for values of function type. Use the smart constructor 'mkCall' -- whenever possible! - | Prod ![Demand] - -- ^ @Prod ds@ describes the evaluation context of a case scrutinisation + | Prod !Boxity ![Demand] + -- ^ @Prod b ds@ describes the evaluation context of a case scrutinisation -- on an expression of product type, where the product components are - -- evaluated according to @ds@. - deriving Eq + -- evaluated according to @ds@. The 'Boxity' @b@ says whether or not the box + -- of the product was used. + +-- | We have to respect Poly rewrites through 'viewCall' and 'viewProd'. +instance Eq SubDemand where + d1 == d2 = case d1 of + Prod b1 ds1 + | Just (b2, ds2) <- viewProd (length ds1) d2 -> b1 == b2 && ds1 == ds2 + Call n1 sd1 + | Just (n2, sd2) <- viewCall d2 -> n1 == n2 && sd1 == sd2 + Poly b1 n1 + | Poly b2 n2 <- d2 -> b1 == b2 && n1 == n2 + _ -> False -poly00, poly0N, poly1N, poly10 :: SubDemand topSubDmd, botSubDmd, seqSubDmd :: SubDemand -poly00 = Poly C_00 -poly0N = Poly C_0N -poly1N = Poly C_1N -poly10 = Poly C_10 -topSubDmd = poly0N -botSubDmd = poly10 -seqSubDmd = poly00 - -polyDmd :: CardNonOnce -> Demand -polyDmd C_00 = AbsDmd -polyDmd C_10 = BotDmd -polyDmd C_0N = C_0N :* poly0N -polyDmd C_1N = C_1N :* poly1N -polyDmd c = pprPanic "non-once Card" (ppr c) +topSubDmd = Poly Boxed C_0N +botSubDmd = Poly Unboxed C_10 +seqSubDmd = Poly Unboxed C_00 + +-- | The uniform field demand when viewing a 'Poly' as a 'Prod', as in +-- 'viewProd'. +polyFieldDmd :: CardNonOnce -> Demand +polyFieldDmd C_00 = AbsDmd +polyFieldDmd C_10 = BotDmd +polyFieldDmd C_0N = topDmd +polyFieldDmd n = C_1N :* Poly Boxed C_1N & assertPpr (isCardNonOnce n) (ppr n) -- | A smart constructor for 'Prod', applying rewrite rules along the semantic --- equality @Prod [polyDmd n, ...] === polyDmd n@, simplifying to 'Poly' --- 'SubDemand's when possible. Note that this degrades boxity information! E.g. a --- polymorphic demand will never unbox. -mkProd :: [Demand] -> SubDemand -mkProd [] = seqSubDmd --- We only want to simplify absent and bottom demands and unbox the others. --- See also Note [L should win] and Note [Don't optimise LP(L,L,...) to L]. -mkProd ds - | all (== AbsDmd) ds = seqSubDmd - | all (== BotDmd) ds = botSubDmd - | otherwise = Prod ds +-- equality @Prod b [n :* Poly Boxed n, ...] === Poly b n@, simplifying to +-- 'Poly' 'SubDemand's when possible. Examples: +-- +-- * Rewrites @P(L,L)@ (e.g., arguments @Boxed@, @[L,L]@) to @L@ +-- * Rewrites @!P(L,L)@ (e.g., arguments @Unboxed@, @[L,L]@) to @!L@ +-- * Does not rewrite @P(1L)@, @P(L!L)@ or @P(L,A)@ +-- +mkProd :: Boxity -> [Demand] -> SubDemand +mkProd b ds + | all (== AbsDmd) ds = Poly b C_00 + | all (== BotDmd) ds = Poly b C_10 + | dmd@(n :* Poly Boxed m):_ <- ds -- don't rewrite P(L!L) + , n == m -- don't rewrite P(1L) + , all (== dmd) ds -- don't rewrite P(L,A) + = Poly b n + | otherwise = Prod b ds -- | @viewProd n sd@ interprets @sd@ as a 'Prod' of arity @n@, expanding 'Poly' -- demands as necessary. -viewProd :: Arity -> SubDemand -> Maybe [Demand] +viewProd :: Arity -> SubDemand -> Maybe (Boxity, [Demand]) -- It's quite important that this function is optimised well; --- it is used by lubSubDmd and plusSubDmd. Note the strict --- application to 'polyDmd': -viewProd n (Prod ds) | ds `lengthIs` n = Just ds +-- it is used by lubSubDmd and plusSubDmd. +viewProd n (Prod b ds) + | ds `lengthIs` n = Just (b, ds) -- Note the strict application to replicate: This makes sure we don't allocate -- a thunk for it, inlines it and lets case-of-case fire at call sites. -viewProd n (Poly card) = Just $! (replicate n $! polyDmd card) -viewProd _ _ = Nothing +viewProd n (Poly b card) + | let !ds = replicate n $! polyFieldDmd card + = Just (b, ds) +viewProd _ _ + = Nothing {-# INLINE viewProd #-} -- we want to fuse away the replicate and the allocation -- for Arity. Otherwise, #18304 bites us. @@ -483,38 +758,54 @@ viewProd _ _ = Nothing -- equality @Call n (Poly n) === Poly n@, simplifying to 'Poly' 'SubDemand's -- when possible. mkCall :: CardNonAbs -> SubDemand -> SubDemand -mkCall C_1N sd@(Poly C_1N) = sd -mkCall C_0N sd@(Poly C_0N) = sd -mkCall n cd = assertPpr (isCardNonAbs n) (ppr n $$ ppr cd) $ - Call n cd +mkCall C_1N sd@(Poly Boxed C_1N) = sd +mkCall C_0N sd@(Poly Boxed C_0N) = sd +mkCall n cd = assertPpr (isCardNonAbs n) (ppr n $$ ppr cd) $ + Call n cd + +-- | @viewCall sd@ interprets @sd@ as a 'Call', expanding 'Poly' subdemands as +-- necessary. +viewCall :: SubDemand -> Maybe (Card, SubDemand) +viewCall (Call n sd) = Just (n :: Card, sd) +viewCall (Poly _ n) = Just (n :: Card, Poly Boxed n) +viewCall _ = Nothing topDmd, absDmd, botDmd, seqDmd :: Demand -topDmd = polyDmd C_0N +topDmd = C_0N :* topSubDmd absDmd = AbsDmd botDmd = BotDmd seqDmd = C_11 :* seqSubDmd +-- | Sets 'Boxity' to 'Unboxed' for non-'Call' sub-demands. +unboxSubDemand :: SubDemand -> SubDemand +unboxSubDemand (Poly _ n) = Poly Unboxed n +unboxSubDemand (Prod _ ds) = mkProd Unboxed ds +unboxSubDemand sd@Call{} = sd + -- | Denotes '∪' on 'SubDemand'. lubSubDmd :: SubDemand -> SubDemand -> SubDemand -- Handle botSubDmd (just an optimisation, the general case would do the same) -lubSubDmd (Poly C_10) d2 = d2 -lubSubDmd d1 (Poly C_10) = d1 +lubSubDmd (Poly Unboxed C_10) d2 = d2 +lubSubDmd d1 (Poly Unboxed C_10) = d1 -- Handle Prod -lubSubDmd (Prod ds1) (Poly n2) = Prod $ strictMap (lubDmd (polyDmd n2)) ds1 -lubSubDmd (Prod ds1) (Prod ds2) - | equalLength ds1 ds2 = Prod $ strictZipWith lubDmd ds1 ds2 +lubSubDmd (Prod b1 ds1) (Poly b2 n2) + | let !d = polyFieldDmd n2 + = mkProd (lubBoxity b1 b2) (strictMap (lubDmd d) ds1) +lubSubDmd (Prod b1 ds1) (Prod b2 ds2) + | equalLength ds1 ds2 + = mkProd (lubBoxity b1 b2) (strictZipWith lubDmd ds1 ds2) -- Handle Call -lubSubDmd (Call n1 sd1) (Poly n2) +lubSubDmd (Call n1 sd1) sd2@(Poly _ n2) -- See Note [Call demands are relative] | isAbs n2 = mkCall (lubCard n2 n1) sd1 - | otherwise = mkCall (lubCard n2 n1) (lubSubDmd sd1 (Poly n2)) + | otherwise = mkCall (lubCard n2 n1) (lubSubDmd sd1 sd2) lubSubDmd (Call n1 d1) (Call n2 d2) | otherwise = mkCall (lubCard n1 n2) (lubSubDmd d1 d2) -- Handle Poly. Exploit reflexivity (so we'll match the Prod or Call cases again). -lubSubDmd (Poly n1) (Poly n2) = Poly (lubCard n1 n2) -lubSubDmd sd1@Poly{} sd2 = lubSubDmd sd2 sd1 +lubSubDmd (Poly b1 n1) (Poly b2 n2) = Poly (lubBoxity b1 b2) (lubCard n1 n2) +lubSubDmd sd1@Poly{} sd2 = lubSubDmd sd2 sd1 -- Otherwise (Call `lub` Prod) return Top -lubSubDmd _ _ = topSubDmd +lubSubDmd _ _ = topSubDmd -- | Denotes '∪' on 'Demand'. lubDmd :: Demand -> Demand -> Demand @@ -523,24 +814,27 @@ lubDmd (n1 :* sd1) (n2 :* sd2) = lubCard n1 n2 :* lubSubDmd sd1 sd2 -- | Denotes '+' on 'SubDemand'. plusSubDmd :: SubDemand -> SubDemand -> SubDemand -- Handle seqSubDmd (just an optimisation, the general case would do the same) -plusSubDmd (Poly C_00) d2 = d2 -plusSubDmd d1 (Poly C_00) = d1 +plusSubDmd (Poly Unboxed C_00) d2 = d2 +plusSubDmd d1 (Poly Unboxed C_00) = d1 -- Handle Prod -plusSubDmd (Prod ds1) (Poly n2) = Prod $ strictMap (plusDmd (polyDmd n2)) ds1 -plusSubDmd (Prod ds1) (Prod ds2) - | equalLength ds1 ds2 = Prod $ strictZipWith plusDmd ds1 ds2 +plusSubDmd (Prod b1 ds1) (Poly b2 n2) + | let !d = polyFieldDmd n2 + = mkProd (plusBoxity b1 b2) (strictMap (plusDmd d) ds1) +plusSubDmd (Prod b1 ds1) (Prod b2 ds2) + | equalLength ds1 ds2 + = mkProd (plusBoxity b1 b2) (strictZipWith plusDmd ds1 ds2) -- Handle Call -plusSubDmd (Call n1 d1) (Poly n2) +plusSubDmd (Call n1 sd1) sd2@(Poly _ n2) -- See Note [Call demands are relative] - | isAbs n2 = mkCall (plusCard n2 n1) d1 - | otherwise = mkCall (plusCard n2 n1) (lubSubDmd d1 (Poly n2)) -plusSubDmd (Call n1 d1) (Call n2 d2) - | otherwise = mkCall (plusCard n1 n2) (lubSubDmd d1 d2) --- Handle Poly. Exploit (so we'll match the Prod or Call cases again). -plusSubDmd (Poly n1) (Poly n2) = Poly (plusCard n1 n2) -plusSubDmd sd1@Poly{} sd2 = plusSubDmd sd2 sd1 + | isAbs n2 = mkCall (plusCard n2 n1) sd1 + | otherwise = mkCall (plusCard n2 n1) (lubSubDmd sd1 sd2) +plusSubDmd (Call n1 sd1) (Call n2 sd2) + | otherwise = mkCall (plusCard n1 n2) (lubSubDmd sd1 sd2) +-- Handle Poly. Exploit reflexivity (so we'll match the Prod or Call cases again). +plusSubDmd (Poly b1 n1) (Poly b2 n2) = Poly (plusBoxity b1 b2) (plusCard n1 n2) +plusSubDmd sd1@Poly{} sd2 = plusSubDmd sd2 sd1 -- Otherwise (Call `lub` Prod) return Top -plusSubDmd _ _ = topSubDmd +plusSubDmd _ _ = topSubDmd -- | Denotes '+' on 'Demand'. plusDmd :: Demand -> Demand -> Demand @@ -549,11 +843,11 @@ plusDmd (n1 :* sd1) (n2 :* sd2) = plusCard n1 n2 :* plusSubDmd sd1 sd2 multSubDmd :: Card -> SubDemand -> SubDemand multSubDmd C_11 sd = sd multSubDmd C_00 _ = seqSubDmd -multSubDmd C_10 (Poly n) = if isStrict n then botSubDmd else seqSubDmd +multSubDmd C_10 (Poly _ n) = if isStrict n then botSubDmd else seqSubDmd multSubDmd C_10 (Call n _) = if isStrict n then botSubDmd else seqSubDmd -multSubDmd n (Poly m) = Poly (multCard n m) +multSubDmd n (Poly b m) = Poly b (multCard n m) multSubDmd n (Call n' sd) = mkCall (multCard n n') sd -- See Note [Call demands are relative] -multSubDmd n (Prod ds) = Prod (strictMap (multDmd n) ds) +multSubDmd n (Prod b ds) = mkProd b (strictMap (multDmd n) ds) multDmd :: Card -> Demand -> Demand -- The first two lines compute the same result as the last line, but won't @@ -578,11 +872,6 @@ isStrictDmd (n :* _) = isStrict n isStrUsedDmd :: Demand -> Bool isStrUsedDmd (n :* _) = isStrict n && not (isAbs n) -isSeqDmd :: Demand -> Bool -isSeqDmd (C_11 :* sd) = sd == seqSubDmd -isSeqDmd (C_1N :* sd) = sd == seqSubDmd -isSeqDmd _ = False - -- | Is the value used at most once? isUsedOnceDmd :: Demand -> Bool isUsedOnceDmd (n :* _) = isUsedOnce n @@ -602,9 +891,9 @@ isWeakDmd dmd@(n :* _) = not (isStrict n) && is_plus_idem_dmd dmd is_plus_idem_dmd BotDmd = True is_plus_idem_dmd (n :* sd) = is_plus_idem_card n && is_plus_idem_sub_dmd sd -- is_plus_idem_sub_dmd sd = plusSubDmd sd sd == sd - is_plus_idem_sub_dmd (Poly n) = assert (isCardNonOnce n) True - is_plus_idem_sub_dmd (Prod ds) = all is_plus_idem_dmd ds - is_plus_idem_sub_dmd (Call n _) = is_plus_idem_card n -- See Note [Call demands are relative] + is_plus_idem_sub_dmd (Poly _ n) = assert (isCardNonOnce n) True + is_plus_idem_sub_dmd (Prod _ ds) = all is_plus_idem_dmd ds + is_plus_idem_sub_dmd (Call n _) = is_plus_idem_card n -- See Note [Call demands are relative] evalDmd :: Demand evalDmd = C_1N :* topSubDmd @@ -637,7 +926,7 @@ oneifyDmd (n :* sd) = oneifyCard n :* sd -- | Make a 'Demand' evaluated at-least-once (e.g. strict). strictifyDmd :: Demand -> Demand -strictifyDmd AbsDmd = BotDmd +strictifyDmd AbsDmd = seqDmd strictifyDmd BotDmd = BotDmd strictifyDmd (n :* sd) = plusCard C_10 n :* sd @@ -646,17 +935,11 @@ strictifyDmd (n :* sd) = plusCard C_10 n :* sd -- strictify the argument's contained used non-newtype superclass dictionaries. -- We use the demand as our recursive measure to guarantee termination. strictifyDictDmd :: Type -> Demand -> Demand -strictifyDictDmd ty (n :* Prod ds) +strictifyDictDmd ty (n :* Prod b ds) | not (isAbs n) , Just field_tys <- as_non_newtype_dict ty - = C_1N :* -- main idea: ensure it's strict - if all (not . isAbsDmd) ds - then topSubDmd -- abstract to strict w/ arbitrary component use, - -- since this smells like reboxing; results in CBV - -- boxed - -- - -- TODO revisit this if we ever do boxity analysis - else Prod (zipWith strictifyDictDmd field_tys ds) + = C_1N :* mkProd b (zipWith strictifyDictDmd field_tys ds) + -- main idea: ensure it's strict where -- | Return a TyCon and a list of field types if the given -- type is a non-newtype dictionary type @@ -678,13 +961,6 @@ mkCalledOnceDmd sd = mkCall C_11 sd mkCalledOnceDmds :: Arity -> SubDemand -> SubDemand mkCalledOnceDmds arity sd = iterate mkCalledOnceDmd sd !! arity --- | @viewCall sd@ interprets @sd@ as a 'Call', expanding 'TopSubDmd' and --- 'SeqSubDmd' as necessary. -viewCall :: SubDemand -> Maybe (Card, SubDemand) -viewCall (Call n sd) = Just (n :: Card, sd) -viewCall (Poly n) = Just (n :: Card, Poly n) -viewCall _ = Nothing - -- | Peels one call level from the sub-demand, and also returns how many -- times we entered the lambda body. peelCallDmd :: SubDemand -> (Card, SubDemand) @@ -706,15 +982,6 @@ mkWorkerDemand n = C_01 :* go n where go 0 = topSubDmd go n = Call C_01 $ go (n-1) --- | Precondition: The SubDemand is not a Call -addCaseBndrDmd :: SubDemand -- On the case binder - -> [Demand] -- On the components of the constructor - -> [Demand] -- Final demands for the components of the constructor -addCaseBndrDmd sd alt_dmds - -- See Note [Demand on case-alternative binders] - | Prod ds <- plusSubDmd sd (Prod alt_dmds) = ds - | otherwise = alt_dmds - argsOneShots :: DmdSig -> Arity -> [[OneShotInfo]] -- ^ See Note [Computing one-shot info] argsOneShots (DmdSig (DmdType _ arg_ds _)) n_val_args @@ -810,77 +1077,6 @@ is hurt and we can assume that the nested demand is 'botSubDmd'. That ensures that @g@ above actually gets the @1P(L)@ demand on its second pair component, rather than the lazy @MP(L)@ if we 'lub'bed with an absent demand. -Note [Demand on case-alternative binders] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The demand on a binder in a case alternative comes - (a) From the demand on the binder itself - (b) From the demand on the case binder -Forgetting (b) led directly to #10148. - -Example. Source code: - f x@(p,_) = if p then foo x else True - - foo (p,True) = True - foo (p,q) = foo (q,p) - -After strictness analysis: - f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) -> - case x_an1 - of wild_X7 [Dmd=MP(ML,ML)] - { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) -> - case p_an2 of _ { - False -> GHC.Types.True; - True -> foo wild_X7 } - -It's true that ds_dnz is *itself* absent, but the use of wild_X7 means -that it is very much alive and demanded. See #10148 for how the -consequences play out. - -This is needed even for non-product types, in case the case-binder -is used but the components of the case alternative are not. - -Note [Don't optimise LP(L,L,...) to L] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -These two SubDemands: - LP(L,L) (@Prod [topDmd, topDmd]@) and L (@topSubDmd@) -are semantically equivalent, but we do not turn the former into -the latter, for a regrettable-subtle reason. Consider - f p1@(x,y) = (y,x) - g h p2@(_,_) = h p -We want to unbox @p1@ of @f@, but not @p2@ of @g@, because @g@ only uses -@p2@ boxed and we'd have to rebox. So we give @p1@ demand LP(L,L) and @p2@ -demand @L@ to inform 'GHC.Core.Opt.WorkWrap.Utils.wantToUnboxArg', which will -say "unbox" for @p1@ and "don't unbox" for @p2@. - -So the solution is: don't aggressively collapse @Prod [topDmd, topDmd]@ to -@topSubDmd@; instead leave it as-is. In effect we are using the UseDmd to do a -little bit of boxity analysis. Not very nice. - -Note [L should win] -~~~~~~~~~~~~~~~~~~~ -Both in 'lubSubDmd' and 'plusSubDmd' we want @L `plusSubDmd` LP(..))@ to be @L@. -Why? Because U carries the implication the whole thing is used, box and all, -so we don't want to w/w it, cf. Note [Don't optimise LP(L,L,...) to L]. -If we use it both boxed and unboxed, then we are definitely using the box, -and so we are quite likely to pay a reboxing cost. So we make U win here. -TODO: Investigate why since 2013, we don't. - -Example is in the Buffer argument of GHC.IO.Handle.Internals.writeCharBuffer - -Baseline: (A) Not making Used win (LP(..) wins) -Compare with: (B) making Used win for lub and both - - Min -0.3% -5.6% -10.7% -11.0% -33.3% - Max +0.3% +45.6% +11.5% +11.5% +6.9% - Geometric Mean -0.0% +0.5% +0.3% +0.2% -0.8% - -Baseline: (B) Making L win for both lub and both -Compare with: (C) making L win for plus, but LP(..) win for lub - - Min -0.1% -0.3% -7.9% -8.0% -6.5% - Max +0.1% +1.0% +21.0% +21.0% +0.5% - Geometric Mean +0.0% +0.0% -0.0% -0.1% -0.1% - Note [Computing one-shot info] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Consider a call @@ -892,6 +1088,57 @@ Then argsOneShots returns a [[OneShotInfo]] of The occurrence analyser propagates this one-shot infor to the binders \pqr and \xyz; see Note [Use one-shot information] in "GHC.Core.Opt.OccurAnal". + +Note [Boxity in Poly] +~~~~~~~~~~~~~~~~~~~~~ +To support Note [Boxity analysis], it makes sense that 'Prod' carries a +'Boxity'. But why does 'Poly' have to carry a 'Boxity', too? Shouldn't all +'Poly's be 'Boxed'? Couldn't we simply use 'Prod Unboxed' when we need to +express an unboxing demand? + +'botSubDmd' (B) needs to be the bottom of the lattice, so it needs to be an +Unboxed demand. Similarly, 'seqSubDmd' (A) is an Unboxed demand. +So why not say that Polys with absent cardinalities have Unboxed boxity? +That doesn't work, because we also need the boxed equivalents. Here's an example +for A (function 'absent' in T19871): +``` +f _ True = 1 +f a False = a `seq` 2 + -- demand on a: MA, the A is short for `Poly Boxed C_00` + +g a = a `seq` f a True + -- demand on a: SA, which is `Poly Boxed C_00` + +h True p = g p -- SA on p (inherited from g) +h False p@(x,y) = x+y -- S!P(1!L,1!L) on p +``` +(Caveat: Since Unboxed wins in lubBoxity, we'll unbox here anyway.) +If A is treated as Unboxed, we get reboxing in the call site to 'g'. +So we obviously would need a Boxed variant of A. Rather than introducing a lot +of special cases, we just carry the Boxity in 'Poly'. Plus, we could most likely +find examples like the above for any other cardinality. + +Note [Why Boxity in SubDemand and not in Demand?] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In #19871, we started out by storing 'Boxity' in 'SubDemand', in the 'Prod' +constructor only. But then we found that we weren't able to express the unboxing +'seqSubDmd', because that one really is a `Poly C_00` sub-demand. +We then tried to store the Boxity in 'Demand' instead, for these reasons: + + 1. The whole boxity-of-seq business comes to a satisfying conclusion + 2. Putting Boxity in the SubDemand is weird to begin with, because it + describes the box and not its fields, just as the evaluation cardinality + of a Demand describes how often the box is used. It makes more sense that + Card and Boxity travel together. Also the alternative would have been to + store Boxity with Poly, which is even weirder and more redundant. + +But then we regressed in T7837 (grep #19871 for boring specifics), which needed +to transfer an ambient unboxed *demand* on a dictionary selector to its argument +dictionary, via a 'Call' sub-demand `C1(sd)`, as +Note [Demand transformer for a dictionary selector] explains. Annoyingly, +the boxity info has to be stored in the *sub-demand* `sd`! There's no demand +to store the boxity in. So we bit the bullet and now we store Boxity in +'SubDemand', both in 'Prod' *and* 'Poly'. See also Note [Boxity in Poly]. -} {- ********************************************************************* @@ -1289,6 +1536,9 @@ plusDmdType (DmdType fv1 ds1 r1) (fv2, t2) -- See Note [Asymmetry of 'plus*'] -- 'plus' takes the argument/result info from its *first* arg, -- using its second arg just for its free-var info. + | isEmptyVarEnv fv2, defaultFvDmd t2 == absDmd + = DmdType fv1 ds1 (r1 `plusDivergence` t2) -- a very common case that is much more efficient + | otherwise = DmdType (plusVarEnv_CD plusDmd fv1 (defaultFvDmd r1) fv2 (defaultFvDmd t2)) ds1 (r1 `plusDivergence` t2) @@ -1566,10 +1816,10 @@ newtype DmdSig deriving Eq -- | Turns a 'DmdType' computed for the particular 'Arity' into a 'DmdSig' --- unleashable at that arity. See Note [Understanding DmdType and DmdSig] +-- unleashable at that arity. See Note [Understanding DmdType and DmdSig]. mkDmdSigForArity :: Arity -> DmdType -> DmdSig mkDmdSigForArity arity dmd_ty@(DmdType fvs args div) - | arity < dmdTypeDepth dmd_ty = DmdSig (DmdType fvs (take arity args) div) + | arity < dmdTypeDepth dmd_ty = DmdSig $ DmdType fvs (take arity args) div | otherwise = DmdSig (etaExpandDmdType arity dmd_ty) mkClosedDmdSig :: [Demand] -> Divergence -> DmdSig @@ -1671,26 +1921,28 @@ dmdTransformDataConSig arity sd = case go arity sd of Just dmds -> DmdType emptyDmdEnv dmds topDiv Nothing -> nopDmdType -- Not saturated where - go 0 sd = viewProd arity sd + go 0 sd = snd <$> viewProd arity sd go n (Call C_11 sd) = go (n-1) sd -- strict calls only! go _ _ = Nothing -- | A special 'DmdTransformer' for dictionary selectors that feeds the demand -- on the result into the indicated dictionary component (if saturated). +-- See Note [Demand transformer for a dictionary selector]. dmdTransformDictSelSig :: DmdSig -> DmdTransformer --- NB: This currently doesn't handle newtype dictionaries and it's unclear how --- it could without additional parameters. -dmdTransformDictSelSig (DmdSig (DmdType _ [(_ :* sig_sd)] _)) call_sd +-- NB: This currently doesn't handle newtype dictionaries. +-- It should simply apply call_sd directly to the dictionary, I suppose. +dmdTransformDictSelSig (DmdSig (DmdType _ [_ :* prod] _)) call_sd | (n, sd') <- peelCallDmd call_sd - , Prod sig_ds <- sig_sd + , Prod _ sig_ds <- prod = multDmdType n $ - DmdType emptyDmdEnv [C_11 :* Prod (map (enhance sd') sig_ds)] topDiv + DmdType emptyDmdEnv [C_11 :* mkProd Unboxed (map (enhance sd') sig_ds)] topDiv | otherwise = nopDmdType -- See Note [Demand transformer for a dictionary selector] where - enhance sd old | isAbsDmd old = old - | otherwise = C_11 :* sd -- This is the one! - + enhance _ AbsDmd = AbsDmd + enhance _ BotDmd = BotDmd + enhance sd _dmd_var = C_11 :* sd -- This is the one! + -- C_11, because we multiply with n above dmdTransformDictSelSig sig sd = pprPanic "dmdTransformDictSelSig: no args" (ppr sig $$ ppr sd) {- @@ -1751,16 +2003,49 @@ is computed by calling @peelManyCalls n@, which corresponds to α above. Note [Demand transformer for a dictionary selector] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose we have a superclass selector 'sc_sel' and a class method +selector 'op_sel', and a function that uses both, like this + +-- Strictness sig: 1P(1,A) +sc_sel (x,y) = x + +-- Strictness sig: 1P(A,1) +op_sel (p,q)= q + +f d v = op_sel (sc_sel d) v + +What do we learn about the demand on 'd'? Alas, we see only the +demand from 'sc_sel', namely '1P(1,A)'. We /don't/ see that 'd' really has a nested +demand '1P(1P(A,1C1(1)),A)'. On the other hand, if we inlined the two selectors +we'd have + +f d x = case d of (x,_) -> + case x of (_,q) -> + q v + +If we analyse that, we'll get a richer, nested demand on 'd'. + +We want to behave /as if/ we'd inlined 'op_sel' and 'sc_sel'. We can do this +easily by building a richer demand transformer for dictionary selectors than +is expressible by a regular demand signature. +And that is what 'dmdTransformDictSelSig' does: it transforms the demand on the +result to a demand on the (single) argument. + +How does it do that? If we evaluate (op dict-expr) under demand 'd', then we can push the demand 'd' into the appropriate field of the dictionary. What *is* the appropriate field? We just look at the strictness signature of the class op, which will be -something like: P(AAA1AAAAA). Then replace the '1' by the demand 'd'. +something like: P(AAA1AAAAA). Then replace the '1' (or any other non-absent +demand, really) by the demand 'd'. The '1' acts as if it was a demand variable, +the whole signature really means `\d. P(AAAdAAAAA)` for any incoming +demand 'd'. For single-method classes, which are represented by newtypes the signature of 'op' won't look like P(...), so matching on Prod will fail. That's fine: if we are doing strictness analysis we are also doing inlining, so we'll have inlined 'op' into a cast. So we can bale out in a conservative -way, returning nopDmdType. +way, returning nopDmdType. SG: Although we then probably want to apply the eval +demand 'd' directly to 'op' rather than turning it into 'topSubDmd'... It is (just.. #8329) possible to be running strictness analysis *without* having inlined class ops from single-method classes. Suppose you are using @@ -1818,10 +2103,10 @@ kill_usage kfs (n :* sd) = kill_usage_card kfs n :* kill_usage_sd kfs sd kill_usage_sd :: KillFlags -> SubDemand -> SubDemand kill_usage_sd kfs (Call n sd) - | kf_called_once kfs = mkCall (lubCard C_1N n) (kill_usage_sd kfs sd) - | otherwise = mkCall n (kill_usage_sd kfs sd) -kill_usage_sd kfs (Prod ds) = Prod (map (kill_usage kfs) ds) -kill_usage_sd _ sd = sd + | kf_called_once kfs = mkCall (lubCard C_1N n) (kill_usage_sd kfs sd) + | otherwise = mkCall n (kill_usage_sd kfs sd) +kill_usage_sd kfs (Prod b ds) = mkProd b (map (kill_usage kfs) ds) +kill_usage_sd _ sd = sd {- ********************************************************************* * * @@ -1843,12 +2128,22 @@ trimToType BotDmd _ = BotDmd trimToType (n :* sd) ts = n :* go sd ts where - go (Prod ds) (TsProd tss) - | equalLength ds tss = Prod (zipWith trimToType ds tss) + go (Prod b ds) (TsProd tss) + | equalLength ds tss = mkProd b (zipWith trimToType ds tss) go (Call n sd) (TsFun ts) = mkCall n (go sd ts) go sd@Poly{} _ = sd go _ _ = topSubDmd +-- | Drop all boxity +trimBoxity :: Demand -> Demand +trimBoxity AbsDmd = AbsDmd +trimBoxity BotDmd = BotDmd +trimBoxity (n :* sd) = n :* go sd + where + go (Poly _ n) = Poly Boxed n + go (Prod _ ds) = mkProd Boxed (map trimBoxity ds) + go (Call n sd) = mkCall n $ go sd + {- ************************************************************************ * * @@ -1863,9 +2158,9 @@ seqDemand BotDmd = () seqDemand (_ :* sd) = seqSubDemand sd seqSubDemand :: SubDemand -> () -seqSubDemand (Prod ds) = seqDemandList ds +seqSubDemand (Prod _ ds) = seqDemandList ds seqSubDemand (Call _ sd) = seqSubDemand sd -seqSubDemand (Poly _) = () +seqSubDemand (Poly _ _) = () seqDemandList :: [Demand] -> () seqDemandList = foldr (seq . seqDemand) () @@ -1905,19 +2200,21 @@ in the user's guide. For pretty-printing demands, we use quite a compact notation with some abbreviations. Here's the BNF: - card ::= B {} - | A {0} - | M {0,1} - | L {0,1,n} - | 1 {1} - | S {1,n} + card ::= B {} + | A {0} + | M {0,1} + | L {0,1,n} + | 1 {1} + | S {1,n} + + box ::= ! Unboxed + | <empty> Boxed d ::= card sd The :* constructor, just juxtaposition - | card abbreviation: Same as "card card", - in code @polyDmd card@ + | card abbreviation: Same as "card card" - sd ::= card @Poly card@ - | P(d,d,..) @Prod [d1,d2,..]@ + sd ::= box card @Poly box card@ + | box P(d,d,..) @Prod box [d1,d2,..]@ | Ccard(sd) @Call card sd@ So, L can denote a 'Card', polymorphic 'SubDemand' or polymorphic 'Demand', @@ -1957,22 +2254,26 @@ instance Outputable Card where -- | See Note [Demand notation] instance Outputable Demand where - ppr AbsDmd = char 'A' - ppr BotDmd = char 'B' - ppr (C_0N :* Poly C_0N) = char 'L' -- Print LL as just L - ppr (C_1N :* Poly C_1N) = char 'S' -- Dito SS - ppr (n :* sd) = ppr n <> ppr sd + ppr AbsDmd = char 'A' + ppr BotDmd = char 'B' + ppr (C_0N :* Poly Boxed C_0N) = char 'L' -- Print LL as just L + ppr (C_1N :* Poly Boxed C_1N) = char 'S' -- Dito SS + ppr (n :* sd) = ppr n <> ppr sd -- | See Note [Demand notation] instance Outputable SubDemand where - ppr (Poly sd) = ppr sd + ppr (Poly b sd) = pp_boxity b <> ppr sd ppr (Call n sd) = char 'C' <> ppr n <> parens (ppr sd) - ppr (Prod ds) = char 'P' <> parens (fields ds) + ppr (Prod b ds) = pp_boxity b <> char 'P' <> parens (fields ds) where fields [] = empty fields [x] = ppr x fields (x:xs) = ppr x <> char ',' <> fields xs +pp_boxity :: Boxity -> SDoc +pp_boxity Unboxed = char '!' +pp_boxity _ = empty + instance Outputable Divergence where ppr Diverges = char 'b' -- for (b)ottom ppr ExnOrDiv = char 'x' -- for e(x)ception @@ -2026,15 +2327,15 @@ instance Binary Demand where _ -> (n :*) <$> get bh instance Binary SubDemand where - put_ bh (Poly sd) = putByte bh 0 *> put_ bh sd + put_ bh (Poly b sd) = putByte bh 0 *> put_ bh b *> put_ bh sd put_ bh (Call n sd) = putByte bh 1 *> put_ bh n *> put_ bh sd - put_ bh (Prod ds) = putByte bh 2 *> put_ bh ds + put_ bh (Prod b ds) = putByte bh 2 *> put_ bh b *> put_ bh ds get bh = do h <- getByte bh case h of - 0 -> Poly <$> get bh + 0 -> Poly <$> get bh <*> get bh 1 -> mkCall <$> get bh <*> get bh - 2 -> Prod <$> get bh + 2 -> Prod <$> get bh <*> get bh _ -> pprPanic "Binary:SubDemand" (ppr (fromIntegral h :: Int)) instance Binary DmdSig where diff --git a/compiler/GHC/Types/Id/Make.hs b/compiler/GHC/Types/Id/Make.hs index e52c12ce71..9955a025ea 100644 --- a/compiler/GHC/Types/Id/Make.hs +++ b/compiler/GHC/Types/Id/Make.hs @@ -516,9 +516,12 @@ mkDictSelId name clas strict_sig = mkClosedDmdSig [arg_dmd] topDiv arg_dmd | new_tycon = evalDmd - | otherwise = C_1N :* - Prod [ if name == sel_name then evalDmd else absDmd - | sel_name <- sel_names ] + | otherwise = C_1N :* mkProd Unboxed dict_field_dmds + where + -- The evalDmd below is just a placeholder and will be replaced in + -- GHC.Types.Demand.dmdTransformDictSel + dict_field_dmds = [ if name == sel_name then evalDmd else absDmd + | sel_name <- sel_names ] mkDictSelRhs :: Class -> Int -- 0-indexed selector among (superclasses ++ methods) diff --git a/compiler/GHC/Utils/Misc.hs b/compiler/GHC/Utils/Misc.hs index 05e8365745..fbf52d1bdb 100644 --- a/compiler/GHC/Utils/Misc.hs +++ b/compiler/GHC/Utils/Misc.hs @@ -79,7 +79,7 @@ module GHC.Utils.Misc ( transitiveClosure, -- * Strictness - seqList, strictMap, strictZipWith, + seqList, strictMap, strictZipWith, strictZipWith3, -- * Module names looksLikeModuleName, @@ -991,8 +991,8 @@ seqList [] b = b seqList (x:xs) b = x `seq` seqList xs b strictMap :: (a -> b) -> [a] -> [b] -strictMap _ [] = [] -strictMap f (x : xs) = +strictMap _ [] = [] +strictMap f (x:xs) = let !x' = f x !xs' = strictMap f xs @@ -1000,15 +1000,26 @@ strictMap f (x : xs) = x' : xs' strictZipWith :: (a -> b -> c) -> [a] -> [b] -> [c] -strictZipWith _ [] _ = [] -strictZipWith _ _ [] = [] -strictZipWith f (x : xs) (y: ys) = +strictZipWith _ [] _ = [] +strictZipWith _ _ [] = [] +strictZipWith f (x:xs) (y:ys) = let !x' = f x y !xs' = strictZipWith f xs ys in x' : xs' +strictZipWith3 :: (a -> b -> c -> d) -> [a] -> [b] -> [c] -> [d] +strictZipWith3 _ [] _ _ = [] +strictZipWith3 _ _ [] _ = [] +strictZipWith3 _ _ _ [] = [] +strictZipWith3 f (x:xs) (y:ys) (z:zs) = + let + !x' = f x y z + !xs' = strictZipWith3 f xs ys zs + in + x' : xs' + -- Module names: |