diff options
Diffstat (limited to 'compiler/GHC/Core/Opt')
-rw-r--r-- | compiler/GHC/Core/Opt/CprAnal.hs | 36 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/DmdAnal.hs | 271 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Pipeline.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/SpecConstr.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/WorkWrap.hs | 5 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/WorkWrap/Utils.hs | 385 |
6 files changed, 413 insertions, 287 deletions
diff --git a/compiler/GHC/Core/Opt/CprAnal.hs b/compiler/GHC/Core/Opt/CprAnal.hs index d3f6a248ce..65468cd037 100644 --- a/compiler/GHC/Core/Opt/CprAnal.hs +++ b/compiler/GHC/Core/Opt/CprAnal.hs @@ -21,14 +21,13 @@ import GHC.Types.Id.Info import GHC.Types.Demand import GHC.Types.Cpr -import GHC.Core.DataCon import GHC.Core.FamInstEnv -import GHC.Core.Multiplicity -import GHC.Core.Opt.WorkWrap.Utils +import GHC.Core.DataCon import GHC.Core.Type import GHC.Core.Utils import GHC.Core import GHC.Core.Seq +import GHC.Core.Opt.WorkWrap.Utils import GHC.Data.Graph.UnVar -- for UnVarSet @@ -639,30 +638,23 @@ nonVirgin env = env { ae_virgin = False } -- See Note [CPR for binders that will be unboxed]. extendSigEnvForArg :: AnalEnv -> Id -> AnalEnv extendSigEnvForArg env id - = extendSigEnv env id (CprSig (argCprType env (idType id) (idDemandInfo id))) + = extendSigEnv env id (CprSig (argCprType (idDemandInfo id))) -- | Produces a 'CprType' according to how a strict argument will be unboxed. -- Examples: -- --- * A head-strict demand @1L@ on @Int@ would translate to @1@ --- * A product demand @1P(1L,L)@ on @(Int, Bool)@ would translate to @1(1,)@ --- * A product demand @1P(1L,L)@ on @(a , Bool)@ would translate to @1(,)@, --- because the unboxing strategy would not unbox the @a@. -argCprType :: AnalEnv -> Type -> Demand -> CprType -argCprType env arg_ty dmd = CprType 0 (go arg_ty dmd) +-- * A head-strict demand @1!L@ would translate to @1@ +-- * A product demand @1!P(1!L,L)@ would translate to @1(1,)@ +-- * A product demand @1!P(1L,L)@ would translate to @1(,)@, +-- because the first field will not be unboxed. +argCprType :: Demand -> CprType +argCprType dmd = CprType 0 (go dmd) where - go ty dmd - | Unbox (DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args }) ds - <- wantToUnboxArg (ae_fam_envs env) MaybeArgOfInlineableFun ty dmd - -- No existentials; see Note [Which types are unboxed?]) - -- Otherwise we'd need to call dataConRepInstPat here and thread a - -- UniqSupply. So argCprType is a bit less aggressive than it could - -- be, for the sake of coding convenience. - , null (dataConExTyCoVars dc) - , let arg_tys = map scaledThing (dataConInstArgTys dc tc_args) - = ConCpr (dataConTag dc) (zipWith go arg_tys ds) - | otherwise - = topCpr + go (n :* sd) + | isAbs n = topCpr + | Prod Unboxed ds <- sd = ConCpr fIRST_TAG (strictMap go ds) + | Poly Unboxed _ <- sd = ConCpr fIRST_TAG [] + | otherwise = topCpr {- Note [Safe abortion in the fixed-point iteration] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs index 5f209701a9..fa4bed48f0 100644 --- a/compiler/GHC/Core/Opt/DmdAnal.hs +++ b/compiler/GHC/Core/Opt/DmdAnal.hs @@ -32,7 +32,8 @@ import GHC.Core.Utils import GHC.Core.TyCon import GHC.Core.Type import GHC.Core.FVs ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds ) -import GHC.Core.Coercion ( Coercion, coVarsOfCo ) +import GHC.Core.Coercion ( Coercion ) +import GHC.Core.TyCo.FVs ( coVarsOfCos ) import GHC.Core.FamInstEnv import GHC.Core.Opt.Arity ( typeArity ) import GHC.Utils.Misc @@ -55,8 +56,9 @@ _ = pprTrace -- Tired of commenting out the import all the time -} -- | Options for the demand analysis -newtype DmdAnalOpts = DmdAnalOpts - { dmd_strict_dicts :: Bool -- ^ Use strict dictionaries +data DmdAnalOpts = DmdAnalOpts + { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries + , dmd_unbox_width :: !Int -- ^ Use strict dictionaries } -- This is a strict alternative to (,) @@ -276,8 +278,10 @@ dmdAnalBindLetUp top_lvl env id rhs anal_body = WithDmdType final_ty (R (NonRec where WithDmdType body_ty body' = anal_body env WithDmdType body_ty' id_dmd = findBndrDmd env body_ty id - !id' = setBindIdDemandInfo top_lvl id id_dmd - (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd) rhs + -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils" + id_dmd' = finaliseBoxity (ae_fam_envs env) NotInsideInlineableFun (idType id) id_dmd + !id' = setBindIdDemandInfo top_lvl id id_dmd' + (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd') rhs -- See Note [Absence analysis for stable unfoldings and RULES] rule_fvs = bndrRuleAndUnfoldingIds id @@ -425,21 +429,24 @@ dmdAnal' env dmd (Case scrut case_bndr ty [Alt alt bndrs rhs]) | is_single_data_alt alt = let WithDmdType rhs_ty rhs' = dmdAnal env dmd rhs - WithDmdType alt_ty1 dmds = findBndrsDmds env rhs_ty bndrs + WithDmdType alt_ty1 fld_dmds = findBndrsDmds env rhs_ty bndrs WithDmdType alt_ty2 case_bndr_dmd = findBndrDmd env alt_ty1 case_bndr + !case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd -- Evaluation cardinality on the case binder is irrelevant and a no-op. -- What matters is its nested sub-demand! + -- NB: If case_bndr_dmd is absDmd, boxity will say Unboxed, which is + -- what we want, because then `seq` will put a `seqDmd` on its scrut. (_ :* case_bndr_sd) = case_bndr_dmd -- Compute demand on the scrutinee -- FORCE the result, otherwise thunks will end up retaining the -- whole DmdEnv !(!bndrs', !scrut_sd) | DataAlt _ <- alt - , id_dmds <- addCaseBndrDmd case_bndr_sd dmds - -- See Note [Demand on scrutinee of a product case] - = let !new_info = setBndrsDemandInfo bndrs id_dmds - !new_prod = mkProd id_dmds - in (new_info, new_prod) + -- See Note [Demand on the scrutinee of a product case] + -- See Note [Demand on case-alternative binders] + , (!scrut_sd, fld_dmds') <- addCaseBndrDmd case_bndr_sd fld_dmds + , let !bndrs' = setBndrsDemandInfo bndrs fld_dmds' + = (bndrs', scrut_sd) | otherwise -- __DEFAULT and literal alts. Simply add demands and discard the -- evaluation cardinality, as we evaluate the scrutinee exactly once. @@ -454,7 +461,6 @@ dmdAnal' env dmd (Case scrut case_bndr ty [Alt alt bndrs rhs]) WithDmdType scrut_ty scrut' = dmdAnal env scrut_sd scrut res_ty = alt_ty3 `plusDmdType` toPlusDmdArg scrut_ty - !case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd in -- pprTrace "dmdAnal:Case1" (vcat [ text "scrut" <+> ppr scrut -- , text "dmd" <+> ppr dmd @@ -482,8 +488,9 @@ dmdAnal' env dmd (Case scrut case_bndr ty alts) WithDmdType rest_ty as' = combineAltDmds as in WithDmdType (lubDmdType cur_ty rest_ty) (a':as') - WithDmdType scrut_ty scrut' = dmdAnal env topSubDmd scrut - WithDmdType alt_ty1 case_bndr' = annotateBndr env alt_ty case_bndr + WithDmdType alt_ty1 case_bndr_dmd = findBndrDmd env alt_ty case_bndr + !case_bndr' = setIdDemandInfo case_bndr case_bndr_dmd + WithDmdType scrut_ty scrut' = dmdAnal env topSubDmd scrut -- NB: Base case is botDmdType, for empty case alternatives -- This is a unit for lubDmdType, and the right result -- when there really are no alternatives @@ -549,12 +556,30 @@ dmdAnalSumAlt env dmd case_bndr (Alt con bndrs rhs) | WithDmdType rhs_ty rhs' <- dmdAnal env dmd rhs , WithDmdType alt_ty dmds <- findBndrsDmds env rhs_ty bndrs , let (_ :* case_bndr_sd) = findIdDemand alt_ty case_bndr - -- See Note [Demand on scrutinee of a product case] - id_dmds = addCaseBndrDmd case_bndr_sd dmds + -- See Note [Demand on case-alternative binders] + -- we can't use the scrut_sd, because it says 'Prod' and we'll use + -- topSubDmd anyway for scrutinees of sum types. + (!_scrut_sd, dmds') = addCaseBndrDmd case_bndr_sd dmds -- Do not put a thunk into the Alt - !new_ids = setBndrsDemandInfo bndrs id_dmds + !new_ids = setBndrsDemandInfo bndrs dmds' = WithDmdType alt_ty (Alt con new_ids rhs') +-- Precondition: The SubDemand is not a Call +-- See Note [Demand on the scrutinee of a product case] +-- and Note [Demand on case-alternative binders] +addCaseBndrDmd :: SubDemand -- On the case binder + -> [Demand] -- On the fields of the constructor + -> (SubDemand, [Demand]) + -- SubDemand on the case binder incl. field demands + -- and final demands for the components of the constructor +addCaseBndrDmd case_sd fld_dmds + | Just (_, ds) <- viewProd (length fld_dmds) scrut_sd + = (scrut_sd, ds) + | otherwise + = pprPanic "was a call demand" (ppr case_sd $$ ppr fld_dmds) -- See the Precondition + where + scrut_sd = case_sd `plusSubDmd` mkProd Unboxed fld_dmds + {- Note [Analysing with absent demand] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -674,6 +699,51 @@ worker, so the worker will rebuild x = (a, absent-error) and that'll crash. +Note [Demand on case-alternative binders] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The demand on a binder in a case alternative comes + (a) From the demand on the binder itself + (b) From the demand on the case binder +Forgetting (b) led directly to #10148. + +Example. Source code: + f x@(p,_) = if p then foo x else True + + foo (p,True) = True + foo (p,q) = foo (q,p) + +After strictness analysis, forgetting (b): + f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) -> + case x_an1 + of wild_X7 [Dmd=MP(ML,ML)] + { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) -> + case p_an2 of _ { + False -> GHC.Types.True; + True -> foo wild_X7 } + +Note that ds_dnz is syntactically dead, but the expression bound to it is +reachable through the case binder wild_X7. Now watch what happens if we inline +foo's wrapper: + f = \ (x_an1 [Dmd=1P(1L,ML)] :: (Bool, Bool)) -> + case x_an1 + of _ [Dmd=MP(ML,ML)] + { (p_an2 [Dmd=1L], ds_dnz [Dmd=A]) -> + case p_an2 of _ { + False -> GHC.Types.True; + True -> $wfoo_soq GHC.Types.True ds_dnz } + +Look at that! ds_dnz has come back to life in the call to $wfoo_soq! A second +run of demand analysis would no longer infer ds_dnz to be absent. +But unlike occurrence analysis, which infers properties of the *syntactic* +shape of the program, the results of demand analysis describe expressions +*semantically* and are supposed to be mostly stable across Simplification. +That's why we should better account for (b). +In #10148, we ended up emitting a single-entry thunk instead of an updateable +thunk for a let binder that was an an absent case-alt binder during DmdAnal. + +This is needed even for non-product types, in case the case-binder +is used but the components of the case alternative are not. + Note [Aggregated demand for cardinality] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FIXME: This Note should be named [LetUp vs. LetDown] and probably predates @@ -725,43 +795,42 @@ strict in |y|. ************************************************************************ -} -dmdTransform :: AnalEnv -- ^ The strictness environment - -> Id -- ^ The function - -> SubDemand -- ^ The demand on the function - -> DmdType -- ^ The demand type of the function in this context - -- Returned DmdEnv includes the demand on - -- this function plus demand on its free variables - +dmdTransform :: AnalEnv -- ^ The analysis environment + -> Id -- ^ The variable + -> SubDemand -- ^ The evaluation context of the var + -> DmdType -- ^ The demand type unleashed by the variable in this + -- context. The returned DmdEnv includes the demand on + -- this function plus demand on its free variables -- See Note [What are demand signatures?] in "GHC.Types.Demand" -dmdTransform env var dmd +dmdTransform env var sd -- Data constructors | isDataConWorkId var - = dmdTransformDataConSig (idArity var) dmd + = dmdTransformDataConSig (idArity var) sd -- Dictionary component selectors -- Used to be controlled by a flag. -- See #18429 for some perf measurements. | Just _ <- isClassOpId_maybe var - = -- pprTrace "dmdTransform:DictSel" (ppr var $$ ppr dmd) $ - dmdTransformDictSelSig (idDmdSig var) dmd + = -- pprTrace "dmdTransform:DictSel" (ppr var $$ ppr (idDmdSig var) $$ ppr sd) $ + dmdTransformDictSelSig (idDmdSig var) sd -- Imported functions | isGlobalId var - , let res = dmdTransformSig (idDmdSig var) dmd - = -- pprTrace "dmdTransform:import" (vcat [ppr var, ppr (idDmdSig var), ppr dmd, ppr res]) + , let res = dmdTransformSig (idDmdSig var) sd + = -- pprTrace "dmdTransform:import" (vcat [ppr var, ppr (idDmdSig var), ppr sd, ppr res]) res -- Top-level or local let-bound thing for which we use LetDown ('useLetUp'). -- In that case, we have a strictness signature to unleash in our AnalEnv. | Just (sig, top_lvl) <- lookupSigEnv env var - , let fn_ty = dmdTransformSig sig dmd - = -- pprTrace "dmdTransform:LetDown" (vcat [ppr var, ppr sig, ppr dmd, ppr fn_ty]) $ + , let fn_ty = dmdTransformSig sig sd + = -- pprTrace "dmdTransform:LetDown" (vcat [ppr var, ppr sig, ppr sd, ppr fn_ty]) $ case top_lvl of - NotTopLevel -> addVarDmd fn_ty var (C_11 :* dmd) + NotTopLevel -> addVarDmd fn_ty var (C_11 :* sd) TopLevel | isInterestingTopLevelFn var -- Top-level things will be used multiple times or not at -- all anyway, hence the multDmd below: It means we don't -- have to track whether @var@ is used strictly or at most -- once, because ultimately it never will. - -> addVarDmd fn_ty var (C_0N `multDmd` (C_11 :* dmd)) -- discard strictness + -> addVarDmd fn_ty var (C_0N `multDmd` (C_11 :* sd)) -- discard strictness | otherwise -> fn_ty -- don't bother tracking; just annotate with 'topDmd' later -- Everything else: @@ -769,8 +838,8 @@ dmdTransform env var dmd -- * Lambda binders -- * Case and constructor field binders | otherwise - = -- pprTrace "dmdTransform:other" (vcat [ppr var, ppr sig, ppr dmd, ppr res]) $ - unitDmdType (unitVarEnv var (C_11 :* dmd)) + = -- pprTrace "dmdTransform:other" (vcat [ppr var, ppr boxity, ppr sd]) $ + unitDmdType (unitVarEnv var (C_11 :* sd)) {- ********************************************************************* * * @@ -802,15 +871,21 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs where rhs_arity = idArity id -- See Note [Demand signatures are computed for a threshold demand based on idArity] - rhs_dmd -- See Note [Demand analysis for join points] - -- See Note [Invariants on join points] invariant 2b, in GHC.Core - -- rhs_arity matches the join arity of the join point - | isJoinId id - = mkCalledOnceDmds rhs_arity let_dmd - | otherwise - = mkCalledOnceDmds rhs_arity topSubDmd - - WithDmdType rhs_dmd_ty rhs' = dmdAnal env rhs_dmd rhs + + rhs_dmd = mkCalledOnceDmds rhs_arity body_dmd + + body_dmd + | isJoinId id + -- See Note [Demand analysis for join points] + -- See Note [Invariants on join points] invariant 2b, in GHC.Core + -- rhs_arity matches the join arity of the join point + = let_dmd + | otherwise + -- See Note [Unboxed demand on function bodies returning small products] + = unboxedWhenSmall (ae_opts env) (unboxableResultWidth env id) topSubDmd + + -- See Note [Do not unbox class dictionaries] + WithDmdType rhs_dmd_ty rhs' = dmdAnal (adjustInlFun id env) rhs_dmd rhs DmdType rhs_fv rhs_dmds rhs_div = rhs_dmd_ty sig = mkDmdSigForArity rhs_arity (DmdType sig_fv rhs_dmds rhs_div) @@ -829,6 +904,7 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs -- might turn into used-many even if the signature was stable and -- we'd have to do an additional iteration. reuseEnv makes sure that -- we never get used-once info for FVs of recursive functions. + -- See #14816 where we try to get rid of reuseEnv. rhs_fv1 = case rec_flag of Recursive -> reuseEnv rhs_fv NonRecursive -> rhs_fv @@ -839,6 +915,26 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs -- See Note [Lazy and unleashable free variables] !(!lazy_fv, !sig_fv) = partitionVarEnv isWeakDmd rhs_fv2 +unboxableResultWidth :: AnalEnv -> Id -> Maybe Arity +unboxableResultWidth env id + | (pis,ret_ty) <- splitPiTys (idType id) + , count (not . isNamedBinder) pis == idArity id + , Just (tc, _tc_args, _co) <- normSplitTyConApp_maybe (ae_fam_envs env) ret_ty + , Just dc <- tyConSingleAlgDataCon_maybe tc + , null (dataConExTyCoVars dc) -- Can't unbox results with existentials + = Just (dataConRepArity dc) + | otherwise + = Nothing + +unboxedWhenSmall :: DmdAnalOpts -> Maybe Arity -> SubDemand -> SubDemand +-- See Note [Unboxed demand on function bodies returning small products] +unboxedWhenSmall opts mb_n sd + | Just n <- mb_n + , n <= dmd_unbox_width opts + = unboxSubDemand sd + | otherwise + = sd + -- | If given the (local, non-recursive) let-bound 'Id', 'useLetUp' determines -- whether we should process the binding up (body before rhs) or down (rhs -- before body). @@ -1056,34 +1152,6 @@ Now f's optimised RHS will be \x.a, but if we change g to (error "..") (since it is apparently Absent) and then inline (\x. fst g) we get disaster. But regardless, #18638 was a more complicated version of this, that actually happened in practice. - -Historical Note [Product demands for function body] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In 2013 I spotted this example, in shootout/binary_trees: - - Main.check' = \ b z ds. case z of z' { I# ip -> - case ds_d13s of - Main.Nil -> z' - Main.Node s14k s14l s14m -> - Main.check' (not b) - (Main.check' b - (case b { - False -> I# (-# s14h s14k); - True -> I# (+# s14h s14k) - }) - s14l) - s14m } } } - -Here we *really* want to unbox z, even though it appears to be used boxed in -the Nil case. Partly the Nil case is not a hot path. But more specifically, -the whole function gets the CPR property if we do. - -That motivated using a demand of C1(C1(C1(P(L,L)))) for the RHS, where -(solely because the result was a product) we used a product demand -(albeit with lazy components) for the body. But that gives very silly -behaviour -- see #17932. Happily it turns out now to be entirely -unnecessary: we get good results with C1(C1(C1(L))). So I simply -deleted the special case. -} {- ********************************************************************* @@ -1159,7 +1227,6 @@ dmdFix top_lvl env let_dmd orig_pairs {- Note [Safe abortion in the fixed-point iteration] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Fixed-point iteration may fail to terminate. But we cannot simply give up and return the environment and code unchanged! We still need to do one additional round, for two reasons: @@ -1231,8 +1298,11 @@ unitDmdType :: DmdEnv -> DmdType unitDmdType dmd_env = DmdType dmd_env [] topDiv coercionDmdEnv :: Coercion -> DmdEnv -coercionDmdEnv co = mapVarEnv (const topDmd) (getUniqSet $ coVarsOfCo co) - -- The VarSet from coVarsOfCo is really a VarEnv Var +coercionDmdEnv co = coercionsDmdEnv [co] + +coercionsDmdEnv :: [Coercion] -> DmdEnv +coercionsDmdEnv cos = mapVarEnv (const topDmd) (getUniqSet $ coVarsOfCos cos) + -- The VarSet from coVarsOfCos is really a VarEnv Var addVarDmd :: DmdType -> Var -> Demand -> DmdType addVarDmd (DmdType fv ds res) var dmd @@ -1283,18 +1353,6 @@ setBndrsDemandInfo (b:bs) (d:ds) = setBndrsDemandInfo [] ds = assert (null ds) [] setBndrsDemandInfo bs _ = pprPanic "setBndrsDemandInfo" (ppr bs) -annotateBndr :: AnalEnv -> DmdType -> Var -> WithDmdType Var --- The returned env has the var deleted --- The returned var is annotated with demand info --- according to the result demand of the provided demand type --- No effect on the argument demands -annotateBndr env dmd_ty var - | isId var = WithDmdType dmd_ty' new_id - | otherwise = WithDmdType dmd_ty var - where - new_id = setIdDemandInfo var dmd - WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty var - annotateLamIdBndr :: AnalEnv -> DmdType -- Demand type of body -> Id -- Lambda binder @@ -1308,8 +1366,11 @@ annotateLamIdBndr env dmd_ty id -- pprTrace "annLamBndr" (vcat [ppr id, ppr dmd_ty, ppr final_ty]) $ WithDmdType main_ty new_id where - new_id = setIdDemandInfo id dmd - main_ty = addDemand dmd dmd_ty' + -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils" + -- and Note [Do not unbox class dictionaries] + dmd' = finaliseBoxity (ae_fam_envs env) (ae_inl_fun env) (idType id) dmd + new_id = setIdDemandInfo id dmd' + main_ty = addDemand dmd' dmd_ty' WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty id {- Note [NOINLINE and strictness] @@ -1389,11 +1450,14 @@ demand put on them (topDmd), and add that to the "lazy_fv" returned by "dmdFix". data AnalEnv = AE - { ae_strict_dicts :: !Bool -- ^ Enable strict dict - , ae_sigs :: !SigEnv - , ae_virgin :: !Bool -- ^ True on first iteration only - -- See Note [Initialising strictness] - , ae_fam_envs :: !FamInstEnvs + { ae_opts :: !DmdAnalOpts -- ^ Analysis options + , ae_sigs :: !SigEnv + , ae_virgin :: !Bool -- ^ True on first iteration only + -- See Note [Initialising strictness] + , ae_fam_envs :: !FamInstEnvs + , ae_inl_fun :: !InsideInlineableFun + -- ^ Whether we analyse the body of an inlineable fun. + -- See Note [Do not unbox class dictionaries]. } -- We use the se_env to tell us whether to @@ -1408,16 +1472,16 @@ type SigEnv = VarEnv (DmdSig, TopLevelFlag) instance Outputable AnalEnv where ppr env = text "AE" <+> braces (vcat [ text "ae_virgin =" <+> ppr (ae_virgin env) - , text "ae_strict_dicts =" <+> ppr (ae_strict_dicts env) , text "ae_sigs =" <+> ppr (ae_sigs env) ]) emptyAnalEnv :: DmdAnalOpts -> FamInstEnvs -> AnalEnv emptyAnalEnv opts fam_envs - = AE { ae_strict_dicts = dmd_strict_dicts opts + = AE { ae_opts = opts , ae_sigs = emptySigEnv , ae_virgin = True , ae_fam_envs = fam_envs + , ae_inl_fun = NotInsideInlineableFun } emptySigEnv :: SigEnv @@ -1445,6 +1509,13 @@ lookupSigEnv env id = lookupVarEnv (ae_sigs env) id nonVirgin :: AnalEnv -> AnalEnv nonVirgin env = env { ae_virgin = False } +-- | Sets 'ae_inl_fun' according to whether the given 'Id' has an inlineable +-- unfolding. See Note [Do not unbox class dictionaries]. +adjustInlFun :: Id -> AnalEnv -> AnalEnv +adjustInlFun id env + | isStableUnfolding (realIdUnfolding id) = env { ae_inl_fun = InsideInlineableFun } + | otherwise = env { ae_inl_fun = NotInsideInlineableFun } + findBndrsDmds :: AnalEnv -> DmdType -> [Var] -> WithDmdType [Demand] -- Return the demands on the Ids in the [Var] findBndrsDmds env dmd_ty bndrs @@ -1472,9 +1543,9 @@ findBndrDmd env dmd_ty id strictify dmd -- See Note [Making dictionaries strict] - | ae_strict_dicts env + | dmd_strict_dicts (ae_opts env) -- We never want to strictify a recursive let. At the moment - -- annotateBndr is only call for non-recursive lets; if that + -- findBndrDmd is never called for recursive lets; if that -- changes, we need a RecFlag parameter and another guard here. = strictifyDictDmd id_ty dmd | otherwise @@ -1522,7 +1593,7 @@ to inline one applied to a function. Sometimes this makes just enough of a difference to stop a function from inlining. This is documented in #18421. -It's somewhat similar to Note [Do not unpack class dictionaries] although +It's somewhat similar to Note [Do not unbox class dictionaries] although here our problem is with the inliner, not the specializer. Note [Initialising strictness] diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs index 18ac910d15..ee79e28b60 100644 --- a/compiler/GHC/Core/Opt/Pipeline.hs +++ b/compiler/GHC/Core/Opt/Pipeline.hs @@ -1067,6 +1067,7 @@ dmdAnal :: Logger -> DynFlags -> FamInstEnvs -> [CoreRule] -> CoreProgram -> IO dmdAnal logger dflags fam_envs rules binds = do let !opts = DmdAnalOpts { dmd_strict_dicts = gopt Opt_DictsStrict dflags + , dmd_unbox_width = dmdUnboxWidth dflags } binds_plus_dmds = dmdAnalProgram opts fam_envs rules binds Logger.putDumpFileMaybe logger Opt_D_dump_str_signatures "Strictness signatures" FormatText $ diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs index 718c840c96..966e86a344 100644 --- a/compiler/GHC/Core/Opt/SpecConstr.hs +++ b/compiler/GHC/Core/Opt/SpecConstr.hs @@ -1806,7 +1806,7 @@ calcSpecInfo fn (CP { cp_qvars = qvars, cp_args = pats }) extra_bndrs go_one env d (Var v) = extendVarEnv_C plusDmd env v d go_one env (_n :* cd) e -- NB: _n does not have to be strict | (Var _, args) <- collectArgs e - , Just ds <- viewProd (length args) cd + , Just (_b, ds) <- viewProd (length args) cd -- TODO: We may want to look at boxity _b, though... = go env ds args go_one env _ _ = env diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs index 976dcd5fe5..9becea0c18 100644 --- a/compiler/GHC/Core/Opt/WorkWrap.hs +++ b/compiler/GHC/Core/Opt/WorkWrap.hs @@ -734,8 +734,6 @@ splitFun ww_opts fn_id rhs uf_opts = so_uf_opts (wo_simple_opts ww_opts) fn_info = idInfo fn_id (arg_vars, body) = collectBinders rhs - -- collectBinders was not enough for GHC.Event.IntTable.insertWith - -- last time I checked, where manifest lambdas were wrapped in casts (wrap_dmds, div) = splitDmdSig (dmdSigInfo fn_info) @@ -978,8 +976,7 @@ splitThunk :: WwOpts -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)] splitThunk ww_opts is_rec x rhs = assert (not (isJoinId x)) $ do { let x' = localiseId x -- See comment above - ; (useful,_, wrap_fn, fn_arg) - <- mkWWstr_one ww_opts NotArgOfInlineableFun x' + ; (useful,_, wrap_fn, fn_arg) <- mkWWstr_one ww_opts x' ; let res = [ (x, Let (NonRec x' rhs) (wrap_fn fn_arg)) ] ; if useful then assertPpr (isNonRec is_rec) (ppr x) -- The thunk must be non-recursive return res diff --git a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs index df3608fe7d..6473408843 100644 --- a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs +++ b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs @@ -10,8 +10,9 @@ A library for the ``worker\/wrapper'' back-end to the strictness analyser module GHC.Core.Opt.WorkWrap.Utils ( WwOpts(..), initWwOpts, mkWwBodies, mkWWstr, mkWWstr_one, mkWorkerArgs , DataConPatContext(..) - , UnboxingDecision(..), ArgOfInlineableFun(..), wantToUnboxArg - , findTypeShape, mkAbsentFiller, IsRecDataConResult(..), isRecDataCon + , UnboxingDecision(..), InsideInlineableFun(..), wantToUnboxArg + , findTypeShape, IsRecDataConResult(..), isRecDataCon, finaliseBoxity + , mkAbsentFiller , isWorkerSmallEnough ) where @@ -229,7 +230,7 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr res_ty' = GHC.Core.Subst.substTy subst res_ty ; (useful1, work_args, wrap_fn_str, fn_args) - <- mkWWstr opts inlineable_flag cloned_arg_vars + <- mkWWstr opts cloned_arg_vars -- Do CPR w/w. See Note [Always do CPR w/w] ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty) @@ -265,9 +266,6 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr = info `setOccInfo` noOccInfo mb_join_arity = isJoinId_maybe fun_id - inlineable_flag -- See Note [Do not unpack class dictionaries] - | isStableUnfolding (realIdUnfolding fun_id) = MaybeArgOfInlineableFun - | otherwise = NotArgOfInlineableFun -- Note [Do not split void functions] only_one_void_argument @@ -562,59 +560,30 @@ data UnboxingDecision s -- The @[s]@ carries the bits of information with which we can continue -- unboxing, e.g. @s@ will be 'Demand' or 'Cpr'. --- | A specialised Bool for an argument to 'wantToUnboxArg'. --- See Note [Do not unpack class dictionaries]. -data ArgOfInlineableFun - = NotArgOfInlineableFun -- ^ Definitely not in an inlineable fun. - | MaybeArgOfInlineableFun -- ^ We might be in an inlineable fun, so we won't - -- unbox dictionary args. - deriving Eq - --- | Unboxing strategy for strict arguments. -wantToUnboxArg :: FamInstEnvs -> ArgOfInlineableFun -> Type -> Demand -> UnboxingDecision Demand +-- | Unwraps the 'Boxity' decision encoded in the given 'SubDemand' and returns +-- a 'DataConPatContext' as well the nested demands on fields of the 'DataCon' +-- to unbox. +wantToUnboxArg + :: FamInstEnvs + -> Type -- ^ Type of the argument + -> Demand -- ^ How the arg was used + -> UnboxingDecision Demand -- See Note [Which types are unboxed?] -wantToUnboxArg fam_envs inlineable_flag ty dmd - | isAbsDmd dmd +wantToUnboxArg fam_envs ty (n :* sd) + | isAbs n = DropAbsent - | isStrUsedDmd dmd - , Just (tc, tc_args, co) <- normSplitTyConApp_maybe fam_envs ty + | Just (tc, tc_args, co) <- normSplitTyConApp_maybe fam_envs ty , Just dc <- tyConSingleAlgDataCon_maybe tc , let arity = dataConRepArity dc - -- See Note [Unpacking arguments with product and polymorphic demands] - , Just cs <- split_prod_dmd_arity dmd arity - -- See Note [Do not unpack class dictionaries] - , inlineable_flag == NotArgOfInlineableFun || not (isClassPred ty) - -- See Note [mkWWstr and unsafeCoerce] - , cs `lengthIs` arity - -- See Note [Add demands for strict constructors] - , let cs' = addDataConStrictness dc cs - = Unbox (DataConPatContext dc tc_args co) cs' + , Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity Analysis] + -- NB: No strictness or evaluatedness checks here. That is done by + -- 'finaliseBoxity'! + = Unbox (DataConPatContext dc tc_args co) ds | otherwise = StopUnboxing - where - split_prod_dmd_arity dmd arity - -- For seqDmd, it should behave like <S(AAAA)>, for some - -- suitable arity - | isSeqDmd dmd = Just (replicate arity absDmd) - | _ :* Prod ds <- dmd = Just ds - | otherwise = Nothing - -addDataConStrictness :: DataCon -> [Demand] -> [Demand] --- See Note [Add demands for strict constructors] -addDataConStrictness con ds - | Nothing <- dataConWrapId_maybe con - -- DataCon worker=wrapper. Implies no strict fields, so nothing to do - = ds -addDataConStrictness con ds - = zipWithEqual "addDataConStrictness" add ds strs - where - strs = dataConRepStrictness con - add dmd str | isMarkedStrict str = strictifyDmd dmd - | otherwise = dmd - -- | Unboxing strategy for constructed results. wantToUnboxResult :: FamInstEnvs -> Type -> Cpr -> UnboxingDecision Cpr @@ -688,35 +657,8 @@ Note that the data constructor /can/ have evidence arguments: equality constraints, type classes etc. So it can be GADT. These evidence arguments are simply value arguments, and should not get in the way. -Note [Unpacking arguments with product and polymorphic demands] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The argument is unpacked in a case if it has a product type and has a -strict *and* used demand put on it. I.e., arguments, with demands such -as the following ones: - - <S,U(U, L)> - <S(L,S),U> - -will be unpacked, but - - <S,U> or <B,U> - -will not, because the pieces aren't used. This is quite important otherwise -we end up unpacking massive tuples passed to the bottoming function. Example: - - f :: ((Int,Int) -> String) -> (Int,Int) -> a - f g pr = error (g pr) - - main = print (f fst (1, error "no")) - -Does 'main' print "error 1" or "error no"? We don't really want 'f' -to unbox its second argument. This actually happened in GHC's onwn -source code, in Packages.applyPackageFlag, which ended up un-boxing -the enormous DynFlags tuple, and being strict in the -as-yet-un-filled-in unitState files. - -Note [Do not unpack class dictionaries] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note [Do not unbox class dictionaries] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If we have f :: Ord a => [a] -> Int -> a {-# INLINABLE f #-} @@ -729,12 +671,19 @@ BUT if f is strict in the Ord dictionary, we might unpack it, to get fw :: (a->a->Bool) -> [a] -> Int# -> a and the type-class specialiser can't specialise that. An example is #6056. -But in any other situation a dictionary is just an ordinary value, -and can be unpacked. So we track the INLINABLE pragma, and switch -off the unpacking in mkWWstr_one (see the isClassPred test). +But in any other situation, a dictionary is just an ordinary value, +and can be unpacked. So we track the INLINABLE pragma, and discard the boxity +flag in finaliseBoxity (see the isClassPred test). Historical note: #14955 describes how I got this fix wrong the first time. +Note that the simplicity of this fix implies that INLINE functions (such as +wrapper functions after the WW run) will never say that they unbox class +dictionaries. That's not ideal, but not worth losing sleep over, as INLINE +functions will have been inlined by the time we run demand analysis so we'll +see the unboxing around the worker in client modules. I got aware of the issue +in T5075 by the change in boxity of loop between demand analysis runs. + Note [mkWWstr and unsafeCoerce] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By using unsafeCoerce, it is possible to make the number of demands fail to @@ -742,14 +691,14 @@ match the number of constructor arguments; this happened in #8037. If so, the worker/wrapper split doesn't work right and we get a Core Lint bug. The fix here is simply to decline to do w/w if that happens. -Note [Add demands for strict constructors] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note [Unboxing evaluated arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Consider this program (due to Roman): data X a = X !a foo :: X Int -> Int -> Int - foo (X a) n = go 0 + foo x@(X a) n = go 0 where go i | i < n = a + go (i+1) | otherwise = 0 @@ -758,12 +707,12 @@ We want the worker for 'foo' too look like this: $wfoo :: Int# -> Int# -> Int# -with the first argument unboxed, so that it is not eval'd each time -around the 'go' loop (which would otherwise happen, since 'foo' is not -strict in 'a'). It is sound for the wrapper to pass an unboxed arg -because X is strict, so its argument must be evaluated. And if we -*don't* pass an unboxed argument, we can't even repair it by adding a -`seq` thus: +with the first argument unboxed, so that it is not eval'd each time around the +'go' loop (which would otherwise happen, since 'foo' is not strict in 'a'). It +is sound for the wrapper to pass an unboxed arg because X is strict +(see Note [Strictness and Unboxing] in "GHC.Core.Opt.DmdAnal"), so its argument +must be evaluated. And if we *don't* pass an unboxed argument, we can't even +repair it by adding a `seq` thus: foo (X a) n = a `seq` go 0 @@ -771,34 +720,38 @@ because the seq is discarded (very early) since X is strict! So here's what we do -* We leave the demand-analysis alone. The demand on 'a' in the - definition of 'foo' is <L, U(U)>; the strictness info is Lazy - because foo's body may or may not evaluate 'a'; but the usage info - says that 'a' is unpacked and its content is used. +* Since this has nothing to do with how 'foo' uses 'a', we leave demand analysis + alone, but account for the additional evaluatedness when annotating the binder + in 'annotateLamIdBndr' via 'finaliseBoxity', which will retain the Unboxed boxity + on 'a' in the definition of 'foo' in the demand 'L!P(L)'; meaning it's used + lazily but unboxed nonetheless. This seems to contradict + Note [No lazy, Unboxed demands in demand signature], but we know that 'a' is + evaluated and thus can be unboxed. -* During worker/wrapper, if we unpack a strict constructor (as we do - for 'foo'), we use 'addDataConStrictness' to bump up the strictness on - the strict arguments of the data constructor. +* When 'finaliseBoxity' decides to unbox a record, it will zip the field demands + together with the respective 'StrictnessMark'. In case of 'x', it will pair + up the lazy field demand 'L!P(L)' on 'a' with 'MarkedStrict' to account for + the strict field. -* That in turn means that, if the usage info supports doing so - (i.e. splitProdDmd_maybe returns Just), we will unpack that argument - -- even though the original demand (e.g. on 'a') was lazy. +* Said 'StrictnessMark' is passed to the recursive invocation of + 'finaliseBoxity' when deciding whether to unbox 'a'. 'a' was used lazily, but + since it also says 'MarkedStrict', we'll retain the 'Unboxed' boxity on 'a'. -* What does "bump up the strictness" mean? Just add a head-strict - demand to the strictness! Even for a demand like <L,A> we can - safely turn it into <S,A>; remember case (1) of - Note [Worker/wrapper for Strictness and Absence]. +* Worker/wrapper will consult 'wantToUnboxArg' for its unboxing decision. It will + /not/ look at the strictness bits of the demand, only at Boxity flags. As such, + it will happily unbox 'a' despite the lazy demand on it. -The net effect is that the w/w transformation is more aggressive about -unpacking the strict arguments of a data constructor, when that -eagerness is supported by the usage info. +The net effect is that boxity analysis and the w/w transformation are more +aggressive about unboxing the strict arguments of a data constructor than when +looking at strictness info exclusively. It is very much like (Nested) CPR, which +needs its nested fields to be evaluated in order for it to unbox nestedly. There is the usual danger of reboxing, which as usual we ignore. But if X is monomorphic, and has an UNPACK pragma, then this optimisation is even more important. We don't want the wrapper to rebox an unboxed argument, and pass an Int to $wfoo! -This works in nested situations like +This works in nested situations like T10482 data family Bar a data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) @@ -863,6 +816,68 @@ applying the strictness demands to the final result of DmdAnal. The result is that we get the strict demand signature we wanted even if we can't float the case on `x` up through the case on `burble`. +Note [No nested Unboxed inside Boxed in demand signature] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider +``` +f p@(x,y) + | even (x+y) = [] + | otherwise = [p] +``` +Demand analysis will infer that the function body puts a demand of `1P(1!L,1!L)` +on 'p', e.g., Boxed on the outside but Unboxed on the inside. But worker/wrapper +can't unbox the pair components without unboxing the pair! So we better say +`1P(1L,1L)` in the demand signature in order not to spread wrong Boxity info. +That happens in 'finaliseBoxity'. + +Note [No lazy, Unboxed demands in demand signature] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider T19407: + + data Huge = Huge Bool () ... () -- think: DynFlags + data T = T { h :: Huge, n :: Int } + f t@(T h _) = g h t + g (H b _ ... _) t = if b then 1 else n t + +The body of `g` puts (approx.) demand `L!P(A,1)` on `t`. But we better +not put that demand in `g`'s demand signature, because worker/wrapper will not +in general unbox a lazy-and-unboxed demand like `L!P(..)`. +(The exception are known-to-be-evaluated arguments like strict fields, +see Note [Unboxing evaluated arguments].) + +The program above is an example where spreading misinformed boxity through the +signature is particularly egregious. If we give `g` that signature, then `f` +puts demand `S!P(1!P(1L,A,..),ML)` on `t`. Now we will unbox `t` in `f` it and +we get + + f (T (H b _ ... _) n) = $wf b n + $wf b n = $wg b (T (H b x ... x) n) + $wg = ... + +Massive reboxing in `$wf`! Solution: Trim boxity on lazy demands in +'finaliseBoxity', modulo Note [Unboxing evaluated arguments]. + +Note [Finalising boxity for demand signature] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The worker/wrapper pass must strictly adhere to the boxity decisions encoded +in the demand signature, because that is the information that demand analysis +propagates throughout the program. Failing to implement the strategy laid out +in the signature can result in reboxing in unexpected places. Hence, we must +completely anticipate unboxing decisions during demand analysis and reflect +these decicions in demand annotations. That is the job of 'finaliseBoxity', +which is defined here and called from demand analysis. + +Here is a list of different Notes it has to take care of: + + * Note [No lazy, Unboxed demands in demand signature] such as `L!P(L)` in + general, but still allow Note [Unboxing evaluated arguments] + * Note [No nested Unboxed inside Boxed in demand signature] such as `1P(1!L)` + * Implement fixes for corner cases Note [Do not unbox class dictionaries] + and Note [mkWWstr and unsafeCoerce] + +Then, in worker/wrapper blindly trusts the boxity info in the demand signature +and will not look at strictness info *at all*, in 'wantToUnboxArg'. + Note [non-algebraic or open body type warning] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are a few cases where the W/W transformation is told that something @@ -894,7 +909,6 @@ way to express existential types in the worker's type signature. -} mkWWstr :: WwOpts - -> ArgOfInlineableFun -- See Note [Do not unpack class dictionaries] -> [Var] -- Wrapper args; have their demand info on them -- *Includes type variables* -> UniqSM (Bool, -- Is this useful @@ -905,10 +919,10 @@ mkWWstr :: WwOpts [CoreExpr]) -- Reboxed args for the call to the -- original RHS. Corresponds one-to-one -- with the wrapper arg vars -mkWWstr opts inlineable_flag args +mkWWstr opts args = go args where - go_one arg = mkWWstr_one opts inlineable_flag arg + go_one arg = mkWWstr_one opts arg go [] = return (False, [], nop_fn, []) go (arg : args) = do { (useful1, args1, wrap_fn1, wrap_arg) <- go_one arg @@ -925,12 +939,9 @@ mkWWstr opts inlineable_flag args -- * wrap_arg assumes work_args are in scope, and builds a ConApp that -- reconstructs the RHS of wrap_var that we pass to the original RHS -- See Note [Worker/wrapper for Strictness and Absence] -mkWWstr_one :: WwOpts - -> ArgOfInlineableFun -- See Note [Do not unpack class dictionaries] - -> Var - -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr) -mkWWstr_one opts inlineable_flag arg = - case wantToUnboxArg fam_envs inlineable_flag arg_ty arg_dmd of +mkWWstr_one :: WwOpts -> Var -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr) +mkWWstr_one opts arg = + case wantToUnboxArg fam_envs arg_ty arg_dmd of _ | isTyVar arg -> do_nothing DropAbsent @@ -940,7 +951,7 @@ mkWWstr_one opts inlineable_flag arg = -- (that's what mkAbsentFiller does) -> return (True, [], nop_fn, absent_filler) - Unbox dcpc cs -> unbox_one_arg opts arg cs dcpc + Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc _ -> do_nothing -- Other cases, like StopUnboxing @@ -955,17 +966,17 @@ unbox_one_arg :: WwOpts -> [Demand] -> DataConPatContext -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr) -unbox_one_arg opts arg_var cs +unbox_one_arg opts arg_var ds DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args , dcpc_co = co } = do { pat_bndrs_uniqs <- getUniquesM ; let ex_name_fss = map getOccFS $ dataConExTyCoVars dc (ex_tvs', arg_ids) = dataConRepFSInstPat (ex_name_fss ++ repeat ww_prefix) pat_bndrs_uniqs (idMult arg_var) dc tc_args - arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids cs + arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids ds unbox_fn = mkUnpackCase (Var arg_var) co (idMult arg_var) dc (ex_tvs' ++ arg_ids') - ; (_, worker_args, wrap_fn, wrap_args) <- mkWWstr opts NotArgOfInlineableFun (ex_tvs' ++ arg_ids') + ; (_, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ arg_ids') ; let wrap_arg = mkConApp dc (map Type tc_args ++ wrap_args) `mkCast` mkSymCo co ; return (True, worker_args, unbox_fn . wrap_fn, wrap_arg) } -- Don't pass the arg, rebox instead @@ -1009,42 +1020,45 @@ mkAbsentFiller opts arg {- Note [Worker/wrapper for Strictness and Absence] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The worker/wrapper transformation, mkWWstr_one, takes into account -several possibilities to decide if the function is worthy for -splitting: +The worker/wrapper transformation, mkWWstr_one, takes concrete action +based on the 'UnboxingDescision' returned by 'wantToUnboxArg'. +The latter takes into account several possibilities to decide if the +function is worthy for splitting: 1. If an argument is absent, it would be silly to pass it to - the worker. Hence the isAbsDmd case. This case must come - first because a demand like <S,A> or <B,A> is possible. - E.g. <B,A> comes from a function like + the worker. Hence the DropAbsent case. This case must come + first because the bottom demand B is also strict. + E.g. B comes from a function like f x = error "urk" - and <S,A> can come from Note [Add demands for strict constructors] - -2. If the argument is evaluated strictly, and we can split the - product demand (splitProdDmd_maybe), then unbox it and w/w its - pieces. For example - - f :: (Int, Int) -> Int - f p = (case p of (a,b) -> a) + 1 - is split to - f :: (Int, Int) -> Int - f p = case p of (a,b) -> $wf a - - $wf :: Int -> Int - $wf a = a + 1 - - and - g :: Bool -> (Int, Int) -> Int - g c p = case p of (a,b) -> - if c then a else b - is split to - g c p = case p of (a,b) -> $gw c a b - $gw c a b = if c then a else b - -2a But do /not/ split if the components are not used; that is, the - usage is just 'Used' rather than 'UProd'. In this case - splitProdDmd_maybe returns Nothing. Otherwise we risk decomposing - a massive tuple which is barely used. Example: + and the absent demand A can come from Note [Unboxing evaluated arguments] + +2. If the argument is evaluated strictly (or known to be eval'd), + we can take a view into the product demand ('viewProd'). In accordance + with Note [Boxity analysis], 'wantToUnboxArg' will say 'Unbox'. + 'mkWWstr_one' then follows suit it and recurses into the fields of the + product demand. For example + + f :: (Int, Int) -> Int + f p = (case p of (a,b) -> a) + 1 + is split to + f :: (Int, Int) -> Int + f p = case p of (a,b) -> $wf a + + $wf :: Int -> Int + $wf a = a + 1 + + and + g :: Bool -> (Int, Int) -> Int + g c p = case p of (a,b) -> + if c then a else b + is split to + g c p = case p of (a,b) -> $gw c a b + $gw c a b = if c then a else b + +2a But do /not/ split if Boxity Analysis said "Boxed". + In this case, 'wantToUnboxArg' returns 'StopUnboxing'. + Otherwise we risk decomposing and reboxing a massive + tuple which is barely used. Example: f :: ((Int,Int) -> String) -> (Int,Int) -> a f g pr = error (g pr) @@ -1055,10 +1069,11 @@ splitting: Imagine that it had millions of fields. This actually happened in GHC itself where the tuple was DynFlags -3. A plain 'seqDmd', which is head-strict with usage UHead, can't - be split by splitProdDmd_maybe. But we want it to behave just - like U(AAAA) for suitable number of absent demands. So we have - a special case for it, with arity coming from the data constructor. +3. In all other cases (e.g., lazy, used demand and not eval'd), + 'finaliseBoxity' will have cleared the Boxity flag to 'Boxed' + (see Note [Finalising boxity for demand signature]) and + 'wantToUnboxArg' returns 'StopUnboxing' so that 'mkWWstr_one' + stops unboxing. Note [Worker/wrapper for bottoming functions] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1162,7 +1177,7 @@ Needless to say, there are some wrinkles: Ideally, we'd just look at the 'StrictnessMark' of the DataCon's field, but it's quite nasty to thread the marks though 'mkWWstr' and 'mkWWstr_one'. So we rather look out for a necessary condition for strict fields: - Note [Add demands for strict constructors] makes it so that the demand on + Note [Unboxing evaluated arguments] makes it so that the demand on 'zs' is absent and /strict/: It will get cardinality 'C_10', the empty interval, rather than 'C_00'. Hence the 'isStrictDmd' check: It guarantees we never fill in an error-thunk for an absent strict field. @@ -1393,6 +1408,56 @@ isRecDataCon fam_envs fuel dc -> combineIRDCRs (map (\dc -> go_dc (subWithInf fuel 1) rec_tc' dc) dcs) -- See Note [Detecting recursive data constructors], point (4) +-- | A specialised Bool for an argument to 'finaliseBoxity'. +-- See Note [Do not unbox class dictionaries]. +data InsideInlineableFun + = NotInsideInlineableFun -- ^ Not in an inlineable fun. + | InsideInlineableFun -- ^ We are in an inlineable fun, so we won't + -- unbox dictionary args. + deriving Eq + +-- | This function makes sure that the demand only says 'Unboxed' where +-- worker/wrapper should actually unbox and trims any boxity beyond that. +-- Called for every demand annotation during DmdAnal. +-- +-- > data T a = T !a +-- > f :: (T (Int,Int), Int) -> () +-- > f p = ... -- demand on p: 1!P(L!P(L!P(L), L!P(L)), L!P(L)) +-- +-- 'finaliseBoxity' will trim the demand on 'p' to 1!P(L!P(LP(L), LP(L)), LP(L)). +-- This is done when annotating lambdas and thunk bindings. +-- See Note [Finalising boxity for demand signature] +finaliseBoxity + :: FamInstEnvs + -> InsideInlineableFun -- ^ See the haddocks on 'InsideInlineableFun' + -> Type -- ^ Type of the argument + -> Demand -- ^ How the arg was used + -> Demand +finaliseBoxity env in_inl_fun ty dmd = go NotMarkedStrict ty dmd + where + go mark ty dmd@(n :* _) = + case wantToUnboxArg env ty dmd of + DropAbsent -> dmd + Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} ds + -- See Note [No lazy, Unboxed demands in demand signature] + -- See Note [Unboxing evaluated arguments] + | isStrict n || isMarkedStrict mark + -- See Note [Do not unbox class dictionaries] + , in_inl_fun == NotInsideInlineableFun || not (isClassPred ty) + -- See Note [mkWWstr and unsafeCoerce] + , ds `lengthIs` dataConRepArity dc + , let arg_tys = dubiousDataConInstArgTys dc tc_args + -> -- pprTrace "finaliseBoxity:Unbox" (ppr ty $$ ppr dmd $$ ppr ds) $ + n :* (mkProd Unboxed $! zip_go_with_marks dc arg_tys ds) + -- See Note [No nested Unboxed inside Boxed in demand signature] + _ -> trimBoxity dmd + + -- See Note [Unboxing evaluated arguments] + zip_go_with_marks dc arg_tys ds = case dataConWrapId_maybe dc of + Nothing -> strictZipWith (go NotMarkedStrict) arg_tys ds + -- Shortcut when DataCon worker=wrapper + Just _ -> strictZipWith3 go (dataConRepStrictness dc) arg_tys ds + {- ************************************************************************ * * |