diff options
author | Simon Peyton Jones <simon.peytonjones@gmail.com> | 2021-12-14 23:41:47 +0000 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-02-02 23:49:58 -0500 |
commit | 0a82ae0d9c834661514f11fdf1183ef1b01c50ee (patch) | |
tree | 1fc9c5345cbb2ba1f4074d469270ad6755cb62af | |
parent | fbc77d3a5525e810f6a08b1fc1b6ce3a73ce054e (diff) | |
download | haskell-0a82ae0d9c834661514f11fdf1183ef1b01c50ee.tar.gz |
More accurate unboxing
This patch implements a fix for #20817. It ensures that
* The final strictness signature for a function accurately
reflects the unboxing done by the wrapper
See Note [Finalising boxity for demand signatures]
and Note [Finalising boxity for let-bound Ids]
* A much better "layer-at-a-time" implementation of the
budget for how many worker arguments we can have
See Note [Worker argument budget]
Generally this leads to a bit more worker/wrapper generation,
because instead of aborting entirely if the budget is exceeded
(and then lying about boxity), we unbox a bit.
Binary sizes in increase slightly (around 1.8%) because of the increase
in worker/wrapper generation. The big effects are to GHC.Ix,
GHC.Show, GHC.IO.Handle.Internals. If we did a better job of dropping
dead code, this effect might go away.
Some nofib perf improvements:
Program Size Allocs Runtime Elapsed TotalMem
--------------------------------------------------------------------------------
VSD +1.8% -0.5% 0.017 0.017 0.0%
awards +1.8% -0.1% +2.3% +2.3% 0.0%
banner +1.7% -0.2% +0.3% +0.3% 0.0%
bspt +1.8% -0.1% +3.1% +3.1% 0.0%
eliza +1.8% -0.1% +1.2% +1.2% 0.0%
expert +1.7% -0.1% +9.6% +9.6% 0.0%
fannkuch-redux +1.8% -0.4% -9.3% -9.3% 0.0%
kahan +1.8% -0.1% +22.7% +22.7% 0.0%
maillist +1.8% -0.9% +21.2% +21.6% 0.0%
nucleic2 +1.7% -5.1% +7.5% +7.6% 0.0%
pretty +1.8% -0.2% 0.000 0.000 0.0%
reverse-complem +1.8% -2.5% +12.2% +12.2% 0.0%
rfib +1.8% -0.2% +2.5% +2.5% 0.0%
scc +1.8% -0.4% 0.000 0.000 0.0%
simple +1.7% -1.3% +17.0% +17.0% +7.4%
spectral-norm +1.8% -0.1% +6.8% +6.7% 0.0%
sphere +1.7% -2.0% +13.3% +13.3% 0.0%
tak +1.8% -0.2% +3.3% +3.3% 0.0%
x2n1 +1.8% -0.4% +8.1% +8.1% 0.0%
--------------------------------------------------------------------------------
Min +1.1% -5.1% -23.6% -23.6% 0.0%
Max +1.8% +0.0% +36.2% +36.2% +7.4%
Geometric Mean +1.7% -0.1% +6.8% +6.8% +0.1%
Compiler allocations in CI have a geometric mean of +0.1%; many small
decreases but there are three bigger increases (7%), all because we do
more worker/wrapper than before, so there is simply more code to
compile. That's OK.
Perf benchmarks in perf/should_run improve in allocation by a geo mean
of -0.2%, which is good. None get worse. T12996 improves by -5.8%
Metric Decrease:
T12996
Metric Increase:
T18282
T18923
T9630
-rw-r--r-- | compiler/GHC/Core/Opt/DmdAnal.hs | 408 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Pipeline.hs | 5 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/WorkWrap.hs | 71 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/WorkWrap/Utils.hs | 334 | ||||
-rw-r--r-- | compiler/GHC/Types/Demand.hs | 12 | ||||
-rw-r--r-- | testsuite/tests/stranal/should_compile/T20817.hs | 8 | ||||
-rw-r--r-- | testsuite/tests/stranal/should_compile/T20817.stderr | 358 | ||||
-rw-r--r-- | testsuite/tests/stranal/should_compile/all.T | 3 | ||||
-rw-r--r-- | testsuite/tests/stranal/sigs/T19871.stderr | 8 |
9 files changed, 823 insertions, 384 deletions
diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs index fa4bed48f0..e6b404ff61 100644 --- a/compiler/GHC/Core/Opt/DmdAnal.hs +++ b/compiler/GHC/Core/Opt/DmdAnal.hs @@ -31,6 +31,7 @@ import GHC.Types.Id import GHC.Core.Utils import GHC.Core.TyCon import GHC.Core.Type +import GHC.Core.Predicate ( isClassPred ) import GHC.Core.FVs ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds ) import GHC.Core.Coercion ( Coercion ) import GHC.Core.TyCo.FVs ( coVarsOfCos ) @@ -39,7 +40,7 @@ import GHC.Core.Opt.Arity ( typeArity ) import GHC.Utils.Misc import GHC.Utils.Panic import GHC.Utils.Panic.Plain -import GHC.Data.Maybe ( isJust ) +import GHC.Data.Maybe ( isJust, orElse ) import GHC.Builtin.PrimOps import GHC.Builtin.Types.Prim ( realWorldStatePrimTy ) import GHC.Types.Unique.Set @@ -57,8 +58,9 @@ _ = pprTrace -- Tired of commenting out the import all the time -- | Options for the demand analysis data DmdAnalOpts = DmdAnalOpts - { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries - , dmd_unbox_width :: !Int -- ^ Use strict dictionaries + { dmd_strict_dicts :: !Bool -- ^ Use strict dictionaries + , dmd_unbox_width :: !Int -- ^ Use strict dictionaries + , dmd_max_worker_args :: !Int } -- This is a strict alternative to (,) @@ -278,8 +280,9 @@ dmdAnalBindLetUp top_lvl env id rhs anal_body = WithDmdType final_ty (R (NonRec where WithDmdType body_ty body' = anal_body env WithDmdType body_ty' id_dmd = findBndrDmd env body_ty id - -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils" - id_dmd' = finaliseBoxity (ae_fam_envs env) NotInsideInlineableFun (idType id) id_dmd + -- See Note [Finalising boxity for demand signatures] + + id_dmd' = finaliseLetBoxity (ae_fam_envs env) (idType id) id_dmd !id' = setBindIdDemandInfo top_lvl id id_dmd' (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd') rhs @@ -867,7 +870,7 @@ dmdAnalRhsSig -- See Note [NOINLINE and strictness] dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs = -- pprTrace "dmdAnalRhsSig" (ppr id $$ ppr let_dmd $$ ppr sig $$ ppr lazy_fv) $ - (env', lazy_fv, id', rhs') + (final_env, lazy_fv, final_id, final_rhs) where rhs_arity = idArity id -- See Note [Demand signatures are computed for a threshold demand based on idArity] @@ -885,13 +888,15 @@ dmdAnalRhsSig top_lvl rec_flag env let_dmd id rhs = unboxedWhenSmall (ae_opts env) (unboxableResultWidth env id) topSubDmd -- See Note [Do not unbox class dictionaries] - WithDmdType rhs_dmd_ty rhs' = dmdAnal (adjustInlFun id env) rhs_dmd rhs - DmdType rhs_fv rhs_dmds rhs_div = rhs_dmd_ty + WithDmdType rhs_dmd_ty rhs' = dmdAnal env rhs_dmd rhs + DmdType rhs_fv rhs_dmds rhs_div = rhs_dmd_ty + (final_rhs_dmds, final_rhs) = finaliseArgBoxities env id rhs_arity rhs' + `orElse` (rhs_dmds, rhs') - sig = mkDmdSigForArity rhs_arity (DmdType sig_fv rhs_dmds rhs_div) + sig = mkDmdSigForArity rhs_arity (DmdType sig_fv final_rhs_dmds rhs_div) - id' = id `setIdDmdSig` sig - !env' = extendAnalEnv top_lvl env id' sig + final_id = id `setIdDmdSig` sig + !final_env = extendAnalEnv top_lvl env final_id sig -- See Note [Aggregated demand for cardinality] -- FIXME: That Note doesn't explain the following lines at all. The reason @@ -1156,6 +1161,369 @@ this, that actually happened in practice. {- ********************************************************************* * * + Finalising boxity +* * +********************************************************************* -} + +{- Note [Finalising boxity for demand signatures] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The worker/wrapper pass must strictly adhere to the boxity decisions +encoded in the demand signature, because that is the information that +demand analysis propagates throughout the program. Failing to +implement the strategy laid out in the signature can result in +reboxing in unexpected places. Hence, we must completely anticipate +unboxing decisions during demand analysis and reflect these decicions +in demand annotations. That is the job of 'finaliseArgBoxities', +which is defined here and called from demand analysis. + +Here is a list of different Notes it has to take care of: + + * Note [No lazy, Unboxed demands in demand signature] such as `L!P(L)` in + general, but still allow Note [Unboxing evaluated arguments] + * Note [No nested Unboxed inside Boxed in demand signature] such as `1P(1!L)` + * Implement fixes for corner cases Note [Do not unbox class dictionaries] + and Note [mkWWstr and unsafeCoerce] + +Then, in worker/wrapper blindly trusts the boxity info in the demand signature +and will not look at strictness info *at all*, in 'wantToUnboxArg'. + +Note [Finalising boxity for let-bound Ids] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + let x = e in body +where the demand on 'x' is 1!P(blah). We want to unbox x according to +Note [Thunk splitting] in GHC.Core.Opt.WorkWrap. We must do this becuase +worker/wrapper ignores stricness and looks only at boxity flags; so if +x's demand is L!P(blah) we might still split it (wrongly). We want to +switch to Boxed on any lazy demand. + +That is what finaliseLetBoxity does. It has no worker-arg budget, so it +is much simpler than finaliseArgBoxities. + +Note [No nested Unboxed inside Boxed in demand signature] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider +``` +f p@(x,y) + | even (x+y) = [] + | otherwise = [p] +``` +Demand analysis will infer that the function body puts a demand of `1P(1!L,1!L)` +on 'p', e.g., Boxed on the outside but Unboxed on the inside. But worker/wrapper +can't unbox the pair components without unboxing the pair! So we better say +`1P(1L,1L)` in the demand signature in order not to spread wrong Boxity info. +That happens via the call to trimBoxity in 'finaliseArgBoxities'/'finaliseLetBoxity'. + +Note [No lazy, Unboxed demands in demand signature] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider T19407: + + data Huge = Huge Bool () ... () -- think: DynFlags + data T = T { h :: Huge, n :: Int } + f t@(T h _) = g h t + g (H b _ ... _) t = if b then 1 else n t + +The body of `g` puts (approx.) demand `L!P(A,1)` on `t`. But we better +not put that demand in `g`'s demand signature, because worker/wrapper will not +in general unbox a lazy-and-unboxed demand like `L!P(..)`. +(The exception are known-to-be-evaluated arguments like strict fields, +see Note [Unboxing evaluated arguments].) + +The program above is an example where spreading misinformed boxity through the +signature is particularly egregious. If we give `g` that signature, then `f` +puts demand `S!P(1!P(1L,A,..),ML)` on `t`. Now we will unbox `t` in `f` it and +we get + + f (T (H b _ ... _) n) = $wf b n + $wf b n = $wg b (T (H b x ... x) n) + $wg = ... + +Massive reboxing in `$wf`! Solution: Trim boxity on lazy demands in +'trimBoxity', modulo Note [Unboxing evaluated arguments]. + +Note [Unboxing evaluated arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this program (due to Roman): + + data X a = X !a + + foo :: X Int -> Int -> Int + foo x@(X a) n = go 0 + where + go i | i < n = a + go (i+1) + | otherwise = 0 + +We want the worker for 'foo' to look like this: + + $wfoo :: Int# -> Int# -> Int# + +with the first argument unboxed, so that it is not eval'd each time around the +'go' loop (which would otherwise happen, since 'foo' is not strict in 'a'). It +is sound for the wrapper to pass an unboxed arg because X is strict +(see Note [Strictness and Unboxing] in "GHC.Core.Opt.DmdAnal"), so its argument +must be evaluated. And if we *don't* pass an unboxed argument, we can't even +repair it by adding a `seq` thus: + + foo (X a) n = a `seq` go 0 + +because the seq is discarded (very early) since X is strict! + +So here's what we do + +* Since this has nothing to do with how 'foo' uses 'a', we leave demand + analysis alone, but account for the additional evaluatedness when + annotating the binder 'finaliseArgBoxities', which will retain the Unboxed + boxity on 'a' in the definition of 'foo' in the demand 'L!P(L)'; meaning + it's used lazily but unboxed nonetheless. This seems to contradict Note + [No lazy, Unboxed demands in demand signature], but we know that 'a' is + evaluated and thus can be unboxed. + +* When 'finaliseArgBoxities' decides to unbox a record, it will zip the field demands + together with the respective 'StrictnessMark'. In case of 'x', it will pair + up the lazy field demand 'L!P(L)' on 'a' with 'MarkedStrict' to account for + the strict field. + +* Said 'StrictnessMark' is passed to the recursive invocation of 'go_args' in + 'finaliseArgBoxities' when deciding whether to unbox 'a'. 'a' was used lazily, but + since it also says 'MarkedStrict', we'll retain the 'Unboxed' boxity on 'a'. + +* Worker/wrapper will consult 'wantToUnboxArg' for its unboxing decision. It will + /not/ look at the strictness bits of the demand, only at Boxity flags. As such, + it will happily unbox 'a' despite the lazy demand on it. + +The net effect is that boxity analysis and the w/w transformation are more +aggressive about unboxing the strict arguments of a data constructor than when +looking at strictness info exclusively. It is very much like (Nested) CPR, which +needs its nested fields to be evaluated in order for it to unbox nestedly. + +There is the usual danger of reboxing, which as usual we ignore. But +if X is monomorphic, and has an UNPACK pragma, then this optimisation +is even more important. We don't want the wrapper to rebox an unboxed +argument, and pass an Int to $wfoo! + +This works in nested situations like T10482 + + data family Bar a + data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) + newtype instance Bar Int = Bar Int + + foo :: Bar ((Int, Int), Int) -> Int -> Int + foo f k = case f of BarPair x y -> + case burble of + True -> case x of + BarPair p q -> ... + False -> ... + +The extra eagerness lets us produce a worker of type: + $wfoo :: Int# -> Int# -> Int# -> Int -> Int + $wfoo p# q# y# = ... + +even though the `case x` is only lazily evaluated. + +--------- Historical note ------------ +We used to add data-con strictness demands when demand analysing case +expression. However, it was noticed in #15696 that this misses some cases. For +instance, consider the program (from T10482) + + data family Bar a + data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) + newtype instance Bar Int = Bar Int + + foo :: Bar ((Int, Int), Int) -> Int -> Int + foo f k = + case f of + BarPair x y -> case burble of + True -> case x of + BarPair p q -> ... + False -> ... + +We really should be able to assume that `p` is already evaluated since it came +from a strict field of BarPair. This strictness would allow us to produce a +worker of type: + + $wfoo :: Int# -> Int# -> Int# -> Int -> Int + $wfoo p# q# y# = ... + +even though the `case x` is only lazily evaluated + +Indeed before we fixed #15696 this would happen since we would float the inner +`case x` through the `case burble` to get: + + foo f k = + case f of + BarPair x y -> case x of + BarPair p q -> case burble of + True -> ... + False -> ... + +However, after fixing #15696 this could no longer happen (for the reasons +discussed in ticket:15696#comment:76). This means that the demand placed on `f` +would then be significantly weaker (since the False branch of the case on +`burble` is not strict in `p` or `q`). + +Consequently, we now instead account for data-con strictness in mkWWstr_one, +applying the strictness demands to the final result of DmdAnal. The result is +that we get the strict demand signature we wanted even if we can't float +the case on `x` up through the case on `burble`. + +Note [Do not unbox class dictionaries] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have + f :: Ord a => [a] -> Int -> a + {-# INLINABLE f #-} +and we worker/wrapper f, we'll get a worker with an INLINABLE pragma +(see Note [Worker/wrapper for INLINABLE functions] in GHC.Core.Opt.WorkWrap), +which can still be specialised by the type-class specialiser, something like + fw :: Ord a => [a] -> Int# -> a + +BUT if f is strict in the Ord dictionary, we might unpack it, to get + fw :: (a->a->Bool) -> [a] -> Int# -> a +and the type-class specialiser can't specialise that. An example is #6056. + +But in any other situation, a dictionary is just an ordinary value, +and can be unpacked. So we track the INLINABLE pragma, and discard the boxity +flag in finaliseArgBoxities (see the isClassPred test). + +Historical note: #14955 describes how I got this fix wrong the first time. + +Note that the simplicity of this fix implies that INLINE functions (such as +wrapper functions after the WW run) will never say that they unbox class +dictionaries. That's not ideal, but not worth losing sleep over, as INLINE +functions will have been inlined by the time we run demand analysis so we'll +see the unboxing around the worker in client modules. I got aware of the issue +in T5075 by the change in boxity of loop between demand analysis runs. + +Note [Worker argument budget] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In 'finaliseArgBoxities' we don't want to generate workers with zillions of +argument when, say given a strict record with zillions of fields. So we +limit the maximum number of worker args to the maximum of + - -fmax-worker-args=N + - The number of args in the original function; if it already has has + zillions of arguments we don't want to seek /fewer/ args in the worker. +(Maybe we should /add/ them instead of maxing?) + +We pursue a "layered" strategy for unboxing: we unbox the top level of the +argument(s), subject to budget; if there are any arguments left we unbox the +next layer, using that depleted budget. + +To achieve this, we use the classic almost-circular programming technique in +which we we write one pass that takes a lazy list of the Budgets for every +layer. +-} + +data Budgets = MkB Arity Budgets -- An infinite list of arity budgets + +incTopBudget :: Budgets -> Budgets +incTopBudget (MkB n bg) = MkB (n+1) bg + +positiveTopBudget :: Budgets -> Bool +positiveTopBudget (MkB n _) = n >= 0 + +finaliseArgBoxities :: AnalEnv -> Id -> Arity -> CoreExpr + -> Maybe ([Demand], CoreExpr) +finaliseArgBoxities env fn arity rhs + | arity > count isId bndrs -- Can't find enough binders + = Nothing -- This happens if we have f = g + -- Then there are no binders; we don't worker/wrapper; and we + -- simply want to give f the same demand signature as g + + | otherwise + = Just (arg_dmds', add_demands arg_dmds' rhs) + -- add_demands: we must attach the final boxities to the lambda-binders + -- of the function, both because that's kosher, and because CPR analysis + -- uses the info on the binders directly. + where + opts = ae_opts env + fam_envs = ae_fam_envs env + is_inlinable_fn = isStableUnfolding (realIdUnfolding fn) + (bndrs, _body) = collectBinders rhs + max_wkr_args = dmd_max_worker_args opts `max` arity + -- See Note [Worker argument budget] + + -- This is the key line, which uses almost-circular programming + -- The remaining budget from one layer becomes the initial + -- budget for the next layer down. See Note [Worker argument budget] + (remaining_budget, arg_dmds') = go_args (MkB max_wkr_args remaining_budget) arg_triples + + arg_triples :: [(Type, StrictnessMark, Demand)] + arg_triples = take arity $ + map mk_triple $ + filter isRuntimeVar bndrs + + mk_triple :: Id -> (Type,StrictnessMark,Demand) + mk_triple bndr | is_cls_arg ty = (ty, NotMarkedStrict, trimBoxity dmd) + | otherwise = (ty, NotMarkedStrict, dmd) + where + ty = idType bndr + dmd = idDemandInfo bndr + + -- is_cls_arg: see Note [Do not unbox class dictionaries] + is_cls_arg arg_ty = is_inlinable_fn && isClassPred arg_ty + + go_args :: Budgets -> [(Type,StrictnessMark,Demand)] -> (Budgets, [Demand]) + go_args bg triples = mapAccumL go_arg bg triples + + go_arg :: Budgets -> (Type,StrictnessMark,Demand) -> (Budgets, Demand) + go_arg bg@(MkB bg_top bg_inner) (ty, str_mark, dmd@(n :* _)) + = case wantToUnboxArg fam_envs ty dmd of + DropAbsent -> (bg, dmd) + StopUnboxing -> (MkB (bg_top-1) bg_inner, trimBoxity dmd) + + Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} dmds + -> (MkB (bg_top-1) final_bg_inner, final_dmd) + where + dc_arity = dataConRepArity dc + arg_tys = dubiousDataConInstArgTys dc tc_args + (bg_inner', dmds') = go_args (incTopBudget bg_inner) $ + zip3 arg_tys (dataConRepStrictness dc) dmds + dmd' = n :* (mkProd Unboxed $! dmds') + (final_bg_inner, final_dmd) + | dmds `lengthIs` dc_arity + , isStrict n || isMarkedStrict str_mark + -- isStrict: see Note [No lazy, Unboxed demands in demand signature] + -- isMarkedStrict: see Note [Unboxing evaluated arguments] + , positiveTopBudget bg_inner' + = (bg_inner', dmd') + | otherwise + = (bg_inner, trimBoxity dmd) + + add_demands :: [Demand] -> CoreExpr -> CoreExpr + -- Attach the demands to the outer lambdas of this expression + add_demands [] e = e + add_demands (dmd:dmds) (Lam v e) + | isTyVar v = Lam v (add_demands (dmd:dmds) e) + | otherwise = Lam (v `setIdDemandInfo` dmd) (add_demands dmds e) + add_demands dmds e = pprPanic "add_demands" (ppr dmds $$ ppr e) + +finaliseLetBoxity + :: FamInstEnvs + -> Type -- ^ Type of the let-bound Id + -> Demand -- ^ How the Id is used + -> Demand +-- See Note [Finalising boxity for let-bound Ids] +-- This function is like finaliseArgBoxities, but much simpler because +-- it has no "budget". It simply unboxes strict demands, and stops +-- when it reaches a lazy one. +finaliseLetBoxity env ty dmd + = go ty NotMarkedStrict dmd + where + go ty mark dmd@(n :* _) = + case wantToUnboxArg env ty dmd of + DropAbsent -> dmd + StopUnboxing -> trimBoxity dmd + Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} dmds + | isStrict n || isMarkedStrict mark + , dmds `lengthIs` dataConRepArity dc + , let arg_tys = dubiousDataConInstArgTys dc tc_args + dmds' = strictZipWith3 go arg_tys (dataConRepStrictness dc) dmds + -> n :* (mkProd Unboxed $! dmds') + | otherwise + -> trimBoxity dmd + + +{- ********************************************************************* +* * Fixpoints * * ********************************************************************* -} @@ -1366,11 +1734,8 @@ annotateLamIdBndr env dmd_ty id -- pprTrace "annLamBndr" (vcat [ppr id, ppr dmd_ty, ppr final_ty]) $ WithDmdType main_ty new_id where - -- See Note [Finalising boxity for demand signature] in "GHC.Core.Opt.WorkWrap.Utils" - -- and Note [Do not unbox class dictionaries] - dmd' = finaliseBoxity (ae_fam_envs env) (ae_inl_fun env) (idType id) dmd - new_id = setIdDemandInfo id dmd' - main_ty = addDemand dmd' dmd_ty' + new_id = setIdDemandInfo id dmd + main_ty = addDemand dmd dmd_ty' WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty id {- Note [NOINLINE and strictness] @@ -1455,9 +1820,6 @@ data AnalEnv = AE , ae_virgin :: !Bool -- ^ True on first iteration only -- See Note [Initialising strictness] , ae_fam_envs :: !FamInstEnvs - , ae_inl_fun :: !InsideInlineableFun - -- ^ Whether we analyse the body of an inlineable fun. - -- See Note [Do not unbox class dictionaries]. } -- We use the se_env to tell us whether to @@ -1481,7 +1843,6 @@ emptyAnalEnv opts fam_envs , ae_sigs = emptySigEnv , ae_virgin = True , ae_fam_envs = fam_envs - , ae_inl_fun = NotInsideInlineableFun } emptySigEnv :: SigEnv @@ -1509,13 +1870,6 @@ lookupSigEnv env id = lookupVarEnv (ae_sigs env) id nonVirgin :: AnalEnv -> AnalEnv nonVirgin env = env { ae_virgin = False } --- | Sets 'ae_inl_fun' according to whether the given 'Id' has an inlineable --- unfolding. See Note [Do not unbox class dictionaries]. -adjustInlFun :: Id -> AnalEnv -> AnalEnv -adjustInlFun id env - | isStableUnfolding (realIdUnfolding id) = env { ae_inl_fun = InsideInlineableFun } - | otherwise = env { ae_inl_fun = NotInsideInlineableFun } - findBndrsDmds :: AnalEnv -> DmdType -> [Var] -> WithDmdType [Demand] -- Return the demands on the Ids in the [Var] findBndrsDmds env dmd_ty bndrs diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs index 62a40fbcb2..3f8e3fc186 100644 --- a/compiler/GHC/Core/Opt/Pipeline.hs +++ b/compiler/GHC/Core/Opt/Pipeline.hs @@ -1065,8 +1065,9 @@ transferIdInfo exported_id local_id dmdAnal :: Logger -> DynFlags -> FamInstEnvs -> [CoreRule] -> CoreProgram -> IO CoreProgram dmdAnal logger dflags fam_envs rules binds = do let !opts = DmdAnalOpts - { dmd_strict_dicts = gopt Opt_DictsStrict dflags - , dmd_unbox_width = dmdUnboxWidth dflags + { dmd_strict_dicts = gopt Opt_DictsStrict dflags + , dmd_unbox_width = dmdUnboxWidth dflags + , dmd_max_worker_args = maxWorkerArgs dflags } binds_plus_dmds = dmdAnalProgram opts fam_envs rules binds Logger.putDumpFileMaybe logger Opt_D_dump_str_signatures "Strictness signatures" FormatText $ diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs index 6180a69ab8..3e4770a997 100644 --- a/compiler/GHC/Core/Opt/WorkWrap.hs +++ b/compiler/GHC/Core/Opt/WorkWrap.hs @@ -11,7 +11,6 @@ import GHC.Prelude import GHC.Driver.Session -import GHC.Core.Opt.Arity ( manifestArity ) import GHC.Core import GHC.Core.Unfold.Make import GHC.Core.Utils ( exprType, exprIsHNF ) @@ -552,7 +551,7 @@ tryWW ww_opts is_rec fn_id rhs | isRecordSelector fn_id = return [ (new_fn_id, rhs ) ] - | is_fun && is_eta_exp + | is_fun = splitFun ww_opts new_fn_id rhs -- See Note [Thunk splitting] @@ -576,8 +575,6 @@ tryWW ww_opts is_rec fn_id rhs | otherwise = id -- See Note [Don't w/w join points for CPR] - -- is_eta_exp: see Note [Don't eta expand in w/w] - is_eta_exp = length wrap_dmds == manifestArity rhs is_fun = notNull wrap_dmds || isJoinId fn_id is_thunk = not is_fun && not (exprIsHNF rhs) && not (isJoinId fn_id) && not (isUnliftedType (idType fn_id)) @@ -722,6 +719,11 @@ by LitRubbish (see Note [Drop absent bindings]) so there is no great harm. --------------------- splitFun :: WwOpts -> Id -> CoreExpr -> UniqSM [(Id, CoreExpr)] splitFun ww_opts fn_id rhs + | not (wrap_dmds `lengthIs` count isId arg_vars) + -- See Note [Don't eta expand in w/w] + = return [(fn_id, rhs)] + + | otherwise = warnPprTrace (not (wrap_dmds `lengthIs` (arityInfo fn_info))) "splitFun" (ppr fn_id <+> (ppr wrap_dmds $$ ppr cpr)) $ @@ -907,41 +909,60 @@ in w/w so that we don't pass the argument at all. Note [Thunk splitting] ~~~~~~~~~~~~~~~~~~~~~~ -Suppose x is used strictly (never mind whether it has the CPR -property). +Suppose x is used strictly; never mind whether it has the CPR +property. I'll use a '*' to mean "x* is demanded strictly". let x* = x-rhs in body splitThunk transforms like this: - let - x* = case x-rhs of { I# a -> I# a } + x* = let x = x-rhs in + case x of { I# a -> I# a } in body -Now simplifier will transform to - +This is a little strange: we are re-using the same `x` in the RHS; and +the RHS takes `x` apart and reboxes it. But because the outer 'let' is +strict, and the inner let mentions `x` only once, the simplifier +transform it to case x-rhs of I# a -> let x* = I# a in body -which is what we want. Now suppose x-rhs is itself a case: - - x-rhs = case e of { T -> I# a; F -> I# b } +That is good: in `body` we know the form of `x`, which + * gives the CPR property, and + * allows case-of-case to happen on x -The join point will abstract over a, rather than over (which is -what would have happened before) which is fine. - -Notice that x certainly has the CPR property now! - -In fact, splitThunk uses the function argument w/w splitting -function, so that if x's demand is deeper (say U(U(L,L),L)) -then the splitting will go deeper too. - -NB: For recursive thunks, the Simplifier is unable to float `x-rhs` out of -`x*`'s RHS, because `x*` occurs freely in `x-rhs`, and will just change it -back to the original definition, so we just split non-recursive thunks. +Notes +* I tried transforming like this: + let + x* = let x = x-rhs in + case x of { I# a -> x } + in body + where I return `x` itself, rather than reboxing it. But this + turned out to cause some regressions, which I never fully + investigated. + +* Suppose x-rhs is itself a case: + x-rhs = case e of { T -> I# e1; F -> I# e2 } + Then we'll get + join j a = let x* = I# a in body + in case e of { T -> j e1; F -> j e2 } + which is good (no boxing). But in the original, unsplit program + we would transform + let x* = case e of ... in body + ==> join j2 x = body + in case e of { T -> j2 (I# e1); F -> j (I# e2) } + which is not good (boxing). + +* In fact, splitThunk uses the function argument w/w splitting + function, mkWWstr_one, so that if x's demand is deeper (say U(U(L,L),L)) + then the splitting will go deeper too. + +* For recursive thunks, the Simplifier is unable to float `x-rhs` out of + `x*`'s RHS, because `x*` occurs freely in `x-rhs`, and will just change it + back to the original definition, so we just split non-recursive thunks. Note [Thunk splitting for top-level binders] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs index 86e57286c1..1b2d3ca1ba 100644 --- a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs +++ b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs @@ -10,10 +10,10 @@ A library for the ``worker\/wrapper'' back-end to the strictness analyser module GHC.Core.Opt.WorkWrap.Utils ( WwOpts(..), initWwOpts, mkWwBodies, mkWWstr, mkWWstr_one, mkWorkerArgs , DataConPatContext(..) - , UnboxingDecision(..), InsideInlineableFun(..), wantToUnboxArg - , findTypeShape, IsRecDataConResult(..), isRecDataCon, finaliseBoxity + , UnboxingDecision(..), wantToUnboxArg + , findTypeShape, IsRecDataConResult(..), isRecDataCon , mkAbsentFiller - , isWorkerSmallEnough + , isWorkerSmallEnough, dubiousDataConInstArgTys ) where @@ -29,7 +29,6 @@ import GHC.Core.Make import GHC.Core.Subst import GHC.Core.Type import GHC.Core.Multiplicity -import GHC.Core.Predicate ( isClassPred ) import GHC.Core.Coercion import GHC.Core.Reduction import GHC.Core.FamInstEnv @@ -142,7 +141,7 @@ data WwOpts , wo_simple_opts :: !SimpleOpts , wo_cpr_anal :: !Bool , wo_fun_to_thunk :: !Bool - , wo_max_worker_args :: !Int + -- Used for absent argument error message , wo_module :: !Module } @@ -153,7 +152,6 @@ initWwOpts this_mod dflags fam_envs = MkWwOpts , wo_simple_opts = initSimpleOpts dflags , wo_cpr_anal = gopt Opt_CprAnal dflags , wo_fun_to_thunk = gopt Opt_FunToThunk dflags - , wo_max_worker_args = maxWorkerArgs dflags , wo_module = this_mod } @@ -245,9 +243,7 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr worker_body = mkLams work_lam_args . work_fn_cpr . call_rhs worker_args_dmds = [idDemandInfo v | v <- work_call_args, isId v] - ; if isWorkerSmallEnough (wo_max_worker_args opts) (length demands) work_args - && not (too_many_args_for_join_point arg_vars) - && ((useful1 && not only_one_void_argument) || useful2) + ; if ((useful1 && not only_one_void_argument) || useful2) then return (Just (worker_args_dmds, length work_call_args, wrapper_body, worker_body)) else return Nothing @@ -265,8 +261,6 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr zap_info info -- See Note [Zap IdInfo on worker args] = info `setOccInfo` noOccInfo - mb_join_arity = isJoinId_maybe fun_id - -- Note [Do not split void functions] only_one_void_argument | [d] <- demands @@ -276,17 +270,6 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr | otherwise = False - -- Note [Join points returning functions] - too_many_args_for_join_point wrap_args - | Just join_arity <- mb_join_arity - , wrap_args `lengthExceeds` join_arity - = warnPprTrace True "Unable to worker/wrapper join point" - (text "arity" <+> int join_arity <+> text "but" <+> - int (length wrap_args) <+> text "args") $ - True - | otherwise - = False - -- | Version of 'GHC.Core.mkApps' that does beta reduction on-the-fly. -- PRECONDITION: The arg expressions are not free in any of the lambdas binders. mkAppsBeta :: CoreExpr -> [CoreArg] -> CoreExpr @@ -454,36 +437,6 @@ occurrences, so $wj2 here is wrong. But of course, this is easy enough to fix: Hence we simply do the beta-reduction here. (This would be harder if we had to worry about hygiene, but luckily wy is freshly generated.) -Note [Join points returning functions] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It is crucial that the arity of a join point depends on its *callers,* not its -own syntax. What this means is that a join point can have "extra lambdas": - -f :: Int -> Int -> (Int, Int) -> Int -f x y = join j (z, w) = \(u, v) -> ... - in jump j (x, y) - -Typically this happens with functions that are seen as computing functions, -rather than being curried. (The real-life example was GHC.Data.Graph.Ops.addConflicts.) - -When we create the wrapper, it *must* be in "eta-contracted" form so that the -jump has the right number of arguments: - -f x y = join $wj z' w' = \u' v' -> let {z = z'; w = w'; u = u'; v = v'} in ... - j (z, w) = jump $wj z w - -(See Note [Join points and beta-redexes] for where the lets come from.) If j -were a function, we would instead say - -f x y = let $wj = \z' w' u' v' -> let {z = z'; w = w'; u = u'; v = v'} in ... - j (z, w) (u, v) = $wj z w u v - -Notice that the worker ends up with the same lambdas; it's only the wrapper we -have to be concerned about. - -FIXME Currently the functionality to produce "eta-contracted" wrappers is -unimplemented; we simply give up. - Note [Freshen WW arguments] ~~~~~~~~~~~~~~~~~~~~~~~~~~~ When we do a worker/wrapper split, we must freshen the arg vars of the original @@ -577,8 +530,8 @@ wantToUnboxArg fam_envs ty (n :* sd) , Just dc <- tyConSingleAlgDataCon_maybe tc , let arity = dataConRepArity dc , Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity analysis] - -- NB: No strictness or evaluatedness checks here. That is done by - -- 'finaliseBoxity'! + -- NB: No strictness or evaluatedness checks here. + -- That is done by 'finaliseArgBoxities'! = Unbox (DataConPatContext dc tc_args co) ds | otherwise @@ -657,33 +610,6 @@ Note that the data constructor /can/ have evidence arguments: equality constraints, type classes etc. So it can be GADT. These evidence arguments are simply value arguments, and should not get in the way. -Note [Do not unbox class dictionaries] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If we have - f :: Ord a => [a] -> Int -> a - {-# INLINABLE f #-} -and we worker/wrapper f, we'll get a worker with an INLINABLE pragma -(see Note [Worker/wrapper for INLINABLE functions] in GHC.Core.Opt.WorkWrap), -which can still be specialised by the type-class specialiser, something like - fw :: Ord a => [a] -> Int# -> a - -BUT if f is strict in the Ord dictionary, we might unpack it, to get - fw :: (a->a->Bool) -> [a] -> Int# -> a -and the type-class specialiser can't specialise that. An example is #6056. - -But in any other situation, a dictionary is just an ordinary value, -and can be unpacked. So we track the INLINABLE pragma, and discard the boxity -flag in finaliseBoxity (see the isClassPred test). - -Historical note: #14955 describes how I got this fix wrong the first time. - -Note that the simplicity of this fix implies that INLINE functions (such as -wrapper functions after the WW run) will never say that they unbox class -dictionaries. That's not ideal, but not worth losing sleep over, as INLINE -functions will have been inlined by the time we run demand analysis so we'll -see the unboxing around the worker in client modules. I got aware of the issue -in T5075 by the change in boxity of loop between demand analysis runs. - Note [mkWWstr and unsafeCoerce] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By using unsafeCoerce, it is possible to make the number of demands fail to @@ -691,193 +617,6 @@ match the number of constructor arguments; this happened in #8037. If so, the worker/wrapper split doesn't work right and we get a Core Lint bug. The fix here is simply to decline to do w/w if that happens. -Note [Unboxing evaluated arguments] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider this program (due to Roman): - - data X a = X !a - - foo :: X Int -> Int -> Int - foo x@(X a) n = go 0 - where - go i | i < n = a + go (i+1) - | otherwise = 0 - -We want the worker for 'foo' too look like this: - - $wfoo :: Int# -> Int# -> Int# - -with the first argument unboxed, so that it is not eval'd each time around the -'go' loop (which would otherwise happen, since 'foo' is not strict in 'a'). It -is sound for the wrapper to pass an unboxed arg because X is strict -(see Note [Strictness and Unboxing] in "GHC.Core.Opt.DmdAnal"), so its argument -must be evaluated. And if we *don't* pass an unboxed argument, we can't even -repair it by adding a `seq` thus: - - foo (X a) n = a `seq` go 0 - -because the seq is discarded (very early) since X is strict! - -So here's what we do - -* Since this has nothing to do with how 'foo' uses 'a', we leave demand analysis - alone, but account for the additional evaluatedness when annotating the binder - in 'annotateLamIdBndr' via 'finaliseBoxity', which will retain the Unboxed boxity - on 'a' in the definition of 'foo' in the demand 'L!P(L)'; meaning it's used - lazily but unboxed nonetheless. This seems to contradict - Note [No lazy, Unboxed demands in demand signature], but we know that 'a' is - evaluated and thus can be unboxed. - -* When 'finaliseBoxity' decides to unbox a record, it will zip the field demands - together with the respective 'StrictnessMark'. In case of 'x', it will pair - up the lazy field demand 'L!P(L)' on 'a' with 'MarkedStrict' to account for - the strict field. - -* Said 'StrictnessMark' is passed to the recursive invocation of - 'finaliseBoxity' when deciding whether to unbox 'a'. 'a' was used lazily, but - since it also says 'MarkedStrict', we'll retain the 'Unboxed' boxity on 'a'. - -* Worker/wrapper will consult 'wantToUnboxArg' for its unboxing decision. It will - /not/ look at the strictness bits of the demand, only at Boxity flags. As such, - it will happily unbox 'a' despite the lazy demand on it. - -The net effect is that boxity analysis and the w/w transformation are more -aggressive about unboxing the strict arguments of a data constructor than when -looking at strictness info exclusively. It is very much like (Nested) CPR, which -needs its nested fields to be evaluated in order for it to unbox nestedly. - -There is the usual danger of reboxing, which as usual we ignore. But -if X is monomorphic, and has an UNPACK pragma, then this optimisation -is even more important. We don't want the wrapper to rebox an unboxed -argument, and pass an Int to $wfoo! - -This works in nested situations like T10482 - - data family Bar a - data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) - newtype instance Bar Int = Bar Int - - foo :: Bar ((Int, Int), Int) -> Int -> Int - foo f k = case f of BarPair x y -> - case burble of - True -> case x of - BarPair p q -> ... - False -> ... - -The extra eagerness lets us produce a worker of type: - $wfoo :: Int# -> Int# -> Int# -> Int -> Int - $wfoo p# q# y# = ... - -even though the `case x` is only lazily evaluated. - ---------- Historical note ------------ -We used to add data-con strictness demands when demand analysing case -expression. However, it was noticed in #15696 that this misses some cases. For -instance, consider the program (from T10482) - - data family Bar a - data instance Bar (a, b) = BarPair !(Bar a) !(Bar b) - newtype instance Bar Int = Bar Int - - foo :: Bar ((Int, Int), Int) -> Int -> Int - foo f k = - case f of - BarPair x y -> case burble of - True -> case x of - BarPair p q -> ... - False -> ... - -We really should be able to assume that `p` is already evaluated since it came -from a strict field of BarPair. This strictness would allow us to produce a -worker of type: - - $wfoo :: Int# -> Int# -> Int# -> Int -> Int - $wfoo p# q# y# = ... - -even though the `case x` is only lazily evaluated - -Indeed before we fixed #15696 this would happen since we would float the inner -`case x` through the `case burble` to get: - - foo f k = - case f of - BarPair x y -> case x of - BarPair p q -> case burble of - True -> ... - False -> ... - -However, after fixing #15696 this could no longer happen (for the reasons -discussed in ticket:15696#comment:76). This means that the demand placed on `f` -would then be significantly weaker (since the False branch of the case on -`burble` is not strict in `p` or `q`). - -Consequently, we now instead account for data-con strictness in mkWWstr_one, -applying the strictness demands to the final result of DmdAnal. The result is -that we get the strict demand signature we wanted even if we can't float -the case on `x` up through the case on `burble`. - -Note [No nested Unboxed inside Boxed in demand signature] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider -``` -f p@(x,y) - | even (x+y) = [] - | otherwise = [p] -``` -Demand analysis will infer that the function body puts a demand of `1P(1!L,1!L)` -on 'p', e.g., Boxed on the outside but Unboxed on the inside. But worker/wrapper -can't unbox the pair components without unboxing the pair! So we better say -`1P(1L,1L)` in the demand signature in order not to spread wrong Boxity info. -That happens in 'finaliseBoxity'. - -Note [No lazy, Unboxed demands in demand signature] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider T19407: - - data Huge = Huge Bool () ... () -- think: DynFlags - data T = T { h :: Huge, n :: Int } - f t@(T h _) = g h t - g (H b _ ... _) t = if b then 1 else n t - -The body of `g` puts (approx.) demand `L!P(A,1)` on `t`. But we better -not put that demand in `g`'s demand signature, because worker/wrapper will not -in general unbox a lazy-and-unboxed demand like `L!P(..)`. -(The exception are known-to-be-evaluated arguments like strict fields, -see Note [Unboxing evaluated arguments].) - -The program above is an example where spreading misinformed boxity through the -signature is particularly egregious. If we give `g` that signature, then `f` -puts demand `S!P(1!P(1L,A,..),ML)` on `t`. Now we will unbox `t` in `f` it and -we get - - f (T (H b _ ... _) n) = $wf b n - $wf b n = $wg b (T (H b x ... x) n) - $wg = ... - -Massive reboxing in `$wf`! Solution: Trim boxity on lazy demands in -'finaliseBoxity', modulo Note [Unboxing evaluated arguments]. - -Note [Finalising boxity for demand signature] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The worker/wrapper pass must strictly adhere to the boxity decisions encoded -in the demand signature, because that is the information that demand analysis -propagates throughout the program. Failing to implement the strategy laid out -in the signature can result in reboxing in unexpected places. Hence, we must -completely anticipate unboxing decisions during demand analysis and reflect -these decicions in demand annotations. That is the job of 'finaliseBoxity', -which is defined here and called from demand analysis. - -Here is a list of different Notes it has to take care of: - - * Note [No lazy, Unboxed demands in demand signature] such as `L!P(L)` in - general, but still allow Note [Unboxing evaluated arguments] - * Note [No nested Unboxed inside Boxed in demand signature] such as `1P(1!L)` - * Implement fixes for corner cases Note [Do not unbox class dictionaries] - and Note [mkWWstr and unsafeCoerce] - -Then, in worker/wrapper blindly trusts the boxity info in the demand signature -and will not look at strictness info *at all*, in 'wantToUnboxArg'. - Note [non-algebraic or open body type warning] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are a few cases where the W/W transformation is told that something @@ -1031,6 +770,7 @@ function is worthy for splitting: E.g. B comes from a function like f x = error "urk" and the absent demand A can come from Note [Unboxing evaluated arguments] + in GHC.Core.Opt.DmdAnal. 2. If the argument is evaluated strictly (or known to be eval'd), we can take a view into the product demand ('viewProd'). In accordance @@ -1070,9 +810,9 @@ function is worthy for splitting: in GHC itself where the tuple was DynFlags 3. In all other cases (e.g., lazy, used demand and not eval'd), - 'finaliseBoxity' will have cleared the Boxity flag to 'Boxed' - (see Note [Finalising boxity for demand signature]) and - 'wantToUnboxArg' returns 'StopUnboxing' so that 'mkWWstr_one' + 'finaliseArgBoxities' will have cleared the Boxity flag to 'Boxed' + (see Note [Finalising boxity for demand signatures] in GHC.Core.Opt.DmdAnal) + and 'wantToUnboxArg' returns 'StopUnboxing' so that 'mkWWstr_one' stops unboxing. Note [Worker/wrapper for bottoming functions] @@ -1177,7 +917,7 @@ Needless to say, there are some wrinkles: Ideally, we'd just look at the 'StrictnessMark' of the DataCon's field, but it's quite nasty to thread the marks though 'mkWWstr' and 'mkWWstr_one'. So we rather look out for a necessary condition for strict fields: - Note [Unboxing evaluated arguments] makes it so that the demand on + Note [Unboxing evaluated arguments] in DmdAnal makes it so that the demand on 'zs' is absent and /strict/: It will get cardinality 'C_10', the empty interval, rather than 'C_00'. Hence the 'isStrictDmd' check: It guarantees we never fill in an error-thunk for an absent strict field. @@ -1410,56 +1150,6 @@ isRecDataCon fam_envs fuel dc -> combineIRDCRs (map (\dc -> go_dc (subWithInf fuel 1) rec_tc' dc) dcs) -- See Note [Detecting recursive data constructors], point (4) --- | A specialised Bool for an argument to 'finaliseBoxity'. --- See Note [Do not unbox class dictionaries]. -data InsideInlineableFun - = NotInsideInlineableFun -- ^ Not in an inlineable fun. - | InsideInlineableFun -- ^ We are in an inlineable fun, so we won't - -- unbox dictionary args. - deriving Eq - --- | This function makes sure that the demand only says 'Unboxed' where --- worker/wrapper should actually unbox and trims any boxity beyond that. --- Called for every demand annotation during DmdAnal. --- --- > data T a = T !a --- > f :: (T (Int,Int), Int) -> () --- > f p = ... -- demand on p: 1!P(L!P(L!P(L), L!P(L)), L!P(L)) --- --- 'finaliseBoxity' will trim the demand on 'p' to 1!P(L!P(LP(L), LP(L)), LP(L)). --- This is done when annotating lambdas and thunk bindings. --- See Note [Finalising boxity for demand signature] -finaliseBoxity - :: FamInstEnvs - -> InsideInlineableFun -- ^ See the haddocks on 'InsideInlineableFun' - -> Type -- ^ Type of the argument - -> Demand -- ^ How the arg was used - -> Demand -finaliseBoxity env in_inl_fun ty dmd = go NotMarkedStrict ty dmd - where - go mark ty dmd@(n :* _) = - case wantToUnboxArg env ty dmd of - DropAbsent -> dmd - Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} ds - -- See Note [No lazy, Unboxed demands in demand signature] - -- See Note [Unboxing evaluated arguments] - | isStrict n || isMarkedStrict mark - -- See Note [Do not unbox class dictionaries] - , in_inl_fun == NotInsideInlineableFun || not (isClassPred ty) - -- See Note [mkWWstr and unsafeCoerce] - , ds `lengthIs` dataConRepArity dc - , let arg_tys = dubiousDataConInstArgTys dc tc_args - -> -- pprTrace "finaliseBoxity:Unbox" (ppr ty $$ ppr dmd $$ ppr ds) $ - n :* (mkProd Unboxed $! zip_go_with_marks dc arg_tys ds) - -- See Note [No nested Unboxed inside Boxed in demand signature] - _ -> trimBoxity dmd - - -- See Note [Unboxing evaluated arguments] - zip_go_with_marks dc arg_tys ds = case dataConWrapId_maybe dc of - Nothing -> strictZipWith (go NotMarkedStrict) arg_tys ds - -- Shortcut when DataCon worker=wrapper - Just _ -> strictZipWith3 go (dataConRepStrictness dc) arg_tys ds - {- ************************************************************************ * * diff --git a/compiler/GHC/Types/Demand.hs b/compiler/GHC/Types/Demand.hs index 65f3239a9e..7376a610d4 100644 --- a/compiler/GHC/Types/Demand.hs +++ b/compiler/GHC/Types/Demand.hs @@ -148,7 +148,7 @@ is the important trait. Unboxing implies eager evaluation of an argument and we don't want to change the termination properties of the function. One way to ensure that is to unbox strict arguments only, but strictness is only a sufficient condition for evaluatedness. -See Note [Unboxing evaluated arguments] in "GHC.Core.Opt.WorkWrap.Utils", where +See Note [Unboxing evaluated arguments] in "GHC.Core.Opt.DmdAnal", where we manage to unbox *strict fields* of unboxed arguments that the function is not actually strict in, simply by realising that those fields have to be evaluated. @@ -200,8 +200,8 @@ two fields". By contrast, the demand signature of 'ann' above would look like A demand signature like <1P(1!L)> -- Boxed outside but Unboxed in the field -- doesn't make a lot of sense, as we can never unbox the field without unboxing -the containing record. See Note [Finalising boxity for demand signature] in -"GHC.Core.Opt.WorkWrap.Utils" for how we avoid to spread this and other kinds of +the containing record. See Note [Finalising boxity for demand signatures] in +"GHC.Core.Opt.DmdAnal" for how we avoid to spread this and other kinds of misinformed boxities. Due to various practical reasons, Boxity Analysis is not conservative at times. @@ -338,6 +338,12 @@ update h = (True, h) ``` Here, we decide to unbox 'h' because it's used Unboxed in the first branch. +Another real-life example (c.f. !7182) is in the code compiled for +GHC.Core.Unify. Here the two mutually-recursive functions: + * `unify_ty` takes its UMEnv argument boxed, but + * `uVar` takes its UMEnv argument unboxed. +So the UMEnv ends up getting reboxed every time around the loop. + Note that this is fundamentally working around a phase problem, namely that the results of boxity analysis depend on CPR analysis (and vice versa, of course). -} diff --git a/testsuite/tests/stranal/should_compile/T20817.hs b/testsuite/tests/stranal/should_compile/T20817.hs new file mode 100644 index 0000000000..eef3d26131 --- /dev/null +++ b/testsuite/tests/stranal/should_compile/T20817.hs @@ -0,0 +1,8 @@ +module Foo where + +f True (x,y,z,_,_) b1 b2 b3 b4 b5 b6 b7 b8 = (x,y,z, b1, b2, b3, b4, b5, b6, b7, b8) +f False at b1 b2 b3 b4 b5 b6 b7 b8 = f True at b1 b2 b3 b4 b5 b6 b7 b8 + + +g True (x,y,z,_,_) b1 b2 b3 = (x,y,z, b1, b2, b3) +g False at b1 b2 b3 = g True at b1 b2 b3 diff --git a/testsuite/tests/stranal/should_compile/T20817.stderr b/testsuite/tests/stranal/should_compile/T20817.stderr new file mode 100644 index 0000000000..49fa683b42 --- /dev/null +++ b/testsuite/tests/stranal/should_compile/T20817.stderr @@ -0,0 +1,358 @@ + +==================== Demand analysis ==================== +Result size of Demand analysis + = {terms: 102, types: 178, coercions: 0, joins: 0/0} + +Rec { +-- RHS size: {terms: 53, types: 64, coercions: 0, joins: 0/0} +f [Occ=LoopBreaker] + :: forall {a} {b} {c} {d} {e} {t} {t} {t} {t} {t} {t} {t} {t}. + Bool + -> (a, b, c, d, e) + -> t + -> t + -> t + -> t + -> t + -> t + -> t + -> t + -> (a, b, c, t, t, t, t, t, t, t, t) +[LclIdX, + Arity=10, + Str=<1L><1P(L,L,L,A,A)><L><L><L><L><L><L><L><L>, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, + Guidance=IF_ARGS [50 20 0 0 0 0 0 0 0 0] 150 10}] +f = \ (@a) + (@b) + (@c) + (@d) + (@e) + (@t) + (@t) + (@t) + (@t) + (@t) + (@t) + (@t) + (@t) + (ds [Dmd=1L] :: Bool) + (ds [Dmd=1P(L,L,L,A,A)] :: (a, b, c, d, e)) + (b1 :: t) + (b2 :: t) + (b3 :: t) + (b4 :: t) + (b5 :: t) + (b6 :: t) + (b7 :: t) + (b8 :: t) -> + case ds of { + False -> + f @a + @b + @c + @d + @e + @t + @t + @t + @t + @t + @t + @t + @t + GHC.Types.True + ds + b1 + b2 + b3 + b4 + b5 + b6 + b7 + b8; + True -> + case ds of { (x, y, z, ds [Dmd=A], ds [Dmd=A]) -> + (x, y, z, b1, b2, b3, b4, b5, b6, b7, b8) + } + } +end Rec } + +Rec { +-- RHS size: {terms: 33, types: 44, coercions: 0, joins: 0/0} +g [Occ=LoopBreaker] + :: forall {a} {b} {c} {d} {e} {t} {t} {t}. + Bool -> (a, b, c, d, e) -> t -> t -> t -> (a, b, c, t, t, t) +[LclIdX, + Arity=5, + Str=<1L><1!P(L,L,L,A,A)><L><L><L>, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, + Guidance=IF_ARGS [50 20 0 0 0] 100 10}] +g = \ (@a) + (@b) + (@c) + (@d) + (@e) + (@t) + (@t) + (@t) + (ds [Dmd=1L] :: Bool) + (ds [Dmd=1!P(L,L,L,A,A)] :: (a, b, c, d, e)) + (b1 :: t) + (b2 :: t) + (b3 :: t) -> + case ds of { + False -> g @a @b @c @d @e @t @t @t GHC.Types.True ds b1 b2 b3; + True -> + case ds of { (x, y, z, ds [Dmd=A], ds [Dmd=A]) -> + (x, y, z, b1, b2, b3) + } + } +end Rec } + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Prim.Addr# +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 20 0}] +$trModule = "main"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Types.TrName +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +$trModule = GHC.Types.TrNameS $trModule + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Prim.Addr# +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 20 0}] +$trModule = "Foo"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Types.TrName +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +$trModule = GHC.Types.TrNameS $trModule + +-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0} +Foo.$trModule :: GHC.Types.Module +[LclIdX, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +Foo.$trModule = GHC.Types.Module $trModule $trModule + + + + +==================== Demand analysis ==================== +Result size of Demand analysis + = {terms: 137, types: 260, coercions: 0, joins: 0/0} + +Rec { +-- RHS size: {terms: 53, types: 64, coercions: 0, joins: 0/0} +f [Occ=LoopBreaker] + :: forall {a} {b} {c} {d} {e} {t} {t} {t} {t} {t} {t} {t} {t}. + Bool + -> (a, b, c, d, e) + -> t + -> t + -> t + -> t + -> t + -> t + -> t + -> t + -> (a, b, c, t, t, t, t, t, t, t, t) +[LclIdX, + Arity=10, + Str=<1L><1P(L,L,L,A,A)><L><L><L><L><L><L><L><L>, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, + Guidance=IF_ARGS [50 20 0 0 0 0 0 0 0 0] 150 10}] +f = \ (@a) + (@b) + (@c) + (@d) + (@e) + (@t) + (@t) + (@t) + (@t) + (@t) + (@t) + (@t) + (@t) + (ds [Dmd=1L] :: Bool) + (ds [Dmd=1P(L,L,L,A,A)] :: (a, b, c, d, e)) + (b1 :: t) + (b2 :: t) + (b3 :: t) + (b4 :: t) + (b5 :: t) + (b6 :: t) + (b7 :: t) + (b8 :: t) -> + case ds of { + False -> + f @a + @b + @c + @d + @e + @t + @t + @t + @t + @t + @t + @t + @t + GHC.Types.True + ds + b1 + b2 + b3 + b4 + b5 + b6 + b7 + b8; + True -> + case ds of { (x, y, z, ds [Dmd=A], ds [Dmd=A]) -> + (x, y, z, b1, b2, b3, b4, b5, b6, b7, b8) + } + } +end Rec } + +Rec { +-- RHS size: {terms: 34, types: 36, coercions: 0, joins: 0/0} +$wg [InlPrag=[2], + Occ=LoopBreaker, + Dmd=LCL(C1(C1(C1(C1(C1(C1(!L)))))))] + :: forall {a} {b} {c} {d} {e} {t} {t} {t}. + Bool -> a -> b -> c -> t -> t -> t -> (# a, b, c, t, t, t #) +[LclId, + Arity=7, + Str=<1L><L><L><L><L><L><L>, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, + Guidance=IF_ARGS [30 0 0 0 0 0 0] 100 10}] +$wg + = \ (@a) + (@b) + (@c) + (@d) + (@e) + (@t) + (@t) + (@t) + (ds [Dmd=1L] :: Bool) + (ww :: a) + (ww :: b) + (ww :: c) + (b1 :: t) + (b2 :: t) + (b3 :: t) -> + case ds of { + False -> + $wg @a @b @c @d @e @t @t @t GHC.Types.True ww ww ww b1 b2 b3; + True -> (# ww, ww, ww, b1, b2, b3 #) + } +end Rec } + +-- RHS size: {terms: 33, types: 62, coercions: 0, joins: 0/0} +g [InlPrag=[2]] + :: forall {a} {b} {c} {d} {e} {t} {t} {t}. + Bool -> (a, b, c, d, e) -> t -> t -> t -> (a, b, c, t, t, t) +[LclIdX, + Arity=5, + Str=<1L><1!P(L,L,L,A,A)><L><L><L>, + Cpr=1, + Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, + Guidance=ALWAYS_IF(arity=5,unsat_ok=True,boring_ok=False) + Tmpl= \ (@a) + (@b) + (@c) + (@d) + (@e) + (@t) + (@t) + (@t) + (ds [Occ=Once1, Dmd=SL] :: Bool) + (ds [Occ=Once1!, Dmd=S!P(L,L,L,A,A)] :: (a, b, c, d, e)) + (b1 [Occ=Once1] :: t) + (b2 [Occ=Once1] :: t) + (b3 [Occ=Once1] :: t) -> + case ds of + { (ww [Occ=Once1], ww [Occ=Once1], ww [Occ=Once1], + _ [Occ=Dead, Dmd=A], _ [Occ=Dead, Dmd=A]) -> + case $wg @a @b @c @d @e @t @t @t ds ww ww ww b1 b2 b3 of + { (# ww [Occ=Once1], ww [Occ=Once1], ww [Occ=Once1], + ww [Occ=Once1], ww [Occ=Once1], ww [Occ=Once1] #) -> + (ww, ww, ww, ww, ww, ww) + } + }}] +g = \ (@a) + (@b) + (@c) + (@d) + (@e) + (@t) + (@t) + (@t) + (ds [Dmd=1L] :: Bool) + (ds [Dmd=1!P(L,L,L,A,A)] :: (a, b, c, d, e)) + (b1 :: t) + (b2 :: t) + (b3 :: t) -> + case ds of { (ww, ww, ww, ww [Dmd=A], ww [Dmd=A]) -> + case $wg @a @b @c @d @e @t @t @t ds ww ww ww b1 b2 b3 of + { (# ww, ww, ww, ww, ww, ww #) -> + (ww, ww, ww, ww, ww, ww) + } + } + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Prim.Addr# +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 20 0}] +$trModule = "main"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Types.TrName +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +$trModule = GHC.Types.TrNameS $trModule + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Prim.Addr# +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 20 0}] +$trModule = "Foo"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +$trModule :: GHC.Types.TrName +[LclId, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +$trModule = GHC.Types.TrNameS $trModule + +-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0} +Foo.$trModule :: GHC.Types.Module +[LclIdX, + Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True, + WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}] +Foo.$trModule = GHC.Types.Module $trModule $trModule + + + diff --git a/testsuite/tests/stranal/should_compile/all.T b/testsuite/tests/stranal/should_compile/all.T index 1723436f8d..ac35fc42ce 100644 --- a/testsuite/tests/stranal/should_compile/all.T +++ b/testsuite/tests/stranal/should_compile/all.T @@ -77,6 +77,7 @@ test('T19882b', normal, compile, ['']) # We want that the 'go' joinrec in the unfolding has been worker/wrappered. # So we simply grep for 'jump $wgo' and hope we find more than 2 call sites: test('T20510', [ grep_errmsg(r'jump \$wgo') ], compile, ['-dsuppress-uniques -ddump-exitify']) - test('T20746', normal, compile, ['-dsuppress-uniques -ddump-simpl']) test('T20746b', normal, compile, ['-dsuppress-uniques -ddump-simpl -dno-typeable-binds']) +test('T20817', [ grep_errmsg(r'Str') ], compile, ['-dsuppress-uniques -ddump-stranal']) + diff --git a/testsuite/tests/stranal/sigs/T19871.stderr b/testsuite/tests/stranal/sigs/T19871.stderr index 14619b5891..1afea4e841 100644 --- a/testsuite/tests/stranal/sigs/T19871.stderr +++ b/testsuite/tests/stranal/sigs/T19871.stderr @@ -17,9 +17,9 @@ T19871.f6: <1!P(A,A,A,A,A,1L,A,A,A,A,A,A)> T19871.f7: <1!P(A,A,A,A,A,A,1L,A,A,A,A,A)> T19871.f8: <1!P(A,A,A,A,A,A,A,1L,A,A,A,A)> T19871.f9: <1!P(A,A,A,A,A,A,A,A,1L,A,A,A)> -T19871.guarded: <MCM(L)><1!P(SL,L,L,L,L,L,L,L,L,L,L,L)> +T19871.guarded: <MCM(L)><1P(SL,L,L,L,L,L,L,L,L,L,L,L)> T19871.sumIO: <1!P(1L)><1!L><L> -T19871.update: <1!P(SL,L,L,L,L,L,L,L,L,L,L,L)> +T19871.update: <1P(SL,L,L,L,L,L,L,L,L,L,L,L)> @@ -65,8 +65,8 @@ T19871.f6: <1!P(A,A,A,A,A,1L,A,A,A,A,A,A)> T19871.f7: <1!P(A,A,A,A,A,A,1L,A,A,A,A,A)> T19871.f8: <1!P(A,A,A,A,A,A,A,1L,A,A,A,A)> T19871.f9: <1!P(A,A,A,A,A,A,A,A,1L,A,A,A)> -T19871.guarded: <MCM(L)><1!P(SL,L,L,L,L,L,L,L,L,L,L,L)> +T19871.guarded: <MCM(L)><1P(SL,L,L,L,L,L,L,L,L,L,L,L)> T19871.sumIO: <1!P(1L)><1!L><L> -T19871.update: <1!P(SL,L,L,L,L,L,L,L,L,L,L,L)> +T19871.update: <1P(SL,L,L,L,L,L,L,L,L,L,L,L)> |