summaryrefslogtreecommitdiff
path: root/compiler/stranal
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2018-12-12 17:22:07 +0000
committerSimon Peyton Jones <simonpj@microsoft.com>2018-12-12 17:38:25 +0000
commitd77501cd5b9060e38acd50e11e0c5aae89d75b65 (patch)
treea6cba21b5adb5c04b476c92bf33436bd1a880981 /compiler/stranal
parentded4a1db4d61b1bc8b5fd73e8eb87cf572efda35 (diff)
downloadhaskell-d77501cd5b9060e38acd50e11e0c5aae89d75b65.tar.gz
Improvements to demand analysis
This patch collects a few improvements triggered by Trac #15696, and fixing Trac #16029 * Stop making toCleanDmd behave specially for unlifted types. This special case was the cause of stupid behaviour in Trac #16029. And to my joy I discovered the let/app invariant rendered it unnecessary. (Maybe the special case pre-dated the let/app invariant.) Result: less special-case handling in the compiler, and better perf for the compiled code. * In WwLib.mkWWstr_one, treat seqDmd like U(AAA). It was not being so treated before, which again led to stupid code. * Update and improve Notes There are .stderr test wibbles because we get slightly different strictness signatures for an argumment of unlifted type: <L,U> rather than <S,U> for Int# <S,U> rather than <S(S),U(U)> for Int
Diffstat (limited to 'compiler/stranal')
-rw-r--r--compiler/stranal/DmdAnal.hs28
-rw-r--r--compiler/stranal/WwLib.hs213
2 files changed, 175 insertions, 66 deletions
diff --git a/compiler/stranal/DmdAnal.hs b/compiler/stranal/DmdAnal.hs
index 0b8133d98f..6e10c987a9 100644
--- a/compiler/stranal/DmdAnal.hs
+++ b/compiler/stranal/DmdAnal.hs
@@ -26,7 +26,7 @@ import BasicTypes
import Data.List
import DataCon
import Id
-import CoreUtils ( exprIsHNF, exprType, exprIsTrivial )
+import CoreUtils ( exprIsHNF, exprType, exprIsTrivial, exprOkForSpeculation )
import TyCon
import Type
import Coercion ( Coercion, coVarsOfCo )
@@ -140,11 +140,15 @@ dmdTransformThunkDmd e
-- See ↦* relation in the Cardinality Analysis paper
dmdAnalStar :: AnalEnv
-> Demand -- This one takes a *Demand*
- -> CoreExpr -> (BothDmdArg, CoreExpr)
+ -> CoreExpr -- Should obey the let/app invariatn
+ -> (BothDmdArg, CoreExpr)
dmdAnalStar env dmd e
- | (defer_and_use, cd) <- toCleanDmd dmd (exprType e)
- , (dmd_ty, e') <- dmdAnal env cd e
- = (postProcessDmdType defer_and_use dmd_ty, e')
+ | (dmd_shell, cd) <- toCleanDmd dmd
+ , (dmd_ty, e') <- dmdAnal env cd e
+ = ASSERT2( not (isUnliftedType (exprType e)) || exprOkForSpeculation e, ppr e )
+ -- The argument 'e' should satisfy the let/app invariant
+ -- See Note [Analysing with absent demand] in Demand.hs
+ (postProcessDmdType dmd_shell dmd_ty, e')
-- Main Demand Analsysis machinery
dmdAnal, dmdAnal' :: AnalEnv
@@ -170,19 +174,6 @@ dmdAnal' env dmd (Cast e co)
where
(dmd_ty, e') = dmdAnal env dmd e
-{- ----- I don't get this, so commenting out -------
- to_co = pSnd (coercionKind co)
- dmd'
- | Just tc <- tyConAppTyCon_maybe to_co
- , isRecursiveTyCon tc = cleanEvalDmd
- | otherwise = dmd
- -- This coerce usually arises from a recursive
- -- newtype, and we don't want to look inside them
- -- for exactly the same reason that we don't look
- -- inside recursive products -- we might not reach
- -- a fixpoint. So revert to a vanilla Eval demand
--}
-
dmdAnal' env dmd (Tick t e)
= (dmd_ty, Tick t e')
where
@@ -259,6 +250,7 @@ dmdAnal' env dmd (Case scrut case_bndr ty [(DataAlt dc, bndrs, rhs)])
-- pprTrace "dmdAnal:Case1" (vcat [ text "scrut" <+> ppr scrut
-- , text "dmd" <+> ppr dmd
-- , text "case_bndr_dmd" <+> ppr (idDemandInfo case_bndr')
+-- , text "id_dmds" <+> ppr id_dmds
-- , text "scrut_dmd" <+> ppr scrut_dmd
-- , text "scrut_ty" <+> ppr scrut_ty
-- , text "alt_ty" <+> ppr alt_ty2
diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs
index ce036c8c26..ef6be898c7 100644
--- a/compiler/stranal/WwLib.hs
+++ b/compiler/stranal/WwLib.hs
@@ -567,6 +567,7 @@ as-yet-un-filled-in pkgState files.
-- brings into scope work_args (via cases)
-- * work_fn assumes work_args are in scope, a
-- brings into scope wrap_arg (via lets)
+-- See Note [How to do the worker/wrapper split]
mkWWstr_one :: DynFlags -> FamInstEnvs
-> Bool -- True <=> INLINEABLE pragama on this function defn
-- See Note [Do not unpack class dictionaries]
@@ -576,43 +577,42 @@ mkWWstr_one dflags fam_envs has_inlineable_prag arg
| isTyVar arg
= return (False, [arg], nop_fn, nop_fn)
- -- See Note [Worker-wrapper for bottoming functions]
| isAbsDmd dmd
, Just work_fn <- mk_absent_let dflags arg
-- Absent case. We can't always handle absence for arbitrary
-- unlifted types, so we need to choose just the cases we can
- --- (that's what mk_absent_let does)
+ -- (that's what mk_absent_let does)
= return (True, [], nop_fn, work_fn)
- -- See Note [Worthy functions for Worker-Wrapper split]
- | isSeqDmd dmd -- `seq` demand; evaluate in wrapper in the hope
- -- of dropping seqs in the worker
- = let arg_w_unf = arg `setIdUnfolding` evaldUnfolding
- -- Tell the worker arg that it's sure to be evaluated
- -- so that internal seqs can be dropped
- in return (True, [arg_w_unf], mk_seq_case arg, nop_fn)
- -- Pass the arg, anyway, even if it is in theory discarded
- -- Consider
- -- f x y = x `seq` y
- -- x gets a (Eval (Poly Abs)) demand, but if we fail to pass it to the worker
- -- we ABSOLUTELY MUST record that x is evaluated in the wrapper.
- -- Something like:
- -- f x y = x `seq` fw y
- -- fw y = let x{Evald} = error "oops" in (x `seq` y)
- -- If we don't pin on the "Evald" flag, the seq doesn't disappear, and
- -- we end up evaluating the absent thunk.
- -- But the Evald flag is pretty weird, and I worry that it might disappear
- -- during simplification, so for now I've just nuked this whole case
-
| isStrictDmd dmd
, Just cs <- splitProdDmd_maybe dmd
-- See Note [Unpacking arguments with product and polymorphic demands]
, not (has_inlineable_prag && isClassPred arg_ty)
-- See Note [Do not unpack class dictionaries]
- , Just (data_con, inst_tys, inst_con_arg_tys, co)
- <- deepSplitProductType_maybe fam_envs arg_ty
+ , Just stuff@(_, _, inst_con_arg_tys, _) <- deepSplitProductType_maybe fam_envs arg_ty
, cs `equalLength` inst_con_arg_tys
-- See Note [mkWWstr and unsafeCoerce]
+ = unbox_one dflags fam_envs arg cs stuff
+
+ | isSeqDmd dmd -- For seqDmd, splitProdDmd_maybe will return Nothing, but
+ -- it should behave like <S, U(AAAA)>, for some suitable arity
+ , Just stuff@(_, _, inst_con_arg_tys, _) <- deepSplitProductType_maybe fam_envs arg_ty
+ , let abs_dmds = map (const absDmd) inst_con_arg_tys
+ = unbox_one dflags fam_envs arg abs_dmds stuff
+
+ | otherwise -- Other cases
+ = return (False, [arg], nop_fn, nop_fn)
+
+ where
+ arg_ty = idType arg
+ dmd = idDemandInfo arg
+
+unbox_one :: DynFlags -> FamInstEnvs -> Var
+ -> [Demand]
+ -> (DataCon, [Type], [(Type, StrictnessMark)], Coercion)
+ -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr)
+unbox_one dflags fam_envs arg cs
+ (data_con, inst_tys, inst_con_arg_tys, co)
= do { (uniq1:uniqs) <- getUniquesM
; let -- See Note [Add demands for strict constructors]
cs' = addDataConStrictness data_con cs
@@ -627,13 +627,7 @@ mkWWstr_one dflags fam_envs has_inlineable_prag arg
; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False unpk_args
; return (True, worker_args, unbox_fn . wrap_fn, work_fn . rebox_fn) }
-- Don't pass the arg, rebox instead
-
- | otherwise -- Other cases
- = return (False, [arg], nop_fn, nop_fn)
-
where
- arg_ty = idType arg
- dmd = idDemandInfo arg
mk_ww_arg uniq ty sub_dmd = setIdDemandInfo (mk_ww_local uniq ty) sub_dmd
----------------------
@@ -647,11 +641,76 @@ addDataConStrictness con ds
zipWith add ds strs
where
strs = dataConRepStrictness con
- add dmd str | isMarkedStrict str
- , not (isAbsDmd dmd) = dmd `bothDmd` seqDmd
+ add dmd str | isMarkedStrict str = strictifyDmd dmd
| otherwise = dmd
-{-
+{- Note [How to do the worker/wrapper split]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The worker-wrapper transformation, mkWWstr_one, takes into account
+several possibilities to decide if the function is worthy for
+splitting:
+
+1. If an argument is absent, it would be silly to pass it to
+ the worker. Hence the isAbsDmd case. This case must come
+ first because a demand like <S,A> or <B,A> is possible.
+ E.g. <B,A> comes from a function like
+ f x = error "urk"
+ and <S,A> can come from Note [Add demands for strict constructors]
+
+2. If the argument is evaluated strictly, and we can split the
+ product demand (splitProdDmd_maybe), then unbox it and w/w its
+ pieces. For example
+
+ f :: (Int, Int) -> Int
+ f p = (case p of (a,b) -> a) + 1
+ is split to
+ f :: (Int, Int) -> Int
+ f p = case p of (a,b) -> $wf a
+
+ $wf :: Int -> Int
+ $wf a = a + 1
+
+ and
+ g :: Bool -> (Int, Int) -> Int
+ g c p = case p of (a,b) ->
+ if c then a else b
+ is split to
+ g c p = case p of (a,b) -> $gw c a b
+ $gw c a b = if c then a else b
+
+2a But do /not/ split if the components are not used; that is, the
+ usage is just 'Used' rather than 'UProd'. In this case
+ splitProdDmd_maybe returns Nothing. Otherwise we risk decomposing
+ a massive tuple which is barely used. Example:
+
+ f :: ((Int,Int) -> String) -> (Int,Int) -> a
+ f g pr = error (g pr)
+
+ main = print (f fst (1, error "no"))
+
+ Here, f does not take 'pr' apart, and it's stupid to do so.
+ Imagine that it had millions of fields. This actually happened
+ in GHC itself where the tuple was DynFlags
+
+3. A plain 'seqDmd', which is head-strict with usage UHead, can't
+ be split by splitProdDmd_maybe. But we want it to behave just
+ like U(AAAA) for suitable number of absent demands. So we have
+ a special case for it, with arity coming from the data constructor.
+
+Note [Worker-wrapper for bottoming functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We used not to split if the result is bottom.
+[Justification: there's no efficiency to be gained.]
+
+But it's sometimes bad not to make a wrapper. Consider
+ fw = \x# -> let x = I# x# in case e of
+ p1 -> error_fn x
+ p2 -> error_fn x
+ p3 -> the real stuff
+The re-boxing code won't go away unless error_fn gets a wrapper too.
+[We don't do reboxing now, but in general it's better to pass an
+unboxed thing to f, and have it reboxed in the error cases....]
+
Note [Add demands for strict constructors]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider this program (due to Roman):
@@ -677,22 +736,36 @@ because X is strict, so its argument must be evaluated. And if we
foo (X a) n = a `seq` go 0
+because the seq is discarded (very early) since X is strict!
+
So here's what we do
-* We leave the demand-analysis alone. The demand on 'a' in the definition of
- 'foo' is <L, U(U)>; the strictness info is Lazy because foo's body may or may
- not evaluate 'a'; but the usage info says that 'a' is unpacked and its content
- is used.
+* We leave the demand-analysis alone. The demand on 'a' in the
+ definition of 'foo' is <L, U(U)>; the strictness info is Lazy
+ because foo's body may or may not evaluate 'a'; but the usage info
+ says that 'a' is unpacked and its content is used.
-* During worker/wrapper, if we unpack a strict constructor (as we do for 'foo'),
- we use 'strictifyDemand' to bump up the strictness on the strict arguments of
- the data constructor. That in turn means that, if the usage info supports
- doing so (i.e. splitProdDmd_maybe returns Just), we will unpack that argument
+* During worker/wrapper, if we unpack a strict constructor (as we do
+ for 'foo'), we use 'addDataConStrictness' to bump up the strictness on
+ the strict arguments of the data constructor.
+
+* That in turn means that, if the usage info supports doing so
+ (i.e. splitProdDmd_maybe returns Just), we will unpack that argument
-- even though the original demand (e.g. on 'a') was lazy.
-The net effect is that the w/w transformation is more aggressive about unpacking
-the strict arguments of a data constructor, when that eagerness is supported by
-the usage info.
+* What does "bump up the strictness" mean? Just add a head-strict
+ demand to the strictness! Even for a demand like <L,A> we can
+ safely turn it into <S,A>; remember case (1) of
+ Note [How to do the worker/wrapper split].
+
+The net effect is that the w/w transformation is more aggressive about
+unpacking the strict arguments of a data constructor, when that
+eagerness is supported by the usage info.
+
+There is the usual danger of reboxing, which as usual we ignore. But
+if X is monomorphic, and has an UNPACK pragma, then this optimisation
+is even more important. We don't want the wrapper to rebox an unboxed
+argument, and pass an Int to $wfoo!
This works in nested situations like
@@ -701,20 +774,64 @@ This works in nested situations like
newtype instance Bar Int = Bar Int
foo :: Bar ((Int, Int), Int) -> Int -> Int
+ foo f k = case f of BarPair x y ->
+ case burble of
+ True -> case x of
+ BarPair p q -> ...
+ False -> ...
+
+The extra eagerness lets us produce a worker of type:
+ $wfoo :: Int# -> Int# -> Int# -> Int -> Int
+ $wfoo p# q# y# = ...
+
+even though the `case x` is only lazily evaluated.
+
+--------- Historical note ------------
+We used to add data-con strictness demands when demand analysing case
+expression. However, it was noticed in #15696 that this misses some cases. For
+instance, consider the program (from T10482)
+
+ data family Bar a
+ data instance Bar (a, b) = BarPair !(Bar a) !(Bar b)
+ newtype instance Bar Int = Bar Int
+
+ foo :: Bar ((Int, Int), Int) -> Int -> Int
foo f k =
case f of
BarPair x y -> case burble of
- True -> case x of
- BarPair p q -> ...
- False -> ...
+ True -> case x of
+ BarPair p q -> ...
+ False -> ...
-The extra eagerness lets us produce a worker of type:
+We really should be able to assume that `p` is already evaluated since it came
+from a strict field of BarPair. This strictness would allow us to produce a
+worker of type:
$wfoo :: Int# -> Int# -> Int# -> Int -> Int
$wfoo p# q# y# = ...
even though the `case x` is only lazily evaluated
+Indeed before we fixed #15696 this would happen since we would float the inner
+`case x` through the `case burble` to get:
+
+ foo f k =
+ case f of
+ BarPair x y -> case x of
+ BarPair p q -> case burble of
+ True -> ...
+ False -> ...
+
+However, after fixing #15696 this could no longer happen (for the reasons
+discussed in ticket:15696#comment:76). This means that the demand placed on `f`
+would then be significantly weaker (since the False branch of the case on
+`burble` is not strict in `p` or `q`).
+
+Consequently, we now instead account for data-con strictness in mkWWstr_one,
+applying the strictness demands to the final result of DmdAnal. The result is
+that we get the strict demand signature we wanted even if we can't float
+the case on `x` up through the case on `burble`.
+
Note [mkWWstr and unsafeCoerce]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~