summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Klebinger <klebinger.andreas@gmx.at>2020-06-08 13:03:18 +0200
committerAndreas Klebinger <klebinger.andreas@gmx.at>2020-06-08 13:35:25 +0200
commitb23c6d7f8c61ade781c50e95f913bb1b2f45eda4 (patch)
treed991ae8d7109c229932efac8d12f9bdd0bfe6f3e
parent2b792facab46f7cdd09d12e79499f4e0dcd4293f (diff)
downloadhaskell-wip/andreask/dmd_widening.tar.gz
DmdAnal: Limit nesting of incoming demands.wip/andreask/dmd_widening
In #18304 we saw a case where large recursive groups caused demand annotations to grow to millions of constructors. To avoid this we limit the depth of incoming demands when analysing expressions. This loses some precision, but we don't really make use of demands nested this deeply so I don't expect performance regressions from this.
-rw-r--r--compiler/GHC/Core/Opt/DmdAnal.hs103
-rw-r--r--compiler/GHC/Core/TyCon.hs3
-rw-r--r--compiler/GHC/Types/Demand.hs50
3 files changed, 141 insertions, 15 deletions
diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs
index 97c7e29622..db442f2a7c 100644
--- a/compiler/GHC/Core/Opt/DmdAnal.hs
+++ b/compiler/GHC/Core/Opt/DmdAnal.hs
@@ -8,6 +8,7 @@
-}
{-# LANGUAGE CPP #-}
+{-# LANGUAGE BangPatterns #-}
module GHC.Core.Opt.DmdAnal ( dmdAnalProgram ) where
@@ -51,7 +52,7 @@ import GHC.Types.Unique.Set
dmdAnalProgram :: DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram
dmdAnalProgram dflags fam_envs binds = do
let env = emptyAnalEnv dflags fam_envs
- let binds_plus_dmds = snd $ mapAccumL dmdAnalTopBind env binds
+ let !binds_plus_dmds = snd $ mapAccumL dmdAnalTopBind env binds
dumpIfSet_dyn dflags Opt_D_dump_str_signatures "Strictness signatures" FormatText $
dumpIdInfoOfProgram (pprIfaceStrictSig . strictnessInfo) binds_plus_dmds
-- See Note [Stamp out space leaks in demand analysis]
@@ -149,7 +150,9 @@ dmdAnal, dmdAnal' :: AnalEnv
-- See Note [Ensure demand is strict]
dmdAnal env d e = -- pprTrace "dmdAnal" (ppr d <+> ppr e) $
- dmdAnal' env d e
+ -- See Note [Demand analysis on self recursive functions]
+ -- for why we widen the incoming demand here.
+ dmdAnal' env (widenDmd 5 d) e
dmdAnal' _ _ (Lit lit) = (nopDmdType, Lit lit)
dmdAnal' _ _ (Type ty) = (nopDmdType, Type ty) -- Doesn't happen, in fact
@@ -517,6 +520,83 @@ dmdTransform env var dmd
************************************************************************
-}
+{- Note [Demand analysis on self recursive functions]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Given a data type like this:
+
+ T = C { ... , next :: T }
+
+and a function
+
+ f x = ..
+ .. -> f (next x)
+
+usage information would be unbounded in it's size.
+
+The reason is that we figure out f will use the next field of x.
+Giving us useage information of U<U>.
+Armed with this information we analyse `f (next x)` in the body again
+on the next iteration giving usage of U<U<U>>. We can repeat this
+for infinity and will never reach a fixpoint.
+
+We used to deal with this simply by limiting the number of iterations
+to 10 and giving up if we could not find a fix point in this time.
+
+While this works well for small recursive groups it doesn't work for
+large ones. This happened in #18304.
+
+The reason is simple. We analyse a recursive group of functions
+like below:
+
+f1 x = ...
+ -> f1 (next x)
+ -> f2 (next x)
+
+f2 x = ...
+ -> f1 (next x)
+ -> f2 (next x)
+ -> fn ...
+
+We analyse f1 under the default demand resulting in U<U>.
+We analyse f2 and see the call `f1 (next x)` in the body.
+Since `f1 x` has U<U> "f1 (next x)" in the body of f2 will
+result in U<U<U>> as usage demand of f2.
+
+For each additional function fn in the group of this pattern
+usage information will become nested deeper by one level.
+
+This means depth of usage information will grow linear in the
+number of functions in the recursive group. Being capped at
+iterations * n.
+
+This is still tractable, the issue in #18304 addone one more
+dimension to the problem by not having one, but two "next" fields.
+
+data T = C { ... , next1 :: T, next2 :: T}
+
+f1 x = ...
+ .. -> f1 (next1 x)
+ .. -> f1 (next2 x)
+ .. -> f2 (next1 x)
+ .. -> f2 (next2 x)
+
+Suddenly the size of usage information was growing exponentially
+in 2 ^ (n * iterations).
+
+This very quickly becomes untractable!
+
+This is a well known problem which is usually solved by adding a
+widening operator.
+
+For simplicity however we apply this operator to the incoming demand
+instead of the result. This has the same result of allowing us to reach
+a fixpoint but has two benefits:
+
+* There is only a single place where we need to care (in the argument of dmdAnal).
+* We can fully analyze functions taking apart deeply nested non-recursive types
+
+-}
-- Recursive bindings
dmdFix :: TopLevelFlag
-> AnalEnv -- Does not include bindings for this binding
@@ -623,15 +703,16 @@ dmdAnalRhsLetDown rec_flag env let_dmd id rhs
where
rhs_arity = idArity id
rhs_dmd
- -- See Note [Demand analysis for join points]
- -- See Note [Invariants on join points] invariant 2b, in GHC.Core
- -- rhs_arity matches the join arity of the join point
- | isJoinId id
- = mkCallDmds rhs_arity let_dmd
- | otherwise
- -- NB: rhs_arity
- -- See Note [Demand signatures are computed for a threshold demand based on idArity]
- = mkRhsDmd env rhs_arity rhs
+ -- See Note [Demand analysis for join points]
+ -- See Note [Invariants on join points] invariant 2b, in GHC.Core
+ -- rhs_arity matches the join arity of the join point
+ | isJoinId id
+ = mkCallDmds rhs_arity let_dmd
+ | otherwise
+ -- NB: rhs_arity
+ -- See Note [Demand signatures are computed for a threshold demand based on idArity]
+ = mkRhsDmd env rhs_arity rhs
+
(DmdType rhs_fv rhs_dmds rhs_div, rhs')
= dmdAnal env rhs_dmd rhs
sig = mkStrictSigForArity rhs_arity (DmdType sig_fv rhs_dmds rhs_div)
diff --git a/compiler/GHC/Core/TyCon.hs b/compiler/GHC/Core/TyCon.hs
index 80b4500685..124eab6ebc 100644
--- a/compiler/GHC/Core/TyCon.hs
+++ b/compiler/GHC/Core/TyCon.hs
@@ -2750,6 +2750,9 @@ data RecTcChecker = RC !Int (NameEnv Int)
-- The upper bound, and the number of times
-- we have encountered each TyCon
+instance Outputable RecTcChecker where
+ ppr (RC n env) = braces (text "RC" <+> ppr n <+> ppr env)
+
-- | Initialise a 'RecTcChecker' with 'defaultRecTcMaxBound'.
initRecTc :: RecTcChecker
initRecTc = RC defaultRecTcMaxBound emptyNameEnv
diff --git a/compiler/GHC/Types/Demand.hs b/compiler/GHC/Types/Demand.hs
index cbbbe6688d..5820e69a47 100644
--- a/compiler/GHC/Types/Demand.hs
+++ b/compiler/GHC/Types/Demand.hs
@@ -5,6 +5,7 @@
\section[Demand]{@Demand@: A decoupled implementation of a demand domain}
-}
+{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP, FlexibleInstances, TypeSynonymInstances, RecordWildCards #-}
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
@@ -51,7 +52,9 @@ module GHC.Types.Demand (
useCount, isUsedOnce, reuseEnv,
zapUsageDemand, zapUsageEnvSig,
zapUsedOnceDemand, zapUsedOnceSig,
- strictifyDictDmd, strictifyDmd
+ strictifyDictDmd, strictifyDmd,
+
+ widenDmd
) where
@@ -83,6 +86,12 @@ import GHC.Core.DataCon ( splitDataProductType_maybe )
data JointDmd s u = JD { sd :: s, ud :: u }
deriving ( Eq, Show )
+-- | Limit the depth of demands to the given nesting.
+-- Any sub-demand exceeding this depth will be given the top
+-- demand for the respective domain.
+widenDmd :: Int -> JointDmd StrDmd UseDmd -> JointDmd StrDmd UseDmd
+widenDmd n (JD s u) = JD (widenStrDmd n s) (widenUseDmd n u)
+
getStrDmd :: JointDmd s u -> s
getStrDmd = sd
@@ -206,6 +215,21 @@ data StrDmd
deriving ( Eq, Show )
+widenStrDmd :: Int -> StrDmd -> StrDmd
+widenStrDmd !n d =
+ case d of
+ HyperStr -> HyperStr
+ HeadStr -> HeadStr
+ SCall d -> SCall $! widenStrDmd n d
+ SProd args -> SProd $ map (widenStrArgDmd n) args
+
+widenStrArgDmd :: Int -> ArgStr -> ArgStr
+widenStrArgDmd 0 _ = Lazy
+widenStrArgDmd n d =
+ case d of
+ Lazy -> Lazy
+ Str d -> Str $! widenStrDmd (n-1) d
+
-- | Strictness of a function argument.
type ArgStr = Str StrDmd
@@ -330,14 +354,20 @@ splitStrProdDmd _ (SCall {}) = Nothing
UHead
|
Count x -
- |
- Abs
+ |
+ Abs
-}
-- | Domain for genuine usage
data UseDmd
- = UCall Count UseDmd -- ^ Call demand for absence.
+ = UCall Count UseDmd -- ^ Call demand for absence analysis.
-- Used only for values of function type
+ --
+ -- The Count argument describes how often the
+ -- value itself is used.
+ -- The UseDmd describes how often we use the result
+ -- of applying one argument to the value. This can
+ -- and often is nested for multiple arguments.
| UProd [ArgUse] -- ^ Product.
-- Used only for values of product type
@@ -363,6 +393,18 @@ data UseDmd
-- (top of the lattice)
deriving ( Eq, Show )
+widenUseDmd :: Int -> UseDmd -> UseDmd
+widenUseDmd 0 _ = Used
+widenUseDmd _ UHead = UHead
+widenUseDmd _ Used = Used
+widenUseDmd n (UCall c d) = UCall c $! widenUseDmd n d
+widenUseDmd n (UProd args) = UProd $ map (widenUseArg n) args
+
+widenUseArg :: Int -> ArgUse -> ArgUse
+widenUseArg _ Abs = Abs
+widenUseArg n (Use c d) = Use c $! widenUseDmd (n-1) d
+
+
-- Extended usage demand for absence and counting
type ArgUse = Use UseDmd