summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorAndreas Klebinger <klebinger.andreas@gmx.at>2020-01-28 20:53:04 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-05-27 02:35:11 -0400
commit03d69e4bb6b92ccd8238bebd9cff68da23741f49 (patch)
tree4dbaf811ec9bda4c9544530f63509ad8c7062bc3 /compiler
parent9faafb0aaff04e86a58b9e108f84618b12f2057c (diff)
downloadhaskell-03d69e4bb6b92ccd8238bebd9cff68da23741f49.tar.gz
Enable strict dicts by default at -O2.
In the common case this is a straight performance win at a compile time cost so we enable it at -O2. In rare cases it can lead to compile time regressions because of changed inlining behaviour. Which can very rarely also affect runtime performance. Increasing the inlining threshold can help to avoid this which is documented in the user guide. In terms of measured results this reduced instructions executed for nofib by 1%. However for some cases (e.g. Cabal) enabling this by default increases compile time by 2-3% so we enable it only at -O2 where it's clear that a user is willing to trade compile time for runtime. Most of the testsuite runs without -O2 so there are few perf changes. Increases: T12545/T18698: We perform more WW work because dicts are now treated strict. T9198: Also some more work because functions are now subject to W/W Decreases: T14697: Compiling empty modules. Probably because of changes inside ghc. T9203: I can't reproduce this improvement locally. Might be spurious. ------------------------- Metric Decrease: T12227 T14697 T9203 Metric Increase: T9198 T12545 T18698a T18698b -------------------------
Diffstat (limited to 'compiler')
-rw-r--r--compiler/GHC/Core/Opt/DmdAnal.hs72
-rw-r--r--compiler/GHC/Driver/Session.hs1
2 files changed, 56 insertions, 17 deletions
diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs
index 373bdcb033..14ee7419bf 100644
--- a/compiler/GHC/Core/Opt/DmdAnal.hs
+++ b/compiler/GHC/Core/Opt/DmdAnal.hs
@@ -274,7 +274,7 @@ dmdAnalBindLetUp :: TopLevelFlag
dmdAnalBindLetUp top_lvl env id rhs anal_body = WithDmdType final_ty (R (NonRec id' rhs') (body'))
where
WithDmdType body_ty body' = anal_body env
- WithDmdType body_ty' id_dmd = findBndrDmd env notArgOfDfun body_ty id
+ WithDmdType body_ty' id_dmd = findBndrDmd env body_ty id
!id' = setBindIdDemandInfo top_lvl id id_dmd
(rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd) rhs
@@ -412,7 +412,7 @@ dmdAnal' env dmd (Lam var body)
-- body_dmd: a demand to analyze the body
WithDmdType body_ty body' = dmdAnal env body_dmd body
- WithDmdType lam_ty var' = annotateLamIdBndr env notArgOfDfun body_ty var
+ WithDmdType lam_ty var' = annotateLamIdBndr env body_ty var
new_dmd_type = multDmdType n lam_ty
in
WithDmdType new_dmd_type (Lam var' body')
@@ -424,7 +424,7 @@ dmdAnal' env dmd (Case scrut case_bndr ty [Alt alt bndrs rhs])
= let
WithDmdType rhs_ty rhs' = dmdAnal env dmd rhs
WithDmdType alt_ty1 dmds = findBndrsDmds env rhs_ty bndrs
- WithDmdType alt_ty2 case_bndr_dmd = findBndrDmd env False alt_ty1 case_bndr
+ WithDmdType alt_ty2 case_bndr_dmd = findBndrDmd env alt_ty1 case_bndr
-- Evaluation cardinality on the case binder is irrelevant and a no-op.
-- What matters is its nested sub-demand!
(_ :* case_bndr_sd) = case_bndr_dmd
@@ -1282,16 +1282,15 @@ annotateBndr env dmd_ty var
| otherwise = WithDmdType dmd_ty var
where
new_id = setIdDemandInfo var dmd
- WithDmdType dmd_ty' dmd = findBndrDmd env False dmd_ty var
+ WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty var
annotateLamIdBndr :: AnalEnv
- -> DFunFlag -- is this lambda at the top of the RHS of a dfun?
-> DmdType -- Demand type of body
-> Id -- Lambda binder
-> WithDmdType Id -- Demand type of lambda
-- and binder annotated with demand
-annotateLamIdBndr env arg_of_dfun dmd_ty id
+annotateLamIdBndr env dmd_ty id
-- For lambdas we add the demand to the argument demands
-- Only called for Ids
= assert (isId id) $
@@ -1307,7 +1306,7 @@ annotateLamIdBndr env arg_of_dfun dmd_ty id
(unf_ty, _) = dmdAnalStar env dmd unf
main_ty = addDemand dmd dmd_ty'
- WithDmdType dmd_ty' dmd = findBndrDmd env arg_of_dfun dmd_ty id
+ WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty id
{- Note [NOINLINE and strictness]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1394,10 +1393,6 @@ forget that fact, otherwise we might make 'x' absent when it isn't.
************************************************************************
-}
-type DFunFlag = Bool -- indicates if the lambda being considered is in the
- -- sequence of lambdas at the top of the RHS of a dfun
-notArgOfDfun :: DFunFlag
-notArgOfDfun = False
data AnalEnv = AE
{ ae_strict_dicts :: !Bool -- ^ Enable strict dict
@@ -1464,13 +1459,13 @@ findBndrsDmds env dmd_ty bndrs
go dmd_ty [] = WithDmdType dmd_ty []
go dmd_ty (b:bs)
| isId b = let WithDmdType dmd_ty1 dmds = go dmd_ty bs
- WithDmdType dmd_ty2 dmd = findBndrDmd env False dmd_ty1 b
+ WithDmdType dmd_ty2 dmd = findBndrDmd env dmd_ty1 b
in WithDmdType dmd_ty2 (dmd : dmds)
| otherwise = go dmd_ty bs
-findBndrDmd :: AnalEnv -> Bool -> DmdType -> Id -> WithDmdType Demand
+findBndrDmd :: AnalEnv -> DmdType -> Id -> WithDmdType Demand
-- See Note [Trimming a demand to a type]
-findBndrDmd env arg_of_dfun dmd_ty id
+findBndrDmd env dmd_ty id
= -- pprTrace "findBndrDmd" (ppr id $$ ppr dmd_ty $$ ppr starting_dmd $$ ppr dmd') $
WithDmdType dmd_ty' dmd'
where
@@ -1482,19 +1477,62 @@ findBndrDmd env arg_of_dfun dmd_ty id
id_ty = idType id
strictify dmd
+ -- See Note [Making dictionaries strict]
| ae_strict_dicts env
-- We never want to strictify a recursive let. At the moment
-- annotateBndr is only call for non-recursive lets; if that
-- changes, we need a RecFlag parameter and another guard here.
- , not arg_of_dfun -- See Note [Do not strictify the argument dictionaries of a dfun]
= strictifyDictDmd id_ty dmd
| otherwise
= dmd
fam_envs = ae_fam_envs env
-{- Note [Initialising strictness]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{- Note [Making dictionaries strict]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The Opt_DictsStrict flag makes GHC use call-by-value for dictionaries. Why?
+
+* Generally CBV is more efficient.
+
+* Dictionaries are always non-bottom; and never take much work to
+ compute. E.g. a dfun from an instance decl always returns a dicionary
+ record immediately. See DFunUnfolding in CoreSyn.
+ See also Note [Recursive superclasses] in TcInstDcls.
+
+* The strictness analyser will then unbox dictionaries and pass the
+ methods individually, rather than in a bundle. If there are a lot of
+ methods that might be bad; but worker/wrapper already does throttling.
+
+* A newtype dictionary is *not* always non-bottom. E.g.
+ class C a where op :: a -> a
+ instance C Int where op = error "urk"
+ Now a value of type (C Int) is just a newtype wrapper (a cast) around
+ the error thunk. Don't strictify these!
+
+See #17758 for more background and perf numbers.
+
+The implementation is extremly simple: just make the strictness
+analyser strictify the demand on a dictionary binder in
+'findBndrDmd'.
+
+However there is one case where this can make performance worse.
+For the principle consider some function at the core level:
+ myEq :: Eq a => a -> a -> Bool
+ myEq eqDict x y = ((==) eqDict) x y
+If we make the dictionary strict then WW can fire turning this into:
+ $wmyEq :: (a -> a -> Bool) -> a -> a -> Bool
+ $wmyEq eq x y = eq x y
+Which *usually* performs better. However if the dictionary is known we
+are far more likely to inline a function applied to the dictionary than
+to inline one applied to a function. Sometimes this makes just enough
+of a difference to stop a function from inlining. This is documented in
+#18421.
+
+It's somewhat similar to Note [Do not unpack class dictionaries] although
+here our problem is with the inliner, not the specializer.
+
+Note [Initialising strictness]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
See section 9.2 (Finding fixpoints) of the paper.
Our basic plan is to initialise the strictness of each Id in a
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index 9e90db9853..4e570f1b3e 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -3883,6 +3883,7 @@ optLevelFlags -- see Note [Documenting optimisation flags]
= [ ([0,1,2], Opt_DoLambdaEtaExpansion)
, ([0,1,2], Opt_DoEtaReduction) -- See Note [Eta-reduction in -O0]
, ([0,1,2], Opt_LlvmTBAA)
+ , ([2], Opt_DictsStrict)
, ([0], Opt_IgnoreInterfacePragmas)
, ([0], Opt_OmitInterfacePragmas)