summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/GHC/Core/Opt/DmdAnal.hs72
-rw-r--r--compiler/GHC/Driver/Session.hs1
-rw-r--r--docs/users_guide/using-optimisation.rst11
-rw-r--r--testsuite/tests/warnings/should_compile/T16282/T16282.hs39
-rw-r--r--testsuite/tests/warnings/should_compile/T16282/T16282.stderr15
-rw-r--r--testsuite/tests/warnings/should_compile/T16282/T16282A.hs22
-rw-r--r--testsuite/tests/warnings/should_compile/T16282/all.T2
7 files changed, 123 insertions, 39 deletions
diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs
index 373bdcb033..14ee7419bf 100644
--- a/compiler/GHC/Core/Opt/DmdAnal.hs
+++ b/compiler/GHC/Core/Opt/DmdAnal.hs
@@ -274,7 +274,7 @@ dmdAnalBindLetUp :: TopLevelFlag
dmdAnalBindLetUp top_lvl env id rhs anal_body = WithDmdType final_ty (R (NonRec id' rhs') (body'))
where
WithDmdType body_ty body' = anal_body env
- WithDmdType body_ty' id_dmd = findBndrDmd env notArgOfDfun body_ty id
+ WithDmdType body_ty' id_dmd = findBndrDmd env body_ty id
!id' = setBindIdDemandInfo top_lvl id id_dmd
(rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd) rhs
@@ -412,7 +412,7 @@ dmdAnal' env dmd (Lam var body)
-- body_dmd: a demand to analyze the body
WithDmdType body_ty body' = dmdAnal env body_dmd body
- WithDmdType lam_ty var' = annotateLamIdBndr env notArgOfDfun body_ty var
+ WithDmdType lam_ty var' = annotateLamIdBndr env body_ty var
new_dmd_type = multDmdType n lam_ty
in
WithDmdType new_dmd_type (Lam var' body')
@@ -424,7 +424,7 @@ dmdAnal' env dmd (Case scrut case_bndr ty [Alt alt bndrs rhs])
= let
WithDmdType rhs_ty rhs' = dmdAnal env dmd rhs
WithDmdType alt_ty1 dmds = findBndrsDmds env rhs_ty bndrs
- WithDmdType alt_ty2 case_bndr_dmd = findBndrDmd env False alt_ty1 case_bndr
+ WithDmdType alt_ty2 case_bndr_dmd = findBndrDmd env alt_ty1 case_bndr
-- Evaluation cardinality on the case binder is irrelevant and a no-op.
-- What matters is its nested sub-demand!
(_ :* case_bndr_sd) = case_bndr_dmd
@@ -1282,16 +1282,15 @@ annotateBndr env dmd_ty var
| otherwise = WithDmdType dmd_ty var
where
new_id = setIdDemandInfo var dmd
- WithDmdType dmd_ty' dmd = findBndrDmd env False dmd_ty var
+ WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty var
annotateLamIdBndr :: AnalEnv
- -> DFunFlag -- is this lambda at the top of the RHS of a dfun?
-> DmdType -- Demand type of body
-> Id -- Lambda binder
-> WithDmdType Id -- Demand type of lambda
-- and binder annotated with demand
-annotateLamIdBndr env arg_of_dfun dmd_ty id
+annotateLamIdBndr env dmd_ty id
-- For lambdas we add the demand to the argument demands
-- Only called for Ids
= assert (isId id) $
@@ -1307,7 +1306,7 @@ annotateLamIdBndr env arg_of_dfun dmd_ty id
(unf_ty, _) = dmdAnalStar env dmd unf
main_ty = addDemand dmd dmd_ty'
- WithDmdType dmd_ty' dmd = findBndrDmd env arg_of_dfun dmd_ty id
+ WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty id
{- Note [NOINLINE and strictness]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1394,10 +1393,6 @@ forget that fact, otherwise we might make 'x' absent when it isn't.
************************************************************************
-}
-type DFunFlag = Bool -- indicates if the lambda being considered is in the
- -- sequence of lambdas at the top of the RHS of a dfun
-notArgOfDfun :: DFunFlag
-notArgOfDfun = False
data AnalEnv = AE
{ ae_strict_dicts :: !Bool -- ^ Enable strict dict
@@ -1464,13 +1459,13 @@ findBndrsDmds env dmd_ty bndrs
go dmd_ty [] = WithDmdType dmd_ty []
go dmd_ty (b:bs)
| isId b = let WithDmdType dmd_ty1 dmds = go dmd_ty bs
- WithDmdType dmd_ty2 dmd = findBndrDmd env False dmd_ty1 b
+ WithDmdType dmd_ty2 dmd = findBndrDmd env dmd_ty1 b
in WithDmdType dmd_ty2 (dmd : dmds)
| otherwise = go dmd_ty bs
-findBndrDmd :: AnalEnv -> Bool -> DmdType -> Id -> WithDmdType Demand
+findBndrDmd :: AnalEnv -> DmdType -> Id -> WithDmdType Demand
-- See Note [Trimming a demand to a type]
-findBndrDmd env arg_of_dfun dmd_ty id
+findBndrDmd env dmd_ty id
= -- pprTrace "findBndrDmd" (ppr id $$ ppr dmd_ty $$ ppr starting_dmd $$ ppr dmd') $
WithDmdType dmd_ty' dmd'
where
@@ -1482,19 +1477,62 @@ findBndrDmd env arg_of_dfun dmd_ty id
id_ty = idType id
strictify dmd
+ -- See Note [Making dictionaries strict]
| ae_strict_dicts env
-- We never want to strictify a recursive let. At the moment
-- annotateBndr is only call for non-recursive lets; if that
-- changes, we need a RecFlag parameter and another guard here.
- , not arg_of_dfun -- See Note [Do not strictify the argument dictionaries of a dfun]
= strictifyDictDmd id_ty dmd
| otherwise
= dmd
fam_envs = ae_fam_envs env
-{- Note [Initialising strictness]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{- Note [Making dictionaries strict]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The Opt_DictsStrict flag makes GHC use call-by-value for dictionaries. Why?
+
+* Generally CBV is more efficient.
+
+* Dictionaries are always non-bottom; and never take much work to
+ compute. E.g. a dfun from an instance decl always returns a dicionary
+ record immediately. See DFunUnfolding in CoreSyn.
+ See also Note [Recursive superclasses] in TcInstDcls.
+
+* The strictness analyser will then unbox dictionaries and pass the
+ methods individually, rather than in a bundle. If there are a lot of
+ methods that might be bad; but worker/wrapper already does throttling.
+
+* A newtype dictionary is *not* always non-bottom. E.g.
+ class C a where op :: a -> a
+ instance C Int where op = error "urk"
+ Now a value of type (C Int) is just a newtype wrapper (a cast) around
+ the error thunk. Don't strictify these!
+
+See #17758 for more background and perf numbers.
+
+The implementation is extremly simple: just make the strictness
+analyser strictify the demand on a dictionary binder in
+'findBndrDmd'.
+
+However there is one case where this can make performance worse.
+For the principle consider some function at the core level:
+ myEq :: Eq a => a -> a -> Bool
+ myEq eqDict x y = ((==) eqDict) x y
+If we make the dictionary strict then WW can fire turning this into:
+ $wmyEq :: (a -> a -> Bool) -> a -> a -> Bool
+ $wmyEq eq x y = eq x y
+Which *usually* performs better. However if the dictionary is known we
+are far more likely to inline a function applied to the dictionary than
+to inline one applied to a function. Sometimes this makes just enough
+of a difference to stop a function from inlining. This is documented in
+#18421.
+
+It's somewhat similar to Note [Do not unpack class dictionaries] although
+here our problem is with the inliner, not the specializer.
+
+Note [Initialising strictness]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
See section 9.2 (Finding fixpoints) of the paper.
Our basic plan is to initialise the strictness of each Id in a
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index 9e90db9853..4e570f1b3e 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -3883,6 +3883,7 @@ optLevelFlags -- see Note [Documenting optimisation flags]
= [ ([0,1,2], Opt_DoLambdaEtaExpansion)
, ([0,1,2], Opt_DoEtaReduction) -- See Note [Eta-reduction in -O0]
, ([0,1,2], Opt_LlvmTBAA)
+ , ([2], Opt_DictsStrict)
, ([0], Opt_IgnoreInterfacePragmas)
, ([0], Opt_OmitInterfacePragmas)
diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst
index 3fcac45e11..31d3a71431 100644
--- a/docs/users_guide/using-optimisation.rst
+++ b/docs/users_guide/using-optimisation.rst
@@ -343,7 +343,7 @@ by saying ``-fno-wombat``.
seem cheap to the optimiser.
.. ghc-flag:: -fdicts-strict
- :shortdesc: Make dictionaries strict
+ :shortdesc: Make dictionaries strict. Implied by :ghc-flag:`-O2`.
:type: dynamic
:reverse: -fno-dicts-strict
:category:
@@ -352,6 +352,15 @@ by saying ``-fno-wombat``.
Make dictionaries strict.
+ This enables WW to fire on dictionary constraints which usually results
+ in better runtime. In niche cases it can lead to significant compile time
+ regressions because of changed inlining behaviour. Rarely this can also affect
+ runtime negatively.
+
+ If enabling this flag leads to regressions try increasing the unfolding
+ threshold using :ghc-flag:`-funfolding-use-threshold=⟨n⟩` by a modest amount (~30)
+ as this is likely a result of a known limitation described in `#18421`.
+
.. ghc-flag:: -fdmd-tx-dict-sel
:shortdesc: *(deprecated)* Use a special demand transformer for dictionary selectors.
:type: dynamic
diff --git a/testsuite/tests/warnings/should_compile/T16282/T16282.hs b/testsuite/tests/warnings/should_compile/T16282/T16282.hs
index 0f6ab866a0..34abd24e51 100644
--- a/testsuite/tests/warnings/should_compile/T16282/T16282.hs
+++ b/testsuite/tests/warnings/should_compile/T16282/T16282.hs
@@ -1,14 +1,25 @@
-import Data.Map
-
--- If someone improves the specializer so that
--- GHC no longer misses the specialization below,
--- then this test will fail, as it expects a warning
--- to be issued.
--- Another reason this could fail is due to spelling:
--- the test checks for the "specialisation" spelling,
--- but due to changes in how the warnings are listed in DynFalgs.hs
--- the compiler may spit out the "specialization" spelling.
-main :: IO ()
-main = do
- let m = [] :: [Map String Bool]
- mapM_ print m
+{-# OPTIONS_GHC -Wall-missed-specialisations -O2 #-}
+
+import T16282A
+
+import Data.Map (Map)
+-- The purpose of this test is simple to trigger
+-- a missed specialization and check if GHC puts
+-- out a warning.
+-- It used to fail to specialize on the show instance
+-- for Data.Map, now that we enable -fdicts-strict by default
+-- the worker for these no longer take a dictionary (having been
+-- WWed). So instead we force it to fail to specialize on myMapM_
+
+-- If someone improves the specializer so that
+-- GHC no longer misses the specialization below,
+-- then this test will fail, as it expects a warning
+-- to be issued.
+-- Another reason this could fail is due to spelling:
+-- the test checks for the "specialisation" spelling,
+-- but due to changes in how the warnings are listed in DynFalgs.hs
+-- the compiler may spit out the "specialization" spelling.
+main :: IO ()
+main = do
+ let m = [] :: [MyMap Double]
+ myMapM_ print m
diff --git a/testsuite/tests/warnings/should_compile/T16282/T16282.stderr b/testsuite/tests/warnings/should_compile/T16282/T16282.stderr
index e9cc798546..b25c4c3563 100644
--- a/testsuite/tests/warnings/should_compile/T16282/T16282.stderr
+++ b/testsuite/tests/warnings/should_compile/T16282/T16282.stderr
@@ -1,10 +1,13 @@
+[1 of 2] Compiling T16282A ( T16282A.hs, T16282A.o )
+[2 of 2] Compiling Main ( T16282.hs, T16282.o )
T16282.hs: warning: [-Wall-missed-specialisations]
- Could not specialise imported function ‘Data.Foldable.$wmapM_’
- when specialising ‘mapM_’
- Probable fix: add INLINABLE pragma on ‘Data.Foldable.$wmapM_’
+ Could not specialise imported function ‘T16282A.$wmyMapM_’
+ when specialising ‘myMapM_’
+ Probable fix: add INLINABLE pragma on ‘T16282A.$wmyMapM_’
T16282.hs: warning: [-Wall-missed-specialisations]
- Could not specialise imported function ‘Data.Map.Internal.$w$cshowsPrec’
- when specialising ‘Data.Map.Internal.$fShowMap_$cshowsPrec’
- Probable fix: add INLINABLE pragma on ‘Data.Map.Internal.$w$cshowsPrec’
+ Could not specialise imported function ‘T16282A.$w$cshowsPrec’
+ when specialising ‘T16282A.$fShowMyMap_$cshowsPrec’
+ Probable fix: add INLINABLE pragma on ‘T16282A.$w$cshowsPrec’
+Linking T16282.exe ...
diff --git a/testsuite/tests/warnings/should_compile/T16282/T16282A.hs b/testsuite/tests/warnings/should_compile/T16282/T16282A.hs
new file mode 100644
index 0000000000..3bf6385fd0
--- /dev/null
+++ b/testsuite/tests/warnings/should_compile/T16282/T16282A.hs
@@ -0,0 +1,22 @@
+-- To ensure we miss the mapM specialization we:
+-- * Prevent a unfolding with NOINLINE
+-- * Turn of dicts-strict, to keep a dictionary as argument.
+
+{-# OPTIONS_GHC -fno-dicts-strict #-}
+
+module T16282A where
+
+import Data.Map as M (Map, toList)
+
+newtype MyMap v = MyMap (Map Int v)
+
+instance (Show a) => Show (MyMap a) where
+ showsPrec d (MyMap m) = showParen (d > 10) $
+ showString "fromList " . shows (M.toList m)
+
+{-# NOINLINE myMapM_ #-}
+myMapM_ :: (Foldable t, Monad m) => (a -> m b) -> t a -> m ()
+myMapM_ f = Prelude.foldr c (return ())
+ -- See Note [List fusion and continuations in 'c']
+ where c x k = f x >> k
+ {-# INLINE c #-}
diff --git a/testsuite/tests/warnings/should_compile/T16282/all.T b/testsuite/tests/warnings/should_compile/T16282/all.T
index dfcdd0562a..c3d2bf462a 100644
--- a/testsuite/tests/warnings/should_compile/T16282/all.T
+++ b/testsuite/tests/warnings/should_compile/T16282/all.T
@@ -1 +1 @@
-test('T16282', normal, compile, ['-O2 -Wall-missed-specialisations']) \ No newline at end of file
+test('T16282', [check_errmsg('-Wall-missed-specialisations')], multimod_compile, ['T16282', '-O2 -Wall-missed-specialisations'])