diff options
-rw-r--r-- | compiler/GHC/Core/Opt/DmdAnal.hs | 72 | ||||
-rw-r--r-- | compiler/GHC/Driver/Session.hs | 1 | ||||
-rw-r--r-- | docs/users_guide/using-optimisation.rst | 11 | ||||
-rw-r--r-- | testsuite/tests/warnings/should_compile/T16282/T16282.hs | 39 | ||||
-rw-r--r-- | testsuite/tests/warnings/should_compile/T16282/T16282.stderr | 15 | ||||
-rw-r--r-- | testsuite/tests/warnings/should_compile/T16282/T16282A.hs | 22 | ||||
-rw-r--r-- | testsuite/tests/warnings/should_compile/T16282/all.T | 2 |
7 files changed, 123 insertions, 39 deletions
diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs index 373bdcb033..14ee7419bf 100644 --- a/compiler/GHC/Core/Opt/DmdAnal.hs +++ b/compiler/GHC/Core/Opt/DmdAnal.hs @@ -274,7 +274,7 @@ dmdAnalBindLetUp :: TopLevelFlag dmdAnalBindLetUp top_lvl env id rhs anal_body = WithDmdType final_ty (R (NonRec id' rhs') (body')) where WithDmdType body_ty body' = anal_body env - WithDmdType body_ty' id_dmd = findBndrDmd env notArgOfDfun body_ty id + WithDmdType body_ty' id_dmd = findBndrDmd env body_ty id !id' = setBindIdDemandInfo top_lvl id id_dmd (rhs_ty, rhs') = dmdAnalStar env (dmdTransformThunkDmd rhs id_dmd) rhs @@ -412,7 +412,7 @@ dmdAnal' env dmd (Lam var body) -- body_dmd: a demand to analyze the body WithDmdType body_ty body' = dmdAnal env body_dmd body - WithDmdType lam_ty var' = annotateLamIdBndr env notArgOfDfun body_ty var + WithDmdType lam_ty var' = annotateLamIdBndr env body_ty var new_dmd_type = multDmdType n lam_ty in WithDmdType new_dmd_type (Lam var' body') @@ -424,7 +424,7 @@ dmdAnal' env dmd (Case scrut case_bndr ty [Alt alt bndrs rhs]) = let WithDmdType rhs_ty rhs' = dmdAnal env dmd rhs WithDmdType alt_ty1 dmds = findBndrsDmds env rhs_ty bndrs - WithDmdType alt_ty2 case_bndr_dmd = findBndrDmd env False alt_ty1 case_bndr + WithDmdType alt_ty2 case_bndr_dmd = findBndrDmd env alt_ty1 case_bndr -- Evaluation cardinality on the case binder is irrelevant and a no-op. -- What matters is its nested sub-demand! (_ :* case_bndr_sd) = case_bndr_dmd @@ -1282,16 +1282,15 @@ annotateBndr env dmd_ty var | otherwise = WithDmdType dmd_ty var where new_id = setIdDemandInfo var dmd - WithDmdType dmd_ty' dmd = findBndrDmd env False dmd_ty var + WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty var annotateLamIdBndr :: AnalEnv - -> DFunFlag -- is this lambda at the top of the RHS of a dfun? -> DmdType -- Demand type of body -> Id -- Lambda binder -> WithDmdType Id -- Demand type of lambda -- and binder annotated with demand -annotateLamIdBndr env arg_of_dfun dmd_ty id +annotateLamIdBndr env dmd_ty id -- For lambdas we add the demand to the argument demands -- Only called for Ids = assert (isId id) $ @@ -1307,7 +1306,7 @@ annotateLamIdBndr env arg_of_dfun dmd_ty id (unf_ty, _) = dmdAnalStar env dmd unf main_ty = addDemand dmd dmd_ty' - WithDmdType dmd_ty' dmd = findBndrDmd env arg_of_dfun dmd_ty id + WithDmdType dmd_ty' dmd = findBndrDmd env dmd_ty id {- Note [NOINLINE and strictness] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1394,10 +1393,6 @@ forget that fact, otherwise we might make 'x' absent when it isn't. ************************************************************************ -} -type DFunFlag = Bool -- indicates if the lambda being considered is in the - -- sequence of lambdas at the top of the RHS of a dfun -notArgOfDfun :: DFunFlag -notArgOfDfun = False data AnalEnv = AE { ae_strict_dicts :: !Bool -- ^ Enable strict dict @@ -1464,13 +1459,13 @@ findBndrsDmds env dmd_ty bndrs go dmd_ty [] = WithDmdType dmd_ty [] go dmd_ty (b:bs) | isId b = let WithDmdType dmd_ty1 dmds = go dmd_ty bs - WithDmdType dmd_ty2 dmd = findBndrDmd env False dmd_ty1 b + WithDmdType dmd_ty2 dmd = findBndrDmd env dmd_ty1 b in WithDmdType dmd_ty2 (dmd : dmds) | otherwise = go dmd_ty bs -findBndrDmd :: AnalEnv -> Bool -> DmdType -> Id -> WithDmdType Demand +findBndrDmd :: AnalEnv -> DmdType -> Id -> WithDmdType Demand -- See Note [Trimming a demand to a type] -findBndrDmd env arg_of_dfun dmd_ty id +findBndrDmd env dmd_ty id = -- pprTrace "findBndrDmd" (ppr id $$ ppr dmd_ty $$ ppr starting_dmd $$ ppr dmd') $ WithDmdType dmd_ty' dmd' where @@ -1482,19 +1477,62 @@ findBndrDmd env arg_of_dfun dmd_ty id id_ty = idType id strictify dmd + -- See Note [Making dictionaries strict] | ae_strict_dicts env -- We never want to strictify a recursive let. At the moment -- annotateBndr is only call for non-recursive lets; if that -- changes, we need a RecFlag parameter and another guard here. - , not arg_of_dfun -- See Note [Do not strictify the argument dictionaries of a dfun] = strictifyDictDmd id_ty dmd | otherwise = dmd fam_envs = ae_fam_envs env -{- Note [Initialising strictness] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +{- Note [Making dictionaries strict] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The Opt_DictsStrict flag makes GHC use call-by-value for dictionaries. Why? + +* Generally CBV is more efficient. + +* Dictionaries are always non-bottom; and never take much work to + compute. E.g. a dfun from an instance decl always returns a dicionary + record immediately. See DFunUnfolding in CoreSyn. + See also Note [Recursive superclasses] in TcInstDcls. + +* The strictness analyser will then unbox dictionaries and pass the + methods individually, rather than in a bundle. If there are a lot of + methods that might be bad; but worker/wrapper already does throttling. + +* A newtype dictionary is *not* always non-bottom. E.g. + class C a where op :: a -> a + instance C Int where op = error "urk" + Now a value of type (C Int) is just a newtype wrapper (a cast) around + the error thunk. Don't strictify these! + +See #17758 for more background and perf numbers. + +The implementation is extremly simple: just make the strictness +analyser strictify the demand on a dictionary binder in +'findBndrDmd'. + +However there is one case where this can make performance worse. +For the principle consider some function at the core level: + myEq :: Eq a => a -> a -> Bool + myEq eqDict x y = ((==) eqDict) x y +If we make the dictionary strict then WW can fire turning this into: + $wmyEq :: (a -> a -> Bool) -> a -> a -> Bool + $wmyEq eq x y = eq x y +Which *usually* performs better. However if the dictionary is known we +are far more likely to inline a function applied to the dictionary than +to inline one applied to a function. Sometimes this makes just enough +of a difference to stop a function from inlining. This is documented in +#18421. + +It's somewhat similar to Note [Do not unpack class dictionaries] although +here our problem is with the inliner, not the specializer. + +Note [Initialising strictness] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ See section 9.2 (Finding fixpoints) of the paper. Our basic plan is to initialise the strictness of each Id in a diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs index 9e90db9853..4e570f1b3e 100644 --- a/compiler/GHC/Driver/Session.hs +++ b/compiler/GHC/Driver/Session.hs @@ -3883,6 +3883,7 @@ optLevelFlags -- see Note [Documenting optimisation flags] = [ ([0,1,2], Opt_DoLambdaEtaExpansion) , ([0,1,2], Opt_DoEtaReduction) -- See Note [Eta-reduction in -O0] , ([0,1,2], Opt_LlvmTBAA) + , ([2], Opt_DictsStrict) , ([0], Opt_IgnoreInterfacePragmas) , ([0], Opt_OmitInterfacePragmas) diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst index 3fcac45e11..31d3a71431 100644 --- a/docs/users_guide/using-optimisation.rst +++ b/docs/users_guide/using-optimisation.rst @@ -343,7 +343,7 @@ by saying ``-fno-wombat``. seem cheap to the optimiser. .. ghc-flag:: -fdicts-strict - :shortdesc: Make dictionaries strict + :shortdesc: Make dictionaries strict. Implied by :ghc-flag:`-O2`. :type: dynamic :reverse: -fno-dicts-strict :category: @@ -352,6 +352,15 @@ by saying ``-fno-wombat``. Make dictionaries strict. + This enables WW to fire on dictionary constraints which usually results + in better runtime. In niche cases it can lead to significant compile time + regressions because of changed inlining behaviour. Rarely this can also affect + runtime negatively. + + If enabling this flag leads to regressions try increasing the unfolding + threshold using :ghc-flag:`-funfolding-use-threshold=⟨n⟩` by a modest amount (~30) + as this is likely a result of a known limitation described in `#18421`. + .. ghc-flag:: -fdmd-tx-dict-sel :shortdesc: *(deprecated)* Use a special demand transformer for dictionary selectors. :type: dynamic diff --git a/testsuite/tests/warnings/should_compile/T16282/T16282.hs b/testsuite/tests/warnings/should_compile/T16282/T16282.hs index 0f6ab866a0..34abd24e51 100644 --- a/testsuite/tests/warnings/should_compile/T16282/T16282.hs +++ b/testsuite/tests/warnings/should_compile/T16282/T16282.hs @@ -1,14 +1,25 @@ -import Data.Map
-
--- If someone improves the specializer so that
--- GHC no longer misses the specialization below,
--- then this test will fail, as it expects a warning
--- to be issued.
--- Another reason this could fail is due to spelling:
--- the test checks for the "specialisation" spelling,
--- but due to changes in how the warnings are listed in DynFalgs.hs
--- the compiler may spit out the "specialization" spelling.
-main :: IO ()
-main = do
- let m = [] :: [Map String Bool]
- mapM_ print m
+{-# OPTIONS_GHC -Wall-missed-specialisations -O2 #-} + +import T16282A + +import Data.Map (Map) +-- The purpose of this test is simple to trigger +-- a missed specialization and check if GHC puts +-- out a warning. +-- It used to fail to specialize on the show instance +-- for Data.Map, now that we enable -fdicts-strict by default +-- the worker for these no longer take a dictionary (having been +-- WWed). So instead we force it to fail to specialize on myMapM_ + +-- If someone improves the specializer so that +-- GHC no longer misses the specialization below, +-- then this test will fail, as it expects a warning +-- to be issued. +-- Another reason this could fail is due to spelling: +-- the test checks for the "specialisation" spelling, +-- but due to changes in how the warnings are listed in DynFalgs.hs +-- the compiler may spit out the "specialization" spelling. +main :: IO () +main = do + let m = [] :: [MyMap Double] + myMapM_ print m diff --git a/testsuite/tests/warnings/should_compile/T16282/T16282.stderr b/testsuite/tests/warnings/should_compile/T16282/T16282.stderr index e9cc798546..b25c4c3563 100644 --- a/testsuite/tests/warnings/should_compile/T16282/T16282.stderr +++ b/testsuite/tests/warnings/should_compile/T16282/T16282.stderr @@ -1,10 +1,13 @@ +[1 of 2] Compiling T16282A ( T16282A.hs, T16282A.o ) +[2 of 2] Compiling Main ( T16282.hs, T16282.o ) T16282.hs: warning: [-Wall-missed-specialisations] - Could not specialise imported function ‘Data.Foldable.$wmapM_’ - when specialising ‘mapM_’ - Probable fix: add INLINABLE pragma on ‘Data.Foldable.$wmapM_’ + Could not specialise imported function ‘T16282A.$wmyMapM_’ + when specialising ‘myMapM_’ + Probable fix: add INLINABLE pragma on ‘T16282A.$wmyMapM_’ T16282.hs: warning: [-Wall-missed-specialisations] - Could not specialise imported function ‘Data.Map.Internal.$w$cshowsPrec’ - when specialising ‘Data.Map.Internal.$fShowMap_$cshowsPrec’ - Probable fix: add INLINABLE pragma on ‘Data.Map.Internal.$w$cshowsPrec’ + Could not specialise imported function ‘T16282A.$w$cshowsPrec’ + when specialising ‘T16282A.$fShowMyMap_$cshowsPrec’ + Probable fix: add INLINABLE pragma on ‘T16282A.$w$cshowsPrec’ +Linking T16282.exe ... diff --git a/testsuite/tests/warnings/should_compile/T16282/T16282A.hs b/testsuite/tests/warnings/should_compile/T16282/T16282A.hs new file mode 100644 index 0000000000..3bf6385fd0 --- /dev/null +++ b/testsuite/tests/warnings/should_compile/T16282/T16282A.hs @@ -0,0 +1,22 @@ +-- To ensure we miss the mapM specialization we: +-- * Prevent a unfolding with NOINLINE +-- * Turn of dicts-strict, to keep a dictionary as argument. + +{-# OPTIONS_GHC -fno-dicts-strict #-} + +module T16282A where + +import Data.Map as M (Map, toList) + +newtype MyMap v = MyMap (Map Int v) + +instance (Show a) => Show (MyMap a) where + showsPrec d (MyMap m) = showParen (d > 10) $ + showString "fromList " . shows (M.toList m) + +{-# NOINLINE myMapM_ #-} +myMapM_ :: (Foldable t, Monad m) => (a -> m b) -> t a -> m () +myMapM_ f = Prelude.foldr c (return ()) + -- See Note [List fusion and continuations in 'c'] + where c x k = f x >> k + {-# INLINE c #-} diff --git a/testsuite/tests/warnings/should_compile/T16282/all.T b/testsuite/tests/warnings/should_compile/T16282/all.T index dfcdd0562a..c3d2bf462a 100644 --- a/testsuite/tests/warnings/should_compile/T16282/all.T +++ b/testsuite/tests/warnings/should_compile/T16282/all.T @@ -1 +1 @@ -test('T16282', normal, compile, ['-O2 -Wall-missed-specialisations'])
\ No newline at end of file +test('T16282', [check_errmsg('-Wall-missed-specialisations')], multimod_compile, ['T16282', '-O2 -Wall-missed-specialisations']) |