diff options
author | Simon Peyton Jones <simonpj@microsoft.com> | 2022-04-20 11:50:48 +0100 |
---|---|---|
committer | Simon Peyton Jones <simonpj@microsoft.com> | 2022-05-23 17:09:34 +0100 |
commit | bc723ac2cf2cfc329de4b8523bf891965075879b (patch) | |
tree | 30b6402a103a7d794a4eb0613e7714dfa0154311 | |
parent | ffbe28e56aa382164525300fbc32d78eefd95e7d (diff) | |
download | haskell-wip/T21386.tar.gz |
Improve FloatOut and SpecConstrwip/T21386
This patch addresses a relatively obscure situation that arose
when chasing perf regressions in !7847, which itself is fixing
It does two things:
* SpecConstr can specialise on ($df d1 d2) dictionary arguments
* FloatOut no longer checks argument strictness
See Note [Specialising on dictionaries] in GHC.Core.Opt.SpecConstr.
A test case is difficult to construct, but it makes a big difference
in nofib/real/eff/VSM, at least when we have the patch for #21286
installed. (The latter stops worker/wrapper for dictionary arguments).
There is a spectacular, but slightly illusory, improvement in
runtime perf on T15426. I have documented the specifics in
T15426 itself.
Metric Decrease:
T15426
-rw-r--r-- | compiler/GHC/Core/Opt/SetLevels.hs | 43 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/SpecConstr.hs | 72 | ||||
-rw-r--r-- | compiler/GHC/Core/Utils.hs | 1 | ||||
-rw-r--r-- | testsuite/tests/perf/should_run/T15426.hs | 18 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/OpaqueNoRebox3.stderr | 26 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T15631.stdout | 2 |
6 files changed, 111 insertions, 51 deletions
diff --git a/compiler/GHC/Core/Opt/SetLevels.hs b/compiler/GHC/Core/Opt/SetLevels.hs index 21ddfbda22..9e2376da45 100644 --- a/compiler/GHC/Core/Opt/SetLevels.hs +++ b/compiler/GHC/Core/Opt/SetLevels.hs @@ -104,7 +104,7 @@ import GHC.Types.Unique.Set ( nonDetStrictFoldUniqSet ) import GHC.Types.Unique.DSet ( getUniqDSet ) import GHC.Types.Var.Env import GHC.Types.Literal ( litIsTrivial ) -import GHC.Types.Demand ( DmdSig, Demand, isStrUsedDmd, splitDmdSig, prependArgsDmdSig ) +import GHC.Types.Demand ( DmdSig, prependArgsDmdSig ) import GHC.Types.Cpr ( mkCprSig, botCpr ) import GHC.Types.Name ( getOccName, mkSystemVarName ) import GHC.Types.Name.Occurrence ( occNameString ) @@ -120,7 +120,6 @@ import GHC.Builtin.Names ( runRWKey ) import GHC.Data.FastString import GHC.Utils.FV -import GHC.Utils.Monad ( mapAccumLM ) import GHC.Utils.Misc import GHC.Utils.Outputable import GHC.Utils.Panic @@ -440,21 +439,13 @@ lvlApp env orig_expr ((_,AnnVar fn), args) ; return (foldl' App lapp' rargs') } | otherwise - = do { (_, args') <- mapAccumLM lvl_arg stricts args - -- Take account of argument strictness; see - -- Note [Floating to the top] + = do { args' <- mapM (lvlMFE env False) args + -- False: see "Arguments" in Note [Floating to the top] ; return (foldl' App (lookupVar env fn) args') } where n_val_args = count (isValArg . deAnnotate) args arity = idArity fn - stricts :: [Demand] -- True for strict /value/ arguments - stricts = case splitDmdSig (idDmdSig fn) of - (arg_ds, _) | arg_ds `lengthExceeds` n_val_args - -> [] - | otherwise - -> arg_ds - -- Separate out the PAP that we are floating from the extra -- arguments, by traversing the spine until we have collected -- (n_val_args - arity) value arguments. @@ -466,19 +457,6 @@ lvlApp env orig_expr ((_,AnnVar fn), args) | otherwise = left n f (a:rargs) left _ _ _ = panic "GHC.Core.Opt.SetLevels.lvlExpr.left" - is_val_arg :: CoreExprWithFVs -> Bool - is_val_arg (_, AnnType {}) = False - is_val_arg _ = True - - lvl_arg :: [Demand] -> CoreExprWithFVs -> LvlM ([Demand], LevelledExpr) - lvl_arg strs arg | (str1 : strs') <- strs - , is_val_arg arg - = do { arg' <- lvlMFE env (isStrUsedDmd str1) arg - ; return (strs', arg') } - | otherwise - = do { arg' <- lvlMFE env False arg - ; return (strs, arg') } - lvlApp env _ (fun, args) = -- No PAPs that we can float: just carry on with the -- arguments and the function. @@ -791,8 +769,8 @@ escape a value lambda (and hence save work), for two reasons: instructions) into a static one. Minor because we are assuming we are not escaping a value lambda. -But do not so if: - - the context is a strict, and +But do not do so if (saves_alloc): + - the context is strict, and - the expression is not a HNF, and - the expression is not bottoming @@ -824,10 +802,13 @@ Exammples: * Arguments t = f (g True) - If f is lazy, we /do/ float (g True) because then we can allocate - the thunk statically rather than dynamically. But if f is strict - we don't (see the use of idDmdSig in lvlApp). It's not clear - if this test is worth the bother: it's only about CAFs! + Prior to Apr 22 we didn't float (g True) to the top if f was strict. + But (a) this only affected CAFs, because if it escapes a value lambda + we'll definitely float it; so the complication of working out + argument strictness doesn't seem worth it. + (b) floating to the top helps SpecContr; see GHC.Core.Opt.SpecConstr + Note [Specialising on dictionaries]. + So now we don't use strictness to affect argument floating. It's controlled by a flag (floatConsts), because doing this too early loses opportunities for RULES which (needless to say) are diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs index d3b9396b2a..c07b8ae954 100644 --- a/compiler/GHC/Core/Opt/SpecConstr.hs +++ b/compiler/GHC/Core/Opt/SpecConstr.hs @@ -32,6 +32,7 @@ import GHC.Core.FVs ( exprsFreeVarsList ) import GHC.Core.Opt.Monad import GHC.Core.Opt.WorkWrap.Utils import GHC.Core.DataCon +import GHC.Core.Class( classTyVars ) import GHC.Core.Coercion hiding( substCo ) import GHC.Core.Rules import GHC.Core.Type hiding ( substTy ) @@ -45,6 +46,7 @@ import GHC.Unit.Module.ModGuts import GHC.Types.Literal ( litIsLifted ) import GHC.Types.Id +import GHC.Types.Id.Info ( IdDetails(..) ) import GHC.Types.Var.Env import GHC.Types.Var.Set import GHC.Types.Name @@ -662,6 +664,50 @@ information to adjust the calling convention of See Note [Tag Inference], Note [Strict Worker Ids] for more information on how we can take advantage of this. +Note [Specialising on dictionaries] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In #21386, SpecConstr saw this call: + + $wgo 100# @.. ($fMonadStateT @.. @.. $fMonadIdentity) + +where $wgo :: Int# -> forall m. Monad m => blah + +You might think that the type-class Specialiser would have specialised +this, but there are good reasons why not: the Specialiser ran too early. +But regardless, SpecConstr can and should! It's easy: + +* isValue: treat ($fblah d1 .. dn) + like a constructor application. + +* scApp: treat (op_sel d), a class method selection, + like a case expression + +* Float that dictionary application to top level, thus + lvl = $fMonadStateT @.. @.. $fMonadIdentity + so the call looks like + ($wgo 100# @.. lvl) + + Why? This way dictionaries will appear as top level binders which we + can trivially match in rules. (CSE runs before SpecConstr, so we + may hope to common-up duplicate top-level dictionaries.) + For the floating part, see the "Arguments" case of Note + [Floating to the top] in GHC.Core.Opt.SetLevels. + + We could be more clever, perhaps, and generate a RULE like + $wgo _ @.. ($fMonadStateT @.. @.. $fMonadIdentity) = $s$wgo ... + but that would mean making argToPat able to spot dfun applications as + well as constructor applications. + +Wrinkles: +* This should all work perfectly fine for newtype classes. Mind you, + currently newtype classes are inlined fairly agressively, but we + may change that. And it would take extra code to exclude them, as + well as being unnecessary. + +* We (mis-) use LambdaVal for this purpose, because ConVal + requires us to list the data constructor and fields, and that + is (a) inconvenient and (b) unnecessary for class methods. + ----------------------------------------------------- Stuff not yet handled ----------------------------------------------------- @@ -939,13 +985,13 @@ instance Outputable HowBound where scForce :: ScEnv -> Bool -> ScEnv scForce env b = env { sc_force = b } -lookupHowBound :: ScEnv -> Id -> Maybe HowBound +lookupHowBound :: ScEnv -> OutId -> Maybe HowBound lookupHowBound env id = lookupVarEnv (sc_how_bound env) id -scSubstId :: ScEnv -> Id -> CoreExpr +scSubstId :: ScEnv -> InId -> OutExpr scSubstId env v = lookupIdSubst (sc_subst env) v -scSubstTy :: ScEnv -> Type -> Type +scSubstTy :: ScEnv -> InType -> OutType scSubstTy env ty = substTy (sc_subst env) ty scSubstCo :: ScEnv -> Coercion -> Coercion @@ -1310,7 +1356,7 @@ scExpr' env (Case scrut b ty alts) ; let (usg', b_occ:arg_occs) = lookupOccs usg (b':bs2) scrut_occ = case con of DataAlt dc -> ScrutOcc (unitUFM dc arg_occs) - _ -> ScrutOcc emptyUFM + _ -> evalScrutOcc ; return (usg', b_occ `combineOcc` scrut_occ, Alt con bs2 rhs') } scExpr' env (Let (NonRec bndr rhs) body) @@ -1398,8 +1444,15 @@ scApp env (Var fn, args) -- Function is a variable fn'@(Lam {}) -> scExpr (zapScSubst env) (doBeta fn' args') -- Do beta-reduction and try again - Var fn' -> return (arg_usg `combineUsage` mkVarUsage env fn' args', + Var fn' -> return (arg_usg' `combineUsage` mkVarUsage env fn' args', mkApps (Var fn') args') + where + -- arg_usg': see Note [Specialising on dictionaries] + arg_usg' | Just cls <- isClassOpId_maybe fn' + , dict_arg : _ <- dropList (classTyVars cls) args' + = setScrutOcc env arg_usg dict_arg evalScrutOcc + | otherwise + = arg_usg other_fn' -> return (arg_usg, mkApps other_fn' args') } -- NB: doing this ignores any usage info from the substituted @@ -1407,7 +1460,6 @@ scApp env (Var fn, args) -- Function is a variable -- we can fix it. where doBeta :: OutExpr -> [OutExpr] -> OutExpr - -- ToDo: adjust for System IF doBeta (Lam bndr body) (arg : args) = Let (NonRec bndr arg) (doBeta body args) doBeta fn args = mkApps fn args @@ -2558,13 +2610,15 @@ isValue env (Tick t e) isValue _env expr -- Maybe it's a constructor application | (Var fun, args, _) <- collectArgsTicks (not . tickishIsCode) expr - = case isDataConWorkId_maybe fun of - - Just con | args `lengthAtLeast` dataConRepArity con + = case idDetails fun of + DataConWorkId con | args `lengthAtLeast` dataConRepArity con -- Check saturated; might be > because the -- arity excludes type args -> Just (ConVal (DataAlt con) args) + DFunId {} -> Just LambdaVal + -- DFunId: see Note [Specialising on dictionaries] + _other | valArgCount args < idArity fun -- Under-applied function -> Just LambdaVal -- Partial application diff --git a/compiler/GHC/Core/Utils.hs b/compiler/GHC/Core/Utils.hs index 6811498c54..8ade2a981a 100644 --- a/compiler/GHC/Core/Utils.hs +++ b/compiler/GHC/Core/Utils.hs @@ -1226,7 +1226,6 @@ there is only dictionary selection (no construction) involved Note [exprIsCheap] ~~~~~~~~~~~~~~~~~~ - See also Note [Interaction of exprIsWorkFree and lone variables] in GHC.Core.Unfold @exprIsCheap@ looks at a Core expression and returns \tr{True} if diff --git a/testsuite/tests/perf/should_run/T15426.hs b/testsuite/tests/perf/should_run/T15426.hs index c85c7e7ec0..cbc74d5337 100644 --- a/testsuite/tests/perf/should_run/T15426.hs +++ b/testsuite/tests/perf/should_run/T15426.hs @@ -11,3 +11,21 @@ main = do evaluate $ L.elemIndex 999999 [(1::Int)..1000000] evaluate $ L.findIndex (>=999999) [(1::Int)..1000000] evaluate $ L.findIndices (>=999999) [(1::Int)..1000000] evaluate $ unsafeFindIndex (>=999999) [(1::Int)..1000000] + +{- Note; see !7997. + +You would think those [1..100000] sub-expressions would float to the +top level, be CSE'd, and shared. + +But no: until May 22-ish, they are the argument of a strict function +findIndices; and in HEAD SetLevels goes to some trouble not to float +strict arguments. So in HEAD, no sharing happens. + +I think the reasoning is bogus, so I changed in; see +"Arguments" in Note [Floating to the top] in SetLevels. + +As a result these lists are now floated out and shared. + +Just leaving breadcrumbs, in case we later see big perf changes on +this (slightly fragile) benchmark. +-}
\ No newline at end of file diff --git a/testsuite/tests/simplCore/should_compile/OpaqueNoRebox3.stderr b/testsuite/tests/simplCore/should_compile/OpaqueNoRebox3.stderr index a74980ed99..3f01f42d2d 100644 --- a/testsuite/tests/simplCore/should_compile/OpaqueNoRebox3.stderr +++ b/testsuite/tests/simplCore/should_compile/OpaqueNoRebox3.stderr @@ -1,7 +1,7 @@ ==================== Tidy Core ==================== Result size of Tidy Core - = {terms: 120, types: 47, coercions: 4, joins: 1/1} + = {terms: 122, types: 49, coercions: 4, joins: 1/1} -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} OpaqueNoRebox3.$trModule4 :: GHC.Prim.Addr# @@ -45,7 +45,7 @@ OpaqueNoRebox3.$trModule -- RHS size: {terms: 8, types: 3, coercions: 0, joins: 0/0} f [InlPrag=OPAQUE] :: Int -> Int [GblId, Arity=1, Str=<1L>, Unf=OtherCon []] -f = / (x :: Int) -> +f = \ (x :: Int) -> case x of { GHC.Types.I# ipv -> GHC.Types.I# (GHC.Prim.+# ipv 1#) } -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} @@ -110,10 +110,15 @@ lvl11 :: GHC.Prim.Addr# [GblId, Unf=OtherCon []] lvl11 = "patError"# --- RHS size: {terms: 4, types: 2, coercions: 4, joins: 0/0} -lvl12 :: Int +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +lvl12 :: [Char] +[GblId] +lvl12 = GHC.CString.unpackCString# lvl11 + +-- RHS size: {terms: 3, types: 2, coercions: 4, joins: 0/0} +lvl13 :: Int [GblId, Str=b, Cpr=b] -lvl12 +lvl13 = error @GHC.Types.LiftedRep @Int @@ -122,13 +127,13 @@ lvl12 <"callStack">_N <GHC.Stack.Types.CallStack>_N) :: GHC.Stack.Types.CallStack ~R# (?callStack::GHC.Stack.Types.CallStack))) - (GHC.CString.unpackCString# lvl11) + lvl12 Rec { -- RHS size: {terms: 50, types: 13, coercions: 0, joins: 1/1} g [Occ=LoopBreaker] :: Bool -> Bool -> Bool -> Int -> Int [GblId, Arity=4, Str=<SL><SL><L><1L>, Unf=OtherCon []] -g = / (w :: Bool) (w1 :: Bool) (w2 :: Bool) (p :: Int) -> +g = \ (w :: Bool) (w1 :: Bool) (w2 :: Bool) (p :: Int) -> join { fail_ [Dmd=M!P(L)] :: Int [LclId[JoinId(0)(Nothing)]] @@ -139,7 +144,7 @@ g = / (w :: Bool) (w1 :: Bool) (w2 :: Bool) (p :: Int) -> False -> g w GHC.Types.True GHC.Types.False p; True -> f (f p) }; - True -> lvl12 + True -> lvl13 } } in case w of { False -> @@ -158,4 +163,7 @@ g = / (w :: Bool) (w1 :: Bool) (w2 :: Bool) (p :: Int) -> True -> f p } } -end Rec }
\ No newline at end of file +end Rec } + + + diff --git a/testsuite/tests/simplCore/should_compile/T15631.stdout b/testsuite/tests/simplCore/should_compile/T15631.stdout index ab181b58ed..6c528debc1 100644 --- a/testsuite/tests/simplCore/should_compile/T15631.stdout +++ b/testsuite/tests/simplCore/should_compile/T15631.stdout @@ -1,4 +1,4 @@ - case GHC.List.$wlenAcc + case GHC.List.$wlenAcc @a (Foo.f2 @a) 0# of v { __DEFAULT -> case GHC.List.$wlenAcc @a xs 0# of ww1 { __DEFAULT -> case GHC.List.reverse1 @a xs (GHC.Types.[] @a) of { [] -> case Foo.f1 @a of { GHC.Types.I# v1 -> GHC.Prim.+# ww1 v1 }; |