summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2022-04-20 11:50:48 +0100
committerSimon Peyton Jones <simonpj@microsoft.com>2022-05-23 17:09:34 +0100
commitbc723ac2cf2cfc329de4b8523bf891965075879b (patch)
tree30b6402a103a7d794a4eb0613e7714dfa0154311
parentffbe28e56aa382164525300fbc32d78eefd95e7d (diff)
downloadhaskell-wip/T21386.tar.gz
Improve FloatOut and SpecConstrwip/T21386
This patch addresses a relatively obscure situation that arose when chasing perf regressions in !7847, which itself is fixing It does two things: * SpecConstr can specialise on ($df d1 d2) dictionary arguments * FloatOut no longer checks argument strictness See Note [Specialising on dictionaries] in GHC.Core.Opt.SpecConstr. A test case is difficult to construct, but it makes a big difference in nofib/real/eff/VSM, at least when we have the patch for #21286 installed. (The latter stops worker/wrapper for dictionary arguments). There is a spectacular, but slightly illusory, improvement in runtime perf on T15426. I have documented the specifics in T15426 itself. Metric Decrease: T15426
-rw-r--r--compiler/GHC/Core/Opt/SetLevels.hs43
-rw-r--r--compiler/GHC/Core/Opt/SpecConstr.hs72
-rw-r--r--compiler/GHC/Core/Utils.hs1
-rw-r--r--testsuite/tests/perf/should_run/T15426.hs18
-rw-r--r--testsuite/tests/simplCore/should_compile/OpaqueNoRebox3.stderr26
-rw-r--r--testsuite/tests/simplCore/should_compile/T15631.stdout2
6 files changed, 111 insertions, 51 deletions
diff --git a/compiler/GHC/Core/Opt/SetLevels.hs b/compiler/GHC/Core/Opt/SetLevels.hs
index 21ddfbda22..9e2376da45 100644
--- a/compiler/GHC/Core/Opt/SetLevels.hs
+++ b/compiler/GHC/Core/Opt/SetLevels.hs
@@ -104,7 +104,7 @@ import GHC.Types.Unique.Set ( nonDetStrictFoldUniqSet )
import GHC.Types.Unique.DSet ( getUniqDSet )
import GHC.Types.Var.Env
import GHC.Types.Literal ( litIsTrivial )
-import GHC.Types.Demand ( DmdSig, Demand, isStrUsedDmd, splitDmdSig, prependArgsDmdSig )
+import GHC.Types.Demand ( DmdSig, prependArgsDmdSig )
import GHC.Types.Cpr ( mkCprSig, botCpr )
import GHC.Types.Name ( getOccName, mkSystemVarName )
import GHC.Types.Name.Occurrence ( occNameString )
@@ -120,7 +120,6 @@ import GHC.Builtin.Names ( runRWKey )
import GHC.Data.FastString
import GHC.Utils.FV
-import GHC.Utils.Monad ( mapAccumLM )
import GHC.Utils.Misc
import GHC.Utils.Outputable
import GHC.Utils.Panic
@@ -440,21 +439,13 @@ lvlApp env orig_expr ((_,AnnVar fn), args)
; return (foldl' App lapp' rargs') }
| otherwise
- = do { (_, args') <- mapAccumLM lvl_arg stricts args
- -- Take account of argument strictness; see
- -- Note [Floating to the top]
+ = do { args' <- mapM (lvlMFE env False) args
+ -- False: see "Arguments" in Note [Floating to the top]
; return (foldl' App (lookupVar env fn) args') }
where
n_val_args = count (isValArg . deAnnotate) args
arity = idArity fn
- stricts :: [Demand] -- True for strict /value/ arguments
- stricts = case splitDmdSig (idDmdSig fn) of
- (arg_ds, _) | arg_ds `lengthExceeds` n_val_args
- -> []
- | otherwise
- -> arg_ds
-
-- Separate out the PAP that we are floating from the extra
-- arguments, by traversing the spine until we have collected
-- (n_val_args - arity) value arguments.
@@ -466,19 +457,6 @@ lvlApp env orig_expr ((_,AnnVar fn), args)
| otherwise = left n f (a:rargs)
left _ _ _ = panic "GHC.Core.Opt.SetLevels.lvlExpr.left"
- is_val_arg :: CoreExprWithFVs -> Bool
- is_val_arg (_, AnnType {}) = False
- is_val_arg _ = True
-
- lvl_arg :: [Demand] -> CoreExprWithFVs -> LvlM ([Demand], LevelledExpr)
- lvl_arg strs arg | (str1 : strs') <- strs
- , is_val_arg arg
- = do { arg' <- lvlMFE env (isStrUsedDmd str1) arg
- ; return (strs', arg') }
- | otherwise
- = do { arg' <- lvlMFE env False arg
- ; return (strs, arg') }
-
lvlApp env _ (fun, args)
= -- No PAPs that we can float: just carry on with the
-- arguments and the function.
@@ -791,8 +769,8 @@ escape a value lambda (and hence save work), for two reasons:
instructions) into a static one. Minor because we are assuming
we are not escaping a value lambda.
-But do not so if:
- - the context is a strict, and
+But do not do so if (saves_alloc):
+ - the context is strict, and
- the expression is not a HNF, and
- the expression is not bottoming
@@ -824,10 +802,13 @@ Exammples:
* Arguments
t = f (g True)
- If f is lazy, we /do/ float (g True) because then we can allocate
- the thunk statically rather than dynamically. But if f is strict
- we don't (see the use of idDmdSig in lvlApp). It's not clear
- if this test is worth the bother: it's only about CAFs!
+ Prior to Apr 22 we didn't float (g True) to the top if f was strict.
+ But (a) this only affected CAFs, because if it escapes a value lambda
+ we'll definitely float it; so the complication of working out
+ argument strictness doesn't seem worth it.
+ (b) floating to the top helps SpecContr; see GHC.Core.Opt.SpecConstr
+ Note [Specialising on dictionaries].
+ So now we don't use strictness to affect argument floating.
It's controlled by a flag (floatConsts), because doing this too
early loses opportunities for RULES which (needless to say) are
diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs
index d3b9396b2a..c07b8ae954 100644
--- a/compiler/GHC/Core/Opt/SpecConstr.hs
+++ b/compiler/GHC/Core/Opt/SpecConstr.hs
@@ -32,6 +32,7 @@ import GHC.Core.FVs ( exprsFreeVarsList )
import GHC.Core.Opt.Monad
import GHC.Core.Opt.WorkWrap.Utils
import GHC.Core.DataCon
+import GHC.Core.Class( classTyVars )
import GHC.Core.Coercion hiding( substCo )
import GHC.Core.Rules
import GHC.Core.Type hiding ( substTy )
@@ -45,6 +46,7 @@ import GHC.Unit.Module.ModGuts
import GHC.Types.Literal ( litIsLifted )
import GHC.Types.Id
+import GHC.Types.Id.Info ( IdDetails(..) )
import GHC.Types.Var.Env
import GHC.Types.Var.Set
import GHC.Types.Name
@@ -662,6 +664,50 @@ information to adjust the calling convention of
See Note [Tag Inference], Note [Strict Worker Ids] for more information on
how we can take advantage of this.
+Note [Specialising on dictionaries]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In #21386, SpecConstr saw this call:
+
+ $wgo 100# @.. ($fMonadStateT @.. @.. $fMonadIdentity)
+
+where $wgo :: Int# -> forall m. Monad m => blah
+
+You might think that the type-class Specialiser would have specialised
+this, but there are good reasons why not: the Specialiser ran too early.
+But regardless, SpecConstr can and should! It's easy:
+
+* isValue: treat ($fblah d1 .. dn)
+ like a constructor application.
+
+* scApp: treat (op_sel d), a class method selection,
+ like a case expression
+
+* Float that dictionary application to top level, thus
+ lvl = $fMonadStateT @.. @.. $fMonadIdentity
+ so the call looks like
+ ($wgo 100# @.. lvl)
+
+ Why? This way dictionaries will appear as top level binders which we
+ can trivially match in rules. (CSE runs before SpecConstr, so we
+ may hope to common-up duplicate top-level dictionaries.)
+ For the floating part, see the "Arguments" case of Note
+ [Floating to the top] in GHC.Core.Opt.SetLevels.
+
+ We could be more clever, perhaps, and generate a RULE like
+ $wgo _ @.. ($fMonadStateT @.. @.. $fMonadIdentity) = $s$wgo ...
+ but that would mean making argToPat able to spot dfun applications as
+ well as constructor applications.
+
+Wrinkles:
+* This should all work perfectly fine for newtype classes. Mind you,
+ currently newtype classes are inlined fairly agressively, but we
+ may change that. And it would take extra code to exclude them, as
+ well as being unnecessary.
+
+* We (mis-) use LambdaVal for this purpose, because ConVal
+ requires us to list the data constructor and fields, and that
+ is (a) inconvenient and (b) unnecessary for class methods.
+
-----------------------------------------------------
Stuff not yet handled
-----------------------------------------------------
@@ -939,13 +985,13 @@ instance Outputable HowBound where
scForce :: ScEnv -> Bool -> ScEnv
scForce env b = env { sc_force = b }
-lookupHowBound :: ScEnv -> Id -> Maybe HowBound
+lookupHowBound :: ScEnv -> OutId -> Maybe HowBound
lookupHowBound env id = lookupVarEnv (sc_how_bound env) id
-scSubstId :: ScEnv -> Id -> CoreExpr
+scSubstId :: ScEnv -> InId -> OutExpr
scSubstId env v = lookupIdSubst (sc_subst env) v
-scSubstTy :: ScEnv -> Type -> Type
+scSubstTy :: ScEnv -> InType -> OutType
scSubstTy env ty = substTy (sc_subst env) ty
scSubstCo :: ScEnv -> Coercion -> Coercion
@@ -1310,7 +1356,7 @@ scExpr' env (Case scrut b ty alts)
; let (usg', b_occ:arg_occs) = lookupOccs usg (b':bs2)
scrut_occ = case con of
DataAlt dc -> ScrutOcc (unitUFM dc arg_occs)
- _ -> ScrutOcc emptyUFM
+ _ -> evalScrutOcc
; return (usg', b_occ `combineOcc` scrut_occ, Alt con bs2 rhs') }
scExpr' env (Let (NonRec bndr rhs) body)
@@ -1398,8 +1444,15 @@ scApp env (Var fn, args) -- Function is a variable
fn'@(Lam {}) -> scExpr (zapScSubst env) (doBeta fn' args')
-- Do beta-reduction and try again
- Var fn' -> return (arg_usg `combineUsage` mkVarUsage env fn' args',
+ Var fn' -> return (arg_usg' `combineUsage` mkVarUsage env fn' args',
mkApps (Var fn') args')
+ where
+ -- arg_usg': see Note [Specialising on dictionaries]
+ arg_usg' | Just cls <- isClassOpId_maybe fn'
+ , dict_arg : _ <- dropList (classTyVars cls) args'
+ = setScrutOcc env arg_usg dict_arg evalScrutOcc
+ | otherwise
+ = arg_usg
other_fn' -> return (arg_usg, mkApps other_fn' args') }
-- NB: doing this ignores any usage info from the substituted
@@ -1407,7 +1460,6 @@ scApp env (Var fn, args) -- Function is a variable
-- we can fix it.
where
doBeta :: OutExpr -> [OutExpr] -> OutExpr
- -- ToDo: adjust for System IF
doBeta (Lam bndr body) (arg : args) = Let (NonRec bndr arg) (doBeta body args)
doBeta fn args = mkApps fn args
@@ -2558,13 +2610,15 @@ isValue env (Tick t e)
isValue _env expr -- Maybe it's a constructor application
| (Var fun, args, _) <- collectArgsTicks (not . tickishIsCode) expr
- = case isDataConWorkId_maybe fun of
-
- Just con | args `lengthAtLeast` dataConRepArity con
+ = case idDetails fun of
+ DataConWorkId con | args `lengthAtLeast` dataConRepArity con
-- Check saturated; might be > because the
-- arity excludes type args
-> Just (ConVal (DataAlt con) args)
+ DFunId {} -> Just LambdaVal
+ -- DFunId: see Note [Specialising on dictionaries]
+
_other | valArgCount args < idArity fun
-- Under-applied function
-> Just LambdaVal -- Partial application
diff --git a/compiler/GHC/Core/Utils.hs b/compiler/GHC/Core/Utils.hs
index 6811498c54..8ade2a981a 100644
--- a/compiler/GHC/Core/Utils.hs
+++ b/compiler/GHC/Core/Utils.hs
@@ -1226,7 +1226,6 @@ there is only dictionary selection (no construction) involved
Note [exprIsCheap]
~~~~~~~~~~~~~~~~~~
-
See also Note [Interaction of exprIsWorkFree and lone variables] in GHC.Core.Unfold
@exprIsCheap@ looks at a Core expression and returns \tr{True} if
diff --git a/testsuite/tests/perf/should_run/T15426.hs b/testsuite/tests/perf/should_run/T15426.hs
index c85c7e7ec0..cbc74d5337 100644
--- a/testsuite/tests/perf/should_run/T15426.hs
+++ b/testsuite/tests/perf/should_run/T15426.hs
@@ -11,3 +11,21 @@ main = do evaluate $ L.elemIndex 999999 [(1::Int)..1000000]
evaluate $ L.findIndex (>=999999) [(1::Int)..1000000]
evaluate $ L.findIndices (>=999999) [(1::Int)..1000000]
evaluate $ unsafeFindIndex (>=999999) [(1::Int)..1000000]
+
+{- Note; see !7997.
+
+You would think those [1..100000] sub-expressions would float to the
+top level, be CSE'd, and shared.
+
+But no: until May 22-ish, they are the argument of a strict function
+findIndices; and in HEAD SetLevels goes to some trouble not to float
+strict arguments. So in HEAD, no sharing happens.
+
+I think the reasoning is bogus, so I changed in; see
+"Arguments" in Note [Floating to the top] in SetLevels.
+
+As a result these lists are now floated out and shared.
+
+Just leaving breadcrumbs, in case we later see big perf changes on
+this (slightly fragile) benchmark.
+-} \ No newline at end of file
diff --git a/testsuite/tests/simplCore/should_compile/OpaqueNoRebox3.stderr b/testsuite/tests/simplCore/should_compile/OpaqueNoRebox3.stderr
index a74980ed99..3f01f42d2d 100644
--- a/testsuite/tests/simplCore/should_compile/OpaqueNoRebox3.stderr
+++ b/testsuite/tests/simplCore/should_compile/OpaqueNoRebox3.stderr
@@ -1,7 +1,7 @@
==================== Tidy Core ====================
Result size of Tidy Core
- = {terms: 120, types: 47, coercions: 4, joins: 1/1}
+ = {terms: 122, types: 49, coercions: 4, joins: 1/1}
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
OpaqueNoRebox3.$trModule4 :: GHC.Prim.Addr#
@@ -45,7 +45,7 @@ OpaqueNoRebox3.$trModule
-- RHS size: {terms: 8, types: 3, coercions: 0, joins: 0/0}
f [InlPrag=OPAQUE] :: Int -> Int
[GblId, Arity=1, Str=<1L>, Unf=OtherCon []]
-f = / (x :: Int) ->
+f = \ (x :: Int) ->
case x of { GHC.Types.I# ipv -> GHC.Types.I# (GHC.Prim.+# ipv 1#) }
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
@@ -110,10 +110,15 @@ lvl11 :: GHC.Prim.Addr#
[GblId, Unf=OtherCon []]
lvl11 = "patError"#
--- RHS size: {terms: 4, types: 2, coercions: 4, joins: 0/0}
-lvl12 :: Int
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+lvl12 :: [Char]
+[GblId]
+lvl12 = GHC.CString.unpackCString# lvl11
+
+-- RHS size: {terms: 3, types: 2, coercions: 4, joins: 0/0}
+lvl13 :: Int
[GblId, Str=b, Cpr=b]
-lvl12
+lvl13
= error
@GHC.Types.LiftedRep
@Int
@@ -122,13 +127,13 @@ lvl12
<"callStack">_N <GHC.Stack.Types.CallStack>_N)
:: GHC.Stack.Types.CallStack
~R# (?callStack::GHC.Stack.Types.CallStack)))
- (GHC.CString.unpackCString# lvl11)
+ lvl12
Rec {
-- RHS size: {terms: 50, types: 13, coercions: 0, joins: 1/1}
g [Occ=LoopBreaker] :: Bool -> Bool -> Bool -> Int -> Int
[GblId, Arity=4, Str=<SL><SL><L><1L>, Unf=OtherCon []]
-g = / (w :: Bool) (w1 :: Bool) (w2 :: Bool) (p :: Int) ->
+g = \ (w :: Bool) (w1 :: Bool) (w2 :: Bool) (p :: Int) ->
join {
fail_ [Dmd=M!P(L)] :: Int
[LclId[JoinId(0)(Nothing)]]
@@ -139,7 +144,7 @@ g = / (w :: Bool) (w1 :: Bool) (w2 :: Bool) (p :: Int) ->
False -> g w GHC.Types.True GHC.Types.False p;
True -> f (f p)
};
- True -> lvl12
+ True -> lvl13
} } in
case w of {
False ->
@@ -158,4 +163,7 @@ g = / (w :: Bool) (w1 :: Bool) (w2 :: Bool) (p :: Int) ->
True -> f p
}
}
-end Rec } \ No newline at end of file
+end Rec }
+
+
+
diff --git a/testsuite/tests/simplCore/should_compile/T15631.stdout b/testsuite/tests/simplCore/should_compile/T15631.stdout
index ab181b58ed..6c528debc1 100644
--- a/testsuite/tests/simplCore/should_compile/T15631.stdout
+++ b/testsuite/tests/simplCore/should_compile/T15631.stdout
@@ -1,4 +1,4 @@
- case GHC.List.$wlenAcc
+ case GHC.List.$wlenAcc @a (Foo.f2 @a) 0# of v { __DEFAULT ->
case GHC.List.$wlenAcc @a xs 0# of ww1 { __DEFAULT ->
case GHC.List.reverse1 @a xs (GHC.Types.[] @a) of {
[] -> case Foo.f1 @a of { GHC.Types.I# v1 -> GHC.Prim.+# ww1 v1 };