diff options
author | Simon Peyton Jones <simonpj@microsoft.com> | 2022-06-22 10:29:03 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-08-27 00:29:02 -0400 |
commit | 95fe09da09b386008fd730abc5374f3521dd339b (patch) | |
tree | cc149dc6b1614983eb63d6a0e4ec494964fcc13b | |
parent | 1007829bfb18708dda77b4eb6106fce9cb05f908 (diff) | |
download | haskell-95fe09da09b386008fd730abc5374f3521dd339b.tar.gz |
Improve SpecConstr for evals
As #21763 showed, we were over-specialising in some cases, when
the function involved was doing a simple 'eval', but not taking
the value apart, or branching on it.
This MR fixes the problem. See Note [Do not specialise evals].
Nofib barely budges, except that spectral/cichelli allocates about
3% less.
Compiler bytes-allocated improves a bit
geo. mean -0.1%
minimum -0.5%
maximum +0.0%
The -0.5% is on T11303b, for what it's worth.
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Utils.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/SpecConstr.hs | 86 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T21763.hs | 24 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T21763.stderr | 5 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T21763a.hs | 12 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T21763a.stderr | 5 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/all.T | 2 |
7 files changed, 121 insertions, 15 deletions
diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs index b44b195e84..a6deec63cf 100644 --- a/compiler/GHC/Core/Opt/Simplify/Utils.hs +++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs @@ -2257,7 +2257,7 @@ prepareAlts tries these things: case e of x { (a,b) -> rhs } where the type is a single constructor type. This gives better code when rhs also scrutinises x or e. - See CoreUtils Note [Refine DEFAULT case alternatives] + See GHC.Core.Utils Note [Refine DEFAULT case alternatives] 3. combineIdenticalAlts: combine identical alternatives into a DEFAULT. See CoreUtils Note [Combine identical alternatives], which also diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs index c7ace3fe0c..f4fe044b95 100644 --- a/compiler/GHC/Core/Opt/SpecConstr.hs +++ b/compiler/GHC/Core/Opt/SpecConstr.hs @@ -671,14 +671,16 @@ But regardless, SpecConstr can and should! It's easy: well as constructor applications. Wrinkles: + * This should all work perfectly fine for newtype classes. Mind you, currently newtype classes are inlined fairly agressively, but we may change that. And it would take extra code to exclude them, as well as being unnecessary. -* We (mis-) use LambdaVal for this purpose, because ConVal - requires us to list the data constructor and fields, and that - is (a) inconvenient and (b) unnecessary for class methods. +* In isValue, we (mis-) use LambdaVal for this ($fblah d1 .. dn) + because ConVal requires us to list the data constructor and + fields, and that is (a) inconvenient and (b) unnecessary for + class methods. ----------------------------------------------------- Stuff not yet handled @@ -1227,7 +1229,20 @@ data ArgOcc = NoOcc -- Doesn't occur at all; or a type argument | UnkOcc -- Used in some unknown way | ScrutOcc -- See Note [ScrutOcc] - (DataConEnv [ArgOcc]) -- How the sub-components are used + (DataConEnv [ArgOcc]) + -- [ArgOcc]: how the sub-components are used + +deadArgOcc :: ArgOcc -> Bool +deadArgOcc (ScrutOcc {}) = False +deadArgOcc UnkOcc = False +deadArgOcc NoOcc = True + +specialisableArgOcc :: ArgOcc -> Bool +-- | Does this occurence represent one worth specializing for. +specialisableArgOcc UnkOcc = False +specialisableArgOcc NoOcc = False +specialisableArgOcc (ScrutOcc {}) = True + {- Note [ScrutOcc] ~~~~~~~~~~~~~~~~~~ @@ -1253,6 +1268,9 @@ instance Outputable ArgOcc where ppr NoOcc = text "no-occ" evalScrutOcc :: ArgOcc +-- We use evalScrutOcc for +-- - mkVarUsage: applied functions +-- - scApp: dicts that are the arugment of a classop evalScrutOcc = ScrutOcc emptyUFM -- Experimentally, this version of combineOcc makes ScrutOcc "win", so @@ -1333,26 +1351,29 @@ scExpr' env (Case scrut b ty alts) = do { let (alt_env,b') = extendBndrWith RecArg env b -- Record RecArg for the components - ; (alt_usgs, alt_occs, alts') - <- mapAndUnzip3M (sc_alt alt_env scrut' b') alts + ; (alt_usgs, alt_occs, alts') <- mapAndUnzip3M (sc_alt alt_env scrut' b') alts ; let scrut_occ = foldr combineOcc NoOcc alt_occs scrut_usg' = setScrutOcc env scrut_usg scrut' scrut_occ -- The combined usage of the scrutinee is given - -- by scrut_occ, which is passed to scScrut, which + -- by scrut_occ, which is passed to setScrutOcc, which -- in turn treats a bare-variable scrutinee specially ; return (foldr combineUsage scrut_usg' alt_usgs, Case scrut' b' (scSubstTy env ty) alts') } + single_alt = isSingleton alts + sc_alt env scrut' b' (Alt con bs rhs) = do { let (env1, bs1) = extendBndrsWith RecArg env bs (env2, bs2) = extendCaseBndrs env1 scrut' b' con bs1 ; (usg, rhs') <- scExpr env2 rhs ; let (usg', b_occ:arg_occs) = lookupOccs usg (b':bs2) scrut_occ = case con of - DataAlt dc -> ScrutOcc (unitUFM dc arg_occs) - _ -> evalScrutOcc + DataAlt dc -- See Note [Do not specialise evals] + | not (single_alt && all deadArgOcc arg_occs) + -> ScrutOcc (unitUFM dc arg_occs) + _ -> UnkOcc ; return (usg', b_occ `combineOcc` scrut_occ, Alt con bs2 rhs') } scExpr' env (Let (NonRec bndr rhs) body) @@ -1429,6 +1450,46 @@ recursive function, but that's not essential and might even be harmful. I'm not sure. -} +{- Note [Do not specialise evals] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f x y = case x of I# _ -> + if y>1 then f x (y-1) else x + +Here `x` is scrutinised by a case, but only in an eval-like way; the +/component/ of the I# is unused. We don't want to specialise this +function, even if we find a call (f (I# z)), because nothing is gained + * No case branches are discarded + * No allocation in removed +The specialised version would take an unboxed Int#, pass it along, +and rebox it at the end. + +In fact this can cause significant regression. In #21763 we had: +like + f = ... case x of x' { I# n -> + join j y = rhs + in ...jump j x'... + +Now if we specialise `j` for the argument `I# n`, we'll end up reboxing +it in `j`, without even removing an allocation from the call site. + +Reboxing is always a worry. But here we can ameliorate the problem as +follows. + +* In scExpr (Case ...), for a /single-alternative/ case expression, in + which the pattern binders are all unused, we build a UnkOcc for + the scrutinee, not one that maps the data constructor; we don't treat + this occurrence as a reason for specialisation. + +* Conveniently, SpecConstr is doing its own occurrence analysis, so + the "unused" bit is just looking for NoOcc + +* Note that if we have + f x = case x of { True -> e1; False -> e2 } + then even though the pattern binders are unused (there are none), it is + still worth specialising on x. Hence the /single-alternative/ guard. +-} + scApp :: ScEnv -> (InExpr, [InExpr]) -> UniqSM (ScUsage, CoreExpr) scApp env (Var fn, args) -- Function is a variable @@ -1478,7 +1539,6 @@ mkVarUsage env fn args , scu_occs = unitVarEnv fn arg_occ } Nothing -> nullUsage where - -- I rather think we could use UnkOcc all the time arg_occ | null args = UnkOcc | otherwise = evalScrutOcc @@ -2558,10 +2618,8 @@ argToPat1 env in_scope val_env arg arg_occ _arg_str -- (b) we know what its value is -- In that case it counts as "interesting" argToPat1 env in_scope val_env (Var v) arg_occ arg_str - | sc_force env || case arg_occ of { ScrutOcc {} -> True - ; UnkOcc -> False - ; NoOcc -> False } -- (a) - , is_value -- (b) + | sc_force env || specialisableArgOcc arg_occ -- (a) + , is_value -- (b) -- Ignoring sc_keen here to avoid gratuitously incurring Note [Reboxing] -- So sc_keen focused just on f (I# x), where we have freshly-allocated -- box that we can eliminate in the caller diff --git a/testsuite/tests/simplCore/should_compile/T21763.hs b/testsuite/tests/simplCore/should_compile/T21763.hs new file mode 100644 index 0000000000..229614ad7b --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T21763.hs @@ -0,0 +1,24 @@ +{-# LANGUAGE MagicHash #-} +module T21763 where + +import GHC.Exts + +-- We should get ONE SpecConstr-generated rule, for f2, +-- not one for f1 and one for f2 + +f1 :: Int -> [Int] -> (Int, [Int]) +-- This one only seq's x, so SpecConstr should not specialise it +f1 x [] = (x, x `seq` []) +f1 x (_:ys) = f1 x ys + + +f2 :: Int -> [Int] -> (Int, [Int]) +-- This one takes x apart, so SpecConstr should specialise it +f2 x [] = (x+1, x `seq` []) +f2 x (_:ys) = f2 x ys + +foo1 :: [Int] -> (Int, [Int]) +foo1 ys = f1 9 ys + +foo2 :: [Int] -> (Int, [Int]) +foo2 ys = f2 9 ys diff --git a/testsuite/tests/simplCore/should_compile/T21763.stderr b/testsuite/tests/simplCore/should_compile/T21763.stderr new file mode 100644 index 0000000000..1f473da726 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T21763.stderr @@ -0,0 +1,5 @@ + +==================== Tidy Core rules ==================== +"SC:$wf20" [2] forall (sc :: Int#). $wf2 (I# sc) = f2_$s$wf2 sc + + diff --git a/testsuite/tests/simplCore/should_compile/T21763a.hs b/testsuite/tests/simplCore/should_compile/T21763a.hs new file mode 100644 index 0000000000..21ce638ed0 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T21763a.hs @@ -0,0 +1,12 @@ +module T21763a where + +{-# NOINLINE g_imp #-} +g_imp !x = not x + +f3 :: (Bool -> Bool) -> Bool -> [Bool] -> (Bool, [Bool]) +-- We want to specialize for `g` to turn it into a known call. +f3 g x [] = (g x, []) +f3 g x (_:ys) = f3 g x ys + +foo3 :: [Bool] -> (Bool, [Bool]) +foo3 ys = f3 g_imp True ys diff --git a/testsuite/tests/simplCore/should_compile/T21763a.stderr b/testsuite/tests/simplCore/should_compile/T21763a.stderr new file mode 100644 index 0000000000..eb75c07f82 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T21763a.stderr @@ -0,0 +1,5 @@ + +==================== Tidy Core rules ==================== +"SC:$wf30" [2] forall. $wf3 g_imp = f3_$s$wf3 + + diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T index 012150b21e..f37e56373c 100644 --- a/testsuite/tests/simplCore/should_compile/all.T +++ b/testsuite/tests/simplCore/should_compile/all.T @@ -424,3 +424,5 @@ test('T21848', [grep_errmsg(r'SPEC wombat') ], compile, ['-O -ddump-spec']) test('T21694b', [grep_errmsg(r'Arity=4') ], compile, ['-O -ddump-simpl']) test('T21960', [grep_errmsg(r'^ Arity=5') ], compile, ['-O2 -ddump-simpl']) test('T21948', [grep_errmsg(r'^ Arity=5') ], compile, ['-O -ddump-simpl']) +test('T21763', only_ways(['optasm']), compile, ['-O2 -ddump-rules']) +test('T21763a', only_ways(['optasm']), compile, ['-O2 -ddump-rules']) |