summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2021-02-11 09:47:42 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-02-14 03:36:20 -0500
commit4dc2002aeb08b8be399f1f535b86a671d18eac04 (patch)
treea19d53361b176e26bc223644d53735d04fbbcd1b
parentbc5cb5f900941085e5e22f3e8cafa4deea3b589c (diff)
downloadhaskell-4dc2002aeb08b8be399f1f535b86a671d18eac04.tar.gz
Fix over-eager inlining in SimpleOpt
In GHC.Core.SimpleOpt, I found that its inlining could duplicate an arbitary redex inside a lambda! Consider (\xyz. x+y). The occurrence-analysis treats the lamdda as a group, and says that both x and y occur once, even though the occur under the lambda-z. See Note [Occurrence analysis for lambda binders] in OccurAnal. When the lambda is under-applied in a call, the Simplifier is careful to zap the occ-info on x,y, because they appear under the \z. (See the call to zapLamBndrs in simplExprF1.) But SimpleOpt missed this test, resulting in #19347. So this patch * commons up the binder-zapping in GHC.Core.Utils.zapLamBndrs. * Calls this new function from GHC.Core.Opt.Simplify * Adds a call to zapLamBndrs to GHC.Core.SimpleOpt.simple_app This change makes test T12990 regress somewhat, but it was always very delicate, so I'm going to put up with that. In this voyage I also discovered a small, rather unrelated infelicity in the Simplifier: * In GHC.Core.Opt.Simplify.simplNonRecX we should apply isStrictId to the OutId not the InId. See Note [Dark corner with levity polymorphism] It may never "bite", because SimpleOpt should have inlined all the levity-polymorphic compulsory inlnings already, but somehow it bit me at one point and it's generally a more solid thing to do. Fixing the main bug increases runtime allocation in test perf/should_run/T12990, for (acceptable) reasons explained in a comement on Metric Increase: T12990
-rw-r--r--compiler/GHC/Core/Opt/OccurAnal.hs26
-rw-r--r--compiler/GHC/Core/Opt/Simplify.hs64
-rw-r--r--compiler/GHC/Core/SimpleOpt.hs17
-rw-r--r--compiler/GHC/Core/Utils.hs33
-rw-r--r--compiler/GHC/Driver/Main.hs2
-rw-r--r--compiler/GHC/Types/Basic.hs7
-rw-r--r--testsuite/tests/perf/should_run/T19347.hs30
-rw-r--r--testsuite/tests/perf/should_run/T19347.stdout1
-rw-r--r--testsuite/tests/perf/should_run/all.T5
9 files changed, 145 insertions, 40 deletions
diff --git a/compiler/GHC/Core/Opt/OccurAnal.hs b/compiler/GHC/Core/Opt/OccurAnal.hs
index c5c5e4207a..d4d617bf6f 100644
--- a/compiler/GHC/Core/Opt/OccurAnal.hs
+++ b/compiler/GHC/Core/Opt/OccurAnal.hs
@@ -1932,17 +1932,25 @@ occAnal env (Lam x body)
(markAllNonTail body_usage, Lam x body')
}
--- For value lambdas we do a special hack. Consider
--- (\x. \y. ...x...)
--- If we did nothing, x is used inside the \y, so would be marked
--- as dangerous to dup. But in the common case where the abstraction
--- is applied to two arguments this is over-pessimistic.
--- So instead, we just mark each binder with its occurrence
--- info in the *body* of the multiple lambda.
--- Then, the simplifier is careful when partially applying lambdas.
+{- Note [Occurrence analysis for lambda binders]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For value lambdas we do a special hack. Consider
+ (\x. \y. ...x...)
+If we did nothing, x is used inside the \y, so would be marked
+as dangerous to dup. But in the common case where the abstraction
+is applied to two arguments this is over-pessimistic, which delays
+inlining x, which forces more simplifier iterations.
+
+So instead, we just mark each binder with its occurrence info in the
+*body* of the multiple lambda. Then, the simplifier is careful when
+partially applying lambdas. See the calls to zapLamBndrs in
+ GHC.Core.Opt.Simplify.simplExprF1
+ GHC.Core.SimpleOpt.simple_app
+-}
occAnal env expr@(Lam _ _)
- = case occAnalLamOrRhs env bndrs body of { (usage, tagged_bndrs, body') ->
+ = -- See Note [Occurrence analysis for lambda binders]
+ case occAnalLamOrRhs env bndrs body of { (usage, tagged_bndrs, body') ->
let
expr' = mkLams tagged_bndrs body'
usage1 = markAllNonTail usage
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index 9f98615711..da039a8e83 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -388,8 +388,13 @@ simplNonRecX env bndr new_rhs
| otherwise
= do { (env', bndr') <- simplBinder env bndr
- ; completeNonRecX NotTopLevel env' (isStrictId bndr) bndr bndr' new_rhs }
- -- simplNonRecX is only used for NotTopLevel things
+ ; completeNonRecX NotTopLevel env' (isStrictId bndr') bndr bndr' new_rhs }
+ -- NotTopLevel: simplNonRecX is only used for NotTopLevel things
+ --
+ -- isStrictId: use bndr' because in a levity-polymorphic setting
+ -- the InId bndr might have a levity-polymorphic type, which
+ -- which isStrictId doesn't expect
+ -- c.f. Note [Dark corner with levity polymorphism]
--------------------------
completeNonRecX :: TopLevelFlag -> SimplEnv
@@ -1033,18 +1038,11 @@ simplExprF1 env expr@(Lam {}) cont
-- occ-info, UNLESS the remaining binders are one-shot
where
(bndrs, body) = collectBinders expr
- zapped_bndrs | need_to_zap = map zap bndrs
- | otherwise = bndrs
-
- need_to_zap = any zappable_bndr (drop n_args bndrs)
+ zapped_bndrs = zapLamBndrs n_args bndrs
n_args = countArgs cont
-- NB: countArgs counts all the args (incl type args)
-- and likewise drop counts all binders (incl type lambdas)
- zappable_bndr b = isId b && not (isOneShotBndr b)
- zap b | isTyVar b = b
- | otherwise = zapLamIdInfo b
-
simplExprF1 env (Case scrut bndr _ alts) cont
= {-#SCC "simplExprF1-Case" #-}
simplExprF env scrut (Select { sc_dup = NoDup, sc_bndr = bndr
@@ -1574,21 +1572,22 @@ simplNonRecE env bndr (rhs, rhs_se) (bndrs, body) cont
; -- pprTrace "preInlineUncond" (ppr bndr <+> ppr rhs) $
simplLam env' bndrs body cont }
- -- Deal with strict bindings
- | isStrictId bndr -- Includes coercions, and unlifted types
- , sm_case_case (getMode env)
- = simplExprF (rhs_se `setInScopeFromE` env) rhs
- (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs, sc_body = body
- , sc_env = env, sc_cont = cont, sc_dup = NoDup })
-
- -- Deal with lazy bindings
| otherwise
- = ASSERT( not (isTyVar bndr) )
- do { (env1, bndr1) <- simplNonRecBndr env bndr
- ; (env2, bndr2) <- addBndrRules env1 bndr bndr1 Nothing
+ = do { (env1, bndr1) <- simplNonRecBndr env bndr
+
+ -- Deal with strict bindings
+ -- See Note [Dark corner with levity polymorphism]
+ ; if isStrictId bndr1 && sm_case_case (getMode env)
+ then simplExprF (rhs_se `setInScopeFromE` env) rhs
+ (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs, sc_body = body
+ , sc_env = env, sc_cont = cont, sc_dup = NoDup })
+
+ -- Deal with lazy bindings
+ else do
+ { (env2, bndr2) <- addBndrRules env1 bndr bndr1 Nothing
; (floats1, env3) <- simplLazyBind env2 NotTopLevel NonRecursive bndr bndr2 rhs rhs_se
; (floats2, expr') <- simplLam env3 bndrs body cont
- ; return (floats1 `addFloats` floats2, expr') }
+ ; return (floats1 `addFloats` floats2, expr') } }
------------------
simplRecE :: SimplEnv
@@ -1609,7 +1608,26 @@ simplRecE env pairs body cont
; (floats2, expr') <- simplExprF env2 body cont
; return (floats1 `addFloats` floats2, expr') }
-{- Note [Avoiding exponential behaviour]
+{- Note [Dark corner with levity polymorphism]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In `simplNonRecE`, the call to `isStrictId` will fail if the binder
+has a levity-polymorphic type, of kind (TYPE r). So we are careful to
+call `isStrictId` on the OutId, not the InId, in case we have
+ ((\(r::RuntimeRep) \(x::Type r). blah) Lifted arg)
+That will lead to `simplNonRecE env (x::Type r) arg`, and we can't tell
+if x is lifted or unlifted from that.
+
+We only get such redexes from the compulsory inlining of a wired-in,
+levity-polymorphic function like `rightSection` (see
+GHC.Types.Id.Make). Mind you, SimpleOpt should probably have inlined
+such compulsory inlinings already, but belt and braces does no harm.
+
+Plus, it turns out that GHC.Driver.Main.hscCompileCoreExpr calls the
+Simplifier without first calling SimpleOpt, so anything involving
+GHCi or TH and operator sections will fall over if we don't take
+care here.
+
+Note [Avoiding exponential behaviour]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
One way in which we can get exponential behaviour is if we simplify a
big expression, and the re-simplify it -- and then this happens in a
diff --git a/compiler/GHC/Core/SimpleOpt.hs b/compiler/GHC/Core/SimpleOpt.hs
index bf9602bdaf..81bbc9247e 100644
--- a/compiler/GHC/Core/SimpleOpt.hs
+++ b/compiler/GHC/Core/SimpleOpt.hs
@@ -333,10 +333,21 @@ simple_app env (Var v) as
simple_app env (App e1 e2) as
= simple_app env e1 ((env, e2) : as)
-simple_app env (Lam b e) (a:as)
- = wrapLet mb_pr (simple_app env' e as)
+simple_app env e@(Lam {}) as@(_:_)
+ | (bndrs, body) <- collectBinders e
+ , let zapped_bndrs = zapLamBndrs (length as) bndrs
+ -- Be careful to zap the lambda binders if necessary
+ -- c.f. the Lam caes of simplExprF1 in GHC.Core.Opt.Simplify
+ -- Lacking this zap caused #19347, when we had a redex
+ -- (\ a b. K a b) e1 e2
+ -- where (as it happens) the eta-expanded K is produced by
+ -- Note [Linear fields generalization] in GHC.Tc.Gen.Head
+ = do_beta env zapped_bndrs body as
where
- (env', mb_pr) = simple_bind_pair env b Nothing a NotTopLevel
+ do_beta env (b:bs) body (a:as)
+ | (env', mb_pr) <- simple_bind_pair env b Nothing a NotTopLevel
+ = wrapLet mb_pr $ do_beta env' bs body as
+ do_beta env bs body as = simple_app env (mkLams bs body) as
simple_app env (Tick t e) as
-- Okay to do "(Tick t e) x ==> Tick t (e x)"?
diff --git a/compiler/GHC/Core/Utils.hs b/compiler/GHC/Core/Utils.hs
index 1e34a5fd62..b87ab11453 100644
--- a/compiler/GHC/Core/Utils.hs
+++ b/compiler/GHC/Core/Utils.hs
@@ -40,8 +40,8 @@ module GHC.Core.Utils (
cheapEqExpr, cheapEqExpr', eqExpr,
diffExpr, diffBinds,
- -- * Eta reduction
- tryEtaReduce,
+ -- * Lambdas and eta reduction
+ tryEtaReduce, zapLamBndrs,
-- * Manipulating data constructors and types
exprToType, exprToCoercion_maybe,
@@ -99,7 +99,7 @@ import GHC.Utils.Panic
import GHC.Data.FastString
import GHC.Data.Maybe
import GHC.Data.List.SetOps( minusList )
-import GHC.Types.Basic ( Arity )
+import GHC.Types.Basic ( Arity, FullArgCount )
import GHC.Utils.Misc
import GHC.Data.Pair
import Data.ByteString ( ByteString )
@@ -2523,9 +2523,34 @@ to the rule that
we can eta-reduce \x. f x ===> f
This turned up in #7542.
+-}
+{- *********************************************************************
+* *
+ Zapping lambda binders
+* *
+********************************************************************* -}
-************************************************************************
+zapLamBndrs :: FullArgCount -> [Var] -> [Var]
+-- If (\xyz. t) appears under-applied to only two arguments,
+-- we must zap the occ-info on x,y, because they appear under the \x
+-- See Note [Occurrence analysis for lambda binders] in GHc.Core.Opt.OccurAnal
+--
+-- NB: both `arg_count` and `bndrs` include both type and value args/bndrs
+zapLamBndrs arg_count bndrs
+ | no_need_to_zap = bndrs
+ | otherwise = zap_em arg_count bndrs
+ where
+ no_need_to_zap = all isOneShotBndr (drop arg_count bndrs)
+
+ zap_em :: FullArgCount -> [Var] -> [Var]
+ zap_em 0 bs = bs
+ zap_em _ [] = []
+ zap_em n (b:bs) | isTyVar b = b : zap_em (n-1) bs
+ | otherwise = zapLamIdInfo b : zap_em (n-1) bs
+
+
+{- *********************************************************************
* *
\subsection{Determining non-updatable right-hand-sides}
* *
diff --git a/compiler/GHC/Driver/Main.hs b/compiler/GHC/Driver/Main.hs
index bbf7a3336c..ab877f6f48 100644
--- a/compiler/GHC/Driver/Main.hs
+++ b/compiler/GHC/Driver/Main.hs
@@ -2029,6 +2029,8 @@ hscCompileCoreExpr hsc_env =
hscCompileCoreExpr' :: HscEnv -> SrcSpan -> CoreExpr -> IO ForeignHValue
hscCompileCoreExpr' hsc_env srcspan ds_expr
= do { {- Simplify it -}
+ -- Question: should we call SimpleOpt.simpleOptExpr here instead?
+ -- It is, well, simpler, and does less inlining etc.
simpl_expr <- simplifyExpr hsc_env ds_expr
{- Tidy it (temporary, until coreSat does cloning) -}
diff --git a/compiler/GHC/Types/Basic.hs b/compiler/GHC/Types/Basic.hs
index 4c92b8f9a3..f89185ee24 100644
--- a/compiler/GHC/Types/Basic.hs
+++ b/compiler/GHC/Types/Basic.hs
@@ -26,7 +26,7 @@ module GHC.Types.Basic (
ConTag, ConTagZ, fIRST_TAG,
- Arity, RepArity, JoinArity,
+ Arity, RepArity, JoinArity, FullArgCount,
Alignment, mkAlignment, alignmentOf, alignmentBytes,
@@ -172,6 +172,11 @@ type RepArity = Int
-- are counted.
type JoinArity = Int
+-- | FullArgCount is the number of type or value arguments in an application,
+-- or the number of type or value binders in a lambda. Note: it includes
+-- both type and value arguments!
+type FullArgCount = Int
+
{-
************************************************************************
* *
diff --git a/testsuite/tests/perf/should_run/T19347.hs b/testsuite/tests/perf/should_run/T19347.hs
new file mode 100644
index 0000000000..c885eac724
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T19347.hs
@@ -0,0 +1,30 @@
+{-# LANGUAGE UnboxedTuples #-}
+
+module Main where
+
+data T = MkT !Int Int
+
+-- An expensive recursive function
+g :: Int -> Int -> (# Int, Int #)
+g x 0 = (# x, 33 #)
+g x n = g (x+n) (n-1)
+
+-- 'foo' calls 'h' often
+foo h 0 = 0
+foo h n = h n `seq` foo h (n-1)
+
+main = print (foo (MkT (case g 1 200 of (# a,b #) -> a))
+ 200)
+
+{- In main, we don't want to eta-expand the MkT to
+ (\x. MkT (case g 1 200 of (# a,b #) -> a) x)
+because then that call to g may be made more often
+The faffing with unboxed tuples is to defeat full
+laziness which would otherwise lift the call to g
+out to top level
+
+Before fixing #19347, running this program gave
+ 2,012,096 bytes allocated in the heap
+after it gave
+ 101,712 bytes allocated in the heap
+-}
diff --git a/testsuite/tests/perf/should_run/T19347.stdout b/testsuite/tests/perf/should_run/T19347.stdout
new file mode 100644
index 0000000000..573541ac97
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T19347.stdout
@@ -0,0 +1 @@
+0
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index 75044776ca..0cb7c7a73a 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -385,3 +385,8 @@ test('T18574',
compile_and_run,
['-O'])
+test('T19347',
+ [collect_stats('bytes allocated', 5), only_ways(['normal'])],
+ compile_and_run,
+ ['-O'])
+