summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Graf <sebastian.graf@kit.edu>2023-03-07 13:23:19 +0100
committerSebastian Graf <sebastian.graf@kit.edu>2023-05-17 17:17:25 +0200
commit5c8858846fb0d67a550f5943bf8bfced19b5f46b (patch)
tree5a1022059f8d7b96e55e1c3aa4b15e5308312acd
parentfaafe2a0ec4dcca1643c5bf3d52718eefc93d039 (diff)
downloadhaskell-wip/T23083.tar.gz
CorePrep: Eta expand arguments (#23083)wip/T23083
Previously, we'd only eta expand let bindings and lambdas, now we'll also eta expand arguments such as in T23083: ```hs g f h = f (h `seq` (h $)) ``` Unless `-fpedantic-bottoms` is set, we'll now transform to ```hs g f h = f (\eta -> h eta) ``` in CorePrep. See the new `Note [Eta expansion of arguments in CorePrep]` for the details. We only do this optimisation with -O2 because we saw 2-3% ghc/alloc regressions in T4801 and T5321FD. Fixes #23083.
-rw-r--r--compiler/GHC/CoreToStg/Prep.hs71
-rw-r--r--compiler/GHC/Driver/Config/CoreToStg/Prep.hs9
-rw-r--r--compiler/GHC/Driver/DynFlags.hs3
-rw-r--r--compiler/GHC/Driver/Flags.hs1
-rw-r--r--compiler/GHC/Driver/Session.hs3
-rw-r--r--docs/users_guide/using-optimisation.rst11
-rw-r--r--testsuite/tests/simplCore/should_compile/T23083.hs10
-rw-r--r--testsuite/tests/simplCore/should_compile/T23083.stderr47
-rw-r--r--testsuite/tests/simplCore/should_compile/all.T1
9 files changed, 150 insertions, 6 deletions
diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs
index 0a2203d460..b6228a3f82 100644
--- a/compiler/GHC/CoreToStg/Prep.hs
+++ b/compiler/GHC/CoreToStg/Prep.hs
@@ -1471,11 +1471,36 @@ cpeArg env dmd arg
; if exprIsTrivial arg2
then return (floats2, arg2)
else do { v <- newVar arg_ty
- ; let arg3 = cpeEtaExpand (exprArity arg2) arg2
+ -- See Note [Eta expansion of arguments in CorePrep]
+ ; let arg3 = cpeEtaExpandArg env arg2
arg_float = mkFloat env dmd is_unlifted v arg3
; return (addFloat floats2 arg_float, varToCoreExpr v) }
}
+cpeEtaExpandArg :: CorePrepEnv -> CoreArg -> CoreArg
+-- ^ See Note [Eta expansion of arguments in CorePrep]
+cpeEtaExpandArg env arg = cpeEtaExpand arity arg
+ where
+ arity | Just ao <- cp_arityOpts (cpe_config env) -- Just <=> -O1 or -O2
+ , not (is_join_head arg)
+ -- See Note [Eta expansion for join points]
+ -- Eta expanding the join point would introduce crap that we can't
+ -- generate code for
+ = case exprEtaExpandArity ao arg of
+ Nothing -> 0
+ Just at -> arityTypeArity at
+ | otherwise
+ = exprArity arg -- this is cheap enough for -O0
+
+is_join_head :: CoreExpr -> Bool
+-- ^ Identify the cases where our mishandling described in
+-- Note [Eta expansion for join points] would generate crap
+is_join_head (Let bs e) = isJoinBind bs || is_join_head e
+is_join_head (Cast e _) = is_join_head e
+is_join_head (Tick _ e) = is_join_head e
+is_join_head (Case _ _ _ alts) = any is_join_head (rhssOfAlts alts)
+is_join_head _ = False
+
{-
Note [Floating unlifted arguments]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1593,6 +1618,44 @@ and now we do NOT want eta expansion to give
Instead GHC.Core.Opt.Arity.etaExpand gives
f = /\a -> \y -> let s = h 3 in g s y
+Note [Eta expansion of arguments in CorePrep]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose `g = \x y. blah` and consider the expression `f (g x)`; we ANFise to
+
+ let t = g x
+ in f t
+
+We really don't want that `t` to be a thunk! That just wastes runtime, updating
+a thunk with a PAP etc. The code generator could in principle allocate a PAP,
+but in fact it does not know how to do that -- it's easier just to eta-expand:
+
+ let t = \y. g x y
+ in f t
+
+To what arity should we eta-expand the argument? `cpeArg` uses two strategies,
+governed by the presence of `-fdo-clever-arg-eta-expansion` (implied by -O):
+
+ 1. Cheap, with -O0: just use `exprArity`.
+ 2. More clever but expensive, with -O1 -O2: use `exprEtaExpandArity`,
+ same function the Simplifier uses to eta expand RHSs and lambda bodies.
+
+The only reason for using (1) rather than (2) is to keep compile times down.
+Using (2) in -O0 bumped up compiler allocations by 2-3% in tests T4801 and
+T5321*. However, Plan (2) catches cases that (1) misses.
+For example (#23083, assuming -fno-pedantic-bottoms):
+
+ let t = case z of __DEFAULT -> g x
+ in f t
+
+to
+
+ let t = \y -> case z of __DEFAULT -> g x y
+ in f t
+
+Note that there is a missed opportunity in eta expanding `t` earlier, in the
+Simplifier: It would allow us to inline `g`, potentially enabling further
+simplification. But then we could have inlined `g` into the PAP to begin with,
+and that is discussed in #23150; hence we needn't worry about that in CorePrep.
-}
cpeEtaExpand :: Arity -> CpeRhs -> CpeRhs
@@ -1982,6 +2045,11 @@ data CorePrepConfig = CorePrepConfig
, cp_convertNumLit :: !(LitNumType -> Integer -> Maybe CoreExpr)
-- ^ Convert some numeric literals (Integer, Natural) into their final
-- Core form.
+
+ , cp_arityOpts :: !(Maybe ArityOpts)
+ -- ^ Configuration for arity analysis ('exprEtaExpandArity').
+ -- See Note [Eta expansion of arguments in CorePrep]
+ -- When 'Nothing' (e.g., -O0, -O1), use the cheaper 'exprArity' instead
}
data CorePrepEnv
@@ -1992,6 +2060,7 @@ data CorePrepEnv
-- enabled we instead produce an 'error' expression to catch
-- the case where a function we think should bottom
-- unexpectedly returns.
+
, cpe_env :: IdEnv CoreExpr -- Clone local Ids
-- ^ This environment is used for three operations:
--
diff --git a/compiler/GHC/Driver/Config/CoreToStg/Prep.hs b/compiler/GHC/Driver/Config/CoreToStg/Prep.hs
index 9f2a757457..16a8026931 100644
--- a/compiler/GHC/Driver/Config/CoreToStg/Prep.hs
+++ b/compiler/GHC/Driver/Config/CoreToStg/Prep.hs
@@ -9,6 +9,7 @@ import GHC.Core.Opt.Pipeline.Types ( CoreToDo(..) )
import GHC.Driver.Env
import GHC.Driver.Session
import GHC.Driver.Config.Core.Lint
+import GHC.Driver.Config.Core.Opt.Arity
import GHC.Tc.Utils.Env
import GHC.Types.Var
import GHC.Utils.Outputable ( alwaysQualify )
@@ -17,14 +18,18 @@ import GHC.CoreToStg.Prep
initCorePrepConfig :: HscEnv -> IO CorePrepConfig
initCorePrepConfig hsc_env = do
+ let dflags = hsc_dflags hsc_env
convertNumLit <- do
- let platform = targetPlatform $ hsc_dflags hsc_env
+ let platform = targetPlatform dflags
home_unit = hsc_home_unit hsc_env
lookup_global = lookupGlobal hsc_env
mkConvertNumLiteral platform home_unit lookup_global
return $ CorePrepConfig
- { cp_catchNonexhaustiveCases = gopt Opt_CatchNonexhaustiveCases $ hsc_dflags hsc_env
+ { cp_catchNonexhaustiveCases = gopt Opt_CatchNonexhaustiveCases dflags
, cp_convertNumLit = convertNumLit
+ , cp_arityOpts = if gopt Opt_DoCleverArgEtaExpansion dflags
+ then Just (initArityOpts dflags)
+ else Nothing
}
initCorePrepPgmConfig :: DynFlags -> [Var] -> CorePrepPgmConfig
diff --git a/compiler/GHC/Driver/DynFlags.hs b/compiler/GHC/Driver/DynFlags.hs
index 01aa518452..05f70628f9 100644
--- a/compiler/GHC/Driver/DynFlags.hs
+++ b/compiler/GHC/Driver/DynFlags.hs
@@ -1242,7 +1242,8 @@ optLevelFlags :: [([Int], GeneralFlag)]
-- Default settings of flags, before any command-line overrides
optLevelFlags -- see Note [Documenting optimisation flags]
= [ ([0,1,2], Opt_DoLambdaEtaExpansion)
- , ([0,1,2], Opt_DoEtaReduction) -- See Note [Eta-reduction in -O0]
+ , ([1,2], Opt_DoCleverArgEtaExpansion) -- See Note [Eta expansion of arguments in CorePrep]
+ , ([0,1,2], Opt_DoEtaReduction) -- See Note [Eta-reduction in -O0]
, ([0,1,2], Opt_LlvmTBAA)
, ([0,1,2], Opt_ProfManualCcs )
, ([2], Opt_DictsStrict)
diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs
index 759c137eb5..8930a26127 100644
--- a/compiler/GHC/Driver/Flags.hs
+++ b/compiler/GHC/Driver/Flags.hs
@@ -262,6 +262,7 @@ data GeneralFlag
| Opt_SpecConstr
| Opt_SpecConstrKeen
| Opt_DoLambdaEtaExpansion
+ | Opt_DoCleverArgEtaExpansion -- See Note [Eta expansion of arguments in CorePrep]
| Opt_IgnoreAsserts
| Opt_DoEtaReduction
| Opt_CaseMerge
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index dd5bb6b7cb..03d5d3105b 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -2328,6 +2328,7 @@ fFlagsDeps = [
Opt_DmdTxDictSel "effect is now unconditionally enabled",
flagSpec "do-eta-reduction" Opt_DoEtaReduction,
flagSpec "do-lambda-eta-expansion" Opt_DoLambdaEtaExpansion,
+ flagSpec "do-clever-arg-eta-expansion" Opt_DoCleverArgEtaExpansion, -- See Note [Eta expansion of arguments in CorePrep]
flagSpec "eager-blackholing" Opt_EagerBlackHoling,
flagSpec "embed-manifest" Opt_EmbedManifest,
flagSpec "enable-rewrite-rules" Opt_EnableRewriteRules,
@@ -2815,8 +2816,6 @@ impliedXFlags
, (LangExt.UnliftedDatatypes, turnOn, LangExt.StandaloneKindSignatures)
]
-
-
-- | Things you get with `-dlint`.
enableDLint :: DynP ()
enableDLint = do
diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst
index c75d5e6097..d587c847d9 100644
--- a/docs/users_guide/using-optimisation.rst
+++ b/docs/users_guide/using-optimisation.rst
@@ -467,6 +467,17 @@ as such you shouldn't need to set any of them explicitly. A flag
Eta-expand let-bindings to increase their arity.
+.. ghc-flag:: -fdo-clever-arg-eta-expansion
+ :shortdesc: Enable sophisticated argument eta-expansion. Implied by :ghc-flag:`-O2`.
+ :type: dynamic
+ :reverse: -fno-do-clever-arg-eta-expansion
+ :category:
+
+ :default: off
+
+ Eta-expand arguments to increase their arity to avoid allocating unnecessary
+ thunks for them.
+
.. ghc-flag:: -feager-blackholing
:shortdesc: Turn on :ref:`eager blackholing <parallel-compile-options>`
:type: dynamic
diff --git a/testsuite/tests/simplCore/should_compile/T23083.hs b/testsuite/tests/simplCore/should_compile/T23083.hs
new file mode 100644
index 0000000000..1f181217a4
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T23083.hs
@@ -0,0 +1,10 @@
+module T23083 where
+
+-- Just ($), but NOINLINE so that we don't inline it eagerly, subverting the
+-- test case
+($$) :: (a -> b) -> a -> b
+($$) f x = f x
+{-# NOINLINE ($$) #-}
+
+g :: ((Integer -> Integer) -> Integer) -> (Integer -> Integer) -> Integer
+g f h = f (h `seq` (h $$))
diff --git a/testsuite/tests/simplCore/should_compile/T23083.stderr b/testsuite/tests/simplCore/should_compile/T23083.stderr
new file mode 100644
index 0000000000..61151890e5
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T23083.stderr
@@ -0,0 +1,47 @@
+
+==================== CorePrep ====================
+Result size of CorePrep = {terms: 34, types: 34, coercions: 0, joins: 0/1}
+
+-- RHS size: {terms: 6, types: 5, coercions: 0, joins: 0/0}
+(T23083.$$) [InlPrag=NOINLINE] :: forall a b. (a -> b) -> a -> b
+[GblId, Arity=2, Str=<1C(1,L)><L>, Unf=OtherCon []]
+(T23083.$$) = \ (@a) (@b) (f [Occ=Once1!] :: a -> b) (x [Occ=Once1] :: a) -> f x
+
+-- RHS size: {terms: 12, types: 12, coercions: 0, joins: 0/1}
+T23083.g :: ((GHC.Num.Integer.Integer -> GHC.Num.Integer.Integer) -> GHC.Num.Integer.Integer) -> (GHC.Num.Integer.Integer -> GHC.Num.Integer.Integer) -> GHC.Num.Integer.Integer
+[GblId, Arity=2, Str=<1C(1,L)><ML>, Unf=OtherCon []]
+T23083.g
+ = \ (f [Occ=Once1!] :: (GHC.Num.Integer.Integer -> GHC.Num.Integer.Integer) -> GHC.Num.Integer.Integer) (h [Occ=OnceL1] :: GHC.Num.Integer.Integer -> GHC.Num.Integer.Integer) ->
+ let {
+ sat [Occ=Once1] :: GHC.Num.Integer.Integer -> GHC.Num.Integer.Integer
+ [LclId]
+ sat = \ (eta [Occ=Once1] :: GHC.Num.Integer.Integer) -> case h of h1 [Occ=Once1] { __DEFAULT -> T23083.$$ @GHC.Num.Integer.Integer @GHC.Num.Integer.Integer h1 eta } } in
+ f sat
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+T23083.$trModule4 :: GHC.Prim.Addr#
+[GblId, Unf=OtherCon []]
+T23083.$trModule4 = "main"#
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+T23083.$trModule3 :: GHC.Types.TrName
+[GblId, Unf=OtherCon []]
+T23083.$trModule3 = GHC.Types.TrNameS T23083.$trModule4
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+T23083.$trModule2 :: GHC.Prim.Addr#
+[GblId, Unf=OtherCon []]
+T23083.$trModule2 = "T23083"#
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+T23083.$trModule1 :: GHC.Types.TrName
+[GblId, Unf=OtherCon []]
+T23083.$trModule1 = GHC.Types.TrNameS T23083.$trModule2
+
+-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
+T23083.$trModule :: GHC.Types.Module
+[GblId, Unf=OtherCon []]
+T23083.$trModule = GHC.Types.Module T23083.$trModule3 T23083.$trModule1
+
+
+
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index b1ec7473ff..bd6a862ba9 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -477,6 +477,7 @@ test('T23012', normal, compile, ['-O'])
test('RewriteHigherOrderPatterns', normal, compile, ['-O -ddump-rule-rewrites -dsuppress-all -dsuppress-uniques'])
test('T23024', normal, multimod_compile, ['T23024', '-O -v0'])
test('T23026', normal, compile, ['-O'])
+test('T23083', [ grep_errmsg(r'eta.+::.+Integer') ], compile, ['-O -ddump-prep -dsuppress-uniques -dppr-cols=99999'])
test('T23267', [expect_broken(23267), only_ways(['ghci']), extra_hc_opts('-fno-unoptimized-core-for-interpreter -fspec-constr')], ghci_script, ['T23267.script'])
test('T23362', normal, compile, ['-O'])
test('T23307', normal, compile, ['-O -ddump-simpl -dno-typeable-binds -dsuppress-uniques'])