diff options
-rw-r--r-- | compiler/GHC/CoreToStg/Prep.hs | 71 | ||||
-rw-r--r-- | compiler/GHC/Driver/Config/CoreToStg/Prep.hs | 9 | ||||
-rw-r--r-- | compiler/GHC/Driver/DynFlags.hs | 3 | ||||
-rw-r--r-- | compiler/GHC/Driver/Flags.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/Driver/Session.hs | 3 | ||||
-rw-r--r-- | docs/users_guide/using-optimisation.rst | 11 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T23083.hs | 10 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T23083.stderr | 47 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/all.T | 1 |
9 files changed, 150 insertions, 6 deletions
diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs index 0a2203d460..b6228a3f82 100644 --- a/compiler/GHC/CoreToStg/Prep.hs +++ b/compiler/GHC/CoreToStg/Prep.hs @@ -1471,11 +1471,36 @@ cpeArg env dmd arg ; if exprIsTrivial arg2 then return (floats2, arg2) else do { v <- newVar arg_ty - ; let arg3 = cpeEtaExpand (exprArity arg2) arg2 + -- See Note [Eta expansion of arguments in CorePrep] + ; let arg3 = cpeEtaExpandArg env arg2 arg_float = mkFloat env dmd is_unlifted v arg3 ; return (addFloat floats2 arg_float, varToCoreExpr v) } } +cpeEtaExpandArg :: CorePrepEnv -> CoreArg -> CoreArg +-- ^ See Note [Eta expansion of arguments in CorePrep] +cpeEtaExpandArg env arg = cpeEtaExpand arity arg + where + arity | Just ao <- cp_arityOpts (cpe_config env) -- Just <=> -O1 or -O2 + , not (is_join_head arg) + -- See Note [Eta expansion for join points] + -- Eta expanding the join point would introduce crap that we can't + -- generate code for + = case exprEtaExpandArity ao arg of + Nothing -> 0 + Just at -> arityTypeArity at + | otherwise + = exprArity arg -- this is cheap enough for -O0 + +is_join_head :: CoreExpr -> Bool +-- ^ Identify the cases where our mishandling described in +-- Note [Eta expansion for join points] would generate crap +is_join_head (Let bs e) = isJoinBind bs || is_join_head e +is_join_head (Cast e _) = is_join_head e +is_join_head (Tick _ e) = is_join_head e +is_join_head (Case _ _ _ alts) = any is_join_head (rhssOfAlts alts) +is_join_head _ = False + {- Note [Floating unlifted arguments] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1593,6 +1618,44 @@ and now we do NOT want eta expansion to give Instead GHC.Core.Opt.Arity.etaExpand gives f = /\a -> \y -> let s = h 3 in g s y +Note [Eta expansion of arguments in CorePrep] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose `g = \x y. blah` and consider the expression `f (g x)`; we ANFise to + + let t = g x + in f t + +We really don't want that `t` to be a thunk! That just wastes runtime, updating +a thunk with a PAP etc. The code generator could in principle allocate a PAP, +but in fact it does not know how to do that -- it's easier just to eta-expand: + + let t = \y. g x y + in f t + +To what arity should we eta-expand the argument? `cpeArg` uses two strategies, +governed by the presence of `-fdo-clever-arg-eta-expansion` (implied by -O): + + 1. Cheap, with -O0: just use `exprArity`. + 2. More clever but expensive, with -O1 -O2: use `exprEtaExpandArity`, + same function the Simplifier uses to eta expand RHSs and lambda bodies. + +The only reason for using (1) rather than (2) is to keep compile times down. +Using (2) in -O0 bumped up compiler allocations by 2-3% in tests T4801 and +T5321*. However, Plan (2) catches cases that (1) misses. +For example (#23083, assuming -fno-pedantic-bottoms): + + let t = case z of __DEFAULT -> g x + in f t + +to + + let t = \y -> case z of __DEFAULT -> g x y + in f t + +Note that there is a missed opportunity in eta expanding `t` earlier, in the +Simplifier: It would allow us to inline `g`, potentially enabling further +simplification. But then we could have inlined `g` into the PAP to begin with, +and that is discussed in #23150; hence we needn't worry about that in CorePrep. -} cpeEtaExpand :: Arity -> CpeRhs -> CpeRhs @@ -1982,6 +2045,11 @@ data CorePrepConfig = CorePrepConfig , cp_convertNumLit :: !(LitNumType -> Integer -> Maybe CoreExpr) -- ^ Convert some numeric literals (Integer, Natural) into their final -- Core form. + + , cp_arityOpts :: !(Maybe ArityOpts) + -- ^ Configuration for arity analysis ('exprEtaExpandArity'). + -- See Note [Eta expansion of arguments in CorePrep] + -- When 'Nothing' (e.g., -O0, -O1), use the cheaper 'exprArity' instead } data CorePrepEnv @@ -1992,6 +2060,7 @@ data CorePrepEnv -- enabled we instead produce an 'error' expression to catch -- the case where a function we think should bottom -- unexpectedly returns. + , cpe_env :: IdEnv CoreExpr -- Clone local Ids -- ^ This environment is used for three operations: -- diff --git a/compiler/GHC/Driver/Config/CoreToStg/Prep.hs b/compiler/GHC/Driver/Config/CoreToStg/Prep.hs index 9f2a757457..16a8026931 100644 --- a/compiler/GHC/Driver/Config/CoreToStg/Prep.hs +++ b/compiler/GHC/Driver/Config/CoreToStg/Prep.hs @@ -9,6 +9,7 @@ import GHC.Core.Opt.Pipeline.Types ( CoreToDo(..) ) import GHC.Driver.Env import GHC.Driver.Session import GHC.Driver.Config.Core.Lint +import GHC.Driver.Config.Core.Opt.Arity import GHC.Tc.Utils.Env import GHC.Types.Var import GHC.Utils.Outputable ( alwaysQualify ) @@ -17,14 +18,18 @@ import GHC.CoreToStg.Prep initCorePrepConfig :: HscEnv -> IO CorePrepConfig initCorePrepConfig hsc_env = do + let dflags = hsc_dflags hsc_env convertNumLit <- do - let platform = targetPlatform $ hsc_dflags hsc_env + let platform = targetPlatform dflags home_unit = hsc_home_unit hsc_env lookup_global = lookupGlobal hsc_env mkConvertNumLiteral platform home_unit lookup_global return $ CorePrepConfig - { cp_catchNonexhaustiveCases = gopt Opt_CatchNonexhaustiveCases $ hsc_dflags hsc_env + { cp_catchNonexhaustiveCases = gopt Opt_CatchNonexhaustiveCases dflags , cp_convertNumLit = convertNumLit + , cp_arityOpts = if gopt Opt_DoCleverArgEtaExpansion dflags + then Just (initArityOpts dflags) + else Nothing } initCorePrepPgmConfig :: DynFlags -> [Var] -> CorePrepPgmConfig diff --git a/compiler/GHC/Driver/DynFlags.hs b/compiler/GHC/Driver/DynFlags.hs index 01aa518452..05f70628f9 100644 --- a/compiler/GHC/Driver/DynFlags.hs +++ b/compiler/GHC/Driver/DynFlags.hs @@ -1242,7 +1242,8 @@ optLevelFlags :: [([Int], GeneralFlag)] -- Default settings of flags, before any command-line overrides optLevelFlags -- see Note [Documenting optimisation flags] = [ ([0,1,2], Opt_DoLambdaEtaExpansion) - , ([0,1,2], Opt_DoEtaReduction) -- See Note [Eta-reduction in -O0] + , ([1,2], Opt_DoCleverArgEtaExpansion) -- See Note [Eta expansion of arguments in CorePrep] + , ([0,1,2], Opt_DoEtaReduction) -- See Note [Eta-reduction in -O0] , ([0,1,2], Opt_LlvmTBAA) , ([0,1,2], Opt_ProfManualCcs ) , ([2], Opt_DictsStrict) diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs index 759c137eb5..8930a26127 100644 --- a/compiler/GHC/Driver/Flags.hs +++ b/compiler/GHC/Driver/Flags.hs @@ -262,6 +262,7 @@ data GeneralFlag | Opt_SpecConstr | Opt_SpecConstrKeen | Opt_DoLambdaEtaExpansion + | Opt_DoCleverArgEtaExpansion -- See Note [Eta expansion of arguments in CorePrep] | Opt_IgnoreAsserts | Opt_DoEtaReduction | Opt_CaseMerge diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs index dd5bb6b7cb..03d5d3105b 100644 --- a/compiler/GHC/Driver/Session.hs +++ b/compiler/GHC/Driver/Session.hs @@ -2328,6 +2328,7 @@ fFlagsDeps = [ Opt_DmdTxDictSel "effect is now unconditionally enabled", flagSpec "do-eta-reduction" Opt_DoEtaReduction, flagSpec "do-lambda-eta-expansion" Opt_DoLambdaEtaExpansion, + flagSpec "do-clever-arg-eta-expansion" Opt_DoCleverArgEtaExpansion, -- See Note [Eta expansion of arguments in CorePrep] flagSpec "eager-blackholing" Opt_EagerBlackHoling, flagSpec "embed-manifest" Opt_EmbedManifest, flagSpec "enable-rewrite-rules" Opt_EnableRewriteRules, @@ -2815,8 +2816,6 @@ impliedXFlags , (LangExt.UnliftedDatatypes, turnOn, LangExt.StandaloneKindSignatures) ] - - -- | Things you get with `-dlint`. enableDLint :: DynP () enableDLint = do diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst index c75d5e6097..d587c847d9 100644 --- a/docs/users_guide/using-optimisation.rst +++ b/docs/users_guide/using-optimisation.rst @@ -467,6 +467,17 @@ as such you shouldn't need to set any of them explicitly. A flag Eta-expand let-bindings to increase their arity. +.. ghc-flag:: -fdo-clever-arg-eta-expansion + :shortdesc: Enable sophisticated argument eta-expansion. Implied by :ghc-flag:`-O2`. + :type: dynamic + :reverse: -fno-do-clever-arg-eta-expansion + :category: + + :default: off + + Eta-expand arguments to increase their arity to avoid allocating unnecessary + thunks for them. + .. ghc-flag:: -feager-blackholing :shortdesc: Turn on :ref:`eager blackholing <parallel-compile-options>` :type: dynamic diff --git a/testsuite/tests/simplCore/should_compile/T23083.hs b/testsuite/tests/simplCore/should_compile/T23083.hs new file mode 100644 index 0000000000..1f181217a4 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T23083.hs @@ -0,0 +1,10 @@ +module T23083 where + +-- Just ($), but NOINLINE so that we don't inline it eagerly, subverting the +-- test case +($$) :: (a -> b) -> a -> b +($$) f x = f x +{-# NOINLINE ($$) #-} + +g :: ((Integer -> Integer) -> Integer) -> (Integer -> Integer) -> Integer +g f h = f (h `seq` (h $$)) diff --git a/testsuite/tests/simplCore/should_compile/T23083.stderr b/testsuite/tests/simplCore/should_compile/T23083.stderr new file mode 100644 index 0000000000..61151890e5 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T23083.stderr @@ -0,0 +1,47 @@ + +==================== CorePrep ==================== +Result size of CorePrep = {terms: 34, types: 34, coercions: 0, joins: 0/1} + +-- RHS size: {terms: 6, types: 5, coercions: 0, joins: 0/0} +(T23083.$$) [InlPrag=NOINLINE] :: forall a b. (a -> b) -> a -> b +[GblId, Arity=2, Str=<1C(1,L)><L>, Unf=OtherCon []] +(T23083.$$) = \ (@a) (@b) (f [Occ=Once1!] :: a -> b) (x [Occ=Once1] :: a) -> f x + +-- RHS size: {terms: 12, types: 12, coercions: 0, joins: 0/1} +T23083.g :: ((GHC.Num.Integer.Integer -> GHC.Num.Integer.Integer) -> GHC.Num.Integer.Integer) -> (GHC.Num.Integer.Integer -> GHC.Num.Integer.Integer) -> GHC.Num.Integer.Integer +[GblId, Arity=2, Str=<1C(1,L)><ML>, Unf=OtherCon []] +T23083.g + = \ (f [Occ=Once1!] :: (GHC.Num.Integer.Integer -> GHC.Num.Integer.Integer) -> GHC.Num.Integer.Integer) (h [Occ=OnceL1] :: GHC.Num.Integer.Integer -> GHC.Num.Integer.Integer) -> + let { + sat [Occ=Once1] :: GHC.Num.Integer.Integer -> GHC.Num.Integer.Integer + [LclId] + sat = \ (eta [Occ=Once1] :: GHC.Num.Integer.Integer) -> case h of h1 [Occ=Once1] { __DEFAULT -> T23083.$$ @GHC.Num.Integer.Integer @GHC.Num.Integer.Integer h1 eta } } in + f sat + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +T23083.$trModule4 :: GHC.Prim.Addr# +[GblId, Unf=OtherCon []] +T23083.$trModule4 = "main"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +T23083.$trModule3 :: GHC.Types.TrName +[GblId, Unf=OtherCon []] +T23083.$trModule3 = GHC.Types.TrNameS T23083.$trModule4 + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +T23083.$trModule2 :: GHC.Prim.Addr# +[GblId, Unf=OtherCon []] +T23083.$trModule2 = "T23083"# + +-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0} +T23083.$trModule1 :: GHC.Types.TrName +[GblId, Unf=OtherCon []] +T23083.$trModule1 = GHC.Types.TrNameS T23083.$trModule2 + +-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0} +T23083.$trModule :: GHC.Types.Module +[GblId, Unf=OtherCon []] +T23083.$trModule = GHC.Types.Module T23083.$trModule3 T23083.$trModule1 + + + diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T index b1ec7473ff..bd6a862ba9 100644 --- a/testsuite/tests/simplCore/should_compile/all.T +++ b/testsuite/tests/simplCore/should_compile/all.T @@ -477,6 +477,7 @@ test('T23012', normal, compile, ['-O']) test('RewriteHigherOrderPatterns', normal, compile, ['-O -ddump-rule-rewrites -dsuppress-all -dsuppress-uniques']) test('T23024', normal, multimod_compile, ['T23024', '-O -v0']) test('T23026', normal, compile, ['-O']) +test('T23083', [ grep_errmsg(r'eta.+::.+Integer') ], compile, ['-O -ddump-prep -dsuppress-uniques -dppr-cols=99999']) test('T23267', [expect_broken(23267), only_ways(['ghci']), extra_hc_opts('-fno-unoptimized-core-for-interpreter -fspec-constr')], ghci_script, ['T23267.script']) test('T23362', normal, compile, ['-O']) test('T23307', normal, compile, ['-O -ddump-simpl -dno-typeable-binds -dsuppress-uniques']) |