diff options
-rw-r--r-- | compiler/main/DynFlags.hs | 2 | ||||
-rw-r--r-- | compiler/specialise/SpecConstr.hs | 57 | ||||
-rw-r--r-- | docs/users_guide/using-optimisation.rst | 12 | ||||
-rw-r--r-- | testsuite/tests/perf/should_run/all.T | 4 |
4 files changed, 66 insertions, 9 deletions
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index d3d0ac34b7..442bbb984c 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -432,6 +432,7 @@ data GeneralFlag | Opt_StgCSE | Opt_LiberateCase | Opt_SpecConstr + | Opt_SpecConstrKeen | Opt_DoLambdaEtaExpansion | Opt_IgnoreAsserts | Opt_DoEtaReduction @@ -3684,6 +3685,7 @@ fFlagsDeps = [ (useInstead "enable-rewrite-rules"), flagSpec "shared-implib" Opt_SharedImplib, flagSpec "spec-constr" Opt_SpecConstr, + flagSpec "spec-constr-keen" Opt_SpecConstrKeen, flagSpec "specialise" Opt_Specialise, flagSpec "specialize" Opt_Specialise, flagSpec "specialise-aggressively" Opt_SpecialiseAggressively, diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs index 8a3e227c94..c2470bd644 100644 --- a/compiler/specialise/SpecConstr.hs +++ b/compiler/specialise/SpecConstr.hs @@ -41,7 +41,8 @@ import VarEnv import VarSet import Name import BasicTypes -import DynFlags ( DynFlags(..), hasPprDebug ) +import DynFlags ( DynFlags(..), GeneralFlag( Opt_SpecConstrKeen ) + , gopt, hasPprDebug ) import Maybes ( orElse, catMaybes, isJust, isNothing ) import Demand import GHC.Serialized ( deserializeWithData ) @@ -447,7 +448,6 @@ breaks an invariant. Note [Forcing specialisation] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - With stream fusion and in other similar cases, we want to fully specialise some (but not necessarily all!) loops regardless of their size and the number of specialisations. @@ -754,6 +754,39 @@ into a work-free value again, thus a'_shr = (a1, x_af7) but that's more work, so until its shown to be important I'm going to leave it for now. + +Note [Making SpecConstr keener] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this, in (perf/should_run/T9339) + last (filter odd [1..1000]) + +After optimisation, including SpecConstr, we get: + f :: Int# -> Int -> Int + f x y = case case remInt# x 2# of + __DEFAULT -> case x of + __DEFAULT -> f (+# wild_Xp 1#) (I# x) + 1000000# -> ... + 0# -> case x of + __DEFAULT -> f (+# wild_Xp 1#) y + 1000000# -> y + +Not good! We build an (I# x) box every time around the loop. +SpecConstr (as described in the paper) does not specialise f, despite +the call (f ... (I# x)) because 'y' is not scrutinied in the body. +But it is much better to specialise f for the case where the argument +is of form (I# x); then we build the box only when returning y, which +is on the cold path. + +Another exmaple: + + f x = ...(g x).... + +Here 'x' is not scrutinised in f's body; but if we did specialise 'f' +then the call (g x) might allow 'g' to be specialised in turn. + +So sc_keen controls whether or not we take account of whether argument is +scrutinised in the body. True <=> ignore that, and speicalise whenever +the function is applied to a data constructor. -} data ScEnv = SCE { sc_dflags :: DynFlags, @@ -765,6 +798,11 @@ data ScEnv = SCE { sc_dflags :: DynFlags, sc_recursive :: Int, -- Max # of specialisations over recursive type. -- Stops ForceSpecConstr from diverging. + sc_keen :: Bool, -- Specialise on arguments that are known + -- constructors, even if they are not + -- scrutinised in the body. See + -- Note [Making SpecConstr keener] + sc_force :: Bool, -- Force specialisation? -- See Note [Forcing specialisation] @@ -807,6 +845,7 @@ initScEnv dflags this_mod anns sc_size = specConstrThreshold dflags, sc_count = specConstrCount dflags, sc_recursive = specConstrRecursive dflags, + sc_keen = gopt Opt_SpecConstrKeen dflags, sc_force = False, sc_subst = emptySubst, sc_how_bound = emptyVarEnv, @@ -1976,11 +2015,12 @@ argToPat env in_scope val_env arg arg_occ mkConApp dc (ty_args ++ args')) } where mb_scrut dc = case arg_occ of - ScrutOcc bs - | Just occs <- lookupUFM bs dc - -> Just (occs) -- See Note [Reboxing] - _other | sc_force env -> Just (repeat UnkOcc) - | otherwise -> Nothing + ScrutOcc bs | Just occs <- lookupUFM bs dc + -> Just (occs) -- See Note [Reboxing] + _other | sc_force env || sc_keen env + -> Just (repeat UnkOcc) + | otherwise + -> Nothing -- Check if the argument is a variable that -- (a) is used in an interesting way in the function body @@ -1989,6 +2029,9 @@ argToPat env in_scope val_env arg arg_occ argToPat env in_scope val_env (Var v) arg_occ | sc_force env || case arg_occ of { UnkOcc -> False; _other -> True }, -- (a) is_value, -- (b) + -- Ignoring sc_keen here to avoid gratuitously incurring Note [Reboxing] + -- So sc_keen focused just on f (I# x), where we have freshly-allocated + -- box that we can eliminate in the caller not (ignoreType env (varType v)) = return (True, Var v) where diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst index 9436832611..e56c47312c 100644 --- a/docs/users_guide/using-optimisation.rst +++ b/docs/users_guide/using-optimisation.rst @@ -522,7 +522,7 @@ list. Turn on call-pattern specialisation; see `Call-pattern specialisation for Haskell programs - <http://research.microsoft.com/en-us/um/people/simonpj/papers/spec-constr/index.htm>`__. + <https://www.microsoft.com/en-us/research/publication/system-f-with-type-equality-coercions-2/>`__. This optimisation specializes recursive functions according to their argument "shapes". This is best explained by example so consider: :: @@ -580,6 +580,16 @@ list. body directly, allowing heavy specialisation over the recursive cases. +.. ghc-flag:: -fspec-constr-keen + + :default: off + + If this flag is on, call-patten specialision will specialise a call + ``(f (Just x))`` with an explicit constructor agument, even if the argument + is not scrutinised in the body of the function. This is sometimes + beneficial; e.g. the argument might be given to some other function + that can itself be specialised. + .. ghc-flag:: -fspec-constr-count=<n> :default: 3 diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T index efcbb4a30b..2f57613c3a 100644 --- a/testsuite/tests/perf/should_run/all.T +++ b/testsuite/tests/perf/should_run/all.T @@ -462,7 +462,9 @@ test('T9339', # 2016-08-17: 50728 Join points (#12988) only_ways(['normal'])], compile_and_run, - ['-O2']) + ['-O2 -fspec-constr-keen']) + # For the -fspec-constr-keen see Note [Making SpecConstr keener] in SpecConstr + test('T8472', [stats_num_field('bytes allocated', |