From aac08a0f37442a79096d7d2392f34b42ee5da2bb Mon Sep 17 00:00:00 2001 From: Joachim Breitner Date: Thu, 15 Dec 2016 10:57:43 -0800 Subject: Add a CSE pass to Stg (#9291) This CSE pass only targets data constructor applications. This is probably the best we can do, as function calls and primitive operations might have side-effects. Introduces the flag -fstg-cse, enabled by default with -O. Differential Revision: https://phabricator.haskell.org/D2871 --- compiler/basicTypes/Id.hs | 6 + compiler/basicTypes/Var.hs | 19 + compiler/coreSyn/CoreSyn.hs | 8 - compiler/coreSyn/TrieMap.hs | 6 +- compiler/ghc.cabal.in | 1 + compiler/main/DynFlags.hs | 3 + compiler/simplStg/SimplStg.hs | 30 +- compiler/simplStg/StgCse.hs | 427 +++++++++++++++++++++++ compiler/simplStg/UnariseStg.hs | 5 - compiler/stgSyn/StgSyn.hs | 24 +- docs/users_guide/using-optimisation.rst | 8 + testsuite/tests/simplStg/Makefile | 3 + testsuite/tests/simplStg/should_run/Makefile | 3 + testsuite/tests/simplStg/should_run/T9291.hs | 58 +++ testsuite/tests/simplStg/should_run/T9291.stdout | 5 + testsuite/tests/simplStg/should_run/all.T | 12 + 16 files changed, 588 insertions(+), 30 deletions(-) create mode 100644 compiler/simplStg/StgCse.hs create mode 100644 testsuite/tests/simplStg/Makefile create mode 100644 testsuite/tests/simplStg/should_run/Makefile create mode 100644 testsuite/tests/simplStg/should_run/T9291.hs create mode 100644 testsuite/tests/simplStg/should_run/T9291.stdout create mode 100644 testsuite/tests/simplStg/should_run/all.T diff --git a/compiler/basicTypes/Id.hs b/compiler/basicTypes/Id.hs index 1b84acda75..84cafa3902 100644 --- a/compiler/basicTypes/Id.hs +++ b/compiler/basicTypes/Id.hs @@ -28,6 +28,10 @@ module Id ( -- * The main types Var, Id, isId, + -- * In and Out variants + InVar, InId, + OutVar, OutId, + -- ** Simple construction mkGlobalId, mkVanillaGlobal, mkVanillaGlobalWithInfo, mkLocalId, mkLocalCoVar, mkLocalIdOrCoVar, @@ -114,6 +118,8 @@ import BasicTypes -- Imported and re-exported import Var( Id, CoVar, DictId, + InId, InVar, + OutId, OutVar, idInfo, idDetails, globaliseId, varType, isId, isLocalId, isGlobalId, isExportedId ) import qualified Var diff --git a/compiler/basicTypes/Var.hs b/compiler/basicTypes/Var.hs index e783efea0d..3f78c2800f 100644 --- a/compiler/basicTypes/Var.hs +++ b/compiler/basicTypes/Var.hs @@ -37,6 +37,10 @@ module Var ( Var, CoVar, Id, NcId, DictId, DFunId, EvVar, EqVar, EvId, IpId, TyVar, TypeVar, KindVar, TKVar, TyCoVar, + -- * In and Out variants + InVar, InCoVar, InId, InTyVar, + OutVar, OutCoVar, OutId, OutTyVar, + -- ** Taking 'Var's apart varName, varUnique, varType, @@ -150,6 +154,21 @@ type EqVar = EvId -- Boxed equality evidence type TyCoVar = Id -- Type, *or* coercion variable -- predicate: isTyCoVar + +{- Many passes apply a substitution, and it's very handy to have type + synonyms to remind us whether or not the subsitution has been applied -} + +type InVar = Var +type InTyVar = TyVar +type InCoVar = CoVar +type InId = Id +type OutVar = Var +type OutTyVar = TyVar +type OutCoVar = CoVar +type OutId = Id + + + {- Note [Evidence: EvIds and CoVars] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * An EvId (evidence Id) is a term-level evidence variable diff --git a/compiler/coreSyn/CoreSyn.hs b/compiler/coreSyn/CoreSyn.hs index 52ffad041b..17b546bb98 100644 --- a/compiler/coreSyn/CoreSyn.hs +++ b/compiler/coreSyn/CoreSyn.hs @@ -477,10 +477,6 @@ this exhaustive list can be empty! -- Pre-cloning or substitution type InBndr = CoreBndr -type InVar = Var -type InTyVar = TyVar -type InCoVar = CoVar -type InId = Id type InType = Type type InKind = Kind type InBind = CoreBind @@ -491,10 +487,6 @@ type InCoercion = Coercion -- Post-cloning or substitution type OutBndr = CoreBndr -type OutVar = Var -type OutId = Id -type OutTyVar = TyVar -type OutCoVar = CoVar type OutType = Type type OutKind = Kind type OutCoercion = Coercion diff --git a/compiler/coreSyn/TrieMap.hs b/compiler/coreSyn/TrieMap.hs index c6b9f8e1d3..f8546d1680 100644 --- a/compiler/coreSyn/TrieMap.hs +++ b/compiler/coreSyn/TrieMap.hs @@ -15,7 +15,11 @@ module TrieMap( LooseTypeMap, MaybeMap, ListMap, - TrieMap(..), insertTM, deleteTM + TrieMap(..), insertTM, deleteTM, + LiteralMap, + lkDFreeVar, xtDFreeVar, + lkDNamed, xtDNamed, + (>.>), (|>), (|>>), ) where import CoreSyn diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index a7d380afb6..2f1f813ab0 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -400,6 +400,7 @@ Library Simplify SimplStg StgStats + StgCse UnariseStg RepType Rules diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index e7ace47f2a..0bc119a783 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -427,6 +427,7 @@ data GeneralFlag | Opt_CrossModuleSpecialise | Opt_StaticArgumentTransformation | Opt_CSE + | Opt_StgCSE | Opt_LiberateCase | Opt_SpecConstr | Opt_DoLambdaEtaExpansion @@ -3481,6 +3482,7 @@ fFlagsDeps = [ flagSpec "cmm-elim-common-blocks" Opt_CmmElimCommonBlocks, flagSpec "cmm-sink" Opt_CmmSink, flagSpec "cse" Opt_CSE, + flagSpec "stg-cse" Opt_StgCSE, flagSpec "cpr-anal" Opt_CprAnal, flagSpec "defer-type-errors" Opt_DeferTypeErrors, flagSpec "defer-typed-holes" Opt_DeferTypedHoles, @@ -3930,6 +3932,7 @@ optLevelFlags -- see Note [Documenting optimisation flags] , ([1,2], Opt_CmmElimCommonBlocks) , ([1,2], Opt_CmmSink) , ([1,2], Opt_CSE) + , ([1,2], Opt_StgCSE) , ([1,2], Opt_EnableRewriteRules) -- Off for -O0; see Note [Scoping for Builtin rules] -- in PrelRules , ([1,2], Opt_FloatIn) diff --git a/compiler/simplStg/SimplStg.hs b/compiler/simplStg/SimplStg.hs index 771df871cc..406e415287 100644 --- a/compiler/simplStg/SimplStg.hs +++ b/compiler/simplStg/SimplStg.hs @@ -17,6 +17,7 @@ import SCCfinal ( stgMassageForProfiling ) import StgLint ( lintStgBindings ) import StgStats ( showStgStats ) import UnariseStg ( unarise ) +import StgCse ( stgCse ) import DynFlags import Module ( Module ) @@ -64,22 +65,27 @@ stg2stg dflags module_name binds ------------------------------------------- do_stg_pass (binds, us, ccs) to_do - = let - (us1, us2) = splitUniqSupply us - in - case to_do of + = case to_do of D_stg_stats -> trace (showStgStats binds) - end_pass us2 "StgStats" ccs binds + end_pass us "StgStats" ccs binds StgDoMassageForProfiling -> {-# SCC "ProfMassage" #-} let + (us1, us2) = splitUniqSupply us (collected_CCs, binds3) = stgMassageForProfiling dflags module_name us1 binds in end_pass us2 "ProfMassage" collected_CCs binds3 + StgCSE -> + {-# SCC "StgCse" #-} + let + binds' = stgCse binds + in + end_pass us "StgCse" ccs binds' + end_pass us2 what ccs binds2 = do -- report verbosely, if required dumpIfSet_dyn dflags Opt_D_verbose_stg2stg what @@ -96,19 +102,15 @@ stg2stg dflags module_name binds -- | Optional Stg-to-Stg passes. data StgToDo - = StgDoMassageForProfiling -- should be (next to) last + = StgCSE + | StgDoMassageForProfiling -- should be (next to) last | D_stg_stats -- | Which optional Stg-to-Stg passes to run. Depends on flags, ways etc. getStgToDo :: DynFlags -> [StgToDo] getStgToDo dflags - = todo2 + = [ StgCSE | gopt Opt_StgCSE dflags] ++ + [ StgDoMassageForProfiling | WayProf `elem` ways dflags] ++ + [ D_stg_stats | stg_stats ] where stg_stats = gopt Opt_StgStats dflags - - todo1 = if stg_stats then [D_stg_stats] else [] - - todo2 | WayProf `elem` ways dflags - = StgDoMassageForProfiling : todo1 - | otherwise - = todo1 diff --git a/compiler/simplStg/StgCse.hs b/compiler/simplStg/StgCse.hs new file mode 100644 index 0000000000..74c196432f --- /dev/null +++ b/compiler/simplStg/StgCse.hs @@ -0,0 +1,427 @@ +{-# LANGUAGE TypeFamilies #-} + +{-| +Note [CSE for Stg] +~~~~~~~~~~~~~~~~~~ +This module implements a simple common subexpression elimination pass for STG. +This is useful because there are expressions that we want to common up (because +they are operational equivalent), but that we cannot common up in Core, because +their types differ. +This was original reported as #9291. + +There are two types of common code occurrences that we aim for, see +note [Case 1: CSEing allocated closures] and +note [Case 2: CSEing case binders] below. + + +Note [Case 1: CSEing allocated closures] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The fist kind of CSE opportunity we aim for is generated by this Haskell code: + + bar :: a -> (Either Int a, Either Bool a) + bar x = (Right x, Right x) + +which produces this Core: + + bar :: forall a. a -> (Either Int a, Either Bool a) + bar @a x = (Right @Int @a x, Right @Bool @a x) + +where the two components of the tuple are differnt terms, and cannot be +commoned up (easily). On the STG level we have + + bar [x] = let c1 = Right [x] + c2 = Right [x] + in (c1,c2) + +and now it is obvious that we can write + + bar [x] = let c1 = Right [x] + in (c1,c1) + +instead. + + +Note [Case 2: CSEing case binders] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The second kind of CSE opportunity we aim for is more interesting. +The Haskell code + + foo :: Either Int a -> Either Bool a + foo (Right x) = Right x + foo _ = Left False + +produces this Core + + foo :: forall a. Either Int a -> Either Bool a + foo @a e = case e of b { Left n -> … + , Right x -> Right @Bool @a x } + +where we cannot CSE `Right @Bool @a x` with the case binder `b` as they have +different types. But in STG we have + + foo [e] = case e of b { Left [n] -> … + , Right [x] -> Right [x] } + +and nothing stops us from transforming that to + + foo [e] = case e of b { Left [n] -> … + , Right [x] -> b} + +-} +module StgCse (stgCse) where + +import DataCon +import Id +import StgSyn +import Outputable +import VarEnv +import CoreSyn (AltCon(..)) +import Data.List (mapAccumL) +import Data.Maybe (fromMaybe) +import TrieMap +import NameEnv +import Control.Monad( (>=>) ) + +-------------- +-- The Trie -- +-------------- + +-- A lookup trie for data constructor appliations, i.e. +-- keys of type `(DataCon, [StgArg])`, following the patterns in TrieMap. + +data StgArgMap a = SAM + { sam_var :: DVarEnv a + , sam_lit :: LiteralMap a + } + +instance TrieMap StgArgMap where + type Key StgArgMap = StgArg + emptyTM = SAM { sam_var = emptyTM + , sam_lit = emptyTM } + lookupTM (StgVarArg var) = sam_var >.> lkDFreeVar var + lookupTM (StgLitArg lit) = sam_lit >.> lookupTM lit + alterTM (StgVarArg var) f m = m { sam_var = sam_var m |> xtDFreeVar var f } + alterTM (StgLitArg lit) f m = m { sam_lit = sam_lit m |> alterTM lit f } + foldTM k m = foldTM k (sam_var m) . foldTM k (sam_lit m) + mapTM f (SAM {sam_var = varm, sam_lit = litm}) = + SAM { sam_var = mapTM f varm, sam_lit = mapTM f litm } + +newtype ConAppMap a = CAM { un_cam :: DNameEnv (ListMap StgArgMap a) } + +instance TrieMap ConAppMap where + type Key ConAppMap = (DataCon, [StgArg]) + emptyTM = CAM emptyTM + lookupTM (dataCon, args) = un_cam >.> lkDNamed dataCon >=> lookupTM args + alterTM (dataCon, args) f m = + m { un_cam = un_cam m |> xtDNamed dataCon |>> alterTM args f } + foldTM k = un_cam >.> foldTM (foldTM k) + mapTM f = un_cam >.> mapTM (mapTM f) >.> CAM + +----------------- +-- The CSE Env -- +----------------- + +-- | The CSE environment. See note [CseEnv Example] +data CseEnv = CseEnv + { ce_conAppMap :: ConAppMap OutId + -- ^ The main component of the environment is the trie that maps + -- data constructor applications (with their `OutId` arguments) + -- to an in-scope name that can be used instead. + , ce_renaming :: IdEnv OutId + -- ^ CSE is simple to implement (and reason about) when there is no + -- shadowing. Unfortunately, we have to cope with shadowing + -- (see Note [Shadowing]). So we morally do a separate renaming pass + -- before CSE, and practically do both passes in one traversal of the tree. + -- It still causes less confusion to keep the renaming substitution + -- and the substitutions due to CSE separate. + , ce_subst :: IdEnv OutId + -- ^ This substitution contains CSE-specific entries. The domain are + -- OutIds, so ce_renaming has to be applied first. + -- It has an entry x ↦ y when a let-binding `let x = Con y` is + -- removed because `let y = Con z` is in scope. + -- + -- Both substitutions are applied to data constructor arguments + -- before these are looked up in the conAppMap. + , ce_in_scope :: InScopeSet + -- ^ The third component is an in-scope set, to rename away any + -- shadowing binders + } + +{-| +Note [CseEnv Example] +~~~~~~~~~~~~~~~~~~~~~ +The following tables shows how the CseEnvironment changes as code is traversed, +as well as the changes to that code. + + InExpr OutExpr + conAppMap renaming subst in_scope + ────────────────────────────────────────────────────────────────────── + -- empty {} {} {} + case … as a of {Con x y -> case … as a of {Con x y -> + -- Con x y ↦ a {} {} {a,x,y} + let b = Con x y (removed) + -- Con x y ↦ a {} b↦a {a,x,y,b} + let c = Bar a let c = Bar a + -- Con x y ↦ a, Bar a ↦ c {} b↦a {a,x,y,b,c} + let c = some expression let c' = some expression + -- Con x y ↦ a, Bar a ↦ c c↦c' b↦a {a,x,y,b,c,c'} + let d = Bar b (removed) + -- Con x y ↦ a, Bar a ↦ c c↦c' b↦a, d↦c {a,x,y,b,c,c',d} + (a, b, c d) (a, a, c' c) +-} + +initEnv :: InScopeSet -> CseEnv +initEnv in_scope = CseEnv + { ce_conAppMap = emptyTM + , ce_renaming = emptyVarEnv + , ce_subst = emptyVarEnv + , ce_in_scope = in_scope + } + +envLookup :: DataCon -> [OutStgArg] -> CseEnv -> Maybe OutId +envLookup dataCon args env = lookupTM (dataCon, args) (ce_conAppMap env) + +addDataCon :: OutId -> DataCon -> [OutStgArg] -> CseEnv -> CseEnv +-- do not bother with nullary data constructors, they are static anyways +addDataCon _ _ [] env = env +addDataCon bndr dataCon args env = env { ce_conAppMap = new_env } + where + new_env = insertTM (dataCon, args) bndr (ce_conAppMap env) + +forgetCse :: CseEnv -> CseEnv +forgetCse env = env { ce_conAppMap = emptyTM } + -- See note [Free variables of an StgClosure] + +addSubst :: OutId -> OutId -> CseEnv -> CseEnv +addSubst from to env + = env { ce_subst = extendVarEnv (ce_subst env) from to } + +substArgs :: CseEnv -> [InStgArg] -> [OutStgArg] +substArgs env = map (substArg env) + +substArg :: CseEnv -> InStgArg -> OutStgArg +substArg env (StgVarArg from) = StgVarArg (substVar env from) +substArg _ (StgLitArg lit) = StgLitArg lit + +substVars :: CseEnv -> [InId] -> [OutId] +substVars env = map (substVar env) + +substVar :: CseEnv -> InId -> OutId +substVar env id0 = id2 + where + id1 = fromMaybe id0 $ lookupVarEnv (ce_renaming env) id0 + id2 = fromMaybe id1 $ lookupVarEnv (ce_subst env) id1 + +-- Functions to enter binders + +-- This is much simpler than the requivalent code in CoreSubst: +-- * We do not substitute type variables, and +-- * There is nothing relevant in IdInfo at this stage +-- that needs substitutions. +-- Therefore, no special treatment for a recursive group is required. + +substBndr :: CseEnv -> InId -> (CseEnv, OutId) +substBndr env old_id + = (new_env, new_id) + where + new_id = uniqAway (ce_in_scope env) old_id + no_change = new_id == old_id + env' = env { ce_in_scope = ce_in_scope env `extendInScopeSet` new_id } + new_env | no_change = env' { ce_renaming = extendVarEnv (ce_subst env) old_id new_id } + | otherwise = env' + +substBndrs :: CseEnv -> [InVar] -> (CseEnv, [OutVar]) +substBndrs env bndrs = mapAccumL substBndr env bndrs + +substPairs :: CseEnv -> [(InVar, a)] -> (CseEnv, [(OutVar, a)]) +substPairs env bndrs = mapAccumL go env bndrs + where go env (id, x) = let (env', id') = substBndr env id + in (env', (id', x)) + +-- Main entry point + +stgCse :: [InStgBinding] -> [OutStgBinding] +stgCse binds = snd $ mapAccumL stgCseTopLvl emptyInScopeSet binds + +-- Top level bindings. +-- +-- We do not CSE these, as top-level closures are allocated statically anyways. +-- Also, they might be exported. +-- But we still have to collect the set of in-scope variables, otherwise +-- uniqAway might shadow a top-level closure. + +stgCseTopLvl :: InScopeSet -> InStgBinding -> (InScopeSet, OutStgBinding) +stgCseTopLvl in_scope (StgNonRec bndr rhs) + = (in_scope' + , StgNonRec bndr (stgCseTopLvlRhs in_scope rhs)) + where in_scope' = in_scope `extendInScopeSet` bndr + +stgCseTopLvl in_scope (StgRec eqs) + = ( in_scope' + , StgRec [ (bndr, stgCseTopLvlRhs in_scope' rhs) | (bndr, rhs) <- eqs ]) + where in_scope' = in_scope `extendInScopeSetList` [ bndr | (bndr, _) <- eqs ] + +stgCseTopLvlRhs :: InScopeSet -> InStgRhs -> OutStgRhs +stgCseTopLvlRhs in_scope (StgRhsClosure ccs info occs upd args body) + = let body' = stgCseExpr (initEnv in_scope) body + in StgRhsClosure ccs info occs upd args body' +stgCseTopLvlRhs _ (StgRhsCon ccs dataCon args) + = StgRhsCon ccs dataCon args + +------------------------------ +-- The actual AST traversal -- +------------------------------ + +-- Trivial cases +stgCseExpr :: CseEnv -> InStgExpr -> OutStgExpr +stgCseExpr env (StgApp fun args) + = StgApp fun' args' + where fun' = substVar env fun + args' = substArgs env args +stgCseExpr _ (StgLit lit) + = StgLit lit +stgCseExpr env (StgOpApp op args tys) + = StgOpApp op args' tys + where args' = substArgs env args +stgCseExpr _ (StgLam _ _) + = pprPanic "stgCseExp" (text "StgLam") +stgCseExpr env (StgTick tick body) + = let body' = stgCseExpr env body + in StgTick tick body' +stgCseExpr env (StgCase scrut bndr ty alts) + = StgCase scrut' bndr' ty alts' + where + scrut' = stgCseExpr env scrut + (env1, bndr') = substBndr env bndr + cse_bndr | StgApp trivial_scrut [] <- scrut' = trivial_scrut + -- See Note [Trivial case scrutinee] + | otherwise = bndr' + alts' = map (stgCseAlt env1 cse_bndr) alts + + +-- A constructor application. +-- To be removed by a variable use when found in the CSE environment +stgCseExpr env (StgConApp dataCon args tys) + | Just bndr' <- envLookup dataCon args' env + = StgApp bndr' [] + | otherwise + = StgConApp dataCon args' tys + where args' = substArgs env args + +-- Let bindings +-- The binding might be removed due to CSE (we do not want trivial bindings on +-- the STG level), so use the smart constructor `mkStgLet` to remove the binding +-- if empty. +stgCseExpr env (StgLet binds body) + = let (binds', env') = stgCseBind env binds + body' = stgCseExpr env' body + in mkStgLet StgLet binds' body' +stgCseExpr env (StgLetNoEscape binds body) + = let (binds', env') = stgCseBind env binds + body' = stgCseExpr env' body + in mkStgLet StgLetNoEscape binds' body' + +-- Case alternatives +-- Extend the CSE environment +stgCseAlt :: CseEnv -> OutId -> InStgAlt -> OutStgAlt +stgCseAlt env case_bndr (DataAlt dataCon, args, rhs) + = let (env1, args') = substBndrs env args + env2 = addDataCon case_bndr dataCon (map StgVarArg args') env1 + -- see note [Case 2: CSEing case binders] + rhs' = stgCseExpr env2 rhs + in (DataAlt dataCon, args', rhs') +stgCseAlt env _ (altCon, args, rhs) + = let (env1, args') = substBndrs env args + rhs' = stgCseExpr env1 rhs + in (altCon, args', rhs') + +-- Bindings +stgCseBind :: CseEnv -> InStgBinding -> (Maybe OutStgBinding, CseEnv) +stgCseBind env (StgNonRec b e) + = let (env1, b') = substBndr env b + in case stgCseRhs env1 b' e of + (Nothing, env2) -> (Nothing, env2) + (Just (b2,e'), env2) -> (Just (StgNonRec b2 e'), env2) +stgCseBind env (StgRec pairs) + = let (env1, pairs1) = substPairs env pairs + in case stgCsePairs env1 pairs1 of + ([], env2) -> (Nothing, env2) + (pairs2, env2) -> (Just (StgRec pairs2), env2) + +stgCsePairs :: CseEnv -> [(OutId, InStgRhs)] -> ([(OutId, OutStgRhs)], CseEnv) +stgCsePairs env [] = ([], env) +stgCsePairs env0 ((b,e):pairs) + = let (pairMB, env1) = stgCseRhs env0 b e + (pairs', env2) = stgCsePairs env1 pairs + in (pairMB `mbCons` pairs', env2) + where + mbCons = maybe id (:) + +-- The RHS of a binding. +-- If it is an constructor application, either short-cut it or extend the environment +stgCseRhs :: CseEnv -> OutId -> InStgRhs -> (Maybe (OutId, OutStgRhs), CseEnv) +stgCseRhs env bndr (StgRhsCon ccs dataCon args) + | Just other_bndr <- envLookup dataCon args' env + = let env' = addSubst bndr other_bndr env + in (Nothing, env') + | otherwise + = let env' = addDataCon bndr dataCon args' env + -- see note [Case 1: CSEing allocated closures] + pair = (bndr, StgRhsCon ccs dataCon args') + in (Just pair, env') + where args' = substArgs env args +stgCseRhs env bndr (StgRhsClosure ccs info occs upd args body) + = let (env1, args') = substBndrs env args + env2 = forgetCse env1 -- See note [Free variables of an StgClosure] + body' = stgCseExpr env2 body + in (Just (substVar env bndr, StgRhsClosure ccs info occs' upd args' body'), env) + where occs' = substVars env occs + +-- Utilities + +-- | This function short-cuts let-bindings that are now obsolete +mkStgLet :: (a -> b -> b) -> Maybe a -> b -> b +mkStgLet _ Nothing body = body +mkStgLet stgLet (Just binds) body = stgLet binds body + + +{- +Note [Trivial case scrutinee] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we find + + case x as b of { Con a -> … } + +we really want to replace uses of Con a in the body with x, and not just b, in +order to handle nested reconstruction of constructors as in + + nested :: Either Int (Either Int a) -> Either Bool (Either Bool a) + nested (Right (Right x)) = Right (Right x) + nested _ = Left True + +Therefore, we add + Con a ↦ x +to the ConAppMap respectively. +Compare Note [CSE for case expressions] in CSE.hs, which does the same for Core CSE. + +If we find + case foo x as b of { Con a -> … } +we use + Con a ↦ b + +Note [Free variables of an StgClosure] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +StgClosures (function and thunks) have an explicit list of free variables: + +foo [x] = + let not_a_free_var = Left [x] + let a_free_var = Right [x] + let closure = \[x a_free_var] -> \[y] -> bar y (Left [x]) a_free_var + in closure + +If we were to CSE `Left [x]` in the body of `closure` with `not_a_free_var`, +then the list of free variables would be wrong, so for now, we do not CSE +across such a closure, simply because I (Joachim) was not sure about possible +knock-on effects. If deemed safe and worth the slight code complication of +re-calculating this list during or after this pass, this can surely be done. +-} diff --git a/compiler/simplStg/UnariseStg.hs b/compiler/simplStg/UnariseStg.hs index fc30859980..e8ba200d0a 100644 --- a/compiler/simplStg/UnariseStg.hs +++ b/compiler/simplStg/UnariseStg.hs @@ -264,11 +264,6 @@ extendRho rho x (UnaryVal val) -------------------------------------------------------------------------------- -type OutStgExpr = StgExpr -type InStgAlt = StgAlt -type InStgArg = StgArg -type OutStgArg = StgArg - unarise :: UniqSupply -> [StgBinding] -> [StgBinding] unarise us binds = initUs_ us (mapM (unariseBinding emptyVarEnv) binds) diff --git a/compiler/stgSyn/StgSyn.hs b/compiler/stgSyn/StgSyn.hs index 3ec37eefff..64c8448421 100644 --- a/compiler/stgSyn/StgSyn.hs +++ b/compiler/stgSyn/StgSyn.hs @@ -24,8 +24,11 @@ module StgSyn ( combineStgBinderInfo, -- a set of synonyms for the most common (only :-) parameterisation - StgArg, - StgBinding, StgExpr, StgRhs, StgAlt, + StgArg, StgBinding, StgExpr, StgRhs, StgAlt, + + -- a set of synonyms to distinguish in- and out variants + InStgArg, InStgBinding, InStgExpr, InStgRhs, InStgAlt, + OutStgArg, OutStgBinding, OutStgExpr, OutStgRhs, OutStgAlt, -- StgOp StgOp(..), @@ -551,7 +554,24 @@ type StgExpr = GenStgExpr Id Id type StgRhs = GenStgRhs Id Id type StgAlt = GenStgAlt Id Id +{- Many passes apply a substitution, and it's very handy to have type + synonyms to remind us whether or not the subsitution has been applied. + See CoreSyn for precedence in Core land +-} + +type InStgBinding = StgBinding +type InStgArg = StgArg +type InStgExpr = StgExpr +type InStgRhs = StgRhs +type InStgAlt = StgAlt +type OutStgBinding = StgBinding +type OutStgArg = StgArg +type OutStgExpr = StgExpr +type OutStgRhs = StgRhs +type OutStgAlt = StgAlt + {- + ************************************************************************ * * \subsubsection[UpdateFlag-datatype]{@UpdateFlag@} diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst index 3e660c19e9..1cad51bb1b 100644 --- a/docs/users_guide/using-optimisation.rst +++ b/docs/users_guide/using-optimisation.rst @@ -186,6 +186,14 @@ list. optimisation. Switching this off can be useful if you have some ``unsafePerformIO`` expressions that you don't want commoned-up. +.. ghc-flag:: -fstg-cse + + :default: on + + Enables the common-sub-expression elimination optimisation on the STG + intermediate language, where it is able to common up some subexpressions + that differ in their types, but not their represetation. + .. ghc-flag:: -fdicts-cheap A very experimental flag that makes dictionary-valued expressions diff --git a/testsuite/tests/simplStg/Makefile b/testsuite/tests/simplStg/Makefile new file mode 100644 index 0000000000..9a36a1c5fe --- /dev/null +++ b/testsuite/tests/simplStg/Makefile @@ -0,0 +1,3 @@ +TOP=../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk diff --git a/testsuite/tests/simplStg/should_run/Makefile b/testsuite/tests/simplStg/should_run/Makefile new file mode 100644 index 0000000000..9101fbd40a --- /dev/null +++ b/testsuite/tests/simplStg/should_run/Makefile @@ -0,0 +1,3 @@ +TOP=../../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk diff --git a/testsuite/tests/simplStg/should_run/T9291.hs b/testsuite/tests/simplStg/should_run/T9291.hs new file mode 100644 index 0000000000..db2ce75da2 --- /dev/null +++ b/testsuite/tests/simplStg/should_run/T9291.hs @@ -0,0 +1,58 @@ +{-# LANGUAGE MagicHash #-} +import GHC.Exts +import Unsafe.Coerce + +foo :: Either Int a -> Either Bool a +foo (Right x) = Right x +foo _ = Left True +{-# NOINLINE foo #-} + +bar :: a -> (Either Int a, Either Bool a) +bar x = (Right x, Right x) +{-# NOINLINE bar #-} + +nested :: Either Int (Either Int a) -> Either Bool (Either Bool a) +nested (Right (Right x)) = Right (Right x) +nested _ = Left True +{-# NOINLINE nested #-} + + +-- CSE in a recursive group +data Tree x = T x (Either Int (Tree x)) (Either Bool (Tree x)) +rec1 :: x -> Tree x +rec1 x = + let t = T x r1 r2 + r1 = Right t + r2 = Right t + in t +{-# NOINLINE rec1 #-} + +-- Not yet supported! (and tricky) +data Stream a b x = S x (Stream b a x) +rec2 :: x -> Stream a b x +rec2 x = + let s1 = S x s2 + s2 = S x s1 + in s1 +{-# NOINLINE rec2 #-} + +test x = do + let (r1,r2) = bar x + (same $! r1) $! r2 + let r3 = foo r1 + (same $! r1) $! r3 + let (r4,_) = bar r1 + let r5 = nested r4 + (same $! r4) $! r5 + let (T _ r6 r7) = rec1 x + (same $! r6) $! r7 + let s1@(S _ s2) = rec2 x + (same $! s1) $! s2 +{-# NOINLINE test #-} + +main = test "foo" + +same :: a -> b -> IO () +same x y = case reallyUnsafePtrEquality# (unsafeCoerce x) y of + 1# -> putStrLn "yes" + _ -> putStrLn "no" diff --git a/testsuite/tests/simplStg/should_run/T9291.stdout b/testsuite/tests/simplStg/should_run/T9291.stdout new file mode 100644 index 0000000000..aa14978324 --- /dev/null +++ b/testsuite/tests/simplStg/should_run/T9291.stdout @@ -0,0 +1,5 @@ +yes +yes +yes +yes +no diff --git a/testsuite/tests/simplStg/should_run/all.T b/testsuite/tests/simplStg/should_run/all.T new file mode 100644 index 0000000000..3d4f4a3763 --- /dev/null +++ b/testsuite/tests/simplStg/should_run/all.T @@ -0,0 +1,12 @@ +# Args to compile_and_run are: +# extra compile flags +# extra run flags +# expected process return value, if not zero + +# Only compile with optimisation +def f( name, opts ): + opts.only_ways = ['optasm'] + +setTestOpts(f) + +test('T9291', normal, compile_and_run, ['']) -- cgit v1.2.1