summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoachim Breitner <mail@joachim-breitner.de>2016-12-15 10:57:43 -0800
committerJoachim Breitner <mail@joachim-breitner.de>2016-12-29 19:25:50 +0100
commitaac08a0f37442a79096d7d2392f34b42ee5da2bb (patch)
tree4001ca3f68b6bb5eef8089d52633817913c47a16
parenta3704409acc3bd237d3e872f640686918fb51f5f (diff)
downloadhaskell-wip/T9291.tar.gz
Add a CSE pass to Stg (#9291)wip/T9291
This CSE pass only targets data constructor applications. This is probably the best we can do, as function calls and primitive operations might have side-effects. Introduces the flag -fstg-cse, enabled by default with -O. Differential Revision: https://phabricator.haskell.org/D2871
-rw-r--r--compiler/basicTypes/Id.hs6
-rw-r--r--compiler/basicTypes/Var.hs19
-rw-r--r--compiler/coreSyn/CoreSyn.hs8
-rw-r--r--compiler/coreSyn/TrieMap.hs6
-rw-r--r--compiler/ghc.cabal.in1
-rw-r--r--compiler/main/DynFlags.hs3
-rw-r--r--compiler/simplStg/SimplStg.hs30
-rw-r--r--compiler/simplStg/StgCse.hs427
-rw-r--r--compiler/simplStg/UnariseStg.hs5
-rw-r--r--compiler/stgSyn/StgSyn.hs24
-rw-r--r--docs/users_guide/using-optimisation.rst8
-rw-r--r--testsuite/tests/simplStg/Makefile3
-rw-r--r--testsuite/tests/simplStg/should_run/Makefile3
-rw-r--r--testsuite/tests/simplStg/should_run/T9291.hs58
-rw-r--r--testsuite/tests/simplStg/should_run/T9291.stdout5
-rw-r--r--testsuite/tests/simplStg/should_run/all.T12
16 files changed, 588 insertions, 30 deletions
diff --git a/compiler/basicTypes/Id.hs b/compiler/basicTypes/Id.hs
index 1b84acda75..84cafa3902 100644
--- a/compiler/basicTypes/Id.hs
+++ b/compiler/basicTypes/Id.hs
@@ -28,6 +28,10 @@ module Id (
-- * The main types
Var, Id, isId,
+ -- * In and Out variants
+ InVar, InId,
+ OutVar, OutId,
+
-- ** Simple construction
mkGlobalId, mkVanillaGlobal, mkVanillaGlobalWithInfo,
mkLocalId, mkLocalCoVar, mkLocalIdOrCoVar,
@@ -114,6 +118,8 @@ import BasicTypes
-- Imported and re-exported
import Var( Id, CoVar, DictId,
+ InId, InVar,
+ OutId, OutVar,
idInfo, idDetails, globaliseId, varType,
isId, isLocalId, isGlobalId, isExportedId )
import qualified Var
diff --git a/compiler/basicTypes/Var.hs b/compiler/basicTypes/Var.hs
index e783efea0d..3f78c2800f 100644
--- a/compiler/basicTypes/Var.hs
+++ b/compiler/basicTypes/Var.hs
@@ -37,6 +37,10 @@ module Var (
Var, CoVar, Id, NcId, DictId, DFunId, EvVar, EqVar, EvId, IpId,
TyVar, TypeVar, KindVar, TKVar, TyCoVar,
+ -- * In and Out variants
+ InVar, InCoVar, InId, InTyVar,
+ OutVar, OutCoVar, OutId, OutTyVar,
+
-- ** Taking 'Var's apart
varName, varUnique, varType,
@@ -150,6 +154,21 @@ type EqVar = EvId -- Boxed equality evidence
type TyCoVar = Id -- Type, *or* coercion variable
-- predicate: isTyCoVar
+
+{- Many passes apply a substitution, and it's very handy to have type
+ synonyms to remind us whether or not the subsitution has been applied -}
+
+type InVar = Var
+type InTyVar = TyVar
+type InCoVar = CoVar
+type InId = Id
+type OutVar = Var
+type OutTyVar = TyVar
+type OutCoVar = CoVar
+type OutId = Id
+
+
+
{- Note [Evidence: EvIds and CoVars]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* An EvId (evidence Id) is a term-level evidence variable
diff --git a/compiler/coreSyn/CoreSyn.hs b/compiler/coreSyn/CoreSyn.hs
index 52ffad041b..17b546bb98 100644
--- a/compiler/coreSyn/CoreSyn.hs
+++ b/compiler/coreSyn/CoreSyn.hs
@@ -477,10 +477,6 @@ this exhaustive list can be empty!
-- Pre-cloning or substitution
type InBndr = CoreBndr
-type InVar = Var
-type InTyVar = TyVar
-type InCoVar = CoVar
-type InId = Id
type InType = Type
type InKind = Kind
type InBind = CoreBind
@@ -491,10 +487,6 @@ type InCoercion = Coercion
-- Post-cloning or substitution
type OutBndr = CoreBndr
-type OutVar = Var
-type OutId = Id
-type OutTyVar = TyVar
-type OutCoVar = CoVar
type OutType = Type
type OutKind = Kind
type OutCoercion = Coercion
diff --git a/compiler/coreSyn/TrieMap.hs b/compiler/coreSyn/TrieMap.hs
index c6b9f8e1d3..f8546d1680 100644
--- a/compiler/coreSyn/TrieMap.hs
+++ b/compiler/coreSyn/TrieMap.hs
@@ -15,7 +15,11 @@ module TrieMap(
LooseTypeMap,
MaybeMap,
ListMap,
- TrieMap(..), insertTM, deleteTM
+ TrieMap(..), insertTM, deleteTM,
+ LiteralMap,
+ lkDFreeVar, xtDFreeVar,
+ lkDNamed, xtDNamed,
+ (>.>), (|>), (|>>),
) where
import CoreSyn
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index a7d380afb6..2f1f813ab0 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -400,6 +400,7 @@ Library
Simplify
SimplStg
StgStats
+ StgCse
UnariseStg
RepType
Rules
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index e7ace47f2a..0bc119a783 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -427,6 +427,7 @@ data GeneralFlag
| Opt_CrossModuleSpecialise
| Opt_StaticArgumentTransformation
| Opt_CSE
+ | Opt_StgCSE
| Opt_LiberateCase
| Opt_SpecConstr
| Opt_DoLambdaEtaExpansion
@@ -3481,6 +3482,7 @@ fFlagsDeps = [
flagSpec "cmm-elim-common-blocks" Opt_CmmElimCommonBlocks,
flagSpec "cmm-sink" Opt_CmmSink,
flagSpec "cse" Opt_CSE,
+ flagSpec "stg-cse" Opt_StgCSE,
flagSpec "cpr-anal" Opt_CprAnal,
flagSpec "defer-type-errors" Opt_DeferTypeErrors,
flagSpec "defer-typed-holes" Opt_DeferTypedHoles,
@@ -3930,6 +3932,7 @@ optLevelFlags -- see Note [Documenting optimisation flags]
, ([1,2], Opt_CmmElimCommonBlocks)
, ([1,2], Opt_CmmSink)
, ([1,2], Opt_CSE)
+ , ([1,2], Opt_StgCSE)
, ([1,2], Opt_EnableRewriteRules) -- Off for -O0; see Note [Scoping for Builtin rules]
-- in PrelRules
, ([1,2], Opt_FloatIn)
diff --git a/compiler/simplStg/SimplStg.hs b/compiler/simplStg/SimplStg.hs
index 771df871cc..406e415287 100644
--- a/compiler/simplStg/SimplStg.hs
+++ b/compiler/simplStg/SimplStg.hs
@@ -17,6 +17,7 @@ import SCCfinal ( stgMassageForProfiling )
import StgLint ( lintStgBindings )
import StgStats ( showStgStats )
import UnariseStg ( unarise )
+import StgCse ( stgCse )
import DynFlags
import Module ( Module )
@@ -64,22 +65,27 @@ stg2stg dflags module_name binds
-------------------------------------------
do_stg_pass (binds, us, ccs) to_do
- = let
- (us1, us2) = splitUniqSupply us
- in
- case to_do of
+ = case to_do of
D_stg_stats ->
trace (showStgStats binds)
- end_pass us2 "StgStats" ccs binds
+ end_pass us "StgStats" ccs binds
StgDoMassageForProfiling ->
{-# SCC "ProfMassage" #-}
let
+ (us1, us2) = splitUniqSupply us
(collected_CCs, binds3)
= stgMassageForProfiling dflags module_name us1 binds
in
end_pass us2 "ProfMassage" collected_CCs binds3
+ StgCSE ->
+ {-# SCC "StgCse" #-}
+ let
+ binds' = stgCse binds
+ in
+ end_pass us "StgCse" ccs binds'
+
end_pass us2 what ccs binds2
= do -- report verbosely, if required
dumpIfSet_dyn dflags Opt_D_verbose_stg2stg what
@@ -96,19 +102,15 @@ stg2stg dflags module_name binds
-- | Optional Stg-to-Stg passes.
data StgToDo
- = StgDoMassageForProfiling -- should be (next to) last
+ = StgCSE
+ | StgDoMassageForProfiling -- should be (next to) last
| D_stg_stats
-- | Which optional Stg-to-Stg passes to run. Depends on flags, ways etc.
getStgToDo :: DynFlags -> [StgToDo]
getStgToDo dflags
- = todo2
+ = [ StgCSE | gopt Opt_StgCSE dflags] ++
+ [ StgDoMassageForProfiling | WayProf `elem` ways dflags] ++
+ [ D_stg_stats | stg_stats ]
where
stg_stats = gopt Opt_StgStats dflags
-
- todo1 = if stg_stats then [D_stg_stats] else []
-
- todo2 | WayProf `elem` ways dflags
- = StgDoMassageForProfiling : todo1
- | otherwise
- = todo1
diff --git a/compiler/simplStg/StgCse.hs b/compiler/simplStg/StgCse.hs
new file mode 100644
index 0000000000..74c196432f
--- /dev/null
+++ b/compiler/simplStg/StgCse.hs
@@ -0,0 +1,427 @@
+{-# LANGUAGE TypeFamilies #-}
+
+{-|
+Note [CSE for Stg]
+~~~~~~~~~~~~~~~~~~
+This module implements a simple common subexpression elimination pass for STG.
+This is useful because there are expressions that we want to common up (because
+they are operational equivalent), but that we cannot common up in Core, because
+their types differ.
+This was original reported as #9291.
+
+There are two types of common code occurrences that we aim for, see
+note [Case 1: CSEing allocated closures] and
+note [Case 2: CSEing case binders] below.
+
+
+Note [Case 1: CSEing allocated closures]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The fist kind of CSE opportunity we aim for is generated by this Haskell code:
+
+ bar :: a -> (Either Int a, Either Bool a)
+ bar x = (Right x, Right x)
+
+which produces this Core:
+
+ bar :: forall a. a -> (Either Int a, Either Bool a)
+ bar @a x = (Right @Int @a x, Right @Bool @a x)
+
+where the two components of the tuple are differnt terms, and cannot be
+commoned up (easily). On the STG level we have
+
+ bar [x] = let c1 = Right [x]
+ c2 = Right [x]
+ in (c1,c2)
+
+and now it is obvious that we can write
+
+ bar [x] = let c1 = Right [x]
+ in (c1,c1)
+
+instead.
+
+
+Note [Case 2: CSEing case binders]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The second kind of CSE opportunity we aim for is more interesting.
+The Haskell code
+
+ foo :: Either Int a -> Either Bool a
+ foo (Right x) = Right x
+ foo _ = Left False
+
+produces this Core
+
+ foo :: forall a. Either Int a -> Either Bool a
+ foo @a e = case e of b { Left n -> …
+ , Right x -> Right @Bool @a x }
+
+where we cannot CSE `Right @Bool @a x` with the case binder `b` as they have
+different types. But in STG we have
+
+ foo [e] = case e of b { Left [n] -> …
+ , Right [x] -> Right [x] }
+
+and nothing stops us from transforming that to
+
+ foo [e] = case e of b { Left [n] -> …
+ , Right [x] -> b}
+
+-}
+module StgCse (stgCse) where
+
+import DataCon
+import Id
+import StgSyn
+import Outputable
+import VarEnv
+import CoreSyn (AltCon(..))
+import Data.List (mapAccumL)
+import Data.Maybe (fromMaybe)
+import TrieMap
+import NameEnv
+import Control.Monad( (>=>) )
+
+--------------
+-- The Trie --
+--------------
+
+-- A lookup trie for data constructor appliations, i.e.
+-- keys of type `(DataCon, [StgArg])`, following the patterns in TrieMap.
+
+data StgArgMap a = SAM
+ { sam_var :: DVarEnv a
+ , sam_lit :: LiteralMap a
+ }
+
+instance TrieMap StgArgMap where
+ type Key StgArgMap = StgArg
+ emptyTM = SAM { sam_var = emptyTM
+ , sam_lit = emptyTM }
+ lookupTM (StgVarArg var) = sam_var >.> lkDFreeVar var
+ lookupTM (StgLitArg lit) = sam_lit >.> lookupTM lit
+ alterTM (StgVarArg var) f m = m { sam_var = sam_var m |> xtDFreeVar var f }
+ alterTM (StgLitArg lit) f m = m { sam_lit = sam_lit m |> alterTM lit f }
+ foldTM k m = foldTM k (sam_var m) . foldTM k (sam_lit m)
+ mapTM f (SAM {sam_var = varm, sam_lit = litm}) =
+ SAM { sam_var = mapTM f varm, sam_lit = mapTM f litm }
+
+newtype ConAppMap a = CAM { un_cam :: DNameEnv (ListMap StgArgMap a) }
+
+instance TrieMap ConAppMap where
+ type Key ConAppMap = (DataCon, [StgArg])
+ emptyTM = CAM emptyTM
+ lookupTM (dataCon, args) = un_cam >.> lkDNamed dataCon >=> lookupTM args
+ alterTM (dataCon, args) f m =
+ m { un_cam = un_cam m |> xtDNamed dataCon |>> alterTM args f }
+ foldTM k = un_cam >.> foldTM (foldTM k)
+ mapTM f = un_cam >.> mapTM (mapTM f) >.> CAM
+
+-----------------
+-- The CSE Env --
+-----------------
+
+-- | The CSE environment. See note [CseEnv Example]
+data CseEnv = CseEnv
+ { ce_conAppMap :: ConAppMap OutId
+ -- ^ The main component of the environment is the trie that maps
+ -- data constructor applications (with their `OutId` arguments)
+ -- to an in-scope name that can be used instead.
+ , ce_renaming :: IdEnv OutId
+ -- ^ CSE is simple to implement (and reason about) when there is no
+ -- shadowing. Unfortunately, we have to cope with shadowing
+ -- (see Note [Shadowing]). So we morally do a separate renaming pass
+ -- before CSE, and practically do both passes in one traversal of the tree.
+ -- It still causes less confusion to keep the renaming substitution
+ -- and the substitutions due to CSE separate.
+ , ce_subst :: IdEnv OutId
+ -- ^ This substitution contains CSE-specific entries. The domain are
+ -- OutIds, so ce_renaming has to be applied first.
+ -- It has an entry x ↦ y when a let-binding `let x = Con y` is
+ -- removed because `let y = Con z` is in scope.
+ --
+ -- Both substitutions are applied to data constructor arguments
+ -- before these are looked up in the conAppMap.
+ , ce_in_scope :: InScopeSet
+ -- ^ The third component is an in-scope set, to rename away any
+ -- shadowing binders
+ }
+
+{-|
+Note [CseEnv Example]
+~~~~~~~~~~~~~~~~~~~~~
+The following tables shows how the CseEnvironment changes as code is traversed,
+as well as the changes to that code.
+
+ InExpr OutExpr
+ conAppMap renaming subst in_scope
+ ──────────────────────────────────────────────────────────────────────
+ -- empty {} {} {}
+ case … as a of {Con x y -> case … as a of {Con x y ->
+ -- Con x y ↦ a {} {} {a,x,y}
+ let b = Con x y (removed)
+ -- Con x y ↦ a {} b↦a {a,x,y,b}
+ let c = Bar a let c = Bar a
+ -- Con x y ↦ a, Bar a ↦ c {} b↦a {a,x,y,b,c}
+ let c = some expression let c' = some expression
+ -- Con x y ↦ a, Bar a ↦ c c↦c' b↦a {a,x,y,b,c,c'}
+ let d = Bar b (removed)
+ -- Con x y ↦ a, Bar a ↦ c c↦c' b↦a, d↦c {a,x,y,b,c,c',d}
+ (a, b, c d) (a, a, c' c)
+-}
+
+initEnv :: InScopeSet -> CseEnv
+initEnv in_scope = CseEnv
+ { ce_conAppMap = emptyTM
+ , ce_renaming = emptyVarEnv
+ , ce_subst = emptyVarEnv
+ , ce_in_scope = in_scope
+ }
+
+envLookup :: DataCon -> [OutStgArg] -> CseEnv -> Maybe OutId
+envLookup dataCon args env = lookupTM (dataCon, args) (ce_conAppMap env)
+
+addDataCon :: OutId -> DataCon -> [OutStgArg] -> CseEnv -> CseEnv
+-- do not bother with nullary data constructors, they are static anyways
+addDataCon _ _ [] env = env
+addDataCon bndr dataCon args env = env { ce_conAppMap = new_env }
+ where
+ new_env = insertTM (dataCon, args) bndr (ce_conAppMap env)
+
+forgetCse :: CseEnv -> CseEnv
+forgetCse env = env { ce_conAppMap = emptyTM }
+ -- See note [Free variables of an StgClosure]
+
+addSubst :: OutId -> OutId -> CseEnv -> CseEnv
+addSubst from to env
+ = env { ce_subst = extendVarEnv (ce_subst env) from to }
+
+substArgs :: CseEnv -> [InStgArg] -> [OutStgArg]
+substArgs env = map (substArg env)
+
+substArg :: CseEnv -> InStgArg -> OutStgArg
+substArg env (StgVarArg from) = StgVarArg (substVar env from)
+substArg _ (StgLitArg lit) = StgLitArg lit
+
+substVars :: CseEnv -> [InId] -> [OutId]
+substVars env = map (substVar env)
+
+substVar :: CseEnv -> InId -> OutId
+substVar env id0 = id2
+ where
+ id1 = fromMaybe id0 $ lookupVarEnv (ce_renaming env) id0
+ id2 = fromMaybe id1 $ lookupVarEnv (ce_subst env) id1
+
+-- Functions to enter binders
+
+-- This is much simpler than the requivalent code in CoreSubst:
+-- * We do not substitute type variables, and
+-- * There is nothing relevant in IdInfo at this stage
+-- that needs substitutions.
+-- Therefore, no special treatment for a recursive group is required.
+
+substBndr :: CseEnv -> InId -> (CseEnv, OutId)
+substBndr env old_id
+ = (new_env, new_id)
+ where
+ new_id = uniqAway (ce_in_scope env) old_id
+ no_change = new_id == old_id
+ env' = env { ce_in_scope = ce_in_scope env `extendInScopeSet` new_id }
+ new_env | no_change = env' { ce_renaming = extendVarEnv (ce_subst env) old_id new_id }
+ | otherwise = env'
+
+substBndrs :: CseEnv -> [InVar] -> (CseEnv, [OutVar])
+substBndrs env bndrs = mapAccumL substBndr env bndrs
+
+substPairs :: CseEnv -> [(InVar, a)] -> (CseEnv, [(OutVar, a)])
+substPairs env bndrs = mapAccumL go env bndrs
+ where go env (id, x) = let (env', id') = substBndr env id
+ in (env', (id', x))
+
+-- Main entry point
+
+stgCse :: [InStgBinding] -> [OutStgBinding]
+stgCse binds = snd $ mapAccumL stgCseTopLvl emptyInScopeSet binds
+
+-- Top level bindings.
+--
+-- We do not CSE these, as top-level closures are allocated statically anyways.
+-- Also, they might be exported.
+-- But we still have to collect the set of in-scope variables, otherwise
+-- uniqAway might shadow a top-level closure.
+
+stgCseTopLvl :: InScopeSet -> InStgBinding -> (InScopeSet, OutStgBinding)
+stgCseTopLvl in_scope (StgNonRec bndr rhs)
+ = (in_scope'
+ , StgNonRec bndr (stgCseTopLvlRhs in_scope rhs))
+ where in_scope' = in_scope `extendInScopeSet` bndr
+
+stgCseTopLvl in_scope (StgRec eqs)
+ = ( in_scope'
+ , StgRec [ (bndr, stgCseTopLvlRhs in_scope' rhs) | (bndr, rhs) <- eqs ])
+ where in_scope' = in_scope `extendInScopeSetList` [ bndr | (bndr, _) <- eqs ]
+
+stgCseTopLvlRhs :: InScopeSet -> InStgRhs -> OutStgRhs
+stgCseTopLvlRhs in_scope (StgRhsClosure ccs info occs upd args body)
+ = let body' = stgCseExpr (initEnv in_scope) body
+ in StgRhsClosure ccs info occs upd args body'
+stgCseTopLvlRhs _ (StgRhsCon ccs dataCon args)
+ = StgRhsCon ccs dataCon args
+
+------------------------------
+-- The actual AST traversal --
+------------------------------
+
+-- Trivial cases
+stgCseExpr :: CseEnv -> InStgExpr -> OutStgExpr
+stgCseExpr env (StgApp fun args)
+ = StgApp fun' args'
+ where fun' = substVar env fun
+ args' = substArgs env args
+stgCseExpr _ (StgLit lit)
+ = StgLit lit
+stgCseExpr env (StgOpApp op args tys)
+ = StgOpApp op args' tys
+ where args' = substArgs env args
+stgCseExpr _ (StgLam _ _)
+ = pprPanic "stgCseExp" (text "StgLam")
+stgCseExpr env (StgTick tick body)
+ = let body' = stgCseExpr env body
+ in StgTick tick body'
+stgCseExpr env (StgCase scrut bndr ty alts)
+ = StgCase scrut' bndr' ty alts'
+ where
+ scrut' = stgCseExpr env scrut
+ (env1, bndr') = substBndr env bndr
+ cse_bndr | StgApp trivial_scrut [] <- scrut' = trivial_scrut
+ -- See Note [Trivial case scrutinee]
+ | otherwise = bndr'
+ alts' = map (stgCseAlt env1 cse_bndr) alts
+
+
+-- A constructor application.
+-- To be removed by a variable use when found in the CSE environment
+stgCseExpr env (StgConApp dataCon args tys)
+ | Just bndr' <- envLookup dataCon args' env
+ = StgApp bndr' []
+ | otherwise
+ = StgConApp dataCon args' tys
+ where args' = substArgs env args
+
+-- Let bindings
+-- The binding might be removed due to CSE (we do not want trivial bindings on
+-- the STG level), so use the smart constructor `mkStgLet` to remove the binding
+-- if empty.
+stgCseExpr env (StgLet binds body)
+ = let (binds', env') = stgCseBind env binds
+ body' = stgCseExpr env' body
+ in mkStgLet StgLet binds' body'
+stgCseExpr env (StgLetNoEscape binds body)
+ = let (binds', env') = stgCseBind env binds
+ body' = stgCseExpr env' body
+ in mkStgLet StgLetNoEscape binds' body'
+
+-- Case alternatives
+-- Extend the CSE environment
+stgCseAlt :: CseEnv -> OutId -> InStgAlt -> OutStgAlt
+stgCseAlt env case_bndr (DataAlt dataCon, args, rhs)
+ = let (env1, args') = substBndrs env args
+ env2 = addDataCon case_bndr dataCon (map StgVarArg args') env1
+ -- see note [Case 2: CSEing case binders]
+ rhs' = stgCseExpr env2 rhs
+ in (DataAlt dataCon, args', rhs')
+stgCseAlt env _ (altCon, args, rhs)
+ = let (env1, args') = substBndrs env args
+ rhs' = stgCseExpr env1 rhs
+ in (altCon, args', rhs')
+
+-- Bindings
+stgCseBind :: CseEnv -> InStgBinding -> (Maybe OutStgBinding, CseEnv)
+stgCseBind env (StgNonRec b e)
+ = let (env1, b') = substBndr env b
+ in case stgCseRhs env1 b' e of
+ (Nothing, env2) -> (Nothing, env2)
+ (Just (b2,e'), env2) -> (Just (StgNonRec b2 e'), env2)
+stgCseBind env (StgRec pairs)
+ = let (env1, pairs1) = substPairs env pairs
+ in case stgCsePairs env1 pairs1 of
+ ([], env2) -> (Nothing, env2)
+ (pairs2, env2) -> (Just (StgRec pairs2), env2)
+
+stgCsePairs :: CseEnv -> [(OutId, InStgRhs)] -> ([(OutId, OutStgRhs)], CseEnv)
+stgCsePairs env [] = ([], env)
+stgCsePairs env0 ((b,e):pairs)
+ = let (pairMB, env1) = stgCseRhs env0 b e
+ (pairs', env2) = stgCsePairs env1 pairs
+ in (pairMB `mbCons` pairs', env2)
+ where
+ mbCons = maybe id (:)
+
+-- The RHS of a binding.
+-- If it is an constructor application, either short-cut it or extend the environment
+stgCseRhs :: CseEnv -> OutId -> InStgRhs -> (Maybe (OutId, OutStgRhs), CseEnv)
+stgCseRhs env bndr (StgRhsCon ccs dataCon args)
+ | Just other_bndr <- envLookup dataCon args' env
+ = let env' = addSubst bndr other_bndr env
+ in (Nothing, env')
+ | otherwise
+ = let env' = addDataCon bndr dataCon args' env
+ -- see note [Case 1: CSEing allocated closures]
+ pair = (bndr, StgRhsCon ccs dataCon args')
+ in (Just pair, env')
+ where args' = substArgs env args
+stgCseRhs env bndr (StgRhsClosure ccs info occs upd args body)
+ = let (env1, args') = substBndrs env args
+ env2 = forgetCse env1 -- See note [Free variables of an StgClosure]
+ body' = stgCseExpr env2 body
+ in (Just (substVar env bndr, StgRhsClosure ccs info occs' upd args' body'), env)
+ where occs' = substVars env occs
+
+-- Utilities
+
+-- | This function short-cuts let-bindings that are now obsolete
+mkStgLet :: (a -> b -> b) -> Maybe a -> b -> b
+mkStgLet _ Nothing body = body
+mkStgLet stgLet (Just binds) body = stgLet binds body
+
+
+{-
+Note [Trivial case scrutinee]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we find
+
+ case x as b of { Con a -> … }
+
+we really want to replace uses of Con a in the body with x, and not just b, in
+order to handle nested reconstruction of constructors as in
+
+ nested :: Either Int (Either Int a) -> Either Bool (Either Bool a)
+ nested (Right (Right x)) = Right (Right x)
+ nested _ = Left True
+
+Therefore, we add
+ Con a ↦ x
+to the ConAppMap respectively.
+Compare Note [CSE for case expressions] in CSE.hs, which does the same for Core CSE.
+
+If we find
+ case foo x as b of { Con a -> … }
+we use
+ Con a ↦ b
+
+Note [Free variables of an StgClosure]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+StgClosures (function and thunks) have an explicit list of free variables:
+
+foo [x] =
+ let not_a_free_var = Left [x]
+ let a_free_var = Right [x]
+ let closure = \[x a_free_var] -> \[y] -> bar y (Left [x]) a_free_var
+ in closure
+
+If we were to CSE `Left [x]` in the body of `closure` with `not_a_free_var`,
+then the list of free variables would be wrong, so for now, we do not CSE
+across such a closure, simply because I (Joachim) was not sure about possible
+knock-on effects. If deemed safe and worth the slight code complication of
+re-calculating this list during or after this pass, this can surely be done.
+-}
diff --git a/compiler/simplStg/UnariseStg.hs b/compiler/simplStg/UnariseStg.hs
index fc30859980..e8ba200d0a 100644
--- a/compiler/simplStg/UnariseStg.hs
+++ b/compiler/simplStg/UnariseStg.hs
@@ -264,11 +264,6 @@ extendRho rho x (UnaryVal val)
--------------------------------------------------------------------------------
-type OutStgExpr = StgExpr
-type InStgAlt = StgAlt
-type InStgArg = StgArg
-type OutStgArg = StgArg
-
unarise :: UniqSupply -> [StgBinding] -> [StgBinding]
unarise us binds = initUs_ us (mapM (unariseBinding emptyVarEnv) binds)
diff --git a/compiler/stgSyn/StgSyn.hs b/compiler/stgSyn/StgSyn.hs
index 3ec37eefff..64c8448421 100644
--- a/compiler/stgSyn/StgSyn.hs
+++ b/compiler/stgSyn/StgSyn.hs
@@ -24,8 +24,11 @@ module StgSyn (
combineStgBinderInfo,
-- a set of synonyms for the most common (only :-) parameterisation
- StgArg,
- StgBinding, StgExpr, StgRhs, StgAlt,
+ StgArg, StgBinding, StgExpr, StgRhs, StgAlt,
+
+ -- a set of synonyms to distinguish in- and out variants
+ InStgArg, InStgBinding, InStgExpr, InStgRhs, InStgAlt,
+ OutStgArg, OutStgBinding, OutStgExpr, OutStgRhs, OutStgAlt,
-- StgOp
StgOp(..),
@@ -551,7 +554,24 @@ type StgExpr = GenStgExpr Id Id
type StgRhs = GenStgRhs Id Id
type StgAlt = GenStgAlt Id Id
+{- Many passes apply a substitution, and it's very handy to have type
+ synonyms to remind us whether or not the subsitution has been applied.
+ See CoreSyn for precedence in Core land
+-}
+
+type InStgBinding = StgBinding
+type InStgArg = StgArg
+type InStgExpr = StgExpr
+type InStgRhs = StgRhs
+type InStgAlt = StgAlt
+type OutStgBinding = StgBinding
+type OutStgArg = StgArg
+type OutStgExpr = StgExpr
+type OutStgRhs = StgRhs
+type OutStgAlt = StgAlt
+
{-
+
************************************************************************
* *
\subsubsection[UpdateFlag-datatype]{@UpdateFlag@}
diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst
index 3e660c19e9..1cad51bb1b 100644
--- a/docs/users_guide/using-optimisation.rst
+++ b/docs/users_guide/using-optimisation.rst
@@ -186,6 +186,14 @@ list.
optimisation. Switching this off can be useful if you have some
``unsafePerformIO`` expressions that you don't want commoned-up.
+.. ghc-flag:: -fstg-cse
+
+ :default: on
+
+ Enables the common-sub-expression elimination optimisation on the STG
+ intermediate language, where it is able to common up some subexpressions
+ that differ in their types, but not their represetation.
+
.. ghc-flag:: -fdicts-cheap
A very experimental flag that makes dictionary-valued expressions
diff --git a/testsuite/tests/simplStg/Makefile b/testsuite/tests/simplStg/Makefile
new file mode 100644
index 0000000000..9a36a1c5fe
--- /dev/null
+++ b/testsuite/tests/simplStg/Makefile
@@ -0,0 +1,3 @@
+TOP=../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
diff --git a/testsuite/tests/simplStg/should_run/Makefile b/testsuite/tests/simplStg/should_run/Makefile
new file mode 100644
index 0000000000..9101fbd40a
--- /dev/null
+++ b/testsuite/tests/simplStg/should_run/Makefile
@@ -0,0 +1,3 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
diff --git a/testsuite/tests/simplStg/should_run/T9291.hs b/testsuite/tests/simplStg/should_run/T9291.hs
new file mode 100644
index 0000000000..db2ce75da2
--- /dev/null
+++ b/testsuite/tests/simplStg/should_run/T9291.hs
@@ -0,0 +1,58 @@
+{-# LANGUAGE MagicHash #-}
+import GHC.Exts
+import Unsafe.Coerce
+
+foo :: Either Int a -> Either Bool a
+foo (Right x) = Right x
+foo _ = Left True
+{-# NOINLINE foo #-}
+
+bar :: a -> (Either Int a, Either Bool a)
+bar x = (Right x, Right x)
+{-# NOINLINE bar #-}
+
+nested :: Either Int (Either Int a) -> Either Bool (Either Bool a)
+nested (Right (Right x)) = Right (Right x)
+nested _ = Left True
+{-# NOINLINE nested #-}
+
+
+-- CSE in a recursive group
+data Tree x = T x (Either Int (Tree x)) (Either Bool (Tree x))
+rec1 :: x -> Tree x
+rec1 x =
+ let t = T x r1 r2
+ r1 = Right t
+ r2 = Right t
+ in t
+{-# NOINLINE rec1 #-}
+
+-- Not yet supported! (and tricky)
+data Stream a b x = S x (Stream b a x)
+rec2 :: x -> Stream a b x
+rec2 x =
+ let s1 = S x s2
+ s2 = S x s1
+ in s1
+{-# NOINLINE rec2 #-}
+
+test x = do
+ let (r1,r2) = bar x
+ (same $! r1) $! r2
+ let r3 = foo r1
+ (same $! r1) $! r3
+ let (r4,_) = bar r1
+ let r5 = nested r4
+ (same $! r4) $! r5
+ let (T _ r6 r7) = rec1 x
+ (same $! r6) $! r7
+ let s1@(S _ s2) = rec2 x
+ (same $! s1) $! s2
+{-# NOINLINE test #-}
+
+main = test "foo"
+
+same :: a -> b -> IO ()
+same x y = case reallyUnsafePtrEquality# (unsafeCoerce x) y of
+ 1# -> putStrLn "yes"
+ _ -> putStrLn "no"
diff --git a/testsuite/tests/simplStg/should_run/T9291.stdout b/testsuite/tests/simplStg/should_run/T9291.stdout
new file mode 100644
index 0000000000..aa14978324
--- /dev/null
+++ b/testsuite/tests/simplStg/should_run/T9291.stdout
@@ -0,0 +1,5 @@
+yes
+yes
+yes
+yes
+no
diff --git a/testsuite/tests/simplStg/should_run/all.T b/testsuite/tests/simplStg/should_run/all.T
new file mode 100644
index 0000000000..3d4f4a3763
--- /dev/null
+++ b/testsuite/tests/simplStg/should_run/all.T
@@ -0,0 +1,12 @@
+# Args to compile_and_run are:
+# extra compile flags
+# extra run flags
+# expected process return value, if not zero
+
+# Only compile with optimisation
+def f( name, opts ):
+ opts.only_ways = ['optasm']
+
+setTestOpts(f)
+
+test('T9291', normal, compile_and_run, [''])