Diffstat (limited to 'compiler/simplCore')
-rw-r--r--  compiler/simplCore/CSE.lhs           290
-rw-r--r--  compiler/simplCore/FloatIn.lhs       464
-rw-r--r--  compiler/simplCore/FloatOut.lhs      443
-rw-r--r--  compiler/simplCore/LiberateCase.lhs  317
-rw-r--r--  compiler/simplCore/OccurAnal.lhs     823
-rw-r--r--  compiler/simplCore/SAT.lhs           214
-rw-r--r--  compiler/simplCore/SATMonad.lhs      263
-rw-r--r--  compiler/simplCore/SetLevels.lhs     847
-rw-r--r--  compiler/simplCore/SimplCore.lhs     674
-rw-r--r--  compiler/simplCore/SimplEnv.lhs      741
-rw-r--r--  compiler/simplCore/SimplMonad.lhs    526
-rw-r--r--  compiler/simplCore/SimplUtils.lhs    1592
-rw-r--r--  compiler/simplCore/Simplify.lhs      1894
-rw-r--r--  compiler/simplCore/simplifier.tib    771
14 files changed, 9859 insertions, 0 deletions
diff --git a/compiler/simplCore/CSE.lhs b/compiler/simplCore/CSE.lhs
new file mode 100644
index 0000000000..2e8489a295
--- /dev/null
+++ b/compiler/simplCore/CSE.lhs
@@ -0,0 +1,290 @@
+%
+% (c) The AQUA Project, Glasgow University, 1993-1998
+%
+\section{Common subexpression}
+
+\begin{code}
+module CSE (
+ cseProgram
+ ) where
+
+#include "HsVersions.h"
+
+import DynFlags ( DynFlag(..), DynFlags )
+import Id ( Id, idType, idWorkerInfo )
+import IdInfo ( workerExists )
+import CoreUtils ( hashExpr, cheapEqExpr, exprIsBig, mkAltExpr, exprIsCheap )
+import DataCon ( isUnboxedTupleCon )
+import Type ( tyConAppArgs )
+import CoreSyn
+import VarEnv
+import CoreLint ( showPass, endPass )
+import Outputable
+import Util ( mapAccumL, lengthExceeds )
+import UniqFM
+\end{code}
+
+
+ Simple common sub-expression
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When we see
+ x1 = C a b
+ x2 = C x1 b
+we build up a reverse mapping: C a b -> x1
+ C x1 b -> x2
+and apply that to the rest of the program.
+
+When we then see
+ y1 = C a b
+ y2 = C y1 b
+we replace the C a b with x1. But then we *don't* want to
+add x1 -> y1 to the mapping. Rather, we want the reverse, y1 -> x1
+so that a subsequent binding
+ y2 = C y1 b
+will get transformed to C x1 b, and then to x2.
+
+So we carry an extra var->var substitution which we apply *before* looking up in the
+reverse mapping.
+
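+The same scheme, in miniature, over a toy expression type (a purely
+illustrative sketch: the names E, Env1, subst1 and cseBind1 are made up here;
+the real pass below works over CoreExpr and uses hashing rather than an Ord
+instance):
+
+\begin{verbatim}
+import Data.List (mapAccumL)
+import qualified Data.Map as M
+
+data E = V String | C String [E] deriving (Eq, Ord, Show)
+
+-- (reverse mapping: expression -> variable, substitution: var -> var)
+type Env1 = (M.Map E String, M.Map String String)
+
+subst1 :: M.Map String String -> E -> E
+subst1 s (V x)    = V (M.findWithDefault x x s)
+subst1 s (C c es) = C c (map (subst1 s) es)
+
+cseBind1 :: Env1 -> (String, E) -> (Env1, (String, E))
+cseBind1 (rev, s) (x, rhs)
+  = case M.lookup rhs' rev of
+      Just y  -> ((rev, M.insert x y s), (x, V y))      -- seen before: record x -> y
+      Nothing -> ((M.insert rhs' x rev, s), (x, rhs'))  -- remember rhs' -> x
+  where
+    rhs' = subst1 s rhs    -- apply the substitution *before* the lookup
+
+-- mapAccumL cseBind1 (M.empty, M.empty)
+--   [("x1", C "C" [V "a", V "b"]), ("y1", C "C" [V "a", V "b"]),
+--    ("y2", C "C" [V "y1", V "b"])]
+-- rewrites y1's rhs to x1, and y2's rhs to C x1 b, exactly as described above.
+\end{verbatim}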
+
+[Note: SHADOWING]
+~~~~~~~~~~~~~~~~~
+We have to be careful about shadowing.
+For example, consider
+       f = \x -> let y = x+x
+                     h = \x -> x+x
+                 in ...
+
+Here we must *not* do CSE on the inner x+x! The simplifier used to guarantee no
+shadowing, but it doesn't any more (it proved too hard), so we clone as we go.
+We can simply add clones to the substitution already described.
+
+However, we do NOT clone type variables. It's just too hard, because then we need
+to run the substitution over types and IdInfo. No no no. Instead, we just throw
+away the whole reverse mapping if we ever meet a shadowed type variable (see
+addBinder below).
+
+(In fact, I think the simplifier does guarantee no-shadowing for type variables.)
+
+
+[Note: case binders 1]
+~~~~~~~~~~~~~~~~~~~~~~
+Consider
+
+       f = \x -> case x of wild {
+                   (a:as) -> case a of wild1 {
+                               (p,q) -> ...(wild1:as)... }}
+
+Here, (wild1:as) is morally the same as (a:as) and hence equal to wild.
+But that's not quite obvious. In general we want to keep it as (wild1:as),
+but for CSE purposes that's a bad idea.
+
+So we add the binding (wild1 -> a) to the extra var->var mapping.
+Notice this is exactly backwards to what the simplifier does, which is
+to try to replace uses of a with uses of wild1.
+
+[Note: case binders 2]
+~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ case (h x) of y -> ...(h x)...
+
+We'd like to replace (h x) in the alternative, by y. But because of
+the preceding [Note: case binders 1], we only want to add the mapping
+ scrutinee -> case binder
+to the reverse CSE mapping if the scrutinee is a non-trivial expression.
+(If the scrutinee is a simple variable we want to add the mapping
+ case binder -> scrutinee
+to the substitution.)
+
+[Note: unboxed tuple case binders]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ case f x of t { (# a,b #) ->
+ case ... of
+ True -> f x
+ False -> 0 }
+
+We must not replace (f x) by t, because t is an unboxed-tuple binder.
+Instead, we should replace (f x) by (# a,b #). That is, the "reverse mapping" is
+ f x --> (# a,b #)
+That is why the CSEMap has pairs of expressions.
+
+
+%************************************************************************
+%* *
+\section{Common subexpression}
+%* *
+%************************************************************************
+
+\begin{code}
+cseProgram :: DynFlags -> [CoreBind] -> IO [CoreBind]
+
+cseProgram dflags binds
+ = do {
+ showPass dflags "Common sub-expression";
+ let { binds' = cseBinds emptyCSEnv binds };
+ endPass dflags "Common sub-expression" Opt_D_dump_cse binds'
+ }
+
+cseBinds :: CSEnv -> [CoreBind] -> [CoreBind]
+cseBinds env [] = []
+cseBinds env (b:bs) = (b':bs')
+ where
+ (env1, b') = cseBind env b
+ bs' = cseBinds env1 bs
+
+cseBind :: CSEnv -> CoreBind -> (CSEnv, CoreBind)
+cseBind env (NonRec b e) = let (env', (b',e')) = do_one env (b, e)
+ in (env', NonRec b' e')
+cseBind env (Rec pairs) = let (env', pairs') = mapAccumL do_one env pairs
+ in (env', Rec pairs')
+
+
+do_one env (id, rhs)
+ = case lookupCSEnv env rhs' of
+ Just (Var other_id) -> (extendSubst env' id other_id, (id', Var other_id))
+ Just other_expr -> (env', (id', other_expr))
+ Nothing -> (addCSEnvItem env' rhs' (Var id'), (id', rhs'))
+ where
+ (env', id') = addBinder env id
+ rhs' | not (workerExists (idWorkerInfo id)) = cseExpr env' rhs
+
+ -- Hack alert: don't do CSE on wrapper RHSs.
+ -- Otherwise we find:
+ -- $wf = h
+ -- f = \x -> ...$wf...
+ -- ===>
+ -- f = \x -> ...h...
+ -- But the WorkerInfo for f still says $wf, which is now dead!
+ | otherwise = rhs
+
+
+tryForCSE :: CSEnv -> CoreExpr -> CoreExpr
+tryForCSE env (Type t) = Type t
+tryForCSE env expr = case lookupCSEnv env expr' of
+ Just smaller_expr -> smaller_expr
+ Nothing -> expr'
+ where
+ expr' = cseExpr env expr
+
+cseExpr :: CSEnv -> CoreExpr -> CoreExpr
+cseExpr env (Type t) = Type t
+cseExpr env (Lit lit) = Lit lit
+cseExpr env (Var v) = Var (lookupSubst env v)
+cseExpr env (App f a) = App (cseExpr env f) (tryForCSE env a)
+cseExpr env (Note n e) = Note n (cseExpr env e)
+cseExpr env (Lam b e) = let (env', b') = addBinder env b
+ in Lam b' (cseExpr env' e)
+cseExpr env (Let bind e) = let (env', bind') = cseBind env bind
+ in Let bind' (cseExpr env' e)
+cseExpr env (Case scrut bndr ty alts) = Case scrut' bndr' ty (cseAlts env' scrut' bndr bndr' alts)
+ where
+ scrut' = tryForCSE env scrut
+ (env', bndr') = addBinder env bndr
+
+
+cseAlts env scrut' bndr bndr' [(DataAlt con, args, rhs)]
+ | isUnboxedTupleCon con
+ -- Unboxed tuples are special because the case binder isn't
+ -- a real value. See [Note: unboxed tuple case binders]
+ = [(DataAlt con, args', tryForCSE new_env rhs)]
+ where
+ (env', args') = addBinders env args
+ new_env | exprIsCheap scrut' = env'
+ | otherwise = extendCSEnv env' scrut' tup_value
+ tup_value = mkAltExpr (DataAlt con) args' (tyConAppArgs (idType bndr))
+
+cseAlts env scrut' bndr bndr' alts
+ = map cse_alt alts
+ where
+ (con_target, alt_env)
+ = case scrut' of
+ Var v' -> (v', extendSubst env bndr v') -- See [Note: case binders 1]
+ -- map: bndr -> v'
+
+ other -> (bndr', extendCSEnv env scrut' (Var bndr')) -- See [Note: case binders 2]
+ -- map: scrut' -> bndr'
+
+ arg_tys = tyConAppArgs (idType bndr)
+
+ cse_alt (DataAlt con, args, rhs)
+ | not (null args)
+ -- Don't try CSE if there are no args; it just increases the number
+ -- of live vars. E.g.
+ -- case x of { True -> ....True.... }
+ -- Don't replace True by x!
+ -- Hence the 'null args', which also deals with literals and DEFAULT
+ = (DataAlt con, args', tryForCSE new_env rhs)
+ where
+ (env', args') = addBinders alt_env args
+ new_env = extendCSEnv env' (mkAltExpr (DataAlt con) args' arg_tys)
+ (Var con_target)
+
+ cse_alt (con, args, rhs)
+ = (con, args', tryForCSE env' rhs)
+ where
+ (env', args') = addBinders alt_env args
+\end{code}
+
+
+%************************************************************************
+%* *
+\section{The CSE envt}
+%* *
+%************************************************************************
+
+\begin{code}
+data CSEnv = CS CSEMap InScopeSet (IdEnv Id)
+ -- Simple substitution
+
+type CSEMap = UniqFM [(CoreExpr, CoreExpr)] -- This is the reverse mapping
+ -- It maps the hash-code of an expression e to list of (e,e') pairs
+ -- This means that it's good to replace e by e'
+ -- INVARIANT: The expr in the range has already been CSE'd
+
+emptyCSEnv = CS emptyUFM emptyInScopeSet emptyVarEnv
+
+lookupCSEnv :: CSEnv -> CoreExpr -> Maybe CoreExpr
+lookupCSEnv (CS cs _ _) expr
+ = case lookupUFM cs (hashExpr expr) of
+ Nothing -> Nothing
+ Just pairs -> lookup_list pairs expr
+
+lookup_list :: [(CoreExpr,CoreExpr)] -> CoreExpr -> Maybe CoreExpr
+lookup_list [] expr = Nothing
+lookup_list ((e,e'):es) expr | cheapEqExpr e expr = Just e'
+ | otherwise = lookup_list es expr
+
+addCSEnvItem env expr expr' | exprIsBig expr = env
+ | otherwise = extendCSEnv env expr expr'
+ -- We don't try to CSE big expressions, because they are expensive to compare
+ -- (and are unlikely to be the same anyway)
+
+extendCSEnv (CS cs in_scope sub) expr expr'
+ = CS (addToUFM_C combine cs hash [(expr, expr')]) in_scope sub
+ where
+ hash = hashExpr expr
+ combine old new = WARN( result `lengthExceeds` 4, text "extendCSEnv: long list:" <+> ppr result )
+ result
+ where
+ result = new ++ old
+
+lookupSubst (CS _ _ sub) x = case lookupVarEnv sub x of
+ Just y -> y
+ Nothing -> x
+
+extendSubst (CS cs in_scope sub) x y = CS cs in_scope (extendVarEnv sub x y)
+
+addBinder :: CSEnv -> Id -> (CSEnv, Id)
+addBinder env@(CS cs in_scope sub) v
+ | not (v `elemInScopeSet` in_scope) = (CS cs (extendInScopeSet in_scope v) sub, v)
+ | isId v = (CS cs (extendInScopeSet in_scope v') (extendVarEnv sub v v'), v')
+ | not (isId v) = WARN( True, ppr v )
+ (CS emptyUFM in_scope sub, v)
+ -- This last case is the unusual situation where we have shadowing of
+ -- a type variable; we have to discard the CSE mapping
+ -- See "IMPORTANT NOTE" at the top
+ where
+ v' = uniqAway in_scope v
+
+addBinders :: CSEnv -> [Id] -> (CSEnv, [Id])
+addBinders env vs = mapAccumL addBinder env vs
+\end{code}
diff --git a/compiler/simplCore/FloatIn.lhs b/compiler/simplCore/FloatIn.lhs
new file mode 100644
index 0000000000..0e8edb5930
--- /dev/null
+++ b/compiler/simplCore/FloatIn.lhs
@@ -0,0 +1,464 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+%************************************************************************
+%* *
+\section[FloatIn]{Floating Inwards pass}
+%* *
+%************************************************************************
+
+The main purpose of @floatInwards@ is floating into branches of a
+case, so that we don't allocate things, save them on the stack, and
+then discover that they aren't needed in the chosen branch.
+
+\begin{code}
+module FloatIn ( floatInwards ) where
+
+#include "HsVersions.h"
+
+import DynFlags ( DynFlags, DynFlag(..) )
+import CoreSyn
+import CoreUtils ( exprIsHNF, exprIsDupable )
+import CoreLint ( showPass, endPass )
+import CoreFVs ( CoreExprWithFVs, freeVars, freeVarsOf )
+import Id ( isOneShotBndr )
+import Var ( Id, idType )
+import Type ( isUnLiftedType )
+import VarSet
+import Util ( zipEqual, zipWithEqual, count )
+import Outputable
+\end{code}
+
+Top-level interface function, @floatInwards@. Note that we do not
+actually float any bindings downwards from the top-level.
+
+\begin{code}
+floatInwards :: DynFlags -> [CoreBind] -> IO [CoreBind]
+
+floatInwards dflags binds
+ = do {
+ showPass dflags "Float inwards";
+ let { binds' = map fi_top_bind binds };
+ endPass dflags "Float inwards" Opt_D_verbose_core2core binds'
+ {- no specific flag for dumping float-in -}
+ }
+
+ where
+ fi_top_bind (NonRec binder rhs)
+ = NonRec binder (fiExpr [] (freeVars rhs))
+ fi_top_bind (Rec pairs)
+ = Rec [ (b, fiExpr [] (freeVars rhs)) | (b, rhs) <- pairs ]
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Mail from Andr\'e [edited]}
+%* *
+%************************************************************************
+
+{\em Will wrote: What??? I thought the idea was to float as far
+inwards as possible, no matter what. This is dropping all bindings
+every time it sees a lambda of any kind. Help! }
+
+You are assuming we DO DO full laziness AFTER floating inwards! We
+have to [not float inside lambdas] if we don't.
+
+If we indeed do full laziness after the floating inwards (we could
+check the compilation flags for that) then I agree we could be more
+aggressive and do float inwards past lambdas.
+
+Actually we are not doing a proper full laziness (see below), which
+was another reason for not floating inwards past a lambda.
+
+This can easily be fixed. The problem is that we float lets outwards,
+but there are a few expressions which are not let bound, like case
+scrutinees and case alternatives. After floating inwards the
+simplifier could decide to inline the let and the laziness would be
+lost, e.g.
+
+\begin{verbatim}
+let a = expensive ==> \b -> case expensive of ...
+in \ b -> case a of ...
+\end{verbatim}
+The fix is
+\begin{enumerate}
+\item
+to let bind the algebraic case scrutinees (done, I think) and
+the case alternatives (except the ones with an
+unboxed type)(not done, I think). This is best done in the
+SetLevels.lhs module, which tags things with their level numbers.
+\item
+do the full laziness pass (floating lets outwards).
+\item
+simplify. The simplifier inlines the (trivial) lets that were
+ created but were not floated outwards.
+\end{enumerate}
+
+With the fix I think Will's suggestion that we can gain even more from
+strictness by floating inwards past lambdas makes sense.
+
+We still gain even without going past lambdas, as things may be
+strict in the (new) context of a branch (where it was floated to) or
+of a let rhs, e.g.
+\begin{verbatim}
+let a = something case x of
+in case x of alt1 -> case something of a -> a + a
+ alt1 -> a + a ==> alt2 -> b
+ alt2 -> b
+
+let a = something let b = case something of a -> a + a
+in let b = a + a ==> in (b,b)
+in (b,b)
+\end{verbatim}
+Also, even if a is not found to be strict in the new context and is
+still left as a let, if the branch is not taken (or b is not entered)
+the closure for a is not built.
+
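+The same point at source level, as a minimal sketch (beforeFI and afterFI are
+made-up names; the pass itself works on Core, not source Haskell):
+
+\begin{verbatim}
+beforeFI, afterFI :: Bool -> [Int] -> Int
+
+-- Before floating inwards: a thunk for 'a' is allocated (and saved across
+-- the case on 'p') even when the False branch is taken.
+beforeFI p xs = let a = sum xs in if p then a + a else 0
+
+-- After floating the binding into the branch that uses it: nothing is
+-- allocated on the False path, and in the True branch 'a' is evaluated
+-- right where it is needed.
+afterFI p xs = if p then (let a = sum xs in a + a) else 0
+\end{verbatim}
+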
+%************************************************************************
+%* *
+\subsection{Main floating-inwards code}
+%* *
+%************************************************************************
+
+\begin{code}
+type FreeVarsSet = IdSet
+
+type FloatingBinds = [(CoreBind, FreeVarsSet)]
+ -- In reverse dependency order (innermost binding first)
+
+ -- The FreeVarsSet is the free variables of the binding. In the case
+ -- of recursive bindings, the set doesn't include the bound
+ -- variables.
+
+fiExpr :: FloatingBinds -- Binds we're trying to drop
+ -- as far "inwards" as possible
+ -> CoreExprWithFVs -- Input expr
+ -> CoreExpr -- Result
+
+fiExpr to_drop (_, AnnVar v) = mkCoLets' to_drop (Var v)
+
+fiExpr to_drop (_, AnnType ty) = ASSERT( null to_drop )
+ Type ty
+
+fiExpr to_drop (_, AnnLit lit) = Lit lit
+\end{code}
+
+Applications: we do float inside applications, mainly because we
+need to get at all the arguments. The next simplifier run will
+pull out any silly ones.
+
+\begin{code}
+fiExpr to_drop (_,AnnApp fun arg)
+ = mkCoLets' drop_here (App (fiExpr fun_drop fun) (fiExpr arg_drop arg))
+ where
+ [drop_here, fun_drop, arg_drop] = sepBindsByDropPoint False [freeVarsOf fun, freeVarsOf arg] to_drop
+\end{code}
+
+We are careful about lambdas:
+
+* We must be careful about floating inside a value lambda.
+ That risks losing laziness.
+ The float-out pass might rescue us, but then again it might not.
+
+* We must be careful about type lambdas too. At one time we floated
+ inside type lambdas; there is no risk of duplicating work thereby,
+ but we do need to be careful. In particular, here is a bad case (it
+ happened in the cichelli benchmark):
+ let v = ...
+ in let f = /\t -> \a -> ...
+ ==>
+ let f = /\t -> let v = ... in \a -> ...
+ This is bad as now f is an updatable closure (update PAP)
+ and has arity 0.
+
+So we treat lambda in groups, using the following rule:
+
+ Float inside a group of lambdas only if
+ they are all either type lambdas or one-shot lambdas.
+
+ Otherwise drop all the bindings outside the group.
+
+\begin{code}
+ -- Hack alert! We only float in through one-shot lambdas,
+ -- not (as you might guess) through big lambdas.
+ -- Reason: we float *out* past big lambdas (see the test in the Lam
+ -- case of FloatOut.floatExpr) and we don't want to float straight
+ -- back in again.
+ --
+ -- It *is* important to float into one-shot lambdas, however;
+ -- see the remarks with noFloatIntoRhs.
+fiExpr to_drop lam@(_, AnnLam _ _)
+ | all is_one_shot bndrs -- Float in
+ = mkLams bndrs (fiExpr to_drop body)
+
+ | otherwise -- Dump it all here
+ = mkCoLets' to_drop (mkLams bndrs (fiExpr [] body))
+
+ where
+ (bndrs, body) = collectAnnBndrs lam
+\end{code}
+
+We don't float lets inwards past an SCC.
+ ToDo: keep info on current cc, and when passing
+ one, if it is not the same, annotate all lets in binds with current
+ cc, change current cc to the new one and float binds into expr.
+
+\begin{code}
+fiExpr to_drop (_, AnnNote note@(SCC cc) expr)
+ = -- Wimp out for now
+ mkCoLets' to_drop (Note note (fiExpr [] expr))
+
+fiExpr to_drop (_, AnnNote InlineCall expr)
+ = -- Wimp out for InlineCall; keep it close
+ -- to the call it annotates
+ mkCoLets' to_drop (Note InlineCall (fiExpr [] expr))
+
+fiExpr to_drop (_, AnnNote InlineMe expr)
+ = -- Ditto... don't float anything into an INLINE expression
+ mkCoLets' to_drop (Note InlineMe (fiExpr [] expr))
+
+fiExpr to_drop (_, AnnNote note@(Coerce _ _) expr)
+ = -- Just float in past coercion
+ Note note (fiExpr to_drop expr)
+
+fiExpr to_drop (_, AnnNote note@(CoreNote _) expr)
+ = Note note (fiExpr to_drop expr)
+\end{code}
+
+For @Lets@, the possible ``drop points'' for the \tr{to_drop}
+bindings are: (a)~in the body, (b1)~in the RHS of a NonRec binding,
+or~(b2), in each of the RHSs of the pairs of a @Rec@.
+
+Note that we do {\em weird things} with this let's binding. Consider:
+\begin{verbatim}
+let
+ w = ...
+in {
+ let v = ... w ...
+ in ... v .. w ...
+}
+\end{verbatim}
+Look at the inner \tr{let}. As \tr{w} is used in both the bind and
+body of the inner let, we could panic and leave \tr{w}'s binding where
+it is. But \tr{v} is floatable further into the body of the inner let, and
+{\em then} \tr{w} will also be only in the body of that inner let.
+
+So: rather than drop \tr{w}'s binding here, we add it onto the list of
+things to drop in the outer let's body, and let nature take its
+course.
+
+\begin{code}
+fiExpr to_drop (_,AnnLet (AnnNonRec id rhs@(rhs_fvs, ann_rhs)) body)
+ = fiExpr new_to_drop body
+ where
+ body_fvs = freeVarsOf body
+
+ final_body_fvs | noFloatIntoRhs ann_rhs
+ || isUnLiftedType (idType id) = body_fvs `unionVarSet` rhs_fvs
+ | otherwise = body_fvs
+ -- See comments with letrec below
+ -- No point in floating in only to float straight out again
+ -- Ditto ok-for-speculation unlifted RHSs
+
+ [shared_binds, rhs_binds, body_binds] = sepBindsByDropPoint False [rhs_fvs, final_body_fvs] to_drop
+
+ new_to_drop = body_binds ++ -- the bindings used only in the body
+ [(NonRec id rhs', rhs_fvs')] ++ -- the new binding itself
+ shared_binds -- the bindings used both in rhs and body
+
+ -- Push rhs_binds into the right hand side of the binding
+ rhs' = fiExpr rhs_binds rhs
+ rhs_fvs' = rhs_fvs `unionVarSet` floatedBindsFVs rhs_binds
+
+fiExpr to_drop (_,AnnLet (AnnRec bindings) body)
+ = fiExpr new_to_drop body
+ where
+ rhss = map snd bindings
+
+ rhss_fvs = map freeVarsOf rhss
+ body_fvs = freeVarsOf body
+
+ -- Add to body_fvs the free vars of any RHS that has
+ -- a lambda at the top. This has the effect of making it seem
+ -- that such things are used in the body as well, and hence prevents
+ -- them getting floated in. The big idea is to avoid turning:
+ -- let x# = y# +# 1#
+ -- in
+ -- letrec f = \z. ...x#...f...
+ -- in ...
+ -- into
+ -- letrec f = let x# = y# +# 1# in \z. ...x#...f... in ...
+ --
+ -- Because now we can't float the let out again, because a letrec
+ -- can't have unboxed bindings.
+
+ final_body_fvs = foldr (unionVarSet . get_extras) body_fvs rhss
+ get_extras (rhs_fvs, rhs) | noFloatIntoRhs rhs = rhs_fvs
+ | otherwise = emptyVarSet
+
+ (shared_binds:body_binds:rhss_binds) = sepBindsByDropPoint False (final_body_fvs:rhss_fvs) to_drop
+
+ new_to_drop = -- the bindings used only in the body
+ body_binds ++
+ -- the new binding itself
+ [(Rec (fi_bind rhss_binds bindings), rhs_fvs')] ++
+ -- the bindings used both in rhs and body or in more than one rhs
+ shared_binds
+
+ rhs_fvs' = unionVarSet (unionVarSets rhss_fvs)
+ (unionVarSets (map floatedBindsFVs rhss_binds))
+
+ -- Push rhs_binds into the right hand side of the binding
+ fi_bind :: [FloatingBinds] -- one per "drop pt" conjured w/ fvs_of_rhss
+ -> [(Id, CoreExprWithFVs)]
+ -> [(Id, CoreExpr)]
+
+ fi_bind to_drops pairs
+ = [ (binder, fiExpr to_drop rhs)
+ | ((binder, rhs), to_drop) <- zipEqual "fi_bind" pairs to_drops ]
+\end{code}
+
+For @Case@, the possible ``drop points'' for the \tr{to_drop}
+bindings are: (a)~inside the scrutinee, (b)~inside one of the
+alternatives/default [default FVs always {\em first}!].
+
+\begin{code}
+fiExpr to_drop (_, AnnCase scrut case_bndr ty alts)
+ = mkCoLets' drop_here1 $
+ mkCoLets' drop_here2 $
+ Case (fiExpr scrut_drops scrut) case_bndr ty
+ (zipWith fi_alt alts_drops_s alts)
+ where
+ -- Float into the scrut and alts-considered-together just like App
+ [drop_here1, scrut_drops, alts_drops] = sepBindsByDropPoint False [scrut_fvs, all_alts_fvs] to_drop
+
+ -- Float into the alts with the is_case flag set
+ (drop_here2 : alts_drops_s) = sepBindsByDropPoint True alts_fvs alts_drops
+
+ scrut_fvs = freeVarsOf scrut
+ alts_fvs = map alt_fvs alts
+ all_alts_fvs = unionVarSets alts_fvs
+ alt_fvs (con, args, rhs) = foldl delVarSet (freeVarsOf rhs) (case_bndr:args)
+ -- Delete case_bndr and args from free vars of rhs
+ -- to get free vars of alt
+
+ fi_alt to_drop (con, args, rhs) = (con, args, fiExpr to_drop rhs)
+
+noFloatIntoRhs (AnnNote InlineMe _) = True
+noFloatIntoRhs (AnnLam b _) = not (is_one_shot b)
+ -- IMPORTANT: don't say 'True' for a RHS with a one-shot lambda at the top.
+ -- This makes a big difference for things like
+ -- f x# = let x = I# x#
+ -- in let j = \() -> ...x...
+ -- in if <condition> then normal-path else j ()
+ -- If x is used only in the error case join point, j, we must float the
+ -- boxing constructor into it, else we box it every time which is very bad
+ -- news indeed.
+
+noFloatIntoRhs rhs = exprIsHNF (deAnnotate' rhs) -- We'd just float right back out again...
+
+is_one_shot b = isId b && isOneShotBndr b
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{@sepBindsByDropPoint@}
+%* *
+%************************************************************************
+
+This is the crucial function. The idea is: We have a wad of bindings
+that we'd like to distribute inside a collection of {\em drop points};
+inside the alternatives of a \tr{case} would be one example of some
+drop points; the RHS and body of a non-recursive \tr{let} binding
+would be another (2-element) collection.
+
+So: We're given a list of sets-of-free-variables, one per drop point,
+and a list of floating-inwards bindings. If a binding can go into
+only one drop point (without suddenly making something out-of-scope),
+in it goes. If a binding is used inside {\em multiple} drop points,
+then it has to go in a you-must-drop-it-above-all-these-drop-points
+point.
+
+We have to maintain the order on these drop-point-related lists.
+
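+The decision for a single binding can be sketched as follows (dropPointFor is
+an illustrative name only; the real function below must also keep the
+bindings in dependency order and may duplicate small bindings into several
+case alternatives):
+
+\begin{verbatim}
+import qualified Data.Set as S
+
+dropPointFor :: Ord a => a -> [S.Set a] -> Maybe Int
+dropPointFor bndr fvss
+  = case [ i | (i, fvs) <- zip [0..] fvss, bndr `S.member` fvs ] of
+      [i] -> Just i     -- free in exactly one drop point: push it in there
+      _   -> Nothing    -- free in none, or in several: it stays "here"
+\end{verbatim}
+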
+\begin{code}
+sepBindsByDropPoint
+ :: Bool -- True <=> is case expression
+ -> [FreeVarsSet] -- One set of FVs per drop point
+ -> FloatingBinds -- Candidate floaters
+ -> [FloatingBinds] -- FIRST one is bindings which must not be floated
+ -- inside any drop point; the rest correspond
+ -- one-to-one with the input list of FV sets
+
+-- Every input floater is returned somewhere in the result;
+-- none are dropped, not even ones which don't seem to be
+-- free in *any* of the drop-point fvs. Why? Because, for example,
+-- a binding (let x = E in B) might have a specialised version of
+-- x (say x') stored inside x, but x' isn't free in E or B.
+
+type DropBox = (FreeVarsSet, FloatingBinds)
+
+sepBindsByDropPoint is_case drop_pts []
+ = [] : [[] | p <- drop_pts] -- cut to the chase scene; it happens
+
+sepBindsByDropPoint is_case drop_pts floaters
+ = go floaters (map (\fvs -> (fvs, [])) (emptyVarSet : drop_pts))
+ where
+ go :: FloatingBinds -> [DropBox] -> [FloatingBinds]
+ -- The *first* one in the argument list is the drop_here set
+ -- The FloatingBinds in the lists are in the reverse of
+ -- the normal FloatingBinds order; that is, they are the right way round!
+
+ go [] drop_boxes = map (reverse . snd) drop_boxes
+
+ go (bind_w_fvs@(bind, bind_fvs) : binds) drop_boxes@(here_box : fork_boxes)
+ = go binds new_boxes
+ where
+ -- "here" means the group of bindings dropped at the top of the fork
+
+ (used_here : used_in_flags) = [ any (`elemVarSet` fvs) (bindersOf bind)
+ | (fvs, drops) <- drop_boxes]
+
+ drop_here = used_here || not can_push
+
+ -- For case expressions we duplicate the binding if it is
+ -- reasonably small, and if it is not used in all the RHSs
+ -- This is good for situations like
+ -- let x = I# y in
+ -- case e of
+ -- C -> error x
+ -- D -> error x
+ -- E -> ...not mentioning x...
+
+ n_alts = length used_in_flags
+ n_used_alts = count id used_in_flags -- returns number of Trues in list.
+
+ can_push = n_used_alts == 1 -- Used in just one branch
+ || (is_case && -- We are looking at case alternatives
+ n_used_alts > 1 && -- It's used in more than one
+ n_used_alts < n_alts && -- ...but not all
+ bindIsDupable bind) -- and we can duplicate the binding
+
+ new_boxes | drop_here = (insert here_box : fork_boxes)
+ | otherwise = (here_box : new_fork_boxes)
+
+ new_fork_boxes = zipWithEqual "FloatIn.sepBinds" insert_maybe fork_boxes used_in_flags
+
+ insert :: DropBox -> DropBox
+ insert (fvs,drops) = (fvs `unionVarSet` bind_fvs, bind_w_fvs:drops)
+
+ insert_maybe box True = insert box
+ insert_maybe box False = box
+
+
+floatedBindsFVs :: FloatingBinds -> FreeVarsSet
+floatedBindsFVs binds = unionVarSets (map snd binds)
+
+mkCoLets' :: FloatingBinds -> CoreExpr -> CoreExpr
+mkCoLets' to_drop e = foldl (flip (Let . fst)) e to_drop
+ -- Remember to_drop is in *reverse* dependency order
+
+bindIsDupable (Rec prs) = all (exprIsDupable . snd) prs
+bindIsDupable (NonRec b r) = exprIsDupable r
+\end{code}
diff --git a/compiler/simplCore/FloatOut.lhs b/compiler/simplCore/FloatOut.lhs
new file mode 100644
index 0000000000..988bd53015
--- /dev/null
+++ b/compiler/simplCore/FloatOut.lhs
@@ -0,0 +1,443 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+\section[FloatOut]{Float bindings outwards (towards the top level)}
+
+``Long-distance'' floating of bindings towards the top level.
+
+\begin{code}
+module FloatOut ( floatOutwards ) where
+
+#include "HsVersions.h"
+
+import CoreSyn
+import CoreUtils ( mkSCC, exprIsHNF, exprIsTrivial )
+
+import DynFlags ( DynFlags, DynFlag(..), FloatOutSwitches(..) )
+import ErrUtils ( dumpIfSet_dyn )
+import CostCentre ( dupifyCC, CostCentre )
+import Id ( Id, idType )
+import Type ( isUnLiftedType )
+import CoreLint ( showPass, endPass )
+import SetLevels ( Level(..), LevelledExpr, LevelledBind,
+ setLevels, ltMajLvl, ltLvl, isTopLvl )
+import UniqSupply ( UniqSupply )
+import List ( partition )
+import Outputable
+import Util ( notNull )
+\end{code}
+
+ -----------------
+ Overall game plan
+ -----------------
+
+The Big Main Idea is:
+
+ To float out sub-expressions that can thereby get outside
+ a non-one-shot value lambda, and hence may be shared.
+
+
+To achieve this we may need to do two things:
+
+ a) Let-bind the sub-expression:
+
+ f (g x) ==> let lvl = f (g x) in lvl
+
+ Now we can float the binding for 'lvl'.
+
+ b) More than that, we may need to abstract wrt a type variable
+
+ \x -> ... /\a -> let v = ...a... in ....
+
+ Here the binding for v mentions 'a' but not 'x'. So we
+ abstract wrt 'a', to give this binding for 'v':
+
+ vp = /\a -> ...a...
+ v = vp a
+
+ Now the binding for vp can float out unimpeded.
+ I can't remember why this case seemed important enough to
+ deal with, but I certainly found cases where important floats
+ didn't happen if we did not abstract wrt tyvars.
+
+With this in mind we can also achieve another goal: lambda lifting.
+We can make an arbitrary (function) binding float to top level by
+abstracting wrt *all* local variables, not just type variables, leaving
+a binding that can be floated right to top level. Whether or not this
+happens is controlled by a flag.
+
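+A source-level caricature of the sharing we are after (slowSum and fastSum
+are made-up names; the pass itself works on Core and introduces the 'lvl'
+binding mechanically):
+
+\begin{verbatim}
+-- The sum sits under the \x, so it is recomputed on every call...
+slowSum :: Int -> Int
+slowSum = \x -> x + sum [1..1000000]
+
+-- ...but once it is let-bound and floated past the \x, it is shared
+-- between all calls.
+fastSum :: Int -> Int
+fastSum = let lvl = sum [1..1000000]
+          in \x -> x + lvl
+\end{verbatim}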
+
+Random comments
+~~~~~~~~~~~~~~~
+
+At the moment we never float a binding out to between two adjacent
+lambdas. For example:
+
+@
+ \x y -> let t = x+x in ...
+===>
+ \x -> let t = x+x in \y -> ...
+@
+Reason: this is less efficient in the case where the original lambda
+is never partially applied.
+
+But there's a case I've seen where this might not be true. Consider:
+@
+elEm2 x ys
+ = elem' x ys
+ where
+ elem' _ [] = False
+ elem' x (y:ys) = x==y || elem' x ys
+@
+It turns out that this generates a subexpression of the form
+@
+ \deq x ys -> let eq = eqFromEqDict deq in ...
+@
+which might usefully be separated to
+@
+ \deq -> let eq = eqFromEqDict deq in \x ys -> ...
+@
+Well, maybe. We don't do this at the moment.
+
+\begin{code}
+type FloatBind = (Level, CoreBind) -- INVARIANT: a FloatBind is always lifted
+type FloatBinds = [FloatBind]
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[floatOutwards]{@floatOutwards@: let-floating interface function}
+%* *
+%************************************************************************
+
+\begin{code}
+floatOutwards :: FloatOutSwitches
+ -> DynFlags
+ -> UniqSupply
+ -> [CoreBind] -> IO [CoreBind]
+
+floatOutwards float_sws dflags us pgm
+ = do {
+ showPass dflags float_msg ;
+
+ let { annotated_w_levels = setLevels float_sws pgm us ;
+ (fss, binds_s') = unzip (map floatTopBind annotated_w_levels)
+ } ;
+
+ dumpIfSet_dyn dflags Opt_D_verbose_core2core "Levels added:"
+ (vcat (map ppr annotated_w_levels));
+
+ let { (tlets, ntlets, lams) = get_stats (sum_stats fss) };
+
+ dumpIfSet_dyn dflags Opt_D_dump_simpl_stats "FloatOut stats:"
+ (hcat [ int tlets, ptext SLIT(" Lets floated to top level; "),
+ int ntlets, ptext SLIT(" Lets floated elsewhere; from "),
+ int lams, ptext SLIT(" Lambda groups")]);
+
+ endPass dflags float_msg Opt_D_verbose_core2core (concat binds_s')
+ {- no specific flag for dumping float-out -}
+ }
+ where
+ float_msg = showSDoc (text "Float out" <+> parens (sws float_sws))
+ sws (FloatOutSw lam const) = pp_not lam <+> text "lambdas" <> comma <+>
+ pp_not const <+> text "constants"
+ pp_not True = empty
+ pp_not False = text "not"
+
+floatTopBind bind@(NonRec _ _)
+ = case (floatBind bind) of { (fs, floats, bind') ->
+ (fs, floatsToBinds floats ++ [bind'])
+ }
+
+floatTopBind bind@(Rec _)
+ = case (floatBind bind) of { (fs, floats, Rec pairs') ->
+ WARN( notNull floats, ppr bind $$ ppr floats )
+ (fs, [Rec (floatsToBindPairs floats ++ pairs')]) }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[FloatOut-Bind]{Floating in a binding (the business end)}
+%* *
+%************************************************************************
+
+
+\begin{code}
+floatBind :: LevelledBind
+ -> (FloatStats, FloatBinds, CoreBind)
+
+floatBind (NonRec (TB name level) rhs)
+ = case (floatNonRecRhs level rhs) of { (fs, rhs_floats, rhs') ->
+ (fs, rhs_floats, NonRec name rhs') }
+
+floatBind bind@(Rec pairs)
+ = case (unzip3 (map do_pair pairs)) of { (fss, rhss_floats, new_pairs) ->
+
+ if not (isTopLvl bind_dest_level) then
+ -- Standard case; the floated bindings can't mention the
+ -- binders, because they couldn't be escaping a major level
+ -- if so.
+ (sum_stats fss, concat rhss_floats, Rec new_pairs)
+ else
+ -- In a recursive binding, *destined for* the top level
+ -- (only), the rhs floats may contain references to the
+ -- bound things. For example
+ -- f = ...(let v = ...f... in b) ...
+ -- might get floated to
+ -- v = ...f...
+ -- f = ... b ...
+ -- and hence we must (pessimistically) make all the floats recursive
+ -- with the top binding. Later dependency analysis will unravel it.
+ --
+ -- This can only happen for bindings destined for the top level,
+ -- because only then will partitionByMajorLevel allow through a binding
+ -- that only differs in its minor level
+ (sum_stats fss, [],
+ Rec (new_pairs ++ floatsToBindPairs (concat rhss_floats)))
+ }
+ where
+ bind_dest_level = getBindLevel bind
+
+ do_pair (TB name level, rhs)
+ = case (floatRhs level rhs) of { (fs, rhs_floats, rhs') ->
+ (fs, rhs_floats, (name, rhs'))
+ }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[FloatOut-Expr]{Floating in expressions}
+%* *
+%************************************************************************
+
+\begin{code}
+floatExpr, floatRhs, floatNonRecRhs
+ :: Level
+ -> LevelledExpr
+ -> (FloatStats, FloatBinds, CoreExpr)
+
+floatRhs lvl arg -- Used for rec rhss, and case-alternative rhss
+ = case (floatExpr lvl arg) of { (fsa, floats, arg') ->
+ case (partitionByMajorLevel lvl floats) of { (floats', heres) ->
+ -- Dump bindings that aren't going to escape from a lambda;
+ -- in particular, we must dump the ones that are bound by
+ -- the rec or case alternative
+ (fsa, floats', install heres arg') }}
+
+floatNonRecRhs lvl arg -- Used for nested non-rec rhss, and fn args
+ = case (floatExpr lvl arg) of { (fsa, floats, arg') ->
+ -- Dump bindings that aren't going to escape from a lambda
+ -- This isn't a scoping issue (the binder isn't in scope in the RHS of a non-rec binding)
+ -- Rather, it is to avoid floating the x binding out of
+ -- f (let x = e in b)
+ -- unnecessarily. But we first test for values or trivial rhss,
+ -- because (in particular) we don't want to insert new bindings between
+ -- the "=" and the "\". E.g.
+ -- f = \x -> let <bind> in <body>
+ -- We do not want
+ -- f = let <bind> in \x -> <body>
+ -- (a) The simplifier will immediately float it further out, so we may
+ -- as well do so right now; in general, keeping rhss as manifest
+ -- values is good
+ -- (b) If a float-in pass follows immediately, it might add yet more
+ -- bindings just after the '='. And some of them might (correctly)
+ -- be strict even though the 'let f' is lazy, because f, being a value,
+ -- gets its demand-info zapped by the simplifier.
+ if exprIsHNF arg' || exprIsTrivial arg' then
+ (fsa, floats, arg')
+ else
+ case (partitionByMajorLevel lvl floats) of { (floats', heres) ->
+ (fsa, floats', install heres arg') }}
+
+floatExpr _ (Var v) = (zeroStats, [], Var v)
+floatExpr _ (Type ty) = (zeroStats, [], Type ty)
+floatExpr _ (Lit lit) = (zeroStats, [], Lit lit)
+
+floatExpr lvl (App e a)
+ = case (floatExpr lvl e) of { (fse, floats_e, e') ->
+ case (floatNonRecRhs lvl a) of { (fsa, floats_a, a') ->
+ (fse `add_stats` fsa, floats_e ++ floats_a, App e' a') }}
+
+floatExpr lvl lam@(Lam _ _)
+ = let
+ (bndrs_w_lvls, body) = collectBinders lam
+ bndrs = [b | TB b _ <- bndrs_w_lvls]
+ lvls = [l | TB b l <- bndrs_w_lvls]
+
+ -- For the all-tyvar case we are prepared to pull
+ -- the lets out, to implement the float-out-of-big-lambda
+ -- transform; but otherwise we only float bindings that are
+ -- going to escape a value lambda.
+ -- In particular, for one-shot lambdas we don't float things
+ -- out; we get no saving by so doing.
+ partition_fn | all isTyVar bndrs = partitionByLevel
+ | otherwise = partitionByMajorLevel
+ in
+ case (floatExpr (last lvls) body) of { (fs, floats, body') ->
+
+ -- Dump any bindings which absolutely cannot go any further
+ case (partition_fn (head lvls) floats) of { (floats', heres) ->
+
+ (add_to_stats fs floats', floats', mkLams bndrs (install heres body'))
+ }}
+
+floatExpr lvl (Note note@(SCC cc) expr)
+ = case (floatExpr lvl expr) of { (fs, floating_defns, expr') ->
+ let
+ -- Annotate bindings floated outwards past an scc expression
+ -- with the cc. We mark that cc as "duplicated", though.
+
+ annotated_defns = annotate (dupifyCC cc) floating_defns
+ in
+ (fs, annotated_defns, Note note expr') }
+ where
+ annotate :: CostCentre -> FloatBinds -> FloatBinds
+
+ annotate dupd_cc defn_groups
+ = [ (level, ann_bind floater) | (level, floater) <- defn_groups ]
+ where
+ ann_bind (NonRec binder rhs)
+ = NonRec binder (mkSCC dupd_cc rhs)
+
+ ann_bind (Rec pairs)
+ = Rec [(binder, mkSCC dupd_cc rhs) | (binder, rhs) <- pairs]
+
+floatExpr lvl (Note InlineMe expr) -- Other than SCCs
+ = case floatExpr InlineCtxt expr of { (fs, floating_defns, expr') ->
+ -- There can be some floating_defns, arising from
+ -- ordinary lets that were there all the time. It seems
+ -- more efficient to test once here than to avoid putting
+ -- them into floating_defns (which would mean testing for
+ -- inlineCtxt at every let)
+ (fs, [], Note InlineMe (install floating_defns expr')) } -- See notes in SetLevels
+
+floatExpr lvl (Note note expr) -- Other than SCCs
+ = case (floatExpr lvl expr) of { (fs, floating_defns, expr') ->
+ (fs, floating_defns, Note note expr') }
+
+floatExpr lvl (Let (NonRec (TB bndr bndr_lvl) rhs) body)
+ | isUnLiftedType (idType bndr) -- Treat unlifted lets just like a case
+ = case floatExpr lvl rhs of { (fs_rhs, rhs_floats, rhs') ->
+ case floatRhs bndr_lvl body of { (fs_body, body_floats, body') ->
+ (add_stats fs_rhs fs_body, rhs_floats ++ body_floats, Let (NonRec bndr rhs') body') }}
+
+floatExpr lvl (Let bind body)
+ = case (floatBind bind) of { (fsb, rhs_floats, bind') ->
+ case (floatExpr lvl body) of { (fse, body_floats, body') ->
+ (add_stats fsb fse,
+ rhs_floats ++ [(bind_lvl, bind')] ++ body_floats,
+ body') }}
+ where
+ bind_lvl = getBindLevel bind
+
+floatExpr lvl (Case scrut (TB case_bndr case_lvl) ty alts)
+ = case floatExpr lvl scrut of { (fse, fde, scrut') ->
+ case floatList float_alt alts of { (fsa, fda, alts') ->
+ (add_stats fse fsa, fda ++ fde, Case scrut' case_bndr ty alts')
+ }}
+ where
+ -- Use floatRhs for the alternatives, so that we
+ -- don't gratuitiously float bindings out of the RHSs
+ float_alt (con, bs, rhs)
+ = case (floatRhs case_lvl rhs) of { (fs, rhs_floats, rhs') ->
+ (fs, rhs_floats, (con, [b | TB b _ <- bs], rhs')) }
+
+
+floatList :: (a -> (FloatStats, FloatBinds, b)) -> [a] -> (FloatStats, FloatBinds, [b])
+floatList f [] = (zeroStats, [], [])
+floatList f (a:as) = case f a of { (fs_a, binds_a, b) ->
+ case floatList f as of { (fs_as, binds_as, bs) ->
+ (fs_a `add_stats` fs_as, binds_a ++ binds_as, b:bs) }}
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Utility bits for floating stats}
+%* *
+%************************************************************************
+
+I didn't implement this with unboxed numbers. I don't want to be too
+strict in this stuff, as it is rarely turned on. (WDP 95/09)
+
+\begin{code}
+data FloatStats
+ = FlS Int -- Number of top-floats * lambda groups they've been past
+ Int -- Number of non-top-floats * lambda groups they've been past
+ Int -- Number of lambda (groups) seen
+
+get_stats (FlS a b c) = (a, b, c)
+
+zeroStats = FlS 0 0 0
+
+sum_stats xs = foldr add_stats zeroStats xs
+
+add_stats (FlS a1 b1 c1) (FlS a2 b2 c2)
+ = FlS (a1 + a2) (b1 + b2) (c1 + c2)
+
+add_to_stats (FlS a b c) floats
+ = FlS (a + length top_floats) (b + length other_floats) (c + 1)
+ where
+ (top_floats, other_floats) = partition to_very_top floats
+
+ to_very_top (my_lvl, _) = isTopLvl my_lvl
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Utility bits for floating}
+%* *
+%************************************************************************
+
+\begin{code}
+getBindLevel (NonRec (TB _ lvl) _) = lvl
+getBindLevel (Rec (((TB _ lvl), _) : _)) = lvl
+\end{code}
+
+\begin{code}
+partitionByMajorLevel, partitionByLevel
+ :: Level -- Partitioning level
+
+ -> FloatBinds -- Defns to be divided into 2 piles...
+
+ -> (FloatBinds, -- Defns with level strictly < partition level,
+ FloatBinds) -- The rest
+
+
+partitionByMajorLevel ctxt_lvl defns
+ = partition float_further defns
+ where
+ -- Float it if we escape a value lambda, or if we get to the top level
+ float_further (my_lvl, bind) = my_lvl `ltMajLvl` ctxt_lvl || isTopLvl my_lvl
+ -- The isTopLvl part says that if we can get to the top level, say "yes" anyway
+ -- This means that
+ -- x = f e
+ -- transforms to
+ -- lvl = e
+ -- x = f lvl
+ -- which is as it should be
+
+partitionByLevel ctxt_lvl defns
+ = partition float_further defns
+ where
+ float_further (my_lvl, _) = my_lvl `ltLvl` ctxt_lvl
+\end{code}
+
+\begin{code}
+floatsToBinds :: FloatBinds -> [CoreBind]
+floatsToBinds floats = map snd floats
+
+floatsToBindPairs :: FloatBinds -> [(Id,CoreExpr)]
+
+floatsToBindPairs floats = concat (map mk_pairs floats)
+ where
+ mk_pairs (_, Rec pairs) = pairs
+ mk_pairs (_, NonRec binder rhs) = [(binder,rhs)]
+
+install :: FloatBinds -> CoreExpr -> CoreExpr
+
+install defn_groups expr
+ = foldr install_group expr defn_groups
+ where
+ install_group (_, defns) body = Let defns body
+\end{code}
diff --git a/compiler/simplCore/LiberateCase.lhs b/compiler/simplCore/LiberateCase.lhs
new file mode 100644
index 0000000000..c29a5b9c68
--- /dev/null
+++ b/compiler/simplCore/LiberateCase.lhs
@@ -0,0 +1,317 @@
+%
+% (c) The AQUA Project, Glasgow University, 1994-1998
+%
+\section[LiberateCase]{Unroll recursion to allow evals to be lifted from a loop}
+
+\begin{code}
+module LiberateCase ( liberateCase ) where
+
+#include "HsVersions.h"
+
+import DynFlags ( DynFlags, DynFlag(..) )
+import StaticFlags ( opt_LiberateCaseThreshold )
+import CoreLint ( showPass, endPass )
+import CoreSyn
+import CoreUnfold ( couldBeSmallEnoughToInline )
+import Id ( Id, setIdName, idName, setIdNotExported )
+import VarEnv
+import Name ( localiseName )
+import Outputable
+import Util ( notNull )
+\end{code}
+
+This module walks over @Core@, and looks for @case@ on free variables.
+The criterion is:
+ if there is a case on a free variable on the route to the recursive call,
+ then the recursive call is replaced with an unfolding.
+
+Example
+
+\begin{verbatim}
+f = \ t -> case v of
+ V a b -> a : f t
+\end{verbatim}
+
+=> the inner f is replaced.
+
+\begin{verbatim}
+f = \ t -> case v of
+ V a b -> a : (letrec
+ f = \ t -> case v of
+ V a b -> a : f t
+ in f) t
+\end{verbatim}
+(note the NEED for shadowing)
+
+=> Simplify
+
+\begin{verbatim}
+f = \ t -> case v of
+ V a b -> a : (letrec
+ f = \ t -> a : f t
+ in f t)
+\end{verbatim}
+
+Better code, because 'a' is free inside the inner letrec, rather
+than needing projection from v.
+
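+At source level, the shape of definition being targeted is roughly the
+following (an illustrative sketch only; f, go and v are made-up names):
+
+\begin{verbatim}
+f :: (Int, Int) -> [Int] -> Int
+f v ys = go ys
+  where
+    -- 'v' is free in the recursive 'go' and is scrutinised on the route to
+    -- the recursive call, so that call gets replaced by an unfolding of
+    -- 'go'; after simplification the inner loop can use 'a' directly
+    -- instead of re-scrutinising 'v' on every iteration.
+    go []     = 0
+    go (x:xs) = case v of (a, _) -> a + x + go xs
+\end{verbatim}
+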
+Other examples we'd like to catch with this kind of transformation
+
+ last [] = error
+ last (x:[]) = x
+ last (x:xs) = last xs
+
+We'd like to avoid the redundant pattern match, transforming to
+
+ last [] = error
+ last (x:[]) = x
+ last (x:(y:ys)) = last' y ys
+ where
+ last' y [] = y
+ last' _ (y:ys) = last' y ys
+
+ (is this necessarily an improvement)
+
+
+Similarly drop:
+
+ drop n [] = []
+ drop 0 xs = xs
+ drop n (x:xs) = drop (n-1) xs
+
+Would like to pass n along unboxed.
+
+
+To think about (Apr 94)
+~~~~~~~~~~~~~~
+
+Main worry: duplicating code excessively. At the moment we duplicate
+the entire binding group once at each recursive call. But there may
+be a group of recursive calls which share a common set of evaluated
+free variables, in which case the duplication is a plain waste.
+
+Another thing we could consider adding is some unfold-threshold thing,
+so that we'll only duplicate if the size of the group rhss isn't too
+big.
+
+Data types
+~~~~~~~~~~
+
+The ``level'' of a binder tells how many recursive defns lexically
+enclose the binding. A recursive defn "encloses" its RHS, not its
+scope. For example:
+\begin{verbatim}
+ letrec f = let g = ... in ...
+ in
+ let h = ...
+ in ...
+\end{verbatim}
+Here, the level of @f@ is zero, the level of @g@ is one,
+and the level of @h@ is zero (NB not one).
+
+\begin{code}
+type LibCaseLevel = Int
+
+topLevel :: LibCaseLevel
+topLevel = 0
+\end{code}
+
+\begin{code}
+data LibCaseEnv
+ = LibCaseEnv
+ Int -- Bomb-out size for deciding if
+ -- potential liberatees are too big.
+ -- (passed in from cmd-line args)
+
+ LibCaseLevel -- Current level
+
+ (IdEnv LibCaseLevel) -- Binds all non-top-level in-scope Ids
+ -- (top-level and imported things have
+ -- a level of zero)
+
+ (IdEnv CoreBind) -- Binds *only* recursively defined
+ -- Ids, to their own binding group,
+ -- and *only* in their own RHSs
+
+ [(Id,LibCaseLevel)] -- Each of these Ids was scrutinised by an
+ -- enclosing case expression, with the
+ -- specified number of enclosing
+ -- recursive bindings; furthermore,
+ -- the Id is bound at a lower level
+ -- than the case expression. The
+ -- order is insignificant; it's a bag
+ -- really
+
+initEnv :: Int -> LibCaseEnv
+initEnv bomb_size = LibCaseEnv bomb_size 0 emptyVarEnv emptyVarEnv []
+
+bombOutSize (LibCaseEnv bomb_size _ _ _ _) = bomb_size
+\end{code}
+
+
+Programs
+~~~~~~~~
+\begin{code}
+liberateCase :: DynFlags -> [CoreBind] -> IO [CoreBind]
+liberateCase dflags binds
+ = do {
+ showPass dflags "Liberate case" ;
+ let { binds' = do_prog (initEnv opt_LiberateCaseThreshold) binds } ;
+ endPass dflags "Liberate case" Opt_D_verbose_core2core binds'
+ {- no specific flag for dumping -}
+ }
+ where
+ do_prog env [] = []
+ do_prog env (bind:binds) = bind' : do_prog env' binds
+ where
+ (env', bind') = libCaseBind env bind
+\end{code}
+
+Bindings
+~~~~~~~~
+
+\begin{code}
+libCaseBind :: LibCaseEnv -> CoreBind -> (LibCaseEnv, CoreBind)
+
+libCaseBind env (NonRec binder rhs)
+ = (addBinders env [binder], NonRec binder (libCase env rhs))
+
+libCaseBind env (Rec pairs)
+ = (env_body, Rec pairs')
+ where
+ (binders, rhss) = unzip pairs
+
+ env_body = addBinders env binders
+
+ pairs' = [(binder, libCase env_rhs rhs) | (binder,rhs) <- pairs]
+
+ env_rhs = if all rhs_small_enough rhss then extended_env else env
+
+ -- We extend the rec-env by binding each Id to its rhs, first
+ -- processing the rhs with an *un-extended* environment, so
+ -- that the same process doesn't occur for ever!
+ --
+ extended_env = addRecBinds env [ (adjust binder, libCase env_body rhs)
+ | (binder, rhs) <- pairs ]
+
+ -- Two subtle things:
+ -- (a) Reset the export flags on the binders so
+ -- that we don't get name clashes on exported things if the
+ -- local binding floats out to top level. This is most unlikely
+ -- to happen, since the whole point concerns free variables.
+ -- But resetting the export flag is right regardless.
+ --
+ -- (b) Make the name an Internal one. External Names should never be
+ -- nested; if it were floated to the top level, we'd get a name
+ -- clash at code generation time.
+ adjust bndr = setIdNotExported (setIdName bndr (localiseName (idName bndr)))
+
+ rhs_small_enough rhs = couldBeSmallEnoughToInline lIBERATE_BOMB_SIZE rhs
+ lIBERATE_BOMB_SIZE = bombOutSize env
+\end{code}
+
+
+Expressions
+~~~~~~~~~~~
+
+\begin{code}
+libCase :: LibCaseEnv
+ -> CoreExpr
+ -> CoreExpr
+
+libCase env (Var v) = libCaseId env v
+libCase env (Lit lit) = Lit lit
+libCase env (Type ty) = Type ty
+libCase env (App fun arg) = App (libCase env fun) (libCase env arg)
+libCase env (Note note body) = Note note (libCase env body)
+
+libCase env (Lam binder body)
+ = Lam binder (libCase (addBinders env [binder]) body)
+
+libCase env (Let bind body)
+ = Let bind' (libCase env_body body)
+ where
+ (env_body, bind') = libCaseBind env bind
+
+libCase env (Case scrut bndr ty alts)
+ = Case (libCase env scrut) bndr ty (map (libCaseAlt env_alts) alts)
+ where
+ env_alts = addBinders env_with_scrut [bndr]
+ env_with_scrut = case scrut of
+ Var scrut_var -> addScrutedVar env scrut_var
+ other -> env
+
+libCaseAlt env (con,args,rhs) = (con, args, libCase (addBinders env args) rhs)
+\end{code}
+
+Ids
+~~~
+\begin{code}
+libCaseId :: LibCaseEnv -> Id -> CoreExpr
+libCaseId env v
+ | Just the_bind <- lookupRecId env v -- It's a use of a recursive thing
+ , notNull free_scruts -- with free vars scrutinised in RHS
+ = Let the_bind (Var v)
+
+ | otherwise
+ = Var v
+
+ where
+ rec_id_level = lookupLevel env v
+ free_scruts = freeScruts env rec_id_level
+\end{code}
+
+
+
+Utility functions
+~~~~~~~~~~~~~~~~~
+\begin{code}
+addBinders :: LibCaseEnv -> [CoreBndr] -> LibCaseEnv
+addBinders (LibCaseEnv bomb lvl lvl_env rec_env scruts) binders
+ = LibCaseEnv bomb lvl lvl_env' rec_env scruts
+ where
+ lvl_env' = extendVarEnvList lvl_env (binders `zip` repeat lvl)
+
+addRecBinds :: LibCaseEnv -> [(Id,CoreExpr)] -> LibCaseEnv
+addRecBinds (LibCaseEnv bomb lvl lvl_env rec_env scruts) pairs
+ = LibCaseEnv bomb lvl' lvl_env' rec_env' scruts
+ where
+ lvl' = lvl + 1
+ lvl_env' = extendVarEnvList lvl_env [(binder,lvl) | (binder,_) <- pairs]
+ rec_env' = extendVarEnvList rec_env [(binder, Rec pairs) | (binder,_) <- pairs]
+
+addScrutedVar :: LibCaseEnv
+ -> Id -- This Id is being scrutinised by a case expression
+ -> LibCaseEnv
+
+addScrutedVar env@(LibCaseEnv bomb lvl lvl_env rec_env scruts) scrut_var
+ | bind_lvl < lvl
+ = LibCaseEnv bomb lvl lvl_env rec_env scruts'
+ -- Add to scruts iff the scrut_var is being scrutinised at
+ -- a deeper level than its defn
+
+ | otherwise = env
+ where
+ scruts' = (scrut_var, lvl) : scruts
+ bind_lvl = case lookupVarEnv lvl_env scrut_var of
+ Just lvl -> lvl
+ Nothing -> topLevel
+
+lookupRecId :: LibCaseEnv -> Id -> Maybe CoreBind
+lookupRecId (LibCaseEnv bomb lvl lvl_env rec_env scruts) id
+ = lookupVarEnv rec_env id
+
+lookupLevel :: LibCaseEnv -> Id -> LibCaseLevel
+lookupLevel (LibCaseEnv bomb lvl lvl_env rec_env scruts) id
+ = case lookupVarEnv lvl_env id of
+ Just lvl -> lvl
+ Nothing -> topLevel
+
+freeScruts :: LibCaseEnv
+ -> LibCaseLevel -- Level of the recursive Id
+ -> [Id] -- Ids that are scrutinised between the binding
+ -- of the recursive Id and here
+freeScruts (LibCaseEnv bomb lvl lvl_env rec_env scruts) rec_bind_lvl
+ = [v | (v,scrut_lvl) <- scruts, scrut_lvl > rec_bind_lvl]
+\end{code}
diff --git a/compiler/simplCore/OccurAnal.lhs b/compiler/simplCore/OccurAnal.lhs
new file mode 100644
index 0000000000..90a565f4dd
--- /dev/null
+++ b/compiler/simplCore/OccurAnal.lhs
@@ -0,0 +1,823 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+%************************************************************************
+%* *
+\section[OccurAnal]{Occurrence analysis pass}
+%* *
+%************************************************************************
+
+The occurrence analyser re-traverses a core expression, returning a new
+core expression with (hopefully) improved usage information.
+
+\begin{code}
+module OccurAnal (
+ occurAnalysePgm, occurAnalyseExpr
+ ) where
+
+#include "HsVersions.h"
+
+import CoreSyn
+import CoreFVs ( idRuleVars )
+import CoreUtils ( exprIsTrivial, isDefaultAlt )
+import Id ( isDataConWorkId, isOneShotBndr, setOneShotLambda,
+ idOccInfo, setIdOccInfo, isLocalId,
+ isExportedId, idArity, idSpecialisation,
+ idType, idUnique, Id
+ )
+import IdInfo ( isEmptySpecInfo )
+import BasicTypes ( OccInfo(..), isOneOcc, InterestingCxt )
+
+import VarSet
+import VarEnv
+
+import Type ( isFunTy, dropForAlls )
+import Maybes ( orElse )
+import Digraph ( stronglyConnCompR, SCC(..) )
+import PrelNames ( buildIdKey, foldrIdKey, runSTRepIdKey, augmentIdKey )
+import Unique ( Unique )
+import UniqFM ( keysUFM )
+import Util ( zipWithEqual, mapAndUnzip )
+import Outputable
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[OccurAnal-main]{Counting occurrences: main function}
+%* *
+%************************************************************************
+
+Here's the externally-callable interface:
+
+\begin{code}
+occurAnalysePgm :: [CoreBind] -> [CoreBind]
+occurAnalysePgm binds
+ = snd (go initOccEnv binds)
+ where
+ go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
+ go env []
+ = (emptyDetails, [])
+ go env (bind:binds)
+ = (final_usage, bind' ++ binds')
+ where
+ (bs_usage, binds') = go env binds
+ (final_usage, bind') = occAnalBind env bind bs_usage
+
+occurAnalyseExpr :: CoreExpr -> CoreExpr
+ -- Do occurrence analysis, and discard occurrence info returned
+occurAnalyseExpr expr = snd (occAnal initOccEnv expr)
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[OccurAnal-main]{Counting occurrences: main function}
+%* *
+%************************************************************************
+
+Bindings
+~~~~~~~~
+
+\begin{code}
+type IdWithOccInfo = Id -- An Id with fresh PragmaInfo attached
+
+type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique,
+ -- which is gotten from the Id.
+type Details1 = (Id, UsageDetails, CoreExpr)
+type Details2 = (IdWithOccInfo, CoreExpr)
+
+
+occAnalBind :: OccEnv
+ -> CoreBind
+ -> UsageDetails -- Usage details of scope
+ -> (UsageDetails, -- Of the whole let(rec)
+ [CoreBind])
+
+occAnalBind env (NonRec binder rhs) body_usage
+ | not (binder `usedIn` body_usage) -- It's not mentioned
+ = (body_usage, [])
+
+ | otherwise -- It's mentioned in the body
+ = (final_body_usage `combineUsageDetails` rhs_usage,
+ [NonRec tagged_binder rhs'])
+
+ where
+ (final_body_usage, tagged_binder) = tagBinder body_usage binder
+ (rhs_usage, rhs') = occAnalRhs env tagged_binder rhs
+\end{code}
+
+Dropping dead code for recursive bindings is done in a very simple way:
+
+ the entire set of bindings is dropped if none of its binders are
+ mentioned in its body; otherwise none are.
+
+This seems to miss an obvious improvement.
+@
+ letrec f = ...g...
+ g = ...f...
+ in
+ ...g...
+
+===>
+
+ letrec f = ...g...
+ g = ...(...g...)...
+ in
+ ...g...
+@
+
+Now @f@ is unused. But dependency analysis will sort this out into a
+@letrec@ for @g@ and a @let@ for @f@, and then @f@ will get dropped.
+It isn't easy to do a perfect job in one blow. Consider
+
+@
+ letrec f = ...g...
+ g = ...h...
+ h = ...k...
+ k = ...m...
+ m = ...m...
+ in
+ ...m...
+@
+
+
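+The dependency analysis in question is a strongly-connected-component
+computation over the binders. Here it is in miniature on the f/g example
+above, using the standard Data.Graph rather than this module's Digraph
+(the node labels are illustrative):
+
+\begin{verbatim}
+import Data.Graph (SCC(..), stronglyConnComp)
+
+-- letrec f = ...g... ; g = ...(...g...)... in ...g...
+fgSCCs :: [SCC String]
+fgSCCs = stronglyConnComp [ ("rhs of f", "f", ["g"])
+                          , ("rhs of g", "g", ["g"]) ]
+-- ==> [CyclicSCC ["rhs of g"], AcyclicSCC "rhs of f"]
+-- i.e. a letrec for g and a separate, non-recursive binding for f; since f
+-- is not mentioned in the body, its binding is then dropped as dead code.
+\end{verbatim}
+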
+\begin{code}
+occAnalBind env (Rec pairs) body_usage
+ = foldr (_scc_ "occAnalBind.dofinal" do_final_bind) (body_usage, []) sccs
+ where
+ analysed_pairs :: [Details1]
+ analysed_pairs = [ (bndr, rhs_usage, rhs')
+ | (bndr, rhs) <- pairs,
+ let (rhs_usage, rhs') = occAnalRhs env bndr rhs
+ ]
+
+ sccs :: [SCC (Node Details1)]
+ sccs = _scc_ "occAnalBind.scc" stronglyConnCompR edges
+
+
+ ---- stuff for dependency analysis of binds -------------------------------
+ edges :: [Node Details1]
+ edges = _scc_ "occAnalBind.assoc"
+ [ (details, idUnique id, edges_from rhs_usage)
+ | details@(id, rhs_usage, rhs) <- analysed_pairs
+ ]
+
+ -- (a -> b) means a mentions b
+ -- Given the usage details (a UFM that gives occ info for each free var of
+ -- the RHS) we can get the list of free vars -- or rather their Int keys --
+ -- by just extracting the keys from the finite map. Grimy, but fast.
+ -- Previously we had this:
+ -- [ bndr | bndr <- bndrs,
+ -- maybeToBool (lookupVarEnv rhs_usage bndr)]
+ -- which has n**2 cost, and this meant that edges_from alone
+ -- consumed 10% of total runtime!
+ edges_from :: UsageDetails -> [Unique]
+ edges_from rhs_usage = _scc_ "occAnalBind.edges_from"
+ keysUFM rhs_usage
+
+ ---- stuff to "re-constitute" bindings from dependency-analysis info ------
+
+ -- Non-recursive SCC
+ do_final_bind (AcyclicSCC ((bndr, rhs_usage, rhs'), _, _)) (body_usage, binds_so_far)
+ | not (bndr `usedIn` body_usage)
+ = (body_usage, binds_so_far) -- Dead code
+ | otherwise
+ = (combined_usage, new_bind : binds_so_far)
+ where
+ total_usage = combineUsageDetails body_usage rhs_usage
+ (combined_usage, tagged_bndr) = tagBinder total_usage bndr
+ new_bind = NonRec tagged_bndr rhs'
+
+ -- Recursive SCC
+ do_final_bind (CyclicSCC cycle) (body_usage, binds_so_far)
+ | not (any (`usedIn` body_usage) bndrs) -- NB: look at body_usage, not total_usage
+ = (body_usage, binds_so_far) -- Dead code
+ | otherwise
+ = (combined_usage, final_bind:binds_so_far)
+ where
+ details = [details | (details, _, _) <- cycle]
+ bndrs = [bndr | (bndr, _, _) <- details]
+ rhs_usages = [rhs_usage | (_, rhs_usage, _) <- details]
+ total_usage = foldr combineUsageDetails body_usage rhs_usages
+ (combined_usage, tagged_bndrs) = tagBinders total_usage bndrs
+ final_bind = Rec (reOrderRec env new_cycle)
+
+ new_cycle = CyclicSCC (zipWithEqual "occAnalBind" mk_new_bind tagged_bndrs cycle)
+ mk_new_bind tagged_bndr ((_, _, rhs'), key, keys) = ((tagged_bndr, rhs'), key, keys)
+\end{code}
+
+@reOrderRec@ is applied to the list of (binder,rhs) pairs for a cyclic
+strongly connected component (there's guaranteed to be a cycle). It returns the
+same pairs, but
+ a) in a better order,
+ b) with some of the Ids having an IMustNotBeINLINEd pragma
+
+The "no-inline" Ids are sufficient to break all cycles in the SCC. This means
+that the simplifier can guarantee not to loop provided it never records an inlining
+for these no-inline guys.
+
+Furthermore, the order of the binds is such that if we neglect dependencies
+on the no-inline Ids then the binds are topologically sorted. This means
+that the simplifier will generally do a good job if it works from top to bottom,
+recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
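+
+For example (an illustrative sketch; the names f and g are made up):
+@
+ letrec f = ...g...
+        g = ...f...
+ in ...f...
+@
+If @g@ is chosen as the loop breaker and marked IAmALoopBreaker, the
+simplifier may freely inline @f@; because it never records an inlining
+for @g@, the f/g cycle cannot make it loop.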
+
+==============
+[June 98: I don't understand the following paragraphs, and I've
+ changed the a=b case again so that it isn't a special case any more.]
+
+Here's a case that bit me:
+
+ letrec
+ a = b
+ b = \x. BIG
+ in
+ ...a...a...a....
+
+Re-ordering doesn't change the order of bindings, but there was no loop-breaker.
+
+My solution was to make a=b bindings record b as Many, rather like INLINE bindings.
+Perhaps something cleverer would suffice.
+===============
+
+You might think that you can prevent non-termination simply by making
+sure that we simplify a recursive binding's RHS in an environment that
+merely clones the recursive Id. But no. Consider
+
+ letrec f = \x -> let z = f x' in ...
+
+ in
+ let n = f y
+ in
+ case n of { ... }
+
+We bind n to its *simplified* RHS; we then *re-simplify* it when
+we inline n. Then we may well inline f; and then the same thing
+happens with z!
+
+I don't think it's possible to prevent non-termination by environment
+manipulation in this way. Apart from anything else, successive
+iterations of the simplifier may unroll recursive loops in cases like
+that above. The idea of breaking every recursive loop with an
+IMustNotBeINLINEd pragma is much much better.
+
+
+\begin{code}
+reOrderRec
+ :: OccEnv
+ -> SCC (Node Details2)
+ -> [Details2]
+ -- Sorted into a plausible order. Enough of the Ids have
+ -- dontINLINE pragmas that there are no loops left.
+
+ -- Non-recursive case
+reOrderRec env (AcyclicSCC (bind, _, _)) = [bind]
+
+ -- Common case of simple self-recursion
+reOrderRec env (CyclicSCC [bind])
+ = [(setIdOccInfo tagged_bndr IAmALoopBreaker, rhs)]
+ where
+ ((tagged_bndr, rhs), _, _) = bind
+
+reOrderRec env (CyclicSCC (bind : binds))
+ = -- Choose a loop breaker, mark it no-inline,
+ -- do SCC analysis on the rest, and recursively sort them out
+ concat (map (reOrderRec env) (stronglyConnCompR unchosen))
+ ++
+ [(setIdOccInfo tagged_bndr IAmALoopBreaker, rhs)]
+
+ where
+ (chosen_pair, unchosen) = choose_loop_breaker bind (score bind) [] binds
+ (tagged_bndr, rhs) = chosen_pair
+
+ -- This loop looks for the bind with the lowest score
+ -- to pick as the loop breaker. The rest accumulate in the `acc' argument.
+ choose_loop_breaker (details,_,_) loop_sc acc []
+ = (details, acc) -- Done
+
+ choose_loop_breaker loop_bind loop_sc acc (bind : binds)
+ | sc < loop_sc -- Lower score so pick this new one
+ = choose_loop_breaker bind sc (loop_bind : acc) binds
+
+ | otherwise -- No lower so don't pick it
+ = choose_loop_breaker loop_bind loop_sc (bind : acc) binds
+ where
+ sc = score bind
+
+ score :: Node Details2 -> Int -- Higher score => less likely to be picked as loop breaker
+ score ((bndr, rhs), _, _)
+ | exprIsTrivial rhs = 4 -- Practically certain to be inlined
+ -- Used to have also: && not (isExportedId bndr)
+ -- But I found this sometimes cost an extra iteration when we have
+ -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
+ -- where df is the exported dictionary. Then df makes a really
+ -- bad choice for loop breaker
+
+ | not_fun_ty (idType bndr) = 3 -- Data types help with cases
+ -- This used to have a lower score than inlineCandidate, but
+ -- it's *really* helpful if dictionaries get inlined fast,
+ -- so I'm experimenting with giving higher priority to data-typed things
+
+ | inlineCandidate bndr rhs = 2 -- Likely to be inlined
+
+ | not (isEmptySpecInfo (idSpecialisation bndr)) = 1
+ -- Avoid things with specialisations; we'd like
+ -- to take advantage of them in the subsequent bindings
+
+ | otherwise = 0
+
+ inlineCandidate :: Id -> CoreExpr -> Bool
+ inlineCandidate id (Note InlineMe _) = True
+ inlineCandidate id rhs = isOneOcc (idOccInfo id)
+
+ -- Real example (the Enum Ordering instance from PrelBase):
+ -- rec f = \ x -> case d of (p,q,r) -> p x
+ -- g = \ x -> case d of (p,q,r) -> q x
+ -- d = (v, f, g)
+ --
+ -- Here, f and g occur just once; but we can't inline them into d.
+ -- On the other hand we *could* simplify those case expressions if
+ -- we didn't stupidly choose d as the loop breaker.
+ -- But we won't because constructor args are marked "Many".
+
+ not_fun_ty ty = not (isFunTy (dropForAlls ty))
+\end{code}
+
+@occAnalRhs@ deals with the question of bindings where the Id is marked
+by an INLINE pragma. For these we record that anything which occurs
+in its RHS occurs many times. This pessimistically assumes that the
+inlined binder also occurs many times in its scope, but if it doesn't
+we'll catch it next time round. At worst this costs an extra simplifier pass.
+ToDo: try using the occurrence info for the inline'd binder.
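+
+For example (an illustrative sketch only):
+@
+ {-# INLINE f #-}
+ f = \x -> ...g...
+@
+Even though @g@ occurs just once in @f@'s RHS, @f@ may be inlined at many
+call sites, each bringing a copy of that RHS with it, so we conservatively
+record @g@ as occurring many times.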
+
+[March 97] We do the same for atomic RHSs. Reason: see notes with reOrderRec.
+[June 98, SLPJ] I've undone this change; I don't understand it. See notes with reOrderRec.
+
+
+\begin{code}
+occAnalRhs :: OccEnv
+ -> Id -> CoreExpr -- Binder and rhs
+ -- For non-recs the binder is already tagged
+ -- with occurrence info
+ -> (UsageDetails, CoreExpr)
+
+occAnalRhs env id rhs
+ = (final_usage, rhs')
+ where
+ (rhs_usage, rhs') = occAnal ctxt rhs
+ ctxt | certainly_inline id = env
+ | otherwise = rhsCtxt
+ -- Note that we generally use an rhsCtxt. This tells the occ anal
+ -- that it's looking at an RHS, which has an effect in occAnalApp
+ --
+ -- But there's a problem. Consider
+ -- x1 = a0 : []
+ -- x2 = a1 : x1
+ -- x3 = a2 : x2
+ -- g = f x3
+ -- First time round, it looks as if x1 and x2 occur as an arg of a
+ -- let-bound constructor ==> give them a many-occurrence.
+ -- But then x3 is inlined (unconditionally as it happens) and
+ -- next time round, x2 will be, and the next time round x1 will be
+ -- Result: multiple simplifier iterations. Sigh.
+ -- Crude solution: use rhsCtxt for things that occur just once...
+
+ certainly_inline id = case idOccInfo id of
+ OneOcc in_lam one_br _ -> not in_lam && one_br
+ other -> False
+
+ -- [March 98] A new wrinkle is that if the binder has specialisations inside
+ -- it then we count the specialised Ids as "extra rhs's". That way
+ -- the "parent" keeps the specialised "children" alive. If the parent
+ -- dies (because it isn't referenced any more), then the children will
+ -- die too unless they are already referenced directly.
+
+ final_usage = addRuleUsage rhs_usage id
+
+addRuleUsage :: UsageDetails -> Id -> UsageDetails
+-- Add the usage from RULES in Id to the usage
+addRuleUsage usage id
+ = foldVarSet add usage (idRuleVars id)
+ where
+ add v u = addOneOcc u v NoOccInfo -- Give a non-committal binder info
+ -- (i.e manyOcc) because many copies
+ -- of the specialised thing can appear
+\end{code}
+
+Expressions
+~~~~~~~~~~~
+\begin{code}
+occAnal :: OccEnv
+ -> CoreExpr
+ -> (UsageDetails, -- Gives info only about the "interesting" Ids
+ CoreExpr)
+
+occAnal env (Type t) = (emptyDetails, Type t)
+occAnal env (Var v) = (mkOneOcc env v False, Var v)
+ -- At one stage, I gathered the idRuleVars for v here too,
+ -- which in a way is the right thing to do.
+ -- But that went wrong right after specialisation, when
+ -- the *occurrences* of the overloaded function didn't have any
+ -- rules in them, so the *specialised* versions looked as if they
+ -- weren't used at all.
+\end{code}
+
+We regard variables that occur as constructor arguments as "dangerousToDup":
+
+\begin{verbatim}
+module A where
+f x = let y = expensive x in
+ let z = (True,y) in
+ (case z of {(p,q)->q}, case z of {(p,q)->q})
+\end{verbatim}
+
+We feel free to duplicate the WHNF (True,y), but that means
+that y may be duplicated thereby.
+
+If we aren't careful we duplicate the (expensive x) call!
+Constructors are rather like lambdas in this way.
+
+\begin{code}
+occAnal env expr@(Lit lit) = (emptyDetails, expr)
+\end{code}
+
+\begin{code}
+occAnal env (Note InlineMe body)
+ = case occAnal env body of { (usage, body') ->
+ (mapVarEnv markMany usage, Note InlineMe body')
+ }
+
+occAnal env (Note note@(SCC cc) body)
+ = case occAnal env body of { (usage, body') ->
+ (mapVarEnv markInsideSCC usage, Note note body')
+ }
+
+occAnal env (Note note body)
+ = case occAnal env body of { (usage, body') ->
+ (usage, Note note body')
+ }
+\end{code}
+
+\begin{code}
+occAnal env app@(App fun arg)
+ = occAnalApp env (collectArgs app) False
+
+-- Ignore type variables altogether
+-- (a) occurrences inside (only) type lambdas are not marked as InsideLam
+-- (b) type variables not in environment
+
+occAnal env expr@(Lam x body) | isTyVar x
+ = case occAnal env body of { (body_usage, body') ->
+ (body_usage, Lam x body')
+ }
+
+-- For value lambdas we do a special hack. Consider
+-- (\x. \y. ...x...)
+-- If we did nothing, x is used inside the \y, so would be marked
+-- as dangerous to dup. But in the common case where the abstraction
+-- is applied to two arguments this is over-pessimistic.
+-- So instead, we just mark each binder with its occurrence
+-- info in the *body* of the multiple lambda.
+-- Then, the simplifier is careful when partially applying lambdas.
+
+occAnal env expr@(Lam _ _)
+ = case occAnal env_body body of { (body_usage, body') ->
+ let
+ (final_usage, tagged_binders) = tagBinders body_usage binders
+ -- URGH! Sept 99: we don't seem to be able to use binders' here, because
+ -- we get linear-typed things in the resulting program that we can't handle yet.
+ -- (e.g. PrelShow) TODO
+
+ really_final_usage = if linear then
+ final_usage
+ else
+ mapVarEnv markInsideLam final_usage
+ in
+ (really_final_usage,
+ mkLams tagged_binders body') }
+ where
+ env_body = vanillaCtxt -- Body is (no longer) an RhsContext
+ (binders, body) = collectBinders expr
+ binders' = oneShotGroup env binders
+ linear = all is_one_shot binders'
+ is_one_shot b = isId b && isOneShotBndr b
+
+occAnal env (Case scrut bndr ty alts)
+ = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
+ case mapAndUnzip (occAnalAlt env bndr) alts of { (alts_usage_s, alts') ->
+ let
+ alts_usage = foldr1 combineAltsUsageDetails alts_usage_s
+ alts_usage' = addCaseBndrUsage alts_usage
+ (alts_usage1, tagged_bndr) = tagBinder alts_usage' bndr
+ total_usage = scrut_usage `combineUsageDetails` alts_usage1
+ in
+ total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
+ where
+ -- The case binder gets a usage of either "many" or "dead", never "one".
+ -- Reason: we like to inline single occurrences, to eliminate a binding,
+ -- but inlining a case binder *doesn't* eliminate a binding.
+ -- We *don't* want to transform
+ -- case x of w { (p,q) -> f w }
+ -- into
+ -- case x of w { (p,q) -> f (p,q) }
+ addCaseBndrUsage usage = case lookupVarEnv usage bndr of
+ Nothing -> usage
+ Just occ -> extendVarEnv usage bndr (markMany occ)
+
+ occ_anal_scrut (Var v) (alt1 : other_alts)
+ | not (null other_alts) || not (isDefaultAlt alt1)
+ = (mkOneOcc env v True, Var v)
+ occ_anal_scrut scrut alts = occAnal vanillaCtxt scrut
+ -- No need for rhsCtxt
+
+occAnal env (Let bind body)
+ = case occAnal env body of { (body_usage, body') ->
+ case occAnalBind env bind body_usage of { (final_usage, new_binds) ->
+ (final_usage, mkLets new_binds body') }}
+
+occAnalArgs env args
+ = case mapAndUnzip (occAnal arg_env) args of { (arg_uds_s, args') ->
+ (foldr combineUsageDetails emptyDetails arg_uds_s, args')}
+ where
+ arg_env = vanillaCtxt
+\end{code}
+
+Applications are dealt with specially because we want
+the "build hack" to work.
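+
+For example (an illustrative sketch, not taken from the code below):
+@
+ build (\c n -> c x1 (c x2 n))
+@
+Here @build@ applies its function argument exactly once, so we analyse the
+(\c n -> ...) in a linearity context that marks @c@ and @n@ as one-shot
+lambdas; @augment@, @foldr@ and @runSTRep@ get similar treatment below.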
+
+\begin{code}
+-- Hack for build, fold, runST
+occAnalApp env (Var fun, args) is_rhs
+ = case args_stuff of { (args_uds, args') ->
+ let
+ -- We mark the free vars of the argument of a constructor or PAP
+ -- as "many", if it is the RHS of a let(rec).
+ -- This means that nothing gets inlined into a constructor argument
+ -- position, which is what we want. Typically those constructor
+ -- arguments are just variables, or trivial expressions.
+ --
+ -- This is the *whole point* of the isRhsEnv predicate
+ final_args_uds
+ | isRhsEnv env,
+ isDataConWorkId fun || valArgCount args < idArity fun
+ = mapVarEnv markMany args_uds
+ | otherwise = args_uds
+ in
+ (fun_uds `combineUsageDetails` final_args_uds, mkApps (Var fun) args') }
+ where
+ fun_uniq = idUnique fun
+ fun_uds = mkOneOcc env fun (valArgCount args > 0)
+ args_stuff | fun_uniq == buildIdKey = appSpecial env 2 [True,True] args
+ | fun_uniq == augmentIdKey = appSpecial env 2 [True,True] args
+ | fun_uniq == foldrIdKey = appSpecial env 3 [False,True] args
+ | fun_uniq == runSTRepIdKey = appSpecial env 2 [True] args
+ -- (foldr k z xs) may call k many times, but it never
+ -- shares a partial application of k; hence [False,True]
+ -- This means we can optimise
+ -- foldr (\x -> let v = ...x... in \y -> ...v...) z xs
+ -- by floating in the v
+
+ | otherwise = occAnalArgs env args
+
+
+occAnalApp env (fun, args) is_rhs
+ = case occAnal (addAppCtxt env args) fun of { (fun_uds, fun') ->
+ -- The addAppCtxt is a bit cunning. One iteration of the simplifier
+ -- often leaves behind beta redexes like
+ -- (\x y -> e) a1 a2
+ -- Here we would like to mark x,y as one-shot, and treat the whole
+ -- thing much like a let. We do this by pushing some True items
+ -- onto the context stack.
+
+ case occAnalArgs env args of { (args_uds, args') ->
+ let
+ final_uds = fun_uds `combineUsageDetails` args_uds
+ in
+ (final_uds, mkApps fun' args') }}
+
+appSpecial :: OccEnv
+ -> Int -> CtxtTy -- Argument number, and context to use for it
+ -> [CoreExpr]
+ -> (UsageDetails, [CoreExpr])
+appSpecial env n ctxt args
+ = go n args
+ where
+ arg_env = vanillaCtxt
+
+ go n [] = (emptyDetails, []) -- Too few args
+
+ go 1 (arg:args) -- The magic arg
+ = case occAnal (setCtxt arg_env ctxt) arg of { (arg_uds, arg') ->
+ case occAnalArgs env args of { (args_uds, args') ->
+ (combineUsageDetails arg_uds args_uds, arg':args') }}
+
+ go n (arg:args)
+ = case occAnal arg_env arg of { (arg_uds, arg') ->
+ case go (n-1) args of { (args_uds, args') ->
+ (combineUsageDetails arg_uds args_uds, arg':args') }}
+\end{code}
+
+
+Case alternatives
+~~~~~~~~~~~~~~~~~
+If the case binder occurs at all, the other binders effectively do too.
+For example
+ case e of x { (a,b) -> rhs }
+is rather like
+ let x = (a,b) in rhs
+If e turns out to be (e1,e2) we indeed get something like
+ let a = e1; b = e2; x = (a,b) in rhs
+
+\begin{code}
+occAnalAlt env case_bndr (con, bndrs, rhs)
+ = case occAnal env rhs of { (rhs_usage, rhs') ->
+ let
+ (final_usage, tagged_bndrs) = tagBinders rhs_usage bndrs
+ final_bndrs | case_bndr `elemVarEnv` final_usage = bndrs
+ | otherwise = tagged_bndrs
+ -- Leave the binders untagged if the case
+ -- binder occurs at all; see note above
+ in
+ (final_usage, (con, final_bndrs, rhs')) }
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[OccurAnal-types]{OccEnv}
+%* *
+%************************************************************************
+
+\begin{code}
+data OccEnv
+ = OccEnv OccEncl -- Enclosing context information
+ CtxtTy -- Tells about linearity
+
+-- OccEncl is used to control whether to inline into constructor arguments
+-- For example:
+-- x = (p,q) -- Don't inline p or q
+-- y = /\a -> (p a, q a) -- Still don't inline p or q
+-- z = f (p,q) -- Do inline p,q; it may make a rule fire
+-- So OccEncl tells enough about the context to know what to do when
+-- we encounter a constructor application or PAP.
+
+data OccEncl
+ = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
+ -- Don't inline into constructor args here
+ | OccVanilla -- Argument of function, body of lambda, scrutinee of case etc.
+ -- Do inline into constructor args here
+
+type CtxtTy = [Bool]
+ -- [] No info
+ --
+ -- True:ctxt Analysing a function-valued expression that will be
+ -- applied just once
+ --
+ -- False:ctxt Analysing a function-valued expression that may
+ -- be applied many times; but when it is,
+ -- the CtxtTy inside applies
+
+initOccEnv :: OccEnv
+initOccEnv = OccEnv OccRhs []
+
+vanillaCtxt = OccEnv OccVanilla []
+rhsCtxt = OccEnv OccRhs []
+
+isRhsEnv (OccEnv OccRhs _) = True
+isRhsEnv (OccEnv OccVanilla _) = False
+
+setCtxt :: OccEnv -> CtxtTy -> OccEnv
+setCtxt (OccEnv encl _) ctxt = OccEnv encl ctxt
+
+oneShotGroup :: OccEnv -> [CoreBndr] -> [CoreBndr]
+ -- The result binders may have their one-shot-ness set, which they might not have had originally.
+ -- This happens in (build (\cn -> e)). Here the occurrence analyser
+ -- linearity context knows that c,n are one-shot, and it records that fact in
+ -- the binder. This is useful to guide subsequent float-in/float-out transformations
+
+oneShotGroup (OccEnv encl ctxt) bndrs
+ = go ctxt bndrs []
+ where
+ go ctxt [] rev_bndrs = reverse rev_bndrs
+
+ go (lin_ctxt:ctxt) (bndr:bndrs) rev_bndrs
+ | isId bndr = go ctxt bndrs (bndr':rev_bndrs)
+ where
+ bndr' | lin_ctxt = setOneShotLambda bndr
+ | otherwise = bndr
+
+ go ctxt (bndr:bndrs) rev_bndrs = go ctxt bndrs (bndr:rev_bndrs)
+
+addAppCtxt (OccEnv encl ctxt) args
+ = OccEnv encl (replicate (valArgCount args) True ++ ctxt)
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[OccurAnal-types]{OccEnv}
+%* *
+%************************************************************************
+
+\begin{code}
+type UsageDetails = IdEnv OccInfo -- A finite map from ids to their usage
+
+combineUsageDetails, combineAltsUsageDetails
+ :: UsageDetails -> UsageDetails -> UsageDetails
+
+combineUsageDetails usage1 usage2
+ = plusVarEnv_C addOccInfo usage1 usage2
+
+combineAltsUsageDetails usage1 usage2
+ = plusVarEnv_C orOccInfo usage1 usage2
+
+addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
+addOneOcc usage id info
+ = plusVarEnv_C addOccInfo usage (unitVarEnv id info)
+ -- ToDo: make this more efficient
+
+emptyDetails = (emptyVarEnv :: UsageDetails)
+
+usedIn :: Id -> UsageDetails -> Bool
+v `usedIn` details = isExportedId v || v `elemVarEnv` details
+
+tagBinders :: UsageDetails -- Of scope
+ -> [Id] -- Binders
+ -> (UsageDetails, -- Details with binders removed
+ [IdWithOccInfo]) -- Tagged binders
+
+tagBinders usage binders
+ = let
+ usage' = usage `delVarEnvList` binders
+ uss = map (setBinderOcc usage) binders
+ in
+ usage' `seq` (usage', uss)
+
+tagBinder :: UsageDetails -- Of scope
+ -> Id -- Binders
+ -> (UsageDetails, -- Details with binders removed
+ IdWithOccInfo) -- Tagged binders
+
+tagBinder usage binder
+ = let
+ usage' = usage `delVarEnv` binder
+ binder' = setBinderOcc usage binder
+ in
+ usage' `seq` (usage', binder')
+
+setBinderOcc :: UsageDetails -> CoreBndr -> CoreBndr
+setBinderOcc usage bndr
+ | isTyVar bndr = bndr
+ | isExportedId bndr = case idOccInfo bndr of
+ NoOccInfo -> bndr
+ other -> setIdOccInfo bndr NoOccInfo
+ -- Don't use local usage info for visible-elsewhere things
+ -- BUT *do* erase any IAmALoopBreaker annotation, because we're
+ -- about to re-generate it and it shouldn't be "sticky"
+
+ | otherwise = setIdOccInfo bndr occ_info
+ where
+ occ_info = lookupVarEnv usage bndr `orElse` IAmDead
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Operations over OccInfo}
+%* *
+%************************************************************************
+
+\begin{code}
+mkOneOcc :: OccEnv -> Id -> InterestingCxt -> UsageDetails
+mkOneOcc env id int_cxt
+ | isLocalId id = unitVarEnv id (OneOcc False True int_cxt)
+ | otherwise = emptyDetails
+
+markMany, markInsideLam, markInsideSCC :: OccInfo -> OccInfo
+
+markMany IAmDead = IAmDead
+markMany other = NoOccInfo
+
+markInsideSCC occ = markMany occ
+
+markInsideLam (OneOcc _ one_br int_cxt) = OneOcc True one_br int_cxt
+markInsideLam occ = occ
+
+addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
+
+addOccInfo IAmDead info2 = info2
+addOccInfo info1 IAmDead = info1
+addOccInfo info1 info2 = NoOccInfo
+
+-- (orOccInfo orig new) is used
+-- when combining occurrence info from branches of a case
+
+orOccInfo IAmDead info2 = info2
+orOccInfo info1 IAmDead = info1
+orOccInfo (OneOcc in_lam1 one_branch1 int_cxt1)
+ (OneOcc in_lam2 one_branch2 int_cxt2)
+ = OneOcc (in_lam1 || in_lam2)
+ False -- False, because it occurs in both branches
+ (int_cxt1 && int_cxt2)
+
+orOccInfo info1 info2 = NoOccInfo
+\end{code}
diff --git a/compiler/simplCore/SAT.lhs b/compiler/simplCore/SAT.lhs
new file mode 100644
index 0000000000..81f3c4c406
--- /dev/null
+++ b/compiler/simplCore/SAT.lhs
@@ -0,0 +1,214 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+%************************************************************************
+%* *
+\section[SAT]{Static Argument Transformation pass}
+%* *
+%************************************************************************
+
+96/03: We aren't using the static-argument transformation right now.
+
+May be seen as removing invariants from loops:
+Arguments of recursive functions that do not change in recursive
+calls are removed from the recursion, which is instead performed by a
+local function that is passed only the arguments which actually change.
+
+Example:
+map = /\ ab -> \f -> \xs -> case xs of
+ [] -> []
+ (a:b) -> f a : map f b
+
+as map is recursively called with the same argument f (unmodified)
+we transform it to
+
+map = /\ ab -> \f -> \xs -> let map' ys = case ys of
+ [] -> []
+ (a:b) -> f a : map' b
+ in map' xs
+
+Notice that for a compiler that uses lambda lifting this is
+useless, as map' will be transformed back to what map was.
+
+We could possibly do the same for big lambdas, but we don't, as
+they will eventually be removed in later stages of the compiler,
+so there is no penalty in keeping them.
+
+Experimental Evidence: Heap: +/- 7%
+ Instrs: Always improves for 2 or more Static Args.
+
+\begin{code}
+module SAT ( doStaticArgs ) where
+
+#include "HsVersions.h"
+
+import Panic ( panic )
+
+doStaticArgs = panic "SAT.doStaticArgs (ToDo)"
+
+{- LATER: to end of file:
+
+import SATMonad
+import Util
+\end{code}
+
+\begin{code}
+doStaticArgs :: [CoreBind] -> UniqSupply -> [CoreBind]
+
+doStaticArgs binds
+ = do {
+ showPass "Static argument";
+ let { binds' = initSAT (mapSAT sat_bind binds) };
+ endPass "Static argument"
+ False -- No specific flag for dumping SAT
+ binds'
+ }
+ where
+ sat_bind (NonRec binder expr)
+ = emptyEnvSAT `thenSAT_`
+ satExpr expr `thenSAT` (\ expr' ->
+ returnSAT (NonRec binder expr') )
+ sat_bind (Rec [(binder,rhs)])
+ = emptyEnvSAT `thenSAT_`
+ insSAEnv binder (getArgLists rhs) `thenSAT_`
+ satExpr rhs `thenSAT` (\ rhs' ->
+ saTransform binder rhs')
+ sat_bind (Rec pairs)
+ = emptyEnvSAT `thenSAT_`
+ mapSAT satExpr rhss `thenSAT` \ rhss' ->
+ returnSAT (Rec (zipEqual "doStaticArgs" binders rhss'))
+ where
+ (binders, rhss) = unzip pairs
+\end{code}
+
+\begin{code}
+satAtom (VarArg v)
+ = updSAEnv (Just (v,([],[]))) `thenSAT_`
+ returnSAT ()
+
+satAtom _ = returnSAT ()
+\end{code}
+
+\begin{code}
+satExpr :: CoreExpr -> SatM CoreExpr
+
+satExpr var@(Var v)
+ = updSAEnv (Just (v,([],[]))) `thenSAT_`
+ returnSAT var
+
+satExpr lit@(Lit _) = returnSAT lit
+
+satExpr e@(Prim prim ty args)
+ = mapSAT satAtom args `thenSAT_`
+ returnSAT e
+
+satExpr (Lam binders body)
+ = satExpr body `thenSAT` \ body' ->
+ returnSAT (Lam binders body')
+
+satExpr (CoTyLam tyvar body)
+ = satExpr body `thenSAT` (\ body' ->
+ returnSAT (CoTyLam tyvar body') )
+
+satExpr app@(App _ _)
+ = getAppArgs app
+
+satExpr app@(CoTyApp _ _)
+ = getAppArgs app
+
+satExpr (Case expr alts)
+ = satExpr expr `thenSAT` \ expr' ->
+ sat_alts alts `thenSAT` \ alts' ->
+ returnSAT (Case expr' alts')
+ where
+ sat_alts (AlgAlts alts deflt)
+ = mapSAT satAlgAlt alts `thenSAT` \ alts' ->
+ sat_default deflt `thenSAT` \ deflt' ->
+ returnSAT (AlgAlts alts' deflt')
+ where
+ satAlgAlt (con, params, rhs)
+ = satExpr rhs `thenSAT` \ rhs' ->
+ returnSAT (con, params, rhs')
+
+ sat_alts (PrimAlts alts deflt)
+ = mapSAT satPrimAlt alts `thenSAT` \ alts' ->
+ sat_default deflt `thenSAT` \ deflt' ->
+ returnSAT (PrimAlts alts' deflt')
+ where
+ satPrimAlt (lit, rhs)
+ = satExpr rhs `thenSAT` \ rhs' ->
+ returnSAT (lit, rhs')
+
+ sat_default NoDefault
+ = returnSAT NoDefault
+ sat_default (BindDefault binder rhs)
+ = satExpr rhs `thenSAT` \ rhs' ->
+ returnSAT (BindDefault binder rhs')
+
+satExpr (Let (NonRec binder rhs) body)
+ = satExpr body `thenSAT` \ body' ->
+ satExpr rhs `thenSAT` \ rhs' ->
+ returnSAT (Let (NonRec binder rhs') body')
+
+satExpr (Let (Rec [(binder,rhs)]) body)
+ = satExpr body `thenSAT` \ body' ->
+ insSAEnv binder (getArgLists rhs) `thenSAT_`
+ satExpr rhs `thenSAT` \ rhs' ->
+ saTransform binder rhs' `thenSAT` \ binding ->
+ returnSAT (Let binding body')
+
+satExpr (Let (Rec binds) body)
+ = let
+ (binders, rhss) = unzip binds
+ in
+ satExpr body `thenSAT` \ body' ->
+ mapSAT satExpr rhss `thenSAT` \ rhss' ->
+ returnSAT (Let (Rec (zipEqual "satExpr:Rec" binders rhss')) body')
+
+satExpr (Note note expr)
+ = satExpr expr `thenSAT` \ expr2 ->
+ returnSAT (Note note expr2)
+\end{code}
+
+\begin{code}
+getAppArgs :: CoreExpr -> SatM CoreExpr
+
+getAppArgs app
+ = get app `thenSAT` \ (app',result) ->
+ updSAEnv result `thenSAT_`
+ returnSAT app'
+ where
+ get :: CoreExpr
+ -> SatM (CoreExpr, Maybe (Id, SATInfo))
+
+ get (CoTyApp e ty)
+ = get e `thenSAT` \ (e',result) ->
+ returnSAT (
+ CoTyApp e' ty,
+ case result of
+ Nothing -> Nothing
+ Just (v,(tv,lv)) -> Just (v,(tv++[Static ty],lv))
+ )
+
+ get (App e a)
+ = get e `thenSAT` \ (e', result) ->
+ satAtom a `thenSAT_`
+ let si = case a of
+ (VarArg v) -> Static v
+ _ -> NotStatic
+ in
+ returnSAT (
+ App e' a,
+ case result of
+ Just (v,(tv,lv)) -> Just (v,(tv,lv++[si]))
+ Nothing -> Nothing
+ )
+
+ get var@(Var v)
+ = returnSAT (var, Just (v,([],[])))
+
+ get e
+ = satExpr e `thenSAT` \ e2 ->
+ returnSAT (e2, Nothing)
+-}
+\end{code}
diff --git a/compiler/simplCore/SATMonad.lhs b/compiler/simplCore/SATMonad.lhs
new file mode 100644
index 0000000000..9786f448af
--- /dev/null
+++ b/compiler/simplCore/SATMonad.lhs
@@ -0,0 +1,263 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+%************************************************************************
+%* *
+\section[SATMonad]{The Static Argument Transformation pass Monad}
+%* *
+%************************************************************************
+
+96/03: We aren't using the static-argument transformation right now.
+
+\begin{code}
+module SATMonad where
+
+#include "HsVersions.h"
+
+import Panic ( panic )
+
+junk_from_SATMonad = panic "SATMonad.junk"
+
+{- LATER: to end of file:
+
+module SATMonad (
+ SATInfo(..), updSAEnv,
+ SatM(..), initSAT, emptyEnvSAT,
+ returnSAT, thenSAT, thenSAT_, mapSAT, getSATInfo, newSATName,
+ getArgLists, Arg(..), insSAEnv, saTransform,
+
+ SATEnv(..), isStatic, dropStatics
+ ) where
+
+import Type ( mkTyVarTy, mkSigmaTy,
+ splitSigmaTy, splitFunTys,
+ glueTyArgs, substTy,
+ InstTyEnv(..)
+ )
+import MkId ( mkSysLocal )
+import Id ( idType, idName, mkLocalId )
+import UniqSupply
+import Util
+
+infixr 9 `thenSAT`, `thenSAT_`
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Static Argument Transformation Environment}
+%* *
+%************************************************************************
+
+\begin{code}
+type SATEnv = IdEnv SATInfo
+
+type SATInfo = ([Arg Type],[Arg Id])
+
+data Arg a = Static a | NotStatic
+ deriving Eq
+
+delOneFromSAEnv v us env
+ = ((), delVarEnv env v)
+
+updSAEnv :: Maybe (Id,SATInfo) -> SatM ()
+updSAEnv Nothing
+ = returnSAT ()
+updSAEnv (Just (b,(tyargs,args)))
+ = getSATInfo b `thenSAT` (\ r ->
+ case r of
+ Nothing -> returnSAT ()
+ Just (tyargs',args') -> delOneFromSAEnv b `thenSAT_`
+ insSAEnv b (checkArgs tyargs tyargs',
+ checkArgs args args')
+ )
+
+checkArgs as [] = notStatics (length as)
+checkArgs [] as = notStatics (length as)
+checkArgs (a:as) (a':as') | a == a' = a:checkArgs as as'
+checkArgs (_:as) (_:as') = NotStatic:checkArgs as as'
+
+notStatics :: Int -> [Arg a]
+notStatics n = nOfThem n NotStatic
+
+insSAEnv :: Id -> SATInfo -> SatM ()
+insSAEnv b info us env
+ = ((), extendVarEnv env b info)
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Static Argument Transformation Monad}
+%* *
+%************************************************************************
+
+Two items of state to thread around: a UniqueSupply and a SATEnv.
+
+\begin{code}
+type SatM result
+ = UniqSupply -> SATEnv -> (result, SATEnv)
+
+initSAT :: SatM a -> UniqSupply -> a
+
+initSAT f us = fst (f us emptyVarEnv)
+
+thenSAT m k us env
+ = case splitUniqSupply us of { (s1, s2) ->
+ case m s1 env of { (m_result, menv) ->
+ k m_result s2 menv }}
+
+thenSAT_ m k us env
+ = case splitUniqSupply us of { (s1, s2) ->
+ case m s1 env of { (_, menv) ->
+ k s2 menv }}
+
+emptyEnvSAT :: SatM ()
+emptyEnvSAT us _ = ((), emptyVarEnv)
+
+returnSAT v us env = (v, env)
+
+mapSAT f [] = returnSAT []
+mapSAT f (x:xs)
+ = f x `thenSAT` \ x' ->
+ mapSAT f xs `thenSAT` \ xs' ->
+ returnSAT (x':xs')
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Utility Functions}
+%* *
+%************************************************************************
+
+\begin{code}
+getSATInfo :: Id -> SatM (Maybe SATInfo)
+getSATInfo var us env
+ = (lookupVarEnv env var, env)
+
+newSATName :: Id -> Type -> SatM Id
+newSATName id ty us env
+ = case (getUnique us) of { unique ->
+ let
+ new_name = mkCompoundName SLIT("$sat") unique (idName id)
+ in
+ (mkLocalId new_name ty, env) }
+
+getArgLists :: CoreExpr -> ([Arg Type],[Arg Id])
+getArgLists expr
+ = let
+ (tvs, lambda_bounds, body) = collectBinders expr
+ in
+ ([ Static (mkTyVarTy tv) | tv <- tvs ],
+ [ Static v | v <- lambda_bounds ])
+
+dropArgs :: CoreExpr -> CoreExpr
+dropArgs (Lam _ e) = dropArgs e
+dropArgs (CoTyLam _ e) = dropArgs e
+dropArgs e = e
+\end{code}
+
+We implement saTransform using shadowing of binders; that is,
+we transform
+map = \f as -> case as of
+ [] -> []
+ (a':as') -> let x = f a'
+ y = map f as'
+ in x:y
+to
+map = \f as -> let map = \f as -> map' as
+ in let rec map' = \as -> case as of
+ [] -> []
+ (a':as') -> let x = f a'
+ y = map f as'
+ in x:y
+ in map' as
+
+The inner map should then get inlined and eliminated.
+\begin{code}
+saTransform :: Id -> CoreExpr -> SatM CoreBinding
+saTransform binder rhs
+ = getSATInfo binder `thenSAT` \ r ->
+ case r of
+ -- [Andre] test: do it only if we have more than one static argument.
+ --Just (tyargs,args) | any isStatic args
+ Just (tyargs,args) | (filter isStatic args) `lengthExceeds` 1
+ -> newSATName binder (new_ty tyargs args) `thenSAT` \ binder' ->
+ mkNewRhs binder binder' tyargs args rhs `thenSAT` \ new_rhs ->
+ trace ("SAT "++ show (length (filter isStatic args))) (
+ returnSAT (NonRec binder new_rhs)
+ )
+ _ -> returnSAT (Rec [(binder, rhs)])
+ where
+ mkNewRhs binder binder' tyargs args rhs
+ = let
+ non_static_args :: [Id]
+ non_static_args
+ = get_nsa args (snd (getArgLists rhs))
+ where
+ get_nsa :: [Arg a] -> [Arg a] -> [a]
+ get_nsa [] _ = []
+ get_nsa _ [] = []
+ get_nsa (NotStatic:args) (Static v:as) = v:get_nsa args as
+ get_nsa (_:args) (_:as) = get_nsa args as
+
+ local_body = foldl App (Var binder')
+ [VarArg a | a <- non_static_args]
+
+ nonrec_rhs = origLams local_body
+
+ -- HACK! The following is a fake SysLocal binder with
+ -- *the same* unique as binder.
+ -- the reason for this is the following:
+ -- this binder *will* get inlined but if it happens to be
+ -- a top level binder it is never removed as dead code,
+ -- therefore we have to remove that information (of it being
+ -- top-level or exported somehow.)
+ -- A better fix is to use binder directly but with the TopLevel
+ -- tag (or Exported tag) modified.
+ fake_binder = mkSysLocal SLIT("sat")
+ (getUnique binder)
+ (idType binder)
+ rec_body = mkValLam non_static_args
+ ( Let (NonRec fake_binder nonrec_rhs)
+ {-in-} (dropArgs rhs))
+ in
+ returnSAT (
+ origLams (Let (Rec [(binder',rec_body)]) {-in-} local_body)
+ )
+ where
+ origLams = origLams' rhs
+ where
+ origLams' (Lam v e) e' = Lam v (origLams' e e')
+ origLams' (CoTyLam ty e) e' = CoTyLam ty (origLams' e e')
+ origLams' _ e' = e'
+
+ new_ty tyargs args
+ = substTy (mk_inst_tyenv tyargs tv_tmpl)
+ (mkSigmaTy tv_tmpl' dict_tys' tau_ty')
+ where
+ -- get type info for the local function:
+ (tv_tmpl, dict_tys, tau_ty) = (splitSigmaTy . idType) binder
+ (reg_arg_tys, res_type) = splitFunTys tau_ty
+
+ -- now, we drop the ones that are
+ -- static, that is, the ones we will not pass to the local function
+ tv_tmpl' = dropStatics tyargs tv_tmpl
+
+ (args1, args2) = splitAtList dict_tys args
+ dict_tys' = dropStatics args1 dict_tys
+ reg_arg_tys' = dropStatics args2 reg_arg_tys
+
+ tau_ty' = glueTyArgs reg_arg_tys' res_type
+
+ mk_inst_tyenv [] _ = emptyVarEnv
+ mk_inst_tyenv (Static s:args) (t:ts) = extendVarEnv (mk_inst_tyenv args ts) t s
+ mk_inst_tyenv (_:args) (_:ts) = mk_inst_tyenv args ts
+
+dropStatics [] t = t
+dropStatics (Static _:args) (t:ts) = dropStatics args ts
+dropStatics (_:args) (t:ts) = t:dropStatics args ts
+
+isStatic :: Arg a -> Bool
+isStatic NotStatic = False
+isStatic _ = True
+-}
+\end{code}
diff --git a/compiler/simplCore/SetLevels.lhs b/compiler/simplCore/SetLevels.lhs
new file mode 100644
index 0000000000..f8ab29dcd5
--- /dev/null
+++ b/compiler/simplCore/SetLevels.lhs
@@ -0,0 +1,847 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+\section{SetLevels}
+
+ ***************************
+ Overview
+ ***************************
+
+1. We attach binding levels to Core bindings, in preparation for floating
+ outwards (@FloatOut@).
+
+2. We also let-ify many expressions (notably case scrutinees), so they
+ will have a fighting chance of being floated sensibly.
+
+3. We clone the binders of any floatable let-binding, so that when it is
+ floated out it will be unique. (This used to be done by the simplifier
+ but the latter now only ensures that there's no shadowing; indeed, even
+ that may not be true.)
+
+ NOTE: this can't be done using the uniqAway idea, because the variable
+ must be unique in the whole program, not just its current scope,
+ because two variables in different scopes may float out to the
+ same top level place
+
+ NOTE: Very tiresomely, we must apply this substitution to
+ the rules stored inside a variable too.
+
+ We do *not* clone top-level bindings, because some of them must not change,
+ but we *do* clone bindings that are heading for the top level
+
+4. In the expression
+ case x of wild { p -> ...wild... }
+ we substitute x for wild in the RHS of the case alternatives:
+ case x of wild { p -> ...x... }
+ This means that a sub-expression involving x is not "trapped" inside the RHS.
+ And it's not inconvenient because we already have a substitution.
+
+ Note that this is EXACTLY BACKWARDS from what the simplifier does.
+ The simplifier tries to get rid of occurrences of x, in favour of wild,
+ in the hope that there will only be one remaining occurrence of x, namely
+ the scrutinee of the case, and we can inline it.
+
+\begin{code}
+module SetLevels (
+ setLevels,
+
+ Level(..), tOP_LEVEL,
+ LevelledBind, LevelledExpr,
+
+ incMinorLvl, ltMajLvl, ltLvl, isTopLvl, isInlineCtxt
+ ) where
+
+#include "HsVersions.h"
+
+import CoreSyn
+
+import DynFlags ( FloatOutSwitches(..) )
+import CoreUtils ( exprType, exprIsTrivial, exprIsCheap, mkPiTypes )
+import CoreFVs -- all of it
+import CoreSubst ( Subst, emptySubst, extendInScope, extendIdSubst,
+ cloneIdBndr, cloneRecIdBndrs )
+import Id ( Id, idType, mkSysLocal, isOneShotLambda,
+ zapDemandIdInfo,
+ idSpecialisation, idWorkerInfo, setIdInfo
+ )
+import IdInfo ( workerExists, vanillaIdInfo, isEmptySpecInfo )
+import Var ( Var )
+import VarSet
+import VarEnv
+import Name ( getOccName )
+import OccName ( occNameString )
+import Type ( isUnLiftedType, Type )
+import BasicTypes ( TopLevelFlag(..) )
+import UniqSupply
+import Util ( sortLe, isSingleton, count )
+import Outputable
+import FastString
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Level numbers}
+%* *
+%************************************************************************
+
+\begin{code}
+data Level = InlineCtxt -- A level that's used only for
+ -- the context parameter ctxt_lvl
+ | Level Int -- Level number of enclosing lambdas
+ Int -- Number of big-lambda and/or case expressions between
+ -- here and the nearest enclosing lambda
+\end{code}
+
+The {\em level number} on a (type-)lambda-bound variable is the
+nesting depth of the (type-)lambda which binds it. The outermost lambda
+has level 1, so (Level 0 0) means that the variable is bound outside any lambda.
+
+On an expression, it's the maximum level number of its free
+(type-)variables. On a let(rec)-bound variable, it's the level of its
+RHS. On a case-bound variable, it's the number of enclosing lambdas.
+
+Top-level variables: level~0. Those bound on the RHS of a top-level
+definition but ``before'' a lambda; e.g., the \tr{x} in (levels shown
+as ``subscripts'')...
+\begin{verbatim}
+a_0 = let b_? = ... in
+ x_1 = ... b ... in ...
+\end{verbatim}
+
+The main function @lvlExpr@ carries a ``context level'' (@ctxt_lvl@).
+That's meant to be the level number of the enclosing binder in the
+final (floated) program. If the level number of a sub-expression is
+less than that of the context, then it might be worth let-binding the
+sub-expression so that it will indeed float.
+
+If you can float to level @Level 0 0@, it is worth doing so, because then your
+allocation becomes static instead of dynamic. We always start with
+context @Level 0 0@.
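+
+For instance (an illustrative sketch; f and y are made-up names):
+@
+ \v -> ...(f y)...
+@
+where @f@ and @y@ are bound outside the lambda. The sub-expression (f y)
+has level 0, but the context level inside the lambda is 1, so it is worth
+let-binding (f y) and floating it out; if it reaches @Level 0 0@ its
+allocation becomes static rather than being repeated on every call.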
+
+
+InlineCtxt
+~~~~~~~~~~
+@InlineCtxt@ is very similar to @Level 0 0@, but is used for one purpose:
+to say "don't float anything out of here". That's exactly what we
+want for the body of an INLINE, where we don't want to float anything
+out at all. See notes with lvlMFE below.
+
+But, check this out:
+
+-- At one time I tried the effect of not floating anything out of an InlineMe,
+-- but it sometimes works badly. For example, consider PrelArr.done. It
+-- has the form __inline (\d. e)
+-- where e doesn't mention d. If we float this to
+-- __inline (let x = e in \d. x)
+-- things are bad. The inliner doesn't even inline it because it doesn't look
+-- like a head-normal form. So it seems a lesser evil to let things float.
+-- In SetLevels we do set the context to (Level 0 0) when we get to an InlineMe
+-- which discourages floating out.
+
+So the conclusion is: don't do any floating at all inside an InlineMe.
+(In the above example, don't float the {x=e} out of the \d.)
+
+One particular case is that of workers: we don't want to float the
+call to the worker outside the wrapper, otherwise the worker might get
+inlined into the floated expression, and an importing module won't see
+the worker at all.
+
+\begin{code}
+type LevelledExpr = TaggedExpr Level
+type LevelledBind = TaggedBind Level
+
+tOP_LEVEL = Level 0 0
+iNLINE_CTXT = InlineCtxt
+
+incMajorLvl :: Level -> Level
+-- For InlineCtxt we ignore any inc's; we don't want
+-- to do any floating at all; see notes above
+incMajorLvl InlineCtxt = InlineCtxt
+incMajorLvl (Level major minor) = Level (major+1) 0
+
+incMinorLvl :: Level -> Level
+incMinorLvl InlineCtxt = InlineCtxt
+incMinorLvl (Level major minor) = Level major (minor+1)
+
+maxLvl :: Level -> Level -> Level
+maxLvl InlineCtxt l2 = l2
+maxLvl l1 InlineCtxt = l1
+maxLvl l1@(Level maj1 min1) l2@(Level maj2 min2)
+ | (maj1 > maj2) || (maj1 == maj2 && min1 > min2) = l1
+ | otherwise = l2
+
+ltLvl :: Level -> Level -> Bool
+ltLvl any_lvl InlineCtxt = False
+ltLvl InlineCtxt (Level _ _) = True
+ltLvl (Level maj1 min1) (Level maj2 min2)
+ = (maj1 < maj2) || (maj1 == maj2 && min1 < min2)
+
+ltMajLvl :: Level -> Level -> Bool
+ -- Tells if one level belongs to a different *lambda* level from another
+ltMajLvl any_lvl InlineCtxt = False
+ltMajLvl InlineCtxt (Level maj2 _) = 0 < maj2
+ltMajLvl (Level maj1 _) (Level maj2 _) = maj1 < maj2
+
+isTopLvl :: Level -> Bool
+isTopLvl (Level 0 0) = True
+isTopLvl other = False
+
+isInlineCtxt :: Level -> Bool
+isInlineCtxt InlineCtxt = True
+isInlineCtxt other = False
+
+instance Outputable Level where
+ ppr InlineCtxt = text "<INLINE>"
+ ppr (Level maj min) = hcat [ char '<', int maj, char ',', int min, char '>' ]
+
+instance Eq Level where
+ InlineCtxt == InlineCtxt = True
+ (Level maj1 min1) == (Level maj2 min2) = maj1==maj2 && min1==min2
+ l1 == l2 = False
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Main level-setting code}
+%* *
+%************************************************************************
+
+\begin{code}
+setLevels :: FloatOutSwitches
+ -> [CoreBind]
+ -> UniqSupply
+ -> [LevelledBind]
+
+setLevels float_lams binds us
+ = initLvl us (do_them binds)
+ where
+ -- "do_them"'s main business is to thread the monad along
+ -- It gives each top binding the same empty envt, because
+ -- things unbound in the envt have level number zero implicitly
+ do_them :: [CoreBind] -> LvlM [LevelledBind]
+
+ do_them [] = returnLvl []
+ do_them (b:bs)
+ = lvlTopBind init_env b `thenLvl` \ (lvld_bind, _) ->
+ do_them bs `thenLvl` \ lvld_binds ->
+ returnLvl (lvld_bind : lvld_binds)
+
+ init_env = initialEnv float_lams
+
+lvlTopBind env (NonRec binder rhs)
+ = lvlBind TopLevel tOP_LEVEL env (AnnNonRec binder (freeVars rhs))
+ -- Rhs can have no free vars!
+
+lvlTopBind env (Rec pairs)
+ = lvlBind TopLevel tOP_LEVEL env (AnnRec [(b,freeVars rhs) | (b,rhs) <- pairs])
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Setting expression levels}
+%* *
+%************************************************************************
+
+\begin{code}
+lvlExpr :: Level -- ctxt_lvl: Level of enclosing expression
+ -> LevelEnv -- Level of in-scope names/tyvars
+ -> CoreExprWithFVs -- input expression
+ -> LvlM LevelledExpr -- Result expression
+\end{code}
+
+The @ctxt_lvl@ is, roughly, the level of the innermost enclosing
+binder. Here's an example
+
+ v = \x -> ...\y -> let r = case (..x..) of
+ ..x..
+ in ..
+
+When looking at the rhs of @r@, @ctxt_lvl@ will be 1 because that's
+the level of @r@, even though it's inside a level-2 @\y@. It's
+important that @ctxt_lvl@ is 1 and not 2 in @r@'s rhs, because we
+don't want @lvlExpr@ to turn the scrutinee of the @case@ into an MFE
+--- because it isn't a *maximal* free expression.
+
+If there were another lambda in @r@'s rhs, it would get level-2 as well.
+
+\begin{code}
+lvlExpr _ _ (_, AnnType ty) = returnLvl (Type ty)
+lvlExpr _ env (_, AnnVar v) = returnLvl (lookupVar env v)
+lvlExpr _ env (_, AnnLit lit) = returnLvl (Lit lit)
+
+lvlExpr ctxt_lvl env (_, AnnApp fun arg)
+ = lvl_fun fun `thenLvl` \ fun' ->
+ lvlMFE False ctxt_lvl env arg `thenLvl` \ arg' ->
+ returnLvl (App fun' arg')
+ where
+-- gaw 2004
+ lvl_fun (_, AnnCase _ _ _ _) = lvlMFE True ctxt_lvl env fun
+ lvl_fun other = lvlExpr ctxt_lvl env fun
+ -- We don't do MFE on partial applications generally,
+ -- but we do if the function is big and hairy, like a case
+
+lvlExpr ctxt_lvl env (_, AnnNote InlineMe expr)
+-- Don't float anything out of an InlineMe; hence the iNLINE_CTXT
+ = lvlExpr iNLINE_CTXT env expr `thenLvl` \ expr' ->
+ returnLvl (Note InlineMe expr')
+
+lvlExpr ctxt_lvl env (_, AnnNote note expr)
+ = lvlExpr ctxt_lvl env expr `thenLvl` \ expr' ->
+ returnLvl (Note note expr')
+
+-- We don't split adjacent lambdas. That is, given
+-- \x y -> (x+1,y)
+-- we don't float to give
+-- \x -> let v = x+y in \y -> (v,y)
+-- Why not? Because partial applications are fairly rare, and splitting
+-- lambdas makes them more expensive.
+
+lvlExpr ctxt_lvl env expr@(_, AnnLam bndr rhs)
+ = lvlMFE True new_lvl new_env body `thenLvl` \ new_body ->
+ returnLvl (mkLams new_bndrs new_body)
+ where
+ (bndrs, body) = collectAnnBndrs expr
+ (new_lvl, new_bndrs) = lvlLamBndrs ctxt_lvl bndrs
+ new_env = extendLvlEnv env new_bndrs
+ -- At one time we called a special version of collectBinders,
+ -- which ignored coercions, because we don't want to split
+ -- a lambda like this (\x -> coerce t (\s -> ...))
+ -- This used to happen quite a bit in state-transformer programs,
+ -- but not nearly so much now that non-recursive newtypes are transparent.
+ -- [See SetLevels rev 1.50 for a version with this approach.]
+
+lvlExpr ctxt_lvl env (_, AnnLet (AnnNonRec bndr rhs) body)
+ | isUnLiftedType (idType bndr)
+ -- Treat unlifted let-bindings (let x = b in e) just like (case b of x -> e)
+ -- That is, leave it exactly where it is
+ -- We used to float unlifted bindings too (e.g. to get a cheap primop
+ -- outside a lambda (to see how, look at lvlBind in rev 1.58)
+ -- but an unrelated change meant that these unlifed bindings
+ -- could get to the top level which is bad. And there's not much point;
+ -- unlifted bindings are always cheap, and so hardly worth floating.
+ = lvlExpr ctxt_lvl env rhs `thenLvl` \ rhs' ->
+ lvlExpr incd_lvl env' body `thenLvl` \ body' ->
+ returnLvl (Let (NonRec bndr' rhs') body')
+ where
+ incd_lvl = incMinorLvl ctxt_lvl
+ bndr' = TB bndr incd_lvl
+ env' = extendLvlEnv env [bndr']
+
+lvlExpr ctxt_lvl env (_, AnnLet bind body)
+ = lvlBind NotTopLevel ctxt_lvl env bind `thenLvl` \ (bind', new_env) ->
+ lvlExpr ctxt_lvl new_env body `thenLvl` \ body' ->
+ returnLvl (Let bind' body')
+
+lvlExpr ctxt_lvl env (_, AnnCase expr case_bndr ty alts)
+ = lvlMFE True ctxt_lvl env expr `thenLvl` \ expr' ->
+ let
+ alts_env = extendCaseBndrLvlEnv env expr' case_bndr incd_lvl
+ in
+ mapLvl (lvl_alt alts_env) alts `thenLvl` \ alts' ->
+ returnLvl (Case expr' (TB case_bndr incd_lvl) ty alts')
+ where
+ incd_lvl = incMinorLvl ctxt_lvl
+
+ lvl_alt alts_env (con, bs, rhs)
+ = lvlMFE True incd_lvl new_env rhs `thenLvl` \ rhs' ->
+ returnLvl (con, bs', rhs')
+ where
+ bs' = [ TB b incd_lvl | b <- bs ]
+ new_env = extendLvlEnv alts_env bs'
+\end{code}
+
+@lvlMFE@ is just like @lvlExpr@, except that it might let-bind
+the expression, so that it can itself be floated.
+
+[NOTE: unlifted MFEs]
+We don't float unlifted MFEs, which potentially loses big opportunities.
+For example:
+ \x -> f (h y)
+where h :: Int -> Int# is expensive. We'd like to float the (h y) outside
+the \x, but we don't because it's unboxed. Possible solution: box it.
+
+\begin{code}
+lvlMFE :: Bool -- True <=> strict context [body of case or let]
+ -> Level -- Level of innermost enclosing lambda/tylam
+ -> LevelEnv -- Level of in-scope names/tyvars
+ -> CoreExprWithFVs -- input expression
+ -> LvlM LevelledExpr -- Result expression
+
+lvlMFE strict_ctxt ctxt_lvl env (_, AnnType ty)
+ = returnLvl (Type ty)
+
+
+lvlMFE strict_ctxt ctxt_lvl env ann_expr@(fvs, _)
+ | isUnLiftedType ty -- Can't let-bind it; see [NOTE: unlifted MFEs]
+ || isInlineCtxt ctxt_lvl -- Don't float out of an __inline__ context
+ || exprIsTrivial expr -- Never float if it's trivial
+ || not good_destination
+ = -- Don't float it out
+ lvlExpr ctxt_lvl env ann_expr
+
+ | otherwise -- Float it out!
+ = lvlFloatRhs abs_vars dest_lvl env ann_expr `thenLvl` \ expr' ->
+ newLvlVar "lvl" abs_vars ty `thenLvl` \ var ->
+ returnLvl (Let (NonRec (TB var dest_lvl) expr')
+ (mkVarApps (Var var) abs_vars))
+ where
+ expr = deAnnotate ann_expr
+ ty = exprType expr
+ dest_lvl = destLevel env fvs (isFunction ann_expr)
+ abs_vars = abstractVars dest_lvl env fvs
+
+ -- A decision to float entails let-binding this thing, and we only do
+ -- that if we'll escape a value lambda, or will go to the top level.
+ good_destination
+ | dest_lvl `ltMajLvl` ctxt_lvl -- Escapes a value lambda
+ = not (exprIsCheap expr) || isTopLvl dest_lvl
+ -- Even if it escapes a value lambda, we only
+ -- float if it's not cheap (unless it'll get all the
+ -- way to the top). I've seen cases where we
+ -- float dozens of tiny free expressions, which cost
+ -- more to allocate than to evaluate.
+ -- NB: exprIsCheap is also true of bottom expressions, which
+ -- is good; we don't want to share them
+ --
+ -- It's only Really Bad to float a cheap expression out of a
+ -- strict context, because that builds a thunk that otherwise
+ -- would never be built. So another alternative would be to
+ -- add
+ -- || (strict_ctxt && not (exprIsBottom expr))
+ -- to the condition above. We should really try this out.
+
+ | otherwise -- Does not escape a value lambda
+ = isTopLvl dest_lvl -- Only float if we are going to the top level
+ && floatConsts env -- and the floatConsts flag is on
+ && not strict_ctxt -- Don't float from a strict context
+ -- We are keen to float something to the top level, even if it does not
+ -- escape a lambda, because then it needs no allocation. But it's controlled
+ -- by a flag, because doing this too early loses opportunities for RULES
+ -- which (needless to say) are important in some nofib programs
+ -- (gcd is an example).
+ --
+ -- Beware:
+ -- concat = /\ a -> foldr ..a.. (++) []
+ -- was getting turned into
+ -- concat = /\ a -> lvl a
+ -- lvl = /\ a -> foldr ..a.. (++) []
+ -- which is pretty stupid. Hence the strict_ctxt test
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Bindings}
+%* *
+%************************************************************************
+
+The binding stuff works for top level too.
+
+\begin{code}
+lvlBind :: TopLevelFlag -- Used solely to decide whether to clone
+ -> Level -- Context level; might be Top even for bindings nested in the RHS
+ -- of a top level binding
+ -> LevelEnv
+ -> CoreBindWithFVs
+ -> LvlM (LevelledBind, LevelEnv)
+
+lvlBind top_lvl ctxt_lvl env (AnnNonRec bndr rhs@(rhs_fvs,_))
+ | isInlineCtxt ctxt_lvl -- Don't do anything inside InlineMe
+ = lvlExpr ctxt_lvl env rhs `thenLvl` \ rhs' ->
+ returnLvl (NonRec (TB bndr ctxt_lvl) rhs', env)
+
+ | null abs_vars
+ = -- No type abstraction; clone existing binder
+ lvlExpr dest_lvl env rhs `thenLvl` \ rhs' ->
+ cloneVar top_lvl env bndr ctxt_lvl dest_lvl `thenLvl` \ (env', bndr') ->
+ returnLvl (NonRec (TB bndr' dest_lvl) rhs', env')
+
+ | otherwise
+ = -- Yes, type abstraction; create a new binder, extend substitution, etc
+ lvlFloatRhs abs_vars dest_lvl env rhs `thenLvl` \ rhs' ->
+ newPolyBndrs dest_lvl env abs_vars [bndr] `thenLvl` \ (env', [bndr']) ->
+ returnLvl (NonRec (TB bndr' dest_lvl) rhs', env')
+
+ where
+ bind_fvs = rhs_fvs `unionVarSet` idFreeVars bndr
+ abs_vars = abstractVars dest_lvl env bind_fvs
+ dest_lvl = destLevel env bind_fvs (isFunction rhs)
+\end{code}
+
+
+\begin{code}
+lvlBind top_lvl ctxt_lvl env (AnnRec pairs)
+ | isInlineCtxt ctxt_lvl -- Don't do anything inside InlineMe
+ = mapLvl (lvlExpr ctxt_lvl env) rhss `thenLvl` \ rhss' ->
+ returnLvl (Rec ([TB b ctxt_lvl | b <- bndrs] `zip` rhss'), env)
+
+ | null abs_vars
+ = cloneRecVars top_lvl env bndrs ctxt_lvl dest_lvl `thenLvl` \ (new_env, new_bndrs) ->
+ mapLvl (lvlExpr ctxt_lvl new_env) rhss `thenLvl` \ new_rhss ->
+ returnLvl (Rec ([TB b dest_lvl | b <- new_bndrs] `zip` new_rhss), new_env)
+
+ | isSingleton pairs && count isId abs_vars > 1
+ = -- Special case for self recursion where there are
+ -- several variables carried around: build a local loop:
+ -- poly_f = \abs_vars. \lam_vars . letrec f = \lam_vars. rhs in f lam_vars
+ -- This just makes the closures a bit smaller. If we don't do
+ -- this, allocation rises significantly on some programs
+ --
+ -- We could elaborate it for the case where there are several
+ -- mutually recursive functions, but it's quite a bit more complicated
+ --
+ -- This all seems a bit ad hoc -- sigh
+ let
+ (bndr,rhs) = head pairs
+ (rhs_lvl, abs_vars_w_lvls) = lvlLamBndrs dest_lvl abs_vars
+ rhs_env = extendLvlEnv env abs_vars_w_lvls
+ in
+ cloneVar NotTopLevel rhs_env bndr rhs_lvl rhs_lvl `thenLvl` \ (rhs_env', new_bndr) ->
+ let
+ (lam_bndrs, rhs_body) = collectAnnBndrs rhs
+ (body_lvl, new_lam_bndrs) = lvlLamBndrs rhs_lvl lam_bndrs
+ body_env = extendLvlEnv rhs_env' new_lam_bndrs
+ in
+ lvlExpr body_lvl body_env rhs_body `thenLvl` \ new_rhs_body ->
+ newPolyBndrs dest_lvl env abs_vars [bndr] `thenLvl` \ (poly_env, [poly_bndr]) ->
+ returnLvl (Rec [(TB poly_bndr dest_lvl,
+ mkLams abs_vars_w_lvls $
+ mkLams new_lam_bndrs $
+ Let (Rec [(TB new_bndr rhs_lvl, mkLams new_lam_bndrs new_rhs_body)])
+ (mkVarApps (Var new_bndr) lam_bndrs))],
+ poly_env)
+
+ | otherwise -- Non-null abs_vars
+ = newPolyBndrs dest_lvl env abs_vars bndrs `thenLvl` \ (new_env, new_bndrs) ->
+ mapLvl (lvlFloatRhs abs_vars dest_lvl new_env) rhss `thenLvl` \ new_rhss ->
+ returnLvl (Rec ([TB b dest_lvl | b <- new_bndrs] `zip` new_rhss), new_env)
+
+ where
+ (bndrs,rhss) = unzip pairs
+
+ -- Finding the free vars of the binding group is annoying
+ bind_fvs = (unionVarSets [ idFreeVars bndr `unionVarSet` rhs_fvs
+ | (bndr, (rhs_fvs,_)) <- pairs])
+ `minusVarSet`
+ mkVarSet bndrs
+
+ dest_lvl = destLevel env bind_fvs (all isFunction rhss)
+ abs_vars = abstractVars dest_lvl env bind_fvs
+
+----------------------------------------------------
+-- Three helper functions for the type-abstraction case
+
+lvlFloatRhs abs_vars dest_lvl env rhs
+ = lvlExpr rhs_lvl rhs_env rhs `thenLvl` \ rhs' ->
+ returnLvl (mkLams abs_vars_w_lvls rhs')
+ where
+ (rhs_lvl, abs_vars_w_lvls) = lvlLamBndrs dest_lvl abs_vars
+ rhs_env = extendLvlEnv env abs_vars_w_lvls
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Deciding floatability}
+%* *
+%************************************************************************
+
+\begin{code}
+lvlLamBndrs :: Level -> [CoreBndr] -> (Level, [TaggedBndr Level])
+-- Compute the levels for the binders of a lambda group
+-- The binders returned are exactly the same as the ones passed,
+-- but they are now paired with a level
+lvlLamBndrs lvl []
+ = (lvl, [])
+
+lvlLamBndrs lvl bndrs
+ = go (incMinorLvl lvl)
+ False -- Haven't bumped major level in this group
+ [] bndrs
+ where
+ go old_lvl bumped_major rev_lvld_bndrs (bndr:bndrs)
+ | isId bndr && -- Go to the next major level if this is a value binder,
+ not bumped_major && -- and we haven't already gone to the next level (one jump per group)
+ not (isOneShotLambda bndr) -- and it isn't a one-shot lambda
+ = go new_lvl True (TB bndr new_lvl : rev_lvld_bndrs) bndrs
+
+ | otherwise
+ = go old_lvl bumped_major (TB bndr old_lvl : rev_lvld_bndrs) bndrs
+
+ where
+ new_lvl = incMajorLvl old_lvl
+
+ go old_lvl _ rev_lvld_bndrs []
+ = (old_lvl, reverse rev_lvld_bndrs)
+\end{code}
+
+\begin{code}
+ -- Destination level is the max Id level of the expression
+ -- (We'll abstract the type variables, if any.)
+destLevel :: LevelEnv -> VarSet -> Bool -> Level
+destLevel env fvs is_function
+ | floatLams env
+ && is_function = tOP_LEVEL -- Send functions to top level; see
+ -- the comments with isFunction
+ | otherwise = maxIdLevel env fvs
+
+isFunction :: CoreExprWithFVs -> Bool
+-- The idea here is that we want to float *functions* to
+-- the top level. This saves no work, but
+-- (a) it can make the host function body a lot smaller,
+-- and hence inlinable.
+-- (b) it can also save allocation when the function is recursive:
+-- h = \x -> letrec f = \y -> ...f...y...x...
+-- in f x
+-- becomes
+-- f = \x y -> ...(f x)...y...x...
+-- h = \x -> f x x
+-- No allocation for f now.
+-- We may only want to do this if there are sufficiently few free
+-- variables. We certainly only want to do it for values, and not for
+-- constructors. So the simple thing is just to look for lambdas
+isFunction (_, AnnLam b e) | isId b = True
+ | otherwise = isFunction e
+isFunction (_, AnnNote n e) = isFunction e
+isFunction other = False
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Free-To-Level Monad}
+%* *
+%************************************************************************
+
+\begin{code}
+type LevelEnv = (FloatOutSwitches,
+ VarEnv Level, -- Domain is *post-cloned* TyVars and Ids
+ Subst, -- Domain is pre-cloned Ids; tracks the in-scope set
+ -- so that substitution is capture-avoiding
+ IdEnv ([Var], LevelledExpr)) -- Domain is pre-cloned Ids
+ -- We clone let-bound variables so that they are still
+ -- distinct when floated out; hence the SubstEnv/IdEnv.
+ -- (see point 3 of the module overview comment).
+ -- We also use these envs when making a variable polymorphic
+ -- because we want to float it out past a big lambda.
+ --
+ -- The SubstEnv and IdEnv always implement the same mapping, but the
+ -- SubstEnv maps to CoreExpr and the IdEnv to LevelledExpr
+ -- Since the range is always a variable or type application,
+ -- there is never any difference between the two, but sadly
+ -- the types differ. The SubstEnv is used when substituting in
+ -- a variable's IdInfo; the IdEnv when we find a Var.
+ --
+ -- In addition the IdEnv records a list of tyvars free in the
+ -- type application, just so we don't have to call freeVars on
+ -- the type application repeatedly.
+ --
+ -- The domain of both envs is *pre-cloned* Ids, though
+ --
+ -- The domain of the VarEnv Level is the *post-cloned* Ids
+
+initialEnv :: FloatOutSwitches -> LevelEnv
+initialEnv float_lams = (float_lams, emptyVarEnv, emptySubst, emptyVarEnv)
+
+floatLams :: LevelEnv -> Bool
+floatLams (FloatOutSw float_lams _, _, _, _) = float_lams
+
+floatConsts :: LevelEnv -> Bool
+floatConsts (FloatOutSw _ float_consts, _, _, _) = float_consts
+
+extendLvlEnv :: LevelEnv -> [TaggedBndr Level] -> LevelEnv
+-- Used when *not* cloning
+extendLvlEnv (float_lams, lvl_env, subst, id_env) prs
+ = (float_lams,
+ foldl add_lvl lvl_env prs,
+ foldl del_subst subst prs,
+ foldl del_id id_env prs)
+ where
+ add_lvl env (TB v l) = extendVarEnv env v l
+ del_subst env (TB v _) = extendInScope env v
+ del_id env (TB v _) = delVarEnv env v
+ -- We must remove any clone for this variable name in case of
+ -- shadowing. This bit me in the following case
+ -- (in nofib/real/gg/Spark.hs):
+ --
+ -- case ds of wild {
+ -- ... -> case e of wild {
+ -- ... -> ... wild ...
+ -- }
+ -- }
+ --
+ -- The inside occurrence of @wild@ was being replaced with @ds@,
+ -- incorrectly, because the SubstEnv was still lying around. Ouch!
+ -- KSW 2000-07.
+
+-- extendCaseBndrLvlEnv adds the mapping case-bndr->scrut-var if it can
+-- (see point 4 of the module overview comment)
+extendCaseBndrLvlEnv (float_lams, lvl_env, subst, id_env) (Var scrut_var) case_bndr lvl
+ = (float_lams,
+ extendVarEnv lvl_env case_bndr lvl,
+ extendIdSubst subst case_bndr (Var scrut_var),
+ extendVarEnv id_env case_bndr ([scrut_var], Var scrut_var))
+
+extendCaseBndrLvlEnv env scrut case_bndr lvl
+ = extendLvlEnv env [TB case_bndr lvl]
+
+extendPolyLvlEnv dest_lvl (float_lams, lvl_env, subst, id_env) abs_vars bndr_pairs
+ = (float_lams,
+ foldl add_lvl lvl_env bndr_pairs,
+ foldl add_subst subst bndr_pairs,
+ foldl add_id id_env bndr_pairs)
+ where
+ add_lvl env (v,v') = extendVarEnv env v' dest_lvl
+ add_subst env (v,v') = extendIdSubst env v (mkVarApps (Var v') abs_vars)
+ add_id env (v,v') = extendVarEnv env v ((v':abs_vars), mkVarApps (Var v') abs_vars)
+
+extendCloneLvlEnv lvl (float_lams, lvl_env, _, id_env) new_subst bndr_pairs
+ = (float_lams,
+ foldl add_lvl lvl_env bndr_pairs,
+ new_subst,
+ foldl add_id id_env bndr_pairs)
+ where
+ add_lvl env (v,v') = extendVarEnv env v' lvl
+ add_id env (v,v') = extendVarEnv env v ([v'], Var v')
+
+
+maxIdLevel :: LevelEnv -> VarSet -> Level
+maxIdLevel (_, lvl_env,_,id_env) var_set
+ = foldVarSet max_in tOP_LEVEL var_set
+ where
+ max_in in_var lvl = foldr max_out lvl (case lookupVarEnv id_env in_var of
+ Just (abs_vars, _) -> abs_vars
+ Nothing -> [in_var])
+
+ max_out out_var lvl
+ | isId out_var = case lookupVarEnv lvl_env out_var of
+ Just lvl' -> maxLvl lvl' lvl
+ Nothing -> lvl
+ | otherwise = lvl -- Ignore tyvars in *maxIdLevel*
+
+lookupVar :: LevelEnv -> Id -> LevelledExpr
+lookupVar (_, _, _, id_env) v = case lookupVarEnv id_env v of
+ Just (_, expr) -> expr
+ other -> Var v
+
+abstractVars :: Level -> LevelEnv -> VarSet -> [Var]
+ -- Find the variables in fvs, free vars of the target expression,
+ -- whose level is greater than the destination level
+ -- These are the ones we are going to abstract out
+abstractVars dest_lvl env fvs
+ = uniq (sortLe le [var | fv <- varSetElems fvs, var <- absVarsOf dest_lvl env fv])
+ where
+ -- Sort the variables so we don't get
+ -- mixed-up tyvars and Ids; it's just messy
+ v1 `le` v2 = case (isId v1, isId v2) of
+ (True, False) -> False
+ (False, True) -> True
+ other -> v1 <= v2 -- Same family
+
+ uniq :: [Var] -> [Var]
+ -- Remove adjacent duplicates; the sort will have brought them together
+ uniq (v1:v2:vs) | v1 == v2 = uniq (v2:vs)
+ | otherwise = v1 : uniq (v2:vs)
+ uniq vs = vs
+
+absVarsOf :: Level -> LevelEnv -> Var -> [Var]
+ -- If f is free in the expression, and f maps to poly_f a b c in the
+ -- current substitution, then we must report a b c as candidate type
+ -- variables
+absVarsOf dest_lvl (_, lvl_env, _, id_env) v
+ | isId v
+ = [zap av2 | av1 <- lookup_avs v, av2 <- add_tyvars av1, abstract_me av2]
+
+ | otherwise
+ = if abstract_me v then [v] else []
+
+ where
+ abstract_me v = case lookupVarEnv lvl_env v of
+ Just lvl -> dest_lvl `ltLvl` lvl
+ Nothing -> False
+
+ lookup_avs v = case lookupVarEnv id_env v of
+ Just (abs_vars, _) -> abs_vars
+ Nothing -> [v]
+
+ add_tyvars v | isId v = v : varSetElems (idFreeTyVars v)
+ | otherwise = [v]
+
+ -- We are going to lambda-abstract, so nuke any IdInfo,
+ -- and add the tyvars of the Id (if necessary)
+ zap v | isId v = WARN( workerExists (idWorkerInfo v) ||
+ not (isEmptySpecInfo (idSpecialisation v)),
+ text "absVarsOf: discarding info on" <+> ppr v )
+ setIdInfo v vanillaIdInfo
+ | otherwise = v
+\end{code}
+
+\begin{code}
+type LvlM result = UniqSM result
+
+initLvl = initUs_
+thenLvl = thenUs
+returnLvl = returnUs
+mapLvl = mapUs
+\end{code}
+
+\begin{code}
+newPolyBndrs dest_lvl env abs_vars bndrs
+ = getUniquesUs `thenLvl` \ uniqs ->
+ let
+ new_bndrs = zipWith mk_poly_bndr bndrs uniqs
+ in
+ returnLvl (extendPolyLvlEnv dest_lvl env abs_vars (bndrs `zip` new_bndrs), new_bndrs)
+ where
+ mk_poly_bndr bndr uniq = mkSysLocal (mkFastString str) uniq poly_ty
+ where
+ str = "poly_" ++ occNameString (getOccName bndr)
+ poly_ty = mkPiTypes abs_vars (idType bndr)
+
+
+newLvlVar :: String
+ -> [CoreBndr] -> Type -- Abstract wrt these bndrs
+ -> LvlM Id
+newLvlVar str vars body_ty
+ = getUniqueUs `thenLvl` \ uniq ->
+ returnUs (mkSysLocal (mkFastString str) uniq (mkPiTypes vars body_ty))
+
+-- The deeply tiresome thing is that we have to apply the substitution
+-- to the rules inside each Id. Grr. But it matters.
+
+cloneVar :: TopLevelFlag -> LevelEnv -> Id -> Level -> Level -> LvlM (LevelEnv, Id)
+cloneVar TopLevel env v ctxt_lvl dest_lvl
+ = returnUs (env, v) -- Don't clone top level things
+cloneVar NotTopLevel env@(_,_,subst,_) v ctxt_lvl dest_lvl
+ = ASSERT( isId v )
+ getUs `thenLvl` \ us ->
+ let
+ (subst', v1) = cloneIdBndr subst us v
+ v2 = zap_demand ctxt_lvl dest_lvl v1
+ env' = extendCloneLvlEnv dest_lvl env subst' [(v,v2)]
+ in
+ returnUs (env', v2)
+
+cloneRecVars :: TopLevelFlag -> LevelEnv -> [Id] -> Level -> Level -> LvlM (LevelEnv, [Id])
+cloneRecVars TopLevel env vs ctxt_lvl dest_lvl
+ = returnUs (env, vs) -- Don't clone top level things
+cloneRecVars NotTopLevel env@(_,_,subst,_) vs ctxt_lvl dest_lvl
+ = ASSERT( all isId vs )
+ getUs `thenLvl` \ us ->
+ let
+ (subst', vs1) = cloneRecIdBndrs subst us vs
+ vs2 = map (zap_demand ctxt_lvl dest_lvl) vs1
+ env' = extendCloneLvlEnv dest_lvl env subst' (vs `zip` vs2)
+ in
+ returnUs (env', vs2)
+
+ -- VERY IMPORTANT: we must zap the demand info
+ -- if the thing is going to float out past a lambda
+zap_demand dest_lvl ctxt_lvl id
+ | ctxt_lvl == dest_lvl = id -- Stays put
+ | otherwise = zapDemandIdInfo id -- Floats out
+\end{code}
+
diff --git a/compiler/simplCore/SimplCore.lhs b/compiler/simplCore/SimplCore.lhs
new file mode 100644
index 0000000000..a386a3d6b0
--- /dev/null
+++ b/compiler/simplCore/SimplCore.lhs
@@ -0,0 +1,674 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+\section[SimplCore]{Driver for simplifying @Core@ programs}
+
+\begin{code}
+module SimplCore ( core2core, simplifyExpr ) where
+
+#include "HsVersions.h"
+
+import DynFlags ( CoreToDo(..), SimplifierSwitch(..),
+ SimplifierMode(..), DynFlags, DynFlag(..), dopt,
+ getCoreToDo )
+import CoreSyn
+import HscTypes ( HscEnv(..), ModGuts(..), ExternalPackageState(..),
+ Dependencies( dep_mods ),
+ hscEPS, hptRules )
+import CSE ( cseProgram )
+import Rules ( RuleBase, emptyRuleBase, mkRuleBase, unionRuleBase,
+ extendRuleBaseList, pprRuleBase, ruleCheckProgram,
+ addSpecInfo, addIdSpecialisations )
+import PprCore ( pprCoreBindings, pprCoreExpr, pprRules )
+import OccurAnal ( occurAnalysePgm, occurAnalyseExpr )
+import IdInfo ( setNewStrictnessInfo, newStrictnessInfo,
+ setWorkerInfo, workerInfo,
+ setSpecInfo, specInfo, specInfoRules )
+import CoreUtils ( coreBindsSize )
+import Simplify ( simplTopBinds, simplExpr )
+import SimplEnv ( SimplEnv, simplBinders, mkSimplEnv, setInScopeSet )
+import SimplMonad
+import ErrUtils ( dumpIfSet, dumpIfSet_dyn, showPass )
+import CoreLint ( endPass )
+import FloatIn ( floatInwards )
+import FloatOut ( floatOutwards )
+import Id ( Id, modifyIdInfo, idInfo, isExportedId, isLocalId,
+ idSpecialisation, idName )
+import VarSet
+import VarEnv
+import NameEnv ( lookupNameEnv )
+import LiberateCase ( liberateCase )
+import SAT ( doStaticArgs )
+import Specialise ( specProgram)
+import SpecConstr ( specConstrProgram)
+import DmdAnal ( dmdAnalPgm )
+import WorkWrap ( wwTopBinds )
+#ifdef OLD_STRICTNESS
+import StrictAnal ( saBinds )
+import CprAnalyse ( cprAnalyse )
+#endif
+
+import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
+import IO ( hPutStr, stderr )
+import Outputable
+import List ( partition )
+import Maybes ( orElse )
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{The driver for the simplifier}
+%* *
+%************************************************************************
+
+\begin{code}
+core2core :: HscEnv
+ -> ModGuts
+ -> IO ModGuts
+
+core2core hsc_env guts
+ = do
+ let dflags = hsc_dflags hsc_env
+ core_todos = getCoreToDo dflags
+
+ us <- mkSplitUniqSupply 's'
+ let (cp_us, ru_us) = splitUniqSupply us
+
+ -- COMPUTE THE RULE BASE TO USE
+ (imp_rule_base, guts') <- prepareRules hsc_env guts ru_us
+
+ -- DO THE BUSINESS
+ (stats, guts'') <- doCorePasses hsc_env imp_rule_base cp_us
+ (zeroSimplCount dflags)
+ guts' core_todos
+
+ dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
+ "Grand total simplifier statistics"
+ (pprSimplCount stats)
+
+ return guts''
+
+
+simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
+ -> CoreExpr
+ -> IO CoreExpr
+-- simplifyExpr is called by the driver to simplify an
+-- expression typed in at the interactive prompt
+simplifyExpr dflags expr
+ = do {
+ ; showPass dflags "Simplify"
+
+ ; us <- mkSplitUniqSupply 's'
+
+ ; let (expr', _counts) = initSmpl dflags us $
+ simplExprGently gentleSimplEnv expr
+
+ ; dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
+ (pprCoreExpr expr')
+
+ ; return expr'
+ }
+
+gentleSimplEnv :: SimplEnv
+gentleSimplEnv = mkSimplEnv SimplGently
+ (isAmongSimpl [])
+ emptyRuleBase
+
+doCorePasses :: HscEnv
+ -> RuleBase -- the imported main rule base
+ -> UniqSupply -- uniques
+ -> SimplCount -- simplifier stats
+ -> ModGuts -- local binds in (with rules attached)
+ -> [CoreToDo] -- which passes to do
+ -> IO (SimplCount, ModGuts)
+
+doCorePasses hsc_env rb us stats guts []
+ = return (stats, guts)
+
+doCorePasses hsc_env rb us stats guts (to_do : to_dos)
+ = do
+ let (us1, us2) = splitUniqSupply us
+ (stats1, guts1) <- doCorePass to_do hsc_env us1 rb guts
+ doCorePasses hsc_env rb us2 (stats `plusSimplCount` stats1) guts1 to_dos
+
+doCorePass (CoreDoSimplify mode sws) = _scc_ "Simplify" simplifyPgm mode sws
+doCorePass CoreCSE = _scc_ "CommonSubExpr" trBinds cseProgram
+doCorePass CoreLiberateCase = _scc_ "LiberateCase" trBinds liberateCase
+doCorePass CoreDoFloatInwards = _scc_ "FloatInwards" trBinds floatInwards
+doCorePass (CoreDoFloatOutwards f) = _scc_ "FloatOutwards" trBindsU (floatOutwards f)
+doCorePass CoreDoStaticArgs = _scc_ "StaticArgs" trBinds doStaticArgs
+doCorePass CoreDoStrictness = _scc_ "Stranal" trBinds dmdAnalPgm
+doCorePass CoreDoWorkerWrapper = _scc_ "WorkWrap" trBindsU wwTopBinds
+doCorePass CoreDoSpecialising = _scc_ "Specialise" trBindsU specProgram
+doCorePass CoreDoSpecConstr = _scc_ "SpecConstr" trBindsU specConstrProgram
+doCorePass CoreDoGlomBinds = trBinds glomBinds
+doCorePass CoreDoPrintCore = observe printCore
+doCorePass (CoreDoRuleCheck phase pat) = observe (ruleCheck phase pat)
+doCorePass CoreDoNothing = observe (\ _ _ -> return ())
+#ifdef OLD_STRICTNESS
+doCorePass CoreDoOldStrictness = _scc_ "OldStrictness" trBinds doOldStrictness
+#endif
+
+#ifdef OLD_STRICTNESS
+doOldStrictness dfs binds
+ = do binds1 <- saBinds dfs binds
+ binds2 <- cprAnalyse dfs binds1
+ return binds2
+#endif
+
+printCore _ binds = dumpIfSet True "Print Core" (pprCoreBindings binds)
+
+ruleCheck phase pat dflags binds = do showPass dflags "RuleCheck"
+ printDump (ruleCheckProgram phase pat binds)
+
+-- Most passes return no stats and don't change rules
+trBinds :: (DynFlags -> [CoreBind] -> IO [CoreBind])
+ -> HscEnv -> UniqSupply -> RuleBase -> ModGuts
+ -> IO (SimplCount, ModGuts)
+trBinds do_pass hsc_env us rb guts
+ = do { binds' <- do_pass dflags (mg_binds guts)
+ ; return (zeroSimplCount dflags, guts { mg_binds = binds' }) }
+ where
+ dflags = hsc_dflags hsc_env
+
+trBindsU :: (DynFlags -> UniqSupply -> [CoreBind] -> IO [CoreBind])
+ -> HscEnv -> UniqSupply -> RuleBase -> ModGuts
+ -> IO (SimplCount, ModGuts)
+trBindsU do_pass hsc_env us rb guts
+ = do { binds' <- do_pass dflags us (mg_binds guts)
+ ; return (zeroSimplCount dflags, guts { mg_binds = binds' }) }
+ where
+ dflags = hsc_dflags hsc_env
+
+-- Observer passes just peek; don't modify the bindings at all
+observe :: (DynFlags -> [CoreBind] -> IO a)
+ -> HscEnv -> UniqSupply -> RuleBase -> ModGuts
+ -> IO (SimplCount, ModGuts)
+observe do_pass hsc_env us rb guts
+ = do { binds <- do_pass dflags (mg_binds guts)
+ ; return (zeroSimplCount dflags, guts) }
+ where
+ dflags = hsc_dflags hsc_env
+\end{code}
+
+
+
+%************************************************************************
+%* *
+\subsection{Dealing with rules}
+%* *
+%************************************************************************
+
+-- prepareRules takes the CoreBinds and rules defined in this module.
+-- It attaches those rules that are for local Ids to their binders, and
+-- returns the remaining (imported and orphan) rules as a RuleBase.
+
+\begin{code}
+prepareRules :: HscEnv
+ -> ModGuts
+ -> UniqSupply
+ -> IO (RuleBase, -- Rule base for imported things, incl
+ -- (a) rules defined in this module (orphans)
+ -- (b) rules from other modules in home package
+ -- but not things from other packages
+
+ ModGuts) -- Modified fields are
+ -- (a) Bindings have rules attached,
+ -- (b) Rules are now just orphan rules
+
+prepareRules hsc_env@(HscEnv { hsc_dflags = dflags, hsc_HPT = hpt })
+ guts@(ModGuts { mg_binds = binds, mg_deps = deps, mg_rules = local_rules })
+ us
+ = do { let -- Simplify the local rules; boringly, we need to make an in-scope set
+ -- from the local binders, to avoid warnings from Simplify.simplVar
+ local_ids = mkInScopeSet (mkVarSet (bindersOfBinds binds))
+ env = setInScopeSet gentleSimplEnv local_ids
+ (better_rules,_) = initSmpl dflags us (mapSmpl (simplRule env) local_rules)
+ home_pkg_rules = hptRules hsc_env (dep_mods deps)
+
+ -- Find the rules for locally-defined Ids; then we can attach them
+ -- to the binders in the top-level bindings
+ --
+ -- Reason
+ -- - It makes the rules easier to look up
+ -- - It means that transformation rules and specialisations for
+ -- locally defined Ids are handled uniformly
+ -- - It keeps alive things that are referred to only from a rule
+ -- (the occurrence analyser knows about rules attached to Ids)
+ -- - It makes sure that, when we apply a rule, the free vars
+ -- of the RHS are more likely to be in scope
+ -- - The imported rules are carried in the in-scope set
+ -- which is extended on each iteration by the new wave of
+ -- local binders; any rules which aren't on the binding will
+ -- thereby get dropped
+ (rules_for_locals, rules_for_imps) = partition isLocalRule better_rules
+ local_rule_base = extendRuleBaseList emptyRuleBase rules_for_locals
+ binds_w_rules = updateBinders local_rule_base binds
+
+ hpt_rule_base = mkRuleBase home_pkg_rules
+ imp_rule_base = extendRuleBaseList hpt_rule_base rules_for_imps
+
+ ; dumpIfSet_dyn dflags Opt_D_dump_rules "Transformation rules"
+ (vcat [text "Local rules", pprRules better_rules,
+ text "",
+ text "Imported rules", pprRuleBase imp_rule_base])
+
+ ; return (imp_rule_base, guts { mg_binds = binds_w_rules,
+ mg_rules = rules_for_imps })
+ }
+
+updateBinders :: RuleBase -> [CoreBind] -> [CoreBind]
+updateBinders local_rules binds
+ = map update_bndrs binds
+ where
+ update_bndrs (NonRec b r) = NonRec (update_bndr b) r
+ update_bndrs (Rec prs) = Rec [(update_bndr b, r) | (b,r) <- prs]
+
+ update_bndr bndr = case lookupNameEnv local_rules (idName bndr) of
+ Nothing -> bndr
+ Just rules -> bndr `addIdSpecialisations` rules
+ -- The binder might have some existing rules,
+ -- arising from specialisation pragmas
+\end{code}
+
+
+We must do some gentle simplification on the template (but not the RHS)
+of each rule. The case that forced me to add this was the fold/build rule,
+which without simplification looked like:
+ fold k z (build (/\a. g a)) ==> ...
+This doesn't match unless you do eta reduction on the build argument.
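+
+(Illustration only: the template above needs eta reduction of a *type*
+lambda, but the idea is the same for term lambdas. Below is a
+standalone sketch on a made-up Term type, not the CoreExpr the
+simplifier really works on; a single bottom-up pass, which is enough
+to show the point.)
+\begin{verbatim}
+-- Eta reduction:  (\x -> f x)  ==>  f,  provided x is not free in f
+data Term = Var String | App Term Term | Lam String Term
+  deriving (Eq, Show)
+
+etaReduce :: Term -> Term
+etaReduce (Lam x (App f (Var y)))
+  | x == y && not (x `freeIn` f) = etaReduce f
+etaReduce (Lam x b)              = Lam x (etaReduce b)
+etaReduce (App f a)              = App (etaReduce f) (etaReduce a)
+etaReduce t                      = t
+
+freeIn :: String -> Term -> Bool
+freeIn v (Var x)   = v == x
+freeIn v (App f a) = freeIn v f || freeIn v a
+freeIn v (Lam x b) = v /= x && freeIn v b
+
+-- etaReduce (Lam "a" (App (Var "g") (Var "a")))  ==  Var "g"
+\end{verbatim}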
+
+\begin{code}
+simplRule env rule@(BuiltinRule {})
+ = returnSmpl rule
+simplRule env rule@(Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
+ = simplBinders env bndrs `thenSmpl` \ (env, bndrs') ->
+ mapSmpl (simplExprGently env) args `thenSmpl` \ args' ->
+ simplExprGently env rhs `thenSmpl` \ rhs' ->
+ returnSmpl (rule { ru_bndrs = bndrs', ru_args = args', ru_rhs = rhs' })
+
+-- It's important that simplExprGently does eta reduction.
+-- For example, in a rule like:
+-- augment g (build h)
+-- we do not want to get
+-- augment (\a. g a) (build h)
+-- otherwise we don't match when given an argument like
+-- (\a. h a a)
+--
+-- The simplifier does indeed do eta reduction (it's in
+-- Simplify.completeLam) but only if -O is on.
+\end{code}
+
+\begin{code}
+simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
+-- Simplifies an expression
+-- does occurrence analysis, then simplification
+-- and repeats (twice currently) because one pass
+-- alone leaves tons of crud.
+-- Used (a) for user expressions typed in at the interactive prompt
+-- (b) the LHS and RHS of a RULE
+--
+-- The name 'Gently' suggests that the SimplifierMode is SimplGently,
+-- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
+-- enforce that; it just simplifies the expression twice
+
+simplExprGently env expr
+ = simplExpr env (occurAnalyseExpr expr) `thenSmpl` \ expr1 ->
+ simplExpr env (occurAnalyseExpr expr1)
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Glomming}
+%* *
+%************************************************************************
+
+\begin{code}
+glomBinds :: DynFlags -> [CoreBind] -> IO [CoreBind]
+-- Glom all binds together in one Rec, in case any
+-- transformations have introduced any new dependencies
+--
+-- NB: the global invariant is this:
+-- *** the top level bindings are never cloned, and are always unique ***
+--
+-- We sort them into dependency order, but applying transformation rules may
+-- make something at the top refer to something at the bottom:
+-- f = \x -> p (q x)
+-- h = \y -> 3
+--
+-- RULE: p (q x) = h x
+--
+-- Applying this rule makes f refer to h,
+-- although it doesn't appear to in the source program.
+-- This pass lets us control where it happens.
+--
+-- NOTICE that this cannot happen for rules whose head is a locally-defined
+-- function. It only happens for rules whose head is an imported function
+-- (p in the example above). So, for example, if the rule had been
+-- RULE: f (p x) = h x
+-- then the rule for f would be attached to f itself (in its IdInfo)
+-- by prepareRules and h would be regarded by the occurrence
+-- analyser as free in f.
+
+glomBinds dflags binds
+ = do { showPass dflags "GlomBinds" ;
+ let { recd_binds = [Rec (flattenBinds binds)] } ;
+ return recd_binds }
+ -- Not much point in printing the result...
+ -- just consumes output bandwidth
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{The driver for the simplifier}
+%* *
+%************************************************************************
+
+\begin{code}
+simplifyPgm :: SimplifierMode
+ -> [SimplifierSwitch]
+ -> HscEnv
+ -> UniqSupply
+ -> RuleBase
+ -> ModGuts
+ -> IO (SimplCount, ModGuts) -- New bindings
+
+simplifyPgm mode switches hsc_env us imp_rule_base guts
+ = do {
+ showPass dflags "Simplify";
+
+ (termination_msg, it_count, counts_out, binds')
+ <- do_iteration us 1 (zeroSimplCount dflags) (mg_binds guts) ;
+
+ dumpIfSet (dopt Opt_D_verbose_core2core dflags
+ && dopt Opt_D_dump_simpl_stats dflags)
+ "Simplifier statistics"
+ (vcat [text termination_msg <+> text "after" <+> ppr it_count <+> text "iterations",
+ text "",
+ pprSimplCount counts_out]);
+
+ endPass dflags "Simplify" Opt_D_verbose_core2core binds';
+
+ return (counts_out, guts { mg_binds = binds' })
+ }
+ where
+ dflags = hsc_dflags hsc_env
+ phase_info = case mode of
+ SimplGently -> "gentle"
+ SimplPhase n -> show n
+
+ sw_chkr = isAmongSimpl switches
+ max_iterations = intSwitchSet sw_chkr MaxSimplifierIterations `orElse` 2
+
+ do_iteration us iteration_no counts binds
+ -- iteration_no is the number of the iteration we are
+ -- about to begin, with '1' for the first
+ | iteration_no > max_iterations -- Stop if we've run out of iterations
+ = do {
+#ifdef DEBUG
+ if max_iterations > 2 then
+ hPutStr stderr ("NOTE: Simplifier still going after " ++
+ show max_iterations ++
+ " iterations; bailing out.\n")
+ else
+ return ();
+#endif
+ -- Subtract 1 from iteration_no to get the
+ -- number of iterations we actually completed
+ return ("Simplifier baled out", iteration_no - 1, counts, binds)
+ }
+
+ -- Try and force thunks off the binds; significantly reduces
+ -- space usage, especially with -O. JRS, 000620.
+ | let sz = coreBindsSize binds in sz == sz
+ = do {
+ -- Occurrence analysis
+ let { tagged_binds = _scc_ "OccAnal" occurAnalysePgm binds } ;
+ dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
+ (pprCoreBindings tagged_binds);
+
+ -- Get any new rules, and extend the rule base
+ -- We need to do this regularly, because simplification can
+ -- poke on IdInfo thunks, which in turn brings in new rules
+ -- behind the scenes. Otherwise there's a danger we'll simply
+ -- miss the rules for Ids hidden inside imported inlinings
+ eps <- hscEPS hsc_env ;
+ let { rule_base' = unionRuleBase imp_rule_base (eps_rule_base eps)
+ ; simpl_env = mkSimplEnv mode sw_chkr rule_base' } ;
+
+ -- Simplify the program
+ -- We do this with a *case* not a *let* because lazy pattern
+ -- matching bit us with bad space leak!
+ -- With a let, we ended up with
+ -- let
+ -- t = initSmpl ...
+ -- counts' = snd t
+ -- in
+ -- case t of {(_,counts') -> if counts'=0 then ... }
+ -- So the conditional didn't force counts', because the
+ -- selection got duplicated. Sigh!
+ case initSmpl dflags us1 (_scc_ "SimplTopBinds" simplTopBinds simpl_env tagged_binds) of {
+ (binds', counts') -> do {
+
+ let { all_counts = counts `plusSimplCount` counts'
+ ; herald = "Simplifier phase " ++ phase_info ++
+ ", iteration " ++ show iteration_no ++
+ " out of " ++ show max_iterations
+ } ;
+
+ -- Stop if nothing happened; don't dump output
+ if isZeroSimplCount counts' then
+ return ("Simplifier reached fixed point", iteration_no,
+ all_counts, binds')
+ else do {
+ -- Short out indirections
+ -- We do this *after* at least one run of the simplifier
+ -- because indirection-shorting uses the export flag on *occurrences*
+ -- and that isn't guaranteed to be ok until after the first run propagates
+ -- stuff from the binding site to its occurrences
+ let { binds'' = _scc_ "ZapInd" shortOutIndirections binds' } ;
+
+ -- Dump the result of this iteration
+ dumpIfSet_dyn dflags Opt_D_dump_simpl_iterations herald
+ (pprSimplCount counts') ;
+ endPass dflags herald Opt_D_dump_simpl_iterations binds'' ;
+
+ -- Loop
+ do_iteration us2 (iteration_no + 1) all_counts binds''
+ } } } }
+ where
+ (us1, us2) = splitUniqSupply us
+\end{code}
+
+
+%************************************************************************
+%* *
+ Shorting out indirections
+%* *
+%************************************************************************
+
+If we have this:
+
+ x_local = <expression>
+ ...bindings...
+ x_exported = x_local
+
+where x_exported is exported, and x_local is not, then we replace it with this:
+
+ x_exported = <expression>
+ x_local = x_exported
+ ...bindings...
+
+Without this we never get rid of the x_exported = x_local thing. This
+saves a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
+makes strictness information propagate better. This used to happen in
+the final phase, but it's tidier to do it here.
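+
+To make the swap concrete, here is a standalone sketch on a toy,
+non-recursive binding list (the names shortOut and Rhs are invented
+for the illustration; the real pass, shortOutIndirections below, works
+on CoreBinds and also transfers IdInfo and rules, as described next):
+\begin{verbatim}
+import qualified Data.Map as M
+import qualified Data.Set as S
+
+type Name = String
+data Rhs  = Expr String      -- some arbitrary right-hand side
+          | Alias Name       -- just another variable
+          deriving Show
+
+shortOut :: S.Set Name -> [(Name, Rhs)] -> [(Name, Rhs)]
+shortOut exported binds = concatMap swap binds
+  where
+    ind_env :: M.Map Name Name               -- local_id |-> exported_id
+    ind_env = M.fromList
+      [ (local, ex) | (ex, Alias local) <- binds
+                    , ex `S.member` exported
+                    , not (local `S.member` exported) ]
+
+    subjects = S.fromList (M.elems ind_env)   -- exported ids being shorted
+
+    swap (b, rhs)
+      | b `S.member` subjects         = []    -- drop the old  exported = local
+      | Just ex <- M.lookup b ind_env = [(ex, rhs), (b, Alias ex)]
+      | otherwise                     = [(b, rhs)]
+
+-- shortOut (S.fromList ["x_exported"])
+--          [("x_local", Expr "<expression>"), ("x_exported", Alias "x_local")]
+--   == [("x_exported", Expr "<expression>"), ("x_local", Alias "x_exported")]
+\end{verbatim}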
+
+STRICTNESS: if we have done strictness analysis, we want the strictness info on
+x_local to transfer to x_exported. Hence the copyIdInfo call.
+
+RULES: we want to *add* any RULES for x_local to x_exported.
+
+Note [Rules and indirection-zapping]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Problem: what if x_exported has a RULE that mentions something in ...bindings...?
+Then the things mentioned can be out of scope! Solution
+ a) Make sure that in this pass the usage-info from x_exported is
+ available for ...bindings...
+ b) If there are any such RULES, rec-ify the entire top-level.
+ It'll get sorted out next time round
+
+Messing up the rules
+~~~~~~~~~~~~~~~~~~~~
+The example that went bad on me at one stage was this one:
+
+ iterate :: (a -> a) -> a -> [a]
+ [Exported]
+ iterate = iterateList
+
+ iterateFB c f x = x `c` iterateFB c f (f x)
+ iterateList f x = x : iterateList f (f x)
+ [Not exported]
+
+ {-# RULES
+ "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
+ "iterateFB" iterateFB (:) = iterateList
+ #-}
+
+This got shorted out to:
+
+ iterateList :: (a -> a) -> a -> [a]
+ iterateList = iterate
+
+ iterateFB c f x = x `c` iterateFB c f (f x)
+ iterate f x = x : iterate f (f x)
+
+ {-# RULES
+ "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x)
+ "iterateFB" iterateFB (:) = iterate
+ #-}
+
+And now we get an infinite loop in the rule system
+ iterate f x -> build (\c _n -> iterateFB c f x)
+ -> iterateFB (:) f x
+ -> iterate f x
+
+Tiresome old solution:
+ don't do shorting out if f has rewrite rules (see shortableIdInfo)
+
+New solution (I think):
+ use rule switching-off pragmas to get rid
+ of iterateList in the first place
+
+
+Other remarks
+~~~~~~~~~~~~~
+If more than one exported thing is equal to a local thing (i.e., the
+local thing really is shared), then we only short out one of them:
+\begin{verbatim}
+ x_local = ....
+ x_exported1 = x_local
+ x_exported2 = x_local
+==>
+ x_exported1 = ....
+
+ x_exported2 = x_exported1
+\end{verbatim}
+
+We rely on prior eta reduction to simplify things like
+\begin{verbatim}
+ x_exported = /\ tyvars -> x_local tyvars
+==>
+ x_exported = x_local
+\end{verbatim}
+Hence, there's a possibility of leaving unchanged something like this:
+\begin{verbatim}
+ x_local = ....
+ x_exported1 = x_local Int
+\end{verbatim}
+By the time we've thrown away the types in STG land this
+could be eliminated. But I don't think it's very common
+and it's dangerous to do this fiddling in STG land
+because we might eliminate a binding that's mentioned in the
+unfolding for something.
+
+\begin{code}
+type IndEnv = IdEnv Id -- Maps local_id -> exported_id
+
+shortOutIndirections :: [CoreBind] -> [CoreBind]
+shortOutIndirections binds
+ | isEmptyVarEnv ind_env = binds
+ | no_need_to_flatten = binds'
+ | otherwise = [Rec (flattenBinds binds')] -- See Note [Rules and indirection-zapping]
+ where
+ ind_env = makeIndEnv binds
+ exp_ids = varSetElems ind_env -- These exported Ids are the subjects
+ exp_id_set = mkVarSet exp_ids -- of the indirection-elimination
+ no_need_to_flatten = all (null . specInfoRules . idSpecialisation) exp_ids
+ binds' = concatMap zap binds
+
+ zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
+ zap (Rec pairs) = [Rec (concatMap zapPair pairs)]
+
+ zapPair (bndr, rhs)
+ | bndr `elemVarSet` exp_id_set = []
+ | Just exp_id <- lookupVarEnv ind_env bndr = [(transferIdInfo exp_id bndr, rhs),
+ (bndr, Var exp_id)]
+ | otherwise = [(bndr,rhs)]
+
+makeIndEnv :: [CoreBind] -> IndEnv
+makeIndEnv binds
+ = foldr add_bind emptyVarEnv binds
+ where
+ add_bind :: CoreBind -> IndEnv -> IndEnv
+ add_bind (NonRec exported_id rhs) env = add_pair (exported_id, rhs) env
+ add_bind (Rec pairs) env = foldr add_pair env pairs
+
+ add_pair :: (Id,CoreExpr) -> IndEnv -> IndEnv
+ add_pair (exported_id, Var local_id) env
+ | shortMeOut env exported_id local_id = extendVarEnv env local_id exported_id
+ add_pair (exported_id, rhs) env
+ = env
+
+shortMeOut ind_env exported_id local_id
+-- The if-then-else stuff is just so I can get a pprTrace to see
+-- how often I don't get shorting out because of IdInfo stuff
+ = if isExportedId exported_id && -- Only if this is exported
+
+ isLocalId local_id && -- Only if this one is defined in this
+ -- module, so that we *can* change its
+ -- binding to be the exported thing!
+
+ not (isExportedId local_id) && -- Only if this one is not itself exported,
+ -- since the transformation will nuke it
+
+ not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for
+ then
+ True
+
+{- No longer needed
+ if isEmptySpecInfo (specInfo (idInfo exported_id)) -- Only if no rules
+ then True -- See note on "Messing up rules"
+ else
+#ifdef DEBUG
+ pprTrace "shortMeOut:" (ppr exported_id)
+#endif
+ False
+-}
+ else
+ False
+
+
+-----------------
+transferIdInfo :: Id -> Id -> Id
+transferIdInfo exported_id local_id
+ = modifyIdInfo transfer exported_id
+ where
+ local_info = idInfo local_id
+ transfer exp_info = exp_info `setNewStrictnessInfo` newStrictnessInfo local_info
+ `setWorkerInfo` workerInfo local_info
+ `setSpecInfo` addSpecInfo (specInfo exp_info)
+ (specInfo local_info)
+\end{code}
diff --git a/compiler/simplCore/SimplEnv.lhs b/compiler/simplCore/SimplEnv.lhs
new file mode 100644
index 0000000000..00f035e513
--- /dev/null
+++ b/compiler/simplCore/SimplEnv.lhs
@@ -0,0 +1,741 @@
+%
+% (c) The AQUA Project, Glasgow University, 1993-1998
+%
+\section[SimplEnv]{The simplifier environment}
+
+\begin{code}
+module SimplEnv (
+ InId, InBind, InExpr, InAlt, InArg, InType, InBinder,
+ OutId, OutTyVar, OutBind, OutExpr, OutAlt, OutArg, OutType, OutBinder,
+
+ -- The simplifier mode
+ setMode, getMode,
+
+ -- Switch checker
+ SwitchChecker, SwitchResult(..), getSwitchChecker, getSimplIntSwitch,
+ isAmongSimpl, intSwitchSet, switchIsOn,
+
+ setEnclosingCC, getEnclosingCC,
+
+ -- Environments
+ SimplEnv, mkSimplEnv, extendIdSubst, extendTvSubst,
+ zapSubstEnv, setSubstEnv,
+ getInScope, setInScope, setInScopeSet, modifyInScope, addNewInScopeIds,
+ getRules, refineSimplEnv,
+
+ SimplSR(..), mkContEx, substId,
+
+ simplNonRecBndr, simplRecBndrs, simplLamBndr, simplLamBndrs,
+ simplBinder, simplBinders, addLetIdInfo,
+ substExpr, substTy,
+
+ -- Floats
+ FloatsWith, FloatsWithExpr,
+ Floats, emptyFloats, isEmptyFloats, unitFloat, addFloats, flattenFloats,
+ allLifted, wrapFloats, floatBinds,
+ addAuxiliaryBind,
+ ) where
+
+#include "HsVersions.h"
+
+import SimplMonad
+import Id ( Id, idType, idOccInfo, idUnfolding, setIdUnfolding )
+import IdInfo ( IdInfo, vanillaIdInfo, occInfo, setOccInfo, specInfo, setSpecInfo,
+ arityInfo, setArityInfo, workerInfo, setWorkerInfo,
+ unfoldingInfo, setUnfoldingInfo, isEmptySpecInfo,
+ unknownArity, workerExists
+ )
+import CoreSyn
+import Unify ( TypeRefinement )
+import Rules ( RuleBase )
+import CoreUtils ( needsCaseBinding )
+import CostCentre ( CostCentreStack, subsumedCCS )
+import Var
+import VarEnv
+import VarSet ( isEmptyVarSet )
+import OrdList
+
+import qualified CoreSubst ( Subst, mkSubst, substExpr, substSpec, substWorker )
+import qualified Type ( substTy, substTyVarBndr )
+
+import Type ( Type, TvSubst(..), TvSubstEnv, composeTvSubst,
+ isUnLiftedType, seqType, tyVarsOfType )
+import BasicTypes ( OccInfo(..), isFragileOcc )
+import DynFlags ( SimplifierMode(..) )
+import Util ( mapAccumL )
+import Outputable
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[Simplify-types]{Type declarations}
+%* *
+%************************************************************************
+
+\begin{code}
+type InBinder = CoreBndr
+type InId = Id -- Not yet cloned
+type InType = Type -- Ditto
+type InBind = CoreBind
+type InExpr = CoreExpr
+type InAlt = CoreAlt
+type InArg = CoreArg
+
+type OutBinder = CoreBndr
+type OutId = Id -- Cloned
+type OutTyVar = TyVar -- Cloned
+type OutType = Type -- Cloned
+type OutBind = CoreBind
+type OutExpr = CoreExpr
+type OutAlt = CoreAlt
+type OutArg = CoreArg
+\end{code}
+
+%************************************************************************
+%* *
+\subsubsection{The @SimplEnv@ type}
+%* *
+%************************************************************************
+
+
+\begin{code}
+data SimplEnv
+ = SimplEnv {
+ seMode :: SimplifierMode,
+ seChkr :: SwitchChecker,
+ seCC :: CostCentreStack, -- The enclosing CCS (when profiling)
+
+ -- Rules from other modules
+ seExtRules :: RuleBase,
+
+ -- The current set of in-scope variables
+ -- They are all OutVars, and all bound in this module
+ seInScope :: InScopeSet, -- OutVars only
+
+ -- The current substitution
+ seTvSubst :: TvSubstEnv, -- InTyVar |--> OutType
+ seIdSubst :: SimplIdSubst -- InId |--> OutExpr
+ }
+
+type SimplIdSubst = IdEnv SimplSR -- InId |--> OutExpr
+
+data SimplSR
+ = DoneEx OutExpr -- Completed term
+ | DoneId OutId OccInfo -- Completed term variable, with occurrence info
+ | ContEx TvSubstEnv -- A suspended substitution
+ SimplIdSubst
+ InExpr
+\end{code}
+
+
+seInScope:
+ The in-scope part of Subst includes *all* in-scope TyVars and Ids
+ The elements of the set may have better IdInfo than the
+ occurrences of in-scope Ids, and (more important) they will
+ have a correctly-substituted type. So we use a lookup in this
+ set to replace occurrences
+
+ The Ids in the InScopeSet are replete with their Rules,
+ and as we gather info about the unfolding of an Id, we replace
+ it in the in-scope set.
+
+ The in-scope set is actually a mapping OutVar -> OutVar, and
+ in case expressions we sometimes map the scrutinee variable to the
+ case binder (see the last point of the seIdSubst notes below)
+
+seIdSubst:
+ The substitution is *apply-once* only, because InIds and OutIds can overlap.
+ For example, we generally omit mappings
+ a77 -> a77
+ from the substitution, when we decide not to clone a77, but it's quite
+ legitimate to put the mapping in the substitution anyway.
+
+ Indeed, we do so when we want to pass fragile OccInfo to the
+ occurrences of the variable; we add a substitution
+ x77 -> DoneId x77 occ
+ to record x's occurrence information.
+
+ Furthermore, consider
+ let x = case k of I# x77 -> ... in
+ let y = case k of I# x77 -> ... in ...
+ and suppose the body is strict in both x and y. Then the simplifier
+ will pull the first (case k) to the top; so the second (case k) will
+ cancel out, mapping x77 to, well, x77! But one is an in-Id and the
+ other is an out-Id.
+
+ Of course, the substitution *must* be applied! Things in its domain
+ simply aren't necessarily bound in the result.
+
+* substId adds a binding (DoneId new_id occ) to the substitution if
+ EITHER the Id's unique has changed
+ OR the Id has interesting occurrence information
+ So in effect you can only get to interesting occurrence information
+ by looking up the *old* Id; it's not really attached to the new id
+ at all.
+
+ Note, though, that the substitution isn't necessarily extended
+ if the type changes. Why not? Because of the next point:
+
+* We *always, always* finish by looking up in the in-scope set
+ any variable that doesn't get a DoneEx or DoneId hit in the substitution.
+ Reason: so that we never finish up with an "old" Id in the result.
+ An old Id might point to an old unfolding and so on... which gives a space leak.
+
+ [The DoneEx and DoneId hits map to "new" stuff.]
+
+* It follows that substExpr must not do a no-op if the substitution is empty.
+ substType is free to do so, however.
+
+* When we come to a let-binding (say) we generate new IdInfo, including an
+ unfolding, attach it to the binder, and add this newly adorned binder to
+ the in-scope set. So all subsequent occurrences of the binder will get mapped
+ to the fully-adorned binder, which is also the one put in the binding site.
+
+* The in-scope "set" usually maps x->x; we use it simply for its domain.
+ But sometimes we have two in-scope Ids that are synonyms, and should
+ map to the same target: x->x, y->x. Notably:
+ case y of x { ... }
+ That's why the "set" is actually a VarEnv Var
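+
+ A standalone sketch of that lookup discipline, with plain Data.Maps
+ standing in for the SimplIdSubst and the in-scope set (illustration
+ only; the real substId, below, also carries occurrence information
+ and handles the DoneEx/DoneId/ContEx cases):
+\begin{verbatim}
+import qualified Data.Map as M
+
+type Name = String
+data Expr = Var Name | App Expr Expr deriving Show
+
+lookupVar :: M.Map Name Expr   -- the (apply-once) substitution
+          -> M.Map Name Name   -- the in-scope "set": each name maps to
+                               -- its most up-to-date version
+          -> Name -> Expr
+lookupVar subst in_scope v =
+  case M.lookup v subst of
+    Just e  -> e        -- a substitution hit already maps to "new" stuff
+    Nothing -> Var (M.findWithDefault v v in_scope)
+                        -- otherwise *always* go via the in-scope set,
+                        -- so no "old" variable survives into the result
+\end{verbatim}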
+
+
+Note [GADT type refinement]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When we come to a GADT pattern match that refines the in-scope types, we
+ a) Refine the types of the Ids in the in-scope set, seInScope.
+ For example, consider
+ data T a where
+ Foo :: T (Bool -> Bool)
+
+ (\ (x::T a) (y::a) -> case x of { Foo -> y True })
+
+ Technically this is well-typed, but exprType will barf on the
+ (y True) unless we refine the type on y's occurrence.
+
+ b) Refine the range of the type substitution, seTvSubst.
+ Very similar reason to (a).
+
+ NB: we don't refine the range of the SimplIdSubst, because it's always
+ interpreted relative to the seInScope (see substId)
+
+For (b) we need to be a little careful. Specifically, we compose the refinement
+with the type substitution. Suppose
+ The substitution was [a->b, b->a]
+ and the refinement was [b->Int]
+ Then we want [a->Int, b->a]
+
+But also if
+ The substitution was [a->b]
+ and the refinement was [b->Int]
+ Then we want [a->Int, b->Int]
+ because b might be both an InTyVar and an OutTyVar
+
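+A tiny standalone sketch of that composition, over bare type-variable
+names (grossly simplified: a "type" here is just a tyvar name or a
+constant like Int; the real code, refineSimplEnv below, uses
+composeTvSubst on proper TvSubstEnvs):
+\begin{verbatim}
+import qualified Data.Map as M
+
+type TyVar = String
+type Ty    = String     -- either a tyvar name or a constant like "Int"
+
+-- Apply the refinement to the range of the existing substitution, and
+-- keep the refinement's own mappings for anything not already in the
+-- substitution's domain.
+composeRefinement :: M.Map TyVar Ty -> M.Map TyVar Ty -> M.Map TyVar Ty
+composeRefinement refinement subst
+  = M.map refine subst `M.union` refinement    -- left-biased union
+  where
+    refine ty = M.findWithDefault ty ty refinement
+
+-- With refinement [b -> Int]:
+--   applied to the substitution [a -> b, b -> a]  we get  [a -> Int, b -> a]
+--   applied to the substitution [a -> b]          we get  [a -> Int, b -> Int]
+\end{verbatim}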
+
+\begin{code}
+mkSimplEnv :: SimplifierMode -> SwitchChecker -> RuleBase -> SimplEnv
+mkSimplEnv mode switches rules
+ = SimplEnv { seChkr = switches, seCC = subsumedCCS,
+ seMode = mode, seInScope = emptyInScopeSet,
+ seExtRules = rules,
+ seTvSubst = emptyVarEnv, seIdSubst = emptyVarEnv }
+ -- The top level "enclosing CC" is "SUBSUMED".
+
+---------------------
+getSwitchChecker :: SimplEnv -> SwitchChecker
+getSwitchChecker env = seChkr env
+
+---------------------
+getMode :: SimplEnv -> SimplifierMode
+getMode env = seMode env
+
+setMode :: SimplifierMode -> SimplEnv -> SimplEnv
+setMode mode env = env { seMode = mode }
+
+---------------------
+getEnclosingCC :: SimplEnv -> CostCentreStack
+getEnclosingCC env = seCC env
+
+setEnclosingCC :: SimplEnv -> CostCentreStack -> SimplEnv
+setEnclosingCC env cc = env {seCC = cc}
+
+---------------------
+extendIdSubst :: SimplEnv -> Id -> SimplSR -> SimplEnv
+extendIdSubst env@(SimplEnv {seIdSubst = subst}) var res
+ = env {seIdSubst = extendVarEnv subst var res}
+
+extendTvSubst :: SimplEnv -> TyVar -> Type -> SimplEnv
+extendTvSubst env@(SimplEnv {seTvSubst = subst}) var res
+ = env {seTvSubst = extendVarEnv subst var res}
+
+---------------------
+getInScope :: SimplEnv -> InScopeSet
+getInScope env = seInScope env
+
+setInScopeSet :: SimplEnv -> InScopeSet -> SimplEnv
+setInScopeSet env in_scope = env {seInScope = in_scope}
+
+setInScope :: SimplEnv -> SimplEnv -> SimplEnv
+setInScope env env_with_in_scope = setInScopeSet env (getInScope env_with_in_scope)
+
+addNewInScopeIds :: SimplEnv -> [CoreBndr] -> SimplEnv
+ -- The new Ids are guaranteed to be freshly allocated
+addNewInScopeIds env@(SimplEnv { seInScope = in_scope, seIdSubst = id_subst }) vs
+ = env { seInScope = in_scope `extendInScopeSetList` vs,
+ seIdSubst = id_subst `delVarEnvList` vs }
+ -- Why delete? Consider
+ -- let x = a*b in (x, \x -> x+3)
+ -- We add [x |-> a*b] to the substitution, but we must
+ -- *delete* it from the substitution when going inside
+ -- the (\x -> ...)!
+
+modifyInScope :: SimplEnv -> CoreBndr -> CoreBndr -> SimplEnv
+modifyInScope env@(SimplEnv {seInScope = in_scope}) v v'
+ = env {seInScope = modifyInScopeSet in_scope v v'}
+
+---------------------
+zapSubstEnv :: SimplEnv -> SimplEnv
+zapSubstEnv env = env {seTvSubst = emptyVarEnv, seIdSubst = emptyVarEnv}
+
+setSubstEnv :: SimplEnv -> TvSubstEnv -> SimplIdSubst -> SimplEnv
+setSubstEnv env tvs ids = env { seTvSubst = tvs, seIdSubst = ids }
+
+mkContEx :: SimplEnv -> InExpr -> SimplSR
+mkContEx (SimplEnv { seTvSubst = tvs, seIdSubst = ids }) e = ContEx tvs ids e
+
+isEmptySimplSubst :: SimplEnv -> Bool
+isEmptySimplSubst (SimplEnv { seTvSubst = tvs, seIdSubst = ids })
+ = isEmptyVarEnv tvs && isEmptyVarEnv ids
+
+---------------------
+getRules :: SimplEnv -> RuleBase
+getRules = seExtRules
+\end{code}
+
+ GADT stuff
+
+Given an idempotent substitution, generated by the unifier, use it to
+refine the environment
+
+\begin{code}
+refineSimplEnv :: SimplEnv -> TypeRefinement -> SimplEnv
+-- The TvSubstEnv is the refinement, and it refines OutTyVars into OutTypes
+refineSimplEnv env@(SimplEnv { seTvSubst = tv_subst, seInScope = in_scope })
+ (refine_tv_subst, all_bound_here)
+ = env { seTvSubst = composeTvSubst in_scope refine_tv_subst tv_subst,
+ seInScope = in_scope' }
+ where
+ in_scope'
+ | all_bound_here = in_scope
+ -- The tvs are the tyvars bound here. If only they
+ -- are refined, there's no need to do anything
+ | otherwise = mapInScopeSet refine_id in_scope
+
+ refine_id v -- Only refine its type; any rules will get
+ -- refined if they are used (I hope)
+ | isId v = setIdType v (Type.substTy refine_subst (idType v))
+ | otherwise = v
+ refine_subst = TvSubst in_scope refine_tv_subst
+\end{code}
+
+%************************************************************************
+%* *
+ Substitution of Vars
+%* *
+%************************************************************************
+
+
+\begin{code}
+substId :: SimplEnv -> Id -> SimplSR
+substId (SimplEnv { seInScope = in_scope, seIdSubst = ids }) v
+ | not (isLocalId v)
+ = DoneId v NoOccInfo
+ | otherwise -- A local Id
+ = case lookupVarEnv ids v of
+ Just (DoneId v occ) -> DoneId (refine v) occ
+ Just res -> res
+ Nothing -> let v' = refine v
+ in DoneId v' (idOccInfo v')
+ -- We don't put LoopBreakers in the substitution (unless they need
+ -- to be cloned for name-clash reasons), so the idOccInfo is
+ -- very important! If isFragileOcc returned True for
+ -- loop breakers we could avoid this call, but at the expense
+ -- of adding more to the substitution, and building new Ids
+ -- a bit more often than really necessary
+ where
+ -- Get the most up-to-date thing from the in-scope set
+ -- Even though it isn't in the substitution, it may be in
+ -- the in-scope set with a different type (we only use the
+ -- substitution if the unique changes).
+ refine v = case lookupInScope in_scope v of
+ Just v' -> v'
+ Nothing -> WARN( True, ppr v ) v -- This is an error!
+\end{code}
+
+
+%************************************************************************
+%* *
+\section{Substituting an Id binder}
+%* *
+%************************************************************************
+
+
+These functions are in the monad only so that they can be made strict via seq.
+
+\begin{code}
+simplBinders, simplLamBndrs
+ :: SimplEnv -> [InBinder] -> SimplM (SimplEnv, [OutBinder])
+simplBinders env bndrs = mapAccumLSmpl simplBinder env bndrs
+simplLamBndrs env bndrs = mapAccumLSmpl simplLamBndr env bndrs
+
+-------------
+simplBinder :: SimplEnv -> InBinder -> SimplM (SimplEnv, OutBinder)
+-- Used for lambda and case-bound variables
+-- Clone Id if necessary, substitute type
+-- Return with IdInfo already substituted, but (fragile) occurrence info zapped
+-- The substitution is extended only if the variable is cloned, because
+-- we *don't* need to use it to track occurrence info.
+simplBinder env bndr
+ | isTyVar bndr = do { let (env', tv) = substTyVarBndr env bndr
+ ; seqTyVar tv `seq` return (env', tv) }
+ | otherwise = do { let (env', id) = substIdBndr env bndr
+ ; seqId id `seq` return (env', id) }
+
+-------------
+simplLamBndr :: SimplEnv -> Var -> SimplM (SimplEnv, Var)
+-- Used for lambda binders. These sometimes have unfoldings added by
+-- the worker/wrapper pass that must be preserved, because they can't
+-- be reconstructed from context. For example:
+-- f x = case x of (a,b) -> fw a b x
+-- fw a b x{=(a,b)} = ...
+-- The "{=(a,b)}" is an unfolding we can't reconstruct otherwise.
+simplLamBndr env bndr
+ | not (isId bndr && hasSomeUnfolding old_unf) = simplBinder env bndr -- Normal case
+ | otherwise = seqId id2 `seq` return (env', id2)
+ where
+ old_unf = idUnfolding bndr
+ (env', id1) = substIdBndr env bndr
+ id2 = id1 `setIdUnfolding` substUnfolding env old_unf
+
+--------------
+substIdBndr :: SimplEnv -> Id -- Substitution and Id to transform
+ -> (SimplEnv, Id) -- Transformed pair
+
+-- Returns with:
+-- * Unique changed if necessary
+-- * Type substituted
+-- * Unfolding zapped
+-- * Rules, worker, lbvar info all substituted
+-- * Fragile occurrence info zapped
+-- * The in-scope set extended with the returned Id
+-- * The substitution extended with a DoneId if unique changed
+-- In this case, the var in the DoneId is the same as the
+-- var returned
+
+substIdBndr env@(SimplEnv { seInScope = in_scope, seIdSubst = id_subst})
+ old_id
+ = (env { seInScope = in_scope `extendInScopeSet` new_id,
+ seIdSubst = new_subst }, new_id)
+ where
+ -- id1 is cloned if necessary
+ id1 = uniqAway in_scope old_id
+
+ -- id2 has its type substituted
+ id2 = substIdType env id1
+
+ -- new_id has the final IdInfo
+ subst = mkCoreSubst env
+ new_id = maybeModifyIdInfo (substIdInfo subst) id2
+
+ -- Extend the substitution if the unique has changed
+ -- See the notes with substTyVarBndr for the delSubstEnv
+ new_subst | new_id /= old_id
+ = extendVarEnv id_subst old_id (DoneId new_id (idOccInfo old_id))
+ | otherwise
+ = delVarEnv id_subst old_id
+\end{code}
+
+
+\begin{code}
+seqTyVar :: TyVar -> ()
+seqTyVar b = b `seq` ()
+
+seqId :: Id -> ()
+seqId id = seqType (idType id) `seq`
+ idInfo id `seq`
+ ()
+
+seqIds :: [Id] -> ()
+seqIds [] = ()
+seqIds (id:ids) = seqId id `seq` seqIds ids
+\end{code}
+
+
+%************************************************************************
+%* *
+ Let bindings
+%* *
+%************************************************************************
+
+Simplifying let binders
+~~~~~~~~~~~~~~~~~~~~~~~
+Rename the binders if necessary,
+
+\begin{code}
+simplNonRecBndr :: SimplEnv -> InBinder -> SimplM (SimplEnv, OutBinder)
+simplNonRecBndr env id
+ = do { let (env1, id1) = substLetIdBndr env id
+ ; seqId id1 `seq` return (env1, id1) }
+
+---------------
+simplRecBndrs :: SimplEnv -> [InBinder] -> SimplM (SimplEnv, [OutBinder])
+simplRecBndrs env@(SimplEnv { seInScope = in_scope, seIdSubst = id_subst }) ids
+ = do { let (env1, ids1) = mapAccumL substLetIdBndr env ids
+ ; seqIds ids1 `seq` return (env1, ids1) }
+
+---------------
+substLetIdBndr :: SimplEnv -> InBinder -- Env and binder to transform
+ -> (SimplEnv, OutBinder)
+-- C.f. CoreSubst.substIdBndr
+-- Clone Id if necessary, substitute its type
+-- Return an Id with completely zapped IdInfo
+-- [addLetIdInfo, below, will restore its IdInfo]
+-- Augment the substitution
+-- if the unique changed, *or*
+-- if there's interesting occurrence info
+
+substLetIdBndr env@(SimplEnv { seInScope = in_scope, seIdSubst = id_subst }) old_id
+ = (env { seInScope = in_scope `extendInScopeSet` new_id,
+ seIdSubst = new_subst }, new_id)
+ where
+ id1 = uniqAway in_scope old_id
+ id2 = substIdType env id1
+ new_id = setIdInfo id2 vanillaIdInfo
+
+ -- Extend the substitution if the unique has changed,
+ -- or there's some useful occurrence information
+ -- See the notes with substTyVarBndr for the delSubstEnv
+ occ_info = occInfo (idInfo old_id)
+ new_subst | new_id /= old_id || isFragileOcc occ_info
+ = extendVarEnv id_subst old_id (DoneId new_id occ_info)
+ | otherwise
+ = delVarEnv id_subst old_id
+\end{code}
+
+Add IdInfo back onto a let-bound Id
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We must transfer the IdInfo of the original binder to the new binder.
+This is crucial, to preserve
+ strictness
+ rules
+ worker info
+etc. To do this we must apply the current substitution,
+which incorporates earlier substitutions in this very letrec group.
+
+NB 1. We do this *before* processing the RHS of the binder, so that
+its substituted rules are visible in its own RHS.
+This is important. Manuel found cases where he really, really
+wanted a RULE for a recursive function to apply in that function's
+own right-hand side.
+
+NB 2: We do not transfer the arity (see Subst.substIdInfo)
+The arity of an Id should not be visible
+in its own RHS, else we eta-reduce
+ f = \x -> f x
+to
+ f = f
+which isn't sound. And it makes the arity in f's IdInfo greater than
+the manifest arity, which isn't good.
+The arity will get added later.
+
+NB 3: It's important that we *do* transfer the loop-breaker OccInfo,
+because that's what stops the Id getting inlined infinitely, in the body
+of the letrec.
+
+NB 4: does no harm for non-recursive bindings
+
+NB 5: we can't do the addLetIdInfo part before *all* the RHSs because
+ rec { f = g
+ h = ...
+ RULE h Int = f
+ }
+Here, we'll do postInlineUnconditionally on f, and we must "see" that
+when substituting in h's RULE.
+
+\begin{code}
+addLetIdInfo :: SimplEnv -> InBinder -> OutBinder -> (SimplEnv, OutBinder)
+addLetIdInfo env in_id out_id
+ = (modifyInScope env out_id out_id, final_id)
+ where
+ final_id = out_id `setIdInfo` new_info
+ subst = mkCoreSubst env
+ old_info = idInfo in_id
+ new_info = case substIdInfo subst old_info of
+ Nothing -> old_info
+ Just new_info -> new_info
+
+substIdInfo :: CoreSubst.Subst -> IdInfo -> Maybe IdInfo
+-- Substitute the
+-- rules
+-- worker info
+-- Zap the unfolding
+-- Keep only 'robust' OccInfo
+-- Zap Arity
+--
+-- Seq'ing on the returned IdInfo is enough to cause all the
+-- substitutions to happen completely
+
+substIdInfo subst info
+ | nothing_to_do = Nothing
+ | otherwise = Just (info `setOccInfo` (if keep_occ then old_occ else NoOccInfo)
+ `setArityInfo` (if keep_arity then old_arity else unknownArity)
+ `setSpecInfo` CoreSubst.substSpec subst old_rules
+ `setWorkerInfo` CoreSubst.substWorker subst old_wrkr
+ `setUnfoldingInfo` noUnfolding)
+ -- setSpecInfo does a seq
+ -- setWorkerInfo does a seq
+ where
+ nothing_to_do = keep_occ && keep_arity &&
+ isEmptySpecInfo old_rules &&
+ not (workerExists old_wrkr) &&
+ not (hasUnfolding (unfoldingInfo info))
+
+ keep_occ = not (isFragileOcc old_occ)
+ keep_arity = old_arity == unknownArity
+ old_arity = arityInfo info
+ old_occ = occInfo info
+ old_rules = specInfo info
+ old_wrkr = workerInfo info
+
+------------------
+substIdType :: SimplEnv -> Id -> Id
+substIdType env@(SimplEnv { seInScope = in_scope, seTvSubst = tv_env}) id
+ | isEmptyVarEnv tv_env || isEmptyVarSet (tyVarsOfType old_ty) = id
+ | otherwise = setIdType id (Type.substTy (TvSubst in_scope tv_env) old_ty)
+ -- The tyVarsOfType is cheaper than it looks
+ -- because we cache the free tyvars of the type
+ -- in a Note in the id's type itself
+ where
+ old_ty = idType id
+
+------------------
+substUnfolding env NoUnfolding = NoUnfolding
+substUnfolding env (OtherCon cons) = OtherCon cons
+substUnfolding env (CompulsoryUnfolding rhs) = CompulsoryUnfolding (substExpr env rhs)
+substUnfolding env (CoreUnfolding rhs t v w g) = CoreUnfolding (substExpr env rhs) t v w g
+\end{code}
+
+
+%************************************************************************
+%* *
+ Impedance matching to type substitution
+%* *
+%************************************************************************
+
+\begin{code}
+substTy :: SimplEnv -> Type -> Type
+substTy (SimplEnv { seInScope = in_scope, seTvSubst = tv_env }) ty
+ = Type.substTy (TvSubst in_scope tv_env) ty
+
+substTyVarBndr :: SimplEnv -> TyVar -> (SimplEnv, TyVar)
+substTyVarBndr env@(SimplEnv { seInScope = in_scope, seTvSubst = tv_env }) tv
+ = case Type.substTyVarBndr (TvSubst in_scope tv_env) tv of
+ (TvSubst in_scope' tv_env', tv')
+ -> (env { seInScope = in_scope', seTvSubst = tv_env'}, tv')
+
+-- When substituting in rules etc we can get CoreSubst to do the work
+-- But CoreSubst uses a simpler form of IdSubstEnv, so we must impedance-match
+-- here. I think this will not usually result in a lot of work;
+-- the substitutions are typically small, and laziness will avoid work in many cases.
+
+mkCoreSubst :: SimplEnv -> CoreSubst.Subst
+mkCoreSubst (SimplEnv { seInScope = in_scope, seTvSubst = tv_env, seIdSubst = id_env })
+ = mk_subst tv_env id_env
+ where
+ mk_subst tv_env id_env = CoreSubst.mkSubst in_scope tv_env (mapVarEnv fiddle id_env)
+
+ fiddle (DoneEx e) = e
+ fiddle (DoneId v occ) = Var v
+ fiddle (ContEx tv id e) = CoreSubst.substExpr (mk_subst tv id) e
+
+substExpr :: SimplEnv -> CoreExpr -> CoreExpr
+substExpr env expr
+ | isEmptySimplSubst env = expr
+ | otherwise = CoreSubst.substExpr (mkCoreSubst env) expr
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Floats}
+%* *
+%************************************************************************
+
+\begin{code}
+type FloatsWithExpr = FloatsWith OutExpr
+type FloatsWith a = (Floats, a)
+ -- We return something equivalent to (let b in e), but
+ -- in pieces to avoid the quadratic blowup when floating
+ -- incrementally. See the comments just before simplExprB in Simplify.lhs
+
+data Floats = Floats (OrdList OutBind)
+ InScopeSet -- Environment "inside" all the floats
+ Bool -- True <=> All bindings are lifted
+
+allLifted :: Floats -> Bool
+allLifted (Floats _ _ is_lifted) = is_lifted
+
+wrapFloats :: Floats -> OutExpr -> OutExpr
+wrapFloats (Floats bs _ _) body = foldrOL Let body bs
+
+isEmptyFloats :: Floats -> Bool
+isEmptyFloats (Floats bs _ _) = isNilOL bs
+
+floatBinds :: Floats -> [OutBind]
+floatBinds (Floats bs _ _) = fromOL bs
+
+flattenFloats :: Floats -> Floats
+-- Flattens into a single Rec group
+flattenFloats (Floats bs is is_lifted)
+ = ASSERT2( is_lifted, ppr (fromOL bs) )
+ Floats (unitOL (Rec (flattenBinds (fromOL bs)))) is is_lifted
+\end{code}
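+
+OrdList here is an append-friendly sequence: combining two lots of
+floats is constant time, and flattening into an ordinary list happens
+once, at the end. A standalone sketch of the join-list idea (not
+OrdList's actual implementation, just the shape of it):
+\begin{verbatim}
+data JoinList a = Nil
+                | Unit a
+                | Append (JoinList a) (JoinList a)
+
+appendJL :: JoinList a -> JoinList a -> JoinList a
+appendJL = Append                 -- O(1); no copying, unlike (++)
+
+toList :: JoinList a -> [a]
+toList jl = go jl []
+  where                           -- one left-to-right flatten at the end
+    go Nil          rest = rest
+    go (Append l r) rest = go l (go r rest)
+    go (Unit x)     rest = x : rest
+\end{verbatim}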
+
+\begin{code}
+emptyFloats :: SimplEnv -> Floats
+emptyFloats env = Floats nilOL (getInScope env) True
+
+unitFloat :: SimplEnv -> OutId -> OutExpr -> Floats
+-- A single non-rec float; extend the in-scope set
+unitFloat env var rhs = Floats (unitOL (NonRec var rhs))
+ (extendInScopeSet (getInScope env) var)
+ (not (isUnLiftedType (idType var)))
+
+addFloats :: SimplEnv -> Floats
+ -> (SimplEnv -> SimplM (FloatsWith a))
+ -> SimplM (FloatsWith a)
+addFloats env (Floats b1 is1 l1) thing_inside
+ | isNilOL b1
+ = thing_inside env
+ | otherwise
+ = thing_inside (setInScopeSet env is1) `thenSmpl` \ (Floats b2 is2 l2, res) ->
+ returnSmpl (Floats (b1 `appOL` b2) is2 (l1 && l2), res)
+
+addLetBind :: OutBind -> Floats -> Floats
+addLetBind bind (Floats binds in_scope lifted)
+ = Floats (bind `consOL` binds) in_scope (lifted && is_lifted_bind bind)
+
+is_lifted_bind (Rec _) = True
+is_lifted_bind (NonRec b r) = not (isUnLiftedType (idType b))
+
+-- addAuxiliaryBind * takes already-simplified things (bndr and rhs)
+-- * extends the in-scope env
+-- * assumes it's a let-bindable thing
+addAuxiliaryBind :: SimplEnv -> OutBind
+ -> (SimplEnv -> SimplM (FloatsWith a))
+ -> SimplM (FloatsWith a)
+ -- Extends the in-scope environment as well as wrapping the bindings
+addAuxiliaryBind env bind thing_inside
+ = ASSERT( case bind of { NonRec b r -> not (needsCaseBinding (idType b) r) ; Rec _ -> True } )
+ thing_inside (addNewInScopeIds env (bindersOf bind)) `thenSmpl` \ (floats, x) ->
+ returnSmpl (addLetBind bind floats, x)
+\end{code}
+
+
diff --git a/compiler/simplCore/SimplMonad.lhs b/compiler/simplCore/SimplMonad.lhs
new file mode 100644
index 0000000000..bc09e1128c
--- /dev/null
+++ b/compiler/simplCore/SimplMonad.lhs
@@ -0,0 +1,526 @@
+%
+% (c) The AQUA Project, Glasgow University, 1993-1998
+%
+\section[SimplMonad]{The simplifier Monad}
+
+\begin{code}
+module SimplMonad (
+ -- The monad
+ SimplM,
+ initSmpl, returnSmpl, thenSmpl, thenSmpl_,
+ mapSmpl, mapAndUnzipSmpl, mapAccumLSmpl,
+ getDOptsSmpl,
+
+ -- Unique supply
+ getUniqueSmpl, getUniquesSmpl, getUniqSupplySmpl, newId,
+
+ -- Counting
+ SimplCount, Tick(..),
+ tick, freeTick,
+ getSimplCount, zeroSimplCount, pprSimplCount,
+ plusSimplCount, isZeroSimplCount,
+
+ -- Switch checker
+ SwitchChecker, SwitchResult(..), getSimplIntSwitch,
+ isAmongSimpl, intSwitchSet, switchIsOn
+ ) where
+
+#include "HsVersions.h"
+
+import Id ( Id, mkSysLocal )
+import Type ( Type )
+import UniqSupply ( uniqsFromSupply, uniqFromSupply, splitUniqSupply,
+ UniqSupply
+ )
+import DynFlags ( SimplifierSwitch(..), DynFlags, DynFlag(..), dopt )
+import StaticFlags ( opt_PprStyle_Debug, opt_HistorySize )
+import Unique ( Unique )
+import Maybes ( expectJust )
+import FiniteMap ( FiniteMap, emptyFM, isEmptyFM, lookupFM, addToFM, plusFM_C, fmToList )
+import FastString ( FastString )
+import Outputable
+import FastTypes
+
+import GLAEXTS ( indexArray# )
+
+#if __GLASGOW_HASKELL__ < 503
+import PrelArr ( Array(..) )
+#else
+import GHC.Arr ( Array(..) )
+#endif
+
+import Array ( array, (//) )
+
+infixr 0 `thenSmpl`, `thenSmpl_`
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Monad plumbing}
+%* *
+%************************************************************************
+
+For the simplifier monad, we want to {\em thread} a unique supply and a counter.
+(Command-line switches move around through the explicitly-passed SimplEnv.)
+
+\begin{code}
+newtype SimplM result
+ = SM { unSM :: DynFlags -- We thread the unique supply because
+ -> UniqSupply -- constantly splitting it is rather expensive
+ -> SimplCount
+ -> (result, UniqSupply, SimplCount)}
+\end{code}
+
+\begin{code}
+initSmpl :: DynFlags
+ -> UniqSupply -- No init count; set to 0
+ -> SimplM a
+ -> (a, SimplCount)
+
+initSmpl dflags us m
+ = case unSM m dflags us (zeroSimplCount dflags) of
+ (result, _, count) -> (result, count)
+
+
+{-# INLINE thenSmpl #-}
+{-# INLINE thenSmpl_ #-}
+{-# INLINE returnSmpl #-}
+
+instance Monad SimplM where
+ (>>) = thenSmpl_
+ (>>=) = thenSmpl
+ return = returnSmpl
+
+returnSmpl :: a -> SimplM a
+returnSmpl e = SM (\ dflags us sc -> (e, us, sc))
+
+thenSmpl :: SimplM a -> (a -> SimplM b) -> SimplM b
+thenSmpl_ :: SimplM a -> SimplM b -> SimplM b
+
+thenSmpl m k
+ = SM (\ dflags us0 sc0 ->
+ case (unSM m dflags us0 sc0) of
+ (m_result, us1, sc1) -> unSM (k m_result) dflags us1 sc1 )
+
+thenSmpl_ m k
+ = SM (\dflags us0 sc0 ->
+ case (unSM m dflags us0 sc0) of
+ (_, us1, sc1) -> unSM k dflags us1 sc1)
+\end{code}
+
+
+\begin{code}
+mapSmpl :: (a -> SimplM b) -> [a] -> SimplM [b]
+mapAndUnzipSmpl :: (a -> SimplM (b, c)) -> [a] -> SimplM ([b],[c])
+
+mapSmpl f [] = returnSmpl []
+mapSmpl f (x:xs)
+ = f x `thenSmpl` \ x' ->
+ mapSmpl f xs `thenSmpl` \ xs' ->
+ returnSmpl (x':xs')
+
+mapAndUnzipSmpl f [] = returnSmpl ([],[])
+mapAndUnzipSmpl f (x:xs)
+ = f x `thenSmpl` \ (r1, r2) ->
+ mapAndUnzipSmpl f xs `thenSmpl` \ (rs1, rs2) ->
+ returnSmpl (r1:rs1, r2:rs2)
+
+mapAccumLSmpl :: (acc -> b -> SimplM (acc,c)) -> acc -> [b] -> SimplM (acc, [c])
+mapAccumLSmpl f acc [] = returnSmpl (acc, [])
+mapAccumLSmpl f acc (x:xs) = f acc x `thenSmpl` \ (acc', x') ->
+ mapAccumLSmpl f acc' xs `thenSmpl` \ (acc'', xs') ->
+ returnSmpl (acc'', x':xs')
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{The unique supply}
+%* *
+%************************************************************************
+
+\begin{code}
+getUniqSupplySmpl :: SimplM UniqSupply
+getUniqSupplySmpl
+ = SM (\dflags us sc -> case splitUniqSupply us of
+ (us1, us2) -> (us1, us2, sc))
+
+getUniqueSmpl :: SimplM Unique
+getUniqueSmpl
+ = SM (\dflags us sc -> case splitUniqSupply us of
+ (us1, us2) -> (uniqFromSupply us1, us2, sc))
+
+getUniquesSmpl :: SimplM [Unique]
+getUniquesSmpl
+ = SM (\dflags us sc -> case splitUniqSupply us of
+ (us1, us2) -> (uniqsFromSupply us1, us2, sc))
+
+getDOptsSmpl :: SimplM DynFlags
+getDOptsSmpl
+ = SM (\dflags us sc -> (dflags, us, sc))
+
+newId :: FastString -> Type -> SimplM Id
+newId fs ty = getUniqueSmpl `thenSmpl` \ uniq ->
+ returnSmpl (mkSysLocal fs uniq ty)
+\end{code}
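+
+As a small illustration of how these combinators compose (newIds is
+hypothetical, not part of the exported interface): a helper that makes
+several system-local Ids, with the unique supply threaded invisibly, is just
+
+	newIds :: FastString -> [Type] -> SimplM [Id]
+	newIds fs tys = mapSmpl (newId fs) tys
+
+Since SimplM is also a Monad instance, the same thing could be written with
+do-notation.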
+
+
+%************************************************************************
+%* *
+\subsection{Counting up what we've done}
+%* *
+%************************************************************************
+
+\begin{code}
+getSimplCount :: SimplM SimplCount
+getSimplCount = SM (\dflags us sc -> (sc, us, sc))
+
+tick :: Tick -> SimplM ()
+tick t
+ = SM (\dflags us sc -> let sc' = doTick t sc
+ in sc' `seq` ((), us, sc'))
+
+freeTick :: Tick -> SimplM ()
+-- Record a tick, but don't add to the total tick count, which is
+-- used to decide when nothing further has happened
+freeTick t
+ = SM (\dflags us sc -> let sc' = doFreeTick t sc
+ in sc' `seq` ((), us, sc'))
+\end{code}
+
+\begin{code}
+verboseSimplStats = opt_PprStyle_Debug -- For now, anyway
+
+zeroSimplCount :: DynFlags -> SimplCount
+isZeroSimplCount :: SimplCount -> Bool
+pprSimplCount :: SimplCount -> SDoc
+doTick, doFreeTick :: Tick -> SimplCount -> SimplCount
+plusSimplCount :: SimplCount -> SimplCount -> SimplCount
+\end{code}
+
+\begin{code}
+data SimplCount = VerySimplZero -- These two are used when
+ | VerySimplNonZero -- we are only interested in
+ -- termination info
+
+ | SimplCount {
+ ticks :: !Int, -- Total ticks
+ details :: !TickCounts, -- How many of each type
+ n_log :: !Int, -- N
+ log1 :: [Tick], -- Last N events; <= opt_HistorySize
+ log2 :: [Tick] -- Last opt_HistorySize events before that
+ }
+
+type TickCounts = FiniteMap Tick Int
+
+zeroSimplCount dflags
+ -- This is where we decide whether to do
+ -- the VerySimpl version or the full-stats version
+ | dopt Opt_D_dump_simpl_stats dflags
+ = SimplCount {ticks = 0, details = emptyFM,
+ n_log = 0, log1 = [], log2 = []}
+ | otherwise
+ = VerySimplZero
+
+isZeroSimplCount VerySimplZero = True
+isZeroSimplCount (SimplCount { ticks = 0 }) = True
+isZeroSimplCount other = False
+
+doFreeTick tick sc@SimplCount { details = dts }
+ = dts' `seqFM` sc { details = dts' }
+ where
+ dts' = dts `addTick` tick
+doFreeTick tick sc = sc
+
+-- Gross hack to persuade GHC 3.03 to do this important seq
+seqFM fm x | isEmptyFM fm = x
+ | otherwise = x
+
+doTick tick sc@SimplCount { ticks = tks, details = dts, n_log = nl, log1 = l1, log2 = l2 }
+ | nl >= opt_HistorySize = sc1 { n_log = 1, log1 = [tick], log2 = l1 }
+ | otherwise = sc1 { n_log = nl+1, log1 = tick : l1 }
+ where
+ sc1 = sc { ticks = tks+1, details = dts `addTick` tick }
+
+doTick tick sc = VerySimplNonZero -- The very simple case
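+
+-- A worked example of the rolling log (illustration only): with
+-- opt_HistorySize = 3, after ticks t1..t5 arrive in that order we end up with
+--	n_log = 2,  log1 = [t5,t4],  log2 = [t3,t2,t1]
+-- i.e. log1 holds the most recent (at most opt_HistorySize) ticks, newest
+-- first, and log2 holds the opt_HistorySize ticks before those.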
+
+
+-- Don't use plusFM_C because that's lazy, and we want to
+-- be pretty strict here!
+addTick :: TickCounts -> Tick -> TickCounts
+addTick fm tick = case lookupFM fm tick of
+ Nothing -> addToFM fm tick 1
+ Just n -> n1 `seq` addToFM fm tick n1
+ where
+ n1 = n+1
+
+
+plusSimplCount sc1@(SimplCount { ticks = tks1, details = dts1 })
+ sc2@(SimplCount { ticks = tks2, details = dts2 })
+ = log_base { ticks = tks1 + tks2, details = plusFM_C (+) dts1 dts2 }
+ where
+ -- A hackish way of getting recent log info
+ log_base | null (log1 sc2) = sc1 -- Nothing at all in sc2
+ | null (log2 sc2) = sc2 { log2 = log1 sc1 }
+ | otherwise = sc2
+
+plusSimplCount VerySimplZero VerySimplZero = VerySimplZero
+plusSimplCount sc1 sc2 = VerySimplNonZero
+
+pprSimplCount VerySimplZero = ptext SLIT("Total ticks: ZERO!")
+pprSimplCount VerySimplNonZero = ptext SLIT("Total ticks: NON-ZERO!")
+pprSimplCount (SimplCount { ticks = tks, details = dts, log1 = l1, log2 = l2 })
+ = vcat [ptext SLIT("Total ticks: ") <+> int tks,
+ text "",
+ pprTickCounts (fmToList dts),
+ if verboseSimplStats then
+ vcat [text "",
+ ptext SLIT("Log (most recent first)"),
+ nest 4 (vcat (map ppr l1) $$ vcat (map ppr l2))]
+ else empty
+ ]
+
+pprTickCounts :: [(Tick,Int)] -> SDoc
+pprTickCounts [] = empty
+pprTickCounts ((tick1,n1):ticks)
+ = vcat [int tot_n <+> text (tickString tick1),
+ pprTCDetails real_these,
+ pprTickCounts others
+ ]
+ where
+ tick1_tag = tickToTag tick1
+ (these, others) = span same_tick ticks
+ real_these = (tick1,n1):these
+ same_tick (tick2,_) = tickToTag tick2 == tick1_tag
+ tot_n = sum [n | (_,n) <- real_these]
+
+pprTCDetails ticks@((tick,_):_)
+ | verboseSimplStats || isRuleFired tick
+ = nest 4 (vcat [int n <+> pprTickCts tick | (tick,n) <- ticks])
+ | otherwise
+ = empty
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Ticks}
+%* *
+%************************************************************************
+
+\begin{code}
+data Tick
+ = PreInlineUnconditionally Id
+ | PostInlineUnconditionally Id
+
+ | UnfoldingDone Id
+ | RuleFired FastString -- Rule name
+
+ | LetFloatFromLet
+ | EtaExpansion Id -- LHS binder
+ | EtaReduction Id -- Binder on outer lambda
+ | BetaReduction Id -- Lambda binder
+
+
+ | CaseOfCase Id -- Bndr on *inner* case
+ | KnownBranch Id -- Case binder
+ | CaseMerge Id -- Binder on outer case
+ | AltMerge Id -- Case binder
+ | CaseElim Id -- Case binder
+ | CaseIdentity Id -- Case binder
+ | FillInCaseDefault Id -- Case binder
+
+ | BottomFound
+ | SimplifierDone -- Ticked at each iteration of the simplifier
+
+isRuleFired (RuleFired _) = True
+isRuleFired other = False
+
+instance Outputable Tick where
+ ppr tick = text (tickString tick) <+> pprTickCts tick
+
+instance Eq Tick where
+ a == b = case a `cmpTick` b of { EQ -> True; other -> False }
+
+instance Ord Tick where
+ compare = cmpTick
+
+tickToTag :: Tick -> Int
+tickToTag (PreInlineUnconditionally _) = 0
+tickToTag (PostInlineUnconditionally _) = 1
+tickToTag (UnfoldingDone _) = 2
+tickToTag (RuleFired _) = 3
+tickToTag LetFloatFromLet = 4
+tickToTag (EtaExpansion _) = 5
+tickToTag (EtaReduction _) = 6
+tickToTag (BetaReduction _) = 7
+tickToTag (CaseOfCase _) = 8
+tickToTag (KnownBranch _) = 9
+tickToTag (CaseMerge _) = 10
+tickToTag (CaseElim _) = 11
+tickToTag (CaseIdentity _) = 12
+tickToTag (FillInCaseDefault _) = 13
+tickToTag BottomFound = 14
+tickToTag SimplifierDone = 16
+tickToTag (AltMerge _) = 17
+
+tickString :: Tick -> String
+tickString (PreInlineUnconditionally _) = "PreInlineUnconditionally"
+tickString (PostInlineUnconditionally _)= "PostInlineUnconditionally"
+tickString (UnfoldingDone _) = "UnfoldingDone"
+tickString (RuleFired _) = "RuleFired"
+tickString LetFloatFromLet = "LetFloatFromLet"
+tickString (EtaExpansion _) = "EtaExpansion"
+tickString (EtaReduction _) = "EtaReduction"
+tickString (BetaReduction _) = "BetaReduction"
+tickString (CaseOfCase _) = "CaseOfCase"
+tickString (KnownBranch _) = "KnownBranch"
+tickString (CaseMerge _) = "CaseMerge"
+tickString (AltMerge _) = "AltMerge"
+tickString (CaseElim _) = "CaseElim"
+tickString (CaseIdentity _) = "CaseIdentity"
+tickString (FillInCaseDefault _) = "FillInCaseDefault"
+tickString BottomFound = "BottomFound"
+tickString SimplifierDone = "SimplifierDone"
+
+pprTickCts :: Tick -> SDoc
+pprTickCts (PreInlineUnconditionally v) = ppr v
+pprTickCts (PostInlineUnconditionally v)= ppr v
+pprTickCts (UnfoldingDone v) = ppr v
+pprTickCts (RuleFired v) = ppr v
+pprTickCts LetFloatFromLet = empty
+pprTickCts (EtaExpansion v) = ppr v
+pprTickCts (EtaReduction v) = ppr v
+pprTickCts (BetaReduction v) = ppr v
+pprTickCts (CaseOfCase v) = ppr v
+pprTickCts (KnownBranch v) = ppr v
+pprTickCts (CaseMerge v) = ppr v
+pprTickCts (AltMerge v) = ppr v
+pprTickCts (CaseElim v) = ppr v
+pprTickCts (CaseIdentity v) = ppr v
+pprTickCts (FillInCaseDefault v) = ppr v
+pprTickCts other = empty
+
+cmpTick :: Tick -> Tick -> Ordering
+cmpTick a b = case (tickToTag a `compare` tickToTag b) of
+ GT -> GT
+ EQ | isRuleFired a || verboseSimplStats -> cmpEqTick a b
+ | otherwise -> EQ
+ LT -> LT
+ -- Always distinguish RuleFired, so that the stats
+ -- can report them even in non-verbose mode
+
+cmpEqTick :: Tick -> Tick -> Ordering
+cmpEqTick (PreInlineUnconditionally a) (PreInlineUnconditionally b) = a `compare` b
+cmpEqTick (PostInlineUnconditionally a) (PostInlineUnconditionally b) = a `compare` b
+cmpEqTick (UnfoldingDone a) (UnfoldingDone b) = a `compare` b
+cmpEqTick (RuleFired a) (RuleFired b) = a `compare` b
+cmpEqTick (EtaExpansion a) (EtaExpansion b) = a `compare` b
+cmpEqTick (EtaReduction a) (EtaReduction b) = a `compare` b
+cmpEqTick (BetaReduction a) (BetaReduction b) = a `compare` b
+cmpEqTick (CaseOfCase a) (CaseOfCase b) = a `compare` b
+cmpEqTick (KnownBranch a) (KnownBranch b) = a `compare` b
+cmpEqTick (CaseMerge a) (CaseMerge b) = a `compare` b
+cmpEqTick (AltMerge a) (AltMerge b) = a `compare` b
+cmpEqTick (CaseElim a) (CaseElim b) = a `compare` b
+cmpEqTick (CaseIdentity a) (CaseIdentity b) = a `compare` b
+cmpEqTick (FillInCaseDefault a) (FillInCaseDefault b) = a `compare` b
+cmpEqTick other1 other2 = EQ
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsubsection{Command-line switches}
+%* *
+%************************************************************************
+
+\begin{code}
+type SwitchChecker = SimplifierSwitch -> SwitchResult
+
+data SwitchResult
+ = SwBool Bool -- on/off
+ | SwString FastString -- nothing or a String
+ | SwInt Int -- nothing or an Int
+
+isAmongSimpl :: [SimplifierSwitch] -> SimplifierSwitch -> SwitchResult
+isAmongSimpl on_switches -- Switches mentioned later occur *earlier*
+ -- in the list; defaults right at the end.
+ = let
+ tidied_on_switches = foldl rm_dups [] on_switches
+ -- The fold*l* ensures that we keep the latest switches;
+ -- ie the ones that occur earliest in the list.
+
+ sw_tbl :: Array Int SwitchResult
+ sw_tbl = (array (0, lAST_SIMPL_SWITCH_TAG) -- bounds...
+ all_undefined)
+ // defined_elems
+
+ all_undefined = [ (i, SwBool False) | i <- [0 .. lAST_SIMPL_SWITCH_TAG ] ]
+
+ defined_elems = map mk_assoc_elem tidied_on_switches
+ in
+ -- (avoid some unboxing, bounds checking, and other horrible things:)
+ case sw_tbl of { Array _ _ stuff ->
+ \ switch ->
+ case (indexArray# stuff (tagOf_SimplSwitch switch)) of
+ (# v #) -> v
+ }
+ where
+ mk_assoc_elem k@(MaxSimplifierIterations lvl)
+ = (iBox (tagOf_SimplSwitch k), SwInt lvl)
+ mk_assoc_elem k
+ = (iBox (tagOf_SimplSwitch k), SwBool True) -- I'm here, Mom!
+
+ -- cannot have duplicates if we are going to use the array thing
+ rm_dups switches_so_far switch
+ = if switch `is_elem` switches_so_far
+ then switches_so_far
+ else switch : switches_so_far
+ where
+ sw `is_elem` [] = False
+ sw `is_elem` (s:ss) = (tagOf_SimplSwitch sw) ==# (tagOf_SimplSwitch s)
+ || sw `is_elem` ss
+\end{code}
+
+\begin{code}
+getSimplIntSwitch :: SwitchChecker -> (Int-> SimplifierSwitch) -> Int
+getSimplIntSwitch chkr switch
+ = expectJust "getSimplIntSwitch" (intSwitchSet chkr switch)
+
+switchIsOn :: (switch -> SwitchResult) -> switch -> Bool
+
+switchIsOn lookup_fn switch
+ = case (lookup_fn switch) of
+ SwBool False -> False
+ _ -> True
+
+intSwitchSet :: (switch -> SwitchResult)
+ -> (Int -> switch)
+ -> Maybe Int
+
+intSwitchSet lookup_fn switch
+ = case (lookup_fn (switch (panic "intSwitchSet"))) of
+ SwInt int -> Just int
+ _ -> Nothing
+\end{code}
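+
+For example (illustration only), a checker built by
+
+	chkr = isAmongSimpl [MaxSimplifierIterations 4, NoCaseOfCase]
+
+answers
+
+	switchIsOn chkr NoCaseOfCase                   == True
+	getSimplIntSwitch chkr MaxSimplifierIterations == 4
+
+and any switch not mentioned in the list comes out as SwBool False, i.e. off.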
+
+
+These things behave just like enumeration types.
+
+\begin{code}
+instance Eq SimplifierSwitch where
+ a == b = tagOf_SimplSwitch a ==# tagOf_SimplSwitch b
+
+instance Ord SimplifierSwitch where
+ a < b = tagOf_SimplSwitch a <# tagOf_SimplSwitch b
+ a <= b = tagOf_SimplSwitch a <=# tagOf_SimplSwitch b
+
+
+tagOf_SimplSwitch (MaxSimplifierIterations _) = _ILIT(1)
+tagOf_SimplSwitch NoCaseOfCase = _ILIT(2)
+
+-- If you add anything here, be sure to change lAST_SIMPL_SWITCH_TAG, too!
+
+lAST_SIMPL_SWITCH_TAG = 2
+\end{code}
+
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs
new file mode 100644
index 0000000000..9e616b5df1
--- /dev/null
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -0,0 +1,1592 @@
+%
+% (c) The AQUA Project, Glasgow University, 1993-1998
+%
+\section[SimplUtils]{The simplifier utilities}
+
+\begin{code}
+module SimplUtils (
+ mkLam, prepareAlts, mkCase,
+
+ -- Inlining,
+ preInlineUnconditionally, postInlineUnconditionally, activeInline, activeRule,
+ inlineMode,
+
+ -- The continuation type
+ SimplCont(..), DupFlag(..), LetRhsFlag(..),
+ contIsDupable, contResultType,
+ countValArgs, countArgs, pushContArgs,
+ mkBoringStop, mkRhsStop, contIsRhs, contIsRhsOrArg,
+ getContArgs, interestingCallContext, interestingArg, isStrictType
+
+ ) where
+
+#include "HsVersions.h"
+
+import SimplEnv
+import DynFlags ( SimplifierSwitch(..), SimplifierMode(..),
+ DynFlag(..), dopt )
+import StaticFlags ( opt_UF_UpdateInPlace, opt_SimplNoPreInlining,
+ opt_RulesOff )
+import CoreSyn
+import CoreFVs ( exprFreeVars )
+import CoreUtils ( cheapEqExpr, exprType, exprIsTrivial, exprIsCheap,
+ etaExpand, exprEtaExpandArity, bindNonRec, mkCoerce2,
+ findDefault, exprOkForSpeculation, exprIsHNF
+ )
+import Literal ( mkStringLit )
+import CoreUnfold ( smallEnoughToInline )
+import MkId ( eRROR_ID )
+import Id ( idType, isDataConWorkId, idOccInfo, isDictId,
+ mkSysLocal, isDeadBinder, idNewDemandInfo, isExportedId,
+ idUnfolding, idNewStrictness, idInlinePragma,
+ )
+import NewDemand ( isStrictDmd, isBotRes, splitStrictSig )
+import SimplMonad
+import Type ( Type, splitFunTys, dropForAlls, isStrictType,
+ splitTyConApp_maybe, tyConAppArgs, mkTyVarTys
+ )
+import Name ( mkSysTvName )
+import TyCon ( tyConDataCons_maybe, isAlgTyCon, isNewTyCon )
+import DataCon ( dataConRepArity, dataConTyVars, dataConInstArgTys, isVanillaDataCon )
+import Var ( tyVarKind, mkTyVar )
+import VarSet
+import BasicTypes ( TopLevelFlag(..), isNotTopLevel, OccInfo(..), isLoopBreaker, isOneOcc,
+ Activation, isAlwaysActive, isActive )
+import Util ( lengthExceeds )
+import Outputable
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{The continuation data type}
+%* *
+%************************************************************************
+
+\begin{code}
+data SimplCont -- Strict contexts
+ = Stop OutType -- Type of the result
+ LetRhsFlag
+ Bool -- True <=> This is the RHS of a thunk whose type suggests
+ -- that update-in-place would be possible
+ -- (This makes the inliner a little keener.)
+
+ | CoerceIt OutType -- The To-type, simplified
+ SimplCont
+
+ | InlinePlease -- This continuation makes a function very
+			SimplCont	-- keen to inline itself
+
+ | ApplyTo DupFlag
+ InExpr SimplEnv -- The argument, as yet unsimplified,
+ SimplCont -- and its environment
+
+ | Select DupFlag
+ InId [InAlt] SimplEnv -- The case binder, alts, and subst-env
+ SimplCont
+
+ | ArgOf LetRhsFlag -- An arbitrary strict context: the argument
+ -- of a strict function, or a primitive-arg fn
+ -- or a PrimOp
+ -- No DupFlag because we never duplicate it
+ OutType -- arg_ty: type of the argument itself
+ OutType -- cont_ty: the type of the expression being sought by the context
+ -- f (error "foo") ==> coerce t (error "foo")
+ -- when f is strict
+ -- We need to know the type t, to which to coerce.
+
+ (SimplEnv -> OutExpr -> SimplM FloatsWithExpr) -- What to do with the result
+ -- The result expression in the OutExprStuff has type cont_ty
+
+data LetRhsFlag = AnArg -- It's just an argument not a let RHS
+ | AnRhs -- It's the RHS of a let (so please float lets out of big lambdas)
+
+instance Outputable LetRhsFlag where
+ ppr AnArg = ptext SLIT("arg")
+ ppr AnRhs = ptext SLIT("rhs")
+
+instance Outputable SimplCont where
+ ppr (Stop ty is_rhs _) = ptext SLIT("Stop") <> brackets (ppr is_rhs) <+> ppr ty
+ ppr (ApplyTo dup arg se cont) = (ptext SLIT("ApplyTo") <+> ppr dup <+> ppr arg) $$ ppr cont
+ ppr (ArgOf _ _ _ _) = ptext SLIT("ArgOf...")
+ ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$
+ (nest 4 (ppr alts)) $$ ppr cont
+ ppr (CoerceIt ty cont) = (ptext SLIT("CoerceIt") <+> ppr ty) $$ ppr cont
+ ppr (InlinePlease cont) = ptext SLIT("InlinePlease") $$ ppr cont
+
+data DupFlag = OkToDup | NoDup
+
+instance Outputable DupFlag where
+ ppr OkToDup = ptext SLIT("ok")
+ ppr NoDup = ptext SLIT("nodup")
+
+
+-------------------
+mkBoringStop, mkRhsStop :: OutType -> SimplCont
+mkBoringStop ty = Stop ty AnArg (canUpdateInPlace ty)
+mkRhsStop ty = Stop ty AnRhs (canUpdateInPlace ty)
+
+contIsRhs :: SimplCont -> Bool
+contIsRhs (Stop _ AnRhs _) = True
+contIsRhs (ArgOf AnRhs _ _ _) = True
+contIsRhs other = False
+
+contIsRhsOrArg (Stop _ _ _) = True
+contIsRhsOrArg (ArgOf _ _ _ _) = True
+contIsRhsOrArg other = False
+
+-------------------
+contIsDupable :: SimplCont -> Bool
+contIsDupable (Stop _ _ _) = True
+contIsDupable (ApplyTo OkToDup _ _ _) = True
+contIsDupable (Select OkToDup _ _ _ _) = True
+contIsDupable (CoerceIt _ cont) = contIsDupable cont
+contIsDupable (InlinePlease cont) = contIsDupable cont
+contIsDupable other = False
+
+-------------------
+discardableCont :: SimplCont -> Bool
+discardableCont (Stop _ _ _) = False
+discardableCont (CoerceIt _ cont) = discardableCont cont
+discardableCont (InlinePlease cont) = discardableCont cont
+discardableCont other = True
+
+discardCont :: SimplCont -- A continuation, expecting
+ -> SimplCont -- Replace the continuation with a suitable coerce
+discardCont cont = case cont of
+ Stop to_ty is_rhs _ -> cont
+ other -> CoerceIt to_ty (mkBoringStop to_ty)
+ where
+ to_ty = contResultType cont
+
+-------------------
+contResultType :: SimplCont -> OutType
+contResultType (Stop to_ty _ _) = to_ty
+contResultType (ArgOf _ _ to_ty _) = to_ty
+contResultType (ApplyTo _ _ _ cont) = contResultType cont
+contResultType (CoerceIt _ cont) = contResultType cont
+contResultType (InlinePlease cont) = contResultType cont
+contResultType (Select _ _ _ _ cont) = contResultType cont
+
+-------------------
+countValArgs :: SimplCont -> Int
+countValArgs (ApplyTo _ (Type ty) se cont) = countValArgs cont
+countValArgs (ApplyTo _ val_arg se cont) = 1 + countValArgs cont
+countValArgs other = 0
+
+countArgs :: SimplCont -> Int
+countArgs (ApplyTo _ arg se cont) = 1 + countArgs cont
+countArgs other = 0
+
+-------------------
+pushContArgs :: SimplEnv -> [OutArg] -> SimplCont -> SimplCont
+-- Pushes args with the specified environment
+pushContArgs env [] cont = cont
+pushContArgs env (arg : args) cont = ApplyTo NoDup arg env (pushContArgs env args cont)
+\end{code}
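+
+To see how these constructors stack up (illustration only): when the
+simplifier works on the f in
+
+	case f x of y { alts }
+
+it is handed a continuation of roughly the shape
+
+	ApplyTo NoDup x env (Select NoDup y alts env (mkBoringStop ty))
+
+where ty is the type of the whole case expression; rebuilding f against that
+continuation reconstructs first the application and then the case.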
+
+
+\begin{code}
+getContArgs :: SwitchChecker
+ -> OutId -> SimplCont
+ -> ([(InExpr, SimplEnv, Bool)], -- Arguments; the Bool is true for strict args
+ SimplCont, -- Remaining continuation
+ Bool) -- Whether we came across an InlineCall
+-- getContArgs id k = (args, k', inl)
+-- args are the leading ApplyTo items in k
+-- (i.e. outermost comes first)
+--	augmented with demand info from the function
+getContArgs chkr fun orig_cont
+ = let
+ -- Ignore strictness info if the no-case-of-case
+ -- flag is on. Strictness changes evaluation order
+ -- and that can change full laziness
+ stricts | switchIsOn chkr NoCaseOfCase = vanilla_stricts
+ | otherwise = computed_stricts
+ in
+ go [] stricts False orig_cont
+ where
+ ----------------------------
+
+ -- Type argument
+ go acc ss inl (ApplyTo _ arg@(Type _) se cont)
+ = go ((arg,se,False) : acc) ss inl cont
+ -- NB: don't bother to instantiate the function type
+
+ -- Value argument
+ go acc (s:ss) inl (ApplyTo _ arg se cont)
+ = go ((arg,se,s) : acc) ss inl cont
+
+ -- An Inline continuation
+ go acc ss inl (InlinePlease cont)
+ = go acc ss True cont
+
+	-- We've run out of arguments, or else we've run out of demands
+ -- The latter only happens if the result is guaranteed bottom
+ -- This is the case for
+ -- * case (error "hello") of { ... }
+ -- * (error "Hello") arg
+ -- * f (error "Hello") where f is strict
+ -- etc
+ -- Then, especially in the first of these cases, we'd like to discard
+ -- the continuation, leaving just the bottoming expression. But the
+ -- type might not be right, so we may have to add a coerce.
+ go acc ss inl cont
+ | null ss && discardableCont cont = (reverse acc, discardCont cont, inl)
+ | otherwise = (reverse acc, cont, inl)
+
+ ----------------------------
+ vanilla_stricts, computed_stricts :: [Bool]
+ vanilla_stricts = repeat False
+ computed_stricts = zipWith (||) fun_stricts arg_stricts
+
+ ----------------------------
+ (val_arg_tys, _) = splitFunTys (dropForAlls (idType fun))
+ arg_stricts = map isStrictType val_arg_tys ++ repeat False
+ -- These argument types are used as a cheap and cheerful way to find
+ -- unboxed arguments, which must be strict. But it's an InType
+ -- and so there might be a type variable where we expect a function
+ -- type (the substitution hasn't happened yet). And we don't bother
+ -- doing the type applications for a polymorphic function.
+	-- Hence the dropForAlls before the splitFunTys
+
+ ----------------------------
+ -- If fun_stricts is finite, it means the function returns bottom
+ -- after that number of value args have been consumed
+ -- Otherwise it's infinite, extended with False
+ fun_stricts
+ = case splitStrictSig (idNewStrictness fun) of
+ (demands, result_info)
+ | not (demands `lengthExceeds` countValArgs orig_cont)
+ -> -- Enough args, use the strictness given.
+ -- For bottoming functions we used to pretend that the arg
+ -- is lazy, so that we don't treat the arg as an
+ -- interesting context. This avoids substituting
+ -- top-level bindings for (say) strings into
+ -- calls to error. But now we are more careful about
+			-- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
+ if isBotRes result_info then
+ map isStrictDmd demands -- Finite => result is bottom
+ else
+ map isStrictDmd demands ++ vanilla_stricts
+
+ other -> vanilla_stricts -- Not enough args, or no strictness
+
+-------------------
+interestingArg :: OutExpr -> Bool
+ -- An argument is interesting if it has *some* structure
+ -- We are here trying to avoid unfolding a function that
+ -- is applied only to variables that have no unfolding
+ -- (i.e. they are probably lambda bound): f x y z
+ -- There is little point in inlining f here.
+interestingArg (Var v) = hasSomeUnfolding (idUnfolding v)
+ -- Was: isValueUnfolding (idUnfolding v')
+ -- But that seems over-pessimistic
+ || isDataConWorkId v
+ -- This accounts for an argument like
+ -- () or [], which is definitely interesting
+interestingArg (Type _) = False
+interestingArg (App fn (Type _)) = interestingArg fn
+interestingArg (Note _ a) = interestingArg a
+interestingArg other = True
+ -- Consider let x = 3 in f x
+ -- The substitution will contain (x -> ContEx 3), and we want to
+	-- say that x is an interesting argument.
+ -- But consider also (\x. f x y) y
+ -- The substitution will contain (x -> ContEx y), and we want to say
+ -- that x is not interesting (assuming y has no unfolding)
+\end{code}
+
+Comment about interestingCallContext
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We want to avoid inlining an expression where there can't possibly be
+any gain, such as in an argument position. Hence, if the continuation
+is interesting (eg. a case scrutinee, application etc.) then we
+inline, otherwise we don't.
+
+Previously some_benefit used to return True only if the variable was
+applied to some value arguments. This didn't work:
+
+ let x = _coerce_ (T Int) Int (I# 3) in
+ case _coerce_ Int (T Int) x of
+ I# y -> ....
+
+we want to inline x, but can't see that it's a constructor in a case
+scrutinee position, and some_benefit is False.
+
+Another example:
+
+dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
+
+.... case dMonadST _@_ x0 of (a,b,c) -> ....
+
+we'd really like to inline dMonadST here, but we *don't* want to
+inline if the case expression is just
+
+ case x of y { DEFAULT -> ... }
+
+since we can just eliminate this case instead (x is in WHNF). Similar
+applies when x is bound to a lambda expression. Hence
+contIsInteresting looks for case expressions with just a single
+default case.
+
+\begin{code}
+interestingCallContext :: Bool -- False <=> no args at all
+ -> Bool -- False <=> no value args
+ -> SimplCont -> Bool
+ -- The "lone-variable" case is important. I spent ages
+	-- messing about with unsatisfactory variants, but this is nice.
+	-- The idea is that if a variable appears all alone
+ -- as an arg of lazy fn, or rhs Stop
+ -- as scrutinee of a case Select
+ -- as arg of a strict fn ArgOf
+ -- then we should not inline it (unless there is some other reason,
+	-- e.g. it is the sole occurrence).  We achieve this by making
+ -- interestingCallContext return False for a lone variable.
+ --
+ -- Why? At least in the case-scrutinee situation, turning
+ -- let x = (a,b) in case x of y -> ...
+ -- into
+ -- let x = (a,b) in case (a,b) of y -> ...
+ -- and thence to
+ -- let x = (a,b) in let y = (a,b) in ...
+ -- is bad if the binding for x will remain.
+ --
+ -- Another example: I discovered that strings
+ -- were getting inlined straight back into applications of 'error'
+ -- because the latter is strict.
+ -- s = "foo"
+ -- f = \x -> ...(error s)...
+
+	-- Fundamentally such contexts should not encourage inlining because
+ -- the context can ``see'' the unfolding of the variable (e.g. case or a RULE)
+ -- so there's no gain.
+ --
+ -- However, even a type application or coercion isn't a lone variable.
+ -- Consider
+ -- case $fMonadST @ RealWorld of { :DMonad a b c -> c }
+ -- We had better inline that sucker! The case won't see through it.
+ --
+	-- For now, I'm treating a variable applied to types
+ -- in a *lazy* context "lone". The motivating example was
+ -- f = /\a. \x. BIG
+ -- g = /\a. \y. h (f a)
+ -- There's no advantage in inlining f here, and perhaps
+ -- a significant disadvantage. Hence some_val_args in the Stop case
+
+interestingCallContext some_args some_val_args cont
+ = interesting cont
+ where
+ interesting (InlinePlease _) = True
+ interesting (Select _ _ _ _ _) = some_args
+ interesting (ApplyTo _ _ _ _) = True -- Can happen if we have (coerce t (f x)) y
+ -- Perhaps True is a bit over-keen, but I've
+ -- seen (coerce f) x, where f has an INLINE prag,
+					-- So we have to give some motivation for inlining it
+ interesting (ArgOf _ _ _ _) = some_val_args
+ interesting (Stop ty _ upd_in_place) = some_val_args && upd_in_place
+ interesting (CoerceIt _ cont) = interesting cont
+ -- If this call is the arg of a strict function, the context
+ -- is a bit interesting. If we inline here, we may get useful
+ -- evaluation information to avoid repeated evals: e.g.
+ -- x + (y * z)
+ -- Here the contIsInteresting makes the '*' keener to inline,
+ -- which in turn exposes a constructor which makes the '+' inline.
+ -- Assuming that +,* aren't small enough to inline regardless.
+ --
+ -- It's also very important to inline in a strict context for things
+ -- like
+ -- foldr k z (f x)
+ -- Here, the context of (f x) is strict, and if f's unfolding is
+ -- a build it's *great* to inline it here. So we must ensure that
+ -- the context for (f x) is not totally uninteresting.
+
+
+-------------------
+canUpdateInPlace :: Type -> Bool
+-- Consider let x = <wurble> in ...
+-- If <wurble> returns an explicit constructor, we might be able
+-- to do update in place. So we treat even a thunk RHS context
+-- as interesting if update in place is possible. We approximate
+-- this by seeing if the type has a single constructor with a
+-- small arity. But arity zero isn't good -- we share the single copy
+-- for that case, so no point in sharing.
+
+canUpdateInPlace ty
+ | not opt_UF_UpdateInPlace = False
+ | otherwise
+ = case splitTyConApp_maybe ty of
+ Nothing -> False
+ Just (tycon, _) -> case tyConDataCons_maybe tycon of
+ Just [dc] -> arity == 1 || arity == 2
+ where
+ arity = dataConRepArity dc
+ other -> False
+\end{code}
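+
+So, for example (illustration only), a thunk whose type is a pair type (one
+constructor, arity 2) counts as update-in-place-able, whereas Bool (two
+constructors) and single-constructor types of arity 0 or 3 do not; and the
+whole test is switched off unless opt_UF_UpdateInPlace is set.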
+
+
+
+%************************************************************************
+%* *
+\subsection{Decisions about inlining}
+%* *
+%************************************************************************
+
+Inlining is controlled partly by the SimplifierMode switch. This has two
+settings:
+
+ SimplGently (a) Simplifying before specialiser/full laziness
+			(b) Simplifying inside an INLINE pragma
+ (c) Simplifying the LHS of a rule
+ (d) Simplifying a GHCi expression or Template
+ Haskell splice
+
+ SimplPhase n Used at all other times
+
+The key thing about SimplGently is that it does no call-site inlining.
+Before full laziness we must be careful not to inline wrappers,
+because doing so inhibits floating
+ e.g. ...(case f x of ...)...
+ ==> ...(case (case x of I# x# -> fw x#) of ...)...
+ ==> ...(case x of I# x# -> case fw x# of ...)...
+and now the redex (f x) isn't floatable any more.
+
+The no-inlining thing is also important for Template Haskell.  You might be
+compiling in one-shot mode with -O2; but when TH compiles a splice before
+running it, we don't want to use -O2. Indeed, we don't want to inline
+anything, because the byte-code interpreter might get confused about
+unboxed tuples and suchlike.
+
+INLINE pragmas
+~~~~~~~~~~~~~~
+SimplGently is also used as the mode to simplify inside an InlineMe note.
+
+\begin{code}
+inlineMode :: SimplifierMode
+inlineMode = SimplGently
+\end{code}
+
+It really is important to switch off inlinings inside such
+expressions. Consider the following example
+
+ let f = \pq -> BIG
+ in
+ let g = \y -> f y y
+ {-# INLINE g #-}
+ in ...g...g...g...g...g...
+
+Now, if that's the ONLY occurrence of f, it will be inlined inside g,
+and thence copied multiple times when g is inlined.
+
+
+This function may be inlined in other modules, so we
+don't want to remove (by inlining) calls to functions that have
+specialisations, or that may have transformation rules in an importing
+scope.
+
+E.g. {-# INLINE f #-}
+ f x = ...g...
+
+and suppose that g is strict *and* has specialisations. If we inline
+g's wrapper, we deny f the chance of getting the specialised version
+of g when f is inlined at some call site (perhaps in some other
+module).
+
+It's also important not to inline a worker back into a wrapper.
+A wrapper looks like
+	wrapper = inline_me (\x -> ...worker... )
+Normally, the inline_me prevents the worker getting inlined into
+the wrapper (initially, the worker's only call site!). But,
+if the wrapper is sure to be called, the strictness analyser will
+mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
+continuation. That's why the keep_inline predicate returns True for
+ArgOf continuations. It shouldn't do any harm not to dissolve the
+inline-me note under these circumstances.
+
+Note that the result is that we do very little simplification
+inside an InlineMe.
+
+ all xs = foldr (&&) True xs
+ any p = all . map p {-# INLINE any #-}
+
+Problem: any won't get deforested, and so if it's exported and the
+importer doesn't use the inlining, (eg passes it as an arg) then we
+won't get deforestation at all.  We haven't solved this problem yet!
+
+
+preInlineUnconditionally
+~~~~~~~~~~~~~~~~~~~~~~~~
+@preInlineUnconditionally@ examines a bndr to see if it is used just
+once in a completely safe way, so that it is safe to discard the
+binding and inline its RHS at the (unique) usage site, REGARDLESS of how
+big the RHS might be. If this is the case we don't simplify the RHS
+first, but just inline it un-simplified.
+
+This is much better than first simplifying a perhaps-huge RHS and then
+inlining and re-simplifying it. Indeed, it can be at least quadratically
+better. Consider
+
+ x1 = e1
+ x2 = e2[x1]
+ x3 = e3[x2]
+ ...etc...
+ xN = eN[xN-1]
+
+We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times, etc.
+This can happen with cascades of functions too:
+
+ f1 = \x1.e1
+ f2 = \xs.e2[f1]
+	f3 = \xs.e3[f2]
+ ...etc...
+
+THE MAIN INVARIANT is this:
+
+ ---- preInlineUnconditionally invariant -----
+ IF preInlineUnconditionally chooses to inline x = <rhs>
+ THEN doing the inlining should not change the occurrence
+ info for the free vars of <rhs>
+ ----------------------------------------------
+
+For example, it's tempting to look at trivial binding like
+ x = y
+and inline it unconditionally. But suppose x is used many times,
+but this is the unique occurrence of y. Then inlining x would change
+y's occurrence info, which breaks the invariant. It matters: y
+might have a BIG rhs, which will now be dup'd at every occurrence of x.
+
+
+Even RHSs labelled InlineMe aren't caught here, because there might be
+no benefit from inlining at the call site.
+
+[Sept 01] Don't unconditionally inline a top-level thing, because that
+can simply make a static thing into something built dynamically. E.g.
+ x = (a,b)
+ main = \s -> h x
+
+[Remember that we treat \s as a one-shot lambda.] No point in
+inlining x unless there is something interesting about the call site.
+
+But watch out: if you aren't careful, some useful foldr/build fusion
+can be lost (most notably in spectral/hartel/parstof) because the
+foldr didn't see the build. Doing the dynamic allocation isn't a big
+deal, in fact, but losing the fusion can be. But the right thing here
+seems to be to do a callSiteInline based on the fact that there is
+something interesting about the call site (it's strict). Hmm. That
+seems a bit fragile.
+
+Conclusion: inline top level things gaily until Phase 0 (the last
+phase), at which point don't.
+
+\begin{code}
+preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
+preInlineUnconditionally env top_lvl bndr rhs
+ | not active = False
+ | opt_SimplNoPreInlining = False
+ | otherwise = case idOccInfo bndr of
+ IAmDead -> True -- Happens in ((\x.1) v)
+ OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
+ other -> False
+ where
+ phase = getMode env
+ active = case phase of
+ SimplGently -> isAlwaysActive prag
+ SimplPhase n -> isActive n prag
+ prag = idInlinePragma bndr
+
+ try_once in_lam int_cxt -- There's one textual occurrence
+ | not in_lam = isNotTopLevel top_lvl || early_phase
+ | otherwise = int_cxt && canInlineInLam rhs
+
+-- Be very careful before inlining inside a lambda, because (a) we must not
+-- invalidate occurrence information, and (b) we want to avoid pushing a
+-- single allocation (here) into multiple allocations (inside lambda).
+-- Inlining a *function* with a single *saturated* call would be ok, mind you.
+-- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
+-- where
+-- is_cheap = exprIsCheap rhs
+-- ok = is_cheap && int_cxt
+
+ -- int_cxt The context isn't totally boring
+ -- E.g. let f = \ab.BIG in \y. map f xs
+ -- Don't want to substitute for f, because then we allocate
+ -- its closure every time the \y is called
+ -- But: let f = \ab.BIG in \y. map (f y) xs
+ -- Now we do want to substitute for f, even though it's not
+ -- saturated, because we're going to allocate a closure for
+ -- (f y) every time round the loop anyhow.
+
+ -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
+ -- so substituting rhs inside a lambda doesn't change the occ info.
+ -- Sadly, not quite the same as exprIsHNF.
+ canInlineInLam (Lit l) = True
+ canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e
+ canInlineInLam (Note _ e) = canInlineInLam e
+ canInlineInLam _ = False
+
+ early_phase = case phase of
+ SimplPhase 0 -> False
+ other -> True
+-- If we don't have this early_phase test, consider
+-- x = length [1,2,3]
+-- The full laziness pass carefully floats all the cons cells to
+-- top level, and preInlineUnconditionally floats them all back in.
+-- Result is (a) static allocation replaced by dynamic allocation
+-- (b) many simplifier iterations because this tickles
+-- a related problem; only one inlining per pass
+--
+-- On the other hand, I have seen cases where top-level fusion is
+-- lost if we don't inline top level thing (e.g. string constants)
+-- Hence the test for phase zero (which is the phase for all the final
+-- simplifications). Until phase zero we take no special notice of
+-- top level things, but then we become more leery about inlining
+-- them.
+
+\end{code}
+
+postInlineUnconditionally
+~~~~~~~~~~~~~~~~~~~~~~~~~
+@postInlineUnconditionally@ decides whether to unconditionally inline
+a thing based on the form of its RHS; in particular if it has a
+trivial RHS. If so, we can inline and discard the binding altogether.
+
+NB: a loop breaker has must_keep_binding = True and non-loop-breakers
+only have *forward* references.  Hence, it's safe to discard the binding.
+
+NOTE: This isn't our last opportunity to inline. We're at the binding
+site right now, and we'll get another opportunity when we get to the
+occurrence(s).
+
+Note that we do this unconditional inlining only for trivial RHSs.
+Don't inline even WHNFs inside lambdas; doing so may simply increase
+allocation when the function is called. This isn't the last chance; see
+NOTE above.
+
+NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
+Because we don't even want to inline them into the RHS of constructor
+arguments. See NOTE above
+
+NB: At one time even NOINLINE was ignored here: if the rhs is trivial
+it's best to inline it anyway. We often get a=E; b=a from desugaring,
+with both a and b marked NOINLINE. But that seems incompatible with
+our new view that inlining is like a RULE, so I'm sticking to the 'active'
+story for now.
+
+\begin{code}
+postInlineUnconditionally :: SimplEnv -> TopLevelFlag -> OutId -> OccInfo -> OutExpr -> Unfolding -> Bool
+postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
+ | not active = False
+ | isLoopBreaker occ_info = False
+ | isExportedId bndr = False
+ | exprIsTrivial rhs = True
+ | otherwise
+ = case occ_info of
+ OneOcc in_lam one_br int_cxt
+ -> (one_br || smallEnoughToInline unfolding) -- Small enough to dup
+ -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
+ --
+		-- NB: Do we want to inline arbitrarily big things because
+ -- one_br is True? that can lead to inline cascades. But
+		-- preInlineUnconditionally has dealt with all the common cases
+ -- so perhaps it's worth the risk. Here's an example
+ -- let f = if b then Left (\x.BIG) else Right (\y.BIG)
+ -- in \y. ....f....
+		-- We can't preInlineUnconditionally because that would invalidate
+ -- the occ info for b. Yet f is used just once, and duplicating
+ -- the case work is fine (exprIsCheap).
+
+ && ((isNotTopLevel top_lvl && not in_lam) ||
+ -- But outside a lambda, we want to be reasonably aggressive
+ -- about inlining into multiple branches of case
+ -- e.g. let x = <non-value>
+ -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
+ -- Inlining can be a big win if C3 is the hot-spot, even if
+ -- the uses in C1, C2 are not 'interesting'
+ -- An example that gets worse if you add int_cxt here is 'clausify'
+
+ (isCheapUnfolding unfolding && int_cxt))
+ -- isCheap => acceptable work duplication; in_lam may be true
+ -- int_cxt to prevent us inlining inside a lambda without some
+ -- good reason. See the notes on int_cxt in preInlineUnconditionally
+
+ other -> False
+ -- The point here is that for *non-values* that occur
+ -- outside a lambda, the call-site inliner won't have
+		-- a chance (because it doesn't know that the thing
+ -- only occurs once). The pre-inliner won't have gotten
+ -- it either, if the thing occurs in more than one branch
+ -- So the main target is things like
+ -- let x = f y in
+ -- case v of
+ -- True -> case x of ...
+ -- False -> case x of ...
+ -- I'm not sure how important this is in practice
+ where
+ active = case getMode env of
+ SimplGently -> isAlwaysActive prag
+ SimplPhase n -> isActive n prag
+ prag = idInlinePragma bndr
+
+activeInline :: SimplEnv -> OutId -> OccInfo -> Bool
+activeInline env id occ
+ = case getMode env of
+ SimplGently -> isOneOcc occ && isAlwaysActive prag
+ -- No inlining at all when doing gentle stuff,
+ -- except for local things that occur once
+ -- The reason is that too little clean-up happens if you
+ -- don't inline use-once things. Also a bit of inlining is *good* for
+ -- full laziness; it can expose constant sub-expressions.
+ -- Example in spectral/mandel/Mandel.hs, where the mandelset
+ -- function gets a useful let-float if you inline windowToViewport
+
+ -- NB: we used to have a second exception, for data con wrappers.
+ -- On the grounds that we use gentle mode for rule LHSs, and
+ -- they match better when data con wrappers are inlined.
+ -- But that only really applies to the trivial wrappers (like (:)),
+ -- and they are now constructed as Compulsory unfoldings (in MkId)
+ -- so they'll happen anyway.
+
+ SimplPhase n -> isActive n prag
+ where
+ prag = idInlinePragma id
+
+activeRule :: SimplEnv -> Maybe (Activation -> Bool)
+-- Nothing => No rules at all
+activeRule env
+ | opt_RulesOff = Nothing
+ | otherwise
+ = case getMode env of
+ SimplGently -> Just isAlwaysActive
+ -- Used to be Nothing (no rules in gentle mode)
+ -- Main motivation for changing is that I wanted
+ -- lift String ===> ...
+ -- to work in Template Haskell when simplifying
+ -- splices, so we get simpler code for literal strings
+ SimplPhase n -> Just (isActive n)
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Rebuilding a lambda}
+%* *
+%************************************************************************
+
+\begin{code}
+mkLam :: SimplEnv -> [OutBinder] -> OutExpr -> SimplCont -> SimplM FloatsWithExpr
+\end{code}
+
+Try three things
+ a) eta reduction, if that gives a trivial expression
+ b) eta expansion [only if there are some value lambdas]
+ c) floating lets out through big lambdas
+ [only if all tyvar lambdas, and only if this lambda
+ is the RHS of a let]
+
+\begin{code}
+mkLam env bndrs body cont
+ = getDOptsSmpl `thenSmpl` \dflags ->
+ mkLam' dflags env bndrs body cont
+ where
+ mkLam' dflags env bndrs body cont
+ | dopt Opt_DoEtaReduction dflags,
+ Just etad_lam <- tryEtaReduce bndrs body
+ = tick (EtaReduction (head bndrs)) `thenSmpl_`
+ returnSmpl (emptyFloats env, etad_lam)
+
+ | dopt Opt_DoLambdaEtaExpansion dflags,
+ any isRuntimeVar bndrs
+ = tryEtaExpansion body `thenSmpl` \ body' ->
+ returnSmpl (emptyFloats env, mkLams bndrs body')
+
+{- Sept 01: I'm experimenting with getting the
+   full laziness pass to float out past big lambdas
+ | all isTyVar bndrs, -- Only for big lambdas
+ contIsRhs cont -- Only try the rhs type-lambda floating
+ -- if this is indeed a right-hand side; otherwise
+ -- we end up floating the thing out, only for float-in
+ -- to float it right back in again!
+ = tryRhsTyLam env bndrs body `thenSmpl` \ (floats, body') ->
+ returnSmpl (floats, mkLams bndrs body')
+-}
+
+ | otherwise
+ = returnSmpl (emptyFloats env, mkLams bndrs body)
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Eta expansion and reduction}
+%* *
+%************************************************************************
+
+We try for eta reduction here, but *only* if we get all the
+way to an exprIsTrivial expression.
+We don't want to remove extra lambdas unless we are going
+to avoid allocating this thing altogether
+
+\begin{code}
+tryEtaReduce :: [OutBinder] -> OutExpr -> Maybe OutExpr
+tryEtaReduce bndrs body
+ -- We don't use CoreUtils.etaReduce, because we can be more
+ -- efficient here:
+ -- (a) we already have the binders
+ -- (b) we can do the triviality test before computing the free vars
+ = go (reverse bndrs) body
+ where
+ go (b : bs) (App fun arg) | ok_arg b arg = go bs fun -- Loop round
+ go [] fun | ok_fun fun = Just fun -- Success!
+ go _ _ = Nothing -- Failure!
+
+ ok_fun fun = exprIsTrivial fun
+ && not (any (`elemVarSet` (exprFreeVars fun)) bndrs)
+ && (exprIsHNF fun || all ok_lam bndrs)
+ ok_lam v = isTyVar v || isDictId v
+ -- The exprIsHNF is because eta reduction is not
+ -- valid in general: \x. bot /= bot
+ -- So we need to be sure that the "fun" is a value.
+ --
+ -- However, we always want to reduce (/\a -> f a) to f
+ -- This came up in a RULE: foldr (build (/\a -> g a))
+ -- did not match foldr (build (/\b -> ...something complex...))
+ -- The type checker can insert these eta-expanded versions,
+ -- with both type and dictionary lambdas; hence the slightly
+	-- ad-hoc isDictId
+
+ ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg
+\end{code}
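+
+For instance (illustration only):
+
+	\x y -> f x y   ==>   f		-- provided f is a trivial value
+					-- not mentioning x or y
+	\x   -> f x x			-- not reduced: after peeling the outer
+					-- argument, (f x) still mentions x
+	/\a  -> f a     ==>   f		-- always, via the ok_lam test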
+
+
+ Try eta expansion for RHSs
+
+We go for:
+ f = \x1..xn -> N ==> f = \x1..xn y1..ym -> N y1..ym
+ (n >= 0)
+
+where (in both cases)
+
+ * The xi can include type variables
+
+ * The yi are all value variables
+
+ * N is a NORMAL FORM (i.e. no redexes anywhere)
+ wanting a suitable number of extra args.
+
+We may have to sandwich some coerces between the lambdas
+to make the types work. exprEtaExpandArity looks through coerces
+when computing arity; and etaExpand adds the coerces as necessary when
+actually computing the expansion.
+
+\begin{code}
+tryEtaExpansion :: OutExpr -> SimplM OutExpr
+-- There is at least one runtime binder in the binders
+tryEtaExpansion body
+ = getUniquesSmpl `thenSmpl` \ us ->
+ returnSmpl (etaExpand fun_arity us body (exprType body))
+ where
+ fun_arity = exprEtaExpandArity body
+\end{code}
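+
+A concrete instance (illustration only, and assuming exprEtaExpandArity sees
+through nothing tricky): if the body is (g x), where g has arity 3, the
+computed arity is 2 and the expansion is
+
+	\y1 y2 -> g x y1 y2
+
+with y1, y2 fresh from getUniquesSmpl, and coerces inserted by etaExpand if
+the types require them.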
+
+
+%************************************************************************
+%* *
+\subsection{Floating lets out of big lambdas}
+%* *
+%************************************************************************
+
+tryRhsTyLam tries this transformation, when the big lambda appears as
+the RHS of a let(rec) binding:
+
+ /\abc -> let(rec) x = e in b
+ ==>
+ let(rec) x' = /\abc -> let x = x' a b c in e
+ in
+ /\abc -> let x = x' a b c in b
+
+This is good because it can turn things like:
+
+ let f = /\a -> letrec g = ... g ... in g
+into
+ letrec g' = /\a -> ... g' a ...
+ in
+ let f = /\ a -> g' a
+
+which is better. In effect, it means that big lambdas don't impede
+let-floating.
+
+This optimisation is CRUCIAL in eliminating the junk introduced by
+desugaring mutually recursive definitions. Don't eliminate it lightly!
+
+So far as the implementation is concerned:
+
+ Invariant: go F e = /\tvs -> F e
+
+ Equalities:
+ go F (Let x=e in b)
+ = Let x' = /\tvs -> F e
+ in
+ go G b
+ where
+ G = F . Let x = x' tvs
+
+ go F (Letrec xi=ei in b)
+ = Letrec {xi' = /\tvs -> G ei}
+ in
+ go G b
+ where
+ G = F . Let {xi = xi' tvs}
+
+[May 1999] If we do this transformation *regardless* then we can
+end up with some pretty silly stuff. For example,
+
+ let
+ st = /\ s -> let { x1=r1 ; x2=r2 } in ...
+ in ..
+becomes
+ let y1 = /\s -> r1
+ y2 = /\s -> r2
+ st = /\s -> ...[y1 s/x1, y2 s/x2]
+ in ..
+
+Unless the "..." is a WHNF there is really no point in doing this.
+Indeed it can make things worse. Suppose x1 is used strictly,
+and is of the form
+
+ x1* = case f y of { (a,b) -> e }
+
+If we abstract this wrt the tyvar we then can't do the case inline
+as we would normally do.
+
+
+\begin{code}
+{- Trying to do this in full laziness
+
+tryRhsTyLam :: SimplEnv -> [OutTyVar] -> OutExpr -> SimplM FloatsWithExpr
+-- Call ensures that all the binders are type variables
+
+tryRhsTyLam env tyvars body -- Only does something if there's a let
+ | not (all isTyVar tyvars)
+ || not (worth_it body) -- inside a type lambda,
+ = returnSmpl (emptyFloats env, body) -- and a WHNF inside that
+
+ | otherwise
+ = go env (\x -> x) body
+
+ where
+ worth_it e@(Let _ _) = whnf_in_middle e
+ worth_it e = False
+
+ whnf_in_middle (Let (NonRec x rhs) e) | isUnLiftedType (idType x) = False
+ whnf_in_middle (Let _ e) = whnf_in_middle e
+ whnf_in_middle e = exprIsCheap e
+
+ main_tyvar_set = mkVarSet tyvars
+
+ go env fn (Let bind@(NonRec var rhs) body)
+ | exprIsTrivial rhs
+ = go env (fn . Let bind) body
+
+ go env fn (Let (NonRec var rhs) body)
+ = mk_poly tyvars_here var `thenSmpl` \ (var', rhs') ->
+ addAuxiliaryBind env (NonRec var' (mkLams tyvars_here (fn rhs))) $ \ env ->
+ go env (fn . Let (mk_silly_bind var rhs')) body
+
+ where
+
+ tyvars_here = varSetElems (main_tyvar_set `intersectVarSet` exprSomeFreeVars isTyVar rhs)
+ -- Abstract only over the type variables free in the rhs
+ -- wrt which the new binding is abstracted. But the naive
+ -- approach of abstract wrt the tyvars free in the Id's type
+ -- fails. Consider:
+ -- /\ a b -> let t :: (a,b) = (e1, e2)
+ -- x :: a = fst t
+ -- in ...
+ -- Here, b isn't free in x's type, but we must nevertheless
+ -- abstract wrt b as well, because t's type mentions b.
+ -- Since t is floated too, we'd end up with the bogus:
+ -- poly_t = /\ a b -> (e1, e2)
+ -- poly_x = /\ a -> fst (poly_t a *b*)
+ -- So for now we adopt the even more naive approach of
+ -- abstracting wrt *all* the tyvars. We'll see if that
+ -- gives rise to problems. SLPJ June 98
+
+ go env fn (Let (Rec prs) body)
+ = mapAndUnzipSmpl (mk_poly tyvars_here) vars `thenSmpl` \ (vars', rhss') ->
+ let
+ gn body = fn (foldr Let body (zipWith mk_silly_bind vars rhss'))
+ pairs = vars' `zip` [mkLams tyvars_here (gn rhs) | rhs <- rhss]
+ in
+ addAuxiliaryBind env (Rec pairs) $ \ env ->
+ go env gn body
+ where
+ (vars,rhss) = unzip prs
+ tyvars_here = varSetElems (main_tyvar_set `intersectVarSet` exprsSomeFreeVars isTyVar (map snd prs))
+ -- See notes with tyvars_here above
+
+ go env fn body = returnSmpl (emptyFloats env, fn body)
+
+ mk_poly tyvars_here var
+ = getUniqueSmpl `thenSmpl` \ uniq ->
+ let
+ poly_name = setNameUnique (idName var) uniq -- Keep same name
+ poly_ty = mkForAllTys tyvars_here (idType var) -- But new type of course
+ poly_id = mkLocalId poly_name poly_ty
+
+ -- In the olden days, it was crucial to copy the occInfo of the original var,
+ -- because we were looking at occurrence-analysed but as yet unsimplified code!
+ -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
+ -- at already simplified code, so it doesn't matter
+ --
+ -- It's even right to retain single-occurrence or dead-var info:
+ -- Suppose we started with /\a -> let x = E in B
+ -- where x occurs once in B. Then we transform to:
+ -- let x' = /\a -> E in /\a -> let x* = x' a in B
+ -- where x* has an INLINE prag on it. Now, once x* is inlined,
+ -- the occurrences of x' will be just the occurrences originally
+ -- pinned on x.
+ in
+ returnSmpl (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tyvars_here))
+
+ mk_silly_bind var rhs = NonRec var (Note InlineMe rhs)
+ -- Suppose we start with:
+ --
+ -- x = /\ a -> let g = G in E
+ --
+ -- Then we'll float to get
+ --
+ -- x = let poly_g = /\ a -> G
+ -- in /\ a -> let g = poly_g a in E
+ --
+ -- But now the occurrence analyser will see just one occurrence
+ -- of poly_g, not inside a lambda, so the simplifier will
+	-- PreInlineUnconditionally poly_g back into g!  Back to square 1!
+ -- (I used to think that the "don't inline lone occurrences" stuff
+ -- would stop this happening, but since it's the *only* occurrence,
+ -- PreInlineUnconditionally kicks in first!)
+ --
+ -- Solution: put an INLINE note on g's RHS, so that poly_g seems
+ -- to appear many times. (NB: mkInlineMe eliminates
+ -- such notes on trivial RHSs, so do it manually.)
+-}
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Case alternative filtering}
+%* *
+%************************************************************************
+
+prepareAlts does two things:
+
+1. Eliminate alternatives that cannot match, including the
+ DEFAULT alternative.
+
+2. If the DEFAULT alternative can match only one possible constructor,
+ then make that constructor explicit.
+ e.g.
+ case e of x { DEFAULT -> rhs }
+ ===>
+ case e of x { (a,b) -> rhs }
+ where the type is a single constructor type. This gives better code
+ when rhs also scrutinises x or e.
+
+It's a good idea to do this stuff before simplifying the alternatives, to
+avoid simplifying alternatives we know can't happen, and to come up with
+the list of constructors that are handled, to put into the IdInfo of the
+case binder, for use when simplifying the alternatives.
+
+Eliminating the default alternative in (1) isn't so obvious, but it can
+happen:
+
+data Colour = Red | Green | Blue
+
+f x = case x of
+ Red -> ..
+ Green -> ..
+ DEFAULT -> h x
+
+h y = case y of
+ Blue -> ..
+ DEFAULT -> [ case y of ... ]
+
+If we inline h into f, the default case of the inlined h can't happen.
+If we don't notice this, we may end up filtering out *all* the cases
+of the inner case y, which give us nowhere to go!
+
+
+\begin{code}
+prepareAlts :: OutExpr -- Scrutinee
+ -> InId -- Case binder (passed only to use in statistics)
+ -> [InAlt] -- Increasing order
+	    -> SimplM ([InAlt],		-- Better alternatives, still increasing order
+ [AltCon]) -- These cases are handled
+
+prepareAlts scrut case_bndr alts
+ = let
+ (alts_wo_default, maybe_deflt) = findDefault alts
+
+ impossible_cons = case scrut of
+ Var v -> otherCons (idUnfolding v)
+ other -> []
+
+ -- Filter out alternatives that can't possibly match
+ better_alts | null impossible_cons = alts_wo_default
+ | otherwise = [alt | alt@(con,_,_) <- alts_wo_default,
+ not (con `elem` impossible_cons)]
+
+ -- "handled_cons" are handled either by the context,
+ -- or by a branch in this case expression
+ -- (Don't add DEFAULT to the handled_cons!!)
+ handled_cons = impossible_cons ++ [con | (con,_,_) <- better_alts]
+ in
+ -- Filter out the default, if it can't happen,
+ -- or replace it with "proper" alternative if there
+ -- is only one constructor left
+ prepareDefault scrut case_bndr handled_cons maybe_deflt `thenSmpl` \ deflt_alt ->
+
+ returnSmpl (mergeAlts better_alts deflt_alt, handled_cons)
+ -- We need the mergeAlts in case the new default_alt
+ -- has turned into a constructor alternative.
+
+prepareDefault scrut case_bndr handled_cons (Just rhs)
+ | Just (tycon, inst_tys) <- splitTyConApp_maybe (exprType scrut),
+ -- Use exprType scrut here, rather than idType case_bndr, because
+ -- case_bndr is an InId, so exprType scrut may have more information
+ -- Test simpl013 is an example
+ isAlgTyCon tycon, -- It's a data type, tuple, or unboxed tuple.
+ not (isNewTyCon tycon), -- We can have a newtype, if we are just doing an eval:
+ -- case x of { DEFAULT -> e }
+ -- and we don't want to fill in a default for them!
+ Just all_cons <- tyConDataCons_maybe tycon,
+ not (null all_cons), -- This is a tricky corner case. If the data type has no constructors,
+ -- which GHC allows, then the case expression will have at most a default
+ -- alternative. We don't want to eliminate that alternative, because the
+ -- invariant is that there's always one alternative. It's more convenient
+ -- to leave
+ -- case x of { DEFAULT -> e }
+ -- as it is, rather than transform it to
+ -- error "case cant match"
+ -- which would be quite legitimate. But it's a really obscure corner, and
+ -- not worth wasting code on.
+ let handled_data_cons = [data_con | DataAlt data_con <- handled_cons],
+ let missing_cons = [con | con <- all_cons,
+ not (con `elem` handled_data_cons)]
+ = case missing_cons of
+ [] -> returnSmpl [] -- Eliminate the default alternative
+ -- if it can't match
+
+ [con] -> -- It matches exactly one constructor, so fill it in
+ tick (FillInCaseDefault case_bndr) `thenSmpl_`
+ mk_args con inst_tys `thenSmpl` \ args ->
+ returnSmpl [(DataAlt con, args, rhs)]
+
+ two_or_more -> returnSmpl [(DEFAULT, [], rhs)]
+
+ | otherwise
+ = returnSmpl [(DEFAULT, [], rhs)]
+
+prepareDefault scrut case_bndr handled_cons Nothing
+ = returnSmpl []
+
+mk_args missing_con inst_tys
+ = mk_tv_bndrs missing_con inst_tys `thenSmpl` \ (tv_bndrs, inst_tys') ->
+ getUniquesSmpl `thenSmpl` \ id_uniqs ->
+ let arg_tys = dataConInstArgTys missing_con inst_tys'
+ arg_ids = zipWith (mkSysLocal FSLIT("a")) id_uniqs arg_tys
+ in
+ returnSmpl (tv_bndrs ++ arg_ids)
+
+mk_tv_bndrs missing_con inst_tys
+ | isVanillaDataCon missing_con
+ = returnSmpl ([], inst_tys)
+ | otherwise
+ = getUniquesSmpl `thenSmpl` \ tv_uniqs ->
+ let new_tvs = zipWith mk tv_uniqs (dataConTyVars missing_con)
+ mk uniq tv = mkTyVar (mkSysTvName uniq FSLIT("t")) (tyVarKind tv)
+ in
+ returnSmpl (new_tvs, mkTyVarTys new_tvs)
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Case absorption and identity-case elimination}
+%* *
+%************************************************************************
+
+mkCase puts a case expression back together, trying various transformations first.
+
+\begin{code}
+mkCase :: OutExpr -> OutId -> OutType
+ -> [OutAlt] -- Increasing order
+ -> SimplM OutExpr
+
+mkCase scrut case_bndr ty alts
+ = getDOptsSmpl `thenSmpl` \dflags ->
+ mkAlts dflags scrut case_bndr alts `thenSmpl` \ better_alts ->
+ mkCase1 scrut case_bndr ty better_alts
+\end{code}
+
+
+mkAlts tries these things:
+
+1. If several alternatives are identical, merge them into
+ a single DEFAULT alternative. I've occasionally seen this
+ making a big difference:
+
+ case e of =====> case e of
+ C _ -> f x D v -> ....v....
+ D v -> ....v.... DEFAULT -> f x
+ DEFAULT -> f x
+
+ The point is that we merge common RHSs, at least for the DEFAULT case.
+ [One could do something more elaborate but I've never seen it needed.]
+ To avoid an expensive test, we just merge branches equal to the *first*
+ alternative; this picks up the common cases
+ a) all branches equal
+ b) some branches equal to the DEFAULT (which occurs first)
+
+2. Case merging:
+ case e of b { ==> case e of b {
+ p1 -> rhs1 p1 -> rhs1
+ ... ...
+ pm -> rhsm pm -> rhsm
+ _ -> case b of b' { pn -> let b'=b in rhsn
+ pn -> rhsn ...
+ ... po -> let b'=b in rhso
+ po -> rhso _ -> let b'=b in rhsd
+ _ -> rhsd
+ }
+
+       which merges two cases into one when the default alternative of
+       the outer case scrutinises the same variable as the outer case does.
+       This transformation is called Case Merging. It avoids scrutinising
+       the same variable multiple times.
+
+
+The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs):
+
+ x | p `is` 1 -> e1
+ | p `is` 2 -> e2
+ ...etc...
+
+where @is@ was something like
+
+ p `is` n = p /= (-1) && p == n
+
+This gave rise to a horrible sequence of cases
+
+ case p of
+ (-1) -> $j p
+ 1 -> e1
+ DEFAULT -> $j p
+
+and similarly in cascade for all the join points!
+
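+After transformation (1) merges the branches that are equal to the default,
+each such case collapses to something like this (a sketch only):
+
+\begin{verbatim}
+  case p of
+    1       -> e1
+    DEFAULT -> $j p
+\end{verbatim}
+
+so the join point $j is now called from just one branch instead of two.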
+
+
+\begin{code}
+--------------------------------------------------
+-- 1. Merge identical branches
+--------------------------------------------------
+mkAlts dflags scrut case_bndr alts@((con1,bndrs1,rhs1) : con_alts)
+ | all isDeadBinder bndrs1, -- Remember the default
+ length filtered_alts < length con_alts -- alternative comes first
+ = tick (AltMerge case_bndr) `thenSmpl_`
+ returnSmpl better_alts
+ where
+ filtered_alts = filter keep con_alts
+ keep (con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
+ better_alts = (DEFAULT, [], rhs1) : filtered_alts
+
+
+--------------------------------------------------
+-- 2. Merge nested cases
+--------------------------------------------------
+
+mkAlts dflags scrut outer_bndr outer_alts
+ | dopt Opt_CaseMerge dflags,
+ (outer_alts_without_deflt, maybe_outer_deflt) <- findDefault outer_alts,
+ Just (Case (Var scrut_var) inner_bndr _ inner_alts) <- maybe_outer_deflt,
+ scruting_same_var scrut_var
+ = let
+ munged_inner_alts = [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts]
+ munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
+
+ new_alts = mergeAlts outer_alts_without_deflt munged_inner_alts
+ -- The merge keeps the inner DEFAULT at the front, if there is one
+ -- and eliminates any inner_alts that are shadowed by the outer_alts
+ in
+ tick (CaseMerge outer_bndr) `thenSmpl_`
+ returnSmpl new_alts
+ -- Warning: don't call mkAlts recursively!
+ -- Firstly, there's no point, because inner alts have already had
+ -- mkCase applied to them, so they won't have a case in their default
+ -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
+ -- in munge_rhs may put a case into the DEFAULT branch!
+ where
+ -- We are scrutinising the same variable if it's
+ -- the outer case-binder, or if the outer case scrutinises a variable
+ -- (and it's the same). Testing both allows us not to replace the
+ -- outer scrut-var with the outer case-binder (Simplify.simplCaseBinder).
+ scruting_same_var = case scrut of
+ Var outer_scrut -> \ v -> v == outer_bndr || v == outer_scrut
+ other -> \ v -> v == outer_bndr
+
+------------------------------------------------
+-- Catch-all
+------------------------------------------------
+
+mkAlts dflags scrut case_bndr other_alts = returnSmpl other_alts
+
+
+---------------------------------
+mergeAlts :: [OutAlt] -> [OutAlt] -> [OutAlt]
+-- Merge preserving order; alternatives in the first arg
+-- shadow ones in the second
+mergeAlts [] as2 = as2
+mergeAlts as1 [] = as1
+mergeAlts (a1:as1) (a2:as2)
+ = case a1 `cmpAlt` a2 of
+ LT -> a1 : mergeAlts as1 (a2:as2)
+ EQ -> a1 : mergeAlts as1 as2 -- Discard a2
+ GT -> a2 : mergeAlts (a1:as1) as2
+\end{code}
+
+
+
+=================================================================================
+
+mkCase1 tries these things
+
+1. Eliminate the case altogether if possible
+
+2. Case-identity:
+
+ case e of ===> e
+ True -> True;
+ False -> False
+
+ and similar friends.
+
+
+Start with a simple situation:
+
+ case x# of ===> e[x#/y#]
+ y# -> e
+
+(when x#, y# are of primitive type, of course). We can't (in general)
+do this for algebraic cases, because we might turn bottom into
+non-bottom!
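+
+A small Haskell-source illustration (the names loop and bad are purely
+hypothetical) of why dropping the evaluation is unsound for lifted types:
+
+\begin{verbatim}
+  loop :: (Int, Int)
+  loop = loop
+
+  bad :: Bool
+  bad = case loop of (_, _) -> True   -- diverges: the pair pattern forces loop
+  -- Rewriting bad to plain True would turn bottom into non-bottom,
+  -- so the case cannot simply be discarded.
+\end{verbatim}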
+
+Actually, we generalise this idea to look for a case where we're
+scrutinising a variable, and we know that only the default case can
+match. For example:
+\begin{verbatim}
+ case x of
+ 0# -> ...
+ other -> ...(case x of
+ 0# -> ...
+ other -> ...) ...
+\end{verbatim}
+Here the inner case can be eliminated. This really only shows up in
+eliminating error-checking code.
+
+We also make sure that we deal with this very common case:
+
+ case e of
+ x -> ...x...
+
+Here we are using the case as a strict let; if x is used only once
+then we want to inline it. We have to be careful that this doesn't
+make the program terminate when it would have diverged before, so we
+check that
+ - x is used strictly, or
+ - e is already evaluated (it may be so if e is a variable)
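+
+For instance (a sketch; here v is a variable, and we assume either that v is
+already evaluated or that g is strict in its first argument, so dropping the
+evaluation cannot change termination):
+
+\begin{verbatim}
+  case v of x -> g x x
+     ===>
+  let x = v in g x x
+\end{verbatim}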
+
+Lastly, we generalise the transformation to handle this:
+
+ case e of ===> r
+ True -> r
+ False -> r
+
+We only do this when the r's are very cheap to compare (constructors,
+literals and variables). If pedantic bottoms is on, we only do it when
+the scrutinee is a PrimOp which can't fail.
+
+We do it *here*, looking at un-simplified alternatives, because we
+have to check that r doesn't mention the variables bound by the
+pattern in each alternative, so the binder-info is rather useful.
+
+So the case-elimination algorithm is:
+
+ 1. Eliminate alternatives which can't match
+
+ 2. Check whether all the remaining alternatives
+ (a) do not mention in their rhs any of the variables bound in their pattern
+ and (b) have equal rhss
+
+ 3. Check we can safely ditch the case:
+ * PedanticBottoms is off,
+ or * the scrutinee is an already-evaluated variable
+ or * the scrutinee is a primop which is ok for speculation
+ -- ie we want to preserve divide-by-zero errors, and
+ -- calls to error itself!
+
+ or * [Prim cases] the scrutinee is a primitive variable
+
+ or * [Alg cases] the scrutinee is a variable and
+ either * the rhs is the same variable
+ (eg case x of C a b -> x ===> x)
+ or * there is only one alternative, the default alternative,
+ and the binder is used strictly in its scope.
+ [NB this is helped by the "use default binder where
+ possible" transformation; see below.]
+
+
+If so, then we can replace the case with one of the rhss.
+
+Further notes about case elimination
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider: test :: Integer -> IO ()
+ test = print
+
+Turns out that this compiles to:
+ Print.test
+ = \ eta :: Integer
+ eta1 :: State# RealWorld ->
+ case PrelNum.< eta PrelNum.zeroInteger of wild { __DEFAULT ->
+ case hPutStr stdout
+ (PrelNum.jtos eta ($w[] @ Char))
+ eta1
+ of wild1 { (# new_s, a4 #) -> PrelIO.lvl23 new_s }}
+
+Notice the strange '<' which has no effect at all. This is a funny one.
+It started like this:
+
+f x y = if x < 0 then jtos x
+ else if y==0 then "" else jtos x
+
+At a particular call site we have (f v 1). So we inline to get
+
+ if v < 0 then jtos x
+ else if 1==0 then "" else jtos x
+
+Now simplify the 1==0 conditional:
+
+ if v<0 then jtos v else jtos v
+
+Now common-up the two branches of the case:
+
+ case (v<0) of DEFAULT -> jtos v
+
+Why don't we drop the case? Because it's strict in v. It's technically
+wrong to drop even unnecessary evaluations, and in practice they
+may be a result of 'seq' so we *definitely* don't want to drop those.
+I don't really know how to improve this situation.
+
+
+\begin{code}
+--------------------------------------------------
+-- 0. Check for empty alternatives
+--------------------------------------------------
+
+-- This isn't strictly an error. It's possible that the simplifier might "see"
+-- that an inner case has no accessible alternatives before it "sees" that the
+-- entire branch of an outer case is inaccessible. So we simply
+-- put an error case here instead
+mkCase1 scrut case_bndr ty []
+ = pprTrace "mkCase1: null alts" (ppr case_bndr <+> ppr scrut) $
+ return (mkApps (Var eRROR_ID)
+ [Type ty, Lit (mkStringLit "Impossible alternative")])
+
+--------------------------------------------------
+-- 1. Eliminate the case altogether if poss
+--------------------------------------------------
+
+mkCase1 scrut case_bndr ty [(con,bndrs,rhs)]
+ -- See if we can get rid of the case altogether
+ -- See the extensive notes on case-elimination above
+ -- mkCase made sure that if all the alternatives are equal,
+ -- then there is now only one (DEFAULT) rhs
+ | all isDeadBinder bndrs,
+
+ -- Check that the scrutinee can be let-bound instead of case-bound
+ exprOkForSpeculation scrut
+ -- OK not to evaluate it
+ -- This includes things like (==# a# b#)::Bool
+ -- so that we simplify
+ -- case ==# a# b# of { True -> x; False -> x }
+ -- to just
+ -- x
+ -- This particular example shows up in default methods for
+ -- comparison operations (e.g. in (>=) for Int.Int32)
+ || exprIsHNF scrut -- It's already evaluated
+ || var_demanded_later scrut -- It'll be demanded later
+
+-- || not opt_SimplPedanticBottoms) -- Or we don't care!
+-- We used to allow improving termination by discarding cases, unless -fpedantic-bottoms was on,
+-- but that breaks badly for the dataToTag# primop, which relies on a case to evaluate
+-- its argument: case x of { y -> dataToTag# y }
+-- Here we must *not* discard the case, because dataToTag# just fetches the tag from
+-- the info pointer. So we'll be pedantic all the time, and see if that gives any
+-- other problems
+-- Also we don't want to discard 'seq's
+ = tick (CaseElim case_bndr) `thenSmpl_`
+ returnSmpl (bindCaseBndr case_bndr scrut rhs)
+
+ where
+ -- The case binder is going to be evaluated later,
+ -- and the scrutinee is a simple variable
+ var_demanded_later (Var v) = isStrictDmd (idNewDemandInfo case_bndr)
+ var_demanded_later other = False
+
+
+--------------------------------------------------
+-- 2. Identity case
+--------------------------------------------------
+
+mkCase1 scrut case_bndr ty alts -- Identity case
+ | all identity_alt alts
+ = tick (CaseIdentity case_bndr) `thenSmpl_`
+ returnSmpl (re_note scrut)
+ where
+ identity_alt (con, args, rhs) = de_note rhs `cheapEqExpr` identity_rhs con args
+
+ identity_rhs (DataAlt con) args = mkConApp con (arg_tys ++ map varToCoreExpr args)
+ identity_rhs (LitAlt lit) _ = Lit lit
+ identity_rhs DEFAULT _ = Var case_bndr
+
+ arg_tys = map Type (tyConAppArgs (idType case_bndr))
+
+ -- We've seen this:
+ -- case coerce T e of x { _ -> coerce T' x }
+ -- And we definitely want to eliminate this case!
+ -- So we throw away notes from the RHS, and reconstruct
+ -- (at least an approximation) at the other end
+ de_note (Note _ e) = de_note e
+ de_note e = e
+
+ -- re_note wraps a coerce if it might be necessary
+ re_note scrut = case head alts of
+ (_,_,rhs1@(Note _ _)) -> mkCoerce2 (exprType rhs1) (idType case_bndr) scrut
+ other -> scrut
+
+
+--------------------------------------------------
+-- Catch-all
+--------------------------------------------------
+mkCase1 scrut bndr ty alts = returnSmpl (Case scrut bndr ty alts)
+\end{code}
+
+
+When adding auxiliary bindings for the case binder, it's worth checking if
+it's dead, because it often is, and occasionally these mkCase transformations
+cascade rather nicely.
+
+\begin{code}
+bindCaseBndr bndr rhs body
+ | isDeadBinder bndr = body
+ | otherwise = bindNonRec bndr rhs body
+\end{code}
diff --git a/compiler/simplCore/Simplify.lhs b/compiler/simplCore/Simplify.lhs
new file mode 100644
index 0000000000..5ea0a91007
--- /dev/null
+++ b/compiler/simplCore/Simplify.lhs
@@ -0,0 +1,1894 @@
+%
+% (c) The AQUA Project, Glasgow University, 1993-1998
+%
+\section[Simplify]{The main module of the simplifier}
+
+\begin{code}
+module Simplify ( simplTopBinds, simplExpr ) where
+
+#include "HsVersions.h"
+
+import DynFlags ( dopt, DynFlag(Opt_D_dump_inlinings),
+ SimplifierSwitch(..)
+ )
+import SimplMonad
+import SimplEnv
+import SimplUtils ( mkCase, mkLam, prepareAlts,
+ SimplCont(..), DupFlag(..), LetRhsFlag(..),
+ mkRhsStop, mkBoringStop, pushContArgs,
+ contResultType, countArgs, contIsDupable, contIsRhsOrArg,
+ getContArgs, interestingCallContext, interestingArg, isStrictType,
+ preInlineUnconditionally, postInlineUnconditionally,
+ inlineMode, activeInline, activeRule
+ )
+import Id ( Id, idType, idInfo, idArity, isDataConWorkId,
+ setIdUnfolding, isDeadBinder,
+ idNewDemandInfo, setIdInfo,
+ setIdOccInfo, zapLamIdInfo, setOneShotLambda
+ )
+import MkId ( eRROR_ID )
+import Literal ( mkStringLit )
+import IdInfo ( OccInfo(..), isLoopBreaker,
+ setArityInfo, zapDemandInfo,
+ setUnfoldingInfo,
+ occInfo
+ )
+import NewDemand ( isStrictDmd )
+import Unify ( coreRefineTys )
+import DataCon ( dataConTyCon, dataConRepStrictness, isVanillaDataCon )
+import TyCon ( tyConArity )
+import CoreSyn
+import PprCore ( pprParendExpr, pprCoreExpr )
+import CoreUnfold ( mkUnfolding, callSiteInline )
+import CoreUtils ( exprIsDupable, exprIsTrivial, needsCaseBinding,
+ exprIsConApp_maybe, mkPiTypes, findAlt,
+ exprType, exprIsHNF,
+ exprOkForSpeculation, exprArity,
+ mkCoerce, mkCoerce2, mkSCC, mkInlineMe, applyTypeToArg
+ )
+import Rules ( lookupRule )
+import BasicTypes ( isMarkedStrict )
+import CostCentre ( currentCCS )
+import Type ( TvSubstEnv, isUnLiftedType, seqType, tyConAppArgs, funArgTy,
+ splitFunTy_maybe, splitFunTy, coreEqType
+ )
+import VarEnv ( elemVarEnv, emptyVarEnv )
+import TysPrim ( realWorldStatePrimTy )
+import PrelInfo ( realWorldPrimId )
+import BasicTypes ( TopLevelFlag(..), isTopLevel,
+ RecFlag(..), isNonRec
+ )
+import StaticFlags ( opt_PprStyle_Debug )
+import OrdList
+import Maybes ( orElse )
+import Outputable
+import Util ( notNull )
+\end{code}
+
+
+The guts of the simplifier are in this module, but the driver loop for
+the simplifier is in SimplCore.lhs.
+
+
+-----------------------------------------
+ *** IMPORTANT NOTE ***
+-----------------------------------------
+The simplifier used to guarantee that the output had no shadowing, but
+it does not do so any more. (Actually, it never did!) The reason is
+documented with simplifyArgs.
+
+
+-----------------------------------------
+ *** IMPORTANT NOTE ***
+-----------------------------------------
+Many parts of the simplifier return a bunch of "floats" as well as an
+expression. This is wrapped as a datatype SimplUtils.FloatsWith.
+
+All "floats" are let-binds, not case-binds, but some non-rec lets may
+be unlifted (with RHS ok-for-speculation).
+
+
+
+-----------------------------------------
+ ORGANISATION OF FUNCTIONS
+-----------------------------------------
+simplTopBinds
+ - simplify all top-level binders
+ - for NonRec, call simplRecOrTopPair
+ - for Rec, call simplRecBind
+
+
+ ------------------------------
+simplExpr (applied lambda) ==> simplNonRecBind
+simplExpr (Let (NonRec ...) ..) ==> simplNonRecBind
+simplExpr (Let (Rec ...) ..) ==> simplify binders; simplRecBind
+
+ ------------------------------
+simplRecBind [binders already simplified]
+ - use simplRecOrTopPair on each pair in turn
+
+simplRecOrTopPair [binder already simplified]
+ Used for: recursive bindings (top level and nested)
+ top-level non-recursive bindings
+ Returns:
+ - check for PreInlineUnconditionally
+ - simplLazyBind
+
+simplNonRecBind
+ Used for: non-top-level non-recursive bindings
+ beta reductions (which amount to the same thing)
+ Because it can deal with strict bindings, it takes a
+ "thing-inside" and returns an expression
+
+ - check for PreInlineUnconditionally
+ - simplify binder, including its IdInfo
+ - if strict binding
+ simplStrictArg
+ mkAtomicArgs
+ completeNonRecX
+ else
+ simplLazyBind
+ addFloats
+
+simplNonRecX: [given a *simplified* RHS, but an *unsimplified* binder]
+ Used for: binding case-binder and constr args in a known-constructor case
+ - check for PreInlineUnconditionally
+ - simplify binder
+ - completeNonRecX
+
+ ------------------------------
+simplLazyBind: [binder already simplified, RHS not]
+ Used for: recursive bindings (top level and nested)
+ top-level non-recursive bindings
+ non-top-level, but *lazy* non-recursive bindings
+ [must not be strict or unboxed]
+ Returns floats + an augmented environment, not an expression
+ - substituteIdInfo and add result to in-scope
+ [so that rules are available in rec rhs]
+ - simplify rhs
+ - mkAtomicArgs
+ - float if exposes constructor or PAP
+ - completeLazyBind
+
+
+completeNonRecX: [binder and rhs both simplified]
+ - if the thing needs case binding (unlifted and not ok-for-spec)
+ build a Case
+ else
+ completeLazyBind
+ addFloats
+
+completeLazyBind: [given a simplified RHS]
+ [used for both rec and non-rec bindings, top level and not]
+ - try PostInlineUnconditionally
+ - add unfolding [this is the only place we add an unfolding]
+ - add arity
+
+
+
+Right hand sides and arguments
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In many ways we want to treat
+ (a) the right hand side of a let(rec), and
+ (b) a function argument
+in the same way. But not always! In particular, we would
+like to leave these arguments exactly as they are, so they
+will match a RULE more easily.
+
+ f (g x, h x)
+ g (+ x)
+
+It's harder to make the rule match if we ANF-ise the constructor,
+or eta-expand the PAP:
+
+ f (let { a = g x; b = h x } in (a,b))
+ g (\y. + x y)
+
+On the other hand if we see the let-defns
+
+ p = (g x, h x)
+ q = + x
+
+then we *do* want to ANF-ise and eta-expand, so that p and q
+can be safely inlined.
+
+Even floating lets out is a bit dubious. For let RHS's we float lets
+out if that exposes a value, so that the value can be inlined more vigorously.
+For example
+
+ r = let x = e in (x,x)
+
+Here, if we float the let out we'll expose a nice constructor. We did experiments
+that showed this to be a generally good thing. But it was a bad thing to float
+lets out unconditionally, because that meant they got allocated more often.
+
+For function arguments, there's less reason to expose a constructor (it won't
+get inlined). Just possibly it might make a rule match, but I'm pretty skeptical.
+So for the moment we don't float lets out of function arguments either.
+
+
+Eta expansion
+~~~~~~~~~~~~~~
+For eta expansion, we want to catch things like
+
+ case e of (a,b) -> \x -> case a of (p,q) -> \y -> r
+
+If the \x was on the RHS of a let, we'd eta expand to bring the two
+lambdas together. And in general that's a good thing to do. Perhaps
+we should eta expand wherever we find a (value) lambda? Then the eta
+expansion at a let RHS can concentrate solely on the PAP case.
+
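+For the PAP case, eta expansion at a let RHS amounts to the following
+(the names are illustrative only):
+
+\begin{verbatim}
+  f = zipWith (+)                    -- a PAP bound by a let
+     ===>
+  f = \xs ys -> zipWith (+) xs ys    -- eta-expanded; f now has arity 2
+\end{verbatim}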
+
+%************************************************************************
+%* *
+\subsection{Bindings}
+%* *
+%************************************************************************
+
+\begin{code}
+simplTopBinds :: SimplEnv -> [InBind] -> SimplM [OutBind]
+
+simplTopBinds env binds
+ = -- Put all the top-level binders into scope at the start
+ -- so that if a transformation rule has unexpectedly brought
+ -- anything into scope, then we don't get a complaint about that.
+ -- It's rather as if the top-level binders were imported.
+ simplRecBndrs env (bindersOfBinds binds) `thenSmpl` \ (env, bndrs') ->
+ simpl_binds env binds bndrs' `thenSmpl` \ (floats, _) ->
+ freeTick SimplifierDone `thenSmpl_`
+ returnSmpl (floatBinds floats)
+ where
+ -- We need to track the zapped top-level binders, because
+ -- they should have their fragile IdInfo zapped (notably occurrence info)
+ -- That's why we run down binds and bndrs' simultaneously.
+ simpl_binds :: SimplEnv -> [InBind] -> [OutId] -> SimplM (FloatsWith ())
+ simpl_binds env [] bs = ASSERT( null bs ) returnSmpl (emptyFloats env, ())
+ simpl_binds env (bind:binds) bs = simpl_bind env bind bs `thenSmpl` \ (floats,env) ->
+ addFloats env floats $ \env ->
+ simpl_binds env binds (drop_bs bind bs)
+
+ drop_bs (NonRec _ _) (_ : bs) = bs
+ drop_bs (Rec prs) bs = drop (length prs) bs
+
+ simpl_bind env bind bs
+ = getDOptsSmpl `thenSmpl` \ dflags ->
+ if dopt Opt_D_dump_inlinings dflags then
+ pprTrace "SimplBind" (ppr (bindersOf bind)) $ simpl_bind1 env bind bs
+ else
+ simpl_bind1 env bind bs
+
+ simpl_bind1 env (NonRec b r) (b':_) = simplRecOrTopPair env TopLevel b b' r
+ simpl_bind1 env (Rec pairs) bs' = simplRecBind env TopLevel pairs bs'
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{simplNonRec}
+%* *
+%************************************************************************
+
+simplNonRecBind is used for
+ * non-top-level non-recursive lets in expressions
+ * beta reduction
+
+It takes
+ * An unsimplified (binder, rhs) pair
+ * The env for the RHS. It may not be the same as the
+ current env because the bind might occur via (\x.E) arg
+
+It uses the CPS form because the binding might be strict, in which
+case we might discard the continuation:
+ let x* = error "foo" in (...x...)
+
+It needs to turn unlifted bindings into a @case@. They can arise
+from, say: (\x -> e) (4# + 3#)
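+
+Concretely, the case-binding looks like this (a sketch; quotInt# is used
+rather than +# because quotInt# can fail, so its result must be case-bound
+rather than let-bound):
+
+\begin{verbatim}
+  (\x -> e) (n# `quotInt#` m#)
+     ===>
+  case n# `quotInt#` m# of x -> e
+\end{verbatim}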
+
+\begin{code}
+simplNonRecBind :: SimplEnv
+ -> InId -- Binder
+ -> InExpr -> SimplEnv -- Arg, with its subst-env
+ -> OutType -- Type of thing computed by the context
+ -> (SimplEnv -> SimplM FloatsWithExpr) -- The body
+ -> SimplM FloatsWithExpr
+#ifdef DEBUG
+simplNonRecBind env bndr rhs rhs_se cont_ty thing_inside
+ | isTyVar bndr
+ = pprPanic "simplNonRecBind" (ppr bndr <+> ppr rhs)
+#endif
+
+simplNonRecBind env bndr rhs rhs_se cont_ty thing_inside
+ = simplNonRecBind' env bndr rhs rhs_se cont_ty thing_inside
+
+simplNonRecBind' env bndr rhs rhs_se cont_ty thing_inside
+ | preInlineUnconditionally env NotTopLevel bndr rhs
+ = tick (PreInlineUnconditionally bndr) `thenSmpl_`
+ thing_inside (extendIdSubst env bndr (mkContEx rhs_se rhs))
+
+ | isStrictDmd (idNewDemandInfo bndr) || isStrictType bndr_ty -- A strict let
+ = -- Don't use simplBinder because that doesn't keep
+ -- fragile occurrence info in the substitution
+ simplNonRecBndr env bndr `thenSmpl` \ (env, bndr1) ->
+ simplStrictArg AnRhs env rhs rhs_se (idType bndr1) cont_ty $ \ env1 rhs1 ->
+
+ -- Now complete the binding and simplify the body
+ let
+ (env2,bndr2) = addLetIdInfo env1 bndr bndr1
+ in
+ if needsCaseBinding bndr_ty rhs1
+ then
+ thing_inside env2 `thenSmpl` \ (floats, body) ->
+ returnSmpl (emptyFloats env2, Case rhs1 bndr2 (exprType body)
+ [(DEFAULT, [], wrapFloats floats body)])
+ else
+ completeNonRecX env2 True {- strict -} bndr bndr2 rhs1 thing_inside
+
+ | otherwise -- Normal, lazy case
+ = -- Don't use simplBinder because that doesn't keep
+ -- fragile occurrence info in the substitution
+ simplNonRecBndr env bndr `thenSmpl` \ (env, bndr') ->
+ simplLazyBind env NotTopLevel NonRecursive
+ bndr bndr' rhs rhs_se `thenSmpl` \ (floats, env) ->
+ addFloats env floats thing_inside
+
+ where
+ bndr_ty = idType bndr
+\end{code}
+
+A specialised variant of simplNonRec used when the RHS is already simplified, notably
+in knownCon. It uses case-binding where necessary.
+
+\begin{code}
+simplNonRecX :: SimplEnv
+ -> InId -- Old binder
+ -> OutExpr -- Simplified RHS
+ -> (SimplEnv -> SimplM FloatsWithExpr)
+ -> SimplM FloatsWithExpr
+
+simplNonRecX env bndr new_rhs thing_inside
+ | needsCaseBinding (idType bndr) new_rhs
+ -- Make this test *before* the preInlineUnconditionally
+ -- Consider case I# (quotInt# x y) of
+ -- I# v -> let w = J# v in ...
+ -- If we gaily inline (quotInt# x y) for v, we end up building an
+ -- extra thunk:
+ -- let w = J# (quotInt# x y) in ...
+ -- because quotInt# can fail.
+ = simplBinder env bndr `thenSmpl` \ (env, bndr') ->
+ thing_inside env `thenSmpl` \ (floats, body) ->
+ let body' = wrapFloats floats body in
+ returnSmpl (emptyFloats env, Case new_rhs bndr' (exprType body') [(DEFAULT, [], body')])
+
+ | preInlineUnconditionally env NotTopLevel bndr new_rhs
+ -- This happens; for example, the case_bndr during case of
+ -- known constructor: case (a,b) of x { (p,q) -> ... }
+ -- Here x isn't mentioned in the RHS, so we don't want to
+ -- create the (dead) let-binding let x = (a,b) in ...
+ --
+ -- Similarly, single occurrences can be inlined vigorously
+ -- e.g. case (f x, g y) of (a,b) -> ....
+ -- If a,b occur once we can avoid constructing the let binding for them.
+ = thing_inside (extendIdSubst env bndr (DoneEx new_rhs))
+
+ | otherwise
+ = simplBinder env bndr `thenSmpl` \ (env, bndr') ->
+ completeNonRecX env False {- Non-strict; pessimistic -}
+ bndr bndr' new_rhs thing_inside
+
+completeNonRecX env is_strict old_bndr new_bndr new_rhs thing_inside
+ = mkAtomicArgs is_strict
+ True {- OK to float unlifted -}
+ new_rhs `thenSmpl` \ (aux_binds, rhs2) ->
+
+ -- Make the arguments atomic if necessary,
+ -- adding suitable bindings
+ addAtomicBindsE env (fromOL aux_binds) $ \ env ->
+ completeLazyBind env NotTopLevel
+ old_bndr new_bndr rhs2 `thenSmpl` \ (floats, env) ->
+ addFloats env floats thing_inside
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Lazy bindings}
+%* *
+%************************************************************************
+
+simplRecBind is used for
+ * recursive bindings only
+
+\begin{code}
+simplRecBind :: SimplEnv -> TopLevelFlag
+ -> [(InId, InExpr)] -> [OutId]
+ -> SimplM (FloatsWith SimplEnv)
+simplRecBind env top_lvl pairs bndrs'
+ = go env pairs bndrs' `thenSmpl` \ (floats, env) ->
+ returnSmpl (flattenFloats floats, env)
+ where
+ go env [] _ = returnSmpl (emptyFloats env, env)
+
+ go env ((bndr, rhs) : pairs) (bndr' : bndrs')
+ = simplRecOrTopPair env top_lvl bndr bndr' rhs `thenSmpl` \ (floats, env) ->
+ addFloats env floats (\env -> go env pairs bndrs')
+\end{code}
+
+
+simplRecOrTopPair is used for
+ * recursive bindings (whether top level or not)
+ * top-level non-recursive bindings
+
+It assumes the binder has already been simplified, but not its IdInfo.
+
+\begin{code}
+simplRecOrTopPair :: SimplEnv
+ -> TopLevelFlag
+ -> InId -> OutId -- Binder, both pre-and post simpl
+ -> InExpr -- The RHS and its environment
+ -> SimplM (FloatsWith SimplEnv)
+
+simplRecOrTopPair env top_lvl bndr bndr' rhs
+ | preInlineUnconditionally env top_lvl bndr rhs -- Check for unconditional inline
+ = tick (PreInlineUnconditionally bndr) `thenSmpl_`
+ returnSmpl (emptyFloats env, extendIdSubst env bndr (mkContEx env rhs))
+
+ | otherwise
+ = simplLazyBind env top_lvl Recursive bndr bndr' rhs env
+ -- May not actually be recursive, but it doesn't matter
+\end{code}
+
+
+simplLazyBind is used for
+ * recursive bindings (whether top level or not)
+ * top-level non-recursive bindings
+ * non-top-level *lazy* non-recursive bindings
+
+[Thus it deals with the lazy cases from simplNonRecBind, and all cases
+from SimplRecOrTopBind]
+
+Nota bene:
+ 1. It assumes that the binder is *already* simplified,
+ and is in scope, but not its IdInfo
+
+ 2. It assumes that the binder type is lifted.
+
+ 3. It does not check for pre-inline-unconditionally;
+ that should have been done already.
+
+\begin{code}
+simplLazyBind :: SimplEnv
+ -> TopLevelFlag -> RecFlag
+ -> InId -> OutId -- Binder, both pre-and post simpl
+ -> InExpr -> SimplEnv -- The RHS and its environment
+ -> SimplM (FloatsWith SimplEnv)
+
+simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se
+ = let
+ (env1,bndr2) = addLetIdInfo env bndr bndr1
+ rhs_env = setInScope rhs_se env1
+ is_top_level = isTopLevel top_lvl
+ ok_float_unlifted = not is_top_level && isNonRec is_rec
+ rhs_cont = mkRhsStop (idType bndr2)
+ in
+ -- Simplify the RHS; note the mkRhsStop, which tells
+ -- the simplifier that this is the RHS of a let.
+ simplExprF rhs_env rhs rhs_cont `thenSmpl` \ (floats, rhs1) ->
+
+ -- If any of the floats can't be floated, give up now
+ -- (The allLifted predicate says True for empty floats.)
+ if (not ok_float_unlifted && not (allLifted floats)) then
+ completeLazyBind env1 top_lvl bndr bndr2
+ (wrapFloats floats rhs1)
+ else
+
+ -- ANF-ise a constructor or PAP rhs
+ mkAtomicArgs False {- Not strict -}
+ ok_float_unlifted rhs1 `thenSmpl` \ (aux_binds, rhs2) ->
+
+ -- If the result is a PAP, float the floats out, else wrap them
+ -- By this time it's already been ANF-ised (if necessary)
+ if isEmptyFloats floats && isNilOL aux_binds then -- Shortcut a common case
+ completeLazyBind env1 top_lvl bndr bndr2 rhs2
+
+ else if is_top_level || exprIsTrivial rhs2 || exprIsHNF rhs2 then
+ -- WARNING: long dodgy argument coming up
+ -- WANTED: a better way to do this
+ --
+ -- We can't use "exprIsCheap" instead of exprIsHNF,
+ -- because that causes a strictness bug.
+ -- x = let y* = E in case (scc y) of { T -> F; F -> T}
+ -- The case expression is 'cheap', but it's wrong to transform to
+ -- y* = E; x = case (scc y) of {...}
+ -- Either we must be careful not to float demanded non-values, or
+ -- we must use exprIsHNF for the test, which ensures that the
+ -- thing is non-strict. So exprIsHNF => bindings are non-strict
+ -- I think. The WARN below tests for this.
+ --
+ -- We use exprIsTrivial here because we want to reveal lone variables.
+ -- E.g. let { x = letrec { y = E } in y } in ...
+ -- Here we definitely want to float the y=E defn.
+ -- exprIsHNF definitely isn't right for that.
+ --
+ -- Again, the floated binding can't be strict; if it's recursive it'll
+ -- be non-strict; if it's non-recursive it'd be inlined.
+ --
+ -- Note [SCC-and-exprIsTrivial]
+ -- If we have
+ -- y = let { x* = E } in scc "foo" x
+ -- then we do *not* want to float out the x binding, because
+ -- it's strict! Fortunately, exprIsTrivial replies False to
+ -- (scc "foo" x).
+
+ -- There's a subtlety here. There may be a binding (x* = e) in the
+ -- floats, where the '*' means 'will be demanded'. So is it safe
+ -- to float it out? Answer no, but it won't matter because
+ -- we only float if (a) arg' is a WHNF, or (b) it's going to top level
+ -- and so there can't be any 'will be demanded' bindings in the floats.
+ -- Hence the warning
+ ASSERT2( is_top_level || not (any demanded_float (floatBinds floats)),
+ ppr (filter demanded_float (floatBinds floats)) )
+
+ tick LetFloatFromLet `thenSmpl_` (
+ addFloats env1 floats $ \ env2 ->
+ addAtomicBinds env2 (fromOL aux_binds) $ \ env3 ->
+ completeLazyBind env3 top_lvl bndr bndr2 rhs2)
+
+ else
+ completeLazyBind env1 top_lvl bndr bndr2 (wrapFloats floats rhs1)
+
+#ifdef DEBUG
+demanded_float (NonRec b r) = isStrictDmd (idNewDemandInfo b) && not (isUnLiftedType (idType b))
+ -- Unlifted-type (cheap-eagerness) lets may well have a demanded flag on them
+demanded_float (Rec _) = False
+#endif
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Completing a lazy binding}
+%* *
+%************************************************************************
+
+completeLazyBind
+ * deals only with Ids, not TyVars
+ * takes an already-simplified binder and RHS
+ * is used for both recursive and non-recursive bindings
+ * is used for both top-level and non-top-level bindings
+
+It does the following:
+ - tries discarding a dead binding
+ - tries PostInlineUnconditionally
+ - add unfolding [this is the only place we add an unfolding]
+ - add arity
+
+It does *not* attempt to do let-to-case. Why? Because it is used for
+ - top-level bindings (when let-to-case is impossible)
+ - many situations where the "rhs" is known to be a WHNF
+ (so let-to-case is inappropriate).
+
+\begin{code}
+completeLazyBind :: SimplEnv
+ -> TopLevelFlag -- Flag stuck into unfolding
+ -> InId -- Old binder
+ -> OutId -- New binder
+ -> OutExpr -- Simplified RHS
+ -> SimplM (FloatsWith SimplEnv)
+-- We return a new SimplEnv, because completeLazyBind may choose to do its work
+-- by extending the substitution (e.g. let x = y in ...)
+-- The new binding (if any) is returned as part of the floats.
+-- NB: the returned SimplEnv has the right SubstEnv, but you should
+-- (as usual) use the in-scope-env from the floats
+
+completeLazyBind env top_lvl old_bndr new_bndr new_rhs
+ | postInlineUnconditionally env top_lvl new_bndr occ_info new_rhs unfolding
+ = -- Drop the binding
+ tick (PostInlineUnconditionally old_bndr) `thenSmpl_`
+ returnSmpl (emptyFloats env, extendIdSubst env old_bndr (DoneEx new_rhs))
+ -- Use the substitution to make quite, quite sure that the substitution
+ -- will happen, since we are going to discard the binding
+
+ | otherwise
+ = let
+ -- Add arity info
+ new_bndr_info = idInfo new_bndr `setArityInfo` exprArity new_rhs
+
+ -- Add the unfolding *only* for non-loop-breakers
+ -- Making loop breakers not have an unfolding at all
+ -- means that we can avoid tests in exprIsConApp, for example.
+ -- This is important: if exprIsConApp says 'yes' for a recursive
+ -- thing, then we can get into an infinite loop
+
+ -- If the unfolding is a value, the demand info may
+ -- go pear-shaped, so we nuke it. Example:
+ -- let x = (a,b) in
+ -- case x of (p,q) -> h p q x
+ -- Here x is certainly demanded. But after we've nuked
+ -- the case, we'll get just
+ -- let x = (a,b) in h a b x
+ -- and now x is not demanded (I'm assuming h is lazy)
+ -- This really happens. Similarly
+ -- let f = \x -> e in ...f..f...
+ -- After inlining f at some of its call sites the original binding may
+ -- (for example) be no longer strictly demanded.
+ -- The solution here is a bit ad hoc...
+ info_w_unf = new_bndr_info `setUnfoldingInfo` unfolding
+ final_info | loop_breaker = new_bndr_info
+ | isEvaldUnfolding unfolding = zapDemandInfo info_w_unf `orElse` info_w_unf
+ | otherwise = info_w_unf
+
+ final_id = new_bndr `setIdInfo` final_info
+ in
+ -- This seq forces the Id, and hence its IdInfo,
+ -- and hence any inner substitutions
+ final_id `seq`
+ returnSmpl (unitFloat env final_id new_rhs, env)
+
+ where
+ unfolding = mkUnfolding (isTopLevel top_lvl) new_rhs
+ loop_breaker = isLoopBreaker occ_info
+ old_info = idInfo old_bndr
+ occ_info = occInfo old_info
+\end{code}
+
+
+
+%************************************************************************
+%* *
+\subsection[Simplify-simplExpr]{The main function: simplExpr}
+%* *
+%************************************************************************
+
+The reason for this OutExprStuff stuff is that we want to float *after*
+simplifying a RHS, not before. If we do so naively we get quadratic
+behaviour as things float out.
+
+To see why it's important to do it after, consider this (real) example:
+
+ let t = f x
+ in fst t
+==>
+ let t = let a = e1
+ b = e2
+ in (a,b)
+ in fst t
+==>
+ let a = e1
+ b = e2
+ t = (a,b)
+ in
+ a -- Can't inline a this round, cos it appears twice
+==>
+ e1
+
+Each of the ==> steps is a round of simplification. We'd save a
+whole round if we float first. This can cascade. Consider
+
+ let f = g d
+ in \x -> ...f...
+==>
+ let f = let d1 = ..d.. in \y -> e
+ in \x -> ...f...
+==>
+ let d1 = ..d..
+ in \x -> ...(\y ->e)...
+
+Only in this second round can the \y be applied, and it
+might do the same again.
+
+
+\begin{code}
+simplExpr :: SimplEnv -> CoreExpr -> SimplM CoreExpr
+simplExpr env expr = simplExprC env expr (mkBoringStop expr_ty')
+ where
+ expr_ty' = substTy env (exprType expr)
+ -- The type in the Stop continuation, expr_ty', is usually not used
+ -- It's only needed when discarding continuations after finding
+ -- a function that returns bottom.
+ -- Hence the lazy substitution
+
+
+simplExprC :: SimplEnv -> CoreExpr -> SimplCont -> SimplM CoreExpr
+ -- Simplify an expression, given a continuation
+simplExprC env expr cont
+ = simplExprF env expr cont `thenSmpl` \ (floats, expr) ->
+ returnSmpl (wrapFloats floats expr)
+
+simplExprF :: SimplEnv -> InExpr -> SimplCont -> SimplM FloatsWithExpr
+ -- Simplify an expression, returning floated binds
+
+simplExprF env (Var v) cont = simplVar env v cont
+simplExprF env (Lit lit) cont = rebuild env (Lit lit) cont
+simplExprF env expr@(Lam _ _) cont = simplLam env expr cont
+simplExprF env (Note note expr) cont = simplNote env note expr cont
+simplExprF env (App fun arg) cont = simplExprF env fun (ApplyTo NoDup arg env cont)
+
+simplExprF env (Type ty) cont
+ = ASSERT( contIsRhsOrArg cont )
+ simplType env ty `thenSmpl` \ ty' ->
+ rebuild env (Type ty') cont
+
+simplExprF env (Case scrut bndr case_ty alts) cont
+ | not (switchIsOn (getSwitchChecker env) NoCaseOfCase)
+ = -- Simplify the scrutinee with a Select continuation
+ simplExprF env scrut (Select NoDup bndr alts env cont)
+
+ | otherwise
+ = -- If case-of-case is off, simply simplify the case expression
+ -- in a vanilla Stop context, and rebuild the result around it
+ simplExprC env scrut case_cont `thenSmpl` \ case_expr' ->
+ rebuild env case_expr' cont
+ where
+ case_cont = Select NoDup bndr alts env (mkBoringStop case_ty')
+ case_ty' = substTy env case_ty -- c.f. defn of simplExpr
+
+simplExprF env (Let (Rec pairs) body) cont
+ = simplRecBndrs env (map fst pairs) `thenSmpl` \ (env, bndrs') ->
+ -- NB: bndrs' don't have unfoldings or rules
+ -- We add them as we go down
+
+ simplRecBind env NotTopLevel pairs bndrs' `thenSmpl` \ (floats, env) ->
+ addFloats env floats $ \ env ->
+ simplExprF env body cont
+
+-- A non-recursive let is dealt with by simplNonRecBind
+simplExprF env (Let (NonRec bndr rhs) body) cont
+ = simplNonRecBind env bndr rhs env (contResultType cont) $ \ env ->
+ simplExprF env body cont
+
+
+---------------------------------
+simplType :: SimplEnv -> InType -> SimplM OutType
+ -- Kept monadic just so we can do the seqType
+simplType env ty
+ = seqType new_ty `seq` returnSmpl new_ty
+ where
+ new_ty = substTy env ty
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Lambdas}
+%* *
+%************************************************************************
+
+\begin{code}
+simplLam env fun cont
+ = go env fun cont
+ where
+ zap_it = mkLamBndrZapper fun (countArgs cont)
+ cont_ty = contResultType cont
+
+ -- Type-beta reduction
+ go env (Lam bndr body) (ApplyTo _ (Type ty_arg) arg_se body_cont)
+ = ASSERT( isTyVar bndr )
+ tick (BetaReduction bndr) `thenSmpl_`
+ simplType (setInScope arg_se env) ty_arg `thenSmpl` \ ty_arg' ->
+ go (extendTvSubst env bndr ty_arg') body body_cont
+
+ -- Ordinary beta reduction
+ go env (Lam bndr body) cont@(ApplyTo _ arg arg_se body_cont)
+ = tick (BetaReduction bndr) `thenSmpl_`
+ simplNonRecBind env (zap_it bndr) arg arg_se cont_ty $ \ env ->
+ go env body body_cont
+
+ -- Not enough args, so there are real lambdas left to put in the result
+ go env lam@(Lam _ _) cont
+ = simplLamBndrs env bndrs `thenSmpl` \ (env, bndrs') ->
+ simplExpr env body `thenSmpl` \ body' ->
+ mkLam env bndrs' body' cont `thenSmpl` \ (floats, new_lam) ->
+ addFloats env floats $ \ env ->
+ rebuild env new_lam cont
+ where
+ (bndrs,body) = collectBinders lam
+
+ -- Exactly enough args
+ go env expr cont = simplExprF env expr cont
+
+mkLamBndrZapper :: CoreExpr -- Function
+ -> Int -- Number of args supplied, *including* type args
+ -> Id -> Id -- Use this to zap the binders
+mkLamBndrZapper fun n_args
+ | n_args >= n_params fun = \b -> b -- Enough args
+ | otherwise = \b -> zapLamIdInfo b
+ where
+ -- NB: we count all the args incl type args
+ -- so we must count all the binders (incl type lambdas)
+ n_params (Note _ e) = n_params e
+ n_params (Lam b e) = 1 + n_params e
+ n_params other = 0::Int
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Notes}
+%* *
+%************************************************************************
+
+\begin{code}
+simplNote env (Coerce to from) body cont
+ = let
+ addCoerce s1 k1 cont -- Drop redundant coerces. This can happen if a polymorphic
+ -- (coerce a b e) is instantiated with a=ty1 b=ty2 and the
+ -- two are the same. This happens a lot in Happy-generated parsers
+ | s1 `coreEqType` k1 = cont
+
+ addCoerce s1 k1 (CoerceIt t1 cont)
+ -- coerce T1 S1 (coerce S1 K1 e)
+ -- ==>
+ -- e, if T1=K1
+ -- coerce T1 K1 e, otherwise
+ --
+ -- For example, in the initial form of a worker
+ -- we may find (coerce T (coerce S (\x.e))) y
+ -- and we'd like it to simplify to e[y/x] in one round
+ -- of simplification
+ | t1 `coreEqType` k1 = cont -- The coerces cancel out
+ | otherwise = CoerceIt t1 cont -- They don't cancel, but
+ -- the inner one is redundant
+
+ addCoerce t1t2 s1s2 (ApplyTo dup arg arg_se cont)
+ | not (isTypeArg arg), -- This whole case only works for value args
+ -- Could upgrade to have equiv thing for type apps too
+ Just (s1, s2) <- splitFunTy_maybe s1s2
+ -- (coerce (T1->T2) (S1->S2) F) E
+ -- ===>
+ -- coerce T2 S2 (F (coerce S1 T1 E))
+ --
+ -- t1t2 must be a function type, T1->T2, because it's applied to something
+ -- but s1s2 might conceivably not be
+ --
+ -- When we build the ApplyTo we can't mix the out-types
+ -- with the InExpr in the argument, so we simply substitute
+ -- to make it all consistent. It's a bit messy.
+ -- But it isn't a common case.
+ = let
+ (t1,t2) = splitFunTy t1t2
+ new_arg = mkCoerce2 s1 t1 (substExpr arg_env arg)
+ arg_env = setInScope arg_se env
+ in
+ ApplyTo dup new_arg (zapSubstEnv env) (addCoerce t2 s2 cont)
+
+ addCoerce to' _ cont = CoerceIt to' cont
+ in
+ simplType env to `thenSmpl` \ to' ->
+ simplType env from `thenSmpl` \ from' ->
+ simplExprF env body (addCoerce to' from' cont)
+
+
+-- Hack: we only distinguish subsumed cost centre stacks for the purposes of
+-- inlining. All other CCCSs are mapped to currentCCS.
+simplNote env (SCC cc) e cont
+ = simplExpr (setEnclosingCC env currentCCS) e `thenSmpl` \ e' ->
+ rebuild env (mkSCC cc e') cont
+
+simplNote env InlineCall e cont
+ = simplExprF env e (InlinePlease cont)
+
+-- See notes with SimplMonad.inlineMode
+simplNote env InlineMe e cont
+ | contIsRhsOrArg cont -- Totally boring continuation; see notes above
+ = -- Don't inline inside an INLINE expression
+ simplExpr (setMode inlineMode env ) e `thenSmpl` \ e' ->
+ rebuild env (mkInlineMe e') cont
+
+ | otherwise -- Dissolve the InlineMe note if there's
+ -- an interesting context of any kind to combine with
+ -- (even a type application -- anything except Stop)
+ = simplExprF env e cont
+
+simplNote env (CoreNote s) e cont
+ = simplExpr env e `thenSmpl` \ e' ->
+ rebuild env (Note (CoreNote s) e') cont
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Dealing with calls}
+%* *
+%************************************************************************
+
+\begin{code}
+simplVar env var cont
+ = case substId env var of
+ DoneEx e -> simplExprF (zapSubstEnv env) e cont
+ ContEx tvs ids e -> simplExprF (setSubstEnv env tvs ids) e cont
+ DoneId var1 occ -> completeCall (zapSubstEnv env) var1 occ cont
+ -- Note [zapSubstEnv]
+ -- The template is already simplified, so don't re-substitute.
+ -- This is VITAL. Consider
+ -- let x = e in
+ -- let y = \z -> ...x... in
+ -- \ x -> ...y...
+ -- We'll clone the inner \x, adding x->x' in the id_subst
+ -- Then when we inline y, we must *not* replace x by x' in
+ -- the inlined copy!!
+
+---------------------------------------------------------
+-- Dealing with a call site
+
+completeCall env var occ_info cont
+ = -- Simplify the arguments
+ getDOptsSmpl `thenSmpl` \ dflags ->
+ let
+ chkr = getSwitchChecker env
+ (args, call_cont, inline_call) = getContArgs chkr var cont
+ fn_ty = idType var
+ in
+ simplifyArgs env fn_ty args (contResultType call_cont) $ \ env args ->
+
+ -- Next, look for rules or specialisations that match
+ --
+ -- It's important to simplify the args first, because the rule-matcher
+ -- doesn't do substitution as it goes. We don't want to use subst_args
+ -- (defined in the 'where') because that throws away useful occurrence info,
+ -- and perhaps-very-important specialisations.
+ --
+ -- Some functions have specialisations *and* are strict; in this case,
+ -- we don't want to inline the wrapper of the non-specialised thing; better
+ -- to call the specialised thing instead.
+ -- We used to use the black-listing mechanism to ensure that inlining of
+ -- the wrapper didn't occur for things that have specialisations till a
+ -- later phase, but now we just try RULES first
+ --
+ -- You might think that we shouldn't apply rules for a loop breaker:
+ -- doing so might give rise to an infinite loop, because a RULE is
+ -- rather like an extra equation for the function:
+ -- RULE: f (g x) y = x+y
+ -- Eqn: f a y = a-y
+ --
+ -- But it's too drastic to disable rules for loop breakers.
+ -- Even the foldr/build rule would be disabled, because foldr
+ -- is recursive, and hence a loop breaker:
+ -- foldr k z (build g) = g k z
+ -- So it's up to the programmer: rules can cause divergence
+
+ let
+ in_scope = getInScope env
+ rules = getRules env
+ maybe_rule = case activeRule env of
+ Nothing -> Nothing -- No rules apply
+ Just act_fn -> lookupRule act_fn in_scope rules var args
+ in
+ case maybe_rule of {
+ Just (rule_name, rule_rhs) ->
+ tick (RuleFired rule_name) `thenSmpl_`
+ (if dopt Opt_D_dump_inlinings dflags then
+ pprTrace "Rule fired" (vcat [
+ text "Rule:" <+> ftext rule_name,
+ text "Before:" <+> ppr var <+> sep (map pprParendExpr args),
+ text "After: " <+> pprCoreExpr rule_rhs,
+ text "Cont: " <+> ppr call_cont])
+ else
+ id) $
+ simplExprF env rule_rhs call_cont ;
+
+ Nothing -> -- No rules
+
+ -- Next, look for an inlining
+ let
+ arg_infos = [ interestingArg arg | arg <- args, isValArg arg]
+
+ interesting_cont = interestingCallContext (notNull args)
+ (notNull arg_infos)
+ call_cont
+
+ active_inline = activeInline env var occ_info
+ maybe_inline = callSiteInline dflags active_inline inline_call occ_info
+ var arg_infos interesting_cont
+ in
+ case maybe_inline of {
+ Just unfolding -- There is an inlining!
+ -> tick (UnfoldingDone var) `thenSmpl_`
+ (if dopt Opt_D_dump_inlinings dflags then
+ pprTrace "Inlining done" (vcat [
+ text "Before:" <+> ppr var <+> sep (map pprParendExpr args),
+ text "Inlined fn: " <+> ppr unfolding,
+ text "Cont: " <+> ppr call_cont])
+ else
+ id) $
+ makeThatCall env var unfolding args call_cont
+
+ ;
+ Nothing -> -- No inlining!
+
+ -- Done
+ rebuild env (mkApps (Var var) args) call_cont
+ }}
+
+makeThatCall :: SimplEnv
+ -> Id
+ -> InExpr -- Inlined function rhs
+ -> [OutExpr] -- Arguments, already simplified
+ -> SimplCont -- After the call
+ -> SimplM FloatsWithExpr
+-- Similar to simplLam, but this time
+-- the arguments are already simplified
+makeThatCall orig_env var fun@(Lam _ _) args cont
+ = go orig_env fun args
+ where
+ zap_it = mkLamBndrZapper fun (length args)
+
+ -- Type-beta reduction
+ go env (Lam bndr body) (Type ty_arg : args)
+ = ASSERT( isTyVar bndr )
+ tick (BetaReduction bndr) `thenSmpl_`
+ go (extendTvSubst env bndr ty_arg) body args
+
+ -- Ordinary beta reduction
+ go env (Lam bndr body) (arg : args)
+ = tick (BetaReduction bndr) `thenSmpl_`
+ simplNonRecX env (zap_it bndr) arg $ \ env ->
+ go env body args
+
+ -- Not enough args, so there are real lambdas left to put in the result
+ go env fun args
+ = simplExprF env fun (pushContArgs orig_env args cont)
+ -- NB: orig_env; the correct environment to capture with
+ -- the arguments.... env has been augmented with substitutions
+ -- from the beta reductions.
+
+makeThatCall env var fun args cont
+ = simplExprF env fun (pushContArgs env args cont)
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Arguments}
+%* *
+%************************************************************************
+
+\begin{code}
+---------------------------------------------------------
+-- Simplifying the arguments of a call
+
+simplifyArgs :: SimplEnv
+ -> OutType -- Type of the function
+ -> [(InExpr, SimplEnv, Bool)] -- Details of the arguments
+ -> OutType -- Type of the continuation
+ -> (SimplEnv -> [OutExpr] -> SimplM FloatsWithExpr)
+ -> SimplM FloatsWithExpr
+
+-- [CPS-like because of strict arguments]
+
+-- Simplify the arguments to a call.
+-- This part of the simplifier may break the no-shadowing invariant
+-- Consider
+-- f (...(\a -> e)...) (case y of (a,b) -> e')
+-- where f is strict in its second arg
+-- If we simplify the innermost one first we get (...(\a -> e)...)
+-- Simplifying the second arg makes us float the case out, so we end up with
+-- case y of (a,b) -> f (...(\a -> e)...) e'
+-- So the output does not have the no-shadowing invariant. However, there is
+-- no danger of getting name-capture, because when the first arg was simplified
+-- we used an in-scope set that at least mentioned all the variables free in its
+-- static environment, and that is enough.
+--
+-- We can't just do innermost first, or we'd end up with a dual problem:
+-- case x of (a,b) -> f e (...(\a -> e')...)
+--
+-- I spent hours trying to recover the no-shadowing invariant, but I just could
+-- not think of an elegant way to do it. The simplifier is already knee-deep in
+-- continuations. We have to keep the right in-scope set around; AND we have
+-- to get the effect that finding (error "foo") in a strict arg position will
+-- discard the entire application and replace it with (error "foo"). Getting
+-- all this at once is TOO HARD!
+
+simplifyArgs env fn_ty args cont_ty thing_inside
+ = go env fn_ty args thing_inside
+ where
+ go env fn_ty [] thing_inside = thing_inside env []
+ go env fn_ty (arg:args) thing_inside = simplifyArg env fn_ty arg cont_ty $ \ env arg' ->
+ go env (applyTypeToArg fn_ty arg') args $ \ env args' ->
+ thing_inside env (arg':args')
+
+simplifyArg env fn_ty (Type ty_arg, se, _) cont_ty thing_inside
+ = simplType (setInScope se env) ty_arg `thenSmpl` \ new_ty_arg ->
+ thing_inside env (Type new_ty_arg)
+
+simplifyArg env fn_ty (val_arg, arg_se, is_strict) cont_ty thing_inside
+ | is_strict
+ = simplStrictArg AnArg env val_arg arg_se arg_ty cont_ty thing_inside
+
+ | otherwise -- Lazy argument
+ -- DO NOT float anything outside, hence simplExprC
+ -- There is no benefit (unlike in a let-binding), and we'd
+ -- have to be very careful about bogus strictness through
+ -- floating a demanded let.
+ = simplExprC (setInScope arg_se env) val_arg
+ (mkBoringStop arg_ty) `thenSmpl` \ arg1 ->
+ thing_inside env arg1
+ where
+ arg_ty = funArgTy fn_ty
+
+
+simplStrictArg :: LetRhsFlag
+ -> SimplEnv -- The env of the call
+ -> InExpr -> SimplEnv -- The arg plus its env
+ -> OutType -- arg_ty: type of the argument
+ -> OutType -- cont_ty: Type of thing computed by the context
+ -> (SimplEnv -> OutExpr -> SimplM FloatsWithExpr)
+ -- Takes an expression of type rhs_ty,
+ -- returns an expression of type cont_ty
+ -- The env passed to this continuation is the
+ -- env of the call, plus any new in-scope variables
+ -> SimplM FloatsWithExpr -- An expression of type cont_ty
+
+simplStrictArg is_rhs call_env arg arg_env arg_ty cont_ty thing_inside
+ = simplExprF (setInScope arg_env call_env) arg
+ (ArgOf is_rhs arg_ty cont_ty (\ new_env -> thing_inside (setInScope call_env new_env)))
+ -- Notice the way we use arg_env (augmented with in-scope vars from call_env)
+ -- to simplify the argument
+ -- and call-env (augmented with in-scope vars from the arg) to pass to the continuation
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{mkAtomicArgs}
+%* *
+%************************************************************************
+
+mkAtomicArgs takes a putative RHS, checks whether it's a PAP or
+constructor application and, if so, converts it to ANF, so that the
+resulting thing can be inlined more easily. Thus
+ x = (f a, g b)
+becomes
+ t1 = f a
+ t2 = g b
+ x = (t1,t2)
+
+There are three sorts of binding context, specified by the two
+boolean arguments
+
+  Strict  OK-unlifted
+
+    N          N       Top-level or recursive             Only bind args of lifted type
+
+    N          Y       Non-top-level and non-recursive,   Bind args of lifted type, or
+                       but lazy                           unlifted-and-ok-for-speculation
+
+    Y          Y       Non-top-level, non-recursive,      Bind all args
+                       and strict (demanded)
+
+
+For example, given
+
+ x = MkC (y div# z)
+
+there is no point in transforming to
+
+ x = case (y div# z) of r -> MkC r
+
+because the (y div# z) can't float out of the let. But if it was
+a *strict* let, then it would be a good thing to do. Hence the
+context information.
+
+\begin{code}
+mkAtomicArgs :: Bool -- A strict binding
+ -> Bool -- OK to float unlifted args
+ -> OutExpr
+ -> SimplM (OrdList (OutId,OutExpr), -- The floats (unusually) may include
+ OutExpr) -- things that need case-binding,
+ -- if the strict-binding flag is on
+
+mkAtomicArgs is_strict ok_float_unlifted rhs
+ | (Var fun, args) <- collectArgs rhs, -- It's an application
+ isDataConWorkId fun || valArgCount args < idArity fun -- And it's a constructor or PAP
+ = go fun nilOL [] args -- Have a go
+
+ | otherwise = bale_out -- Give up
+
+ where
+ bale_out = returnSmpl (nilOL, rhs)
+
+ go fun binds rev_args []
+ = returnSmpl (binds, mkApps (Var fun) (reverse rev_args))
+
+ go fun binds rev_args (arg : args)
+ | exprIsTrivial arg -- Easy case
+ = go fun binds (arg:rev_args) args
+
+ | not can_float_arg -- Can't make this arg atomic
+ = bale_out -- ... so give up
+
+ | otherwise -- Don't forget to do it recursively
+ -- E.g. x = a:b:c:[]
+ = mkAtomicArgs is_strict ok_float_unlifted arg `thenSmpl` \ (arg_binds, arg') ->
+ newId FSLIT("a") arg_ty `thenSmpl` \ arg_id ->
+ go fun ((arg_binds `snocOL` (arg_id,arg')) `appOL` binds)
+ (Var arg_id : rev_args) args
+ where
+ arg_ty = exprType arg
+ can_float_arg = is_strict
+ || not (isUnLiftedType arg_ty)
+ || (ok_float_unlifted && exprOkForSpeculation arg)
+
+
+addAtomicBinds :: SimplEnv -> [(OutId,OutExpr)]
+ -> (SimplEnv -> SimplM (FloatsWith a))
+ -> SimplM (FloatsWith a)
+addAtomicBinds env [] thing_inside = thing_inside env
+addAtomicBinds env ((v,r):bs) thing_inside = addAuxiliaryBind env (NonRec v r) $ \ env ->
+ addAtomicBinds env bs thing_inside
+
+addAtomicBindsE :: SimplEnv -> [(OutId,OutExpr)]
+ -> (SimplEnv -> SimplM FloatsWithExpr)
+ -> SimplM FloatsWithExpr
+-- Same again, but this time we're in an expression context,
+-- and may need to do some case bindings
+
+addAtomicBindsE env [] thing_inside
+ = thing_inside env
+addAtomicBindsE env ((v,r):bs) thing_inside
+ | needsCaseBinding (idType v) r
+ = addAtomicBindsE (addNewInScopeIds env [v]) bs thing_inside `thenSmpl` \ (floats, expr) ->
+ WARN( exprIsTrivial expr, ppr v <+> pprCoreExpr expr )
+ (let body = wrapFloats floats expr in
+ returnSmpl (emptyFloats env, Case r v (exprType body) [(DEFAULT,[],body)]))
+
+ | otherwise
+ = addAuxiliaryBind env (NonRec v r) $ \ env ->
+ addAtomicBindsE env bs thing_inside
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{The main rebuilder}
+%* *
+%************************************************************************
+
+\begin{code}
+rebuild :: SimplEnv -> OutExpr -> SimplCont -> SimplM FloatsWithExpr
+
+rebuild env expr (Stop _ _ _) = rebuildDone env expr
+rebuild env expr (ArgOf _ _ _ cont_fn) = cont_fn env expr
+rebuild env expr (CoerceIt to_ty cont) = rebuild env (mkCoerce to_ty expr) cont
+rebuild env expr (InlinePlease cont) = rebuild env (Note InlineCall expr) cont
+rebuild env expr (Select _ bndr alts se cont) = rebuildCase (setInScope se env) expr bndr alts cont
+rebuild env expr (ApplyTo _ arg se cont) = rebuildApp (setInScope se env) expr arg cont
+
+rebuildApp env fun arg cont
+ = simplExpr env arg `thenSmpl` \ arg' ->
+ rebuild env (App fun arg') cont
+
+rebuildDone env expr = returnSmpl (emptyFloats env, expr)
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Functions dealing with a case}
+%* *
+%************************************************************************
+
+Blob of helper functions for the "case-of-something-else" situation.
+
+\begin{code}
+---------------------------------------------------------
+-- Eliminate the case if possible
+
+rebuildCase :: SimplEnv
+ -> OutExpr -- Scrutinee
+ -> InId -- Case binder
+	    -> [InAlt]		-- Alternatives (increasing order)
+ -> SimplCont
+ -> SimplM FloatsWithExpr
+
+rebuildCase env scrut case_bndr alts cont
+ | Just (con,args) <- exprIsConApp_maybe scrut
+ -- Works when the scrutinee is a variable with a known unfolding
+ -- as well as when it's an explicit constructor application
+ = knownCon env (DataAlt con) args case_bndr alts cont
+
+ | Lit lit <- scrut -- No need for same treatment as constructors
+ -- because literals are inlined more vigorously
+ = knownCon env (LitAlt lit) [] case_bndr alts cont
+
+ | otherwise
+ = -- Prepare the alternatives.
+ prepareAlts scrut case_bndr alts `thenSmpl` \ (better_alts, handled_cons) ->
+
+ -- Prepare the continuation;
+ -- The new subst_env is in place
+ prepareCaseCont env better_alts cont `thenSmpl` \ (floats, (dup_cont, nondup_cont)) ->
+ addFloats env floats $ \ env ->
+
+ let
+ -- The case expression is annotated with the result type of the continuation
+ -- This may differ from the type originally on the case. For example
+ -- case(T) (case(Int#) a of { True -> 1#; False -> 0# }) of
+ -- a# -> <blob>
+ -- ===>
+ -- let j a# = <blob>
+ -- in case(T) a of { True -> j 1#; False -> j 0# }
+ -- Note that the case that scrutinises a now returns a T not an Int#
+ res_ty' = contResultType dup_cont
+ in
+
+ -- Deal with case binder
+ simplCaseBinder env scrut case_bndr `thenSmpl` \ (alt_env, case_bndr') ->
+
+ -- Deal with the case alternatives
+ simplAlts alt_env handled_cons
+ case_bndr' better_alts dup_cont `thenSmpl` \ alts' ->
+
+ -- Put the case back together
+ mkCase scrut case_bndr' res_ty' alts' `thenSmpl` \ case_expr ->
+
+ -- Notice that rebuildDone returns the in-scope set from env, not alt_env
+	-- The case binder does *not* scope over the whole returned case-expression
+ rebuild env case_expr nondup_cont
+\end{code}
+
+simplCaseBinder checks whether the scrutinee is a variable, v. If so,
+try to eliminate uses of v in the RHSs in favour of case_bndr; that
+way, there's a chance that v will now only be used once, and hence
+inlined.
+
+Note 1
+~~~~~~
+There is a time we *don't* want to do that, namely when
+-fno-case-of-case is on. This happens in the first simplifier pass,
+and enhances full laziness. Here's the bad case:
+ f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
+If we eliminate the inner case, we trap it inside the I# v -> arm,
+which might prevent some full laziness happening. I've seen this
+in action in spectral/cichelli/Prog.hs:
+ [(m,n) | m <- [1..max], n <- [1..max]]
+Hence the check for NoCaseOfCase.
+
+Note 2
+~~~~~~
+There is another situation when we don't want to do it. If we have
+
+ case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
+ ...other cases .... }
+
+We'll perform the binder-swap for the outer case, giving
+
+ case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
+ ...other cases .... }
+
+But there is no point in doing it for the inner case, because w1 can't
+be inlined anyway. Furthermore, doing the case-swapping involves
+zapping w2's occurrence info (see paragraphs that follow), and that
+forces us to bind w2 when doing case merging. So we get
+
+ case x of w1 { A -> let w2 = w1 in e1
+ B -> let w2 = w1 in e2
+ ...other cases .... }
+
+This is plain silly in the common case where w2 is dead.
+
+Even so, I can't see a good way to implement this idea. I tried
+not doing the binder-swap if the scrutinee was already evaluated
+but that failed big-time:
+
+ data T = MkT !Int
+
+ case v of w { MkT x ->
+ case x of x1 { I# y1 ->
+ case x of x2 { I# y2 -> ...
+
+Notice that because MkT is strict, x is marked "evaluated". But to
+eliminate the last case, we must either make sure that x (as well as
+x1) has unfolding MkT y1. The straightforward thing to do is to do
+the binder-swap. So this whole note is a no-op.
+
+Note 3
+~~~~~~
+If we replace the scrutinee, v, by the case binder, then we have to nuke
+any occurrence info (eg IAmDead) in the case binder, because the
+case-binder now effectively occurs whenever v does. AND we have to do
+the same for the pattern-bound variables! Example:
+
+ (case x of { (a,b) -> a }) (case x of { (p,q) -> q })
+
+Here, b and p are dead. But when we move the argument inside the first
+case RHS, and eliminate the second case, we get
+
+ case x of { (a,b) -> a b }
+
+Urk! b is alive! Reason: the scrutinee was a variable, and case elimination
+happened.
+
+Indeed, this can happen anytime the case binder isn't dead:
+ case <any> of x { (a,b) ->
+ case x of { (p,q) -> p } }
+Here (a,b) both look dead, but come alive after the inner case is eliminated.
+The point is that we bring into the envt a binding
+ let x = (a,b)
+after the outer case, and that makes (a,b) alive. At least we do unless
+the case binder is guaranteed dead.
+
+\begin{code}
+simplCaseBinder env (Var v) case_bndr
+ | not (switchIsOn (getSwitchChecker env) NoCaseOfCase)
+
+-- Failed try [see Note 2 above]
+-- not (isEvaldUnfolding (idUnfolding v))
+
+ = simplBinder env (zap case_bndr) `thenSmpl` \ (env, case_bndr') ->
+ returnSmpl (modifyInScope env v case_bndr', case_bndr')
+ -- We could extend the substitution instead, but it would be
+ -- a hack because then the substitution wouldn't be idempotent
+ -- any more (v is an OutId). And this does just as well.
+ where
+ zap b = b `setIdOccInfo` NoOccInfo
+
+simplCaseBinder env other_scrut case_bndr
+ = simplBinder env case_bndr `thenSmpl` \ (env, case_bndr') ->
+ returnSmpl (env, case_bndr')
+\end{code}
+
+
+
+\begin{code}
+simplAlts :: SimplEnv
+ -> [AltCon] -- Alternatives the scrutinee can't be
+ -- in the default case
+ -> OutId -- Case binder
+ -> [InAlt] -> SimplCont
+ -> SimplM [OutAlt] -- Includes the continuation
+
+simplAlts env handled_cons case_bndr' alts cont'
+ = do { mb_alts <- mapSmpl simpl_alt alts
+ ; return [alt' | Just (_, alt') <- mb_alts] }
+ -- Filter out the alternatives that are inaccessible
+ where
+ simpl_alt alt = simplAlt env handled_cons case_bndr' alt cont'
+
+simplAlt :: SimplEnv -> [AltCon] -> OutId -> InAlt -> SimplCont
+ -> SimplM (Maybe (TvSubstEnv, OutAlt))
+-- Simplify an alternative, returning the type refinement for the
+-- alternative, if the alternative does any refinement at all
+-- Nothing => the alternative is inaccessible
+
+simplAlt env handled_cons case_bndr' (DEFAULT, bndrs, rhs) cont'
+ = ASSERT( null bndrs )
+ simplExprC env' rhs cont' `thenSmpl` \ rhs' ->
+ returnSmpl (Just (emptyVarEnv, (DEFAULT, [], rhs')))
+ where
+ env' = mk_rhs_env env case_bndr' (mkOtherCon handled_cons)
+ -- Record the constructors that the case-binder *can't* be.
+
+simplAlt env handled_cons case_bndr' (LitAlt lit, bndrs, rhs) cont'
+ = ASSERT( null bndrs )
+ simplExprC env' rhs cont' `thenSmpl` \ rhs' ->
+ returnSmpl (Just (emptyVarEnv, (LitAlt lit, [], rhs')))
+ where
+ env' = mk_rhs_env env case_bndr' (mkUnfolding False (Lit lit))
+
+simplAlt env handled_cons case_bndr' (DataAlt con, vs, rhs) cont'
+ | isVanillaDataCon con
+ = -- Deal with the pattern-bound variables
+ -- Mark the ones that are in ! positions in the data constructor
+ -- as certainly-evaluated.
+ -- NB: it happens that simplBinders does *not* erase the OtherCon
+ -- form of unfolding, so it's ok to add this info before
+ -- doing simplBinders
+ simplBinders env (add_evals con vs) `thenSmpl` \ (env, vs') ->
+
+ -- Bind the case-binder to (con args)
+ let unf = mkUnfolding False (mkConApp con con_args)
+ inst_tys' = tyConAppArgs (idType case_bndr')
+ con_args = map Type inst_tys' ++ map varToCoreExpr vs'
+ env' = mk_rhs_env env case_bndr' unf
+ in
+ simplExprC env' rhs cont' `thenSmpl` \ rhs' ->
+ returnSmpl (Just (emptyVarEnv, (DataAlt con, vs', rhs')))
+
+ | otherwise -- GADT case
+ = let
+ (tvs,ids) = span isTyVar vs
+ in
+ simplBinders env tvs `thenSmpl` \ (env1, tvs') ->
+ case coreRefineTys con tvs' (idType case_bndr') of {
+ Nothing -- Inaccessible
+ | opt_PprStyle_Debug -- Hack: if debugging is on, generate an error case
+ -- so we can see it
+ -> let rhs' = mkApps (Var eRROR_ID)
+ [Type (substTy env (exprType rhs)),
+ Lit (mkStringLit "Impossible alternative (GADT)")]
+ in
+ simplBinders env1 ids `thenSmpl` \ (env2, ids') ->
+ returnSmpl (Just (emptyVarEnv, (DataAlt con, tvs' ++ ids', rhs')))
+
+ | otherwise -- Filter out the inaccessible branch
+ -> return Nothing ;
+
+ Just refine@(tv_subst_env, _) -> -- The normal case
+
+ let
+ env2 = refineSimplEnv env1 refine
+ -- Simplify the Ids in the refined environment, so their types
+ -- reflect the refinement. Usually this doesn't matter, but it helps
+ -- in mkDupableAlt, when we want to float a lambda that uses these binders
+ -- Furthermore, it means the binders contain maximal type information
+ in
+ simplBinders env2 (add_evals con ids) `thenSmpl` \ (env3, ids') ->
+ let unf = mkUnfolding False con_app
+ con_app = mkConApp con con_args
+ con_args = map varToCoreExpr vs' -- NB: no inst_tys'
+ env_w_unf = mk_rhs_env env3 case_bndr' unf
+ vs' = tvs' ++ ids'
+ in
+ simplExprC env_w_unf rhs cont' `thenSmpl` \ rhs' ->
+ returnSmpl (Just (tv_subst_env, (DataAlt con, vs', rhs'))) }
+
+ where
+ -- add_evals records the evaluated-ness of the bound variables of
+ -- a case pattern. This is *important*. Consider
+ -- data T = T !Int !Int
+ --
+ -- case x of { T a b -> T (a+1) b }
+ --
+ -- We really must record that b is already evaluated so that we don't
+ -- go and re-evaluate it when constructing the result.
+ add_evals dc vs = cat_evals dc vs (dataConRepStrictness dc)
+
+ cat_evals dc vs strs
+ = go vs strs
+ where
+ go [] [] = []
+ go (v:vs) strs | isTyVar v = v : go vs strs
+ go (v:vs) (str:strs)
+ | isMarkedStrict str = evald_v : go vs strs
+ | otherwise = zapped_v : go vs strs
+ where
+ zapped_v = zap_occ_info v
+ evald_v = zapped_v `setIdUnfolding` evaldUnfolding
+ go _ _ = pprPanic "cat_evals" (ppr dc $$ ppr vs $$ ppr strs)
+
+ -- If the case binder is alive, then we add the unfolding
+ -- case_bndr = C vs
+ -- to the envt; so vs are now very much alive
+ zap_occ_info | isDeadBinder case_bndr' = \id -> id
+ | otherwise = \id -> id `setIdOccInfo` NoOccInfo
+
+mk_rhs_env env case_bndr' case_bndr_unf
+ = modifyInScope env case_bndr' (case_bndr' `setIdUnfolding` case_bndr_unf)
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Known constructor}
+%* *
+%************************************************************************
+
+We are a bit careful with occurrence info. Here's an example
+
+ (\x* -> case x of (a*, b) -> f a) (h v, e)
+
+where the * means "occurs once". This effectively becomes
+	case (h v, e) of (a*, b) -> f a
+and then
+ let a* = h v; b = e in f a
+and then
+ f (h v)
+
+All this should happen in one sweep.
+
+\begin{code}
+knownCon :: SimplEnv -> AltCon -> [OutExpr]
+ -> InId -> [InAlt] -> SimplCont
+ -> SimplM FloatsWithExpr
+
+knownCon env con args bndr alts cont
+ = tick (KnownBranch bndr) `thenSmpl_`
+ case findAlt con alts of
+ (DEFAULT, bs, rhs) -> ASSERT( null bs )
+ simplNonRecX env bndr scrut $ \ env ->
+ -- This might give rise to a binding with non-atomic args
+ -- like x = Node (f x) (g x)
+ -- but no harm will be done
+ simplExprF env rhs cont
+ where
+ scrut = case con of
+ LitAlt lit -> Lit lit
+ DataAlt dc -> mkConApp dc args
+
+ (LitAlt lit, bs, rhs) -> ASSERT( null bs )
+ simplNonRecX env bndr (Lit lit) $ \ env ->
+ simplExprF env rhs cont
+
+ (DataAlt dc, bs, rhs)
+ -> ASSERT( n_drop_tys + length bs == length args )
+ bind_args env bs (drop n_drop_tys args) $ \ env ->
+ let
+ con_app = mkConApp dc (take n_drop_tys args ++ con_args)
+ con_args = [substExpr env (varToCoreExpr b) | b <- bs]
+		-- args are already OutExprs, but bs are InIds
+ in
+ simplNonRecX env bndr con_app $ \ env ->
+ simplExprF env rhs cont
+ where
+ n_drop_tys | isVanillaDataCon dc = tyConArity (dataConTyCon dc)
+ | otherwise = 0
+ -- Vanilla data constructors lack type arguments in the pattern
+
+-- Ugh!
+bind_args env [] _ thing_inside = thing_inside env
+
+bind_args env (b:bs) (Type ty : args) thing_inside
+ = ASSERT( isTyVar b )
+ bind_args (extendTvSubst env b ty) bs args thing_inside
+
+bind_args env (b:bs) (arg : args) thing_inside
+ = ASSERT( isId b )
+ simplNonRecX env b arg $ \ env ->
+ bind_args env bs args thing_inside
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Duplicating continuations}
+%* *
+%************************************************************************
+
+\begin{code}
+prepareCaseCont :: SimplEnv
+ -> [InAlt] -> SimplCont
+ -> SimplM (FloatsWith (SimplCont,SimplCont))
+ -- Return a duplicatable continuation, a non-duplicable part
+ -- plus some extra bindings
+
+ -- No need to make it duplicatable if there's only one alternative
+prepareCaseCont env [alt] cont = returnSmpl (emptyFloats env, (cont, mkBoringStop (contResultType cont)))
+prepareCaseCont env alts cont = mkDupableCont env cont
+\end{code}
+
+\begin{code}
+mkDupableCont :: SimplEnv -> SimplCont
+ -> SimplM (FloatsWith (SimplCont, SimplCont))
+
+mkDupableCont env cont
+ | contIsDupable cont
+ = returnSmpl (emptyFloats env, (cont, mkBoringStop (contResultType cont)))
+
+mkDupableCont env (CoerceIt ty cont)
+ = mkDupableCont env cont `thenSmpl` \ (floats, (dup_cont, nondup_cont)) ->
+ returnSmpl (floats, (CoerceIt ty dup_cont, nondup_cont))
+
+mkDupableCont env (InlinePlease cont)
+ = mkDupableCont env cont `thenSmpl` \ (floats, (dup_cont, nondup_cont)) ->
+ returnSmpl (floats, (InlinePlease dup_cont, nondup_cont))
+
+mkDupableCont env cont@(ArgOf _ arg_ty _ _)
+ = returnSmpl (emptyFloats env, (mkBoringStop arg_ty, cont))
+ -- Do *not* duplicate an ArgOf continuation
+ -- Because ArgOf continuations are opaque, we gain nothing by
+ -- propagating them into the expressions, and we do lose a lot.
+ -- Here's an example:
+ -- && (case x of { T -> F; F -> T }) E
+ -- Now, && is strict so we end up simplifying the case with
+ -- an ArgOf continuation. If we let-bind it, we get
+ --
+ -- let $j = \v -> && v E
+ -- in simplExpr (case x of { T -> F; F -> T })
+	--		     (ArgOf (\r -> $j r))
+ -- And after simplifying more we get
+ --
+ -- let $j = \v -> && v E
+ -- in case of { T -> $j F; F -> $j T }
+ -- Which is a Very Bad Thing
+ --
+ -- The desire not to duplicate is the entire reason that
+ -- mkDupableCont returns a pair of continuations.
+ --
+ -- The original plan had:
+ -- e.g. (...strict-fn...) [...hole...]
+ -- ==>
+ -- let $j = \a -> ...strict-fn...
+ -- in $j [...hole...]
+
+mkDupableCont env (ApplyTo _ arg se cont)
+ = -- e.g. [...hole...] (...arg...)
+ -- ==>
+ -- let a = ...arg...
+ -- in [...hole...] a
+ simplExpr (setInScope se env) arg `thenSmpl` \ arg' ->
+
+ mkDupableCont env cont `thenSmpl` \ (floats, (dup_cont, nondup_cont)) ->
+ addFloats env floats $ \ env ->
+
+ if exprIsDupable arg' then
+ returnSmpl (emptyFloats env, (ApplyTo OkToDup arg' (zapSubstEnv se) dup_cont, nondup_cont))
+ else
+ newId FSLIT("a") (exprType arg') `thenSmpl` \ arg_id ->
+
+ tick (CaseOfCase arg_id) `thenSmpl_`
+ -- Want to tick here so that we go round again,
+ -- and maybe copy or inline the code.
+ -- Not strictly CaseOfCase, but never mind
+
+ returnSmpl (unitFloat env arg_id arg',
+ (ApplyTo OkToDup (Var arg_id) (zapSubstEnv se) dup_cont,
+ nondup_cont))
+ -- But what if the arg should be case-bound?
+ -- This has been this way for a long time, so I'll leave it,
+ -- but I can't convince myself that it's right.
+
+mkDupableCont env (Select _ case_bndr alts se cont)
+ = -- e.g. (case [...hole...] of { pi -> ei })
+ -- ===>
+ -- let ji = \xij -> ei
+ -- in case [...hole...] of { pi -> ji xij }
+ tick (CaseOfCase case_bndr) `thenSmpl_`
+ let
+ alt_env = setInScope se env
+ in
+ prepareCaseCont alt_env alts cont `thenSmpl` \ (floats1, (dup_cont, nondup_cont)) ->
+ addFloats alt_env floats1 $ \ alt_env ->
+
+ simplBinder alt_env case_bndr `thenSmpl` \ (alt_env, case_bndr') ->
+ -- NB: simplBinder does not zap deadness occ-info, so
+ -- a dead case_bndr' will still advertise its deadness
+ -- This is really important because in
+ -- case e of b { (# a,b #) -> ... }
+ -- b is always dead, and indeed we are not allowed to bind b to (# a,b #),
+ -- which might happen if e was an explicit unboxed pair and b wasn't marked dead.
+ -- In the new alts we build, we have the new case binder, so it must retain
+ -- its deadness.
+
+ mkDupableAlts alt_env case_bndr' alts dup_cont `thenSmpl` \ (floats2, alts') ->
+ addFloats alt_env floats2 $ \ alt_env ->
+ returnSmpl (emptyFloats alt_env,
+ (Select OkToDup case_bndr' alts' (zapSubstEnv se)
+ (mkBoringStop (contResultType dup_cont)),
+ nondup_cont))
+
+mkDupableAlts :: SimplEnv -> OutId -> [InAlt] -> SimplCont
+ -> SimplM (FloatsWith [InAlt])
+-- Absorbs the continuation into the new alternatives
+
+mkDupableAlts env case_bndr' alts dupable_cont
+ = go env alts
+ where
+ go env [] = returnSmpl (emptyFloats env, [])
+ go env (alt:alts)
+ = do { (floats1, mb_alt') <- mkDupableAlt env case_bndr' dupable_cont alt
+ ; addFloats env floats1 $ \ env -> do
+ { (floats2, alts') <- go env alts
+ ; returnSmpl (floats2, case mb_alt' of
+ Just alt' -> alt' : alts'
+ Nothing -> alts'
+ )}}
+
+mkDupableAlt env case_bndr' cont alt
+ = simplAlt env [] case_bndr' alt cont `thenSmpl` \ mb_stuff ->
+ case mb_stuff of {
+ Nothing -> returnSmpl (emptyFloats env, Nothing) ;
+
+ Just (reft, (con, bndrs', rhs')) ->
+ -- Safe to say that there are no handled-cons for the DEFAULT case
+
+ if exprIsDupable rhs' then
+ returnSmpl (emptyFloats env, Just (con, bndrs', rhs'))
+ -- It is worth checking for a small RHS because otherwise we
+	-- get extra let bindings that may cause an extra iteration of the simplifier
+	-- to inline them back in place.  Quite often the rhs is just a variable or constructor.
+ -- The Ord instance of Maybe in PrelMaybe.lhs, for example, took several extra
+ -- iterations because the version with the let bindings looked big, and so wasn't
+ -- inlined, but after the join points had been inlined it looked smaller, and so
+ -- was inlined.
+ --
+ -- NB: we have to check the size of rhs', not rhs.
+ -- Duplicating a small InAlt might invalidate occurrence information
+ -- However, if it *is* dupable, we return the *un* simplified alternative,
+ -- because otherwise we'd need to pair it up with an empty subst-env....
+ -- but we only have one env shared between all the alts.
+ -- (Remember we must zap the subst-env before re-simplifying something).
+ -- Rather than do this we simply agree to re-simplify the original (small) thing later.
+
+ else
+ let
+ rhs_ty' = exprType rhs'
+ used_bndrs' = filter abstract_over (case_bndr' : bndrs')
+ abstract_over bndr
+ | isTyVar bndr = not (bndr `elemVarEnv` reft)
+ -- Don't abstract over tyvar binders which are refined away
+ -- See Note [Refinement] below
+ | otherwise = not (isDeadBinder bndr)
+ -- The deadness info on the new Ids is preserved by simplBinders
+ in
+ -- If we try to lift a primitive-typed something out
+ -- for let-binding-purposes, we will *caseify* it (!),
+ -- with potentially-disastrous strictness results. So
+ -- instead we turn it into a function: \v -> e
+ -- where v::State# RealWorld#. The value passed to this function
+ -- is realworld#, which generates (almost) no code.
+
+ -- There's a slight infelicity here: we pass the overall
+ -- case_bndr to all the join points if it's used in *any* RHS,
+ -- because we don't know its usage in each RHS separately
+
+ -- We used to say "&& isUnLiftedType rhs_ty'" here, but now
+ -- we make the join point into a function whenever used_bndrs'
+ -- is empty. This makes the join-point more CPR friendly.
+ -- Consider: let j = if .. then I# 3 else I# 4
+ -- in case .. of { A -> j; B -> j; C -> ... }
+ --
+ -- Now CPR doesn't w/w j because it's a thunk, so
+ -- that means that the enclosing function can't w/w either,
+ -- which is a lose. Here's the example that happened in practice:
+ -- kgmod :: Int -> Int -> Int
+ -- kgmod x y = if x > 0 && y < 0 || x < 0 && y > 0
+ -- then 78
+ -- else 5
+ --
+ -- I have seen a case alternative like this:
+ -- True -> \v -> ...
+ -- It's a bit silly to add the realWorld dummy arg in this case, making
+ -- $j = \s v -> ...
+ -- True -> $j s
+ -- (the \v alone is enough to make CPR happy) but I think it's rare
+
+ ( if not (any isId used_bndrs')
+ then newId FSLIT("w") realWorldStatePrimTy `thenSmpl` \ rw_id ->
+ returnSmpl ([rw_id], [Var realWorldPrimId])
+ else
+ returnSmpl (used_bndrs', map varToCoreExpr used_bndrs')
+ ) `thenSmpl` \ (final_bndrs', final_args) ->
+
+ -- See comment about "$j" name above
+ newId FSLIT("$j") (mkPiTypes final_bndrs' rhs_ty') `thenSmpl` \ join_bndr ->
+	-- Notice the funky mkPiTypes.  If the constructor has existentials
+	-- it's possible that the join point will be abstracted over
+	-- type variables as well as term variables.
+ -- Example: Suppose we have
+ -- data T = forall t. C [t]
+ -- Then faced with
+ -- case (case e of ...) of
+ -- C t xs::[t] -> rhs
+ -- We get the join point
+ -- let j :: forall t. [t] -> ...
+ -- j = /\t \xs::[t] -> rhs
+ -- in
+ -- case (case e of ...) of
+ -- C t xs::[t] -> j t xs
+ let
+ -- We make the lambdas into one-shot-lambdas. The
+ -- join point is sure to be applied at most once, and doing so
+ -- prevents the body of the join point being floated out by
+ -- the full laziness pass
+ really_final_bndrs = map one_shot final_bndrs'
+ one_shot v | isId v = setOneShotLambda v
+ | otherwise = v
+ join_rhs = mkLams really_final_bndrs rhs'
+ join_call = mkApps (Var join_bndr) final_args
+ in
+ returnSmpl (unitFloat env join_bndr join_rhs, Just (con, bndrs', join_call)) }
+\end{code}
+
+Note [Refinement]
+~~~~~~~~~~~~~~~~~
+Consider
+ data T a where
+ MkT :: a -> b -> T a
+
+ f = /\a. \(w::a).
+ case (case ...) of
+ MkT a' b (p::a') (q::b) -> [p,w]
+
+The danger is that we'll make a join point
+
+ j a' p = [p,w]
+
+and that's ill-typed, because (p::a') but (w::a).
+
+Solution so far: don't abstract over a', because the type refinement
+maps [a' -> a]. Ultimately that won't work when real refinement goes on.
+
+Then we must abstract over any refined free variables. Hmm. Maybe we
+could just abstract over *all* free variables, thereby lambda-lifting
+the join point? We should try this.
diff --git a/compiler/simplCore/simplifier.tib b/compiler/simplCore/simplifier.tib
new file mode 100644
index 0000000000..18acd27943
--- /dev/null
+++ b/compiler/simplCore/simplifier.tib
@@ -0,0 +1,771 @@
+% Andre:
+%
+% - I'd like the transformation rules to appear clearly-identified in
+% a box of some kind, so they can be distinguished from the examples.
+%
+
+
+
+\documentstyle[slpj,11pt]{article}
+
+\renewcommand{\textfraction}{0.2}
+\renewcommand{\floatpagefraction}{0.7}
+
+\begin{document}
+
+\title{How to simplify matters}
+
+\author{Simon Peyton Jones and Andre Santos\\
+Department of Computing Science, University of Glasgow, G12 8QQ \\
+ @simonpj@@dcs.gla.ac.uk@
+}
+
+\maketitle
+
+
+\section{Motivation}
+
+Quite a few compilers use the {\em compilation by transformation} idiom.
+The idea is that as much as possible of the compilation process is
+expressed as correctness-preserving transformations, each of which
+transforms a program into a semantically-equivalent
+program that (hopefully) executes more quickly or in less space.
+Functional languages are particularly amenable to this approach because
+they have a particularly rich family of possible transformations.
+Examples of transformation-based compilers
+include the Orbit compiler,[.kranz orbit thesis.]
+Kelsey's compilers,[.kelsey thesis, hudak kelsey principles 1989.]
+the New Jersey SML compiler,[.appel compiling with continuations.]
+and the Glasgow Haskell compiler.[.ghc JFIT.] Of course many, perhaps most,
+other compilers also use transformation to some degree.
+
+Compilation by transformation uses automatic transformations; that is, those
+which can safely be applied automatically by a compiler. There
+is also a whole approach to programming, which we might call {\em programming by transformation},
+in which the programmer manually transforms an inefficient specification into
+an efficient program. This development process might be supported by
+a programming environment which does the book-keeping, but the key steps
+are guided by the programmer. We focus exclusively on automatic transformations
+in this paper.
+
+Automatic program transformations seem to fall into two broad categories:
+\begin{itemize}
+\item {\bf Glamorous transformations} are global, sophisticated,
+intellectually satisfying transformations, sometimes guided by some
+interesting kind of analysis.
+Examples include:
+lambda lifting,[.johnsson lambda lifting.]
+full laziness,[.hughes thesis, lester spe.]
+closure conversion,[.appel jim 1989.]
+deforestation,[.wadler 1990 deforestation, marlow wadler deforestation Glasgow92, chin phd 1990 march, gill launchbury.]
+transformations based on strictness analysis,[.peyton launchbury unboxed.]
+and so on. It is easy to write papers about these sorts of transformations.
+
+\item {\bf Humble transformations} are small, simple, local transformations,
+which individually look pretty trivial. Here are two simple examples\footnote{
+The notation @E[]@ stands for an arbitrary expression with zero or more holes.
+The notation @E[e]@ denotes @E[]@ with the holes filled in by the expression @e@.
+We implicitly assume that no name-capture happens --- it's just
+a short-hand, not an algorithm.
+}:
+@
+ let x = y in E[x] ===> E[y]
+
+ case (x:xs) of ===> E1[x,xs]
+ (y:ys) -> E1[y,ys]
+ [] -> E2
+@
+Transformations of this kind are almost embarrassingly simple. How could
+anyone write a paper about them?
+\end{itemize}
+This paper is about humble transformations, and how to implement them.
+Although each individual
+transformation is simple enough, there is a scaling issue:
+there are a large number of candidate transformations to consider, and
+there are a very large number of opportunities to apply them.
+
+In the Glasgow Haskell compiler, all humble transformations
+are performed by the so-called {\em simplifier}.
+Our goal in this paper is to give an overview of how the simplifier works, what
+transformations it applies, and what issues arose in its design.
+
+\section{The language}
+
+Mutter mutter. Important points:
+\begin{itemize}
+\item Second order lambda calculus.
+\item Arguments are variables.
+\item Unboxed data types, and unboxed cases.
+\end{itemize}
+Less important points:
+\begin{itemize}
+\item Constructors and primitives are saturated.
+\item if-then-else desugared to @case@
+\end{itemize}
+
+Give data type.
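+
+To give a rough idea of what such a data type might look like, here is a
+small illustrative sketch in Haskell.  It is only a sketch: the names and
+the exact set of constructors are invented for this paper, and the data
+type actually used in the compiler is rather richer.
+@
+-- Illustrative only: a second-order language in which arguments are atomic.
+type Literal = Int                          -- stand-in for real literals
+type TyVar   = String
+data Ty      = TyVarTy TyVar | TyConApp String [Ty]
+
+data Atom b  = AVar b | ALit Literal        -- arguments: a variable or a literal
+data Bind b  = NonRec b (Expr b)            -- let
+             | Rec [(b, Expr b)]            -- letrec
+data Alt b   = ConAlt String [b] (Expr b)   -- C b1 .. bn -> E
+             | LitAlt Literal (Expr b)      -- lit        -> E
+             | DefaultAlt b (Expr b)        -- y          -> E
+
+data Expr b
+  = Var   b
+  | Lit   Literal
+  | Con   String [Atom b]                   -- saturated constructor application
+  | Prim  String [Atom b]                   -- saturated primitive application
+  | App   (Expr b) (Atom b)                 -- note that the argument is atomic
+  | TyApp (Expr b) Ty                       -- type application (second order)
+  | Lam   b (Expr b)                        -- value abstraction
+  | TyLam TyVar (Expr b)                    -- type abstraction (second order)
+  | Let   (Bind b) (Expr b)
+  | Case  (Expr b) [Alt b]
+@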
+
+\section{Transformations}
+
+This section lists all the transformations implemented by the simplifier.
+Because it is a complete list, it is a long one.
+We content ourselves with a brief statement of each transformation,
+augmented with forward references to Section~\ref{sect:composing}
+which gives examples of the ways in which the transformations can compose together.
+
+\subsection{Beta reduction}
+
+If a lambda abstraction is applied to an argument, we can simply
+beta-reduce. This applies equally to ordinary lambda abstractions and
+type abstractions:
+@
+ (\x -> E[x]) arg ===> E[arg]
+ (/\a -> E[a]) ty ===> E[ty]
+@
+There is no danger of duplicating work because the argument is
+guaranteed to be a simple variable or literal.
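+
+To make the rule concrete, here is a tiny, self-contained Haskell sketch of
+beta reduction over a deliberately cut-down expression type (invented for
+this illustration; it is not the representation the simplifier uses).
+Because the argument is atomic, substituting it can never duplicate work.
+@
+data Atom = AVar String | ALit Int
+data Exp  = V String | L Int | Lam String Exp | App Exp Atom
+
+atomToExp :: Atom -> Exp
+atomToExp (AVar v) = V v
+atomToExp (ALit n) = L n
+
+-- Substitute an atom for a variable.  We assume binders have already been
+-- renamed apart (see the section on renaming), so no capture can occur.
+subst :: String -> Atom -> Exp -> Exp
+subst x a (V y) | x == y    = atomToExp a
+                | otherwise = V y
+subst _ _ (L n)             = L n
+subst x a (Lam y e) | x == y    = Lam y e              -- x is shadowed: stop
+                    | otherwise = Lam y (subst x a e)
+subst x a (App f b)         = App (subst x a f) (substAtom b)
+  where substAtom (AVar v) | v == x = a
+        substAtom other             = other
+
+-- One step of beta reduction at the root, if the rule applies.
+beta :: Exp -> Maybe Exp
+beta (App (Lam x body) arg) = Just (subst x arg body)
+beta _                      = Nothing
+@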
+
+\subsubsection{Floating applications inward}
+
+Applications can be floated inside a @let(rec)@ or @case@ expression.
+This is a good idea, because they might find a lambda abstraction inside
+to beta-reduce with:
+@
+ (let(rec) Bind in E) arg ===> let(rec) Bind in (E arg)
+
+ (case E of {P1 -> E1;...; Pn -> En}) arg
+ ===>
+ case E of {P1 -> E1 arg; ...; Pn -> En arg}
+@
+
+
+
+\subsection{Transformations concerning @let(rec)@}
+
+\subsubsection{Floating @let@ out of @let@}
+
+It is sometimes useful to float a @let(rec)@ out of a @let(rec)@ right-hand
+side:
+@
+ let x = let(rec) Bind in B1 ===> let(rec) Bind in
+ in B2 let x = B1
+ in B2
+
+
+ letrec x = let(rec) Bind in B1 ===> let(rec) Bind
+ in B2 x = B1
+ in B2
+@
+
+\subsubsection{Floating @case@ out of @let@}
+
+
+\subsubsection{@let@ to @case@}
+
+
+\subsection{Transformations concerning @case@}
+
+\subsubsection{Case of known constructor}
+
+If a @case@ expression scrutinises a constructor,
+the @case@ can be eliminated. This transformation is a real
+win: it eliminates a whole @case@ expression.
+@
+ case (C a1 .. an) of ===> E[a1..an]
+ ...
+ C b1 .. bn -> E[b1..bn]
+ ...
+@
+If none of the constructors in the alternatives match, then
+the default is taken:
+@
+ case (C a1 .. an) of ===> let y = C a1 .. an
+ ...[no alt matches C]... in E
+ y -> E
+@
+There is an important variant of this transformation when
+the @case@ expression scrutinises a {\em variable}
+which is known to be bound to a constructor.
+This situation can
+arise for two reasons:
+\begin{itemize}
+\item An enclosing @let(rec)@ binding binds the variable to a constructor.
+For example:
+@
+ let x = C p q in ... (case x of ...) ...
+@
+\item An enclosing @case@ expression scrutinises the same variable.
+For example:
+@
+ case x of
+ ...
+ C p q -> ... (case x of ...) ...
+ ...
+@
+This situation is particularly common, as we discuss in Section~\ref{sect:repeated-evals}.
+\end{itemize}
+In each of these examples, @x@ is known to be bound to @C p q@
+at the inner @case@. The general rules are:
+@
+ case x of {...; C b1 .. bn -> E[b1..bn]; ...}
+===> {x bound to C a1 .. an}
+ E[a1..an]
+
+ case x of {...[no alts match C]...; y -> E[y]}
+===> {x bound to C a1 .. an}
+ E[x]
+@
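+
+The bookkeeping needed for the variable case can be pictured with a small,
+self-contained Haskell sketch: the environment records which constructor
+application a variable is known to be bound to, and the inner @case@
+consults that record.  The names are invented for this illustration, and
+the default alternative is omitted for brevity.
+@
+type Var = String
+type Con = String
+
+-- What the environment records about x:  x is bound to  C a1 .. an
+data Known = Known Con [Var]
+
+-- Given that knowledge and the alternatives of an inner  case x of ...,
+-- pick the matching right-hand side together with the renaming  bi |-> ai.
+chooseAlt :: Known -> [(Con, [Var], rhs)] -> Maybe ([(Var, Var)], rhs)
+chooseAlt (Known con args) alts =
+  case [ (zip bs args, rhs) | (c, bs, rhs) <- alts, c == con ] of
+    (hit : _) -> Just hit            -- case-of-known-constructor fires
+    []        -> Nothing             -- would fall through to the default
+@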
+
+\subsubsection{Dead alternative elimination}
+@
+ case x of
+ C a .. z -> E
+ ...[other alts]...
+===> x *not* bound to C
+ case x of
+ ...[other alts]...
+@
+We might know that @x@ is not bound to a particular constructor
+because of an enclosing case:
+@
+ case x of
+ C a .. z -> E1
+ other -> E2
+@
+Inside @E1@ we know that @x@ is bound to @C@.
+However, if the type has more than two constructors,
+inside @E2@ all we know is that @x@ is {\em not} bound to @C@.
+
+This applies to unboxed cases also, in the obvious way.
+
+\subsubsection{Case elimination}
+
+If we can prove that @x@ is not bottom, then this rule applies.
+@
+ case x of ===> E[x]
+ y -> E[y]
+@
+We might know that @x@ is non-bottom because:
+\begin{itemize}
+\item @x@ has an unboxed type.
+\item There's an enclosing case which scrutinises @x@.
+\item It is bound to an expression which provably terminates.
+\end{itemize}
+Since this transformation can only improve termination, even if we apply it
+when @x@ is not provably non-bottom, we provide a compiler flag to
+enable it all the time.
+
+\subsubsection{Case of error}
+
+@
+ case (error ty E) of Alts ===> error ty' E
+ where
+ ty' is type of whole case expression
+@
+
+Mutter about types. Mutter about variables bound to error.
+Mutter about disguised forms of error.
+
+\subsubsection{Floating @let(rec)@ out of @case@}
+
+A @let(rec)@ binding can be floated out of a @case@ scrutinee:
+@
+ case (let(rec) Bind in E) of Alts ===> let(rec) Bind in
+ case E of Alts
+@
+This increases the likelihood of a case-of-known-constructor transformation,
+because @E@ is not hidden from the @case@ by the @let(rec)@.
+
+\subsubsection{Floating @case@ out of @case@}
+
+Analogous to floating a @let(rec)@ from a @case@ scrutinee is
+floating a @case@ from a @case@ scrutinee. We have to be
+careful, though, about code size. If there's only one alternative
+in the inner case, things are easy:
+@
+ case (case E of {P -> R}) of ===> case E of {P -> case R of
+ Q1 -> S1 Q1 -> S1
+ ... ...
+ Qm -> Sm Qm -> Sm}
+@
+If there's more than one alternative there's a danger
+that we'll duplicate @S1@...@Sm@, which might be a lot of code.
+Our solution is to create a new local definition for each
+alternative:
+@
+ case (case E of {P1 -> R1; ...; Pn -> Rn}) of
+ Q1 -> S1
+ ...
+ Qm -> Sm
+===>
+ let s1 = \x1 ... z1 -> S1
+ ...
+ sm = \xm ... zm -> Sm
+ in
+ case E of
+ P1 -> case R1 of {Q1 -> s1 x1 ... z1; ...; Qm -> sm xm ... zm}
+ ...
+ Pn -> case Rn of {Q1 -> s1 x1 ... z1; ...; Qm -> sm xm ... zm}
+@
+Here, @x1 ... z1@ are that subset of
+variables bound by the pattern @Q1@ which are free in @S1@, and
+similarly for the other @si@.
+
+Is this transformation a win? After all, we have introduced @m@ new
+functions! Section~\ref{sect:join-points} discusses this point.
+
+\subsubsection{Case merging}
+
+@
+ case x of
+ ...[some alts]...
+ other -> case x of
+ ...[more alts]...
+===>
+ case x of
+ ...[some alts]...
+ ...[more alts]...
+@
+Any alternatives in @[more alts]@ which are already covered by @[some alts]@
+should first be eliminated by the dead-alternative transformation.
+
+
+\subsection{Constructor reuse}
+
+
+\subsection{Inlining}
+
+The inlining transformation is simple enough:
+@
+ let x = R in B[x] ===> B[R]
+@
+Inlining is more conventionally used to describe the instantiation of a function
+body at its call site, with arguments substituted for formal parameters. We treat
+this as a two-stage process: inlining followed by beta reduction. Since we are
+working with a higher-order language, not all the arguments may be available at every
+call site, so separating inlining from beta reduction allows us to concentrate on
+one problem at a time.
+
+The choice of exactly {\em which} bindings to inline has a major impact on efficiency.
+Specifically, we need to consider the following factors:
+\begin{itemize}
+\item
+Inlining a function at its call site, followed by some beta reduction,
+very often exposes opportunities for further transformations.
+We inline many simple arithmetic and boolean operators for this reason.
+\item
+Inlining can increase code size.
+\item
+Inlining can duplicate work, for example if a redex is inlined at more than one site.
+Duplicating a single expensive redex can ruin a program's efficiency.
+\end{itemize}
+
+
+Our inlining strategy depends on the form of @R@:
+
+Mutter mutter.
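+
+By way of illustration only, here is a self-contained Haskell sketch of the
+shape such a decision might take, driven by occurrence information and a
+crude size measure.  The names and the threshold are invented for this
+sketch; the strategy actually used weighs considerably more.
+@
+data Occurs = Dead            -- not used at all
+            | OnceNotInLam    -- used exactly once, not inside a lambda
+            | Many            -- used many times, or inside a lambda
+
+inlineBinding :: Occurs -> Bool -> Int -> Bool
+inlineBinding occ rhsIsValue rhsSize =
+  case occ of
+    Dead         -> False                        -- drop it; don't inline it
+    OnceNotInLam -> True                         -- no code or work duplication
+    Many         -> rhsIsValue                   -- a value: no work is duplicated
+                    && rhsSize <= sizeThreshold  -- and small: little code growth
+  where sizeThreshold = 10                       -- arbitrary cut-off for the sketch
+@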
+
+
+\subsubsection{Dead code removal}
+
+If a @let@-bound variable is not used the binding can be dropped:
+@
+ let x = E in B ===> B
+ x not free in B
+@
+A similar transformation applies for @letrec@-bound variables.
+Programmers seldom write dead code, of course, but bindings often become dead when they
+are inlined.
+
+
+
+
+\section{Composing transformations}
+\label{sect:composing}
+
+The really interesting thing about humble transformations is the way in which
+they compose together to carry out substantial and useful transformations.
+This section gives a collection of motivating examples, all of which have
+shown up in real application programs.
+
+\subsection{Repeated evals}
+\label{sect:repeated-evals}
+
+Example: x+x, as in unboxed paper.
+
+
+\subsection{Lazy pattern matching}
+
+Lazy pattern matching is pretty inefficient. Consider:
+@
+ let (x,y) = E in B
+@
+which desugars to:
+@
+ let t = E
+ x = case t of (x,y) -> x
+ y = case t of (x,y) -> y
+ in B
+@
+This code allocates three thunks! However, if @B@ is strict in {\em either}
+@x@ {\em or} @y@, then the strictness analyser will easily spot that
+the binding for @t@ is strict, so we can do a @let@-to-@case@ transformation:
+@
+ case E of
+ (x,y) -> let t = (x,y) in
+ let x = case t of (x,y) -> x
+ y = case t of (x,y) -> y
+ in B
+@
+whereupon the case-of-known-constructor transformation
+eliminates the @case@ expressions in the right-hand side of @x@ and @y@,
+and @t@ is then spotted as being dead, so we get
+@
+ case E of
+ (x,y) -> B
+@
+
+\subsection{Join points}
+\label{sect:join-points}
+
+One motivating example is this:
+@
+ if (not x) then E1 else E2
+@
+After desugaring the conditional, and inlining the definition of
+@not@, we get
+@
+	case (case x of {True -> False; False -> True}) of
+ True -> E1
+ False -> E2
+@
+Now, if we apply our case-of-case transformation we get:
+@
+ let e1 = E1
+ e2 = E2
+ in
+ case x of
+ True -> case False of {True -> e1; False -> e2}
+ False -> case True of {True -> e1; False -> e2}
+@
+Now the case-of-known constructor transformation applies:
+@
+ let e1 = E1
+ e2 = E2
+ in
+ case x of
+ True -> e2
+ False -> e1
+@
+Since there is now only one occurrence of @e1@ and @e2@ we can
+inline them, giving just what we hoped for:
+@
+ case x of {True -> E2; False -> E1}
+@
+The point is that the local definitions will often disappear again.
+
+\subsubsection{How join points occur}
+
+But what if they don't disappear? Then the definitions @s1@ ... @sm@
+play the role of ``join points''; they represent the places where
+execution joins up again, having forked at the @case x@. The
+``calls'' to the @si@ should really be just jumps. To see this more clearly
+consider the expression
+@
+ if (x || y) then E1 else E2
+@
+A C compiler will ``short-circuit'' the
+evaluation of the condition if @x@ turns out to be true,
+generating code something like this:
+@
+	if (x) goto l1;
+	if (y) goto l1;
+	...code for E2...
+	goto l2;
+	l1: ...code for E1...
+	l2: ...
+@
+In our setting, here's what will happen. First we desugar the
+conditional, and inline the definition of @||@:
+@
+ case (case x of {True -> True; False -> y}) of
+ True -> E1
+ False -> E2
+@
+Now apply the case-of-case transformation:
+@
+ let e1 = E1
+ e2 = E2
+ in
+ case x of
+ True -> case True of {True -> e1; False -> e2}
+ False -> case y of {True -> e1; False -> e2}
+@
+Unlike the @not@ example, only one of the two inner case
+simplifies, and we can therefore only inline @e2@, because
+@e1@ is still mentioned twice\footnote{Unless the
+inlining strategy decides that @E1@ is small enough to duplicate;
+it is used in separate @case@ branches so there's no concern about duplicating
+work. Here's another example of the way in which we make one part of the
+simplifier (the inlining strategy) help with the work of another (@case@-expression
+simplification).}
+@
+ let e1 = E1
+ in
+ case x of
+ True -> e1
+	  False -> case y of {True -> e1; False -> E2}
+@
+The code generator produces essentially the same code as
+the C code given above. The binding for @e1@ turns into
+just a label, which is jumped to from the two occurrences of @e1@.
+
+\subsubsection{Case of @error@}
+
+The case-of-error transformation is often exposed by the case-of-case
+transformation. Consider
+@
+ case (hd xs) of
+ True -> E1
+ False -> E2
+@
+After inlining @hd@, we get
+@
+ case (case xs of [] -> error "hd"; (x:_) -> x) of
+ True -> E1
+ False -> E2
+@
+(I've omitted the type argument of @error@ to save clutter.)
+Now doing case-of-case gives
+@
+ let e1 = E1
+ e2 = E2
+ in
+ case xs of
+ [] -> case (error "hd") of { True -> e1; False -> e2 }
+ (x:_) -> case x of { True -> e1; False -> e2 }
+@
+Now the case-of-error transformation springs to life, after which
+we can inline @e1@ and @e2@:
+@
+ case xs of
+ [] -> error "hd"
+ (x:_) -> case x of {True -> E1; False -> E2}
+@
+
+\subsection{Nested conditionals combined}
+
+Sometimes programmers write something which should be done
+by a single @case@ as a sequence of tests:
+@
+ if x==0::Int then E0 else
+ if x==1 then E1 else
+ E2
+@
+After eliminating some redundant evals and doing the case-of-case
+transformation we get
+@
+ case x of I# x# ->
+ case x# of
+ 0# -> E0
+ other -> case x# of
+ 1# -> E1
+ other -> E2
+@
+The case-merging transformation puts these together to get
+@
+ case x of I# x# ->
+ case x# of
+ 0# -> E0
+ 1# -> E1
+ other -> E2
+@
+Sometimes the sequence of tests cannot be eliminated from the source
+code because of overloading:
+@
+ f :: Num a => a -> Bool
+ f 0 = True
+ f 3 = True
+ f n = False
+@
+If we specialise @f@ to @Int@ we'll get the previous example again.
+
+\subsection{Error tests eliminated}
+
+The elimination of redundant alternatives, and then of redundant cases,
+arises when we inline functions which do error checking. A typical
+example is this:
+@
+ if (x `rem` y) == 0 then (x `div` y) else y
+@
+Here, both @rem@ and @div@ do an error-check for @y@ being zero.
+The second check is eliminated by the transformations.
+After transformation the code becomes:
+@
+ case x of I# x# ->
+ case y of I# y# ->
+	case y# of
+ 0# -> error "rem: zero divisor"
+ _ -> case x# rem# y# of
+ 0# -> case x# div# y# of
+ r# -> I# r#
+ _ -> y
+@
+
+\subsection{Atomic arguments}
+
+At this point it is possible to appreciate the usefulness of
+the Core-language syntax requirement that arguments are atomic.
+For example, suppose that arguments could be arbitrary expressions.
+Here is a possible transformation:
+@
+	f (case x of (p,q) -> p)
+===>				f strict in its argument
+	case x of (p,q) -> f p
+@
+Doing this transformation would be useful, because now the
+argument to @f@ is a simple variable rather than a thunk.
+However, if arguments are atomic, this transformation becomes
+just a special case of floating a @case@ out of a strict @let@:
+@
+ let a = case x of (p,q) -> p
+ in f a
+===> (f a) strict in a
+ case x of (p,q) -> let a=p in f a
+===>
+ case x of (p,q) -> f p
+@
+There are many examples of this kind. For almost any transformation
+involving @let@ there is a corresponding one involving a function
+argument. The same effect is achieved with much less complexity
+by restricting function arguments to be atomic.
+
+\section{Design}
+
+Dependency analysis
+Occurrence analysis
+
+\subsection{Renaming and cloning}
+
+Every program-transformation system has to worry about name capture.
+For example, here is an erroneous transformation:
+@
+ let y = E
+ in
+ (\x -> \y -> x + y) (y+3)
+===> WRONG!
+ let y = E
+ in
+ (\y -> (y+3) + y)
+@
+The transformation fails because the originally free occurrence
+of @y@ in the argument @y+3@ has been ``captured'' by the @\y@-abstraction.
+There are various sophisticated solutions to this difficulty, but
+we adopted a very simple one: we uniquely rename every locally-bound identifier
+on every pass of the simplifier.
+Since we are in any case producing an entirely new program (rather than side-effecting
+an existing one) it costs very little extra to rename the identifiers as we go.
+
+So our example would become
+@
+ let y = E
+ in
+ (\x -> \y -> x + y) (y+3)
+===>
+ let y1 = E
+ in
+ (\y2 -> (y1+3) + y2)
+@
+The simplifier accepts as input a program which has arbitrary bound
+variable names, including ``shadowing'' (where a binding hides an
+outer binding for the same identifier), but it produces a program in
+which every bound identifier has a distinct name.
+
+Both the ``old'' and ``new'' identifiers have type @Id@, but when writing
+type signatures for functions in the simplifier we use the types @InId@, for
+identifiers from the input program, and @OutId@ for identifiers from the output program:
+@
+ type InId = Id
+ type OutId = Id
+@
+This nomenclature extends naturally to expressions: a value of type @InExpr@ is an
+expression whose identifiers are from the input-program name-space, and similarly
+@OutExpr@.
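+
+A small, self-contained Haskell sketch may help to fix ideas.  It gives
+every binder a fresh name drawn from a supply, and carries an environment
+mapping @InId@s to @OutId@s; the expression type and the naming scheme are
+invented for this illustration.
+@
+import qualified Data.Map as M
+
+type InId  = String
+type OutId = String
+data Exp b = V b | Lam b (Exp b) | App (Exp b) (Exp b)
+
+-- clone env supply e  renames every binder in e, threading a counter that
+-- supplies fresh names; free variables are looked up in env (or kept as-is).
+clone :: M.Map InId OutId -> Int -> Exp InId -> (Int, Exp OutId)
+clone env supply (V x)     = (supply, V (M.findWithDefault x x env))
+clone env supply (App f a) = let (s1, f') = clone env supply f
+                                 (s2, a') = clone env s1 a
+                             in  (s2, App f' a')
+clone env supply (Lam x e) = let x'       = x ++ "_" ++ show supply   -- fresh name
+                                 (s1, e') = clone (M.insert x x' env) (supply + 1) e
+                             in  (s1, Lam x' e')
+@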
+
+
+\section{The simplifier}
+
+The basic algorithm followed by the simplifier is:
+\begin{enumerate}
+\item Analyse: perform occurrence analysis and dependency analysis.
+\item Simplify: apply as many transformations as possible.
+\item Iterate: perform the above two steps repeatedly until no further transformations are possible.
+(A compiler flag allows the programmer to bound the maximum number of iterations.)
+\end{enumerate}
+We make an effort to apply as many transformations as possible in Step
+2. To see why this is a good idea, just consider a sequence of
+transformations in which each transformation enables the next. If
+each iteration of Step 2 only performs one transformation, then the
+entire program will have to be re-analysed by Step 1, and re-traversed by
+Step 2, for each transformation of the sequence. Sometimes this is
+unavoidable, but it is often possible to perform a sequence of
+transformations in a single pass.
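+
+The driver for this loop can be sketched as follows (a hypothetical
+outline, with the analysis and simplification passes abstracted as function
+arguments; the real driver also reports statistics and respects more flags).
+@
+-- Iterate analyse-then-simplify until no transformation fires, or until
+-- the iteration bound is exhausted.
+simplifyLoop :: Int -> (prog -> prog) -> (prog -> (Int, prog)) -> prog -> prog
+simplifyLoop maxIters analyse simplify prog
+  | maxIters <= 0 = prog                        -- respect the iteration bound
+  | otherwise =
+      let (ticks, prog') = simplify (analyse prog)
+      in if ticks == 0
+         then prog'                             -- nothing fired: a fixed point
+         else simplifyLoop (maxIters - 1) analyse simplify prog'
+@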
+
+The key function, which simplifies expressions, has the following type:
+@
+ simplExpr :: SimplEnv
+ -> InExpr -> [OutArg]
+ -> SmplM OutExpr
+@
+The monad, @SmplM@, can quickly be disposed of. It has only two purposes:
+\begin{itemize}
+\item It plumbs around a supply of unique names, so that the simplifier can
+easily invent new names.
+\item It gathers together counts of how many of each kind of transformation
+has been applied, for statistical purposes. These counts are also used
+in Step 3 to decide when the simplification process has terminated.
+\end{itemize}
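+
+A minimal sketch of such a monad is given below: a state transformer
+carrying a supply of fresh uniques and a count of transformations applied.
+This is an illustration only; the monad actually used carries rather more.
+@
+newtype SmplM a = SmplM { runSmplM :: (Int, Int) -> (a, (Int, Int)) }
+                                    -- state: (next unique, ticks so far)
+instance Functor SmplM where
+  fmap f m  = SmplM $ \s -> let (a, s') = runSmplM m s in (f a, s')
+
+instance Applicative SmplM where
+  pure a    = SmplM $ \s -> (a, s)
+  mf <*> ma = SmplM $ \s -> let (f, s1) = runSmplM mf s
+                                (a, s2) = runSmplM ma s1
+                            in  (f a, s2)
+
+instance Monad SmplM where
+  m >>= k   = SmplM $ \s -> let (a, s') = runSmplM m s in runSmplM (k a) s'
+
+newUnique :: SmplM Int           -- invent a fresh name
+newUnique = SmplM $ \(u, t) -> (u, (u + 1, t))
+
+tick :: SmplM ()                 -- record that a transformation fired
+tick = SmplM $ \(u, t) -> ((), (u, t + 1))
+@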
+
+The signature can be understood like this:
+\begin{itemize}
+\item The environment, of type @SimplEnv@, provides information about
+identifiers bound by the enclosing context.
+\item The second and third arguments together specify the expression to be simplified.
+\item The result is the simplified expression, wrapped up by the monad.
+\end{itemize}
+The simplifier's invariant is this:
+$$
+@simplExpr@~env~expr~[a_1,\ldots,a_n] = expr[env]~a_1~\ldots~a_n
+$$
+That is, the expression returned by $@simplExpr@~env~expr~[a_1,\ldots,a_n]$
+is semantically equal to (although hopefully more efficient than)
+$expr$, with the renamings in $env$ applied to it, applied to the arguments
+$a_1,\ldots,a_n$.
+
+\subsection{Application and beta reduction}
+
+The arguments are carried ``inwards'' by @simplExpr@, as an accumulating parameter.
+This is a convenient way of implementing the transformations which float
+arguments inside a @let@ and @case@. This list of pending arguments
+requires a new data type, @CoreArg@, along with its ``in'' and ``out'' synonyms,
+because an argument might be a type or an atom:
+@
+data CoreArg bindee = TypeArg UniType
+ | ValArg (CoreAtom bindee)
+
+type InArg = CoreArg InId
+type OutArg = CoreArg OutId
+@
+The equations for applications simply apply
+the environment to the argument (to handle renaming) and put the result
+on the argument stack, tagged to say whether it is a type argument or value argument:
+@
+ simplExpr env (CoApp fun arg) args
+ = simplExpr env fun (ValArg (simplAtom env arg) : args)
+ simplExpr env (CoTyApp fun ty) args
+ = simplExpr env fun (TypeArg (simplTy env ty) : args)
+@
+
+
+
+
+
+
+\end{document}