summaryrefslogtreecommitdiff
path: root/compiler/profiling
diff options
context:
space:
mode:
authorSimon Marlow <simonmar@microsoft.com>2006-04-07 02:05:11 +0000
committerSimon Marlow <simonmar@microsoft.com>2006-04-07 02:05:11 +0000
commit0065d5ab628975892cea1ec7303f968c3338cbe1 (patch)
tree8e2afe0ab48ee33cf95009809d67c9649573ef92 /compiler/profiling
parent28a464a75e14cece5db40f2765a29348273ff2d2 (diff)
downloadhaskell-0065d5ab628975892cea1ec7303f968c3338cbe1.tar.gz
Reorganisation of the source tree
Most of the other users of the fptools build system have migrated to Cabal, and with the move to darcs we can now flatten the source tree without losing history, so here goes. The main change is that the ghc/ subdir is gone, and most of what it contained is now at the top level. The build system now makes no pretense at being multi-project, it is just the GHC build system. No doubt this will break many things, and there will be a period of instability while we fix the dependencies. A straightforward build should work, but I haven't yet fixed binary/source distributions. Changes to the Building Guide will follow, too.
Diffstat (limited to 'compiler/profiling')
-rw-r--r--compiler/profiling/CostCentre.lhs373
-rw-r--r--compiler/profiling/NOTES301
-rw-r--r--compiler/profiling/SCCfinal.lhs411
3 files changed, 1085 insertions, 0 deletions
diff --git a/compiler/profiling/CostCentre.lhs b/compiler/profiling/CostCentre.lhs
new file mode 100644
index 0000000000..3ee46a88db
--- /dev/null
+++ b/compiler/profiling/CostCentre.lhs
@@ -0,0 +1,373 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+\section[CostCentre]{The @CostCentre@ data type}
+
+\begin{code}
+module CostCentre (
+ CostCentre(..), CcName, IsDupdCC(..), IsCafCC(..),
+ -- All abstract except to friend: ParseIface.y
+
+ CostCentreStack,
+ CollectedCCs,
+ noCCS, subsumedCCS, currentCCS, overheadCCS, dontCareCCS,
+ noCostCentre, noCCAttached,
+ noCCSAttached, isCurrentCCS, isSubsumedCCS, currentOrSubsumedCCS,
+ isDerivedFromCurrentCCS, maybeSingletonCCS,
+ decomposeCCS,
+
+ mkUserCC, mkAutoCC, mkAllCafsCC,
+ mkSingletonCCS, dupifyCC, pushCCOnCCS,
+ isCafCCS, isCafCC,
+ isSccCountCostCentre,
+ sccAbleCostCentre,
+ ccFromThisModule,
+
+ pprCostCentreCore,
+ costCentreUserName,
+
+ cmpCostCentre -- used for removing dups in a list
+ ) where
+
+#include "HsVersions.h"
+
+import Var ( Id )
+import Name ( getOccName, occNameFS )
+import Module ( Module, moduleFS )
+import Outputable
+import FastTypes
+import FastString
+import Util ( thenCmp )
+\end{code}
+
+A Cost Centre Stack is something that can be attached to a closure.
+This is either:
+
+ - the current cost centre stack (CCCS)
+ - a pre-defined cost centre stack (there are several
+ pre-defined CCSs, see below).
+
+\begin{code}
+data CostCentreStack
+ = NoCCS
+
+ | CurrentCCS -- Pinned on a let(rec)-bound
+ -- thunk/function/constructor, this says that the
+ -- cost centre to be attached to the object, when it
+ -- is allocated, is whatever is in the
+ -- current-cost-centre-stack register.
+
+ | SubsumedCCS -- Cost centre stack for top-level subsumed functions
+ -- (CAFs get an AllCafsCC).
+ -- Its execution costs get subsumed into the caller.
+ -- This guy is *only* ever pinned on static closures,
+ -- and is *never* the cost centre for an SCC construct.
+
+ | OverheadCCS -- We charge costs due to the profiling-system
+ -- doing its work to "overhead".
+ --
+ -- Objects whose CCS is "Overhead"
+ -- have their *allocation* charged to "overhead",
+ -- but have the current CCS put into the object
+ -- itself.
+
+ -- For example, if we transform "f g" to "let
+ -- g' = g in f g'" (so that something about
+ -- profiling works better...), then we charge
+ -- the *allocation* of g' to OverheadCCS, but
+ -- we put the cost-centre of the call to f
+ -- (i.e., current CCS) into the g' object. When
+ -- g' is entered, the CCS of the call
+ -- to f will be set.
+
+ | DontCareCCS -- We need a CCS to stick in static closures
+ -- (for data), but we *don't* expect them to
+ -- accumulate any costs. But we still need
+ -- the placeholder. This CCS is it.
+
+ | PushCC CostCentre CostCentreStack
+ -- These are used during code generation as the CCSs
+ -- attached to closures. A PushCC never appears as
+ -- the argument to an _scc_.
+ --
+ -- The tail (2nd argument) is either NoCCS, indicating
+ -- a staticly allocated CCS, or CurrentCCS indicating
+ -- a dynamically created CCS. We only support
+ -- statically allocated *singleton* CCSs at the
+ -- moment, for the purposes of initialising the CCS
+ -- field of a CAF.
+
+ deriving (Eq, Ord) -- needed for Ord on CLabel
+\end{code}
+
+A Cost Centre is the argument of an _scc_ expression.
+
+\begin{code}
+data CostCentre
+ = NoCostCentre -- Having this constructor avoids having
+ -- to use "Maybe CostCentre" all the time.
+
+ | NormalCC {
+ cc_name :: CcName, -- Name of the cost centre itself
+ cc_mod :: Module, -- Name of module defining this CC.
+ cc_is_dupd :: IsDupdCC, -- see below
+ cc_is_caf :: IsCafCC -- see below
+ }
+
+ | AllCafsCC {
+ cc_mod :: Module -- Name of module defining this CC.
+ }
+
+type CcName = FastString
+
+data IsDupdCC
+ = OriginalCC -- This says how the CC is *used*. Saying that
+ | DupdCC -- it is DupdCC doesn't make it a different
+ -- CC, just that it a sub-expression which has
+ -- been moved ("dupd") into a different scope.
+ --
+ -- The point about a dupd SCC is that we don't
+ -- count entries to it, because it's not the
+ -- "original" one.
+ --
+ -- In the papers, it's called "SCCsub",
+ -- i.e. SCCsub CC == SCC DupdCC,
+ -- but we are trying to avoid confusion between
+ -- "subd" and "subsumed". So we call the former
+ -- "dupd".
+
+data IsCafCC = CafCC | NotCafCC
+
+-- synonym for triple which describes the cost centre info in the generated
+-- code for a module.
+type CollectedCCs
+ = ( [CostCentre] -- local cost-centres that need to be decl'd
+ , [CostCentre] -- "extern" cost-centres
+ , [CostCentreStack] -- pre-defined "singleton" cost centre stacks
+ )
+\end{code}
+
+WILL: Would there be any merit to recording ``I am now using a
+cost-centre from another module''? I don't know if this would help a
+user; it might be interesting to us to know how much computation is
+being moved across module boundaries.
+
+SIMON: Maybe later...
+
+\begin{code}
+
+noCCS = NoCCS
+subsumedCCS = SubsumedCCS
+currentCCS = CurrentCCS
+overheadCCS = OverheadCCS
+dontCareCCS = DontCareCCS
+
+noCostCentre = NoCostCentre
+\end{code}
+
+Predicates on Cost-Centre Stacks
+
+\begin{code}
+noCCSAttached NoCCS = True
+noCCSAttached _ = False
+
+noCCAttached NoCostCentre = True
+noCCAttached _ = False
+
+isCurrentCCS CurrentCCS = True
+isCurrentCCS _ = False
+
+isSubsumedCCS SubsumedCCS = True
+isSubsumedCCS _ = False
+
+isCafCCS (PushCC cc NoCCS) = isCafCC cc
+isCafCCS _ = False
+
+isDerivedFromCurrentCCS CurrentCCS = True
+isDerivedFromCurrentCCS (PushCC _ ccs) = isDerivedFromCurrentCCS ccs
+isDerivedFromCurrentCCS _ = False
+
+currentOrSubsumedCCS SubsumedCCS = True
+currentOrSubsumedCCS CurrentCCS = True
+currentOrSubsumedCCS _ = False
+
+maybeSingletonCCS (PushCC cc NoCCS) = Just cc
+maybeSingletonCCS _ = Nothing
+\end{code}
+
+Building cost centres
+
+\begin{code}
+mkUserCC :: FastString -> Module -> CostCentre
+mkUserCC cc_name mod
+ = NormalCC { cc_name = cc_name, cc_mod = mod,
+ cc_is_dupd = OriginalCC, cc_is_caf = NotCafCC {-might be changed-}
+ }
+
+mkAutoCC :: Id -> Module -> IsCafCC -> CostCentre
+mkAutoCC id mod is_caf
+ = NormalCC { cc_name = occNameFS (getOccName id), cc_mod = mod,
+ cc_is_dupd = OriginalCC, cc_is_caf = is_caf
+ }
+
+mkAllCafsCC m = AllCafsCC { cc_mod = m }
+
+
+
+mkSingletonCCS :: CostCentre -> CostCentreStack
+mkSingletonCCS cc = pushCCOnCCS cc NoCCS
+
+pushCCOnCCS :: CostCentre -> CostCentreStack -> CostCentreStack
+pushCCOnCCS = PushCC
+
+dupifyCC cc = cc {cc_is_dupd = DupdCC}
+
+isCafCC, isDupdCC :: CostCentre -> Bool
+
+isCafCC (AllCafsCC {}) = True
+isCafCC (NormalCC {cc_is_caf = CafCC}) = True
+isCafCC _ = False
+
+isDupdCC (NormalCC {cc_is_dupd = DupdCC}) = True
+isDupdCC _ = False
+
+isSccCountCostCentre :: CostCentre -> Bool
+ -- Is this a cost-centre which records scc counts
+
+#if DEBUG
+isSccCountCostCentre NoCostCentre = panic "isSccCount:NoCostCentre"
+#endif
+isSccCountCostCentre cc | isCafCC cc = False
+ | isDupdCC cc = False
+ | otherwise = True
+
+sccAbleCostCentre :: CostCentre -> Bool
+ -- Is this a cost-centre which can be sccd ?
+
+#if DEBUG
+sccAbleCostCentre NoCostCentre = panic "sccAbleCC:NoCostCentre"
+#endif
+sccAbleCostCentre cc | isCafCC cc = False
+ | otherwise = True
+
+ccFromThisModule :: CostCentre -> Module -> Bool
+ccFromThisModule cc m = cc_mod cc == m
+\end{code}
+
+\begin{code}
+instance Eq CostCentre where
+ c1 == c2 = case c1 `cmpCostCentre` c2 of { EQ -> True; _ -> False }
+
+instance Ord CostCentre where
+ compare = cmpCostCentre
+
+cmpCostCentre :: CostCentre -> CostCentre -> Ordering
+
+cmpCostCentre (AllCafsCC {cc_mod = m1}) (AllCafsCC {cc_mod = m2}) = m1 `compare` m2
+
+cmpCostCentre (NormalCC {cc_name = n1, cc_mod = m1, cc_is_caf = c1})
+ (NormalCC {cc_name = n2, cc_mod = m2, cc_is_caf = c2})
+ -- first key is module name, then we use "kinds" (which include
+ -- names) and finally the caf flag
+ = (m1 `compare` m2) `thenCmp` (n1 `compare` n2) `thenCmp` (c1 `cmp_caf` c2)
+
+cmpCostCentre other_1 other_2
+ = let
+ tag1 = tag_CC other_1
+ tag2 = tag_CC other_2
+ in
+ if tag1 <# tag2 then LT else GT
+ where
+ tag_CC (NormalCC {}) = (_ILIT 1 :: FastInt)
+ tag_CC (AllCafsCC {}) = _ILIT 2
+
+cmp_caf NotCafCC CafCC = LT
+cmp_caf NotCafCC NotCafCC = EQ
+cmp_caf CafCC CafCC = EQ
+cmp_caf CafCC NotCafCC = GT
+
+decomposeCCS :: CostCentreStack -> ([CostCentre],CostCentreStack)
+decomposeCCS (PushCC cc ccs) = (cc:more, ccs')
+ where (more,ccs') = decomposeCCS ccs
+decomposeCCS ccs = ([],ccs)
+\end{code}
+
+-----------------------------------------------------------------------------
+Printing Cost Centre Stacks.
+
+The outputable instance for CostCentreStack prints the CCS as a C
+expression.
+
+NOTE: Not all cost centres are suitable for using in a static
+initializer. In particular, the PushCC forms where the tail is CCCS
+may only be used in inline C code because they expand to a
+non-constant C expression.
+
+\begin{code}
+instance Outputable CostCentreStack where
+ ppr NoCCS = ptext SLIT("NO_CCS")
+ ppr CurrentCCS = ptext SLIT("CCCS")
+ ppr OverheadCCS = ptext SLIT("CCS_OVERHEAD")
+ ppr DontCareCCS = ptext SLIT("CCS_DONT_CARE")
+ ppr SubsumedCCS = ptext SLIT("CCS_SUBSUMED")
+ ppr (PushCC cc NoCCS) = ppr cc <> ptext SLIT("_ccs")
+ ppr (PushCC cc ccs) = ptext SLIT("PushCostCentre") <>
+ parens (ppr ccs <> comma <>
+ parens(ptext SLIT("void *")) <> ppr cc)
+\end{code}
+
+-----------------------------------------------------------------------------
+Printing Cost Centres.
+
+There are several different ways in which we might want to print a
+cost centre:
+
+ - the name of the cost centre, for profiling output (a C string)
+ - the label, i.e. C label for cost centre in .hc file.
+ - the debugging name, for output in -ddump things
+ - the interface name, for printing in _scc_ exprs in iface files.
+
+The last 3 are derived from costCentreStr below. The first is given
+by costCentreName.
+
+\begin{code}
+instance Outputable CostCentre where
+ ppr cc = getPprStyle $ \ sty ->
+ if codeStyle sty
+ then ppCostCentreLbl cc
+ else text (costCentreUserName cc)
+
+-- Printing in an interface file or in Core generally
+pprCostCentreCore (AllCafsCC {cc_mod = m})
+ = text "__sccC" <+> braces (ppr_mod m)
+pprCostCentreCore (NormalCC {cc_name = n, cc_mod = m,
+ cc_is_caf = caf, cc_is_dupd = dup})
+ = text "__scc" <+> braces (hsep [
+ ftext (zEncodeFS n),
+ ppr_mod m,
+ pp_dup dup,
+ pp_caf caf
+ ])
+
+pp_dup DupdCC = char '!'
+pp_dup other = empty
+
+pp_caf CafCC = text "__C"
+pp_caf other = empty
+
+ppr_mod m = ftext (zEncodeFS (moduleFS m))
+
+-- Printing as a C label
+ppCostCentreLbl (NoCostCentre) = text "NONE_cc"
+ppCostCentreLbl (AllCafsCC {cc_mod = m}) = ppr m <> text "_CAFs_cc"
+ppCostCentreLbl (NormalCC {cc_name = n, cc_mod = m, cc_is_caf = is_caf})
+ = ppr_mod m <> ftext (zEncodeFS n) <>
+ text (case is_caf of { CafCC -> "_CAF"; _ -> "" }) <> text "_cc"
+
+-- This is the name to go in the user-displayed string,
+-- recorded in the cost centre declaration
+costCentreUserName (NoCostCentre) = "NO_CC"
+costCentreUserName (AllCafsCC {}) = "CAF"
+costCentreUserName cc@(NormalCC {cc_name = name, cc_is_caf = is_caf})
+ = case is_caf of { CafCC -> "CAF:"; _ -> "" } ++ unpackFS name
+\end{code}
diff --git a/compiler/profiling/NOTES b/compiler/profiling/NOTES
new file mode 100644
index 0000000000..c50cf562e3
--- /dev/null
+++ b/compiler/profiling/NOTES
@@ -0,0 +1,301 @@
+Profiling Implementation Notes -- June/July/Sept 1994
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Simon and Will
+
+Pre-code-generator-ish
+~~~~~~~~~~~~~~~~~~~~~~
+
+* Automagic insertion of _sccs_ on...
+
+ - If -auto is specified, add _scc_ on each *exported* top-level definition.
+ NB this includes CAFs. Done by addAutoCostCentres (Core-to-Core pass).
+
+ - If -auto-all is specified, add _scc_ on *all* top-level definitions.
+ Done by same pass.
+
+ - Always: just before code generation of module M, onto any CAF
+ which hasn't already got an explicit cost centre attached, pin
+ "AllCAFs-M".
+
+ Done by finalStgMassageForProfiling (final STG-to-STG pass)
+
+ Only the one-off costs of evaluating the CAFs will be attributed
+ to the AllCAFs-M cost centre. We hope that these costs will be
+ small; since the _scc_s are introduced automatically it's
+ confusing to attribute any significant costs to them. However if
+ there *are* significant one-off costs we'd better know about it.
+
+ Why so late in the compilation process? We aren't *absolutely*
+ sure what is and isn't a CAF until *just* before code generation.
+ So we don't want to mark them as such until then.
+
+ - Individual DICTs
+
+ We do it in the desugarer, because that's the *only* point at
+ which we *know* exactly what bindings are introduced by
+ overloading. NB should include bindings for selected methods, eg
+
+ f d = let op = _scc_ DICT op_sel d in
+ ...op...op...op
+
+ The DICT CC ensures that:
+ (a) [minor] that the selection cost is separately attributed
+ (b) [major] that the cost of executing op is attributed to
+ its call site, eg
+
+ ...(scc "a" op)...(scc "b" op)...(scc "c" op)...
+
+* Automagic "boxing" of higher-order args:
+
+ finalStgMassageForProfiling (final STG-to-STG pass)
+
+ This (as well as CAF stuff above) is really quite separate
+ from the other business of finalStgMassageForProfiling
+ (collecting up CostCentres that need to be
+ declared/registered).
+
+ But throwing it all into the pot together means that we don't
+ have to have Yet Another STG Syntax Walker.
+
+ Furthermore, these "boxes" are really just let-bindings that
+ many other parts of the compiler will happily substitute away!
+ Doing them at the very last instant prevents this.
+
+ A down side of doing these so late is that we get lots of
+ "let"s, which if generated earlier and not substituted away,
+ could be floated outwards. Having them floated outwards would
+ lessen the chance of skewing profiling results (because of
+ gratuitous "let"s added by the compiler into the inner loop of
+ some program...). The allocation itself will be attributed to
+ profiling overhead; the only thing which'll be skewed is time measurement.
+
+ So if we have, post-boxing-higher-order-args...
+
+ _scc_ "foo" ( let f' = [f] \ [] f
+ in
+ map f' xs )
+
+ ... we want "foo" to be put in the thunk for "f'", but we want the
+ allocation cost (heap census stuff) to be attr to OVERHEAD.
+
+ As an example of what could be improved
+ f = _scc_ "f" (g h)
+ To save dynamic allocation, we could have a static closure for h:
+ h_inf = _scc_ "f" h
+ f = _scc_ "f" (g h_inf)
+
+
+
+
+
+Code generator-ish
+~~~~~~~~~~~~~~~~~~
+
+(1) _Entry_ code for a closure *usually* sets CC from the closure,
+ at the fast entry point
+
+ Exceptions:
+
+ (a) Top-level subsumed functions (i.e., w/ no _scc_ on them)
+
+ Refrain from setting CC from the closure
+
+ (b) Constructors
+
+ Again, refrain. (This is *new*)
+
+ Reasons: (i) The CC will be zapped very shortly by the restore
+ of the enclosing CC when we return to the eval'ing "case".
+ (ii) Any intervening updates will indirect to this existing
+ constructor (...mumble... new update mechanism... mumble...)
+
+(2) "_scc_ cc expr"
+
+ Set current CC to "cc".
+ No later "restore" of the previous CC is reqd.
+
+(3) "case e of { ...alts... }" expression (eval)
+
+ Save CC before eval'ing scrutinee
+ Restore CC at the start of the case-alternative(s)
+
+(4) _Updates_ : updatee gets current CC
+
+ (???? not sure this is OK yet 94/07/04)
+
+ Reasons:
+
+ * Constructors : want to be insensitive to return-in-heap vs
+ return-in-regs. For example,
+
+ f x = _scc_ "f" (x, x)
+
+ The pair (x,x) would get CC of "f" if returned-in-heap;
+ therefore, updatees should get CC of "f".
+
+ * PAPs : Example:
+
+ f x = _scc_ "f" (let g = \ y -> ... in g)
+
+ At the moment of update (updatePAP?), CC is "f", which
+ is what we want to set it to if the "updatee" is entered
+
+ When we enter the PAP ("please put the arguments back so I can
+ use them"), we restore the setup as at the moment the
+ arg-satisfaction check failed.
+
+ Be careful! UPDATE_PAP is called from the arg-satis check,
+ which is before the fast entry point. So the cost centre
+ won't yet have been set from the closure which has just
+ been entered. Solution: in UPDATE_PAP see if the cost centre inside
+ the function closure which is being entered is "SUB"; if so, use
+ the current cost centre to update the updatee; otherwise use that
+ inside the function closure. (See the computation of cc_pap
+ in rule 16_l for lexical semantics.)
+
+
+(5) CAFs
+
+CAFs get their own cost centre. Ie
+
+ x = e
+is transformed to
+ x = _scc_ "CAF:x" e
+
+Or sometimes we lump all the CAFs in a module together.
+(Reporting issue or code-gen issue?)
+
+
+
+Hybrid stuff
+~~~~~~~~~~~~
+
+The problem:
+
+ f = _scc_ "CAF:f" (let g = \xy -> ...
+ in (g,g))
+
+Now, g has cost-centre "CAF:f", and is returned as part of
+the result. So whenever the function embedded in the result
+is called, the costs will accumulate to "CAF:f". This is
+particularly (de)pressing for dictionaries, which contain lots
+of functions.
+
+Solution:
+
+ A. Whenever in case (1) above we would otherwise "set the CC from the
+ closure", we *refrain* from doing so if
+ (a) the closure is a function, not a thunk; and
+ (b) the cost-centre in the closure is a CAF cost centre.
+
+ B. Whenever we enter a thunk [at least, one which might return a function]
+ we save the current cost centre in the update frame. Then, UPDATE_PAP
+ restores the saved cost centre from the update frame iff the cost
+ centre at the point of update (cc_pap in (4) above) is a CAF cost centre.
+
+ It isn't necessary to save and possibly-restore the cost centre for
+ thunks which will certainly return a constructor, because the
+ cost centre is about to be restored anyway by the enclosing case.
+
+Both A and B are runtime tests. For A, consider:
+
+ f = _scc_ "CAF:f" (g 2)
+
+ h y = _scc_ "h" g (y+y)
+
+ g x = let w = \p -> ...
+ in (w,w)
+
+
+Now, in the call to g from h, the cost-centre on w will be "h", and
+indeed all calls to the result of the call should be attributed to
+"h".
+
+ ... _scc_ "x1" (let (t,_) = h 2 in t 3) ...
+
+ Costs of executing (w 3) attributed to "h".
+
+But in the call to g from f, the cost-centre on w will be
+"CAF:f", and calls to w should be attributed to the call site.
+
+ ..._scc_ "x2" (let (t,_) = f in t 3)...
+
+ Costs of executing (w 3) attributed to "x2".
+
+
+ Remaining problem
+
+Consider
+
+ _scc_ "CAF:f" (if expensive then g 2 else g 3)
+
+where g is a function with arity 2. In theory we should
+restore the enclosing cost centre once we've reduced to
+(g 2) or (g 3). In practice this is pretty tiresome; and pretty rare.
+
+A quick fix: given (_scc_ "CAF" e) where e might be function-valued
+(in practice we usually know, because CAF sccs are top level), transform to
+
+ _scc_ "CAF" (let f = e in f)
+
+
+
+
+
+============
+
+scc cc x ===> x
+
+ UNLESS
+
+(a) cc is a user-defined, non-dup'd cost
+ centre (so we care about entry counts)
+
+OR
+
+(b) cc is not a CAF/DICT cost centre and x is top-level subsumed
+ function.
+ [If x is lambda/let bound it'll have a cost centre
+ attached dynamically.]
+
+ To repeat, the transformation is OK if
+ x is a not top-level subsumed function
+ OR
+ cc is a CAF/DICT cost centre and x is a top-level
+ subsumed function
+
+
+
+(scc cc e) x ===> (scc cc e x)
+
+ OK????? IFF
+
+cc is not CAF/DICT --- remains to be proved!!!!!!
+True for lex
+False for eval
+Can we tell which in hybrid?
+
+eg Is this ok?
+
+ (scc "f" (scc "CAF" (\x.b))) y ==> (scc "f" (scc "CAF" (\x.b) y))
+
+
+\x -> (scc cc e) ===> (scc cc \x->e)
+
+ OK IFF cc is not CAF/DICT
+
+
+scc cc1 (scc cc2 e)) ===> scc cc2 e
+
+ IFF not interested in cc1's entry count
+ AND cc2 is not CAF/DICT
+
+(scc cc1 ... (scc cc2 e) ...) ===> (scc cc1 ... e ...)
+
+ IFF cc2 is CAF/DICT
+ AND e is a lambda not appearing as the RHS of a let
+ OR
+ e is a variable not bound to SUB
+
+
diff --git a/compiler/profiling/SCCfinal.lhs b/compiler/profiling/SCCfinal.lhs
new file mode 100644
index 0000000000..c95db9c358
--- /dev/null
+++ b/compiler/profiling/SCCfinal.lhs
@@ -0,0 +1,411 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+\section[SCCfinal]{Modify and collect code generation for final STG program}
+
+This is now a sort-of-normal STG-to-STG pass (WDP 94/06), run by stg2stg.
+
+* Traverses the STG program collecting the cost centres. These are
+ required to declare the cost centres at the start of code
+ generation.
+
+ Note: because of cross-module unfolding, some of these cost centres
+ may be from other modules. But will still have to give them
+ "extern" declarations.
+
+* Puts on CAF cost-centres if the user has asked for individual CAF
+ cost-centres.
+
+* Ditto for individual DICT cost-centres.
+
+* Boxes top-level inherited functions passed as arguments.
+
+* "Distributes" given cost-centres to all as-yet-unmarked RHSs.
+
+\begin{code}
+module SCCfinal ( stgMassageForProfiling ) where
+
+#include "HsVersions.h"
+
+import StgSyn
+
+import Packages ( HomeModules )
+import StaticFlags ( opt_AutoSccsOnIndividualCafs )
+import CostCentre -- lots of things
+import Id ( Id )
+import Module ( Module )
+import UniqSupply ( uniqFromSupply, splitUniqSupply, UniqSupply )
+import Unique ( Unique )
+import VarSet
+import ListSetOps ( removeDups )
+import Outputable
+
+infixr 9 `thenMM`, `thenMM_`
+\end{code}
+
+\begin{code}
+stgMassageForProfiling
+ :: HomeModules
+ -> Module -- module name
+ -> UniqSupply -- unique supply
+ -> [StgBinding] -- input
+ -> (CollectedCCs, [StgBinding])
+
+stgMassageForProfiling pdeps mod_name us stg_binds
+ = let
+ ((local_ccs, extern_ccs, cc_stacks),
+ stg_binds2)
+ = initMM mod_name us (do_top_bindings stg_binds)
+
+ (fixed_ccs, fixed_cc_stacks)
+ = if opt_AutoSccsOnIndividualCafs
+ then ([],[]) -- don't need "all CAFs" CC
+ -- (for Prelude, we use PreludeCC)
+ else ([all_cafs_cc], [all_cafs_ccs])
+
+ local_ccs_no_dups = fst (removeDups cmpCostCentre local_ccs)
+ extern_ccs_no_dups = fst (removeDups cmpCostCentre extern_ccs)
+ in
+ ((fixed_ccs ++ local_ccs_no_dups,
+ extern_ccs_no_dups,
+ fixed_cc_stacks ++ cc_stacks), stg_binds2)
+ where
+
+ all_cafs_cc = mkAllCafsCC mod_name
+ all_cafs_ccs = mkSingletonCCS all_cafs_cc
+
+ ----------
+ do_top_bindings :: [StgBinding] -> MassageM [StgBinding]
+
+ do_top_bindings [] = returnMM []
+
+ do_top_bindings (StgNonRec b rhs : bs)
+ = do_top_rhs b rhs `thenMM` \ rhs' ->
+ addTopLevelIshId b (
+ do_top_bindings bs `thenMM` \bs' ->
+ returnMM (StgNonRec b rhs' : bs')
+ )
+
+ do_top_bindings (StgRec pairs : bs)
+ = addTopLevelIshIds binders (
+ mapMM do_pair pairs `thenMM` \ pairs2 ->
+ do_top_bindings bs `thenMM` \ bs' ->
+ returnMM (StgRec pairs2 : bs')
+ )
+ where
+ binders = map fst pairs
+ do_pair (b, rhs)
+ = do_top_rhs b rhs `thenMM` \ rhs2 ->
+ returnMM (b, rhs2)
+
+ ----------
+ do_top_rhs :: Id -> StgRhs -> MassageM StgRhs
+
+ do_top_rhs binder (StgRhsClosure _ bi fv u srt [] (StgSCC cc (StgConApp con args)))
+ | not (isSccCountCostCentre cc) && not (isDllConApp pdeps con args)
+ -- Trivial _scc_ around nothing but static data
+ -- Eliminate _scc_ ... and turn into StgRhsCon
+
+ -- isDllConApp checks for LitLit args too
+ = returnMM (StgRhsCon dontCareCCS con args)
+
+{- Can't do this one with cost-centre stacks: --SDM
+ do_top_rhs binder (StgRhsClosure no_cc bi fv u [] (StgSCC ty cc expr))
+ | (noCCSAttached no_cc || currentOrSubsumedCCS no_cc)
+ && not (isSccCountCostCentre cc)
+ -- Top level CAF without a cost centre attached
+ -- Attach and collect cc of trivial _scc_ in body
+ = collectCC cc `thenMM_`
+ set_prevailing_cc cc (do_expr expr) `thenMM` \ expr' ->
+ returnMM (StgRhsClosure cc bi fv u [] expr')
+-}
+
+ do_top_rhs binder (StgRhsClosure no_cc bi fv u srt [] body)
+ | noCCSAttached no_cc || currentOrSubsumedCCS no_cc
+ -- Top level CAF without a cost centre attached
+ -- Attach CAF cc (collect if individual CAF ccs)
+ = (if opt_AutoSccsOnIndividualCafs
+ then let cc = mkAutoCC binder mod_name CafCC
+ ccs = mkSingletonCCS cc
+ in
+ collectCC cc `thenMM_`
+ collectCCS ccs `thenMM_`
+ returnMM ccs
+ else
+ returnMM all_cafs_ccs) `thenMM` \ caf_ccs ->
+ set_prevailing_cc caf_ccs (do_expr body) `thenMM` \ body' ->
+ returnMM (StgRhsClosure caf_ccs bi fv u srt [] body')
+
+ do_top_rhs binder (StgRhsClosure cc bi fv u srt [] body)
+ -- Top level CAF with cost centre attached
+ -- Should this be a CAF cc ??? Does this ever occur ???
+ = pprPanic "SCCfinal: CAF with cc:" (ppr cc)
+
+ do_top_rhs binder (StgRhsClosure no_ccs bi fv u srt args body)
+ -- Top level function, probably subsumed
+ | noCCSAttached no_ccs
+ = set_lambda_cc (do_expr body) `thenMM` \ body' ->
+ returnMM (StgRhsClosure subsumedCCS bi fv u srt args body')
+
+ | otherwise
+ = pprPanic "SCCfinal: CAF with cc:" (ppr no_ccs)
+
+ do_top_rhs binder (StgRhsCon ccs con args)
+ -- Top-level (static) data is not counted in heap
+ -- profiles; nor do we set CCCS from it; so we
+ -- just slam in dontCareCostCentre
+ = returnMM (StgRhsCon dontCareCCS con args)
+
+ ------
+ do_expr :: StgExpr -> MassageM StgExpr
+
+ do_expr (StgLit l) = returnMM (StgLit l)
+
+ do_expr (StgApp fn args)
+ = boxHigherOrderArgs (StgApp fn) args
+
+ do_expr (StgConApp con args)
+ = boxHigherOrderArgs (\args -> StgConApp con args) args
+
+ do_expr (StgOpApp con args res_ty)
+ = boxHigherOrderArgs (\args -> StgOpApp con args res_ty) args
+
+ do_expr (StgSCC cc expr) -- Ha, we found a cost centre!
+ = collectCC cc `thenMM_`
+ do_expr expr `thenMM` \ expr' ->
+ returnMM (StgSCC cc expr')
+
+ do_expr (StgCase expr fv1 fv2 bndr srt alt_type alts)
+ = do_expr expr `thenMM` \ expr' ->
+ mapMM do_alt alts `thenMM` \ alts' ->
+ returnMM (StgCase expr' fv1 fv2 bndr srt alt_type alts')
+ where
+ do_alt (id, bs, use_mask, e)
+ = do_expr e `thenMM` \ e' ->
+ returnMM (id, bs, use_mask, e')
+
+ do_expr (StgLet b e)
+ = do_let b e `thenMM` \ (b,e) ->
+ returnMM (StgLet b e)
+
+ do_expr (StgLetNoEscape lvs1 lvs2 b e)
+ = do_let b e `thenMM` \ (b,e) ->
+ returnMM (StgLetNoEscape lvs1 lvs2 b e)
+
+#ifdef DEBUG
+ do_expr other = pprPanic "SCCfinal.do_expr" (ppr other)
+#endif
+
+ ----------------------------------
+
+ do_let (StgNonRec b rhs) e
+ = do_rhs rhs `thenMM` \ rhs' ->
+ addTopLevelIshId b (
+ do_expr e `thenMM` \ e' ->
+ returnMM (StgNonRec b rhs',e')
+ )
+
+ do_let (StgRec pairs) e
+ = addTopLevelIshIds binders (
+ mapMM do_pair pairs `thenMM` \ pairs' ->
+ do_expr e `thenMM` \ e' ->
+ returnMM (StgRec pairs', e')
+ )
+ where
+ binders = map fst pairs
+ do_pair (b, rhs)
+ = do_rhs rhs `thenMM` \ rhs2 ->
+ returnMM (b, rhs2)
+
+ ----------------------------------
+ do_rhs :: StgRhs -> MassageM StgRhs
+ -- We play much the same game as we did in do_top_rhs above;
+ -- but we don't have to worry about cafs etc.
+
+{-
+ do_rhs (StgRhsClosure closure_cc bi fv u [] (StgSCC ty cc (StgCon (DataCon con) args _)))
+ | not (isSccCountCostCentre cc)
+ = collectCC cc `thenMM_`
+ returnMM (StgRhsCon cc con args)
+-}
+
+ do_rhs (StgRhsClosure _ bi fv u srt args expr)
+ = slurpSCCs currentCCS expr `thenMM` \ (expr', ccs) ->
+ do_expr expr' `thenMM` \ expr'' ->
+ returnMM (StgRhsClosure ccs bi fv u srt args expr'')
+ where
+ slurpSCCs ccs (StgSCC cc e)
+ = collectCC cc `thenMM_`
+ slurpSCCs (cc `pushCCOnCCS` ccs) e
+ slurpSCCs ccs e
+ = returnMM (e, ccs)
+
+ do_rhs (StgRhsCon cc con args)
+ = returnMM (StgRhsCon currentCCS con args)
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Boxing higher-order args}
+%* *
+%************************************************************************
+
+Boxing is *turned off* at the moment, until we can figure out how to
+do it properly in general.
+
+\begin{code}
+boxHigherOrderArgs
+ :: ([StgArg] -> StgExpr)
+ -- An application lacking its arguments
+ -> [StgArg] -- arguments which we might box
+ -> MassageM StgExpr
+
+#ifndef PROF_DO_BOXING
+boxHigherOrderArgs almost_expr args
+ = returnMM (almost_expr args)
+#else
+boxHigherOrderArgs almost_expr args
+ = getTopLevelIshIds `thenMM` \ ids ->
+ mapAccumMM (do_arg ids) [] args `thenMM` \ (let_bindings, new_args) ->
+ returnMM (foldr (mk_stg_let currentCCS) (almost_expr new_args) let_bindings)
+ where
+ ---------------
+
+ do_arg ids bindings arg@(StgVarArg old_var)
+ | (not (isLocalVar old_var) || elemVarSet old_var ids)
+ && isFunTy (dropForAlls var_type)
+ = -- make a trivial let-binding for the top-level function
+ getUniqueMM `thenMM` \ uniq ->
+ let
+ new_var = mkSysLocal FSLIT("sf") uniq var_type
+ in
+ returnMM ( (new_var, old_var) : bindings, StgVarArg new_var )
+ where
+ var_type = idType old_var
+
+ do_arg ids bindings arg = returnMM (bindings, arg)
+
+ ---------------
+ mk_stg_let :: CostCentreStack -> (Id, Id) -> StgExpr -> StgExpr
+
+ mk_stg_let cc (new_var, old_var) body
+ = let
+ rhs_body = StgApp old_var [{-args-}]
+ rhs_closure = StgRhsClosure cc stgArgOcc [{-fvs-}] ReEntrant NoSRT{-eeek!!!-} [{-args-}] rhs_body
+ in
+ StgLet (StgNonRec new_var rhs_closure) body
+ where
+ bOGUS_LVs = emptyUniqSet -- easier to print than: panic "mk_stg_let: LVs"
+#endif
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Boring monad stuff for this}
+%* *
+%************************************************************************
+
+\begin{code}
+type MassageM result
+ = Module -- module name
+ -> CostCentreStack -- prevailing CostCentre
+ -- if none, subsumedCosts at top-level
+ -- currentCostCentre at nested levels
+ -> UniqSupply
+ -> VarSet -- toplevel-ish Ids for boxing
+ -> CollectedCCs
+ -> (CollectedCCs, result)
+
+-- the initMM function also returns the final CollectedCCs
+
+initMM :: Module -- module name, which we may consult
+ -> UniqSupply
+ -> MassageM a
+ -> (CollectedCCs, a)
+
+initMM mod_name init_us m = m mod_name noCCS init_us emptyVarSet ([],[],[])
+
+thenMM :: MassageM a -> (a -> MassageM b) -> MassageM b
+thenMM_ :: MassageM a -> (MassageM b) -> MassageM b
+
+thenMM expr cont mod scope_cc us ids ccs
+ = case splitUniqSupply us of { (s1, s2) ->
+ case (expr mod scope_cc s1 ids ccs) of { (ccs2, result) ->
+ cont result mod scope_cc s2 ids ccs2 }}
+
+thenMM_ expr cont mod scope_cc us ids ccs
+ = case splitUniqSupply us of { (s1, s2) ->
+ case (expr mod scope_cc s1 ids ccs) of { (ccs2, _) ->
+ cont mod scope_cc s2 ids ccs2 }}
+
+returnMM :: a -> MassageM a
+returnMM result mod scope_cc us ids ccs = (ccs, result)
+
+nopMM :: MassageM ()
+nopMM mod scope_cc us ids ccs = (ccs, ())
+
+mapMM :: (a -> MassageM b) -> [a] -> MassageM [b]
+mapMM f [] = returnMM []
+mapMM f (m:ms)
+ = f m `thenMM` \ r ->
+ mapMM f ms `thenMM` \ rs ->
+ returnMM (r:rs)
+
+mapAccumMM :: (acc -> x -> MassageM (acc, y)) -> acc -> [x] -> MassageM (acc, [y])
+mapAccumMM f b [] = returnMM (b, [])
+mapAccumMM f b (m:ms)
+ = f b m `thenMM` \ (b2, r) ->
+ mapAccumMM f b2 ms `thenMM` \ (b3, rs) ->
+ returnMM (b3, r:rs)
+
+getUniqueMM :: MassageM Unique
+getUniqueMM mod scope_cc us ids ccs = (ccs, uniqFromSupply us)
+
+addTopLevelIshId :: Id -> MassageM a -> MassageM a
+addTopLevelIshId id scope mod scope_cc us ids ccs
+ | isCurrentCCS scope_cc = scope mod scope_cc us ids ccs
+ | otherwise = scope mod scope_cc us (extendVarSet ids id) ccs
+
+addTopLevelIshIds :: [Id] -> MassageM a -> MassageM a
+addTopLevelIshIds [] cont = cont
+addTopLevelIshIds (id:ids) cont
+ = addTopLevelIshId id (addTopLevelIshIds ids cont)
+
+getTopLevelIshIds :: MassageM VarSet
+getTopLevelIshIds mod scope_cc us ids ccs = (ccs, ids)
+\end{code}
+
+The prevailing CCS is used to tell whether we're in a top-levelish
+position, where top-levelish is defined as "not inside a lambda".
+Prevailing CCs used to be used for something much more complicated,
+I'm sure --SDM
+
+\begin{code}
+set_lambda_cc :: MassageM a -> MassageM a
+set_lambda_cc action mod scope_cc us ids ccs
+ = action mod currentCCS us ids ccs
+
+set_prevailing_cc :: CostCentreStack -> MassageM a -> MassageM a
+set_prevailing_cc cc_to_set_to action mod scope_cc us ids ccs
+ = action mod cc_to_set_to us ids ccs
+
+get_prevailing_cc :: MassageM CostCentreStack
+get_prevailing_cc mod scope_cc us ids ccs = (ccs, scope_cc)
+\end{code}
+
+\begin{code}
+collectCC :: CostCentre -> MassageM ()
+
+collectCC cc mod_name scope_cc us ids (local_ccs, extern_ccs, ccss)
+ = ASSERT(not (noCCAttached cc))
+ if (cc `ccFromThisModule` mod_name) then
+ ((cc : local_ccs, extern_ccs, ccss), ())
+ else -- must declare it "extern"
+ ((local_ccs, cc : extern_ccs, ccss), ())
+
+collectCCS :: CostCentreStack -> MassageM ()
+
+collectCCS ccs mod_name scope_cc us ids (local_ccs, extern_ccs, ccss)
+ = ASSERT(not (noCCSAttached ccs))
+ ((local_ccs, extern_ccs, ccs : ccss), ())
+\end{code}