diff options
Diffstat (limited to 'compiler/profiling')
-rw-r--r-- | compiler/profiling/CostCentre.lhs | 373 | ||||
-rw-r--r-- | compiler/profiling/NOTES | 301 | ||||
-rw-r--r-- | compiler/profiling/SCCfinal.lhs | 411 |
3 files changed, 1085 insertions, 0 deletions
diff --git a/compiler/profiling/CostCentre.lhs b/compiler/profiling/CostCentre.lhs new file mode 100644 index 0000000000..3ee46a88db --- /dev/null +++ b/compiler/profiling/CostCentre.lhs @@ -0,0 +1,373 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[CostCentre]{The @CostCentre@ data type} + +\begin{code} +module CostCentre ( + CostCentre(..), CcName, IsDupdCC(..), IsCafCC(..), + -- All abstract except to friend: ParseIface.y + + CostCentreStack, + CollectedCCs, + noCCS, subsumedCCS, currentCCS, overheadCCS, dontCareCCS, + noCostCentre, noCCAttached, + noCCSAttached, isCurrentCCS, isSubsumedCCS, currentOrSubsumedCCS, + isDerivedFromCurrentCCS, maybeSingletonCCS, + decomposeCCS, + + mkUserCC, mkAutoCC, mkAllCafsCC, + mkSingletonCCS, dupifyCC, pushCCOnCCS, + isCafCCS, isCafCC, + isSccCountCostCentre, + sccAbleCostCentre, + ccFromThisModule, + + pprCostCentreCore, + costCentreUserName, + + cmpCostCentre -- used for removing dups in a list + ) where + +#include "HsVersions.h" + +import Var ( Id ) +import Name ( getOccName, occNameFS ) +import Module ( Module, moduleFS ) +import Outputable +import FastTypes +import FastString +import Util ( thenCmp ) +\end{code} + +A Cost Centre Stack is something that can be attached to a closure. +This is either: + + - the current cost centre stack (CCCS) + - a pre-defined cost centre stack (there are several + pre-defined CCSs, see below). + +\begin{code} +data CostCentreStack + = NoCCS + + | CurrentCCS -- Pinned on a let(rec)-bound + -- thunk/function/constructor, this says that the + -- cost centre to be attached to the object, when it + -- is allocated, is whatever is in the + -- current-cost-centre-stack register. + + | SubsumedCCS -- Cost centre stack for top-level subsumed functions + -- (CAFs get an AllCafsCC). + -- Its execution costs get subsumed into the caller. + -- This guy is *only* ever pinned on static closures, + -- and is *never* the cost centre for an SCC construct. + + | OverheadCCS -- We charge costs due to the profiling-system + -- doing its work to "overhead". + -- + -- Objects whose CCS is "Overhead" + -- have their *allocation* charged to "overhead", + -- but have the current CCS put into the object + -- itself. + + -- For example, if we transform "f g" to "let + -- g' = g in f g'" (so that something about + -- profiling works better...), then we charge + -- the *allocation* of g' to OverheadCCS, but + -- we put the cost-centre of the call to f + -- (i.e., current CCS) into the g' object. When + -- g' is entered, the CCS of the call + -- to f will be set. + + | DontCareCCS -- We need a CCS to stick in static closures + -- (for data), but we *don't* expect them to + -- accumulate any costs. But we still need + -- the placeholder. This CCS is it. + + | PushCC CostCentre CostCentreStack + -- These are used during code generation as the CCSs + -- attached to closures. A PushCC never appears as + -- the argument to an _scc_. + -- + -- The tail (2nd argument) is either NoCCS, indicating + -- a staticly allocated CCS, or CurrentCCS indicating + -- a dynamically created CCS. We only support + -- statically allocated *singleton* CCSs at the + -- moment, for the purposes of initialising the CCS + -- field of a CAF. + + deriving (Eq, Ord) -- needed for Ord on CLabel +\end{code} + +A Cost Centre is the argument of an _scc_ expression. + +\begin{code} +data CostCentre + = NoCostCentre -- Having this constructor avoids having + -- to use "Maybe CostCentre" all the time. + + | NormalCC { + cc_name :: CcName, -- Name of the cost centre itself + cc_mod :: Module, -- Name of module defining this CC. + cc_is_dupd :: IsDupdCC, -- see below + cc_is_caf :: IsCafCC -- see below + } + + | AllCafsCC { + cc_mod :: Module -- Name of module defining this CC. + } + +type CcName = FastString + +data IsDupdCC + = OriginalCC -- This says how the CC is *used*. Saying that + | DupdCC -- it is DupdCC doesn't make it a different + -- CC, just that it a sub-expression which has + -- been moved ("dupd") into a different scope. + -- + -- The point about a dupd SCC is that we don't + -- count entries to it, because it's not the + -- "original" one. + -- + -- In the papers, it's called "SCCsub", + -- i.e. SCCsub CC == SCC DupdCC, + -- but we are trying to avoid confusion between + -- "subd" and "subsumed". So we call the former + -- "dupd". + +data IsCafCC = CafCC | NotCafCC + +-- synonym for triple which describes the cost centre info in the generated +-- code for a module. +type CollectedCCs + = ( [CostCentre] -- local cost-centres that need to be decl'd + , [CostCentre] -- "extern" cost-centres + , [CostCentreStack] -- pre-defined "singleton" cost centre stacks + ) +\end{code} + +WILL: Would there be any merit to recording ``I am now using a +cost-centre from another module''? I don't know if this would help a +user; it might be interesting to us to know how much computation is +being moved across module boundaries. + +SIMON: Maybe later... + +\begin{code} + +noCCS = NoCCS +subsumedCCS = SubsumedCCS +currentCCS = CurrentCCS +overheadCCS = OverheadCCS +dontCareCCS = DontCareCCS + +noCostCentre = NoCostCentre +\end{code} + +Predicates on Cost-Centre Stacks + +\begin{code} +noCCSAttached NoCCS = True +noCCSAttached _ = False + +noCCAttached NoCostCentre = True +noCCAttached _ = False + +isCurrentCCS CurrentCCS = True +isCurrentCCS _ = False + +isSubsumedCCS SubsumedCCS = True +isSubsumedCCS _ = False + +isCafCCS (PushCC cc NoCCS) = isCafCC cc +isCafCCS _ = False + +isDerivedFromCurrentCCS CurrentCCS = True +isDerivedFromCurrentCCS (PushCC _ ccs) = isDerivedFromCurrentCCS ccs +isDerivedFromCurrentCCS _ = False + +currentOrSubsumedCCS SubsumedCCS = True +currentOrSubsumedCCS CurrentCCS = True +currentOrSubsumedCCS _ = False + +maybeSingletonCCS (PushCC cc NoCCS) = Just cc +maybeSingletonCCS _ = Nothing +\end{code} + +Building cost centres + +\begin{code} +mkUserCC :: FastString -> Module -> CostCentre +mkUserCC cc_name mod + = NormalCC { cc_name = cc_name, cc_mod = mod, + cc_is_dupd = OriginalCC, cc_is_caf = NotCafCC {-might be changed-} + } + +mkAutoCC :: Id -> Module -> IsCafCC -> CostCentre +mkAutoCC id mod is_caf + = NormalCC { cc_name = occNameFS (getOccName id), cc_mod = mod, + cc_is_dupd = OriginalCC, cc_is_caf = is_caf + } + +mkAllCafsCC m = AllCafsCC { cc_mod = m } + + + +mkSingletonCCS :: CostCentre -> CostCentreStack +mkSingletonCCS cc = pushCCOnCCS cc NoCCS + +pushCCOnCCS :: CostCentre -> CostCentreStack -> CostCentreStack +pushCCOnCCS = PushCC + +dupifyCC cc = cc {cc_is_dupd = DupdCC} + +isCafCC, isDupdCC :: CostCentre -> Bool + +isCafCC (AllCafsCC {}) = True +isCafCC (NormalCC {cc_is_caf = CafCC}) = True +isCafCC _ = False + +isDupdCC (NormalCC {cc_is_dupd = DupdCC}) = True +isDupdCC _ = False + +isSccCountCostCentre :: CostCentre -> Bool + -- Is this a cost-centre which records scc counts + +#if DEBUG +isSccCountCostCentre NoCostCentre = panic "isSccCount:NoCostCentre" +#endif +isSccCountCostCentre cc | isCafCC cc = False + | isDupdCC cc = False + | otherwise = True + +sccAbleCostCentre :: CostCentre -> Bool + -- Is this a cost-centre which can be sccd ? + +#if DEBUG +sccAbleCostCentre NoCostCentre = panic "sccAbleCC:NoCostCentre" +#endif +sccAbleCostCentre cc | isCafCC cc = False + | otherwise = True + +ccFromThisModule :: CostCentre -> Module -> Bool +ccFromThisModule cc m = cc_mod cc == m +\end{code} + +\begin{code} +instance Eq CostCentre where + c1 == c2 = case c1 `cmpCostCentre` c2 of { EQ -> True; _ -> False } + +instance Ord CostCentre where + compare = cmpCostCentre + +cmpCostCentre :: CostCentre -> CostCentre -> Ordering + +cmpCostCentre (AllCafsCC {cc_mod = m1}) (AllCafsCC {cc_mod = m2}) = m1 `compare` m2 + +cmpCostCentre (NormalCC {cc_name = n1, cc_mod = m1, cc_is_caf = c1}) + (NormalCC {cc_name = n2, cc_mod = m2, cc_is_caf = c2}) + -- first key is module name, then we use "kinds" (which include + -- names) and finally the caf flag + = (m1 `compare` m2) `thenCmp` (n1 `compare` n2) `thenCmp` (c1 `cmp_caf` c2) + +cmpCostCentre other_1 other_2 + = let + tag1 = tag_CC other_1 + tag2 = tag_CC other_2 + in + if tag1 <# tag2 then LT else GT + where + tag_CC (NormalCC {}) = (_ILIT 1 :: FastInt) + tag_CC (AllCafsCC {}) = _ILIT 2 + +cmp_caf NotCafCC CafCC = LT +cmp_caf NotCafCC NotCafCC = EQ +cmp_caf CafCC CafCC = EQ +cmp_caf CafCC NotCafCC = GT + +decomposeCCS :: CostCentreStack -> ([CostCentre],CostCentreStack) +decomposeCCS (PushCC cc ccs) = (cc:more, ccs') + where (more,ccs') = decomposeCCS ccs +decomposeCCS ccs = ([],ccs) +\end{code} + +----------------------------------------------------------------------------- +Printing Cost Centre Stacks. + +The outputable instance for CostCentreStack prints the CCS as a C +expression. + +NOTE: Not all cost centres are suitable for using in a static +initializer. In particular, the PushCC forms where the tail is CCCS +may only be used in inline C code because they expand to a +non-constant C expression. + +\begin{code} +instance Outputable CostCentreStack where + ppr NoCCS = ptext SLIT("NO_CCS") + ppr CurrentCCS = ptext SLIT("CCCS") + ppr OverheadCCS = ptext SLIT("CCS_OVERHEAD") + ppr DontCareCCS = ptext SLIT("CCS_DONT_CARE") + ppr SubsumedCCS = ptext SLIT("CCS_SUBSUMED") + ppr (PushCC cc NoCCS) = ppr cc <> ptext SLIT("_ccs") + ppr (PushCC cc ccs) = ptext SLIT("PushCostCentre") <> + parens (ppr ccs <> comma <> + parens(ptext SLIT("void *")) <> ppr cc) +\end{code} + +----------------------------------------------------------------------------- +Printing Cost Centres. + +There are several different ways in which we might want to print a +cost centre: + + - the name of the cost centre, for profiling output (a C string) + - the label, i.e. C label for cost centre in .hc file. + - the debugging name, for output in -ddump things + - the interface name, for printing in _scc_ exprs in iface files. + +The last 3 are derived from costCentreStr below. The first is given +by costCentreName. + +\begin{code} +instance Outputable CostCentre where + ppr cc = getPprStyle $ \ sty -> + if codeStyle sty + then ppCostCentreLbl cc + else text (costCentreUserName cc) + +-- Printing in an interface file or in Core generally +pprCostCentreCore (AllCafsCC {cc_mod = m}) + = text "__sccC" <+> braces (ppr_mod m) +pprCostCentreCore (NormalCC {cc_name = n, cc_mod = m, + cc_is_caf = caf, cc_is_dupd = dup}) + = text "__scc" <+> braces (hsep [ + ftext (zEncodeFS n), + ppr_mod m, + pp_dup dup, + pp_caf caf + ]) + +pp_dup DupdCC = char '!' +pp_dup other = empty + +pp_caf CafCC = text "__C" +pp_caf other = empty + +ppr_mod m = ftext (zEncodeFS (moduleFS m)) + +-- Printing as a C label +ppCostCentreLbl (NoCostCentre) = text "NONE_cc" +ppCostCentreLbl (AllCafsCC {cc_mod = m}) = ppr m <> text "_CAFs_cc" +ppCostCentreLbl (NormalCC {cc_name = n, cc_mod = m, cc_is_caf = is_caf}) + = ppr_mod m <> ftext (zEncodeFS n) <> + text (case is_caf of { CafCC -> "_CAF"; _ -> "" }) <> text "_cc" + +-- This is the name to go in the user-displayed string, +-- recorded in the cost centre declaration +costCentreUserName (NoCostCentre) = "NO_CC" +costCentreUserName (AllCafsCC {}) = "CAF" +costCentreUserName cc@(NormalCC {cc_name = name, cc_is_caf = is_caf}) + = case is_caf of { CafCC -> "CAF:"; _ -> "" } ++ unpackFS name +\end{code} diff --git a/compiler/profiling/NOTES b/compiler/profiling/NOTES new file mode 100644 index 0000000000..c50cf562e3 --- /dev/null +++ b/compiler/profiling/NOTES @@ -0,0 +1,301 @@ +Profiling Implementation Notes -- June/July/Sept 1994 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Simon and Will + +Pre-code-generator-ish +~~~~~~~~~~~~~~~~~~~~~~ + +* Automagic insertion of _sccs_ on... + + - If -auto is specified, add _scc_ on each *exported* top-level definition. + NB this includes CAFs. Done by addAutoCostCentres (Core-to-Core pass). + + - If -auto-all is specified, add _scc_ on *all* top-level definitions. + Done by same pass. + + - Always: just before code generation of module M, onto any CAF + which hasn't already got an explicit cost centre attached, pin + "AllCAFs-M". + + Done by finalStgMassageForProfiling (final STG-to-STG pass) + + Only the one-off costs of evaluating the CAFs will be attributed + to the AllCAFs-M cost centre. We hope that these costs will be + small; since the _scc_s are introduced automatically it's + confusing to attribute any significant costs to them. However if + there *are* significant one-off costs we'd better know about it. + + Why so late in the compilation process? We aren't *absolutely* + sure what is and isn't a CAF until *just* before code generation. + So we don't want to mark them as such until then. + + - Individual DICTs + + We do it in the desugarer, because that's the *only* point at + which we *know* exactly what bindings are introduced by + overloading. NB should include bindings for selected methods, eg + + f d = let op = _scc_ DICT op_sel d in + ...op...op...op + + The DICT CC ensures that: + (a) [minor] that the selection cost is separately attributed + (b) [major] that the cost of executing op is attributed to + its call site, eg + + ...(scc "a" op)...(scc "b" op)...(scc "c" op)... + +* Automagic "boxing" of higher-order args: + + finalStgMassageForProfiling (final STG-to-STG pass) + + This (as well as CAF stuff above) is really quite separate + from the other business of finalStgMassageForProfiling + (collecting up CostCentres that need to be + declared/registered). + + But throwing it all into the pot together means that we don't + have to have Yet Another STG Syntax Walker. + + Furthermore, these "boxes" are really just let-bindings that + many other parts of the compiler will happily substitute away! + Doing them at the very last instant prevents this. + + A down side of doing these so late is that we get lots of + "let"s, which if generated earlier and not substituted away, + could be floated outwards. Having them floated outwards would + lessen the chance of skewing profiling results (because of + gratuitous "let"s added by the compiler into the inner loop of + some program...). The allocation itself will be attributed to + profiling overhead; the only thing which'll be skewed is time measurement. + + So if we have, post-boxing-higher-order-args... + + _scc_ "foo" ( let f' = [f] \ [] f + in + map f' xs ) + + ... we want "foo" to be put in the thunk for "f'", but we want the + allocation cost (heap census stuff) to be attr to OVERHEAD. + + As an example of what could be improved + f = _scc_ "f" (g h) + To save dynamic allocation, we could have a static closure for h: + h_inf = _scc_ "f" h + f = _scc_ "f" (g h_inf) + + + + + +Code generator-ish +~~~~~~~~~~~~~~~~~~ + +(1) _Entry_ code for a closure *usually* sets CC from the closure, + at the fast entry point + + Exceptions: + + (a) Top-level subsumed functions (i.e., w/ no _scc_ on them) + + Refrain from setting CC from the closure + + (b) Constructors + + Again, refrain. (This is *new*) + + Reasons: (i) The CC will be zapped very shortly by the restore + of the enclosing CC when we return to the eval'ing "case". + (ii) Any intervening updates will indirect to this existing + constructor (...mumble... new update mechanism... mumble...) + +(2) "_scc_ cc expr" + + Set current CC to "cc". + No later "restore" of the previous CC is reqd. + +(3) "case e of { ...alts... }" expression (eval) + + Save CC before eval'ing scrutinee + Restore CC at the start of the case-alternative(s) + +(4) _Updates_ : updatee gets current CC + + (???? not sure this is OK yet 94/07/04) + + Reasons: + + * Constructors : want to be insensitive to return-in-heap vs + return-in-regs. For example, + + f x = _scc_ "f" (x, x) + + The pair (x,x) would get CC of "f" if returned-in-heap; + therefore, updatees should get CC of "f". + + * PAPs : Example: + + f x = _scc_ "f" (let g = \ y -> ... in g) + + At the moment of update (updatePAP?), CC is "f", which + is what we want to set it to if the "updatee" is entered + + When we enter the PAP ("please put the arguments back so I can + use them"), we restore the setup as at the moment the + arg-satisfaction check failed. + + Be careful! UPDATE_PAP is called from the arg-satis check, + which is before the fast entry point. So the cost centre + won't yet have been set from the closure which has just + been entered. Solution: in UPDATE_PAP see if the cost centre inside + the function closure which is being entered is "SUB"; if so, use + the current cost centre to update the updatee; otherwise use that + inside the function closure. (See the computation of cc_pap + in rule 16_l for lexical semantics.) + + +(5) CAFs + +CAFs get their own cost centre. Ie + + x = e +is transformed to + x = _scc_ "CAF:x" e + +Or sometimes we lump all the CAFs in a module together. +(Reporting issue or code-gen issue?) + + + +Hybrid stuff +~~~~~~~~~~~~ + +The problem: + + f = _scc_ "CAF:f" (let g = \xy -> ... + in (g,g)) + +Now, g has cost-centre "CAF:f", and is returned as part of +the result. So whenever the function embedded in the result +is called, the costs will accumulate to "CAF:f". This is +particularly (de)pressing for dictionaries, which contain lots +of functions. + +Solution: + + A. Whenever in case (1) above we would otherwise "set the CC from the + closure", we *refrain* from doing so if + (a) the closure is a function, not a thunk; and + (b) the cost-centre in the closure is a CAF cost centre. + + B. Whenever we enter a thunk [at least, one which might return a function] + we save the current cost centre in the update frame. Then, UPDATE_PAP + restores the saved cost centre from the update frame iff the cost + centre at the point of update (cc_pap in (4) above) is a CAF cost centre. + + It isn't necessary to save and possibly-restore the cost centre for + thunks which will certainly return a constructor, because the + cost centre is about to be restored anyway by the enclosing case. + +Both A and B are runtime tests. For A, consider: + + f = _scc_ "CAF:f" (g 2) + + h y = _scc_ "h" g (y+y) + + g x = let w = \p -> ... + in (w,w) + + +Now, in the call to g from h, the cost-centre on w will be "h", and +indeed all calls to the result of the call should be attributed to +"h". + + ... _scc_ "x1" (let (t,_) = h 2 in t 3) ... + + Costs of executing (w 3) attributed to "h". + +But in the call to g from f, the cost-centre on w will be +"CAF:f", and calls to w should be attributed to the call site. + + ..._scc_ "x2" (let (t,_) = f in t 3)... + + Costs of executing (w 3) attributed to "x2". + + + Remaining problem + +Consider + + _scc_ "CAF:f" (if expensive then g 2 else g 3) + +where g is a function with arity 2. In theory we should +restore the enclosing cost centre once we've reduced to +(g 2) or (g 3). In practice this is pretty tiresome; and pretty rare. + +A quick fix: given (_scc_ "CAF" e) where e might be function-valued +(in practice we usually know, because CAF sccs are top level), transform to + + _scc_ "CAF" (let f = e in f) + + + + + +============ + +scc cc x ===> x + + UNLESS + +(a) cc is a user-defined, non-dup'd cost + centre (so we care about entry counts) + +OR + +(b) cc is not a CAF/DICT cost centre and x is top-level subsumed + function. + [If x is lambda/let bound it'll have a cost centre + attached dynamically.] + + To repeat, the transformation is OK if + x is a not top-level subsumed function + OR + cc is a CAF/DICT cost centre and x is a top-level + subsumed function + + + +(scc cc e) x ===> (scc cc e x) + + OK????? IFF + +cc is not CAF/DICT --- remains to be proved!!!!!! +True for lex +False for eval +Can we tell which in hybrid? + +eg Is this ok? + + (scc "f" (scc "CAF" (\x.b))) y ==> (scc "f" (scc "CAF" (\x.b) y)) + + +\x -> (scc cc e) ===> (scc cc \x->e) + + OK IFF cc is not CAF/DICT + + +scc cc1 (scc cc2 e)) ===> scc cc2 e + + IFF not interested in cc1's entry count + AND cc2 is not CAF/DICT + +(scc cc1 ... (scc cc2 e) ...) ===> (scc cc1 ... e ...) + + IFF cc2 is CAF/DICT + AND e is a lambda not appearing as the RHS of a let + OR + e is a variable not bound to SUB + + diff --git a/compiler/profiling/SCCfinal.lhs b/compiler/profiling/SCCfinal.lhs new file mode 100644 index 0000000000..c95db9c358 --- /dev/null +++ b/compiler/profiling/SCCfinal.lhs @@ -0,0 +1,411 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[SCCfinal]{Modify and collect code generation for final STG program} + +This is now a sort-of-normal STG-to-STG pass (WDP 94/06), run by stg2stg. + +* Traverses the STG program collecting the cost centres. These are + required to declare the cost centres at the start of code + generation. + + Note: because of cross-module unfolding, some of these cost centres + may be from other modules. But will still have to give them + "extern" declarations. + +* Puts on CAF cost-centres if the user has asked for individual CAF + cost-centres. + +* Ditto for individual DICT cost-centres. + +* Boxes top-level inherited functions passed as arguments. + +* "Distributes" given cost-centres to all as-yet-unmarked RHSs. + +\begin{code} +module SCCfinal ( stgMassageForProfiling ) where + +#include "HsVersions.h" + +import StgSyn + +import Packages ( HomeModules ) +import StaticFlags ( opt_AutoSccsOnIndividualCafs ) +import CostCentre -- lots of things +import Id ( Id ) +import Module ( Module ) +import UniqSupply ( uniqFromSupply, splitUniqSupply, UniqSupply ) +import Unique ( Unique ) +import VarSet +import ListSetOps ( removeDups ) +import Outputable + +infixr 9 `thenMM`, `thenMM_` +\end{code} + +\begin{code} +stgMassageForProfiling + :: HomeModules + -> Module -- module name + -> UniqSupply -- unique supply + -> [StgBinding] -- input + -> (CollectedCCs, [StgBinding]) + +stgMassageForProfiling pdeps mod_name us stg_binds + = let + ((local_ccs, extern_ccs, cc_stacks), + stg_binds2) + = initMM mod_name us (do_top_bindings stg_binds) + + (fixed_ccs, fixed_cc_stacks) + = if opt_AutoSccsOnIndividualCafs + then ([],[]) -- don't need "all CAFs" CC + -- (for Prelude, we use PreludeCC) + else ([all_cafs_cc], [all_cafs_ccs]) + + local_ccs_no_dups = fst (removeDups cmpCostCentre local_ccs) + extern_ccs_no_dups = fst (removeDups cmpCostCentre extern_ccs) + in + ((fixed_ccs ++ local_ccs_no_dups, + extern_ccs_no_dups, + fixed_cc_stacks ++ cc_stacks), stg_binds2) + where + + all_cafs_cc = mkAllCafsCC mod_name + all_cafs_ccs = mkSingletonCCS all_cafs_cc + + ---------- + do_top_bindings :: [StgBinding] -> MassageM [StgBinding] + + do_top_bindings [] = returnMM [] + + do_top_bindings (StgNonRec b rhs : bs) + = do_top_rhs b rhs `thenMM` \ rhs' -> + addTopLevelIshId b ( + do_top_bindings bs `thenMM` \bs' -> + returnMM (StgNonRec b rhs' : bs') + ) + + do_top_bindings (StgRec pairs : bs) + = addTopLevelIshIds binders ( + mapMM do_pair pairs `thenMM` \ pairs2 -> + do_top_bindings bs `thenMM` \ bs' -> + returnMM (StgRec pairs2 : bs') + ) + where + binders = map fst pairs + do_pair (b, rhs) + = do_top_rhs b rhs `thenMM` \ rhs2 -> + returnMM (b, rhs2) + + ---------- + do_top_rhs :: Id -> StgRhs -> MassageM StgRhs + + do_top_rhs binder (StgRhsClosure _ bi fv u srt [] (StgSCC cc (StgConApp con args))) + | not (isSccCountCostCentre cc) && not (isDllConApp pdeps con args) + -- Trivial _scc_ around nothing but static data + -- Eliminate _scc_ ... and turn into StgRhsCon + + -- isDllConApp checks for LitLit args too + = returnMM (StgRhsCon dontCareCCS con args) + +{- Can't do this one with cost-centre stacks: --SDM + do_top_rhs binder (StgRhsClosure no_cc bi fv u [] (StgSCC ty cc expr)) + | (noCCSAttached no_cc || currentOrSubsumedCCS no_cc) + && not (isSccCountCostCentre cc) + -- Top level CAF without a cost centre attached + -- Attach and collect cc of trivial _scc_ in body + = collectCC cc `thenMM_` + set_prevailing_cc cc (do_expr expr) `thenMM` \ expr' -> + returnMM (StgRhsClosure cc bi fv u [] expr') +-} + + do_top_rhs binder (StgRhsClosure no_cc bi fv u srt [] body) + | noCCSAttached no_cc || currentOrSubsumedCCS no_cc + -- Top level CAF without a cost centre attached + -- Attach CAF cc (collect if individual CAF ccs) + = (if opt_AutoSccsOnIndividualCafs + then let cc = mkAutoCC binder mod_name CafCC + ccs = mkSingletonCCS cc + in + collectCC cc `thenMM_` + collectCCS ccs `thenMM_` + returnMM ccs + else + returnMM all_cafs_ccs) `thenMM` \ caf_ccs -> + set_prevailing_cc caf_ccs (do_expr body) `thenMM` \ body' -> + returnMM (StgRhsClosure caf_ccs bi fv u srt [] body') + + do_top_rhs binder (StgRhsClosure cc bi fv u srt [] body) + -- Top level CAF with cost centre attached + -- Should this be a CAF cc ??? Does this ever occur ??? + = pprPanic "SCCfinal: CAF with cc:" (ppr cc) + + do_top_rhs binder (StgRhsClosure no_ccs bi fv u srt args body) + -- Top level function, probably subsumed + | noCCSAttached no_ccs + = set_lambda_cc (do_expr body) `thenMM` \ body' -> + returnMM (StgRhsClosure subsumedCCS bi fv u srt args body') + + | otherwise + = pprPanic "SCCfinal: CAF with cc:" (ppr no_ccs) + + do_top_rhs binder (StgRhsCon ccs con args) + -- Top-level (static) data is not counted in heap + -- profiles; nor do we set CCCS from it; so we + -- just slam in dontCareCostCentre + = returnMM (StgRhsCon dontCareCCS con args) + + ------ + do_expr :: StgExpr -> MassageM StgExpr + + do_expr (StgLit l) = returnMM (StgLit l) + + do_expr (StgApp fn args) + = boxHigherOrderArgs (StgApp fn) args + + do_expr (StgConApp con args) + = boxHigherOrderArgs (\args -> StgConApp con args) args + + do_expr (StgOpApp con args res_ty) + = boxHigherOrderArgs (\args -> StgOpApp con args res_ty) args + + do_expr (StgSCC cc expr) -- Ha, we found a cost centre! + = collectCC cc `thenMM_` + do_expr expr `thenMM` \ expr' -> + returnMM (StgSCC cc expr') + + do_expr (StgCase expr fv1 fv2 bndr srt alt_type alts) + = do_expr expr `thenMM` \ expr' -> + mapMM do_alt alts `thenMM` \ alts' -> + returnMM (StgCase expr' fv1 fv2 bndr srt alt_type alts') + where + do_alt (id, bs, use_mask, e) + = do_expr e `thenMM` \ e' -> + returnMM (id, bs, use_mask, e') + + do_expr (StgLet b e) + = do_let b e `thenMM` \ (b,e) -> + returnMM (StgLet b e) + + do_expr (StgLetNoEscape lvs1 lvs2 b e) + = do_let b e `thenMM` \ (b,e) -> + returnMM (StgLetNoEscape lvs1 lvs2 b e) + +#ifdef DEBUG + do_expr other = pprPanic "SCCfinal.do_expr" (ppr other) +#endif + + ---------------------------------- + + do_let (StgNonRec b rhs) e + = do_rhs rhs `thenMM` \ rhs' -> + addTopLevelIshId b ( + do_expr e `thenMM` \ e' -> + returnMM (StgNonRec b rhs',e') + ) + + do_let (StgRec pairs) e + = addTopLevelIshIds binders ( + mapMM do_pair pairs `thenMM` \ pairs' -> + do_expr e `thenMM` \ e' -> + returnMM (StgRec pairs', e') + ) + where + binders = map fst pairs + do_pair (b, rhs) + = do_rhs rhs `thenMM` \ rhs2 -> + returnMM (b, rhs2) + + ---------------------------------- + do_rhs :: StgRhs -> MassageM StgRhs + -- We play much the same game as we did in do_top_rhs above; + -- but we don't have to worry about cafs etc. + +{- + do_rhs (StgRhsClosure closure_cc bi fv u [] (StgSCC ty cc (StgCon (DataCon con) args _))) + | not (isSccCountCostCentre cc) + = collectCC cc `thenMM_` + returnMM (StgRhsCon cc con args) +-} + + do_rhs (StgRhsClosure _ bi fv u srt args expr) + = slurpSCCs currentCCS expr `thenMM` \ (expr', ccs) -> + do_expr expr' `thenMM` \ expr'' -> + returnMM (StgRhsClosure ccs bi fv u srt args expr'') + where + slurpSCCs ccs (StgSCC cc e) + = collectCC cc `thenMM_` + slurpSCCs (cc `pushCCOnCCS` ccs) e + slurpSCCs ccs e + = returnMM (e, ccs) + + do_rhs (StgRhsCon cc con args) + = returnMM (StgRhsCon currentCCS con args) +\end{code} + +%************************************************************************ +%* * +\subsection{Boxing higher-order args} +%* * +%************************************************************************ + +Boxing is *turned off* at the moment, until we can figure out how to +do it properly in general. + +\begin{code} +boxHigherOrderArgs + :: ([StgArg] -> StgExpr) + -- An application lacking its arguments + -> [StgArg] -- arguments which we might box + -> MassageM StgExpr + +#ifndef PROF_DO_BOXING +boxHigherOrderArgs almost_expr args + = returnMM (almost_expr args) +#else +boxHigherOrderArgs almost_expr args + = getTopLevelIshIds `thenMM` \ ids -> + mapAccumMM (do_arg ids) [] args `thenMM` \ (let_bindings, new_args) -> + returnMM (foldr (mk_stg_let currentCCS) (almost_expr new_args) let_bindings) + where + --------------- + + do_arg ids bindings arg@(StgVarArg old_var) + | (not (isLocalVar old_var) || elemVarSet old_var ids) + && isFunTy (dropForAlls var_type) + = -- make a trivial let-binding for the top-level function + getUniqueMM `thenMM` \ uniq -> + let + new_var = mkSysLocal FSLIT("sf") uniq var_type + in + returnMM ( (new_var, old_var) : bindings, StgVarArg new_var ) + where + var_type = idType old_var + + do_arg ids bindings arg = returnMM (bindings, arg) + + --------------- + mk_stg_let :: CostCentreStack -> (Id, Id) -> StgExpr -> StgExpr + + mk_stg_let cc (new_var, old_var) body + = let + rhs_body = StgApp old_var [{-args-}] + rhs_closure = StgRhsClosure cc stgArgOcc [{-fvs-}] ReEntrant NoSRT{-eeek!!!-} [{-args-}] rhs_body + in + StgLet (StgNonRec new_var rhs_closure) body + where + bOGUS_LVs = emptyUniqSet -- easier to print than: panic "mk_stg_let: LVs" +#endif +\end{code} + +%************************************************************************ +%* * +\subsection{Boring monad stuff for this} +%* * +%************************************************************************ + +\begin{code} +type MassageM result + = Module -- module name + -> CostCentreStack -- prevailing CostCentre + -- if none, subsumedCosts at top-level + -- currentCostCentre at nested levels + -> UniqSupply + -> VarSet -- toplevel-ish Ids for boxing + -> CollectedCCs + -> (CollectedCCs, result) + +-- the initMM function also returns the final CollectedCCs + +initMM :: Module -- module name, which we may consult + -> UniqSupply + -> MassageM a + -> (CollectedCCs, a) + +initMM mod_name init_us m = m mod_name noCCS init_us emptyVarSet ([],[],[]) + +thenMM :: MassageM a -> (a -> MassageM b) -> MassageM b +thenMM_ :: MassageM a -> (MassageM b) -> MassageM b + +thenMM expr cont mod scope_cc us ids ccs + = case splitUniqSupply us of { (s1, s2) -> + case (expr mod scope_cc s1 ids ccs) of { (ccs2, result) -> + cont result mod scope_cc s2 ids ccs2 }} + +thenMM_ expr cont mod scope_cc us ids ccs + = case splitUniqSupply us of { (s1, s2) -> + case (expr mod scope_cc s1 ids ccs) of { (ccs2, _) -> + cont mod scope_cc s2 ids ccs2 }} + +returnMM :: a -> MassageM a +returnMM result mod scope_cc us ids ccs = (ccs, result) + +nopMM :: MassageM () +nopMM mod scope_cc us ids ccs = (ccs, ()) + +mapMM :: (a -> MassageM b) -> [a] -> MassageM [b] +mapMM f [] = returnMM [] +mapMM f (m:ms) + = f m `thenMM` \ r -> + mapMM f ms `thenMM` \ rs -> + returnMM (r:rs) + +mapAccumMM :: (acc -> x -> MassageM (acc, y)) -> acc -> [x] -> MassageM (acc, [y]) +mapAccumMM f b [] = returnMM (b, []) +mapAccumMM f b (m:ms) + = f b m `thenMM` \ (b2, r) -> + mapAccumMM f b2 ms `thenMM` \ (b3, rs) -> + returnMM (b3, r:rs) + +getUniqueMM :: MassageM Unique +getUniqueMM mod scope_cc us ids ccs = (ccs, uniqFromSupply us) + +addTopLevelIshId :: Id -> MassageM a -> MassageM a +addTopLevelIshId id scope mod scope_cc us ids ccs + | isCurrentCCS scope_cc = scope mod scope_cc us ids ccs + | otherwise = scope mod scope_cc us (extendVarSet ids id) ccs + +addTopLevelIshIds :: [Id] -> MassageM a -> MassageM a +addTopLevelIshIds [] cont = cont +addTopLevelIshIds (id:ids) cont + = addTopLevelIshId id (addTopLevelIshIds ids cont) + +getTopLevelIshIds :: MassageM VarSet +getTopLevelIshIds mod scope_cc us ids ccs = (ccs, ids) +\end{code} + +The prevailing CCS is used to tell whether we're in a top-levelish +position, where top-levelish is defined as "not inside a lambda". +Prevailing CCs used to be used for something much more complicated, +I'm sure --SDM + +\begin{code} +set_lambda_cc :: MassageM a -> MassageM a +set_lambda_cc action mod scope_cc us ids ccs + = action mod currentCCS us ids ccs + +set_prevailing_cc :: CostCentreStack -> MassageM a -> MassageM a +set_prevailing_cc cc_to_set_to action mod scope_cc us ids ccs + = action mod cc_to_set_to us ids ccs + +get_prevailing_cc :: MassageM CostCentreStack +get_prevailing_cc mod scope_cc us ids ccs = (ccs, scope_cc) +\end{code} + +\begin{code} +collectCC :: CostCentre -> MassageM () + +collectCC cc mod_name scope_cc us ids (local_ccs, extern_ccs, ccss) + = ASSERT(not (noCCAttached cc)) + if (cc `ccFromThisModule` mod_name) then + ((cc : local_ccs, extern_ccs, ccss), ()) + else -- must declare it "extern" + ((local_ccs, cc : extern_ccs, ccss), ()) + +collectCCS :: CostCentreStack -> MassageM () + +collectCCS ccs mod_name scope_cc us ids (local_ccs, extern_ccs, ccss) + = ASSERT(not (noCCSAttached ccs)) + ((local_ccs, extern_ccs, ccs : ccss), ()) +\end{code} |