summaryrefslogtreecommitdiff
path: root/compiler/codeGen
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2011-10-27 13:47:27 +0100
committerSimon Marlow <marlowsd@gmail.com>2011-11-02 16:34:05 +0000
commit7bb0447df9a783c222c2a077e35e5013c7c68d91 (patch)
tree78d6d2a14f7e42df5cda32199c71ced973f169ef /compiler/codeGen
parentbd72eeb184a95ae0ae79ccad19c8ccc2b45a12e0 (diff)
downloadhaskell-7bb0447df9a783c222c2a077e35e5013c7c68d91.tar.gz
Overhaul of infrastructure for profiling, coverage (HPC) and breakpoints
User visible changes ==================== Profilng -------- Flags renamed (the old ones are still accepted for now): OLD NEW --------- ------------ -auto-all -fprof-auto -auto -fprof-exported -caf-all -fprof-cafs New flags: -fprof-auto Annotates all bindings (not just top-level ones) with SCCs -fprof-top Annotates just top-level bindings with SCCs -fprof-exported Annotates just exported bindings with SCCs -fprof-no-count-entries Do not maintain entry counts when profiling (can make profiled code go faster; useful with heap profiling where entry counts are not used) Cost-centre stacks have a new semantics, which should in most cases result in more useful and intuitive profiles. If you find this not to be the case, please let me know. This is the area where I have been experimenting most, and the current solution is probably not the final version, however it does address all the outstanding bugs and seems to be better than GHC 7.2. Stack traces ------------ +RTS -xc now gives more information. If the exception originates from a CAF (as is common, because GHC tends to lift exceptions out to the top-level), then the RTS walks up the stack and reports the stack in the enclosing update frame(s). Result: +RTS -xc is much more useful now - but you still have to compile for profiling to get it. I've played around a little with adding 'head []' to GHC itself, and +RTS -xc does pinpoint the problem quite accurately. I plan to add more facilities for stack tracing (e.g. in GHCi) in the future. Coverage (HPC) -------------- * derived instances are now coloured yellow if they weren't used * likewise record field names * entry counts are more accurate (hpc --fun-entry-count) * tab width is now correct (markup was previously off in source with tabs) Internal changes ================ In Core, the Note constructor has been replaced by Tick (Tickish b) (Expr b) which is used to represent all the kinds of source annotation we support: profiling SCCs, HPC ticks, and GHCi breakpoints. Depending on the properties of the Tickish, different transformations apply to Tick. See CoreUtils.mkTick for details. Tickets ======= This commit closes the following tickets, test cases to follow: - Close #2552: not a bug, but the behaviour is now more intuitive (test is T2552) - Close #680 (test is T680) - Close #1531 (test is result001) - Close #949 (test is T949) - Close #2466: test case has bitrotted (doesn't compile against current version of vector-space package)
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--compiler/codeGen/CgCallConv.hs15
-rw-r--r--compiler/codeGen/CgClosure.lhs31
-rw-r--r--compiler/codeGen/CgCon.lhs6
-rw-r--r--compiler/codeGen/CgExpr.lhs2
-rw-r--r--compiler/codeGen/CgHeapery.lhs8
-rw-r--r--compiler/codeGen/CgProf.hs195
-rw-r--r--compiler/codeGen/StgCmmBind.hs23
-rw-r--r--compiler/codeGen/StgCmmCon.hs4
-rw-r--r--compiler/codeGen/StgCmmExpr.hs2
-rw-r--r--compiler/codeGen/StgCmmHeap.hs5
-rw-r--r--compiler/codeGen/StgCmmProf.hs177
11 files changed, 82 insertions, 386 deletions
diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs
index 32f6727b04..345b65cd3c 100644
--- a/compiler/codeGen/CgCallConv.hs
+++ b/compiler/codeGen/CgCallConv.hs
@@ -28,6 +28,7 @@ module CgCallConv (
) where
import CgMonad
+import CgProf
import SMRep
import OldCmm
@@ -160,10 +161,16 @@ constructSlowCall amodes
-- fewer arguments than we currently have.
slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)]
slowArgs [] = []
-slowArgs amodes = (NonPtrArg, mkLblExpr stg_ap_pat) : args ++ slowArgs rest
- where (arg_pat, args, rest) = matchSlowPattern amodes
- stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
-
+slowArgs amodes
+ | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest
+ | otherwise = this_pat ++ slowArgs rest
+ where
+ (arg_pat, args, rest) = matchSlowPattern amodes
+ stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
+ this_pat = (NonPtrArg, mkLblExpr stg_ap_pat) : args
+ save_cccs = [(NonPtrArg, mkLblExpr save_cccs_lbl), (NonPtrArg, curCCS)]
+ save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
+
matchSlowPattern :: [(CgRep,CmmExpr)]
-> (FastString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)])
matchSlowPattern amodes = (arg_pat, these, rest)
diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs
index bccadb5a5d..32190a3c9c 100644
--- a/compiler/codeGen/CgClosure.lhs
+++ b/compiler/codeGen/CgClosure.lhs
@@ -108,7 +108,7 @@ cgStdRhsClosure
-> [StgArg] -- payload
-> FCode (Id, CgIdInfo)
-cgStdRhsClosure bndr cc _bndr_info _fvs args body lf_info payload
+cgStdRhsClosure bndr _cc _bndr_info _fvs _args _body lf_info payload
= do -- AHA! A STANDARD-FORM THUNK
{ -- LAY OUT THE OBJECT
amodes <- getArgAmodes payload
@@ -122,10 +122,10 @@ cgStdRhsClosure bndr cc _bndr_info _fvs args body lf_info payload
NoC_SRT -- No SRT for a std-form closure
descr
- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
+-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
-- BUILD THE OBJECT
- ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets
+ ; heap_offset <- allocDynClosure closure_info curCCS curCCS amodes_w_offsets
-- RETURN
; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) }
@@ -197,9 +197,9 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do
; let
to_amode (info, offset) = do { amode <- idInfoToAmode info
; return (amode, offset) }
- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
+-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
; amodes_w_offsets <- mapFCs to_amode bind_details
- ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets
+ ; heap_offset <- allocDynClosure closure_info curCCS curCCS amodes_w_offsets
-- RETURN
; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) }
@@ -239,16 +239,15 @@ So it should set up an update frame (if it is shared).
NB: Thunks cannot have a primitive type!
\begin{code}
-closureCodeBody _binder_info cl_info cc [{- No args i.e. thunk -}] body = do
+closureCodeBody _binder_info cl_info _cc [{- No args i.e. thunk -}] body = do
{ body_absC <- getCgStmts $ do
{ tickyEnterThunk cl_info
; ldvEnterClosure cl_info -- NB: Node always points when profiling
; thunkWrapper cl_info $ do
-- We only enter cc after setting up update so
-- that cc of enclosing scope will be recorded
- -- in update frame CAF/DICT functions will be
- -- subsumed by this enclosing cc
- { enterCostCentre cl_info cc body
+ -- in the update frame
+ { enterCostCentreThunk (CmmReg nodeReg)
; cgExpr body }
}
@@ -307,16 +306,14 @@ mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do
; bindArgsToStack stk_args
; setRealAndVirtualSp sp_top
- -- Enter the cost-centre, if required
- -- ToDo: It's not clear why this is outside the funWrapper,
- -- but the tickyEnterFun is inside. Perhaps we can put
- -- them together?
- ; enterCostCentre cl_info cc body
-
- -- Do the business
+ -- Do the business
; funWrapper cl_info reg_args reg_save_code $ do
{ tickyEnterFun cl_info
- ; cgExpr body }
+ ; enterCostCentreFun cc $
+ CmmMachOp mo_wordSub [ CmmReg nodeReg
+ , CmmLit (mkIntCLit (funTag cl_info)) ]
+
+ ; cgExpr body }
}
\end{code}
diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs
index 26489e5945..17bb9d0ad8 100644
--- a/compiler/codeGen/CgCon.lhs
+++ b/compiler/codeGen/CgCon.lhs
@@ -223,9 +223,9 @@ buildDynCon' _ binder ccs con args
where
lf_info = mkConLFInfo con
- use_cc -- cost-centre to stick in the object
- | currentOrSubsumedCCS ccs = curCCS
- | otherwise = CmmLit (mkCCostCentreStack ccs)
+ use_cc -- cost-centre to stick in the object
+ | isCurrentCCS ccs = curCCS
+ | otherwise = panic "buildDynCon: non-current CCS not implemented"
blame_cc = use_cc -- cost-centre on which to blame the alloc (same)
\end{code}
diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs
index 1f16c1feee..5ffa4e550d 100644
--- a/compiler/codeGen/CgExpr.lhs
+++ b/compiler/codeGen/CgExpr.lhs
@@ -258,7 +258,7 @@ SCC expressions are treated specially. They set the current cost
centre.
\begin{code}
-cgExpr (StgSCC cc expr) = do emitSetCCC cc; cgExpr expr
+cgExpr (StgSCC cc tick push expr) = do emitSetCCC cc tick push; cgExpr expr
\end{code}
%********************************************************
diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs
index ebdde2d31a..9e9cc8d1d8 100644
--- a/compiler/codeGen/CgHeapery.lhs
+++ b/compiler/codeGen/CgHeapery.lhs
@@ -575,13 +575,7 @@ allocDynClosure cl_info use_cc _blame_cc amodes_with_offsets
-- SAY WHAT WE ARE ABOUT TO DO
; profDynAlloc cl_info use_cc
- -- ToDo: This is almost certainly wrong
- -- We're ignoring blame_cc. But until we've
- -- fixed the boxing hack in chooseDynCostCentres etc,
- -- we're worried about making things worse by "fixing"
- -- this part to use blame_cc!
-
- ; tickyDynAlloc cl_info
+ ; tickyDynAlloc cl_info
-- ALLOCATE THE OBJECT
; base <- getHpRelOffset info_offset
diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs
index b58fbb4238..b43751361c 100644
--- a/compiler/codeGen/CgProf.hs
+++ b/compiler/codeGen/CgProf.hs
@@ -10,13 +10,13 @@ module CgProf (
mkCCostCentre, mkCCostCentreStack,
-- Cost-centre Profiling
- dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
- enterCostCentre, enterCostCentrePAP, enterCostCentreThunk,
- chooseDynCostCentres,
- costCentreFrom,
+ dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
+ enterCostCentreThunk,
+ enterCostCentreFun,
+ costCentreFrom,
curCCS, curCCSAddr,
emitCostCentreDecl, emitCostCentreStackDecl,
- emitSetCCC, emitCCS,
+ emitSetCCC,
-- Lag/drag/void stuff
ldvEnter, ldvEnterClosure, ldvRecordCreate
@@ -40,10 +40,8 @@ import OldCmm
import OldCmmUtils
import CLabel
-import Id
import qualified Module
import CostCentre
-import StgSyn
import StaticFlags
import FastString
import Module
@@ -108,6 +106,9 @@ profDynAlloc cl_info ccs
-- | Record the allocation of a closure (size is given by a CmmExpr)
-- The size must be in words, because the allocation counter in a CCS counts
-- in words.
+--
+-- This API is used by the @CCS_ALLOC()@ macro in @.cmm@ code.
+--
profAlloc :: CmmExpr -> CmmExpr -> Code
profAlloc words ccs
= ifProfiling $
@@ -121,160 +122,21 @@ profAlloc words ccs
where
alloc_rep = typeWidth REP_CostCentreStack_mem_alloc
--- ----------------------------------------------------------------------
--- Setting the cost centre in a new closure
-
-chooseDynCostCentres :: CostCentreStack
- -> [Id] -- Args
- -> StgExpr -- Body
- -> FCode (CmmExpr, CmmExpr)
--- Called when alllcating a closure
--- Tells which cost centre to put in the object, and which
--- to blame the cost of allocation on
-chooseDynCostCentres ccs args body = do
- -- Cost-centre we record in the object
- use_ccs <- emitCCS ccs
-
- -- Cost-centre on whom we blame the allocation
- let blame_ccs
- | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS)
- | otherwise = use_ccs
-
- return (use_ccs, blame_ccs)
-
-
--- Some CostCentreStacks are a sequence of pushes on top of CCCS.
--- These pushes must be performed before we can refer to the stack in
--- an expression.
-emitCCS :: CostCentreStack -> FCode CmmExpr
-emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's)
- where
- (cc's, ccs') = decomposeCCS ccs
-
- push_em ccs [] = return ccs
- push_em ccs (cc:rest) = do
- tmp <- newTemp bWord -- TODO FIXME NOW
- pushCostCentre tmp ccs cc
- push_em (CmmReg (CmmLocal tmp)) rest
-
-ccsExpr :: CostCentreStack -> CmmExpr
-ccsExpr ccs
- | isCurrentCCS ccs = curCCS
- | otherwise = CmmLit (mkCCostCentreStack ccs)
-
-
-isBox :: StgExpr -> Bool
--- If it's an utterly trivial RHS, then it must be
--- one introduced by boxHigherOrderArgs for profiling,
--- so we charge it to "OVERHEAD".
--- This looks like a GROSS HACK to me --SDM
-isBox (StgApp _ []) = True
-isBox _ = False
-
-
-- -----------------------------------------------------------------------
-- Setting the current cost centre on entry to a closure
--- For lexically scoped profiling we have to load the cost centre from
--- the closure entered, if the costs are not supposed to be inherited.
--- This is done immediately on entering the fast entry point.
-
--- Load current cost centre from closure, if not inherited.
--- Node is guaranteed to point to it, if profiling and not inherited.
-
-enterCostCentre
- :: ClosureInfo
- -> CostCentreStack
- -> StgExpr -- The RHS of the closure
- -> Code
-
--- We used to have a special case for bindings of form
--- f = g True
--- where g has arity 2. The RHS is a thunk, but we don't
--- need to update it; and we want to subsume costs.
--- We don't have these sort of PAPs any more, so the special
--- case has gone away.
-
-enterCostCentre closure_info ccs body
- = ifProfiling $
- ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs)
- enter_cost_centre closure_info ccs body
-
-enter_cost_centre :: ClosureInfo -> CostCentreStack -> StgExpr -> Code
-enter_cost_centre closure_info ccs body
- | isSubsumedCCS ccs
- = ASSERT(isToplevClosure closure_info)
- ASSERT(re_entrant)
- enter_ccs_fsub
-
- | isDerivedFromCurrentCCS ccs
- = do {
- if re_entrant && not is_box
- then
- enter_ccs_fun node_ccs
- else
- stmtC (CmmStore curCCSAddr node_ccs)
-
- -- don't forget to bump the scc count. This closure might have been
- -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal
- -- pass has turned into simply let x = e in ...x... and attached
- -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that
- -- we don't lose the scc counter, bump it in the entry code for x.
- -- ToDo: for a multi-push we should really bump the counter for
- -- each of the intervening CCSs, not just the top one.
- ; when (not (isCurrentCCS ccs)) $
- stmtC (bumpSccCount curCCS)
- }
-
- | isCafCCS ccs
- = ASSERT(isToplevClosure closure_info)
- ASSERT(not re_entrant)
- do { -- This is just a special case of the isDerivedFromCurrentCCS
- -- case above. We could delete this, but it's a micro
- -- optimisation and saves a bit of code.
- stmtC (CmmStore curCCSAddr enc_ccs)
- ; stmtC (bumpSccCount node_ccs)
- }
-
- | otherwise
- = panic "enterCostCentre"
- where
- enc_ccs = CmmLit (mkCCostCentreStack ccs)
- re_entrant = closureReEntrant closure_info
- node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag))
- is_box = isBox body
-
- -- if this is a function, then node will be tagged; we must subract the tag
- node_tag = funTag closure_info
-
--- set the current CCS when entering a PAP
-enterCostCentrePAP :: CmmExpr -> Code
-enterCostCentrePAP closure =
- ifProfiling $ do
- enter_ccs_fun (costCentreFrom closure)
- enteringPAP 1
-
enterCostCentreThunk :: CmmExpr -> Code
enterCostCentreThunk closure =
ifProfiling $ do
stmtC $ CmmStore curCCSAddr (costCentreFrom closure)
-enter_ccs_fun :: CmmExpr -> Code
-enter_ccs_fun stack = emitRtsCall rtsPackageId (fsLit "EnterFunCCS") [CmmHinted stack AddrHint] False
- -- ToDo: vols
-
-enter_ccs_fsub :: Code
-enter_ccs_fsub = enteringPAP 0
-
--- When entering a PAP, EnterFunCCS is called by both the PAP entry
--- code and the function entry code; we don't want the function's
--- entry code to also update CCCS in the event that it was called via
--- a PAP, so we set the flag entering_PAP to indicate that we are
--- entering via a PAP.
-enteringPAP :: Integer -> Code
-enteringPAP n
- = stmtC (CmmStore (CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "entering_PAP"))))
- (CmmLit (CmmInt n cIntWidth)))
+enterCostCentreFun :: CostCentreStack -> CmmExpr -> Code
+enterCostCentreFun ccs closure =
+ ifProfiling $ do
+ if isCurrentCCS ccs
+ then emitRtsCall rtsPackageId (fsLit "enterFunCCS")
+ [CmmHinted (costCentreFrom closure) AddrHint] False
+ else return () -- top-level function, nothing to do
ifProfiling :: Code -> Code
ifProfiling code
@@ -286,7 +148,6 @@ ifProfilingL xs
| opt_SccProfilingOn = xs
| otherwise = []
-
-- ---------------------------------------------------------------------------
-- Initialising Cost Centres & CCSs
@@ -306,15 +167,15 @@ emitCostCentreDecl cc = do
modl, -- char *module,
zero, -- StgWord time_ticks
zero64, -- StgWord64 mem_alloc
- subsumed, -- StgInt is_caf
- zero -- struct _CostCentre *link
+ is_caf, -- StgInt is_caf
+ zero -- struct _CostCentre *link
]
; emitDataLits (mkCCLabel cc) lits
}
where
- subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF
- | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring
-
+ is_caf | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF
+ | otherwise = zero
+
emitCostCentreStackDecl
:: CostCentreStack
@@ -349,23 +210,21 @@ sizeof_ccs_words
-- ---------------------------------------------------------------------------
-- Set the current cost centre stack
-emitSetCCC :: CostCentre -> Code
-emitSetCCC cc
+emitSetCCC :: CostCentre -> Bool -> Bool -> Code
+emitSetCCC cc tick push
| not opt_SccProfilingOn = nopC
| otherwise = do
tmp <- newTemp bWord -- TODO FIXME NOW
- ASSERT( sccAbleCostCentre cc )
- pushCostCentre tmp curCCS cc
- stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp)))
- when (isSccCountCostCentre cc) $
- stmtC (bumpSccCount curCCS)
+ pushCostCentre tmp curCCS cc
+ when tick $ stmtC (bumpSccCount (CmmReg (CmmLocal tmp)))
+ when push $ stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp)))
pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code
pushCostCentre result ccs cc
= emitRtsCallWithResult result AddrHint
rtsPackageId
- (fsLit "PushCostCentre") [CmmHinted ccs AddrHint,
- CmmHinted (CmmLit (mkCCostCentre cc)) AddrHint]
+ (fsLit "pushCostCentre") [CmmHinted ccs AddrHint,
+ CmmHinted (CmmLit (mkCCostCentre cc)) AddrHint]
False
bumpSccCount :: CmmExpr -> CmmStmt
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index 67dcd2d90f..4f60d0a6cf 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -151,8 +151,8 @@ cgRhs :: Id -> StgRhs -> FCode (CgIdInfo, CmmAGraph)
-- The returned values are the binding for the environment
-- and the Initialization Code that witnesses the binding
-cgRhs name (StgRhsCon maybe_cc con args)
- = buildDynCon name maybe_cc con args
+cgRhs name (StgRhsCon cc con args)
+ = buildDynCon name cc con args
cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body)
= mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag srt args body
@@ -300,12 +300,13 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body
(length args) body fv_details
-- BUILD THE OBJECT
- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
+-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
+ ; let use_cc = curCCS; blame_cc = curCCS
; emit (mkComment $ mkFastString "calling allocDynClosure")
; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off)
; let info_tbl = mkCmmInfo closure_info
; (tmp, init) <- allocDynClosure info_tbl lf_info use_cc blame_cc
- (map toVarArg fv_details)
+ (map toVarArg fv_details)
-- RETURN
; regIdInfo bndr lf_info tmp init }
@@ -324,7 +325,7 @@ cgStdThunk
-> [StgArg] -- payload
-> FCode (CgIdInfo, CmmAGraph)
-cgStdThunk bndr cc _bndr_info body lf_info payload
+cgStdThunk bndr _cc _bndr_info _body lf_info payload
= do -- AHA! A STANDARD-FORM THUNK
{ -- LAY OUT THE OBJECT
mod_name <- getModuleName
@@ -337,7 +338,8 @@ cgStdThunk bndr cc _bndr_info body lf_info payload
NoC_SRT -- No SRT for a std-form closure
descr
- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body
+-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body
+ ; let use_cc = curCCS; blame_cc = curCCS
-- BUILD THE OBJECT
; let info_tbl = mkCmmInfo closure_info
@@ -394,7 +396,7 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
lf_info = closureLFInfo cl_info
info_tbl = mkCmmInfo cl_info
-closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
+closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details
= ASSERT( length args > 0 )
do { -- Allocate the global ticky counter,
-- and establish the ticky-counter
@@ -424,8 +426,7 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
-- Main payload
; entryHeapCheck cl_info offset node' arity arg_regs $ do
- { enterCostCentre cl_info cc body
- ; fv_bindings <- mapM bind_fv fv_details
+ { fv_bindings <- mapM bind_fv fv_details
-- Load free vars out of closure *after*
-- heap check, to reduce live vars over check
; if node_points then load_fvs node lf_info fv_bindings
@@ -473,7 +474,7 @@ mkSlowEntryCode cl_info arg_regs -- function closure is already in `Node'
-----------------------------------------
thunkCode :: ClosureInfo -> [(NonVoid Id, VirtualHpOffset)] -> CostCentreStack
-> LocalReg -> Int -> StgExpr -> FCode ()
-thunkCode cl_info fv_details cc node arity body
+thunkCode cl_info fv_details _cc node arity body
= do { let node_points = nodeMustPointToIt (closureLFInfo cl_info)
node' = if node_points then Just node else Nothing
; tickyEnterThunk cl_info
@@ -493,7 +494,7 @@ thunkCode cl_info fv_details cc node arity body
-- that cc of enclosing scope will be recorded
-- in update frame CAF/DICT functions will be
-- subsumed by this enclosing cc
- do { enterCostCentre cl_info cc body
+ do { enterCostCentreThunk (CmmReg nodeReg)
; let lf_info = closureLFInfo cl_info
; fv_bindings <- mapM bind_fv fv_details
; load_fvs node lf_info fv_bindings
diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs
index a357db4d05..e17ac4fd32 100644
--- a/compiler/codeGen/StgCmmCon.hs
+++ b/compiler/codeGen/StgCmmCon.hs
@@ -216,8 +216,8 @@ buildDynCon' _ binder ccs con args
lf_info = mkConLFInfo con
use_cc -- cost-centre to stick in the object
- | currentOrSubsumedCCS ccs = curCCS
- | otherwise = CmmLit (mkCCostCentreStack ccs)
+ | isCurrentCCS ccs = curCCS
+ | otherwise = panic "buildDynCon: non-current CCS not implemented"
blame_cc = use_cc -- cost-centre on which to blame the alloc (same)
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index cb68f51bd4..bc4e9df44f 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -59,7 +59,7 @@ cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con args) = cgConApp con args
-cgExpr (StgSCC cc expr) = do { emitSetCCC cc; cgExpr expr }
+cgExpr (StgSCC cc tick push expr) = do { emitSetCCC cc tick push; cgExpr expr }
cgExpr (StgTick m n expr) = do { emit (mkTickBox m n); cgExpr expr }
cgExpr (StgLit lit) = do cmm_lit <- cgLit lit
emitReturn [CmmLit cmm_lit]
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index 857fd38e27..690b0a9622 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -97,11 +97,6 @@ allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets
; let rep = cit_rep info_tbl
; tickyDynAlloc rep lf_info
; profDynAlloc rep use_cc
- -- ToDo: This is almost certainly wrong
- -- We're ignoring blame_cc. But until we've
- -- fixed the boxing hack in chooseDynCostCentres etc,
- -- we're worried about making things worse by "fixing"
- -- this part to use blame_cc!
-- FIND THE OFFSET OF THE INFO-PTR WORD
; let info_offset = virt_hp + 1
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs
index b1aca6e37e..17b61c6a59 100644
--- a/compiler/codeGen/StgCmmProf.hs
+++ b/compiler/codeGen/StgCmmProf.hs
@@ -12,11 +12,10 @@ module StgCmmProf (
-- Cost-centre Profiling
dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
- enterCostCentre, enterCostCentrePAP, enterCostCentreThunk,
- chooseDynCostCentres,
- costCentreFrom,
+ enterCostCentreThunk,
+ costCentreFrom,
curCCS, curCCSAddr,
- emitSetCCC, emitCCS,
+ emitSetCCC,
saveCurrentCostCentre, restoreCurrentCostCentre,
@@ -43,17 +42,14 @@ import Cmm
import CmmUtils
import CLabel
-import Id
import qualified Module
import CostCentre
-import StgSyn
import StaticFlags
import FastString
import Module
-import Constants -- Lots of field offsets
+import Constants -- Lots of field offsets
import Outputable
-import Data.Char
import Control.Monad
-----------------------------------------------------------------------------
@@ -177,161 +173,14 @@ profAlloc words ccs
where
alloc_rep = REP_CostCentreStack_mem_alloc
--- ----------------------------------------------------------------------
--- Setting the cost centre in a new closure
-
-chooseDynCostCentres :: CostCentreStack
- -> [Id] -- Args
- -> StgExpr -- Body
- -> FCode (CmmExpr, CmmExpr)
--- Called when allocating a closure
--- Tells which cost centre to put in the object, and which
--- to blame the cost of allocation on
-chooseDynCostCentres ccs args body = do
- -- Cost-centre we record in the object
- use_ccs <- emitCCS ccs
-
- -- Cost-centre on whom we blame the allocation
- let blame_ccs
- | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS)
- | otherwise = use_ccs
-
- return (use_ccs, blame_ccs)
-
-
--- Some CostCentreStacks are a sequence of pushes on top of CCCS.
--- These pushes must be performed before we can refer to the stack in
--- an expression.
-emitCCS :: CostCentreStack -> FCode CmmExpr
-emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's)
- where
- (cc's, ccs') = decomposeCCS ccs
-
- push_em ccs [] = return ccs
- push_em ccs (cc:rest) = do
- tmp <- newTemp ccsType
- pushCostCentre tmp ccs cc
- push_em (CmmReg (CmmLocal tmp)) rest
-
-ccsExpr :: CostCentreStack -> CmmExpr
-ccsExpr ccs
- | isCurrentCCS ccs = curCCS
- | otherwise = CmmLit (mkCCostCentreStack ccs)
-
-
-isBox :: StgExpr -> Bool
--- If it's an utterly trivial RHS, then it must be
--- one introduced by boxHigherOrderArgs for profiling,
--- so we charge it to "OVERHEAD".
--- This looks like a GROSS HACK to me --SDM
-isBox (StgApp _ []) = True
-isBox _ = False
-
-
-- -----------------------------------------------------------------------
-- Setting the current cost centre on entry to a closure
--- For lexically scoped profiling we have to load the cost centre from
--- the closure entered, if the costs are not supposed to be inherited.
--- This is done immediately on entering the fast entry point.
-
--- Load current cost centre from closure, if not inherited.
--- Node is guaranteed to point to it, if profiling and not inherited.
-
-enterCostCentre
- :: ClosureInfo
- -> CostCentreStack
- -> StgExpr -- The RHS of the closure
- -> FCode ()
-
--- We used to have a special case for bindings of form
--- f = g True
--- where g has arity 2. The RHS is a thunk, but we don't
--- need to update it; and we want to subsume costs.
--- We don't have these sort of PAPs any more, so the special
--- case has gone away.
-
-enterCostCentre closure_info ccs body
- = ifProfiling $
- ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs)
- enter_cost_centre closure_info ccs body
-
-enter_cost_centre :: ClosureInfo -> CostCentreStack -> StgExpr -> FCode ()
-enter_cost_centre closure_info ccs body
- | isSubsumedCCS ccs
- = ASSERT(isToplevClosure closure_info)
- ASSERT(re_entrant)
- enter_ccs_fsub
-
- | isDerivedFromCurrentCCS ccs
- = do {
- if re_entrant && not is_box
- then
- enter_ccs_fun node_ccs
- else
- emit (mkStore curCCSAddr node_ccs)
-
- -- don't forget to bump the scc count. This closure might have been
- -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal
- -- pass has turned into simply let x = e in ...x... and attached
- -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that
- -- we don't lose the scc counter, bump it in the entry code for x.
- -- ToDo: for a multi-push we should really bump the counter for
- -- each of the intervening CCSs, not just the top one.
- ; when (not (isCurrentCCS ccs)) $
- emit (bumpSccCount curCCS)
- }
-
- | isCafCCS ccs
- = ASSERT(isToplevClosure closure_info)
- ASSERT(not re_entrant)
- do { -- This is just a special case of the isDerivedFromCurrentCCS
- -- case above. We could delete this, but it's a micro
- -- optimisation and saves a bit of code.
- emit (mkStore curCCSAddr enc_ccs)
- ; emit (bumpSccCount node_ccs)
- }
-
- | otherwise
- = panic "enterCostCentre"
- where
- enc_ccs = CmmLit (mkCCostCentreStack ccs)
- re_entrant = closureReEntrant closure_info
- node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag))
- is_box = isBox body
-
- -- if this is a function, then node will be tagged; we must subract the tag
- node_tag = funTag closure_info
-
--- set the current CCS when entering a PAP
-enterCostCentrePAP :: CmmExpr -> FCode ()
-enterCostCentrePAP closure =
- ifProfiling $ do
- enter_ccs_fun (costCentreFrom closure)
- enteringPAP 1
-
enterCostCentreThunk :: CmmExpr -> FCode ()
enterCostCentreThunk closure =
ifProfiling $ do
emit $ mkStore curCCSAddr (costCentreFrom closure)
-enter_ccs_fun :: CmmExpr -> FCode ()
-enter_ccs_fun stack = emitRtsCall rtsPackageId (fsLit "EnterFunCCS") [(stack,AddrHint)] False
- -- ToDo: vols
-
-enter_ccs_fsub :: FCode ()
-enter_ccs_fsub = enteringPAP 0
-
--- When entering a PAP, EnterFunCCS is called by both the PAP entry
--- code and the function entry code; we don't want the function's
--- entry code to also update CCCS in the event that it was called via
--- a PAP, so we set the flag entering_PAP to indicate that we are
--- entering via a PAP.
-enteringPAP :: Integer -> FCode ()
-enteringPAP n
- = emit (mkStore (CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "entering_PAP"))))
- (CmmLit (CmmInt n cIntWidth)))
-
ifProfiling :: FCode () -> FCode ()
ifProfiling code
| opt_SccProfilingOn = code
@@ -368,14 +217,10 @@ emitCostCentreDecl cc = do
modl, -- char *module,
zero, -- StgWord time_ticks
zero64, -- StgWord64 mem_alloc
- subsumed, -- StgInt is_caf
- zero -- struct _CostCentre *link
+ zero -- struct _CostCentre *link
]
; emitDataLits (mkCCLabel cc) lits
}
- where
- subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF
- | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring
emitCostCentreStackDecl :: CostCentreStack -> FCode ()
emitCostCentreStackDecl ccs
@@ -408,16 +253,14 @@ sizeof_ccs_words
-- ---------------------------------------------------------------------------
-- Set the current cost centre stack
-emitSetCCC :: CostCentre -> FCode ()
-emitSetCCC cc
+emitSetCCC :: CostCentre -> Bool -> Bool -> FCode ()
+emitSetCCC cc tick push
| not opt_SccProfilingOn = nopC
| otherwise = do
tmp <- newTemp ccsType -- TODO FIXME NOW
- ASSERT( sccAbleCostCentre cc )
- pushCostCentre tmp curCCS cc
- emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp)))
- when (isSccCountCostCentre cc) $
- emit (bumpSccCount curCCS)
+ pushCostCentre tmp curCCS cc
+ when tick $ emit (bumpSccCount (CmmReg (CmmLocal tmp)))
+ when push $ emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp)))
pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
pushCostCentre result ccs cc