diff options
97 files changed, 2750 insertions, 2830 deletions
diff --git a/compiler/basicTypes/Id.lhs b/compiler/basicTypes/Id.lhs index 2536497efc..e88bfa1330 100644 --- a/compiler/basicTypes/Id.lhs +++ b/compiler/basicTypes/Id.lhs @@ -54,8 +54,7 @@ module Id ( isFCallId, isFCallId_maybe, isDataConWorkId, isDataConWorkId_maybe, isDataConId_maybe, idDataCon, isConLikeId, isBottomingId, idIsFrom, - isTickBoxOp, isTickBoxOp_maybe, - hasNoBinding, + hasNoBinding, -- ** Evidence variables DictId, isDictId, isEvVar, @@ -426,20 +425,6 @@ isDeadBinder bndr | isId bndr = isDeadOcc (idOccInfo bndr) | otherwise = False -- TyVars count as not dead \end{code} -\begin{code} -isTickBoxOp :: Id -> Bool -isTickBoxOp id = - case Var.idDetails id of - TickBoxOpId _ -> True - _ -> False - -isTickBoxOp_maybe :: Id -> Maybe TickBoxOp -isTickBoxOp_maybe id = - case Var.idDetails id of - TickBoxOpId tick -> Just tick - _ -> Nothing -\end{code} - %************************************************************************ %* * Evidence variables diff --git a/compiler/basicTypes/MkId.lhs b/compiler/basicTypes/MkId.lhs index a35dbdf8fd..e3a46899d7 100644 --- a/compiler/basicTypes/MkId.lhs +++ b/compiler/basicTypes/MkId.lhs @@ -15,8 +15,7 @@ have a standard form, namely: module MkId ( mkDictFunId, mkDictFunTy, mkDictSelId, - mkDataConIds, - mkPrimOpId, mkFCallId, mkTickBoxOpId, mkBreakPointOpId, + mkDataConIds, mkPrimOpId, mkFCallId, mkReboxingAlt, wrapNewTypeBody, unwrapNewTypeBody, wrapFamInstBody, unwrapFamInstScrut, @@ -65,7 +64,6 @@ import Pair import Outputable import FastString import ListSetOps -import Module \end{code} %************************************************************************ @@ -766,30 +764,6 @@ mkFCallId uniq fcall ty (arg_tys, _) = tcSplitFunTys tau arity = length arg_tys strict_sig = mkStrictSig (mkTopDmdType (replicate arity evalDmd) TopRes) - --- Tick boxes and breakpoints are both represented as TickBoxOpIds, --- except for the type: --- --- a plain HPC tick box has type (State# RealWorld) --- a breakpoint Id has type forall a.a --- --- The breakpoint Id will be applied to a list of arbitrary free variables, --- which is why it needs a polymorphic type. - -mkTickBoxOpId :: Unique -> Module -> TickBoxId -> Id -mkTickBoxOpId uniq mod ix = mkTickBox' uniq mod ix realWorldStatePrimTy - -mkBreakPointOpId :: Unique -> Module -> TickBoxId -> Id -mkBreakPointOpId uniq mod ix = mkTickBox' uniq mod ix ty - where ty = mkSigmaTy [openAlphaTyVar] [] openAlphaTy - -mkTickBox' :: Unique -> Module -> TickBoxId -> Type -> Id -mkTickBox' uniq mod ix ty = mkGlobalId (TickBoxOpId tickbox) name ty info - where - tickbox = TickBox mod ix - occ_str = showSDoc (braces (ppr tickbox)) - name = mkTickBoxOpName uniq occ_str - info = noCafIdInfo \end{code} diff --git a/compiler/basicTypes/Name.lhs b/compiler/basicTypes/Name.lhs index 1933740ed7..7a80ac1f16 100644 --- a/compiler/basicTypes/Name.lhs +++ b/compiler/basicTypes/Name.lhs @@ -40,9 +40,8 @@ module Name ( mkSystemName, mkSystemNameAt, mkInternalName, mkDerivedInternalName, mkSystemVarName, mkSysTvName, - mkFCallName, - mkTickBoxOpName, - mkExternalName, mkWiredInName, + mkFCallName, + mkExternalName, mkWiredInName, -- ** Manipulating and deconstructing 'Name's nameUnique, setNameUnique, @@ -254,8 +253,8 @@ mkInternalName uniq occ loc = Name { n_uniq = getKeyFastInt uniq -- * the insides of the compiler don't care: they use the Unique -- * when printing for -ddump-xxx you can switch on -dppr-debug to get the -- uniques if you get confused - -- * for interface files we tidyCore first, which puts the uniques - -- into the print name (see setNameVisibility below) + -- * for interface files we tidyCore first, which makes + -- the OccNames distinct when they need to be mkDerivedInternalName :: (OccName -> OccName) -> Unique -> Name -> Name mkDerivedInternalName derive_occ uniq (Name { n_occ = occ, n_loc = loc }) @@ -291,15 +290,8 @@ mkSysTvName uniq fs = mkSystemName uniq (mkOccNameFS tvName fs) -- | Make a name for a foreign call mkFCallName :: Unique -> String -> Name - -- The encoded string completely describes the ccall -mkFCallName uniq str = Name { n_uniq = getKeyFastInt uniq, n_sort = Internal, - n_occ = mkVarOcc str, n_loc = noSrcSpan } - - -mkTickBoxOpName :: Unique -> String -> Name -mkTickBoxOpName uniq str - = Name { n_uniq = getKeyFastInt uniq, n_sort = Internal, - n_occ = mkVarOcc str, n_loc = noSrcSpan } +mkFCallName uniq str = mkInternalName uniq (mkVarOcc str) noSrcSpan + -- The encoded string completely describes the ccall \end{code} \begin{code} diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index f31468ec11..bdb2c4c918 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -798,7 +798,6 @@ stmtMacros :: UniqFM ([CmmExpr] -> Code) stmtMacros = listToUFM [ ( fsLit "CCS_ALLOC", \[words,ccs] -> profAlloc words ccs ), ( fsLit "CLOSE_NURSERY", \[] -> emitCloseNursery ), - ( fsLit "ENTER_CCS_PAP_CL", \[e] -> enterCostCentrePAP e ), ( fsLit "ENTER_CCS_THUNK", \[e] -> enterCostCentreThunk e ), ( fsLit "HP_CHK_GEN", \[words,liveness,reentry] -> hpChkGen words liveness reentry ), diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs index 32f6727b04..345b65cd3c 100644 --- a/compiler/codeGen/CgCallConv.hs +++ b/compiler/codeGen/CgCallConv.hs @@ -28,6 +28,7 @@ module CgCallConv ( ) where import CgMonad +import CgProf import SMRep import OldCmm @@ -160,10 +161,16 @@ constructSlowCall amodes -- fewer arguments than we currently have. slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)] slowArgs [] = [] -slowArgs amodes = (NonPtrArg, mkLblExpr stg_ap_pat) : args ++ slowArgs rest - where (arg_pat, args, rest) = matchSlowPattern amodes - stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat - +slowArgs amodes + | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest + | otherwise = this_pat ++ slowArgs rest + where + (arg_pat, args, rest) = matchSlowPattern amodes + stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat + this_pat = (NonPtrArg, mkLblExpr stg_ap_pat) : args + save_cccs = [(NonPtrArg, mkLblExpr save_cccs_lbl), (NonPtrArg, curCCS)] + save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs") + matchSlowPattern :: [(CgRep,CmmExpr)] -> (FastString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)]) matchSlowPattern amodes = (arg_pat, these, rest) diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index bccadb5a5d..32190a3c9c 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -108,7 +108,7 @@ cgStdRhsClosure -> [StgArg] -- payload -> FCode (Id, CgIdInfo) -cgStdRhsClosure bndr cc _bndr_info _fvs args body lf_info payload +cgStdRhsClosure bndr _cc _bndr_info _fvs _args _body lf_info payload = do -- AHA! A STANDARD-FORM THUNK { -- LAY OUT THE OBJECT amodes <- getArgAmodes payload @@ -122,10 +122,10 @@ cgStdRhsClosure bndr cc _bndr_info _fvs args body lf_info payload NoC_SRT -- No SRT for a std-form closure descr - ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body +-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body -- BUILD THE OBJECT - ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets + ; heap_offset <- allocDynClosure closure_info curCCS curCCS amodes_w_offsets -- RETURN ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) } @@ -197,9 +197,9 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do ; let to_amode (info, offset) = do { amode <- idInfoToAmode info ; return (amode, offset) } - ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body +-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body ; amodes_w_offsets <- mapFCs to_amode bind_details - ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets + ; heap_offset <- allocDynClosure closure_info curCCS curCCS amodes_w_offsets -- RETURN ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) } @@ -239,16 +239,15 @@ So it should set up an update frame (if it is shared). NB: Thunks cannot have a primitive type! \begin{code} -closureCodeBody _binder_info cl_info cc [{- No args i.e. thunk -}] body = do +closureCodeBody _binder_info cl_info _cc [{- No args i.e. thunk -}] body = do { body_absC <- getCgStmts $ do { tickyEnterThunk cl_info ; ldvEnterClosure cl_info -- NB: Node always points when profiling ; thunkWrapper cl_info $ do -- We only enter cc after setting up update so -- that cc of enclosing scope will be recorded - -- in update frame CAF/DICT functions will be - -- subsumed by this enclosing cc - { enterCostCentre cl_info cc body + -- in the update frame + { enterCostCentreThunk (CmmReg nodeReg) ; cgExpr body } } @@ -307,16 +306,14 @@ mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do ; bindArgsToStack stk_args ; setRealAndVirtualSp sp_top - -- Enter the cost-centre, if required - -- ToDo: It's not clear why this is outside the funWrapper, - -- but the tickyEnterFun is inside. Perhaps we can put - -- them together? - ; enterCostCentre cl_info cc body - - -- Do the business + -- Do the business ; funWrapper cl_info reg_args reg_save_code $ do { tickyEnterFun cl_info - ; cgExpr body } + ; enterCostCentreFun cc $ + CmmMachOp mo_wordSub [ CmmReg nodeReg + , CmmLit (mkIntCLit (funTag cl_info)) ] + + ; cgExpr body } } \end{code} diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs index 26489e5945..17bb9d0ad8 100644 --- a/compiler/codeGen/CgCon.lhs +++ b/compiler/codeGen/CgCon.lhs @@ -223,9 +223,9 @@ buildDynCon' _ binder ccs con args where lf_info = mkConLFInfo con - use_cc -- cost-centre to stick in the object - | currentOrSubsumedCCS ccs = curCCS - | otherwise = CmmLit (mkCCostCentreStack ccs) + use_cc -- cost-centre to stick in the object + | isCurrentCCS ccs = curCCS + | otherwise = panic "buildDynCon: non-current CCS not implemented" blame_cc = use_cc -- cost-centre on which to blame the alloc (same) \end{code} diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs index 1f16c1feee..5ffa4e550d 100644 --- a/compiler/codeGen/CgExpr.lhs +++ b/compiler/codeGen/CgExpr.lhs @@ -258,7 +258,7 @@ SCC expressions are treated specially. They set the current cost centre. \begin{code} -cgExpr (StgSCC cc expr) = do emitSetCCC cc; cgExpr expr +cgExpr (StgSCC cc tick push expr) = do emitSetCCC cc tick push; cgExpr expr \end{code} %******************************************************** diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs index ebdde2d31a..9e9cc8d1d8 100644 --- a/compiler/codeGen/CgHeapery.lhs +++ b/compiler/codeGen/CgHeapery.lhs @@ -575,13 +575,7 @@ allocDynClosure cl_info use_cc _blame_cc amodes_with_offsets -- SAY WHAT WE ARE ABOUT TO DO ; profDynAlloc cl_info use_cc - -- ToDo: This is almost certainly wrong - -- We're ignoring blame_cc. But until we've - -- fixed the boxing hack in chooseDynCostCentres etc, - -- we're worried about making things worse by "fixing" - -- this part to use blame_cc! - - ; tickyDynAlloc cl_info + ; tickyDynAlloc cl_info -- ALLOCATE THE OBJECT ; base <- getHpRelOffset info_offset diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs index b58fbb4238..b43751361c 100644 --- a/compiler/codeGen/CgProf.hs +++ b/compiler/codeGen/CgProf.hs @@ -10,13 +10,13 @@ module CgProf ( mkCCostCentre, mkCCostCentreStack, -- Cost-centre Profiling - dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, - enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, - chooseDynCostCentres, - costCentreFrom, + dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, + enterCostCentreThunk, + enterCostCentreFun, + costCentreFrom, curCCS, curCCSAddr, emitCostCentreDecl, emitCostCentreStackDecl, - emitSetCCC, emitCCS, + emitSetCCC, -- Lag/drag/void stuff ldvEnter, ldvEnterClosure, ldvRecordCreate @@ -40,10 +40,8 @@ import OldCmm import OldCmmUtils import CLabel -import Id import qualified Module import CostCentre -import StgSyn import StaticFlags import FastString import Module @@ -108,6 +106,9 @@ profDynAlloc cl_info ccs -- | Record the allocation of a closure (size is given by a CmmExpr) -- The size must be in words, because the allocation counter in a CCS counts -- in words. +-- +-- This API is used by the @CCS_ALLOC()@ macro in @.cmm@ code. +-- profAlloc :: CmmExpr -> CmmExpr -> Code profAlloc words ccs = ifProfiling $ @@ -121,160 +122,21 @@ profAlloc words ccs where alloc_rep = typeWidth REP_CostCentreStack_mem_alloc --- ---------------------------------------------------------------------- --- Setting the cost centre in a new closure - -chooseDynCostCentres :: CostCentreStack - -> [Id] -- Args - -> StgExpr -- Body - -> FCode (CmmExpr, CmmExpr) --- Called when alllcating a closure --- Tells which cost centre to put in the object, and which --- to blame the cost of allocation on -chooseDynCostCentres ccs args body = do - -- Cost-centre we record in the object - use_ccs <- emitCCS ccs - - -- Cost-centre on whom we blame the allocation - let blame_ccs - | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS) - | otherwise = use_ccs - - return (use_ccs, blame_ccs) - - --- Some CostCentreStacks are a sequence of pushes on top of CCCS. --- These pushes must be performed before we can refer to the stack in --- an expression. -emitCCS :: CostCentreStack -> FCode CmmExpr -emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) - where - (cc's, ccs') = decomposeCCS ccs - - push_em ccs [] = return ccs - push_em ccs (cc:rest) = do - tmp <- newTemp bWord -- TODO FIXME NOW - pushCostCentre tmp ccs cc - push_em (CmmReg (CmmLocal tmp)) rest - -ccsExpr :: CostCentreStack -> CmmExpr -ccsExpr ccs - | isCurrentCCS ccs = curCCS - | otherwise = CmmLit (mkCCostCentreStack ccs) - - -isBox :: StgExpr -> Bool --- If it's an utterly trivial RHS, then it must be --- one introduced by boxHigherOrderArgs for profiling, --- so we charge it to "OVERHEAD". --- This looks like a GROSS HACK to me --SDM -isBox (StgApp _ []) = True -isBox _ = False - - -- ----------------------------------------------------------------------- -- Setting the current cost centre on entry to a closure --- For lexically scoped profiling we have to load the cost centre from --- the closure entered, if the costs are not supposed to be inherited. --- This is done immediately on entering the fast entry point. - --- Load current cost centre from closure, if not inherited. --- Node is guaranteed to point to it, if profiling and not inherited. - -enterCostCentre - :: ClosureInfo - -> CostCentreStack - -> StgExpr -- The RHS of the closure - -> Code - --- We used to have a special case for bindings of form --- f = g True --- where g has arity 2. The RHS is a thunk, but we don't --- need to update it; and we want to subsume costs. --- We don't have these sort of PAPs any more, so the special --- case has gone away. - -enterCostCentre closure_info ccs body - = ifProfiling $ - ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) - enter_cost_centre closure_info ccs body - -enter_cost_centre :: ClosureInfo -> CostCentreStack -> StgExpr -> Code -enter_cost_centre closure_info ccs body - | isSubsumedCCS ccs - = ASSERT(isToplevClosure closure_info) - ASSERT(re_entrant) - enter_ccs_fsub - - | isDerivedFromCurrentCCS ccs - = do { - if re_entrant && not is_box - then - enter_ccs_fun node_ccs - else - stmtC (CmmStore curCCSAddr node_ccs) - - -- don't forget to bump the scc count. This closure might have been - -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal - -- pass has turned into simply let x = e in ...x... and attached - -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that - -- we don't lose the scc counter, bump it in the entry code for x. - -- ToDo: for a multi-push we should really bump the counter for - -- each of the intervening CCSs, not just the top one. - ; when (not (isCurrentCCS ccs)) $ - stmtC (bumpSccCount curCCS) - } - - | isCafCCS ccs - = ASSERT(isToplevClosure closure_info) - ASSERT(not re_entrant) - do { -- This is just a special case of the isDerivedFromCurrentCCS - -- case above. We could delete this, but it's a micro - -- optimisation and saves a bit of code. - stmtC (CmmStore curCCSAddr enc_ccs) - ; stmtC (bumpSccCount node_ccs) - } - - | otherwise - = panic "enterCostCentre" - where - enc_ccs = CmmLit (mkCCostCentreStack ccs) - re_entrant = closureReEntrant closure_info - node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag)) - is_box = isBox body - - -- if this is a function, then node will be tagged; we must subract the tag - node_tag = funTag closure_info - --- set the current CCS when entering a PAP -enterCostCentrePAP :: CmmExpr -> Code -enterCostCentrePAP closure = - ifProfiling $ do - enter_ccs_fun (costCentreFrom closure) - enteringPAP 1 - enterCostCentreThunk :: CmmExpr -> Code enterCostCentreThunk closure = ifProfiling $ do stmtC $ CmmStore curCCSAddr (costCentreFrom closure) -enter_ccs_fun :: CmmExpr -> Code -enter_ccs_fun stack = emitRtsCall rtsPackageId (fsLit "EnterFunCCS") [CmmHinted stack AddrHint] False - -- ToDo: vols - -enter_ccs_fsub :: Code -enter_ccs_fsub = enteringPAP 0 - --- When entering a PAP, EnterFunCCS is called by both the PAP entry --- code and the function entry code; we don't want the function's --- entry code to also update CCCS in the event that it was called via --- a PAP, so we set the flag entering_PAP to indicate that we are --- entering via a PAP. -enteringPAP :: Integer -> Code -enteringPAP n - = stmtC (CmmStore (CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "entering_PAP")))) - (CmmLit (CmmInt n cIntWidth))) +enterCostCentreFun :: CostCentreStack -> CmmExpr -> Code +enterCostCentreFun ccs closure = + ifProfiling $ do + if isCurrentCCS ccs + then emitRtsCall rtsPackageId (fsLit "enterFunCCS") + [CmmHinted (costCentreFrom closure) AddrHint] False + else return () -- top-level function, nothing to do ifProfiling :: Code -> Code ifProfiling code @@ -286,7 +148,6 @@ ifProfilingL xs | opt_SccProfilingOn = xs | otherwise = [] - -- --------------------------------------------------------------------------- -- Initialising Cost Centres & CCSs @@ -306,15 +167,15 @@ emitCostCentreDecl cc = do modl, -- char *module, zero, -- StgWord time_ticks zero64, -- StgWord64 mem_alloc - subsumed, -- StgInt is_caf - zero -- struct _CostCentre *link + is_caf, -- StgInt is_caf + zero -- struct _CostCentre *link ] ; emitDataLits (mkCCLabel cc) lits } where - subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF - | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring - + is_caf | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF + | otherwise = zero + emitCostCentreStackDecl :: CostCentreStack @@ -349,23 +210,21 @@ sizeof_ccs_words -- --------------------------------------------------------------------------- -- Set the current cost centre stack -emitSetCCC :: CostCentre -> Code -emitSetCCC cc +emitSetCCC :: CostCentre -> Bool -> Bool -> Code +emitSetCCC cc tick push | not opt_SccProfilingOn = nopC | otherwise = do tmp <- newTemp bWord -- TODO FIXME NOW - ASSERT( sccAbleCostCentre cc ) - pushCostCentre tmp curCCS cc - stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp))) - when (isSccCountCostCentre cc) $ - stmtC (bumpSccCount curCCS) + pushCostCentre tmp curCCS cc + when tick $ stmtC (bumpSccCount (CmmReg (CmmLocal tmp))) + when push $ stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp))) pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code pushCostCentre result ccs cc = emitRtsCallWithResult result AddrHint rtsPackageId - (fsLit "PushCostCentre") [CmmHinted ccs AddrHint, - CmmHinted (CmmLit (mkCCostCentre cc)) AddrHint] + (fsLit "pushCostCentre") [CmmHinted ccs AddrHint, + CmmHinted (CmmLit (mkCCostCentre cc)) AddrHint] False bumpSccCount :: CmmExpr -> CmmStmt diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 67dcd2d90f..4f60d0a6cf 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -151,8 +151,8 @@ cgRhs :: Id -> StgRhs -> FCode (CgIdInfo, CmmAGraph) -- The returned values are the binding for the environment -- and the Initialization Code that witnesses the binding -cgRhs name (StgRhsCon maybe_cc con args) - = buildDynCon name maybe_cc con args +cgRhs name (StgRhsCon cc con args) + = buildDynCon name cc con args cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag srt args body @@ -300,12 +300,13 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body (length args) body fv_details -- BUILD THE OBJECT - ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body +-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body + ; let use_cc = curCCS; blame_cc = curCCS ; emit (mkComment $ mkFastString "calling allocDynClosure") ; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off) ; let info_tbl = mkCmmInfo closure_info ; (tmp, init) <- allocDynClosure info_tbl lf_info use_cc blame_cc - (map toVarArg fv_details) + (map toVarArg fv_details) -- RETURN ; regIdInfo bndr lf_info tmp init } @@ -324,7 +325,7 @@ cgStdThunk -> [StgArg] -- payload -> FCode (CgIdInfo, CmmAGraph) -cgStdThunk bndr cc _bndr_info body lf_info payload +cgStdThunk bndr _cc _bndr_info _body lf_info payload = do -- AHA! A STANDARD-FORM THUNK { -- LAY OUT THE OBJECT mod_name <- getModuleName @@ -337,7 +338,8 @@ cgStdThunk bndr cc _bndr_info body lf_info payload NoC_SRT -- No SRT for a std-form closure descr - ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body +-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body + ; let use_cc = curCCS; blame_cc = curCCS -- BUILD THE OBJECT ; let info_tbl = mkCmmInfo closure_info @@ -394,7 +396,7 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details lf_info = closureLFInfo cl_info info_tbl = mkCmmInfo cl_info -closureCodeBody top_lvl bndr cl_info cc args arity body fv_details +closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details = ASSERT( length args > 0 ) do { -- Allocate the global ticky counter, -- and establish the ticky-counter @@ -424,8 +426,7 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details -- Main payload ; entryHeapCheck cl_info offset node' arity arg_regs $ do - { enterCostCentre cl_info cc body - ; fv_bindings <- mapM bind_fv fv_details + { fv_bindings <- mapM bind_fv fv_details -- Load free vars out of closure *after* -- heap check, to reduce live vars over check ; if node_points then load_fvs node lf_info fv_bindings @@ -473,7 +474,7 @@ mkSlowEntryCode cl_info arg_regs -- function closure is already in `Node' ----------------------------------------- thunkCode :: ClosureInfo -> [(NonVoid Id, VirtualHpOffset)] -> CostCentreStack -> LocalReg -> Int -> StgExpr -> FCode () -thunkCode cl_info fv_details cc node arity body +thunkCode cl_info fv_details _cc node arity body = do { let node_points = nodeMustPointToIt (closureLFInfo cl_info) node' = if node_points then Just node else Nothing ; tickyEnterThunk cl_info @@ -493,7 +494,7 @@ thunkCode cl_info fv_details cc node arity body -- that cc of enclosing scope will be recorded -- in update frame CAF/DICT functions will be -- subsumed by this enclosing cc - do { enterCostCentre cl_info cc body + do { enterCostCentreThunk (CmmReg nodeReg) ; let lf_info = closureLFInfo cl_info ; fv_bindings <- mapM bind_fv fv_details ; load_fvs node lf_info fv_bindings diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs index a357db4d05..e17ac4fd32 100644 --- a/compiler/codeGen/StgCmmCon.hs +++ b/compiler/codeGen/StgCmmCon.hs @@ -216,8 +216,8 @@ buildDynCon' _ binder ccs con args lf_info = mkConLFInfo con use_cc -- cost-centre to stick in the object - | currentOrSubsumedCCS ccs = curCCS - | otherwise = CmmLit (mkCCostCentreStack ccs) + | isCurrentCCS ccs = curCCS + | otherwise = panic "buildDynCon: non-current CCS not implemented" blame_cc = use_cc -- cost-centre on which to blame the alloc (same) diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index cb68f51bd4..bc4e9df44f 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -59,7 +59,7 @@ cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) = cgExpr (StgOpApp op args ty) = cgOpApp op args ty cgExpr (StgConApp con args) = cgConApp con args -cgExpr (StgSCC cc expr) = do { emitSetCCC cc; cgExpr expr } +cgExpr (StgSCC cc tick push expr) = do { emitSetCCC cc tick push; cgExpr expr } cgExpr (StgTick m n expr) = do { emit (mkTickBox m n); cgExpr expr } cgExpr (StgLit lit) = do cmm_lit <- cgLit lit emitReturn [CmmLit cmm_lit] diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index 857fd38e27..690b0a9622 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -97,11 +97,6 @@ allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets ; let rep = cit_rep info_tbl ; tickyDynAlloc rep lf_info ; profDynAlloc rep use_cc - -- ToDo: This is almost certainly wrong - -- We're ignoring blame_cc. But until we've - -- fixed the boxing hack in chooseDynCostCentres etc, - -- we're worried about making things worse by "fixing" - -- this part to use blame_cc! -- FIND THE OFFSET OF THE INFO-PTR WORD ; let info_offset = virt_hp + 1 diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index b1aca6e37e..17b61c6a59 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -12,11 +12,10 @@ module StgCmmProf ( -- Cost-centre Profiling dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, - enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, - chooseDynCostCentres, - costCentreFrom, + enterCostCentreThunk, + costCentreFrom, curCCS, curCCSAddr, - emitSetCCC, emitCCS, + emitSetCCC, saveCurrentCostCentre, restoreCurrentCostCentre, @@ -43,17 +42,14 @@ import Cmm import CmmUtils import CLabel -import Id import qualified Module import CostCentre -import StgSyn import StaticFlags import FastString import Module -import Constants -- Lots of field offsets +import Constants -- Lots of field offsets import Outputable -import Data.Char import Control.Monad ----------------------------------------------------------------------------- @@ -177,161 +173,14 @@ profAlloc words ccs where alloc_rep = REP_CostCentreStack_mem_alloc --- ---------------------------------------------------------------------- --- Setting the cost centre in a new closure - -chooseDynCostCentres :: CostCentreStack - -> [Id] -- Args - -> StgExpr -- Body - -> FCode (CmmExpr, CmmExpr) --- Called when allocating a closure --- Tells which cost centre to put in the object, and which --- to blame the cost of allocation on -chooseDynCostCentres ccs args body = do - -- Cost-centre we record in the object - use_ccs <- emitCCS ccs - - -- Cost-centre on whom we blame the allocation - let blame_ccs - | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS) - | otherwise = use_ccs - - return (use_ccs, blame_ccs) - - --- Some CostCentreStacks are a sequence of pushes on top of CCCS. --- These pushes must be performed before we can refer to the stack in --- an expression. -emitCCS :: CostCentreStack -> FCode CmmExpr -emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) - where - (cc's, ccs') = decomposeCCS ccs - - push_em ccs [] = return ccs - push_em ccs (cc:rest) = do - tmp <- newTemp ccsType - pushCostCentre tmp ccs cc - push_em (CmmReg (CmmLocal tmp)) rest - -ccsExpr :: CostCentreStack -> CmmExpr -ccsExpr ccs - | isCurrentCCS ccs = curCCS - | otherwise = CmmLit (mkCCostCentreStack ccs) - - -isBox :: StgExpr -> Bool --- If it's an utterly trivial RHS, then it must be --- one introduced by boxHigherOrderArgs for profiling, --- so we charge it to "OVERHEAD". --- This looks like a GROSS HACK to me --SDM -isBox (StgApp _ []) = True -isBox _ = False - - -- ----------------------------------------------------------------------- -- Setting the current cost centre on entry to a closure --- For lexically scoped profiling we have to load the cost centre from --- the closure entered, if the costs are not supposed to be inherited. --- This is done immediately on entering the fast entry point. - --- Load current cost centre from closure, if not inherited. --- Node is guaranteed to point to it, if profiling and not inherited. - -enterCostCentre - :: ClosureInfo - -> CostCentreStack - -> StgExpr -- The RHS of the closure - -> FCode () - --- We used to have a special case for bindings of form --- f = g True --- where g has arity 2. The RHS is a thunk, but we don't --- need to update it; and we want to subsume costs. --- We don't have these sort of PAPs any more, so the special --- case has gone away. - -enterCostCentre closure_info ccs body - = ifProfiling $ - ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) - enter_cost_centre closure_info ccs body - -enter_cost_centre :: ClosureInfo -> CostCentreStack -> StgExpr -> FCode () -enter_cost_centre closure_info ccs body - | isSubsumedCCS ccs - = ASSERT(isToplevClosure closure_info) - ASSERT(re_entrant) - enter_ccs_fsub - - | isDerivedFromCurrentCCS ccs - = do { - if re_entrant && not is_box - then - enter_ccs_fun node_ccs - else - emit (mkStore curCCSAddr node_ccs) - - -- don't forget to bump the scc count. This closure might have been - -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal - -- pass has turned into simply let x = e in ...x... and attached - -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that - -- we don't lose the scc counter, bump it in the entry code for x. - -- ToDo: for a multi-push we should really bump the counter for - -- each of the intervening CCSs, not just the top one. - ; when (not (isCurrentCCS ccs)) $ - emit (bumpSccCount curCCS) - } - - | isCafCCS ccs - = ASSERT(isToplevClosure closure_info) - ASSERT(not re_entrant) - do { -- This is just a special case of the isDerivedFromCurrentCCS - -- case above. We could delete this, but it's a micro - -- optimisation and saves a bit of code. - emit (mkStore curCCSAddr enc_ccs) - ; emit (bumpSccCount node_ccs) - } - - | otherwise - = panic "enterCostCentre" - where - enc_ccs = CmmLit (mkCCostCentreStack ccs) - re_entrant = closureReEntrant closure_info - node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag)) - is_box = isBox body - - -- if this is a function, then node will be tagged; we must subract the tag - node_tag = funTag closure_info - --- set the current CCS when entering a PAP -enterCostCentrePAP :: CmmExpr -> FCode () -enterCostCentrePAP closure = - ifProfiling $ do - enter_ccs_fun (costCentreFrom closure) - enteringPAP 1 - enterCostCentreThunk :: CmmExpr -> FCode () enterCostCentreThunk closure = ifProfiling $ do emit $ mkStore curCCSAddr (costCentreFrom closure) -enter_ccs_fun :: CmmExpr -> FCode () -enter_ccs_fun stack = emitRtsCall rtsPackageId (fsLit "EnterFunCCS") [(stack,AddrHint)] False - -- ToDo: vols - -enter_ccs_fsub :: FCode () -enter_ccs_fsub = enteringPAP 0 - --- When entering a PAP, EnterFunCCS is called by both the PAP entry --- code and the function entry code; we don't want the function's --- entry code to also update CCCS in the event that it was called via --- a PAP, so we set the flag entering_PAP to indicate that we are --- entering via a PAP. -enteringPAP :: Integer -> FCode () -enteringPAP n - = emit (mkStore (CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "entering_PAP")))) - (CmmLit (CmmInt n cIntWidth))) - ifProfiling :: FCode () -> FCode () ifProfiling code | opt_SccProfilingOn = code @@ -368,14 +217,10 @@ emitCostCentreDecl cc = do modl, -- char *module, zero, -- StgWord time_ticks zero64, -- StgWord64 mem_alloc - subsumed, -- StgInt is_caf - zero -- struct _CostCentre *link + zero -- struct _CostCentre *link ] ; emitDataLits (mkCCLabel cc) lits } - where - subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF - | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring emitCostCentreStackDecl :: CostCentreStack -> FCode () emitCostCentreStackDecl ccs @@ -408,16 +253,14 @@ sizeof_ccs_words -- --------------------------------------------------------------------------- -- Set the current cost centre stack -emitSetCCC :: CostCentre -> FCode () -emitSetCCC cc +emitSetCCC :: CostCentre -> Bool -> Bool -> FCode () +emitSetCCC cc tick push | not opt_SccProfilingOn = nopC | otherwise = do tmp <- newTemp ccsType -- TODO FIXME NOW - ASSERT( sccAbleCostCentre cc ) - pushCostCentre tmp curCCS cc - emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp))) - when (isSccCountCostCentre cc) $ - emit (bumpSccCount curCCS) + pushCostCentre tmp curCCS cc + when tick $ emit (bumpSccCount (CmmReg (CmmLocal tmp))) + when push $ emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp))) pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () pushCostCentre result ccs cc diff --git a/compiler/coreSyn/CoreArity.lhs b/compiler/coreSyn/CoreArity.lhs index a3acd47802..0292cf967e 100644 --- a/compiler/coreSyn/CoreArity.lhs +++ b/compiler/coreSyn/CoreArity.lhs @@ -67,7 +67,7 @@ manifestArity :: CoreExpr -> Arity -- ^ manifestArity sees how many leading value lambdas there are manifestArity (Lam v e) | isId v = 1 + manifestArity e | otherwise = manifestArity e -manifestArity (Note n e) | notSccNote n = manifestArity e +manifestArity (Tick t e) | not (tickishIsCode t) = manifestArity e manifestArity (Cast e _) = manifestArity e manifestArity _ = 0 @@ -79,7 +79,7 @@ exprArity e = go e go (Var v) = idArity v go (Lam x e) | isId x = go e + 1 | otherwise = go e - go (Note n e) | notSccNote n = go e + go (Tick t e) | not (tickishIsCode t) = go e go (Cast e co) = go e `min` length (typeArity (pSnd (coercionKind co))) -- Note [exprArity invariant] go (App e (Type _)) = go e @@ -454,7 +454,7 @@ type OneShot = Bool -- False <=> Know nothing vanillaArityType :: ArityType vanillaArityType = ATop [] -- Totally uninformative --- ^ The Arity returned is the number of value args the [_$_] +-- ^ The Arity returned is the number of value args the -- expression can be applied to without doing much work exprEtaExpandArity :: CheapFun -> CoreExpr -> Arity -- exprEtaExpandArity is used when eta expanding @@ -467,7 +467,7 @@ exprEtaExpandArity cheap_fun e ATop [] -> 0 ABot n -> n where - has_lam (Note _ e) = has_lam e + has_lam (Tick _ e) = has_lam e has_lam (Lam b e) = isId b || has_lam e has_lam _ = False @@ -552,9 +552,8 @@ type CheapFun = CoreExpr -> Maybe Type -> Bool -- of the expression; Nothing means "don't know" arityType :: CheapFun -> CoreExpr -> ArityType -arityType cheap_fn (Note n e) - | notSccNote n = arityType cheap_fn e -arityType cheap_fn (Cast e co) + +arityType cheap_fn (Cast e co) = arityType cheap_fn e `andArityType` ATop (typeArity (pSnd (coercionKind co))) -- See Note [exprArity invariant]; must be true of @@ -614,6 +613,9 @@ arityType cheap_fn (Let b e) cheap_bind (Rec prs) = all is_cheap prs is_cheap (b,e) = cheap_fn e (Just (idType b)) +arityType cheap_fn (Tick t e) + | not (tickishIsCode t) = arityType cheap_fn e + arityType _ _ = vanillaArityType \end{code} @@ -762,8 +764,8 @@ etaInfoApp subst (Let b e) eis where (subst', b') = subst_bind subst b -etaInfoApp subst (Note note e) eis - = Note note (etaInfoApp subst e eis) +etaInfoApp subst (Tick t e) eis + = Tick (substTickish subst t) (etaInfoApp subst e eis) etaInfoApp subst e eis = go (subst_expr subst e) eis diff --git a/compiler/coreSyn/CoreFVs.lhs b/compiler/coreSyn/CoreFVs.lhs index 5d8ded0044..85c23aeb32 100644 --- a/compiler/coreSyn/CoreFVs.lhs +++ b/compiler/coreSyn/CoreFVs.lhs @@ -185,7 +185,7 @@ expr_fvs (Type ty) = someVars (tyVarsOfType ty) expr_fvs (Coercion co) = someVars (tyCoVarsOfCo co) expr_fvs (Var var) = oneVar var expr_fvs (Lit _) = noVars -expr_fvs (Note _ expr) = expr_fvs expr +expr_fvs (Tick t expr) = tickish_fvs t `union` expr_fvs expr expr_fvs (App fun arg) = expr_fvs fun `union` expr_fvs arg expr_fvs (Lam bndr body) = addBndr bndr (expr_fvs body) expr_fvs (Cast expr co) = expr_fvs expr `union` someVars (tyCoVarsOfCo co) @@ -212,6 +212,10 @@ rhs_fvs (bndr, rhs) = expr_fvs rhs `union` --------- exprs_fvs :: [CoreExpr] -> FV exprs_fvs exprs = foldr (union . expr_fvs) noVars exprs + +tickish_fvs :: Tickish Id -> FV +tickish_fvs (Breakpoint _ ids) = someVars (mkVarSet ids) +tickish_fvs _ = noVars \end{code} @@ -255,7 +259,7 @@ exprOrphNames e go (Coercion co) = orphNamesOfCo co go (App e1 e2) = go e1 `unionNameSets` go e2 go (Lam v e) = go e `delFromNameSet` idName v - go (Note _ e) = go e + go (Tick _ e) = go e go (Cast e co) = go e `unionNameSets` orphNamesOfCo co go (Let (NonRec _ r) e) = go e `unionNameSets` go r go (Let (Rec prs) e) = exprsOrphNames (map snd prs) `unionNameSets` go e @@ -536,10 +540,12 @@ freeVars (Cast expr co) expr2 = freeVars expr cfvs = tyCoVarsOfCo co -freeVars (Note other_note expr) - = (freeVarsOf expr2, AnnNote other_note expr2) +freeVars (Tick tickish expr) + = (tickishFVs tickish `unionFVs` freeVarsOf expr2, AnnTick tickish expr2) where expr2 = freeVars expr + tickishFVs (Breakpoint _ ids) = mkVarSet ids + tickishFVs _ = emptyVarSet freeVars (Type ty) = (tyVarsOfType ty, AnnType ty) diff --git a/compiler/coreSyn/CoreLint.lhs b/compiler/coreSyn/CoreLint.lhs index 1b152e122f..e5ed04bb69 100644 --- a/compiler/coreSyn/CoreLint.lhs +++ b/compiler/coreSyn/CoreLint.lhs @@ -249,7 +249,13 @@ lintCoreExpr (Cast expr co) ; checkTys from_ty expr_ty (mkCastErr from_ty expr_ty) ; return to_ty } -lintCoreExpr (Note _ expr) +lintCoreExpr (Tick (Breakpoint _ ids) expr) + = do forM_ ids $ \id -> do + checkDeadIdOcc id + lookupIdInScope id + lintCoreExpr expr + +lintCoreExpr (Tick _other_tickish expr) = lintCoreExpr expr lintCoreExpr (Let (NonRec tv (Type ty)) body) diff --git a/compiler/coreSyn/CorePrep.lhs b/compiler/coreSyn/CorePrep.lhs index 1c076716db..06d8d5bab4 100644 --- a/compiler/coreSyn/CorePrep.lhs +++ b/compiler/coreSyn/CorePrep.lhs @@ -478,12 +478,17 @@ cpeRhsE env (Let bind expr) ; (floats, body) <- cpeRhsE env' expr ; return (new_binds `appendFloats` floats, body) } -cpeRhsE env (Note note expr) - | ignoreNote note +cpeRhsE env (Tick tickish expr) + | ignoreTickish tickish = cpeRhsE env expr - | otherwise -- Just SCCs actually + | otherwise -- Just SCCs actually = do { body <- cpeBodyNF env expr - ; return (emptyFloats, Note note body) } + ; return (emptyFloats, Tick tickish' body) } + where + tickish' | Breakpoint n fvs <- tickish + = Breakpoint n (map (lookupCorePrepEnv env) fvs) + | otherwise + = tickish cpeRhsE env (Cast expr co) = do { (floats, expr') <- cpeRhsE env expr @@ -495,11 +500,6 @@ cpeRhsE env expr@(Lam {}) ; body' <- cpeBodyNF env' body ; return (emptyFloats, mkLams bndrs' body') } -cpeRhsE env (Case (Var id) bndr ty [(DEFAULT,[],expr)]) - | Just (TickBox {}) <- isTickBoxOp_maybe id - = do { body <- cpeBodyNF env expr - ; return (emptyFloats, Case (Var id) bndr ty [(DEFAULT,[],body)]) } - cpeRhsE env (Case scrut bndr ty alts) = do { (floats, scrut') <- cpeBody env scrut ; let bndr1 = bndr `setIdUnfolding` evaldUnfolding @@ -561,13 +561,14 @@ rhsToBodyNF rhs = do { (floats,body) <- rhsToBody rhs rhsToBody :: CpeRhs -> UniqSM (Floats, CpeBody) -- Remove top level lambdas by let-binding -rhsToBody (Note n expr) - -- You can get things like - -- case e of { p -> coerce t (\s -> ...) } +rhsToBody (Tick t expr) + | not (tickishScoped t) -- we can only float out of non-scoped annotations = do { (floats, expr') <- rhsToBody expr - ; return (floats, Note n expr') } + ; return (floats, Tick t expr') } rhsToBody (Cast e co) + -- You can get things like + -- case e of { p -> coerce t (\s -> ...) } = do { (floats, e') <- rhsToBody e ; return (floats, Cast e' co) } @@ -662,8 +663,8 @@ cpeApp env expr ; (fun', hd, _, floats, ss) <- collect_args fun depth ; return (Cast fun' co, hd, ty2, floats, ss) } - collect_args (Note note fun) depth - | ignoreNote note -- Drop these notes altogether + collect_args (Tick tickish fun) depth + | ignoreTickish tickish -- Drop these notes altogether = collect_args fun depth -- They aren't used by the code generator -- N-variable fun, better let-bind it @@ -759,9 +760,9 @@ saturateDataToTag sat_expr ; return (Case arg arg_id1 (exprType app) [(DEFAULT, [], fun `App` Var arg_id1)]) } - eval_data2tag_arg (Note note app) -- Scc notes can appear + eval_data2tag_arg (Tick t app) -- Scc notes can appear = do { app' <- eval_data2tag_arg app - ; return (Note note app') } + ; return (Tick t app') } eval_data2tag_arg other -- Should not happen = pprPanic "eval_data2tag" (ppr other) @@ -784,18 +785,9 @@ of the scope of a `seq`, or dropped the `seq` altogether. %************************************************************************ \begin{code} - -- We don't ignore SCCs, since they require some code generation -ignoreNote :: Note -> Bool --- Tells which notes to drop altogether; they are ignored by code generation --- Do not ignore SCCs! --- It's important that we do drop InlineMe notes; for example --- unzip = __inline_me__ (/\ab. foldr (..) (..)) --- Here unzip gets arity 1 so we'll eta-expand it. But we don't --- want to get this: --- unzip = /\ab \xs. (__inline_me__ ...) a b xs -ignoreNote (CoreNote _) = True -ignoreNote _other = False - +-- we don't ignore any Tickishes at the moment. +ignoreTickish :: Tickish Id -> Bool +ignoreTickish _ = False cpe_ExprIsTrivial :: CoreExpr -> Bool -- Version that doesn't consider an scc annotation to be trivial. @@ -804,7 +796,7 @@ cpe_ExprIsTrivial (Type _) = True cpe_ExprIsTrivial (Coercion _) = True cpe_ExprIsTrivial (Lit _) = True cpe_ExprIsTrivial (App e arg) = isTypeArg arg && cpe_ExprIsTrivial e -cpe_ExprIsTrivial (Note n e) = notSccNote n && cpe_ExprIsTrivial e +cpe_ExprIsTrivial (Tick t e) = not (tickishIsCode t) && cpe_ExprIsTrivial e cpe_ExprIsTrivial (Cast e _) = cpe_ExprIsTrivial e cpe_ExprIsTrivial (Lam b body) | isTyVar b = cpe_ExprIsTrivial body cpe_ExprIsTrivial _ = False @@ -1056,8 +1048,8 @@ dropDeadCode (Case scrut bndr t alts) dropDeadCode (Cast e c) = (Cast e' c, fvs) where !(e', fvs) = dropDeadCode e -dropDeadCode (Note n e) - = (Note n e', fvs) +dropDeadCode (Tick t e) + = (Tick t e', fvs) where !(e', fvs) = dropDeadCode e dropDeadCode e = (e, emptyVarSet) -- Lit, Type, Coercion diff --git a/compiler/coreSyn/CoreSubst.lhs b/compiler/coreSyn/CoreSubst.lhs index 47658a03ee..c61e20e935 100644 --- a/compiler/coreSyn/CoreSubst.lhs +++ b/compiler/coreSyn/CoreSubst.lhs @@ -16,6 +16,7 @@ module CoreSubst ( substTy, substCo, substExpr, substExprSC, substBind, substBindSC, substUnfolding, substUnfoldingSC, substUnfoldingSource, lookupIdSubst, lookupTvSubst, lookupCvSubst, substIdOcc, + substTickish, -- ** Operations on substitutions emptySubst, mkEmptySubst, mkSubst, mkOpenSubst, substInScope, isEmptySubst, @@ -371,7 +372,7 @@ subst_expr subst expr go (Coercion co) = Coercion (substCo subst co) go (Lit lit) = Lit lit go (App fun arg) = App (go fun) (go arg) - go (Note note e) = Note (go_note note) (go e) + go (Tick tickish e) = Tick (substTickish subst tickish) (go e) go (Cast e co) = Cast (go e) (substCo subst co) -- Do not optimise even identity coercions -- Reason: substitution applies to the LHS of RULES, and @@ -395,8 +396,6 @@ subst_expr subst expr where (subst', bndrs') = substBndrs subst bndrs - go_note note = note - -- | Apply a substititon to an entire 'CoreBind', additionally returning an updated 'Subst' -- that should be used by subsequent substitutons. substBind, substBindSC :: Subst -> CoreBind -> (Subst, CoreBind) @@ -758,6 +757,28 @@ substVarSet subst fvs subst_fv subst fv | isId fv = exprFreeVars (lookupIdSubst (text "substVarSet") subst fv) | otherwise = Type.tyVarsOfType (lookupTvSubst subst fv) + +------------------ +substTickish :: Subst -> Tickish Id -> Tickish Id +substTickish subst (Breakpoint n ids) = Breakpoint n (map do_one ids) + where do_one = getIdFromTrivialExpr . lookupIdSubst (text "subst_tickish") subst +substTickish _subst other = other + +{- Note [substTickish] + +A Breakpoint contains a list of Ids. What happens if we ever want to +substitute an expression for one of these Ids? + +First, we ensure that we only ever substitute trivial expressions for +these Ids, by marking them as NoOccInfo in the occurrence analyser. +Then, when substituting for the Id, we unwrap any type applications +and abstractions to get back to an Id, with getIdFromTrivialExpr. + +Second, we have to ensure that we never try to substitute a literal +for an Id in a breakpoint. We ensure this by never storing an Id with +an unlifted type in a Breakpoint - see Coverage.mkTickish. +Breakpoints can't handle free variables with unlifted types anyway. +-} \end{code} Note [Worker inlining] @@ -902,7 +923,7 @@ simple_opt_expr' subst expr go (Type ty) = Type (substTy subst ty) go (Coercion co) = Coercion (optCoercion (getCvSubst subst) co) go (Lit lit) = Lit lit - go (Note note e) = Note note (go e) + go (Tick tickish e) = Tick (substTickish subst tickish) (go e) go (Cast e co) | isReflCo co' = go e | otherwise = Cast (go e) co' where @@ -1136,8 +1157,8 @@ exprIsConApp_maybe id_unf expr go :: Either InScopeSet Subst -> CoreExpr -> ConCont -> Maybe (DataCon, [Type], [CoreExpr]) - go subst (Note note expr) cont - | notSccNote note = go subst expr cont + go subst (Tick t expr) cont + | not (tickishIsCode t) = go subst expr cont go subst (Cast expr co1) (CC [] co2) = go subst expr (CC [] (subst_co subst co1 `mkTransCo` co2)) go subst (App fun arg) (CC args co) @@ -1276,7 +1297,7 @@ exprIsLiteral_maybe :: IdUnfoldingFun -> CoreExpr -> Maybe Literal exprIsLiteral_maybe id_unf e = case e of Lit l -> Just l - Note _ e' -> exprIsLiteral_maybe id_unf e' + Tick _ e' -> exprIsLiteral_maybe id_unf e' -- dubious? Var v | Just rhs <- expandUnfolding_maybe (id_unf v) -> exprIsLiteral_maybe id_unf rhs _ -> Nothing diff --git a/compiler/coreSyn/CoreSyn.lhs b/compiler/coreSyn/CoreSyn.lhs index 73e2c92f67..e5c14b4356 100644 --- a/compiler/coreSyn/CoreSyn.lhs +++ b/compiler/coreSyn/CoreSyn.lhs @@ -9,9 +9,9 @@ -- | CoreSyn holds all the main data types for use by for the Glasgow Haskell Compiler midsection module CoreSyn ( -- * Main data types - Expr(..), Alt, Bind(..), AltCon(..), Arg, Note(..), - CoreProgram, CoreExpr, CoreAlt, CoreBind, CoreArg, CoreBndr, - TaggedExpr, TaggedAlt, TaggedBind, TaggedArg, TaggedBndr(..), + Expr(..), Alt, Bind(..), AltCon(..), Arg, Tickish(..), + CoreProgram, CoreExpr, CoreAlt, CoreBind, CoreArg, CoreBndr, + TaggedExpr, TaggedAlt, TaggedBind, TaggedArg, TaggedBndr(..), -- ** 'Expr' construction mkLets, mkLams, @@ -35,9 +35,11 @@ module CoreSyn ( isValArg, isTypeArg, isTyCoArg, valArgCount, valBndrCount, isRuntimeArg, isRuntimeVar, - notSccNote, - -- * Unfolding data types + tickishCounts, tickishScoped, tickishIsCode, mkNoTick, mkNoScope, + tickishCanSplit, + + -- * Unfolding data types Unfolding(..), UnfoldingGuidance(..), UnfoldingSource(..), -- ** Constructing 'Unfolding's @@ -87,6 +89,7 @@ import Coercion import Name import Literal import DataCon +import Module import TyCon import BasicTypes import FastString @@ -253,7 +256,7 @@ data Expr b | Let (Bind b) (Expr b) | Case (Expr b) b Type [Alt b] -- See #case_invariant# | Cast (Expr b) Coercion - | Note Note (Expr b) + | Tick (Tickish Id) (Expr b) | Type Type | Coercion Coercion deriving (Data, Typeable) @@ -312,12 +315,85 @@ Note [Type let] See #type_let# \begin{code} +-- | Allows attaching extra information to points in expressions +data Tickish id = + -- | An @{-# SCC #-}@ profiling annotation, either automatically + -- added by the desugarer as a result of -auto-all, or added by + -- the user. + ProfNote { + profNoteCC :: CostCentre, -- ^ the cost centre + profNoteCount :: !Bool, -- ^ bump the entry count? + profNoteScope :: !Bool -- ^ scopes over the enclosed expression + -- (i.e. not just a tick) + } --- | Allows attaching extra information to points in expressions rather than e.g. identifiers. -data Note - = SCC CostCentre -- ^ A cost centre annotation for profiling - | CoreNote String -- ^ A generic core annotation, propagated but not used by GHC - deriving (Data, Typeable) + -- | A "tick" used by HPC to track the execution of each + -- subexpression in the original source code. + | HpcTick { + tickModule :: Module, + tickId :: !Int + } + + -- | A breakpoint for the GHCi debugger. This behaves like an HPC + -- tick, but has a list of free variables which will be available + -- for inspection in GHCi when the program stops at the breakpoint. + -- + -- NB. we must take account of these Ids when (a) counting free variables, + -- and (b) substituting (don't substitute for them) + | Breakpoint + { breakpointId :: !Int + , breakpointFVs :: [id] -- ^ the order of this list is important: + -- it matches the order of the lists in the + -- appropriate entry in HscTypes.ModBreaks. + -- + -- Careful about substitution! See + -- Note [substTickish] in CoreSubst. + } + + deriving (Eq, Ord, Data, Typeable) + + +-- | A "tick" note is one that counts evaluations in some way. We +-- cannot discard a tick, and the compiler should preserve the number +-- of ticks as far as possible. +-- +-- Hwever, we stil allow the simplifier to increase or decrease +-- sharing, so in practice the actual number of ticks may vary, except +-- that we never change the value from zero to non-zero or vice versa. +-- +tickishCounts :: Tickish id -> Bool +tickishCounts n@ProfNote{} = profNoteCount n +tickishCounts HpcTick{} = True +tickishCounts Breakpoint{} = True + +tickishScoped :: Tickish id -> Bool +tickishScoped n@ProfNote{} = profNoteScope n +tickishScoped HpcTick{} = False +tickishScoped Breakpoint{} = True + -- Breakpoints are scoped: eventually we're going to do call + -- stacks, but also this helps prevent the simplifier from moving + -- breakpoints around and changing their result type (see #1531). + +mkNoTick :: Tickish id -> Tickish id +mkNoTick n@ProfNote{} = n {profNoteCount = False} +mkNoTick Breakpoint{} = panic "mkNoTick: Breakpoint" -- cannot split a BP +mkNoTick t = t + +mkNoScope :: Tickish id -> Tickish id +mkNoScope n@ProfNote{} = n {profNoteScope = False} +mkNoScope Breakpoint{} = panic "mkNoScope: Breakpoint" -- cannot split a BP +mkNoScope t = t + +-- | Return True if this source annotation compiles to some code, or will +-- disappear before the backend. +tickishIsCode :: Tickish id -> Bool +tickishIsCode _tickish = True -- all of them for now + +-- | Return True if this Tick can be split into (tick,scope) parts with +-- 'mkNoScope' and 'mkNoTick' respectively. +tickishCanSplit :: Tickish Id -> Bool +tickishCanSplit Breakpoint{} = False +tickishCanSplit _ = True \end{code} @@ -1085,10 +1161,8 @@ collectArgs expr -- | Gets the cost centre enclosing an expression, if any. -- It looks inside lambdas because @(scc \"foo\" \\x.e) = \\x. scc \"foo\" e@ coreExprCc :: Expr b -> CostCentre -coreExprCc (Note (SCC cc) _) = cc -coreExprCc (Note _ e) = coreExprCc e -coreExprCc (Lam _ e) = coreExprCc e -coreExprCc _ = noCostCentre +coreExprCc (Tick (ProfNote { profNoteCC = cc}) _) = cc +coreExprCc _ = noCostCentre \end{code} %************************************************************************ @@ -1136,10 +1210,6 @@ valBndrCount = count isId -- | The number of argument expressions that are values rather than types at their top level valArgCount :: [Arg b] -> Int valArgCount = count isValArg - -notSccNote :: Note -> Bool -notSccNote (SCC {}) = False -notSccNote _ = True \end{code} @@ -1158,7 +1228,7 @@ seqExpr (Lam b e) = seqBndr b `seq` seqExpr e seqExpr (Let b e) = seqBind b `seq` seqExpr e seqExpr (Case e b t as) = seqExpr e `seq` seqBndr b `seq` seqType t `seq` seqAlts as seqExpr (Cast e co) = seqExpr e `seq` seqCo co -seqExpr (Note n e) = seqNote n `seq` seqExpr e +seqExpr (Tick n e) = seqTickish n `seq` seqExpr e seqExpr (Type t) = seqType t seqExpr (Coercion co) = seqCo co @@ -1166,9 +1236,10 @@ seqExprs :: [CoreExpr] -> () seqExprs [] = () seqExprs (e:es) = seqExpr e `seq` seqExprs es -seqNote :: Note -> () -seqNote (CoreNote s) = s `seq` () -seqNote _ = () +seqTickish :: Tickish Id -> () +seqTickish ProfNote{ profNoteCC = cc } = cc `seq` () +seqTickish HpcTick{} = () +seqTickish Breakpoint{ breakpointFVs = ids } = seqBndrs ids seqBndr :: CoreBndr -> () seqBndr b = b `seq` () @@ -1216,7 +1287,7 @@ data AnnExpr' bndr annot | AnnLet (AnnBind bndr annot) (AnnExpr bndr annot) | AnnCast (AnnExpr bndr annot) (annot, Coercion) -- Put an annotation on the (root of) the coercion - | AnnNote Note (AnnExpr bndr annot) + | AnnTick (Tickish Id) (AnnExpr bndr annot) | AnnType Type | AnnCoercion Coercion @@ -1245,14 +1316,14 @@ deAnnotate :: AnnExpr bndr annot -> Expr bndr deAnnotate (_, e) = deAnnotate' e deAnnotate' :: AnnExpr' bndr annot -> Expr bndr -deAnnotate' (AnnType t) = Type t +deAnnotate' (AnnType t) = Type t deAnnotate' (AnnCoercion co) = Coercion co deAnnotate' (AnnVar v) = Var v deAnnotate' (AnnLit lit) = Lit lit deAnnotate' (AnnLam binder body) = Lam binder (deAnnotate body) deAnnotate' (AnnApp fun arg) = App (deAnnotate fun) (deAnnotate arg) deAnnotate' (AnnCast e (_,co)) = Cast (deAnnotate e) co -deAnnotate' (AnnNote note body) = Note note (deAnnotate body) +deAnnotate' (AnnTick tick body) = Tick tick (deAnnotate body) deAnnotate' (AnnLet bind body) = Let (deAnnBind bind) (deAnnotate body) diff --git a/compiler/coreSyn/CoreTidy.lhs b/compiler/coreSyn/CoreTidy.lhs index 110fd72701..4004d3fa0e 100644 --- a/compiler/coreSyn/CoreTidy.lhs +++ b/compiler/coreSyn/CoreTidy.lhs @@ -59,7 +59,7 @@ tidyExpr env (Type ty) = Type (tidyType env ty) tidyExpr env (Coercion co) = Coercion (tidyCo env co) tidyExpr _ (Lit lit) = Lit lit tidyExpr env (App f a) = App (tidyExpr env f) (tidyExpr env a) -tidyExpr env (Note n e) = Note (tidyNote env n) (tidyExpr env e) +tidyExpr env (Tick t e) = Tick (tidyTickish env t) (tidyExpr env e) tidyExpr env (Cast e co) = Cast (tidyExpr env e) (tidyCo env co) tidyExpr env (Let b e) @@ -81,9 +81,10 @@ tidyAlt _case_bndr env (con, vs, rhs) = tidyBndrs env vs =: \ (env', vs) -> (con, vs, tidyExpr env' rhs) ------------- Notes -------------- -tidyNote :: TidyEnv -> Note -> Note -tidyNote _ note = note +------------ Tickish -------------- +tidyTickish :: TidyEnv -> Tickish Id -> Tickish Id +tidyTickish env (Breakpoint ix ids) = Breakpoint ix (map (tidyVarOcc env) ids) +tidyTickish _ other_tickish = other_tickish ------------ Rules -------------- tidyRules :: TidyEnv -> [CoreRule] -> [CoreRule] diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index 71f815f478..5c9d1b2a4c 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -204,7 +204,7 @@ inlineBoringOk e go credit (App f (Type {})) = go credit f go credit (App f a) | credit > 0 , exprIsTrivial a = go (credit-1) f - go credit (Note _ e) = go credit e + go credit (Tick _ e) = go credit e -- dubious go credit (Cast e _) = go credit e go _ (Var {}) = boringCxtOk go _ _ = boringCxtNotOk @@ -348,7 +348,7 @@ sizeExpr bOMB_OUT_SIZE top_args expr = size_up expr where size_up (Cast e _) = size_up e - size_up (Note _ e) = size_up e + size_up (Tick _ e) = size_up e size_up (Type _) = sizeZero -- Types cost nothing size_up (Coercion _) = sizeZero size_up (Lit lit) = sizeN (litSize lit) @@ -1187,7 +1187,7 @@ interestingArg e = go e 0 go (App fn (Type _)) n = go fn n go (App fn (Coercion _)) n = go fn n go (App fn _) n = go fn (n+1) - go (Note _ a) n = go a n + go (Tick _ a) n = go a n go (Cast e _) n = go e n go (Lam v e) n | isTyVar v = go e n diff --git a/compiler/coreSyn/CoreUtils.lhs b/compiler/coreSyn/CoreUtils.lhs index 82c0484bdf..d8c95e2f5a 100644 --- a/compiler/coreSyn/CoreUtils.lhs +++ b/compiler/coreSyn/CoreUtils.lhs @@ -9,16 +9,16 @@ Utility functions on @Core@ syntax -- | Commonly useful utilites for manipulating the Core language module CoreUtils ( -- * Constructing expressions - mkSCC, mkCoerce, - bindNonRec, needsCaseBinding, - mkAltExpr, mkPiType, mkPiTypes, + mkTick, mkTickNoHNF, mkCoerce, + bindNonRec, needsCaseBinding, + mkAltExpr, mkPiType, mkPiTypes, -- * Taking expressions apart findDefault, findAlt, isDefaultAlt, mergeAlts, trimConArgs, -- * Properties of expressions - exprType, coreAltType, coreAltsType, - exprIsDupable, exprIsTrivial, exprIsBottom, + exprType, coreAltType, coreAltsType, + exprIsDupable, exprIsTrivial, getIdFromTrivialExpr, exprIsBottom, exprIsCheap, exprIsExpandable, exprIsCheap', CheapAppFun, exprIsHNF, exprOkForSpeculation, exprIsBig, exprIsConLike, rhsIsStatic, isCheapApp, isExpandableApp, @@ -58,7 +58,6 @@ import IdInfo import Type import Coercion import TyCon -import CostCentre import Unique import Outputable import TysPrim @@ -88,7 +87,7 @@ exprType (Coercion co) = coercionType co exprType (Let _ body) = exprType body exprType (Case _ _ ty _) = ty exprType (Cast _ co) = pSnd (coercionKind co) -exprType (Note _ e) = exprType e +exprType (Tick _ e) = exprType e exprType (Lam binder expr) = mkPiType binder (exprType expr) exprType e@(App _ _) = case collectArgs e of @@ -208,20 +207,62 @@ mkCoerce co expr \end{code} \begin{code} --- | Wraps the given expression in the cost centre unless --- in a way that maximises their utility to the user -mkSCC :: CostCentre -> Expr b -> Expr b - -- Note: Nested SCC's *are* preserved for the benefit of - -- cost centre stack profiling -mkSCC _ (Lit lit) = Lit lit -mkSCC cc (Lam x e) = Lam x (mkSCC cc e) -- Move _scc_ inside lambda -mkSCC cc (Note (SCC cc') e) = Note (SCC cc) (Note (SCC cc') e) -mkSCC cc (Note n e) = Note n (mkSCC cc e) -- Move _scc_ inside notes -mkSCC cc (Cast e co) = Cast (mkSCC cc e) co -- Move _scc_ inside cast -mkSCC cc expr = Note (SCC cc) expr +-- | Wraps the given expression in the source annotation, dropping the +-- annotation if possible. +mkTick :: Tickish Id -> CoreExpr -> CoreExpr + +mkTick t (Cast e co) + = Cast (mkTick t e) co -- Move tick inside cast + +mkTick _ (Lit l) = Lit l + +mkTick t expr@(App f arg) + | not (isRuntimeArg arg) = App (mkTick t f) arg + | isSaturatedConApp expr + = if not (tickishCounts t) + then tickHNFArgs t expr + else if tickishScoped t && tickishCanSplit t + then Tick (mkNoScope t) (tickHNFArgs (mkNoTick t) expr) + else Tick t expr + +mkTick t (Lam x e) + -- if this is a type lambda, or the tick does not count entries, + -- then we can push the tick inside: + | not (isRuntimeVar x) || not (tickishCounts t) = Lam x (mkTick t e) + -- if it is both counting and scoped, we split the tick into its + -- two components, keep the counting tick on the outside of the lambda + -- and push the scoped tick inside. The point of this is that the + -- counting tick can probably be floated, and the lambda may then be + -- in a position to be beta-reduced. + | tickishScoped t && tickishCanSplit t + = Tick (mkNoScope t) (Lam x (mkTick (mkNoTick t) e)) + -- just a counting tick: leave it on the outside + | otherwise = Tick t (Lam x e) + +mkTick t other = Tick t other + +isSaturatedConApp :: CoreExpr -> Bool +isSaturatedConApp e = go e [] + where go (App f a) as = go f (a:as) + go (Var fun) args + = isConLikeId fun && idArity fun == valArgCount args + go (Cast f _) as = go f as + go _ _ = False + +mkTickNoHNF :: Tickish Id -> CoreExpr -> CoreExpr +mkTickNoHNF t e + | exprIsHNF e = tickHNFArgs t e + | otherwise = mkTick t e + +-- push a tick into the arguments of a HNF (call or constructor app) +tickHNFArgs :: Tickish Id -> CoreExpr -> CoreExpr +tickHNFArgs t e = push t e + where + push t (App f (Type u)) = App (push t f) (Type u) + push t (App f arg) = App (push t f) (mkTick t arg) + push _t e = e \end{code} - %************************************************************************ %* * \subsection{Other expression construction} @@ -394,14 +435,11 @@ completely un-applied primops and foreign-call Ids are sufficiently rare that I plan to allow them to be duplicated and put up with saturating them. -Note [SCCs are trivial] -~~~~~~~~~~~~~~~~~~~~~~~ -We used not to treat (_scc_ "foo" x) as trivial, because it really -generates code, (and a heap object when it's a function arg) to -capture the cost centre. However, the profiling system discounts the -allocation costs for such "boxing thunks" whereas the extra costs of -*not* inlining otherwise-trivial bindings can be high, and are hard to -discount. +Note [Tick trivial] +~~~~~~~~~~~~~~~~~~~ +Ticks are not trivial. If we treat "tick<n> x" as trivial, it will be +inlined inside lambdas and the entry count will be skewed, for +example. Furthermore "scc<n> x" will turn into just "x" in mkTick. \begin{code} exprIsTrivial :: CoreExpr -> Bool @@ -410,12 +448,27 @@ exprIsTrivial (Type _) = True exprIsTrivial (Coercion _) = True exprIsTrivial (Lit lit) = litIsTrivial lit exprIsTrivial (App e arg) = not (isRuntimeArg arg) && exprIsTrivial e -exprIsTrivial (Note _ e) = exprIsTrivial e -- See Note [SCCs are trivial] +exprIsTrivial (Tick _ _) = False -- See Note [Tick trivial] exprIsTrivial (Cast e _) = exprIsTrivial e exprIsTrivial (Lam b body) = not (isRuntimeVar b) && exprIsTrivial body exprIsTrivial _ = False \end{code} +When substituting in a breakpoint we need to strip away the type cruft +from a trivial expression and get back to the Id. The invariant is +that the expression we're substituting was originally trivial +according to exprIsTrivial. + +\begin{code} +getIdFromTrivialExpr :: CoreExpr -> Id +getIdFromTrivialExpr e = go e + where go (Var v) = v + go (App f t) | not (isRuntimeArg t) = go f + go (Cast e _) = go e + go (Lam b e) | not (isRuntimeVar b) = go e + go e = pprPanic "getIdFromTrivialExpr" (ppr e) +\end{code} + exprIsBottom is a very cheap and cheerful function; it may return False for bottoming expressions, but it never costs much to ask. See also CoreArity.exprBotStrictness_maybe, but that's a bit more @@ -429,7 +482,7 @@ exprIsBottom e go n (Var v) = isBottomingId v && n >= idArity v go n (App e a) | isTypeArg a = go n e | otherwise = go (n+1) e - go n (Note _ e) = go n e + go n (Tick _ e) = go n e go n (Cast e _) = go n e go n (Let _ e) = go n e go _ _ = False @@ -464,7 +517,7 @@ exprIsDupable e go n (Type {}) = Just n go n (Coercion {}) = Just n go n (Var {}) = decrement n - go n (Note _ e) = go n e + go n (Tick _ e) = go n e go n (Cast e _) = go n e go n (App f a) | Just n' <- go n a = go n' f go n (Lit lit) | litIsDupable lit = decrement n @@ -537,7 +590,6 @@ exprIsCheap' _ (Lit _) = True exprIsCheap' _ (Type _) = True exprIsCheap' _ (Coercion _) = True exprIsCheap' _ (Var _) = True -exprIsCheap' good_app (Note _ e) = exprIsCheap' good_app e exprIsCheap' good_app (Cast e _) = exprIsCheap' good_app e exprIsCheap' good_app (Lam x e) = isRuntimeVar x || exprIsCheap' good_app e @@ -549,6 +601,12 @@ exprIsCheap' good_app (Case e _ _ alts) = exprIsCheap' good_app e && -- This improves arities of overloaded functions where -- there is only dictionary selection (no construction) involved +exprIsCheap' good_app (Tick t e) + | tickishCounts t = False + | otherwise = exprIsCheap' good_app e + -- never duplicate ticks. If we get this wrong, then HPC's entry + -- counts will be off (check test in libraries/hpc/tests/raytrace) + exprIsCheap' good_app (Let (NonRec x _) e) | isUnLiftedType (idType x) = exprIsCheap' good_app e | otherwise = False @@ -689,13 +747,18 @@ exprOkForSpeculation (Type _) = True exprOkForSpeculation (Coercion _) = True exprOkForSpeculation (Var v) - | isTickBoxOp v = False -- Tick boxes are *not* suitable for speculation - | otherwise = isUnLiftedType (idType v) -- c.f. the Var case of exprIsHNF - || isDataConWorkId v -- Nullary constructors - || idArity v > 0 -- Functions - || isEvaldUnfolding (idUnfolding v) -- Let-bound values + = isUnLiftedType (idType v) -- c.f. the Var case of exprIsHNF + || isDataConWorkId v -- Nullary constructors + || idArity v > 0 -- Functions + || isEvaldUnfolding (idUnfolding v) -- Let-bound values + +-- Tick annotations that *tick* cannot be speculated, because these +-- are meant to identify whether or not (and how often) the particular +-- source expression was evaluated at runtime. +exprOkForSpeculation (Tick tickish e) + | tickishCounts tickish = False + | otherwise = exprOkForSpeculation e -exprOkForSpeculation (Note _ e) = exprOkForSpeculation e exprOkForSpeculation (Cast e _) = exprOkForSpeculation e exprOkForSpeculation (Case e _ _ alts) @@ -908,7 +971,9 @@ exprIsHNFlike is_con is_con_unf = is_hnf_like -- we don't mind copying them is_hnf_like (Coercion _) = True -- Same for coercions is_hnf_like (Lam b e) = isRuntimeVar b || is_hnf_like e - is_hnf_like (Note _ e) = is_hnf_like e + is_hnf_like (Tick tickish e) = not (tickishCounts tickish) + && is_hnf_like e + -- See Note [exprIsHNF Tick] is_hnf_like (Cast e _) = is_hnf_like e is_hnf_like (App e (Type _)) = is_hnf_like e is_hnf_like (App e (Coercion _)) = is_hnf_like e @@ -921,10 +986,25 @@ exprIsHNFlike is_con is_con_unf = is_hnf_like app_is_value (Var fun) args = idArity fun > valArgCount args -- Under-applied function || is_con fun -- or constructor-like - app_is_value (Note _ f) as = app_is_value f as + app_is_value (Tick _ f) as = app_is_value f as app_is_value (Cast f _) as = app_is_value f as app_is_value (App f a) as = app_is_value f (a:as) app_is_value _ _ = False + +{- +Note [exprIsHNF Tick] + +We can discard source annotations on HNFs as long as they aren't +tick-like: + + scc c (\x . e) => \x . e + scc c (C x1..xn) => C x1..xn + +So we regard these as HNFs. Tick annotations that tick are not +regarded as HNF if the expression they surround is HNF, because the +tick is there to tell us that the expression was evaluated, so we +don't want to discard a seq on it. +-} \end{code} @@ -1084,7 +1164,7 @@ eqExprX id_unfolding_fun env e1 e2 go env (Coercion co1) (Coercion co2) = coreEqCoercion2 env co1 co2 go env (Cast e1 co1) (Cast e2 co2) = coreEqCoercion2 env co1 co2 && go env e1 e2 go env (App f1 a1) (App f2 a2) = go env f1 f2 && go env a1 a2 - go env (Note n1 e1) (Note n2 e2) = go_note n1 n2 && go env e1 e2 + go env (Tick n1 e1) (Tick n2 e2) = go_tickish n1 n2 && go env e1 e2 go env (Lam b1 e1) (Lam b2 e2) = eqTypeX env (varType b1) (varType b2) -- False for Id/TyVar combination @@ -1113,9 +1193,9 @@ eqExprX id_unfolding_fun env e1 e2 = c1 == c2 && go (rnBndrs2 env bs1 bs2) e1 e2 ----------- - go_note (SCC cc1) (SCC cc2) = cc1 == cc2 - go_note (CoreNote s1) (CoreNote s2) = s1 == s2 - go_note _ _ = False + go_tickish (Breakpoint lid lids) (Breakpoint rid rids) + = lid == rid && map (rnOccL env) lids == map (rnOccR env) rids + go_tickish l r = l == r \end{code} Auxiliary functions @@ -1148,13 +1228,13 @@ exprSize (Lam b e) = varSize b + exprSize e exprSize (Let b e) = bindSize b + exprSize e exprSize (Case e b t as) = seqType t `seq` exprSize e + varSize b + 1 + foldr ((+) . altSize) 0 as exprSize (Cast e co) = (seqCo co `seq` 1) + exprSize e -exprSize (Note n e) = noteSize n + exprSize e -exprSize (Type t) = seqType t `seq` 1 +exprSize (Tick n e) = tickSize n + exprSize e +exprSize (Type t) = seqType t `seq` 1 exprSize (Coercion co) = seqCo co `seq` 1 -noteSize :: Note -> Int -noteSize (SCC cc) = cc `seq` 1 -noteSize (CoreNote s) = s `seq` 1 -- hdaume: core annotations +tickSize :: Tickish Id -> Int +tickSize (ProfNote cc _ _) = cc `seq` 1 +tickSize _ = 1 -- the rest are strict varSize :: Var -> Int varSize b | isTyVar b = 1 @@ -1214,7 +1294,7 @@ exprStats (Lam b e) = bndrStats b `plusCS` exprStats e exprStats (Let b e) = bindStats b `plusCS` exprStats e exprStats (Case e b _ as) = exprStats e `plusCS` bndrStats b `plusCS` sumCS altStats as exprStats (Cast e co) = coStats co `plusCS` exprStats e -exprStats (Note _ e) = exprStats e +exprStats (Tick _ e) = exprStats e altStats :: CoreAlt -> CoreStats altStats (_, bs, r) = sumCS bndrStats bs `plusCS` exprStats r @@ -1253,7 +1333,7 @@ type HashEnv = (Int, VarEnv Int) -- Hash code for bound variables hash_expr :: HashEnv -> CoreExpr -> Word32 -- Word32, because we're expecting overflows here, and overflowing -- signed types just isn't cool. In C it's even undefined. -hash_expr env (Note _ e) = hash_expr env e +hash_expr env (Tick _ e) = hash_expr env e hash_expr env (Cast e _) = hash_expr env e hash_expr env (Var v) = hashVar env v hash_expr _ (Lit lit) = fromIntegral (hashLiteral lit) @@ -1274,7 +1354,7 @@ fast_hash_expr env (Type t) = fast_hash_type env t fast_hash_expr env (Coercion co) = fast_hash_co env co fast_hash_expr _ (Lit lit) = fromIntegral (hashLiteral lit) fast_hash_expr env (Cast e _) = fast_hash_expr env e -fast_hash_expr env (Note _ e) = fast_hash_expr env e +fast_hash_expr env (Tick _ e) = fast_hash_expr env e fast_hash_expr env (App _ a) = fast_hash_expr env a -- A bit idiosyncratic ('a' not 'f')! fast_hash_expr _ _ = 1 @@ -1530,7 +1610,8 @@ rhsIsStatic _is_dynamic_name rhs = is_static False rhs -> CoreExpr -> Bool is_static False (Lam b e) = isRuntimeVar b || is_static False e - is_static in_arg (Note n e) = notSccNote n && is_static in_arg e + is_static in_arg (Tick n e) = not (tickishIsCode n) + && is_static in_arg e is_static in_arg (Cast e _) = is_static in_arg e is_static _ (Coercion {}) = True -- Behaves just like a literal is_static _ (Lit (LitInteger {})) = False @@ -1571,7 +1652,7 @@ rhsIsStatic _is_dynamic_name rhs = is_static False rhs -- x = D# (1.0## /## 2.0##) -- can't float because /## can fail. - go (Note n f) n_val_args = notSccNote n && go f n_val_args + go (Tick n f) n_val_args = not (tickishIsCode n) && go f n_val_args go (Cast e _) n_val_args = go e n_val_args go _ _ = False diff --git a/compiler/coreSyn/ExternalCore.lhs b/compiler/coreSyn/ExternalCore.lhs index e407f73a37..51342beb7f 100644 --- a/compiler/coreSyn/ExternalCore.lhs +++ b/compiler/coreSyn/ExternalCore.lhs @@ -33,7 +33,7 @@ data Exp | Let Vdefg Exp | Case Exp Vbind Ty [Alt] {- non-empty list -} | Cast Exp Ty - | Note String Exp + | Tick String Exp {- XXX probably wrong -} | External String String Ty {- target name, convention, and type -} | DynExternal String Ty {- convention and type (incl. Addr# of target as first arg) -} | Label String diff --git a/compiler/coreSyn/MkExternalCore.lhs b/compiler/coreSyn/MkExternalCore.lhs index 940e058e3d..c185e49bfd 100644 --- a/compiler/coreSyn/MkExternalCore.lhs +++ b/compiler/coreSyn/MkExternalCore.lhs @@ -160,8 +160,7 @@ make_exp (Case e v ty alts) = do scrut <- make_exp e newAlts <- mapM make_alt alts return $ C.Case scrut (make_vbind v) (make_ty ty) newAlts -make_exp (Note (SCC _) e) = make_exp e >>= (return . C.Note "SCC") -- temporary -make_exp (Note (CoreNote s) e) = make_exp e >>= (return . C.Note s) -- hdaume: core annotations +make_exp (Tick _ e) = make_exp e >>= (return . C.Tick "SCC") -- temporary make_exp _ = error "MkExternalCore died: make_exp" make_alt :: CoreAlt -> CoreM C.Alt diff --git a/compiler/coreSyn/PprCore.lhs b/compiler/coreSyn/PprCore.lhs index a26578097c..877814037d 100644 --- a/compiler/coreSyn/PprCore.lhs +++ b/compiler/coreSyn/PprCore.lhs @@ -13,7 +13,6 @@ module PprCore ( ) where import CoreSyn -import CostCentre import Literal( pprLiteral ) import Var import Id @@ -225,13 +224,8 @@ ppr_expr add_par (Let bind expr) Rec _ -> (sLit "letrec {") NonRec _ _ -> (sLit "let {") -ppr_expr add_par (Note (SCC cc) expr) - = add_par (sep [pprCostCentreCore cc, pprCoreExpr expr]) - -ppr_expr add_par (Note (CoreNote s) expr) - = add_par $ - sep [sep [ptext (sLit "__core_note"), pprHsString (mkFastString s)], - pprParendExpr expr] +ppr_expr add_par (Tick tickish expr) + = add_par (sep [ppr tickish, pprCoreExpr expr]) pprCoreAlt :: OutputableBndr a => (AltCon, [a] , Expr a) -> SDoc pprCoreAlt (con, args, rhs) @@ -467,6 +461,31 @@ pprRule (Rule { ru_name = name, ru_act = act, ru_fn = fn, \end{code} ----------------------------------------------------- +-- Tickish +----------------------------------------------------- + +\begin{code} +instance Outputable id => Outputable (Tickish id) where + ppr (HpcTick modl ix) = + hcat [ptext (sLit "tick<"), + ppr modl, comma, + ppr ix, + ptext (sLit ">")] + ppr (Breakpoint ix vars) = + hcat [ptext (sLit "break<"), + ppr ix, + ptext (sLit ">"), + parens (hcat (punctuate comma (map ppr vars)))] + ppr (ProfNote { profNoteCC = cc, + profNoteCount = tick, + profNoteScope = scope }) = + case (tick,scope) of + (True,True) -> hcat [ptext (sLit "scctick<"), ppr cc, char '>'] + (True,False) -> hcat [ptext (sLit "tick<"), ppr cc, char '>'] + _ -> hcat [ptext (sLit "scc<"), ppr cc, char '>'] +\end{code} + +----------------------------------------------------- -- Vectorisation declarations ----------------------------------------------------- diff --git a/compiler/coreSyn/PprExternalCore.lhs b/compiler/coreSyn/PprExternalCore.lhs index 5564538fc4..d6b8b53dfd 100644 --- a/compiler/coreSyn/PprExternalCore.lhs +++ b/compiler/coreSyn/PprExternalCore.lhs @@ -161,7 +161,7 @@ pexp (Case e vb ty alts) = sep [text "%case" <+> paty ty <+> paexp e, text "%of" <+> pvbind vb] $$ (indent (braces (vcat (punctuate (char ';') (map palt alts))))) pexp (Cast e co) = (text "%cast" <+> parens (pexp e)) $$ paty co -pexp (Note s e) = (text "%note" <+> pstring s) $$ pexp e +pexp (Tick s e) = (text "%source" <+> pstring s) $$ pexp e pexp (External n cc t) = (text "%external" <+> text cc <+> pstring n) $$ paty t pexp (DynExternal cc t) = (text "%dynexternal" <+> text cc) $$ paty t pexp (Label n) = (text "%label" <+> pstring n) diff --git a/compiler/coreSyn/TrieMap.lhs b/compiler/coreSyn/TrieMap.lhs index 120b67654f..831baeade1 100644 --- a/compiler/coreSyn/TrieMap.lhs +++ b/compiler/coreSyn/TrieMap.lhs @@ -21,7 +21,6 @@ import Name import Type import TypeRep import Var -import CostCentre import UniqFM import Unique( Unique ) @@ -239,7 +238,7 @@ data CoreMap a , cm_co :: CoercionMap a , cm_type :: TypeMap a , cm_cast :: CoreMap (CoercionMap a) - , cm_scc :: CoreMap (CostCentreMap a) + , cm_source :: CoreMap (TickishMap a) , cm_app :: CoreMap (CoreMap a) , cm_lam :: CoreMap (TypeMap a) , cm_letn :: CoreMap (CoreMap (BndrMap a)) @@ -255,7 +254,7 @@ wrapEmptyCM = CM { cm_var = emptyTM, cm_lit = emptyLiteralMap , cm_cast = emptyTM, cm_app = emptyTM , cm_lam = emptyTM, cm_letn = emptyTM , cm_letr = emptyTM, cm_case = emptyTM - , cm_scc = emptyTM } + , cm_source = emptyTM } instance TrieMap CoreMap where type Key CoreMap = CoreExpr @@ -289,7 +288,7 @@ fdE k m . foldTM k (cm_co m) . foldTM k (cm_type m) . foldTM (foldTM k) (cm_cast m) - . foldTM (foldTM k) (cm_scc m) + . foldTM (foldTM k) (cm_source m) . foldTM (foldTM k) (cm_app m) . foldTM (foldTM k) (cm_lam m) . foldTM (foldTM (foldTM k)) (cm_letn m) @@ -307,8 +306,7 @@ lkE env expr cm go (Type t) = cm_type >.> lkT env t go (Coercion c) = cm_co >.> lkC env c go (Cast e c) = cm_cast >.> lkE env e >=> lkC env c - go (Note (SCC cc) e) = cm_scc >.> lkE env e >=> lkCC cc - go (Note _ e) = lkE env e + go (Tick tickish e) = cm_source >.> lkE env e >=> lkTickish tickish go (App e1 e2) = cm_app >.> lkE env e2 >=> lkE env e1 go (Lam v e) = cm_lam >.> lkE (extendCME env v) e >=> lkBndr env v go (Let (NonRec b r) e) = cm_letn >.> lkE env r @@ -329,8 +327,7 @@ xtE env (Coercion c) f m = m { cm_co = cm_co m |> xtC env c f } xtE _ (Lit l) f m = m { cm_lit = cm_lit m |> xtLit l f } xtE env (Cast e c) f m = m { cm_cast = cm_cast m |> xtE env e |>> xtC env c f } -xtE env (Note (SCC cc) e) f m = m { cm_scc = cm_scc m |> xtE env e |>> xtCC cc f } -xtE env (Note _ e) f m = xtE env e f m +xtE env (Tick t e) f m = m { cm_source = cm_source m |> xtE env e |>> xtTickish t f } xtE env (App e1 e2) f m = m { cm_app = cm_app m |> xtE env e2 |>> xtE env e1 f } xtE env (Lam v e) f m = m { cm_lam = cm_lam m |> xtE (extendCME env v) e |>> xtBndr env v f } @@ -347,12 +344,12 @@ xtE env (Case e b _ as) f m = m { cm_case = cm_case m |> xtE env e |>> let env1 = extendCME env b in xtList (xtA env1) as f } -type CostCentreMap a = Map.Map CostCentre a -lkCC :: CostCentre -> CostCentreMap a -> Maybe a -lkCC = lookupTM +type TickishMap a = Map.Map (Tickish Id) a +lkTickish :: Tickish Id -> TickishMap a -> Maybe a +lkTickish = lookupTM -xtCC :: CostCentre -> XT a -> CostCentreMap a -> CostCentreMap a -xtCC = alterTM +xtTickish :: Tickish Id -> XT a -> TickishMap a -> TickishMap a +xtTickish = alterTM ------------------------ data AltMap a -- A single alternative @@ -600,4 +597,4 @@ lkFreeVar var env = lookupVarEnv env var xtFreeVar :: Var -> XT a -> VarEnv a -> VarEnv a xtFreeVar v f m = alterVarEnv f m v -\end{code}
\ No newline at end of file +\end{code} diff --git a/compiler/deSugar/Coverage.lhs b/compiler/deSugar/Coverage.lhs index abb8948de6..117e1deb3b 100644 --- a/compiler/deSugar/Coverage.lhs +++ b/compiler/deSugar/Coverage.lhs @@ -2,11 +2,10 @@ % (c) Galois, 2006 % (c) University of Glasgow, 2007 % -\section[Coverage]{@coverage@: the main function} - \begin{code} -module Coverage (addCoverageTicksToBinds, hpcInitCode) where +module Coverage (addTicksToBinds, hpcInitCode) where +import Type import HsSyn import Module import Outputable @@ -14,8 +13,11 @@ import DynFlags import Control.Monad import SrcLoc import ErrUtils +import NameSet hiding (FreeVars) import Name import Bag +import CostCentre +import CoreSyn import Id import VarSet import Data.List @@ -24,6 +26,7 @@ import HscTypes import Platform import StaticFlags import TyCon +import BasicTypes import MonadUtils import Maybes import CLabel @@ -44,174 +47,306 @@ import qualified Data.Map as Map %************************************************************************ %* * -%* The main function: addCoverageTicksToBinds +%* The main function: addTicksToBinds %* * %************************************************************************ \begin{code} -addCoverageTicksToBinds +addTicksToBinds :: DynFlags -> Module - -> ModLocation -- of the current module - -> [TyCon] -- type constructor in this module + -> ModLocation -- ... off the current module + -> NameSet -- Exported Ids. When we call addTicksToBinds, + -- isExportedId doesn't work yet (the desugarer + -- hasn't set it), so we have to work from this set. + -> [TyCon] -- Type constructor in this module -> LHsBinds Id -> IO (LHsBinds Id, HpcInfo, ModBreaks) -addCoverageTicksToBinds dflags mod mod_loc tyCons binds = - case ml_hs_file mod_loc of - Nothing -> return (binds, emptyHpcInfo False, emptyModBreaks) - Just orig_file -> do - - if "boot" `isSuffixOf` orig_file then return (binds, emptyHpcInfo False, emptyModBreaks) else do - - -- Now, we try look for a file generated from a .hsc file to a .hs file, by peeking ahead. +addTicksToBinds dflags mod mod_loc exports tyCons binds = - let top_pos = catMaybes $ foldrBag (\ (L pos _) rest -> srcSpanFileName_maybe pos : rest) [] binds - let orig_file2 = case top_pos of - (file_name:_) - | ".hsc" `isSuffixOf` unpackFS file_name -> unpackFS file_name - _ -> orig_file + case ml_hs_file mod_loc of + Nothing -> return (binds, emptyHpcInfo False, emptyModBreaks) + Just orig_file -> do - let mod_name = moduleNameString (moduleName mod) + if "boot" `isSuffixOf` orig_file + then return (binds, emptyHpcInfo False, emptyModBreaks) + else do + + let orig_file2 = guessSourceFile binds orig_file - let (binds1,_,st) + (binds1,_,st) = unTM (addTickLHsBinds binds) (TTE - { fileName = mkFastString orig_file2 + { fileName = mkFastString orig_file2 , declPath = [] + , dflags = dflags + , exports = exports , inScope = emptyVarSet - , blackList = Map.fromList [ (getSrcSpan (tyConName tyCon),()) - | tyCon <- tyCons ] - }) + , blackList = Map.fromList + [ (getSrcSpan (tyConName tyCon),()) + | tyCon <- tyCons ] + , density = mkDensity dflags + , this_mod = mod + }) (TT { tickBoxCount = 0 , mixEntries = [] }) - let entries = reverse $ mixEntries st - - -- write the mix entries for this module - hashNo <- if opt_Hpc then do - let hpc_dir = hpcDir dflags - - let hpc_mod_dir = if modulePackageId mod == mainPackageId - then hpc_dir - else hpc_dir ++ "/" ++ packageIdString (modulePackageId mod) - - let tabStop = 1 -- <tab> counts as a normal char in GHC's location ranges. - createDirectoryIfMissing True hpc_mod_dir - modTime <- getModificationTime orig_file2 - let entries' = [ (hpcPos, box) - | (span,_,_,box) <- entries, hpcPos <- [mkHpcPos span] ] - when (length entries' /= tickBoxCount st) $ do - panic "the number of .mix entries are inconsistent" - let hashNo = mixHash orig_file2 modTime tabStop entries' - mixCreate hpc_mod_dir mod_name - $ Mix orig_file2 modTime (toHash hashNo) tabStop entries' - return $ hashNo - else do - return $ 0 - - -- Todo: use proper src span type - breakArray <- newBreakArray $ length entries + let entries = reverse $ mixEntries st - let locsTicks = listArray (0,tickBoxCount st-1) - [ span | (span,_,_,_) <- entries ] - varsTicks = listArray (0,tickBoxCount st-1) - [ vars | (_,_,vars,_) <- entries ] - declsTicks= listArray (0,tickBoxCount st-1) - [ decls | (_,decls,_,_) <- entries ] - modBreaks = emptyModBreaks - { modBreaks_flags = breakArray - , modBreaks_locs = locsTicks - , modBreaks_vars = varsTicks - , modBreaks_decls = declsTicks - } - - doIfSet_dyn dflags Opt_D_dump_hpc $ do - printDump (pprLHsBinds binds1) - - return (binds1, HpcInfo (tickBoxCount st) hashNo, modBreaks) -\end{code} + let count = tickBoxCount st + hashNo <- writeMixEntries dflags mod count entries orig_file2 + modBreaks <- mkModBreaks count entries + doIfSet_dyn dflags Opt_D_dump_ticked $ printDump (pprLHsBinds binds1) + + return (binds1, HpcInfo count hashNo, modBreaks) -\begin{code} -liftL :: (Monad m) => (a -> m a) -> Located a -> m (Located a) -liftL f (L loc a) = do - a' <- f a - return $ L loc a' + +guessSourceFile :: LHsBinds Id -> FilePath -> FilePath +guessSourceFile binds orig_file = + -- Try look for a file generated from a .hsc file to a + -- .hs file, by peeking ahead. + let top_pos = catMaybes $ foldrBag (\ (L pos _) rest -> + srcSpanFileName_maybe pos : rest) [] binds + in + case top_pos of + (file_name:_) | ".hsc" `isSuffixOf` unpackFS file_name + -> unpackFS file_name + _ -> orig_file + + +mkModBreaks :: Int -> [MixEntry_] -> IO ModBreaks +mkModBreaks count entries = do + breakArray <- newBreakArray $ length entries + let + locsTicks = listArray (0,count-1) [ span | (span,_,_,_) <- entries ] + varsTicks = listArray (0,count-1) [ vars | (_,_,vars,_) <- entries ] + declsTicks= listArray (0,count-1) [ decls | (_,decls,_,_) <- entries ] + modBreaks = emptyModBreaks + { modBreaks_flags = breakArray + , modBreaks_locs = locsTicks + , modBreaks_vars = varsTicks + , modBreaks_decls = declsTicks + } + -- + return modBreaks + + +writeMixEntries :: DynFlags -> Module -> Int -> [MixEntry_] -> FilePath -> IO Int +writeMixEntries dflags mod count entries filename + | not opt_Hpc = return 0 + | otherwise = do + let + hpc_dir = hpcDir dflags + mod_name = moduleNameString (moduleName mod) + + hpc_mod_dir + | modulePackageId mod == mainPackageId = hpc_dir + | otherwise = hpc_dir ++ "/" ++ packageIdString (modulePackageId mod) + + tabStop = 8 -- <tab> counts as a normal char in GHC's location ranges. + + createDirectoryIfMissing True hpc_mod_dir + modTime <- getModificationTime filename + let entries' = [ (hpcPos, box) + | (span,_,_,box) <- entries, hpcPos <- [mkHpcPos span] ] + when (length entries' /= count) $ do + panic "the number of .mix entries are inconsistent" + let hashNo = mixHash filename modTime tabStop entries' + mixCreate hpc_mod_dir mod_name + $ Mix filename modTime (toHash hashNo) tabStop entries' + return hashNo + + +-- ----------------------------------------------------------------------------- +-- TickDensity: where to insert ticks + +data TickDensity + = TickForCoverage -- for Hpc + | TickForBreakPoints -- for GHCi + | TickAllFunctions -- for -prof-auto-all + | TickTopFunctions -- for -prof-auto-top + | TickExportedFunctions -- for -prof-auto-exported + -- maybe also: + -- | TickCallSites -- for stack tracing + deriving Eq + +mkDensity :: DynFlags -> TickDensity +mkDensity dflags + | opt_Hpc = TickForCoverage + | HscInterpreted <- hscTarget dflags = TickForBreakPoints + | ProfAutoAll <- profAuto dflags = TickAllFunctions + | ProfAutoTop <- profAuto dflags = TickTopFunctions + | ProfAutoExports <- profAuto dflags = TickExportedFunctions + | otherwise = panic "desnity" + + +-- | Decide whether to add a tick to a binding or not. +shouldTickBind :: TickDensity + -> Bool -- top level? + -> Bool -- exported? + -> Bool -- simple pat bind? + -> Bool -- INLINE pragma? + -> Bool + +shouldTickBind density top_lev exported simple_pat inline + = case density of + TickForBreakPoints -> not simple_pat + -- we never add breakpoints to simple pattern bindings + -- (there's always a tick on the rhs anyway). + TickAllFunctions -> not inline + TickTopFunctions -> top_lev && not inline + TickExportedFunctions -> exported && not inline + TickForCoverage -> True + +shouldTickPatBind :: TickDensity -> Bool -> Bool +shouldTickPatBind density top_lev + = case density of + TickForBreakPoints -> False + TickAllFunctions -> True + TickTopFunctions -> top_lev + TickExportedFunctions -> False + TickForCoverage -> False + +-- ----------------------------------------------------------------------------- +-- Adding ticks to bindings addTickLHsBinds :: LHsBinds Id -> TM (LHsBinds Id) addTickLHsBinds binds = mapBagM addTickLHsBind binds addTickLHsBind :: LHsBind Id -> TM (LHsBind Id) -addTickLHsBind (L pos bind@(AbsBinds { abs_binds = binds })) = do +addTickLHsBind (L pos bind@(AbsBinds { abs_binds = binds, + abs_exports = abs_exports })) = do + withEnv add_exports $ do binds' <- addTickLHsBinds binds return $ L pos $ bind { abs_binds = binds' } -addTickLHsBind (L pos (funBind@(FunBind { fun_id = (L _ id) }))) = do + where + -- in AbsBinds, the Id on each binding is not the actual top-level + -- Id that we are defining, they are related by the abs_exports + -- field of AbsBinds. So if we're doing TickExportedFunctions we need + -- to add the local Ids to the set of exported Names so that we know to + -- tick the right bindings. + add_exports env = + env{ exports = exports env `addListToNameSet` + [ idName mid + | ABE{ abe_poly = pid, abe_mono = mid } <- abs_exports + , idName pid `elemNameSet` (exports env) ] } + +addTickLHsBind (L pos (funBind@(FunBind { fun_id = (L _ id) }))) = do let name = getOccString id decl_path <- getPathEntry (fvs, (MatchGroup matches' ty)) <- getFreeVars $ addPathEntry name $ - addTickMatchGroup (fun_matches funBind) + addTickMatchGroup False (fun_matches funBind) blackListed <- isBlackListed pos + density <- getDensity + exported_names <- liftM exports getEnv - -- Todo: we don't want redundant ticks on simple pattern bindings -- We don't want to generate code for blacklisted positions - if blackListed || (not opt_Hpc && isSimplePatBind funBind) - then - return $ L pos $ funBind { fun_matches = MatchGroup matches' ty - , fun_tick = Nothing - } - else do - tick_no <- allocATickBox (if null decl_path - then TopLevelBox [name] - else LocalBox (decl_path ++ [name])) - pos fvs - - return $ L pos $ funBind { fun_matches = MatchGroup matches' ty - , fun_tick = tick_no - } + -- We don't want redundant ticks on simple pattern bindings + -- We don't want to tick non-exported bindings in TickExportedFunctions + let simple = isSimplePatBind funBind + toplev = null decl_path + exported = idName id `elemNameSet` exported_names + inline = pprTrace "inline" (ppr id <+> ppr (idInlinePragma id)) $ isAnyInlinePragma (idInlinePragma id) + + tick <- if not blackListed && + shouldTickBind density toplev exported simple inline + then + bindTick density name pos fvs + else + return Nothing + + return $ L pos $ funBind { fun_matches = MatchGroup matches' ty + , fun_tick = tick } + where -- a binding is a simple pattern binding if it is a funbind with zero patterns isSimplePatBind :: HsBind a -> Bool isSimplePatBind funBind = matchGroupArity (fun_matches funBind) == 0 -- TODO: Revisit this -addTickLHsBind (L pos (pat@(PatBind { pat_rhs = rhs }))) = do +addTickLHsBind (L pos (pat@(PatBind { pat_lhs = lhs, pat_rhs = rhs }))) = do let name = "(...)" - rhs' <- addPathEntry name $ addTickGRHSs False rhs -{- + (fvs, rhs') <- getFreeVars $ addPathEntry name $ addTickGRHSs False False rhs + + density <- getDensity decl_path <- getPathEntry - tick_me <- allocTickBox (if null decl_path - then TopLevelBox [name] - else LocalBox (name : decl_path)) --} - return $ L pos $ pat { pat_rhs = rhs' } + let top_lev = null decl_path + let add_ticks = shouldTickPatBind density top_lev + + tickish <- if add_ticks + then bindTick density name pos fvs + else return Nothing + + let patvars = map getOccString (collectPatBinders lhs) + patvar_ticks <- if add_ticks + then mapM (\v -> bindTick density v pos fvs) patvars + else return [] + + return $ L pos $ pat { pat_rhs = rhs', + pat_ticks = (tickish, patvar_ticks)} -- Only internal stuff, not from source, uses VarBind, so we ignore it. addTickLHsBind var_bind@(L _ (VarBind {})) = return var_bind + +bindTick :: TickDensity -> String -> SrcSpan -> FreeVars -> TM (Maybe (Tickish Id)) +bindTick density name pos fvs = do + decl_path <- getPathEntry + let + toplev = null decl_path + count_entries = toplev || density == TickAllFunctions + top_only = density /= TickAllFunctions + box_label = if toplev then TopLevelBox [name] + else LocalBox (decl_path ++ [name]) + -- + allocATickBox box_label count_entries top_only pos fvs + + +-- ----------------------------------------------------------------------------- +-- Decorate an LHsExpr with ticks + +-- selectively add ticks to interesting expressions +addTickLHsExpr :: LHsExpr Id -> TM (LHsExpr Id) +addTickLHsExpr (L pos e0) = do + d <- getDensity + case d of + TickForCoverage -> tick_it + TickForBreakPoints -> if isGoodBreakExpr e0 then tick_it else dont_tick_it + _other -> dont_tick_it + where + tick_it = allocTickBox (ExpBox False) False False pos $ addTickHsExpr e0 + dont_tick_it = do e1 <- addTickHsExpr e0; return $ L pos e1 + -- Add a tick to the expression no matter what it is. There is one exception: -- for the debugger, if the expression is a 'let', then we don't want to add -- a tick here because there will definititely be a tick on the body anyway. addTickLHsExprAlways :: LHsExpr Id -> TM (LHsExpr Id) -addTickLHsExprAlways (L pos e0) - | not opt_Hpc, HsLet _ _ <- e0 = addTickLHsExprNever (L pos e0) - | otherwise = allocTickBox (ExpBox False) pos $ addTickHsExpr e0 - -addTickLHsExprNeverOrAlways :: LHsExpr Id -> TM (LHsExpr Id) -addTickLHsExprNeverOrAlways e - | opt_Hpc = addTickLHsExprNever e - | otherwise = addTickLHsExprAlways e - -addTickLHsExprNeverOrMaybe :: LHsExpr Id -> TM (LHsExpr Id) -addTickLHsExprNeverOrMaybe e - | opt_Hpc = addTickLHsExprNever e - | otherwise = addTickLHsExpr e +addTickLHsExprAlways (L pos e0) = do + d <- getDensity + case d of + TickForBreakPoints | HsLet _ _ <- e0 -> dont_tick_it + | otherwise -> tick_it + TickForCoverage -> tick_it + _other -> dont_tick_it + where + tick_it = allocTickBox (ExpBox False) False False pos $ addTickHsExpr e0 + dont_tick_it = do e1 <- addTickHsExpr e0; return $ L pos e1 + +-- | A let body is ticked only if we're doing breakpoints. For coverage, the +-- whole let is ticked, so there's no need to tick the body. +addTickLHsExprLetBody :: LHsExpr Id -> TM (LHsExpr Id) +addTickLHsExprLetBody e + = ifDensity TickForBreakPoints + (addTickLHsExprAlways e) + (addTickLHsExprNever e) -- version of addTick that does not actually add a tick, -- because the scope of this tick is completely subsumed by @@ -221,16 +356,6 @@ addTickLHsExprNever (L pos e0) = do e1 <- addTickHsExpr e0 return $ L pos e1 --- selectively add ticks to interesting expressions -addTickLHsExpr :: LHsExpr Id -> TM (LHsExpr Id) -addTickLHsExpr (L pos e0) = do - if opt_Hpc || isGoodBreakExpr e0 - then do - allocTickBox (ExpBox False) pos $ addTickHsExpr e0 - else do - e1 <- addTickHsExpr e0 - return $ L pos e1 - -- general heuristic: expressions which do not denote values are good break points isGoodBreakExpr :: HsExpr Id -> Bool isGoodBreakExpr (HsApp {}) = True @@ -246,15 +371,19 @@ isGoodBreakExpr _other = False addTickLHsExprOptAlt :: Bool -> LHsExpr Id -> TM (LHsExpr Id) addTickLHsExprOptAlt oneOfMany (L pos e0) - | not opt_Hpc = addTickLHsExpr (L pos e0) - | otherwise = - allocTickBox (ExpBox oneOfMany) pos $ - addTickHsExpr e0 + = ifDensity TickForCoverage + (allocTickBox (ExpBox oneOfMany) False False pos $ addTickHsExpr e0) + (addTickLHsExpr (L pos e0)) addBinTickLHsExpr :: (Bool -> BoxLabel) -> LHsExpr Id -> TM (LHsExpr Id) -addBinTickLHsExpr boxLabel (L pos e0) = - allocBinTickBox boxLabel pos $ - addTickHsExpr e0 +addBinTickLHsExpr boxLabel (L pos e0) + = ifDensity TickForCoverage + (allocBinTickBox boxLabel pos $ addTickHsExpr e0) + (addTickLHsExpr (L pos e0)) + + +-- ----------------------------------------------------------------------------- +-- Decoarate an HsExpr with ticks addTickHsExpr :: HsExpr Id -> TM (HsExpr Id) addTickHsExpr e@(HsVar id) = do freeVar id; return e @@ -262,20 +391,23 @@ addTickHsExpr e@(HsIPVar _) = return e addTickHsExpr e@(HsOverLit _) = return e addTickHsExpr e@(HsLit _) = return e addTickHsExpr (HsLam matchgroup) = - liftM HsLam (addTickMatchGroup matchgroup) -addTickHsExpr (HsApp e1 e2) = + liftM HsLam (addTickMatchGroup True matchgroup) +addTickHsExpr (HsApp e1 e2) = liftM2 HsApp (addTickLHsExprNever e1) (addTickLHsExpr e2) addTickHsExpr (OpApp e1 e2 fix e3) = liftM4 OpApp (addTickLHsExpr e1) (addTickLHsExprNever e2) (return fix) - (addTickLHsExpr e3) + (addTickLHsExpr e3) addTickHsExpr (NegApp e neg) = liftM2 NegApp (addTickLHsExpr e) (addTickSyntaxExpr hpcSrcSpan neg) -addTickHsExpr (HsPar e) = liftM HsPar (addTickLHsExprNeverOrMaybe e) +addTickHsExpr (HsPar e) = + liftM HsPar $ + ifDensity TickForCoverage (addTickLHsExprNever e) + (addTickLHsExpr e) addTickHsExpr (SectionL e1 e2) = liftM2 SectionL (addTickLHsExpr e1) @@ -291,7 +423,7 @@ addTickHsExpr (ExplicitTuple es boxity) = addTickHsExpr (HsCase e mgs) = liftM2 HsCase (addTickLHsExpr e) - (addTickMatchGroup mgs) + (addTickMatchGroup False mgs) addTickHsExpr (HsIf cnd e1 e2 e3) = liftM3 (HsIf cnd) (addBinTickLHsExpr (BinBox CondBinBox) e1) @@ -301,7 +433,7 @@ addTickHsExpr (HsLet binds e) = bindLocals (collectLocalBinders binds) $ liftM2 HsLet (addTickHsLocalBinds binds) -- to think about: !patterns. - (addTickLHsExprNeverOrAlways e) + (addTickLHsExprLetBody e) addTickHsExpr (HsDo cxt stmts srcloc) = do { (stmts', _) <- addTickLStmts' forQual stmts (return ()) ; return (HsDo cxt stmts' srcloc) } @@ -338,7 +470,7 @@ addTickHsExpr (ArithSeq ty arith_seq) = (return ty) (addTickArithSeqInfo arith_seq) addTickHsExpr (HsTickPragma _ (L pos e0)) = do - e2 <- allocTickBox (ExpBox False) pos $ + e2 <- allocTickBox (ExpBox False) False False pos $ addTickHsExpr e0 return $ unLoc e2 addTickHsExpr (PArrSeq ty arith_seq) = @@ -374,34 +506,48 @@ addTickTupArg :: HsTupArg Id -> TM (HsTupArg Id) addTickTupArg (Present e) = do { e' <- addTickLHsExpr e; return (Present e') } addTickTupArg (Missing ty) = return (Missing ty) -addTickMatchGroup :: MatchGroup Id -> TM (MatchGroup Id) -addTickMatchGroup (MatchGroup matches ty) = do +addTickMatchGroup :: Bool{-is lambda-} -> MatchGroup Id -> TM (MatchGroup Id) +addTickMatchGroup is_lam (MatchGroup matches ty) = do let isOneOfMany = matchesOneOfMany matches - matches' <- mapM (liftL (addTickMatch isOneOfMany)) matches + matches' <- mapM (liftL (addTickMatch isOneOfMany is_lam)) matches return $ MatchGroup matches' ty -addTickMatch :: Bool -> Match Id -> TM (Match Id) -addTickMatch isOneOfMany (Match pats opSig gRHSs) = +addTickMatch :: Bool -> Bool -> Match Id -> TM (Match Id) +addTickMatch isOneOfMany isLambda (Match pats opSig gRHSs) = bindLocals (collectPatsBinders pats) $ do - gRHSs' <- addTickGRHSs isOneOfMany gRHSs + gRHSs' <- addTickGRHSs isOneOfMany isLambda gRHSs return $ Match pats opSig gRHSs' -addTickGRHSs :: Bool -> GRHSs Id -> TM (GRHSs Id) -addTickGRHSs isOneOfMany (GRHSs guarded local_binds) = do +addTickGRHSs :: Bool -> Bool -> GRHSs Id -> TM (GRHSs Id) +addTickGRHSs isOneOfMany isLambda (GRHSs guarded local_binds) = do bindLocals binders $ do local_binds' <- addTickHsLocalBinds local_binds - guarded' <- mapM (liftL (addTickGRHS isOneOfMany)) guarded + guarded' <- mapM (liftL (addTickGRHS isOneOfMany isLambda)) guarded return $ GRHSs guarded' local_binds' where binders = collectLocalBinders local_binds -addTickGRHS :: Bool -> GRHS Id -> TM (GRHS Id) -addTickGRHS isOneOfMany (GRHS stmts expr) = do +addTickGRHS :: Bool -> Bool -> GRHS Id -> TM (GRHS Id) +addTickGRHS isOneOfMany isLambda (GRHS stmts expr) = do (stmts',expr') <- addTickLStmts' (Just $ BinBox $ GuardBinBox) stmts - (if opt_Hpc then addTickLHsExprOptAlt isOneOfMany expr - else addTickLHsExprAlways expr) + (addTickGRHSBody isOneOfMany isLambda expr) return $ GRHS stmts' expr' +addTickGRHSBody :: Bool -> Bool -> LHsExpr Id -> TM (LHsExpr Id) +addTickGRHSBody isOneOfMany isLambda expr@(L pos e0) = do + d <- getDensity + case d of + TickForCoverage -> addTickLHsExprOptAlt isOneOfMany expr + TickAllFunctions | isLambda -> + addPathEntry "\\" $ + allocTickBox (ExpBox False) True{-count-} False{-not top-} pos $ + addTickHsExpr e0 + TickTopFunctions -> + allocTickBox (ExpBox False) False{-no count-} True{-top-} pos $ + addTickHsExpr e0 + _otherwise -> + addTickLHsExprAlways expr + addTickLStmts :: (Maybe (Bool -> BoxLabel)) -> [LStmt Id] -> TM [LStmt Id] addTickLStmts isGuard stmts = do (stmts, _) <- addTickLStmts' isGuard stmts (return ()) @@ -673,6 +819,11 @@ addTickArithSeqInfo (FromThenTo e1 e2 e3) = (addTickLHsExpr e1) (addTickLHsExpr e2) (addTickLHsExpr e3) + +liftL :: (Monad m) => (a -> m a) -> Located a -> m (Located a) +liftL f (L loc a) = do + a' <- f a + return $ L loc a' \end{code} \begin{code} @@ -680,11 +831,15 @@ data TickTransState = TT { tickBoxCount:: Int , mixEntries :: [MixEntry_] } -data TickTransEnv = TTE { fileName :: FastString - , declPath :: [String] +data TickTransEnv = TTE { fileName :: FastString + , density :: TickDensity + , dflags :: DynFlags + , exports :: NameSet + , declPath :: [String] , inScope :: VarSet - , blackList :: Map SrcSpan () - } + , blackList :: Map SrcSpan () + , this_mod :: Module + } -- deriving Show @@ -731,6 +886,12 @@ withEnv f (TM m) = TM $ \ env st -> case m (f env) st of (a, fvs, st') -> (a, fvs, st') +getDensity :: TM TickDensity +getDensity = TM $ \env st -> (density env, noFVs, st) + +ifDensity :: TickDensity -> TM a -> TM a -> TM a +ifDensity d th el = do d0 <- getDensity; if d == d0 then th else el + getFreeVars :: TM a -> TM (FreeVars, a) getFreeVars (TM m) = TM $ \ env st -> case m env st of (a, fv, st') -> ((fv,a), fv, st') @@ -773,46 +934,73 @@ isBlackListed pos = TM $ \ env st -> -- the tick application inherits the source position of its -- expression argument to support nested box allocations -allocTickBox :: BoxLabel -> SrcSpan -> TM (HsExpr Id) -> TM (LHsExpr Id) -allocTickBox boxLabel pos m | isGoodSrcSpan' pos = - sameFileName pos - (do e <- m; return (L pos e)) $ do - (fvs, e) <- getFreeVars m +allocTickBox :: BoxLabel -> Bool -> Bool -> SrcSpan -> TM (HsExpr Id) + -> TM (LHsExpr Id) +allocTickBox boxLabel countEntries topOnly pos m | isGoodSrcSpan' pos = + sameFileName pos (do e <- m; return (L pos e)) $ do + (fvs, e) <- getFreeVars m + env <- getEnv + tickish <- mkTickish boxLabel countEntries topOnly pos fvs (declPath env) + return (L pos (HsTick tickish (L pos e))) +allocTickBox _boxLabel _countEntries _topOnly pos m = do + e <- m + return (L pos e) + + +-- the tick application inherits the source position of its +-- expression argument to support nested box allocations +allocATickBox :: BoxLabel -> Bool -> Bool -> SrcSpan -> FreeVars + -> TM (Maybe (Tickish Id)) +allocATickBox boxLabel countEntries topOnly pos fvs | isGoodSrcSpan' pos = + sameFileName pos (return Nothing) $ do + let + mydecl_path = case boxLabel of + TopLevelBox x -> x + LocalBox xs -> xs + _ -> panic "allocATickBox" + tickish <- mkTickish boxLabel countEntries topOnly pos fvs mydecl_path + return (Just tickish) +allocATickBox _boxLabel _countEntries _topOnly _pos _fvs = + return Nothing + + +mkTickish :: BoxLabel -> Bool -> Bool -> SrcSpan -> OccEnv Id -> [String] + -> TM (Tickish Id) +mkTickish boxLabel countEntries topOnly pos fvs decl_path = TM $ \ env st -> let c = tickBoxCount st - ids = occEnvElts fvs + ids = filter (not . isUnLiftedType . idType) $ occEnvElts fvs + -- unlifted types cause two problems here: + -- * we can't bind them at the GHCi prompt + -- (bindLocalsAtBreakpoint already fliters them out), + -- * the simplifier might try to substitute a literal for + -- the Id, and we can't handle that. + mes = mixEntries st - me = (pos, declPath env, map (nameOccName.idName) ids, boxLabel) + me = (pos, decl_path, map (nameOccName.idName) ids, boxLabel) + + cc_name | topOnly = head decl_path + | otherwise = concat (intersperse "." decl_path) + + cc = mkUserCC (mkFastString cc_name) (this_mod env) + + count = countEntries && dopt Opt_ProfCountEntries (dflags env) + + tickish + | opt_Hpc = HpcTick (this_mod env) c + | opt_SccProfilingOn = ProfNote cc count True{-scopes-} + | otherwise = Breakpoint c ids in - ( L pos (HsTick c ids (L pos e)) + ( tickish , fvs , st {tickBoxCount=c+1,mixEntries=me:mes} ) -allocTickBox _boxLabel pos m = do e <- m; return (L pos e) --- the tick application inherits the source position of its --- expression argument to support nested box allocations -allocATickBox :: BoxLabel -> SrcSpan -> FreeVars -> TM (Maybe (Int,[Id])) -allocATickBox boxLabel pos fvs | isGoodSrcSpan' pos = - sameFileName pos - (return Nothing) $ TM $ \ env st -> - let mydecl_path - | null (declPath env), TopLevelBox x <- boxLabel = x - | otherwise = declPath env - me = (pos, mydecl_path, map (nameOccName.idName) ids, boxLabel) - c = tickBoxCount st - mes = mixEntries st - ids = occEnvElts fvs - in ( Just (c, ids) - , noFVs - , st {tickBoxCount=c+1, mixEntries=me:mes} - ) -allocATickBox _boxLabel _pos _fvs = return Nothing allocBinTickBox :: (Bool -> BoxLabel) -> SrcSpan -> TM (HsExpr Id) -> TM (LHsExpr Id) allocBinTickBox boxLabel pos m - | not opt_Hpc = allocTickBox (ExpBox False) pos m + | not opt_Hpc = allocTickBox (ExpBox False) False False pos m | isGoodSrcSpan' pos = do e <- m @@ -823,7 +1011,7 @@ allocBinTickBox boxLabel pos m c = tickBoxCount st mes = mixEntries st in - ( L pos $ HsTick c [] $ L pos $ HsBinTick (c+1) (c+2) (L pos e) + ( L pos $ HsTick (HpcTick (this_mod env) c) $ L pos $ HsBinTick (c+1) (c+2) (L pos e) -- notice that F and T are reversed, -- because we are building the list in -- reverse... diff --git a/compiler/deSugar/Desugar.lhs b/compiler/deSugar/Desugar.lhs index d36883462c..b8f1af3a65 100644 --- a/compiler/deSugar/Desugar.lhs +++ b/compiler/deSugar/Desugar.lhs @@ -89,7 +89,6 @@ deSugar hsc_env -- Desugar the program ; let export_set = availsToNameSet exports - ; let auto_scc = mkAutoScc dflags mod export_set ; let target = hscTarget dflags ; let hpcInfo = emptyHpcInfo other_hpc_info ; (msgs, mb_res) @@ -98,15 +97,23 @@ deSugar hsc_env return (emptyMessages, Just ([], nilOL, [], [], NoStubs, hpcInfo, emptyModBreaks)) _ -> do + + let want_ticks = opt_Hpc + || target == HscInterpreted + || (opt_SccProfilingOn + && case profAuto dflags of + NoProfAuto -> False + _ -> True) + (binds_cvr,ds_hpc_info, modBreaks) - <- if (opt_Hpc - || target == HscInterpreted) - && (not (isHsBoot hsc_src)) - then addCoverageTicksToBinds dflags mod mod_loc tcs binds + <- if want_ticks && not (isHsBoot hsc_src) + then addTicksToBinds dflags mod mod_loc export_set + (typeEnvTyCons type_env) binds else return (binds, hpcInfo, emptyModBreaks) + initDs hsc_env mod rdr_env type_env $ do do { ds_ev_binds <- dsEvBinds ev_binds - ; core_prs <- dsTopLHsBinds auto_scc binds_cvr + ; core_prs <- dsTopLHsBinds binds_cvr ; (spec_prs, spec_rules) <- dsImpSpecs imp_specs ; (ds_fords, foreign_prs) <- dsForeigns fords ; ds_rules <- mapMaybeM dsRule rules @@ -215,22 +222,6 @@ and Rec the rest. \begin{code} -mkAutoScc :: DynFlags -> Module -> NameSet -> AutoScc -mkAutoScc dflags mod exports - | not opt_SccProfilingOn -- No profiling - = NoSccs - -- Add auto-scc on all top-level things - | dopt Opt_AutoSccsOnAllToplevs dflags - = AddSccs mod (\id -> not $ isDerivedOccName $ getOccName id) - -- See #1641. This is pretty yucky, but I can't see a better way - -- to identify compiler-generated Ids, and at least this should - -- catch them all. - -- Only on exported things - | dopt Opt_AutoSccsOnExportedToplevs dflags - = AddSccs mod (\id -> idName id `elemNameSet` exports) - | otherwise - = NoSccs - deSugarExpr :: HscEnv -> Module -> GlobalRdrEnv -> TypeEnv -> LHsExpr Id diff --git a/compiler/deSugar/DsArrows.lhs b/compiler/deSugar/DsArrows.lhs index d7d5e7023b..b3fdc8f8b7 100644 --- a/compiler/deSugar/DsArrows.lhs +++ b/compiler/deSugar/DsArrows.lhs @@ -557,10 +557,9 @@ dsCmd _ids local_vars env_ids _stack _res_ty (HsArrForm op _ args) = do unionVarSets fv_sets) -dsCmd ids local_vars env_ids stack res_ty (HsTick ix vars expr) = do +dsCmd ids local_vars env_ids stack res_ty (HsTick tickish expr) = do (expr1,id_set) <- dsLCmd ids local_vars env_ids stack res_ty expr - expr2 <- mkTickBox ix vars expr1 - return (expr2,id_set) + return (Tick tickish expr1, id_set) dsCmd _ _ _ _ _ c = pprPanic "dsCmd" (ppr c) diff --git a/compiler/deSugar/DsBinds.lhs b/compiler/deSugar/DsBinds.lhs index 2b2b3229d7..f207074cd8 100644 --- a/compiler/deSugar/DsBinds.lhs +++ b/compiler/deSugar/DsBinds.lhs @@ -11,8 +11,7 @@ lower levels it is preserved with @let@/@letrec@s). \begin{code} module DsBinds ( dsTopLHsBinds, dsLHsBinds, decomposeRuleLhs, dsSpec, - dsHsWrapper, dsTcEvBinds, dsEvBinds, - AutoScc(..) + dsHsWrapper, dsTcEvBinds, dsEvBinds, ) where #include "HsVersions.h" @@ -39,8 +38,6 @@ import TcType import Type import Coercion hiding (substCo) import TysWiredIn ( eqBoxDataCon, tupleCon ) -import CostCentre -import Module import Id import Class import DataCon ( dataConWorkId ) @@ -69,70 +66,68 @@ import MonadUtils %************************************************************************ \begin{code} -dsTopLHsBinds :: AutoScc -> LHsBinds Id -> DsM (OrdList (Id,CoreExpr)) -dsTopLHsBinds auto_scc binds = ds_lhs_binds auto_scc binds +dsTopLHsBinds :: LHsBinds Id -> DsM (OrdList (Id,CoreExpr)) +dsTopLHsBinds binds = ds_lhs_binds binds dsLHsBinds :: LHsBinds Id -> DsM [(Id,CoreExpr)] -dsLHsBinds binds = do { binds' <- ds_lhs_binds NoSccs binds +dsLHsBinds binds = do { binds' <- ds_lhs_binds binds ; return (fromOL binds') } ------------------------ -ds_lhs_binds :: AutoScc -> LHsBinds Id -> DsM (OrdList (Id,CoreExpr)) +ds_lhs_binds :: LHsBinds Id -> DsM (OrdList (Id,CoreExpr)) - -- scc annotation policy (see below) -ds_lhs_binds auto_scc binds = do { ds_bs <- mapBagM (dsLHsBind auto_scc) binds - ; return (foldBag appOL id nilOL ds_bs) } +ds_lhs_binds binds = do { ds_bs <- mapBagM dsLHsBind binds + ; return (foldBag appOL id nilOL ds_bs) } -dsLHsBind :: AutoScc -> LHsBind Id -> DsM (OrdList (Id,CoreExpr)) -dsLHsBind auto_scc (L loc bind) - = putSrcSpanDs loc $ dsHsBind auto_scc bind +dsLHsBind :: LHsBind Id -> DsM (OrdList (Id,CoreExpr)) +dsLHsBind (L loc bind) + = putSrcSpanDs loc $ dsHsBind bind -dsHsBind :: AutoScc -> HsBind Id -> DsM (OrdList (Id,CoreExpr)) +dsHsBind :: HsBind Id -> DsM (OrdList (Id,CoreExpr)) -dsHsBind _ (VarBind { var_id = var, var_rhs = expr, var_inline = inline_regardless }) +dsHsBind (VarBind { var_id = var, var_rhs = expr, var_inline = inline_regardless }) = do { core_expr <- dsLExpr expr -- Dictionary bindings are always VarBinds, -- so we only need do this here - ; core_expr' <- addDictScc var core_expr - ; let var' | inline_regardless = var `setIdUnfolding` mkCompulsoryUnfolding core_expr' + ; let var' | inline_regardless = var `setIdUnfolding` mkCompulsoryUnfolding core_expr | otherwise = var - ; return (unitOL (makeCorePair var' False 0 core_expr')) } + ; return (unitOL (makeCorePair var' False 0 core_expr)) } -dsHsBind auto_scc (FunBind { fun_id = L _ fun, fun_matches = matches - , fun_co_fn = co_fn, fun_tick = tick - , fun_infix = inf }) +dsHsBind (FunBind { fun_id = L _ fun, fun_matches = matches + , fun_co_fn = co_fn, fun_tick = tick + , fun_infix = inf }) = do { (args, body) <- matchWrapper (FunRhs (idName fun) inf) matches - ; body' <- mkOptTickBox tick body - ; wrap_fn' <- dsHsWrapper co_fn - ; let rhs = addAutoScc auto_scc fun $ wrap_fn' (mkLams args body') - ; return (unitOL (makeCorePair fun False 0 rhs)) } - -dsHsBind auto_scc (PatBind { pat_lhs = pat, pat_rhs = grhss, pat_rhs_ty = ty }) + ; let body' = mkOptTickBox tick body + ; wrap_fn' <- dsHsWrapper co_fn + ; let rhs = wrap_fn' (mkLams args body') + ; {- pprTrace "dsHsBind" (ppr fun <+> ppr (idInlinePragma fun)) $ -} + return (unitOL (makeCorePair fun False 0 rhs)) } + +dsHsBind (PatBind { pat_lhs = pat, pat_rhs = grhss, pat_rhs_ty = ty + , pat_ticks = (rhs_tick, var_ticks) }) = do { body_expr <- dsGuarded grhss ty - ; sel_binds <- mkSelectorBinds pat body_expr + ; let body' = mkOptTickBox rhs_tick body_expr + ; sel_binds <- mkSelectorBinds var_ticks pat body' -- We silently ignore inline pragmas; no makeCorePair -- Not so cool, but really doesn't matter - ; let sel_binds' = [ (v, addAutoScc auto_scc v expr) - | (v, expr) <- sel_binds ] - ; return (toOL sel_binds') } + ; return (toOL sel_binds) } -- A common case: one exported variable -- Non-recursive bindings come through this way -- So do self-recursive bindings, and recursive bindings -- that have been chopped up with type signatures -dsHsBind auto_scc (AbsBinds { abs_tvs = tyvars, abs_ev_vars = dicts - , abs_exports = [export] - , abs_ev_binds = ev_binds, abs_binds = binds }) +dsHsBind (AbsBinds { abs_tvs = tyvars, abs_ev_vars = dicts + , abs_exports = [export] + , abs_ev_binds = ev_binds, abs_binds = binds }) | ABE { abe_wrap = wrap, abe_poly = global , abe_mono = local, abe_prags = prags } <- export - = do { bind_prs <- ds_lhs_binds NoSccs binds + = do { bind_prs <- ds_lhs_binds binds ; ds_ev_binds <- dsTcEvBinds ev_binds ; wrap_fn <- dsHsWrapper wrap ; let core_bind = Rec (fromOL bind_prs) - rhs = addAutoScc auto_scc global $ - wrap_fn $ -- Usually the identity + rhs = wrap_fn $ -- Usually the identity mkLams tyvars $ mkLams dicts $ mkCoreLets ds_ev_binds $ Let core_bind $ @@ -146,17 +141,12 @@ dsHsBind auto_scc (AbsBinds { abs_tvs = tyvars, abs_ev_vars = dicts ; return (main_bind `consOL` spec_binds) } -dsHsBind auto_scc (AbsBinds { abs_tvs = tyvars, abs_ev_vars = dicts - , abs_exports = exports, abs_ev_binds = ev_binds - , abs_binds = binds }) - = do { bind_prs <- ds_lhs_binds NoSccs binds +dsHsBind (AbsBinds { abs_tvs = tyvars, abs_ev_vars = dicts + , abs_exports = exports, abs_ev_binds = ev_binds + , abs_binds = binds }) + = do { bind_prs <- ds_lhs_binds binds ; ds_ev_binds <- dsTcEvBinds ev_binds - ; let env = mkABEnv exports - do_one (lcl_id,rhs) | Just export <- lookupVarEnv env lcl_id - = (lcl_id, addAutoScc auto_scc (abe_poly export) rhs) - | otherwise = (lcl_id,rhs) - - core_bind = Rec (map do_one (fromOL bind_prs)) + ; let core_bind = Rec (fromOL bind_prs) -- Monomorphic recursion possible, hence Rec tup_expr = mkBigCoreVarTup locals @@ -181,8 +171,7 @@ dsHsBind auto_scc (AbsBinds { abs_tvs = tyvars, abs_ev_vars = dicts ; let global' = addIdSpecialisations global rules ; return ((global', rhs) `consOL` spec_binds) } - ; export_binds_s <- mapM mk_bind exports - -- Don't scc (auto-)annotate the tuple itself. + ; export_binds_s <- mapM mk_bind exports ; return ((poly_tup_id, poly_tup_rhs) `consOL` concatOL export_binds_s) } @@ -310,17 +299,6 @@ makeCorePair gbl_id is_default_method dict_arity rhs dictArity :: [Var] -> Arity -- Don't count coercion variables in arity dictArity dicts = count isId dicts - - ------------------------- -type AbsBindEnv = VarEnv (ABExport Id) - -- Maps the "lcl_id" for an AbsBind to - -- its "gbl_id" and associated pragmas, if any - -mkABEnv :: [ABExport Id] -> AbsBindEnv --- Takes the exports of a AbsBinds, and returns a mapping --- lcl_id -> (tyvars, gbl_id, lcl_id, prags) -mkABEnv exports = mkVarEnv [ (abe_mono export, export) | export <- exports] \end{code} Note [Rules and inlining] @@ -691,52 +669,6 @@ as the old one, but with an Internal name and no IdInfo. %************************************************************************ %* * -\subsection[addAutoScc]{Adding automatic sccs} -%* * -%************************************************************************ - -\begin{code} -data AutoScc = NoSccs - | AddSccs Module (Id -> Bool) --- The (Id->Bool) says which Ids to add SCCs to --- But we never add a SCC to function marked INLINE - -addAutoScc :: AutoScc - -> Id -- Binder - -> CoreExpr -- Rhs - -> CoreExpr -- Scc'd Rhs - -addAutoScc NoSccs _ rhs - = rhs -addAutoScc _ id rhs | isInlinePragma (idInlinePragma id) - = rhs -addAutoScc (AddSccs mod add_scc) id rhs - | add_scc id = mkSCC (mkAutoCC id mod NotCafCC) rhs - | otherwise = rhs -\end{code} - -If profiling and dealing with a dict binding, -wrap the dict in @_scc_ DICT <dict>@: - -\begin{code} -addDictScc :: Id -> CoreExpr -> DsM CoreExpr -addDictScc _ rhs = return rhs - -{- DISABLED for now (need to somehow make up a name for the scc) -- SDM - | not ( opt_SccProfilingOn && opt_AutoSccsOnDicts) - || not (isDictId var) - = return rhs -- That's easy: do nothing - - | otherwise - = do (mod, grp) <- getModuleAndGroupDs - -- ToDo: do -dicts-all flag (mark dict things with individual CCs) - return (Note (SCC (mkAllDictsCC mod grp False)) rhs) --} -\end{code} - - -%************************************************************************ -%* * Desugaring coercions %* * %************************************************************************ diff --git a/compiler/deSugar/DsExpr.lhs b/compiler/deSugar/DsExpr.lhs index 6b476a6ca3..3d79ce7150 100644 --- a/compiler/deSugar/DsExpr.lhs +++ b/compiler/deSugar/DsExpr.lhs @@ -148,7 +148,7 @@ dsStrictBind (FunBind { fun_id = L _ fun, fun_matches = matches, fun_co_fn = co_ = do { (args, rhs) <- matchWrapper (FunRhs (idName fun ) inf) matches ; MASSERT( null args ) -- Functions aren't lifted ; MASSERT( isIdHsWrapper co_fn ) - ; rhs' <- mkOptTickBox tick rhs + ; let rhs' = mkOptTickBox tick rhs ; return (bindNonRec fun rhs' body) } dsStrictBind (PatBind {pat_lhs = pat, pat_rhs = grhss, pat_rhs_ty = ty }) body @@ -317,10 +317,11 @@ dsExpr (ExplicitTuple tup_args boxity) dsExpr (HsSCC cc expr) = do mod_name <- getModuleDs - Note (SCC (mkUserCC cc mod_name)) <$> dsLExpr expr + count <- doptDs Opt_ProfCountEntries + Tick (ProfNote (mkUserCC cc mod_name) count True) <$> dsLExpr expr -dsExpr (HsCoreAnn fs expr) - = Note (CoreNote $ unpackFS fs) <$> dsLExpr expr +dsExpr (HsCoreAnn _ expr) + = dsLExpr expr dsExpr (HsCase discrim matches@(MatchGroup _ rhs_ty)) | isEmptyMatchGroup matches -- A Core 'case' is always non-empty @@ -586,9 +587,9 @@ dsExpr (HsProc pat cmd) = dsProcExpr pat cmd Hpc Support \begin{code} -dsExpr (HsTick ix vars e) = do +dsExpr (HsTick tickish e) = do e' <- dsLExpr e - mkTickBox ix vars e' + return (Tick tickish e') -- There is a problem here. The then and else branches -- have no free variables, so they are open to lifting. diff --git a/compiler/deSugar/DsGRHSs.lhs b/compiler/deSugar/DsGRHSs.lhs index d3fcf76d1c..9b80852a86 100644 --- a/compiler/deSugar/DsGRHSs.lhs +++ b/compiler/deSugar/DsGRHSs.lhs @@ -140,12 +140,17 @@ isTrueLHsExpr (L _ (HsVar v)) | v `hasKey` otherwiseIdKey || v `hasKey` getUnique trueDataConId = Just return -- trueDataConId doesn't have the same unique as trueDataCon -isTrueLHsExpr (L _ (HsTick ix frees e)) - | Just ticks <- isTrueLHsExpr e = Just (\x -> ticks x >>= mkTickBox ix frees) +isTrueLHsExpr (L _ (HsTick tickish e)) + | Just ticks <- isTrueLHsExpr e + = Just (\x -> ticks x >>= return . (Tick tickish)) -- This encodes that the result is constant True for Hpc tick purposes; -- which is specifically what isTrueLHsExpr is trying to find out. isTrueLHsExpr (L _ (HsBinTick ixT _ e)) - | Just ticks <- isTrueLHsExpr e = Just (\x -> ticks x >>= mkTickBox ixT []) + | Just ticks <- isTrueLHsExpr e + = Just (\x -> do e <- ticks x + this_mod <- getModuleDs + return (Tick (HpcTick this_mod ixT) e)) + isTrueLHsExpr (L _ (HsPar e)) = isTrueLHsExpr e isTrueLHsExpr _ = Nothing \end{code} diff --git a/compiler/deSugar/DsUtils.lhs b/compiler/deSugar/DsUtils.lhs index 462137ade8..1bdeafb411 100644 --- a/compiler/deSugar/DsUtils.lhs +++ b/compiler/deSugar/DsUtils.lhs @@ -35,7 +35,7 @@ module DsUtils ( dsSyntaxTable, lookupEvidence, selectSimpleMatchVarL, selectMatchVars, selectMatchVar, - mkTickBox, mkOptTickBox, mkBinaryTickBox + mkOptTickBox, mkBinaryTickBox ) where #include "HsVersions.h" @@ -70,7 +70,8 @@ import SrcLoc import Util import ListSetOps import FastString -import StaticFlags + +import Control.Monad ( zipWithM ) \end{code} @@ -568,14 +569,17 @@ cases like (p,q) = e \begin{code} -mkSelectorBinds :: LPat Id -- The pattern +mkSelectorBinds :: [Maybe (Tickish Id)] -- ticks to add, possibly + -> LPat Id -- The pattern -> CoreExpr -- Expression to which the pattern is bound -> DsM [(Id,CoreExpr)] -mkSelectorBinds (L _ (VarPat v)) val_expr - = return [(v, val_expr)] +mkSelectorBinds ticks (L _ (VarPat v)) val_expr + = return [(v, case ticks of + [t] -> mkOptTickBox t val_expr + _ -> val_expr)] -mkSelectorBinds pat val_expr +mkSelectorBinds ticks pat val_expr | null binders = return [] @@ -599,7 +603,7 @@ mkSelectorBinds pat val_expr -- But we need it at different types... so we use coerce for that ; err_expr <- mkErrorAppDs iRREFUT_PAT_ERROR_ID unitTy (ppr pat) ; err_var <- newSysLocalDs unitTy - ; binds <- mapM (mk_bind val_var err_var) binders + ; binds <- zipWithM (mk_bind val_var err_var) ticks' binders ; return ( (val_var, val_expr) : (err_var, err_expr) : binds ) } @@ -608,22 +612,26 @@ mkSelectorBinds pat val_expr = do { error_expr <- mkErrorAppDs iRREFUT_PAT_ERROR_ID tuple_ty (ppr pat) ; tuple_expr <- matchSimply val_expr PatBindRhs pat local_tuple error_expr ; tuple_var <- newSysLocalDs tuple_ty - ; let mk_tup_bind binder - = (binder, mkTupleSelector local_binders binder tuple_var (Var tuple_var)) - ; return ( (tuple_var, tuple_expr) : map mk_tup_bind binders ) } + ; let mk_tup_bind tick binder + = (binder, mkOptTickBox tick $ + mkTupleSelector local_binders binder + tuple_var (Var tuple_var)) + ; return ( (tuple_var, tuple_expr) : zipWith mk_tup_bind ticks' binders ) } where binders = collectPatBinders pat - local_binders = map localiseId binders -- See Note [Localise pattern binders] + ticks' = ticks ++ repeat Nothing + + local_binders = map localiseId binders -- See Note [Localise pattern binders] local_tuple = mkBigCoreVarTup binders tuple_ty = exprType local_tuple - mk_bind scrut_var err_var bndr_var = do + mk_bind scrut_var err_var tick bndr_var = do -- (mk_bind sv err_var) generates -- bv = case sv of { pat -> bv; other -> coerce (type-of-bv) err_var } -- Remember, pat binds bv rhs_expr <- matchSimply (Var scrut_var) PatBindRhs pat (Var bndr_var) error_expr - return (bndr_var, rhs_expr) + return (bndr_var, mkOptTickBox tick rhs_expr) where error_expr = mkCoerce co (Var err_var) co = mkUnsafeCo (exprType (Var err_var)) (idType bndr_var) @@ -767,38 +775,19 @@ CPR-friendly. This matters a lot: if you don't get it right, you lose the tail call property. For example, see Trac #3403. \begin{code} -mkOptTickBox :: Maybe (Int,[Id]) -> CoreExpr -> DsM CoreExpr -mkOptTickBox Nothing e = return e -mkOptTickBox (Just (ix,ids)) e = mkTickBox ix ids e - -mkTickBox :: Int -> [Id] -> CoreExpr -> DsM CoreExpr -mkTickBox ix vars e = do - uq <- newUnique - mod <- getModuleDs - let tick | opt_Hpc = mkTickBoxOpId uq mod ix - | otherwise = mkBreakPointOpId uq mod ix - uq2 <- newUnique - let occName = mkVarOcc "tick" - let name = mkInternalName uq2 occName noSrcSpan -- use mkSysLocal? - let var = Id.mkLocalId name realWorldStatePrimTy - scrut <- - if opt_Hpc - then return (Var tick) - else do - let tickVar = Var tick - let tickType = mkFunTys (map idType vars) realWorldStatePrimTy - let scrutApTy = App tickVar (Type tickType) - return (mkApps scrutApTy (map Var vars) :: Expr Id) - return $ Case scrut var ty [(DEFAULT,[],e)] - where - ty = exprType e +mkOptTickBox :: Maybe (Tickish Id) -> CoreExpr -> CoreExpr +mkOptTickBox Nothing e = e +mkOptTickBox (Just tickish) e = Tick tickish e mkBinaryTickBox :: Int -> Int -> CoreExpr -> DsM CoreExpr mkBinaryTickBox ixT ixF e = do uq <- newUnique - let bndr1 = mkSysLocal (fsLit "t1") uq boolTy - falseBox <- mkTickBox ixF [] $ Var falseDataConId - trueBox <- mkTickBox ixT [] $ Var trueDataConId + this_mod <- getModuleDs + let bndr1 = mkSysLocal (fsLit "t1") uq boolTy + let + falseBox = Tick (HpcTick this_mod ixF) (Var falseDataConId) + trueBox = Tick (HpcTick this_mod ixT) (Var trueDataConId) + -- return $ Case e bndr1 boolTy [ (DataAlt falseDataCon, [], falseBox) , (DataAlt trueDataCon, [], trueBox) diff --git a/compiler/deSugar/Match.lhs b/compiler/deSugar/Match.lhs index d2a56d1848..69f378eb1b 100644 --- a/compiler/deSugar/Match.lhs +++ b/compiler/deSugar/Match.lhs @@ -492,7 +492,7 @@ tidy1 v (AsPat (L _ var) pat) -} tidy1 v (LazyPat pat) - = do { sel_prs <- mkSelectorBinds pat (Var v) + = do { sel_prs <- mkSelectorBinds [] pat (Var v) ; let sel_binds = [NonRec b rhs | (b,rhs) <- sel_prs] ; return (mkCoreLets sel_binds, WildPat (idType v)) } diff --git a/compiler/ghci/ByteCodeGen.lhs b/compiler/ghci/ByteCodeGen.lhs index 71a21c185f..34d976ec2e 100644 --- a/compiler/ghci/ByteCodeGen.lhs +++ b/compiler/ghci/ByteCodeGen.lhs @@ -55,7 +55,6 @@ import UniqSupply import BreakArray import Data.Maybe import Module -import IdInfo import Data.Map (Map) import qualified Data.Map as Map @@ -65,19 +64,20 @@ import qualified FiniteMap as Map -- Generating byte code for a complete module byteCodeGen :: DynFlags + -> Module -> CoreProgram -> [TyCon] -> ModBreaks -> IO CompiledByteCode -byteCodeGen dflags binds tycs modBreaks +byteCodeGen dflags this_mod binds tycs modBreaks = do showPass dflags "ByteCodeGen" let flatBinds = [ (bndr, freeVars rhs) | (bndr, rhs) <- flattenBinds binds] us <- mkSplitUniqSupply 'y' - (BcM_State _us _final_ctr mallocd _, proto_bcos) - <- runBc us modBreaks (mapM schemeTopBind flatBinds) + (BcM_State _us _this_mod _final_ctr mallocd _, proto_bcos) + <- runBc us this_mod modBreaks (mapM schemeTopBind flatBinds) when (notNull mallocd) (panic "ByteCodeGen.byteCodeGen: missing final emitBc?") @@ -93,9 +93,10 @@ byteCodeGen dflags binds tycs modBreaks -- Returns: (the root BCO for this expression, -- a list of auxilary BCOs resulting from compiling closures) coreExprToBCOs :: DynFlags + -> Module -> CoreExpr -> IO UnlinkedBCO -coreExprToBCOs dflags expr +coreExprToBCOs dflags this_mod expr = do showPass dflags "ByteCodeGen" -- create a totally bogus name for the top-level BCO; this @@ -106,8 +107,9 @@ coreExprToBCOs dflags expr -- the uniques are needed to generate fresh variables when we introduce new -- let bindings for ticked expressions us <- mkSplitUniqSupply 'y' - (BcM_State _us _final_ctr mallocd _ , proto_bco) - <- runBc us emptyModBreaks (schemeTopBind (invented_id, freeVars expr)) + (BcM_State _us _this_mod _final_ctr mallocd _ , proto_bco) + <- runBc us this_mod emptyModBreaks $ + schemeTopBind (invented_id, freeVars expr) when (notNull mallocd) (panic "ByteCodeGen.coreExprToBCOs: missing final emitBc?") @@ -291,25 +293,25 @@ schemeR_wrk fvs nm original_body (args, body) -- introduce break instructions for ticked expressions schemeER_wrk :: Word16 -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList schemeER_wrk d p rhs - | Just (tickInfo, (_annot, newRhs)) <- isTickedExp' rhs = do - code <- schemeE d 0 p newRhs + | AnnTick (Breakpoint tick_no fvs) (_annot, newRhs) <- rhs + = do code <- schemeE d 0 p newRhs arr <- getBreakArray - let idOffSets = getVarOffSets d p tickInfo - let tickNumber = tickInfo_number tickInfo + this_mod <- getCurrentModule + let idOffSets = getVarOffSets d p fvs let breakInfo = BreakInfo - { breakInfo_module = tickInfo_module tickInfo - , breakInfo_number = tickNumber + { breakInfo_module = this_mod + , breakInfo_number = tick_no , breakInfo_vars = idOffSets , breakInfo_resty = exprType (deAnnotate' newRhs) } let breakInstr = case arr of BA arr# -> - BRK_FUN arr# (fromIntegral tickNumber) breakInfo + BRK_FUN arr# (fromIntegral tick_no) breakInfo return $ breakInstr `consOL` code | otherwise = schemeE d 0 p rhs -getVarOffSets :: Word16 -> BCEnv -> TickInfo -> [(Id, Word16)] -getVarOffSets d p = catMaybes . map (getOffSet d p) . tickInfo_locals +getVarOffSets :: Word16 -> BCEnv -> [Id] -> [(Id, Word16)] +getVarOffSets d p = catMaybes . map (getOffSet d p) getOffSet :: Word16 -> BCEnv -> Id -> Maybe (Id, Word16) getOffSet d env id @@ -333,19 +335,7 @@ fvsToEnv p fvs = [v | v <- varSetElems fvs, -- ----------------------------------------------------------------------------- -- schemeE -data TickInfo - = TickInfo - { tickInfo_number :: Int -- the (module) unique number of the tick - , tickInfo_module :: Module -- the origin of the ticked expression - , tickInfo_locals :: [Id] -- the local vars in scope at the ticked expression - } - -instance Outputable TickInfo where - ppr info = text "TickInfo" <+> - parens (int (tickInfo_number info) <+> ppr (tickInfo_module info) <+> - ppr (tickInfo_locals info)) - -returnUnboxedAtom :: Word16 -> Sequel -> BCEnv +returnUnboxedAtom :: Word16 -> Sequel -> BCEnv -> AnnExpr' Id VarSet -> CgRep -> BcM BCInstrList -- Returning an unlifted value. @@ -451,12 +441,11 @@ schemeE d s p (AnnLet binds (_,body)) -- call exprFreeVars on a deAnnotated expression, this may not be the -- best way to calculate the free vars but it seemed like the least -- intrusive thing to do -schemeE d s p exp@(AnnCase {}) - | Just (_tickInfo, _rhs) <- isTickedExp' exp +schemeE d s p exp@(AnnTick (Breakpoint _id _fvs) _rhs) = if isUnLiftedType ty then do -- If the result type is unlifted, then we must generate - -- let f = \s . case tick# of _ -> e + -- let f = \s . tick<n> e -- in f realWorld# -- When we stop at the breakpoint, _result will have an unlifted -- type and hence won't be bound in the environment, but the @@ -476,6 +465,9 @@ schemeE d s p exp@(AnnCase {}) fvs = exprFreeVars exp' ty = exprType exp' +-- ignore other kinds of tick +schemeE d s p (AnnTick _ (_, rhs)) = schemeE d s p rhs + schemeE d s p (AnnCase scrut _ _ [(DataAlt dc, [bind1, bind2], rhs)]) | isUnboxedTupleCon dc, VoidArg <- typeCgRep (idType bind1) -- Convert @@ -514,50 +506,12 @@ schemeE _ _ _ expr Ticked Expressions ------------------ - A ticked expression looks like this: - - case tick<n> var1 ... varN of DEFAULT -> e - - (*) <n> is the number of the tick, which is unique within a module - (*) var1 ... varN are the local variables in scope at the tick site - - If we find a ticked expression we return: - - Just ((n, [var1 ... varN]), e) - - otherwise we return Nothing. - - The idea is that the "case tick<n> ..." is really just an annotation on + The idea is that the "breakpoint<n,fvs> E" is really just an annotation on the code. When we find such a thing, we pull out the useful information, - and then compile the code as if it was just the expression "e". + and then compile the code as if it was just the expression E. -} -isTickedExp' :: AnnExpr' Id a -> Maybe (TickInfo, AnnExpr Id a) -isTickedExp' (AnnCase scrut _bndr _type alts) - | Just tickInfo <- isTickedScrut scrut, - [(DEFAULT, _bndr, rhs)] <- alts - = Just (tickInfo, rhs) - where - isTickedScrut :: (AnnExpr Id a) -> Maybe TickInfo - isTickedScrut expr - | Var id <- f, - Just (TickBox modName tickNumber) <- isTickBoxOp_maybe id - = Just $ TickInfo { tickInfo_number = tickNumber - , tickInfo_module = modName - , tickInfo_locals = idsOfArgs args - } - | otherwise = Nothing - where - (f, args) = collectArgs $ deAnnotate expr - idsOfArgs :: [Expr Id] -> [Id] - idsOfArgs = catMaybes . map exprId - exprId :: Expr Id -> Maybe Id - exprId (Var id) = Just id - exprId _ = Nothing - -isTickedExp' _ = Nothing - -- Compile code to do a tail call. Specifically, push the fn, -- slide the on-stack app back down to the sequel depth, -- and enter. Four cases: @@ -1452,13 +1406,14 @@ bcView :: AnnExpr' Var ann -> Maybe (AnnExpr' Var ann) -- a) type abstractions -- b) type applications -- c) casts --- d) notes +-- d) ticks (but not breakpoints) -- Type lambdas *can* occur in random expressions, -- whereas value lambdas cannot; that is why they are nuked here -bcView (AnnNote _ (_,e)) = Just e -bcView (AnnCast (_,e) _) = Just e +bcView (AnnCast (_,e) _) = Just e bcView (AnnLam v (_,e)) | isTyVar v = Just e bcView (AnnApp (_,e) (_, AnnType _)) = Just e +bcView (AnnTick Breakpoint{} _) = Nothing +bcView (AnnTick _other_tick (_,e)) = Just e bcView _ = Nothing isVoidArgAtom :: AnnExpr' Var ann -> Bool @@ -1493,12 +1448,13 @@ mkStackOffsets original_depth szsw type BcPtr = Either ItblPtr (Ptr ()) data BcM_State - = BcM_State { - uniqSupply :: UniqSupply, -- for generating fresh variable names - nextlabel :: Word16, -- for generating local labels - malloced :: [BcPtr], -- thunks malloced for current BCO - -- Should be free()d when it is GCd - breakArray :: BreakArray -- array of breakpoint flags + = BcM_State + { uniqSupply :: UniqSupply -- for generating fresh variable names + , thisModule :: Module -- current module (for breakpoints) + , nextlabel :: Word16 -- for generating local labels + , malloced :: [BcPtr] -- thunks malloced for current BCO + -- Should be free()d when it is GCd + , breakArray :: BreakArray -- array of breakpoint flags } newtype BcM r = BcM (BcM_State -> IO (BcM_State, r)) @@ -1508,9 +1464,9 @@ ioToBc io = BcM $ \st -> do x <- io return (st, x) -runBc :: UniqSupply -> ModBreaks -> BcM r -> IO (BcM_State, r) -runBc us modBreaks (BcM m) - = m (BcM_State us 0 [] breakArray) +runBc :: UniqSupply -> Module -> ModBreaks -> BcM r -> IO (BcM_State, r) +runBc us this_mod modBreaks (BcM m) + = m (BcM_State us this_mod 0 [] breakArray) where breakArray = modBreaks_flags modBreaks @@ -1568,6 +1524,9 @@ newUnique = BcM $ (uniq, us) -> let newState = st { uniqSupply = us } in return (newState, uniq) +getCurrentModule :: BcM Module +getCurrentModule = BcM $ \st -> return (st, thisModule st) + newId :: Type -> BcM Id newId ty = do uniq <- newUnique diff --git a/compiler/hsSyn/Convert.lhs b/compiler/hsSyn/Convert.lhs index 5ece574e25..f9c275e4f3 100644 --- a/compiler/hsSyn/Convert.lhs +++ b/compiler/hsSyn/Convert.lhs @@ -130,7 +130,8 @@ cvtDec (TH.ValD pat body ds) ; ds' <- cvtLocalDecs (ptext (sLit "a where clause")) ds ; returnL $ Hs.ValD $ PatBind { pat_lhs = pat', pat_rhs = GRHSs body' ds' - , pat_rhs_ty = void, bind_fvs = placeHolderNames } } + , pat_rhs_ty = void, bind_fvs = placeHolderNames + , pat_ticks = (Nothing,[]) } } cvtDec (TH.FunD nm cls) | null cls diff --git a/compiler/hsSyn/HsBinds.lhs b/compiler/hsSyn/HsBinds.lhs index 31e7c29798..56d0040788 100644 --- a/compiler/hsSyn/HsBinds.lhs +++ b/compiler/hsSyn/HsBinds.lhs @@ -18,6 +18,7 @@ import {-# SOURCE #-} HsPat ( LPat ) import HsTypes import PprCore () +import CoreSyn import Coercion import Type import Name @@ -120,7 +121,7 @@ data HsBindLR idL idR -- See Note [Bind free vars] - fun_tick :: Maybe (Int,[Id]) -- ^ This is the (optional) module-local tick number. + fun_tick :: Maybe (Tickish Id) -- ^ Tick to put on the rhs, if any } | PatBind { -- The pattern is never a simple variable; @@ -128,7 +129,10 @@ data HsBindLR idL idR pat_lhs :: LPat idL, pat_rhs :: GRHSs idR, pat_rhs_ty :: PostTcType, -- Type of the GRHSs - bind_fvs :: NameSet -- See Note [Bind free vars] + bind_fvs :: NameSet, -- See Note [Bind free vars] + pat_ticks :: (Maybe (Tickish Id), [Maybe (Tickish Id)]) + -- ^ Tick to put on the rhs, if any, and ticks to put on + -- the bound variables. } | VarBind { -- Dictionary binding and suchlike @@ -383,9 +387,12 @@ instance (OutputableBndr id) => Outputable (ABExport id) where pprTicks :: SDoc -> SDoc -> SDoc -- Print stuff about ticks only when -dppr-debug is on, to avoid -- them appearing in error messages (from the desugarer); see Trac # 3263 +-- Also print ticks in dumpStyle, so that -ddump-hpc actually does +-- something useful. pprTicks pp_no_debug pp_when_debug - = getPprStyle (\ sty -> if debugStyle sty then pp_when_debug - else pp_no_debug) + = getPprStyle (\ sty -> if debugStyle sty || dumpStyle sty + then pp_when_debug + else pp_no_debug) \end{code} %************************************************************************ diff --git a/compiler/hsSyn/HsExpr.lhs b/compiler/hsSyn/HsExpr.lhs index 995c66068c..31d65b47db 100644 --- a/compiler/hsSyn/HsExpr.lhs +++ b/compiler/hsSyn/HsExpr.lhs @@ -18,6 +18,7 @@ import HsTypes import HsBinds -- others: +import CoreSyn import Var import Name import BasicTypes @@ -248,8 +249,7 @@ data HsExpr id -- Haskell program coverage (Hpc) Support | HsTick - Int -- module-local tick number - [id] -- variables in scope + (Tickish id) (LHsExpr id) -- sub-expression | HsBinTick @@ -298,6 +298,7 @@ tupArgPresent (Missing {}) = False type PendingSplice = (Name, LHsExpr Id) -- Typechecked splices, waiting to be -- pasted back in by the desugarer + \end{code} Note [Parens in HsSyn] @@ -503,14 +504,9 @@ ppr_expr (HsQuasiQuoteE qq) = ppr qq ppr_expr (HsProc pat (L _ (HsCmdTop cmd _ _ _))) = hsep [ptext (sLit "proc"), ppr pat, ptext (sLit "->"), ppr cmd] -ppr_expr (HsTick tickId vars exp) +ppr_expr (HsTick tickish exp) = pprTicks (ppr exp) $ - hcat [ptext (sLit "tick<"), - ppr tickId, - ptext (sLit ">("), - hsep (map pprHsVar vars), - ppr exp, - ptext (sLit ")")] + ppr tickish <+> ppr exp ppr_expr (HsBinTick tickIdTrue tickIdFalse exp) = pprTicks (ppr exp) $ hcat [ptext (sLit "bintick<"), diff --git a/compiler/iface/BinIface.hs b/compiler/iface/BinIface.hs index eaf5c070ba..d7d6311a7e 100644 --- a/compiler/iface/BinIface.hs +++ b/compiler/iface/BinIface.hs @@ -462,7 +462,6 @@ data BinDictionary = BinDictionary { -- CostCentre {-! for IsCafCC derive: Binary !-} -{-! for IsDupdCC derive: Binary !-} {-! for CostCentre derive: Binary !-} @@ -911,26 +910,14 @@ instance Binary IsCafCC where 0 -> do return CafCC _ -> do return NotCafCC -instance Binary IsDupdCC where - put_ bh OriginalCC = do - putByte bh 0 - put_ bh DupdCC = do - putByte bh 1 - get bh = do - h <- getByte bh - case h of - 0 -> do return OriginalCC - _ -> do return DupdCC - instance Binary CostCentre where put_ bh NoCostCentre = do putByte bh 0 - put_ bh (NormalCC aa ab ac ad) = do + put_ bh (NormalCC aa ab ac) = do putByte bh 1 put_ bh aa put_ bh ab put_ bh ac - put_ bh ad put_ bh (AllCafsCC ae) = do putByte bh 2 put_ bh ae @@ -941,8 +928,7 @@ instance Binary CostCentre where 1 -> do aa <- get bh ab <- get bh ac <- get bh - ad <- get bh - return (NormalCC aa ab ac ad) + return (NormalCC aa ab ac) _ -> do ae <- get bh return (AllCafsCC ae) @@ -1085,7 +1071,7 @@ instance Binary IfaceExpr where putByte bh 7 put_ bh al put_ bh am - put_ bh (IfaceNote an ao) = do + put_ bh (IfaceTick an ao) = do putByte bh 8 put_ bh an put_ bh ao @@ -1103,10 +1089,6 @@ instance Binary IfaceExpr where putByte bh 12 put_ bh ie put_ bh ico - put_ bh (IfaceTick m ix) = do - putByte bh 13 - put_ bh m - put_ bh ix get bh = do h <- getByte bh case h of @@ -1134,7 +1116,7 @@ instance Binary IfaceExpr where return (IfaceLet al am) 8 -> do an <- get bh ao <- get bh - return (IfaceNote an ao) + return (IfaceTick an ao) 9 -> do ap <- get bh return (IfaceLit ap) 10 -> do as <- get bh @@ -1145,9 +1127,6 @@ instance Binary IfaceExpr where 12 -> do ie <- get bh ico <- get bh return (IfaceCast ie ico) - 13 -> do m <- get bh - ix <- get bh - return (IfaceTick m ix) _ -> panic ("get IfaceExpr " ++ show h) instance Binary IfaceConAlt where @@ -1288,21 +1267,28 @@ instance Binary IfaceUnfolding where _ -> do e <- get bh return (IfCompulsory e) -instance Binary IfaceNote where - put_ bh (IfaceSCC aa) = do - putByte bh 0 - put_ bh aa - put_ bh (IfaceCoreNote s) = do - putByte bh 4 - put_ bh s +instance Binary IfaceTickish where + put_ bh (IfaceHpcTick m ix) = do + putByte bh 0 + put_ bh m + put_ bh ix + put_ bh (IfaceSCC cc tick push) = do + putByte bh 1 + put_ bh cc + put_ bh tick + put_ bh push + get bh = do - h <- getByte bh - case h of - 0 -> do aa <- get bh - return (IfaceSCC aa) - 4 -> do ac <- get bh - return (IfaceCoreNote ac) - _ -> panic ("get IfaceNote " ++ show h) + h <- getByte bh + case h of + 0 -> do m <- get bh + ix <- get bh + return (IfaceHpcTick m ix) + 1 -> do cc <- get bh + tick <- get bh + push <- get bh + return (IfaceSCC cc tick push) + _ -> panic ("get IfaceTickish " ++ show h) ------------------------------------------------------------------------- -- IfaceDecl and friends diff --git a/compiler/iface/IfaceEnv.lhs b/compiler/iface/IfaceEnv.lhs index eb34402594..0cecf39d1f 100644 --- a/compiler/iface/IfaceEnv.lhs +++ b/compiler/iface/IfaceEnv.lhs @@ -8,7 +8,6 @@ module IfaceEnv ( newIPName, newIfaceName, newIfaceNames, extendIfaceIdEnv, extendIfaceTyVarEnv, tcIfaceLclId, tcIfaceTyVar, lookupIfaceTyVar, - tcIfaceTick, ifaceExportNames, @@ -34,7 +33,6 @@ import UniqFM import FastString import UniqSupply import SrcLoc -import MkId import BasicTypes import Outputable @@ -320,19 +318,3 @@ newIfaceNames occs ; return [ mkInternalName uniq occ noSrcSpan | (occ,uniq) <- occs `zip` uniqsFromSupply uniqs] } \end{code} - -%************************************************************************ -%* * - (Re)creating tick boxes -%* * -%************************************************************************ - -\begin{code} -tcIfaceTick :: Module -> Int -> IfL Id -tcIfaceTick modName tickNo - = do { uniq <- newUnique - ; return $ mkTickBoxOpId uniq modName tickNo - } -\end{code} - - diff --git a/compiler/iface/IfaceSyn.lhs b/compiler/iface/IfaceSyn.lhs index 9a2e89db70..57b179946e 100644 --- a/compiler/iface/IfaceSyn.lhs +++ b/compiler/iface/IfaceSyn.lhs @@ -9,11 +9,11 @@ module IfaceSyn ( IfaceDecl(..), IfaceClassOp(..), IfaceAT(..), IfaceATDefault(..), IfaceConDecl(..), IfaceConDecls(..), - IfaceExpr(..), IfaceAlt, IfaceNote(..), IfaceLetBndr(..), + IfaceExpr(..), IfaceAlt, IfaceLetBndr(..), IfaceBinding(..), IfaceConAlt(..), IfaceIdInfo(..), IfaceIdDetails(..), IfaceUnfolding(..), IfaceInfoItem(..), IfaceRule(..), IfaceAnnotation(..), IfaceAnnTarget, - IfaceInst(..), IfaceFamInst(..), + IfaceInst(..), IfaceFamInst(..), IfaceTickish(..), -- Misc ifaceDeclSubBndrs, visibleIfConDecls, @@ -243,14 +243,15 @@ data IfaceExpr | IfaceApp IfaceExpr IfaceExpr | IfaceCase IfaceExpr IfLclName [IfaceAlt] | IfaceLet IfaceBinding IfaceExpr - | IfaceNote IfaceNote IfaceExpr | IfaceCast IfaceExpr IfaceCoercion | IfaceLit Literal | IfaceFCall ForeignCall IfaceType - | IfaceTick Module Int + | IfaceTick IfaceTickish IfaceExpr -- from Tick tickish E -data IfaceNote = IfaceSCC CostCentre - | IfaceCoreNote String +data IfaceTickish + = IfaceHpcTick Module Int -- from HpcTick x + | IfaceSCC CostCentre Bool Bool -- from ProfNote + -- no breakpoints: we never export these into interface files type IfaceAlt = (IfaceConAlt, [IfLclName], IfaceExpr) -- Note: IfLclName, not IfaceBndr (and same with the case binder) @@ -573,7 +574,6 @@ pprIfaceExpr _ (IfaceLcl v) = ppr v pprIfaceExpr _ (IfaceExt v) = ppr v pprIfaceExpr _ (IfaceLit l) = ppr l pprIfaceExpr _ (IfaceFCall cc ty) = braces (ppr cc <+> ppr ty) -pprIfaceExpr _ (IfaceTick m ix) = braces (text "tick" <+> ppr m <+> ppr ix) pprIfaceExpr _ (IfaceType ty) = char '@' <+> pprParendIfaceType ty pprIfaceExpr _ (IfaceCo co) = text "@~" <+> pprParendIfaceType co @@ -617,8 +617,8 @@ pprIfaceExpr add_par (IfaceLet (IfaceRec pairs) body) ptext (sLit "} in"), pprIfaceExpr noParens body]) -pprIfaceExpr add_par (IfaceNote note body) = add_par $ ppr note - <+> pprParendIfaceExpr body +pprIfaceExpr add_par (IfaceTick tickish e) + = add_par (pprIfaceTickish tickish <+> pprIfaceExpr noParens e) ppr_alt :: (IfaceConAlt, [IfLclName], IfaceExpr) -> SDoc ppr_alt (con, bs, rhs) = sep [ppr_con_bs con bs, @@ -633,18 +633,19 @@ ppr_bind (IfLetBndr b ty info, rhs) equals <+> pprIfaceExpr noParens rhs] ------------------ +pprIfaceTickish :: IfaceTickish -> SDoc +pprIfaceTickish (IfaceHpcTick m ix) + = braces (text "tick" <+> ppr m <+> ppr ix) +pprIfaceTickish (IfaceSCC cc tick scope) + = braces (pprCostCentreCore cc <+> ppr tick <+> ppr scope) + +------------------ pprIfaceApp :: IfaceExpr -> [SDoc] -> SDoc pprIfaceApp (IfaceApp fun arg) args = pprIfaceApp fun $ nest 2 (pprParendIfaceExpr arg) : args pprIfaceApp fun args = sep (pprParendIfaceExpr fun : args) ------------------ -instance Outputable IfaceNote where - ppr (IfaceSCC cc) = pprCostCentreCore cc - ppr (IfaceCoreNote s) = ptext (sLit "__core_note") - <+> pprHsString (mkFastString s) - - instance Outputable IfaceConAlt where ppr IfaceDefault = text "DEFAULT" ppr (IfaceLitAlt l) = ppr l @@ -818,7 +819,7 @@ freeNamesIfExpr (IfaceTuple _ as) = fnList freeNamesIfExpr as freeNamesIfExpr (IfaceLam b body) = freeNamesIfBndr b &&& freeNamesIfExpr body freeNamesIfExpr (IfaceApp f a) = freeNamesIfExpr f &&& freeNamesIfExpr a freeNamesIfExpr (IfaceCast e co) = freeNamesIfExpr e &&& freeNamesIfType co -freeNamesIfExpr (IfaceNote _n r) = freeNamesIfExpr r +freeNamesIfExpr (IfaceTick _ e) = freeNamesIfExpr e freeNamesIfExpr (IfaceCase s _ alts) = freeNamesIfExpr s diff --git a/compiler/iface/MkIface.lhs b/compiler/iface/MkIface.lhs index 992b8c7cb0..4fea1166b0 100644 --- a/compiler/iface/MkIface.lhs +++ b/compiler/iface/MkIface.lhs @@ -1629,12 +1629,13 @@ toIfaceExpr (App f a) = toIfaceApp f [a] toIfaceExpr (Case s x _ as) = IfaceCase (toIfaceExpr s) (getFS x) (map toIfaceAlt as) toIfaceExpr (Let b e) = IfaceLet (toIfaceBind b) (toIfaceExpr e) toIfaceExpr (Cast e co) = IfaceCast (toIfaceExpr e) (coToIfaceType co) -toIfaceExpr (Note n e) = IfaceNote (toIfaceNote n) (toIfaceExpr e) +toIfaceExpr (Tick t e) = IfaceTick (toIfaceTickish t) (toIfaceExpr e) --------------------- -toIfaceNote :: Note -> IfaceNote -toIfaceNote (SCC cc) = IfaceSCC cc -toIfaceNote (CoreNote s) = IfaceCoreNote s +toIfaceTickish :: Tickish Id -> IfaceTickish +toIfaceTickish (ProfNote cc tick push) = IfaceSCC cc tick push +toIfaceTickish (HpcTick modl ix) = IfaceHpcTick modl ix +toIfaceTickish _ = panic "toIfaceTickish" --------------------- toIfaceBind :: Bind Id -> IfaceBinding @@ -1679,7 +1680,6 @@ toIfaceVar v | Just fcall <- isFCallId_maybe v = IfaceFCall fcall (toIfaceType (idType v)) -- Foreign calls have special syntax | isExternalName name = IfaceExt name - | Just (TickBox m ix) <- isTickBoxOp_maybe v = IfaceTick m ix - | otherwise = IfaceLcl (getFS name) + | otherwise = IfaceLcl (getFS name) where name = idName v \end{code} diff --git a/compiler/iface/TcIface.lhs b/compiler/iface/TcIface.lhs index dff668f5ac..04b9147717 100644 --- a/compiler/iface/TcIface.lhs +++ b/compiler/iface/TcIface.lhs @@ -874,9 +874,6 @@ tcIfaceExpr (IfaceCast expr co) tcIfaceExpr (IfaceLcl name) = Var <$> tcIfaceLclId name -tcIfaceExpr (IfaceTick modName tickNo) - = Var <$> tcIfaceTick modName tickNo - tcIfaceExpr (IfaceExt gbl) = Var <$> tcIfaceExtId gbl @@ -950,11 +947,15 @@ tcIfaceExpr (IfaceLet (IfaceRec pairs) body) (idName id) (idType id) info ; return (setIdInfo id id_info, rhs') } -tcIfaceExpr (IfaceNote note expr) = do +tcIfaceExpr (IfaceTick tickish expr) = do expr' <- tcIfaceExpr expr - case note of - IfaceSCC cc -> return (Note (SCC cc) expr') - IfaceCoreNote n -> return (Note (CoreNote n) expr') + tickish' <- tcIfaceTickish tickish + return (Tick tickish' expr') + +------------------------- +tcIfaceTickish :: IfaceTickish -> IfM lcl (Tickish Id) +tcIfaceTickish (IfaceHpcTick modl ix) = return (HpcTick modl ix) +tcIfaceTickish (IfaceSCC cc tick push) = return (ProfNote cc tick push) ------------------------- tcIfaceLit :: Literal -> IfL Literal diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 59b6c49648..92ed45e796 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -17,6 +17,7 @@ module DynFlags ( WarningFlag(..), ExtensionFlag(..), LogAction, + ProfAuto(..), glasgowExtsFlags, dopt, dopt_set, @@ -203,7 +204,7 @@ data DynFlag | Opt_D_dump_splices | Opt_D_dump_BCOs | Opt_D_dump_vect - | Opt_D_dump_hpc + | Opt_D_dump_ticked | Opt_D_dump_rtti | Opt_D_source_stats | Opt_D_verbose_core2core @@ -251,9 +252,8 @@ data DynFlag | Opt_ExposeAllUnfoldings -- profiling opts - | Opt_AutoSccsOnAllToplevs - | Opt_AutoSccsOnExportedToplevs | Opt_AutoSccsOnIndividualCafs + | Opt_ProfCountEntries -- misc opts | Opt_Pp @@ -569,9 +569,18 @@ data DynFlags = DynFlags { -- | Message output action: use "ErrUtils" instead of this if you can log_action :: LogAction, - haddockOptions :: Maybe String + haddockOptions :: Maybe String, + + -- | what kind of {-# SCC #-} to add automatically + profAuto :: ProfAuto } +data ProfAuto + = NoProfAuto -- ^ no SCC annotations added + | ProfAutoAll -- ^ top-level and nested functions are annotated + | ProfAutoTop -- ^ top-level functions annotated only + | ProfAutoExports -- ^ exported functions annotated only + data Settings = Settings { sTargetPlatform :: Platform, -- Filled in by SysTools sGhcUsagePath :: FilePath, -- Filled in by SysTools @@ -889,7 +898,8 @@ defaultDynFlags mySettings = newDerivOnLoc = noSrcSpan, extensions = [], extensionFlags = flattenExtensionFlags Nothing [], - log_action = defaultLogAction + log_action = defaultLogAction, + profAuto = NoProfAuto } type LogAction = Severity -> SrcSpan -> PprStyle -> Message -> IO () @@ -1537,7 +1547,8 @@ dynamic_flags = [ , Flag "ddump-hi" (setDumpFlag Opt_D_dump_hi) , Flag "ddump-minimal-imports" (setDumpFlag Opt_D_dump_minimal_imports) , Flag "ddump-vect" (setDumpFlag Opt_D_dump_vect) - , Flag "ddump-hpc" (setDumpFlag Opt_D_dump_hpc) + , Flag "ddump-hpc" (setDumpFlag Opt_D_dump_ticked) -- back compat + , Flag "ddump-ticked" (setDumpFlag Opt_D_dump_ticked) , Flag "ddump-mod-cycles" (setDumpFlag Opt_D_dump_mod_cycles) , Flag "ddump-view-pattern-commoning" (setDumpFlag Opt_D_dump_view_pattern_commoning) , Flag "ddump-to-file" (setDumpFlag Opt_DumpToFile) @@ -1597,17 +1608,19 @@ dynamic_flags = [ ------ Profiling ---------------------------------------------------- - -- XXX Should the -f* flags be deprecated? - -- They don't seem to be documented - , Flag "fauto-sccs-on-all-toplevs" (NoArg (setDynFlag Opt_AutoSccsOnAllToplevs)) - , Flag "auto-all" (NoArg (setDynFlag Opt_AutoSccsOnAllToplevs)) - , Flag "no-auto-all" (NoArg (unSetDynFlag Opt_AutoSccsOnAllToplevs)) - , Flag "fauto-sccs-on-exported-toplevs" (NoArg (setDynFlag Opt_AutoSccsOnExportedToplevs)) - , Flag "auto" (NoArg (setDynFlag Opt_AutoSccsOnExportedToplevs)) - , Flag "no-auto" (NoArg (unSetDynFlag Opt_AutoSccsOnExportedToplevs)) - , Flag "fauto-sccs-on-individual-cafs" (NoArg (setDynFlag Opt_AutoSccsOnIndividualCafs)) - , Flag "caf-all" (NoArg (setDynFlag Opt_AutoSccsOnIndividualCafs)) - , Flag "no-caf-all" (NoArg (unSetDynFlag Opt_AutoSccsOnIndividualCafs)) + -- OLD profiling flags + , Flag "auto-all" (noArg (\d -> d { profAuto = ProfAutoAll } )) + , Flag "no-auto-all" (noArg (\d -> d { profAuto = NoProfAuto } )) + , Flag "auto" (noArg (\d -> d { profAuto = ProfAutoExports } )) + , Flag "no-auto" (noArg (\d -> d { profAuto = NoProfAuto } )) + , Flag "caf-all" (NoArg (setDynFlag Opt_AutoSccsOnIndividualCafs)) + , Flag "no-caf-all" (NoArg (unSetDynFlag Opt_AutoSccsOnIndividualCafs)) + + -- NEW profiling flags + , Flag "fprof-auto" (noArg (\d -> d { profAuto = ProfAutoAll } )) + , Flag "fprof-auto-top" (noArg (\d -> d { profAuto = ProfAutoTop } )) + , Flag "fprof-auto-exported" (noArg (\d -> d { profAuto = ProfAutoExports } )) + , Flag "fno-prof-auto" (noArg (\d -> d { profAuto = NoProfAuto } )) ------ DPH flags ---------------------------------------------------- @@ -1784,7 +1797,9 @@ fFlags = [ ( "ghci-history", Opt_GhciHistory, nop ), ( "helpful-errors", Opt_HelpfulErrors, nop ), ( "building-cabal-package", Opt_BuildingCabalPackage, nop ), - ( "implicit-import-qualified", Opt_ImplicitImportQualified, nop ) + ( "implicit-import-qualified", Opt_ImplicitImportQualified, nop ), + ( "prof-count-entries", Opt_ProfCountEntries, nop ), + ( "prof-cafs", Opt_AutoSccsOnIndividualCafs, nop ) ] -- | These @-f\<blah\>@ flags can all be reversed with @-fno-\<blah\>@ @@ -1959,7 +1974,8 @@ defaultFlags Opt_PrintBindContents, Opt_GhciSandbox, Opt_GhciHistory, - Opt_HelpfulErrors + Opt_HelpfulErrors, + Opt_ProfCountEntries ] ++ [f | (ns,f) <- optLevelFlags, 0 `elem` ns] diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs index 8d69fcbda4..2a14fd545f 100644 --- a/compiler/main/HscMain.hs +++ b/compiler/main/HscMain.hs @@ -1177,7 +1177,8 @@ hscInteractive (iface, details, cgguts) mod_summary = do prepd_binds <- {-# SCC "CorePrep" #-} liftIO $ corePrepPgm dflags core_binds data_tycons ; ----------------- Generate byte code ------------------ - comp_bc <- liftIO $ byteCodeGen dflags prepd_binds data_tycons mod_breaks + comp_bc <- liftIO $ byteCodeGen dflags this_mod prepd_binds + data_tycons mod_breaks ------------------ Create f-x-dynamic C-side stuff --- (_istub_h_exists, istub_c_exists) <- liftIO $ outputForeignStubs dflags this_mod @@ -1361,9 +1362,10 @@ hscDeclsWithLocation hsc_env str source linenumber = runHsc hsc_env $ do (tidy_cg, _mod_details) <- liftIO $ tidyProgram hsc_env simpl_mg let dflags = hsc_dflags hsc_env - CgGuts{ cg_binds = core_binds, - cg_tycons = tycons, - cg_modBreaks = mod_breaks } = tidy_cg + !CgGuts{ cg_module = this_mod, + cg_binds = core_binds, + cg_tycons = tycons, + cg_modBreaks = mod_breaks } = tidy_cg data_tycons = filter isDataTyCon tycons {- Prepare For Code Generation -} @@ -1372,7 +1374,8 @@ hscDeclsWithLocation hsc_env str source linenumber = runHsc hsc_env $ do liftIO $ corePrepPgm dflags core_binds data_tycons {- Generate byte code -} - cbc <- liftIO $ byteCodeGen dflags prepd_binds data_tycons mod_breaks + cbc <- liftIO $ byteCodeGen dflags this_mod + prepd_binds data_tycons mod_breaks let src_span = srcLocSpan interactiveSrcLoc hsc_env <- getHscEnv @@ -1382,7 +1385,7 @@ hscDeclsWithLocation hsc_env str source linenumber = runHsc hsc_env $ do clss = mg_clss simpl_mg ext_vars = filter (isExternalName . idName) $ - bindersOfBinds (cg_binds tidy_cg) + bindersOfBinds core_binds (sys_vars, user_vars) = partition is_sys_var ext_vars is_sys_var id = isDFunId id @@ -1556,7 +1559,7 @@ hscCompileCoreExpr hsc_env srcspan ds_expr Nothing -> return () {- Convert to BCOs -} - bcos <- coreExprToBCOs dflags prepd_expr + bcos <- coreExprToBCOs dflags iNTERACTIVE prepd_expr {- link it -} hval <- linkExpr hsc_env srcspan bcos diff --git a/compiler/main/TidyPgm.lhs b/compiler/main/TidyPgm.lhs index d800bc6db7..85e8802f95 100644 --- a/compiler/main/TidyPgm.lhs +++ b/compiler/main/TidyPgm.lhs @@ -785,7 +785,8 @@ dffvExpr :: CoreExpr -> DFFV () dffvExpr (Var v) = insert v dffvExpr (App e1 e2) = dffvExpr e1 >> dffvExpr e2 dffvExpr (Lam v e) = extendScope v (dffvExpr e) -dffvExpr (Note _ e) = dffvExpr e +dffvExpr (Tick (Breakpoint _ ids) e) = mapM_ insert ids >> dffvExpr e +dffvExpr (Tick _other e) = dffvExpr e dffvExpr (Cast e _) = dffvExpr e dffvExpr (Let (NonRec x r) e) = dffvBind (x,r) >> extendScope x (dffvExpr e) dffvExpr (Let (Rec prs) e) = extendScopeList (map fst prs) $ @@ -1196,7 +1197,7 @@ cafRefsE p (App f a) = fastOr (cafRefsE p f) (cafRefsE p) a cafRefsE p (Lam _ e) = cafRefsE p e cafRefsE p (Let b e) = fastOr (cafRefsEs p (rhssOfBind b)) (cafRefsE p) e cafRefsE p (Case e _bndr _ alts) = fastOr (cafRefsE p e) (cafRefsEs p) (rhssOfAlts alts) -cafRefsE p (Note _n e) = cafRefsE p e +cafRefsE p (Tick _n e) = cafRefsE p e cafRefsE p (Cast e _co) = cafRefsE p e cafRefsE _ (Type _) = fastBool False cafRefsE _ (Coercion _) = fastBool False diff --git a/compiler/parser/Parser.y.pp b/compiler/parser/Parser.y.pp index bcefaf4c03..62075e724b 100644 --- a/compiler/parser/Parser.y.pp +++ b/compiler/parser/Parser.y.pp @@ -1241,7 +1241,7 @@ decl :: { Located (OrdList (LHsDecl RdrName)) } pat <- checkPattern e; return $ LL $ unitOL $ LL $ ValD $ PatBind pat (unLoc $3) - placeHolderType placeHolderNames } } + placeHolderType placeHolderNames (Nothing,[]) } } -- Turn it all into an expression so that -- checkPattern can check that bangs are enabled diff --git a/compiler/parser/RdrHsSyn.lhs b/compiler/parser/RdrHsSyn.lhs index 094efac789..20055e3b7d 100644 --- a/compiler/parser/RdrHsSyn.lhs +++ b/compiler/parser/RdrHsSyn.lhs @@ -726,7 +726,8 @@ checkPatBind :: LHsExpr RdrName -> P (HsBind RdrName) checkPatBind lhs (L _ grhss) = do { lhs <- checkPattern lhs - ; return (PatBind lhs grhss placeHolderType placeHolderNames) } + ; return (PatBind lhs grhss placeHolderType placeHolderNames + (Nothing,[])) } checkValSig :: LHsExpr RdrName diff --git a/compiler/profiling/CostCentre.lhs b/compiler/profiling/CostCentre.lhs index 9e08831c97..8c2d938b8e 100644 --- a/compiler/profiling/CostCentre.lhs +++ b/compiler/profiling/CostCentre.lhs @@ -1,35 +1,20 @@ -% -% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 -% -\section[CostCentre]{The @CostCentre@ data type} - \begin{code} -{-# OPTIONS -fno-warn-incomplete-patterns #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details {-# LANGUAGE BangPatterns, DeriveDataTypeable #-} module CostCentre ( - CostCentre(..), CcName, IsDupdCC(..), IsCafCC(..), + CostCentre(..), CcName, IsCafCC(..), -- All abstract except to friend: ParseIface.y CostCentreStack, CollectedCCs, - noCCS, subsumedCCS, currentCCS, overheadCCS, dontCareCCS, + noCCS, currentCCS, dontCareCCS, noCostCentre, noCCAttached, - noCCSAttached, isCurrentCCS, isSubsumedCCS, currentOrSubsumedCCS, - isDerivedFromCurrentCCS, maybeSingletonCCS, - decomposeCCS, pushCCisNop, + noCCSAttached, isCurrentCCS, + maybeSingletonCCS, mkUserCC, mkAutoCC, mkAllCafsCC, - mkSingletonCCS, dupifyCC, pushCCOnCCS, - isCafCCS, isCafCC, - isSccCountCostCentre, - sccAbleCostCentre, - ccFromThisModule, + mkSingletonCCS, + isCafCCS, isCafCC, isSccCountCC, sccAbleCC, ccFromThisModule, pprCostCentreCore, costCentreUserName, @@ -37,81 +22,22 @@ module CostCentre ( cmpCostCentre -- used for removing dups in a list ) where -import Var ( Id ) +import Var import Name -import Module ( Module ) +import Module import Unique import Outputable import FastTypes import FastString -import Util ( thenCmp ) +import Util import Data.Data -\end{code} - -A Cost Centre Stack is something that can be attached to a closure. -This is either: - - - the current cost centre stack (CCCS) - - a pre-defined cost centre stack (there are several - pre-defined CCSs, see below). - -\begin{code} -data CostCentreStack - = NoCCS - | CurrentCCS -- Pinned on a let(rec)-bound - -- thunk/function/constructor, this says that the - -- cost centre to be attached to the object, when it - -- is allocated, is whatever is in the - -- current-cost-centre-stack register. - - | SubsumedCCS -- Cost centre stack for top-level subsumed functions - -- (CAFs get an AllCafsCC). - -- Its execution costs get subsumed into the caller. - -- This guy is *only* ever pinned on static closures, - -- and is *never* the cost centre for an SCC construct. - - | OverheadCCS -- We charge costs due to the profiling-system - -- doing its work to "overhead". - -- - -- Objects whose CCS is "Overhead" - -- have their *allocation* charged to "overhead", - -- but have the current CCS put into the object - -- itself. - - -- For example, if we transform "f g" to "let - -- g' = g in f g'" (so that something about - -- profiling works better...), then we charge - -- the *allocation* of g' to OverheadCCS, but - -- we put the cost-centre of the call to f - -- (i.e., current CCS) into the g' object. When - -- g' is entered, the CCS of the call - -- to f will be set. - - | DontCareCCS -- We need a CCS to stick in static closures - -- (for data), but we *don't* expect them to - -- accumulate any costs. But we still need - -- the placeholder. This CCS is it. - - | PushCC CostCentre CostCentreStack - -- These are used during code generation as the CCSs - -- attached to closures. A PushCC never appears as - -- the argument to an _scc_. - -- - -- The tail (2nd argument) is either NoCCS, indicating - -- a staticly allocated CCS, or CurrentCCS indicating - -- a dynamically created CCS. We only support - -- statically allocated *singleton* CCSs at the - -- moment, for the purposes of initialising the CCS - -- field of a CAF. - - deriving (Eq, Ord) -- needed for Ord on CLabel -\end{code} +----------------------------------------------------------------------------- +-- Cost Centres -A Cost Centre is the argument of an _scc_ expression. +-- | A Cost Centre is the argument of an _scc_ expression. -\begin{code} data CostCentre = NoCostCentre -- Having this constructor avoids having -- to use "Maybe CostCentre" all the time. @@ -119,8 +45,7 @@ data CostCentre | NormalCC { cc_name :: CcName, -- Name of the cost centre itself cc_mod :: Module, -- Name of module defining this CC. - cc_is_dupd :: IsDupdCC, -- see below - cc_is_caf :: IsCafCC -- see below + cc_is_caf :: IsCafCC -- see below } | AllCafsCC { @@ -130,113 +55,77 @@ data CostCentre type CcName = FastString -data IsDupdCC - = OriginalCC -- This says how the CC is *used*. Saying that - | DupdCC -- it is DupdCC doesn't make it a different - -- CC, just that it a sub-expression which has - -- been moved ("dupd") into a different scope. - -- - -- The point about a dupd SCC is that we don't - -- count entries to it, because it's not the - -- "original" one. - -- - -- In the papers, it's called "SCCsub", - -- i.e. SCCsub CC == SCC DupdCC, - -- but we are trying to avoid confusion between - -- "subd" and "subsumed". So we call the former - -- "dupd". - deriving (Data, Typeable) - -data IsCafCC = CafCC | NotCafCC - deriving (Data, Typeable) - --- synonym for triple which describes the cost centre info in the generated --- code for a module. -type CollectedCCs - = ( [CostCentre] -- local cost-centres that need to be decl'd - , [CostCentre] -- "extern" cost-centres - , [CostCentreStack] -- pre-defined "singleton" cost centre stacks - ) -\end{code} +data IsCafCC = NotCafCC | CafCC + deriving (Eq, Ord, Data, Typeable) -WILL: Would there be any merit to recording ``I am now using a -cost-centre from another module''? I don't know if this would help a -user; it might be interesting to us to know how much computation is -being moved across module boundaries. +noCostCentre :: CostCentre +noCostCentre = NoCostCentre -SIMON: Maybe later... -\begin{code} -noCCS, subsumedCCS, currentCCS, overheadCCS, dontCareCCS :: CostCentreStack +instance Eq CostCentre where + c1 == c2 = case c1 `cmpCostCentre` c2 of { EQ -> True; _ -> False } -noCCS = NoCCS -subsumedCCS = SubsumedCCS -currentCCS = CurrentCCS -overheadCCS = OverheadCCS -dontCareCCS = DontCareCCS +instance Ord CostCentre where + compare = cmpCostCentre -noCostCentre :: CostCentre -noCostCentre = NoCostCentre -\end{code} +cmpCostCentre :: CostCentre -> CostCentre -> Ordering -Predicates on Cost-Centre Stacks +cmpCostCentre (AllCafsCC {cc_mod = m1}) (AllCafsCC {cc_mod = m2}) + = m1 `compare` m2 -\begin{code} -noCCSAttached :: CostCentreStack -> Bool -noCCSAttached NoCCS = True -noCCSAttached _ = False +cmpCostCentre (NormalCC {cc_name = n1, cc_mod = m1, cc_is_caf = c1}) + (NormalCC {cc_name = n2, cc_mod = m2, cc_is_caf = c2}) + -- first key is module name, then the name, then the cafness + = (m1 `compare` m2) `thenCmp` (n1 `compare` n2) `thenCmp` (c1 `compare` c2) -noCCAttached :: CostCentre -> Bool -noCCAttached NoCostCentre = True -noCCAttached _ = False +cmpCostCentre other_1 other_2 + = let + !tag1 = tag_CC other_1 + !tag2 = tag_CC other_2 + in + if tag1 <# tag2 then LT else GT + where + tag_CC NoCostCentre = _ILIT(0) + tag_CC (NormalCC {}) = _ILIT(1) + tag_CC (AllCafsCC {}) = _ILIT(2) -isCurrentCCS :: CostCentreStack -> Bool -isCurrentCCS CurrentCCS = True -isCurrentCCS _ = False -isSubsumedCCS :: CostCentreStack -> Bool -isSubsumedCCS SubsumedCCS = True -isSubsumedCCS _ = False +----------------------------------------------------------------------------- +-- Predicates on CostCentre -isCafCCS :: CostCentreStack -> Bool -isCafCCS (PushCC cc NoCCS) = isCafCC cc -isCafCCS _ = False +isCafCC :: CostCentre -> Bool +isCafCC (AllCafsCC {}) = True +isCafCC (NormalCC {cc_is_caf = CafCC}) = True +isCafCC _ = False -isDerivedFromCurrentCCS :: CostCentreStack -> Bool -isDerivedFromCurrentCCS CurrentCCS = True -isDerivedFromCurrentCCS (PushCC _ ccs) = isDerivedFromCurrentCCS ccs -isDerivedFromCurrentCCS _ = False +-- | Is this a cost-centre which records scc counts +isSccCountCC :: CostCentre -> Bool +isSccCountCC cc | isCafCC cc = False + | otherwise = True -currentOrSubsumedCCS :: CostCentreStack -> Bool -currentOrSubsumedCCS SubsumedCCS = True -currentOrSubsumedCCS CurrentCCS = True -currentOrSubsumedCCS _ = False +-- | Is this a cost-centre which can be sccd ? +sccAbleCC :: CostCentre -> Bool +sccAbleCC NoCostCentre = panic "sccAbleCC:NoCostCentre" +sccAbleCC cc | isCafCC cc = False + | otherwise = True -maybeSingletonCCS :: CostCentreStack -> Maybe CostCentre -maybeSingletonCCS (PushCC cc NoCCS) = Just cc -maybeSingletonCCS _ = Nothing +ccFromThisModule :: CostCentre -> Module -> Bool +ccFromThisModule cc m = cc_mod cc == m -pushCCisNop :: CostCentre -> CostCentreStack -> Bool --- (pushCCisNop cc ccs) = True => pushing cc on ccs is a no-op --- It's safe to return False, but the optimiser can remove --- redundant pushes if this function returns True. -pushCCisNop cc (PushCC cc' _) = cc == cc' -pushCCisNop _ _ = False -\end{code} -Building cost centres +----------------------------------------------------------------------------- +-- Building cost centres -\begin{code} mkUserCC :: FastString -> Module -> CostCentre mkUserCC cc_name mod = NormalCC { cc_name = cc_name, cc_mod = mod, - cc_is_dupd = OriginalCC, cc_is_caf = NotCafCC {-might be changed-} + cc_is_caf = NotCafCC {-might be changed-} } mkAutoCC :: Id -> Module -> IsCafCC -> CostCentre mkAutoCC id mod is_caf = NormalCC { cc_name = str, cc_mod = mod, - cc_is_dupd = OriginalCC, cc_is_caf = is_caf + cc_is_caf = is_caf } where name = getName id @@ -249,153 +138,126 @@ mkAutoCC id mod is_caf ftext (occNameFS (getOccName id)) <> char '_' <> pprUnique (getUnique name) mkAllCafsCC :: Module -> CostCentre -mkAllCafsCC m = AllCafsCC { cc_mod = m } +mkAllCafsCC m = AllCafsCC { cc_mod = m } +----------------------------------------------------------------------------- +-- Cost Centre Stacks +-- | A Cost Centre Stack is something that can be attached to a closure. +-- This is either: +-- +-- * the current cost centre stack (CCCS) +-- * a pre-defined cost centre stack (there are several +-- pre-defined CCSs, see below). -mkSingletonCCS :: CostCentre -> CostCentreStack -mkSingletonCCS cc = pushCCOnCCS cc NoCCS +data CostCentreStack + = NoCCS -pushCCOnCCS :: CostCentre -> CostCentreStack -> CostCentreStack -pushCCOnCCS = PushCC + | CurrentCCS -- Pinned on a let(rec)-bound + -- thunk/function/constructor, this says that the + -- cost centre to be attached to the object, when it + -- is allocated, is whatever is in the + -- current-cost-centre-stack register. -dupifyCC :: CostCentre -> CostCentre -dupifyCC cc = cc {cc_is_dupd = DupdCC} + | DontCareCCS -- We need a CCS to stick in static closures + -- (for data), but we *don't* expect them to + -- accumulate any costs. But we still need + -- the placeholder. This CCS is it. -isCafCC, isDupdCC :: CostCentre -> Bool + | SingletonCCS CostCentre -isCafCC (AllCafsCC {}) = True -isCafCC (NormalCC {cc_is_caf = CafCC}) = True -isCafCC _ = False + deriving (Eq, Ord) -- needed for Ord on CLabel -isDupdCC (NormalCC {cc_is_dupd = DupdCC}) = True -isDupdCC _ = False -isSccCountCostCentre :: CostCentre -> Bool - -- Is this a cost-centre which records scc counts +-- synonym for triple which describes the cost centre info in the generated +-- code for a module. +type CollectedCCs + = ( [CostCentre] -- local cost-centres that need to be decl'd + , [CostCentre] -- "extern" cost-centres + , [CostCentreStack] -- pre-defined "singleton" cost centre stacks + ) -#if DEBUG -isSccCountCostCentre NoCostCentre = panic "isSccCount:NoCostCentre" -#endif -isSccCountCostCentre cc | isCafCC cc = False - | isDupdCC cc = False - | otherwise = True -sccAbleCostCentre :: CostCentre -> Bool - -- Is this a cost-centre which can be sccd ? +noCCS, currentCCS, dontCareCCS :: CostCentreStack -#if DEBUG -sccAbleCostCentre NoCostCentre = panic "sccAbleCC:NoCostCentre" -#endif -sccAbleCostCentre cc | isCafCC cc = False - | otherwise = True +noCCS = NoCCS +currentCCS = CurrentCCS +dontCareCCS = DontCareCCS -ccFromThisModule :: CostCentre -> Module -> Bool -ccFromThisModule cc m = cc_mod cc == m -\end{code} +----------------------------------------------------------------------------- +-- Predicates on Cost-Centre Stacks -\begin{code} -instance Eq CostCentre where - c1 == c2 = case c1 `cmpCostCentre` c2 of { EQ -> True; _ -> False } +noCCSAttached :: CostCentreStack -> Bool +noCCSAttached NoCCS = True +noCCSAttached _ = False -instance Ord CostCentre where - compare = cmpCostCentre +noCCAttached :: CostCentre -> Bool +noCCAttached NoCostCentre = True +noCCAttached _ = False -cmpCostCentre :: CostCentre -> CostCentre -> Ordering +isCurrentCCS :: CostCentreStack -> Bool +isCurrentCCS CurrentCCS = True +isCurrentCCS _ = False -cmpCostCentre (AllCafsCC {cc_mod = m1}) (AllCafsCC {cc_mod = m2}) = m1 `compare` m2 +isCafCCS :: CostCentreStack -> Bool +isCafCCS (SingletonCCS cc) = isCafCC cc +isCafCCS _ = False -cmpCostCentre (NormalCC {cc_name = n1, cc_mod = m1, cc_is_caf = c1}) - (NormalCC {cc_name = n2, cc_mod = m2, cc_is_caf = c2}) - -- first key is module name, then we use "kinds" (which include - -- names) and finally the caf flag - = (m1 `compare` m2) `thenCmp` (n1 `compare` n2) `thenCmp` (c1 `cmp_caf` c2) +maybeSingletonCCS :: CostCentreStack -> Maybe CostCentre +maybeSingletonCCS (SingletonCCS cc) = Just cc +maybeSingletonCCS _ = Nothing -cmpCostCentre other_1 other_2 - = let - !tag1 = tag_CC other_1 - !tag2 = tag_CC other_2 - in - if tag1 <# tag2 then LT else GT - where - tag_CC (NormalCC {}) = _ILIT(1) - tag_CC (AllCafsCC {}) = _ILIT(2) +mkSingletonCCS :: CostCentre -> CostCentreStack +mkSingletonCCS cc = SingletonCCS cc --- TODO: swap order of IsCafCC, add deriving Ord -cmp_caf :: IsCafCC -> IsCafCC -> Ordering -cmp_caf NotCafCC CafCC = LT -cmp_caf NotCafCC NotCafCC = EQ -cmp_caf CafCC CafCC = EQ -cmp_caf CafCC NotCafCC = GT - -decomposeCCS :: CostCentreStack -> ([CostCentre],CostCentreStack) -decomposeCCS (PushCC cc ccs) = (cc:more, ccs') - where (more,ccs') = decomposeCCS ccs -decomposeCCS ccs = ([],ccs) -\end{code} ----------------------------------------------------------------------------- -Printing Cost Centre Stacks. - -The outputable instance for CostCentreStack prints the CCS as a C -expression. +-- Printing Cost Centre Stacks. -NOTE: Not all cost centres are suitable for using in a static -initializer. In particular, the PushCC forms where the tail is CCCS -may only be used in inline C code because they expand to a -non-constant C expression. +-- The outputable instance for CostCentreStack prints the CCS as a C +-- expression. -\begin{code} instance Outputable CostCentreStack where ppr NoCCS = ptext (sLit "NO_CCS") ppr CurrentCCS = ptext (sLit "CCCS") - ppr OverheadCCS = ptext (sLit "CCS_OVERHEAD") - ppr DontCareCCS = ptext (sLit "CCS_DONT_CARE") - ppr SubsumedCCS = ptext (sLit "CCS_SUBSUMED") - ppr (PushCC cc NoCCS) = ppr cc <> ptext (sLit "_ccs") - ppr (PushCC cc ccs) = ptext (sLit "PushCostCentre") <> - parens (ppr ccs <> comma <> - parens(ptext (sLit "void *")) <> ppr cc) -\end{code} - ------------------------------------------------------------------------------ -Printing Cost Centres. - -There are several different ways in which we might want to print a -cost centre: + ppr DontCareCCS = ptext (sLit "CCS_DONT_CARE") + ppr (SingletonCCS cc) = ppr cc <> ptext (sLit "_ccs") - - the name of the cost centre, for profiling output (a C string) - - the label, i.e. C label for cost centre in .hc file. - - the debugging name, for output in -ddump things - - the interface name, for printing in _scc_ exprs in iface files. -The last 3 are derived from costCentreStr below. The first is given -by costCentreName. +----------------------------------------------------------------------------- +-- Printing Cost Centres +-- +-- There are several different ways in which we might want to print a +-- cost centre: +-- +-- - the name of the cost centre, for profiling output (a C string) +-- - the label, i.e. C label for cost centre in .hc file. +-- - the debugging name, for output in -ddump things +-- - the interface name, for printing in _scc_ exprs in iface files. +-- +-- The last 3 are derived from costCentreStr below. The first is given +-- by costCentreName. -\begin{code} instance Outputable CostCentre where ppr cc = getPprStyle $ \ sty -> if codeStyle sty then ppCostCentreLbl cc else text (costCentreUserName cc) --- Printing in an interface file or in Core generally +-- Printing in Core pprCostCentreCore :: CostCentre -> SDoc +pprCostCentreCore NoCostCentre + = text "__no_cc" pprCostCentreCore (AllCafsCC {cc_mod = m}) = text "__sccC" <+> braces (ppr m) pprCostCentreCore (NormalCC {cc_name = n, cc_mod = m, - cc_is_caf = caf, cc_is_dupd = dup}) + cc_is_caf = caf}) = text "__scc" <+> braces (hsep [ ftext (zEncodeFS n), ppr m, - pp_dup dup, - pp_caf caf + pp_caf caf ]) -pp_dup :: IsDupdCC -> SDoc -pp_dup DupdCC = char '!' -pp_dup _ = empty - pp_caf :: IsCafCC -> SDoc pp_caf CafCC = text "__C" pp_caf _ = empty @@ -415,4 +277,5 @@ costCentreUserName (NoCostCentre) = "NO_CC" costCentreUserName (AllCafsCC {}) = "CAF" costCentreUserName (NormalCC {cc_name = name, cc_is_caf = is_caf}) = case is_caf of { CafCC -> "CAF:"; _ -> "" } ++ unpackFS name + \end{code} diff --git a/compiler/profiling/SCCfinal.lhs b/compiler/profiling/SCCfinal.lhs index f09b291db7..96a21eb056 100644 --- a/compiler/profiling/SCCfinal.lhs +++ b/compiler/profiling/SCCfinal.lhs @@ -1,27 +1,23 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -\section[SCCfinal]{Modify and collect code generation for final STG program} - -This is now a sort-of-normal STG-to-STG pass (WDP 94/06), run by stg2stg. - - - Traverses the STG program collecting the cost centres. These are required - to declare the cost centres at the start of code generation. - - Note: because of cross-module unfolding, some of these cost centres may be - from other modules. But will still have to give them "extern" - declarations. - - - Puts on CAF cost-centres if the user has asked for individual CAF - cost-centres. - - - Ditto for individual DICT cost-centres. - - - Boxes top-level inherited functions passed as arguments. +\begin{code} +----------------------------------------------------------------------------- +-- Modify and collect code generation for final STG program - - "Distributes" given cost-centres to all as-yet-unmarked RHSs. +{- + This is now a sort-of-normal STG-to-STG pass (WDP 94/06), run by stg2stg. + + - Traverses the STG program collecting the cost centres. These are required + to declare the cost centres at the start of code generation. + + Note: because of cross-module unfolding, some of these cost centres may be + from other modules. + + - Puts on CAF cost-centres if the user has asked for individual CAF + cost-centres. +-} -\begin{code} module SCCfinal ( stgMassageForProfiling ) where #include "HsVersions.h" @@ -32,17 +28,12 @@ import CostCentre -- lots of things import Id import Name import Module -import UniqSupply ( splitUniqSupply, UniqSupply ) -#ifdef PROF_DO_BOXING -import UniqSupply ( uniqFromSupply ) -#endif -import VarSet +import UniqSupply ( UniqSupply ) import ListSetOps ( removeDups ) import Outputable import DynFlags -\end{code} -\begin{code} + stgMassageForProfiling :: DynFlags -> Module -- module name @@ -50,16 +41,15 @@ stgMassageForProfiling -> [StgBinding] -- input -> (CollectedCCs, [StgBinding]) -stgMassageForProfiling dflags mod_name us stg_binds +stgMassageForProfiling dflags mod_name _us stg_binds = let ((local_ccs, extern_ccs, cc_stacks), stg_binds2) - = initMM mod_name us (do_top_bindings stg_binds) + = initMM mod_name (do_top_bindings stg_binds) (fixed_ccs, fixed_cc_stacks) = if dopt Opt_AutoSccsOnIndividualCafs dflags then ([],[]) -- don't need "all CAFs" CC - -- (for Prelude, we use PreludeCC) else ([all_cafs_cc], [all_cafs_ccs]) local_ccs_no_dups = fst (removeDups cmpCostCentre local_ccs) @@ -80,17 +70,14 @@ stgMassageForProfiling dflags mod_name us stg_binds do_top_bindings (StgNonRec b rhs : bs) = do rhs' <- do_top_rhs b rhs - addTopLevelIshId b $ do - bs' <- do_top_bindings bs - return (StgNonRec b rhs' : bs') - - do_top_bindings (StgRec pairs : bs) - = addTopLevelIshIds binders $ do - pairs2 <- mapM do_pair pairs - bs' <- do_top_bindings bs - return (StgRec pairs2 : bs') + bs' <- do_top_bindings bs + return (StgNonRec b rhs' : bs') + + do_top_bindings (StgRec pairs : bs) = do + pairs2 <- mapM do_pair pairs + bs' <- do_top_bindings bs + return (StgRec pairs2 : bs') where - binders = map fst pairs do_pair (b, rhs) = do rhs2 <- do_top_rhs b rhs return (b, rhs2) @@ -98,27 +85,17 @@ stgMassageForProfiling dflags mod_name us stg_binds ---------- do_top_rhs :: Id -> StgRhs -> MassageM StgRhs - do_top_rhs _ (StgRhsClosure _ _ _ _ _ [] (StgSCC cc (StgConApp con args))) - | not (isSccCountCostCentre cc) && not (isDllConApp dflags con args) + do_top_rhs _ (StgRhsClosure _ _ _ _ _ [] + (StgSCC _cc False{-not tick-} _push (StgConApp con args))) + | not (isDllConApp dflags con args) -- Trivial _scc_ around nothing but static data -- Eliminate _scc_ ... and turn into StgRhsCon -- isDllConApp checks for LitLit args too = return (StgRhsCon dontCareCCS con args) -{- Can't do this one with cost-centre stacks: --SDM - do_top_rhs binder (StgRhsClosure no_cc bi fv u [] (StgSCC ty cc expr)) - | (noCCSAttached no_cc || currentOrSubsumedCCS no_cc) - && not (isSccCountCostCentre cc) - -- Top level CAF without a cost centre attached - -- Attach and collect cc of trivial _scc_ in body - = do collectCC cc - expr' <- set_prevailing_cc cc (do_expr expr) - return (StgRhsClosure cc bi fv u [] expr') --} - - do_top_rhs binder (StgRhsClosure no_cc bi fv u srt [] body) - | noCCSAttached no_cc || currentOrSubsumedCCS no_cc = do + do_top_rhs binder (StgRhsClosure _ bi fv u srt [] body) + = do -- Top level CAF without a cost centre attached -- Attach CAF cc (collect if individual CAF ccs) caf_ccs <- if dopt Opt_AutoSccsOnIndividualCafs dflags @@ -135,22 +112,12 @@ stgMassageForProfiling dflags mod_name us stg_binds return ccs else return all_cafs_ccs - body' <- set_prevailing_cc caf_ccs (do_expr body) + body' <- do_expr body return (StgRhsClosure caf_ccs bi fv u srt [] body') - do_top_rhs _ (StgRhsClosure cc _ _ _ _ [] _) - -- Top level CAF with cost centre attached - -- Should this be a CAF cc ??? Does this ever occur ??? - = pprPanic "SCCfinal: CAF with cc:" (ppr cc) - - do_top_rhs _ (StgRhsClosure no_ccs bi fv u srt args body) - -- Top level function, probably subsumed - | noCCSAttached no_ccs - = do body' <- set_lambda_cc (do_expr body) - return (StgRhsClosure subsumedCCS bi fv u srt args body') - - | otherwise - = pprPanic "SCCfinal: CAF with cc:" (ppr no_ccs) + do_top_rhs _ (StgRhsClosure _no_ccs bi fv u srt args body) + = do body' <- do_expr body + return (StgRhsClosure dontCareCCS bi fv u srt args body') do_top_rhs _ (StgRhsCon _ con args) -- Top-level (static) data is not counted in heap @@ -164,18 +131,18 @@ stgMassageForProfiling dflags mod_name us stg_binds do_expr (StgLit l) = return (StgLit l) do_expr (StgApp fn args) - = boxHigherOrderArgs (StgApp fn) args + = return (StgApp fn args) do_expr (StgConApp con args) - = boxHigherOrderArgs (\args -> StgConApp con args) args + = return (StgConApp con args) do_expr (StgOpApp con args res_ty) - = boxHigherOrderArgs (\args -> StgOpApp con args res_ty) args + = return (StgOpApp con args res_ty) - do_expr (StgSCC cc expr) = do -- Ha, we found a cost centre! + do_expr (StgSCC cc tick push expr) = do -- Ha, we found a cost centre! collectCC cc expr' <- do_expr expr - return (StgSCC cc expr') + return (StgSCC cc tick push expr') do_expr (StgCase expr fv1 fv2 bndr srt alt_type alts) = do expr' <- do_expr expr @@ -204,17 +171,14 @@ stgMassageForProfiling dflags mod_name us stg_binds do_let (StgNonRec b rhs) e = do rhs' <- do_rhs rhs - addTopLevelIshId b $ do - e' <- do_expr e - return (StgNonRec b rhs',e') - - do_let (StgRec pairs) e - = addTopLevelIshIds binders $ do - pairs' <- mapM do_pair pairs - e' <- do_expr e - return (StgRec pairs', e') + e' <- do_expr e + return (StgNonRec b rhs',e') + + do_let (StgRec pairs) e = do + pairs' <- mapM do_pair pairs + e' <- do_expr e + return (StgRec pairs', e') where - binders = map fst pairs do_pair (b, rhs) = do rhs2 <- do_rhs rhs return (b, rhs2) @@ -224,171 +188,62 @@ stgMassageForProfiling dflags mod_name us stg_binds -- We play much the same game as we did in do_top_rhs above; -- but we don't have to worry about cafs etc. -{- - do_rhs (StgRhsClosure closure_cc bi fv u [] (StgSCC ty cc (StgCon (DataCon con) args _))) - | not (isSccCountCostCentre cc) + -- throw away the SCC if we don't have to count entries. This + -- is a little bit wrong, because we're attributing the + -- allocation of the constructor to the wrong place (XXX) + -- We should really attach (PushCC cc CurrentCCS) to the rhs, + -- but need to reinstate PushCC for that. + do_rhs (StgRhsClosure _closure_cc _bi _fv _u _srt [] + (StgSCC cc False{-not tick-} _push (StgConApp con args))) = do collectCC cc - return (StgRhsCon cc con args) --} + return (StgRhsCon currentCCS con args) do_rhs (StgRhsClosure _ bi fv u srt args expr) = do - (expr', ccs) <- slurpSCCs currentCCS expr - expr'' <- do_expr expr' - return (StgRhsClosure ccs bi fv u srt args expr'') - where - slurpSCCs ccs (StgSCC cc e) - = do collectCC cc - slurpSCCs (cc `pushCCOnCCS` ccs) e - slurpSCCs ccs e - = return (e, ccs) + expr' <- do_expr expr + return (StgRhsClosure currentCCS bi fv u srt args expr') do_rhs (StgRhsCon _ con args) = return (StgRhsCon currentCCS con args) -\end{code} - -%************************************************************************ -%* * -\subsection{Boxing higher-order args} -%* * -%************************************************************************ - -Boxing is *turned off* at the moment, until we can figure out how to -do it properly in general. - -\begin{code} -boxHigherOrderArgs - :: ([StgArg] -> StgExpr) - -- An application lacking its arguments - -> [StgArg] -- arguments which we might box - -> MassageM StgExpr - -#ifndef PROF_DO_BOXING -boxHigherOrderArgs almost_expr args - = return (almost_expr args) -#else -boxHigherOrderArgs almost_expr args = do - ids <- getTopLevelIshIds - (let_bindings, new_args) <- mapAccumLM (do_arg ids) [] args - return (foldr (mk_stg_let currentCCS) (almost_expr new_args) let_bindings) - where - --------------- - - do_arg ids bindings arg@(StgVarArg old_var) - | (not (isLocalVar old_var) || elemVarSet old_var ids) - && isFunTy (dropForAlls var_type) - = do -- make a trivial let-binding for the top-level function - uniq <- getUniqueMM - let - new_var = mkSysLocal (fsLit "sf") uniq var_type - return ( (new_var, old_var) : bindings, StgVarArg new_var ) - where - var_type = idType old_var - - do_arg ids bindings arg = return (bindings, arg) - - --------------- - mk_stg_let :: CostCentreStack -> (Id, Id) -> StgExpr -> StgExpr - mk_stg_let cc (new_var, old_var) body - = let - rhs_body = StgApp old_var [{-args-}] - rhs_closure = StgRhsClosure cc stgArgOcc [{-fvs-}] ReEntrant NoSRT{-eeek!!!-} [{-args-}] rhs_body - in - StgLet (StgNonRec new_var rhs_closure) body - where - bOGUS_LVs = emptyUniqSet -- easier to print than: panic "mk_stg_let: LVs" -#endif -\end{code} -%************************************************************************ -%* * -\subsection{Boring monad stuff for this} -%* * -%************************************************************************ +-- ----------------------------------------------------------------------------- +-- Boring monad stuff for this -\begin{code} newtype MassageM result = MassageM { unMassageM :: Module -- module name - -> CostCentreStack -- prevailing CostCentre - -- if none, subsumedCosts at top-level - -- currentCostCentre at nested levels - -> UniqSupply - -> VarSet -- toplevel-ish Ids for boxing -> CollectedCCs -> (CollectedCCs, result) } instance Monad MassageM where - return x = MassageM (\_ _ _ _ ccs -> (ccs, x)) + return x = MassageM (\_ ccs -> (ccs, x)) (>>=) = thenMM (>>) = thenMM_ -- the initMM function also returns the final CollectedCCs initMM :: Module -- module name, which we may consult - -> UniqSupply -> MassageM a -> (CollectedCCs, a) -initMM mod_name init_us (MassageM m) = m mod_name noCCS init_us emptyVarSet ([],[],[]) +initMM mod_name (MassageM m) = m mod_name ([],[],[]) thenMM :: MassageM a -> (a -> MassageM b) -> MassageM b thenMM_ :: MassageM a -> (MassageM b) -> MassageM b -thenMM expr cont = MassageM $ \mod scope_cc us ids ccs -> - case splitUniqSupply us of { (s1, s2) -> - case unMassageM expr mod scope_cc s1 ids ccs of { (ccs2, result) -> - unMassageM (cont result) mod scope_cc s2 ids ccs2 }} - -thenMM_ expr cont = MassageM $ \mod scope_cc us ids ccs -> - case splitUniqSupply us of { (s1, s2) -> - case unMassageM expr mod scope_cc s1 ids ccs of { (ccs2, _) -> - unMassageM cont mod scope_cc s2 ids ccs2 }} - -#ifdef PROF_DO_BOXING -getUniqueMM :: MassageM Unique -getUniqueMM = MassageM \mod scope_cc us ids ccs -> (ccs, uniqFromSupply us) -#endif - -addTopLevelIshId :: Id -> MassageM a -> MassageM a -addTopLevelIshId id scope - = MassageM $ \mod scope_cc us ids ccs -> - if isCurrentCCS scope_cc then unMassageM scope mod scope_cc us ids ccs - else unMassageM scope mod scope_cc us (extendVarSet ids id) ccs - -addTopLevelIshIds :: [Id] -> MassageM a -> MassageM a -addTopLevelIshIds [] cont = cont -addTopLevelIshIds (id:ids) cont - = addTopLevelIshId id (addTopLevelIshIds ids cont) - -#ifdef PROF_DO_BOXING -getTopLevelIshIds :: MassageM VarSet -getTopLevelIshIds = MassageM $ \_mod _scope_cc _us ids ccs -> (ccs, ids) -#endif -\end{code} +thenMM expr cont = MassageM $ \mod ccs -> + case unMassageM expr mod ccs of { (ccs2, result) -> + unMassageM (cont result) mod ccs2 } -The prevailing CCS is used to tell whether we're in a top-levelish -position, where top-levelish is defined as "not inside a lambda". -Prevailing CCs used to be used for something much more complicated, -I'm sure --SDM +thenMM_ expr cont = MassageM $ \mod ccs -> + case unMassageM expr mod ccs of { (ccs2, _) -> + unMassageM cont mod ccs2 } -\begin{code} -set_lambda_cc :: MassageM a -> MassageM a -set_lambda_cc action - = MassageM $ \mod _scope_cc us ids ccs - -> unMassageM action mod currentCCS us ids ccs - -set_prevailing_cc :: CostCentreStack -> MassageM a -> MassageM a -set_prevailing_cc cc_to_set_to action - = MassageM $ \mod _scope_cc us ids ccs - -> unMassageM action mod cc_to_set_to us ids ccs -\end{code} -\begin{code} collectCC :: CostCentre -> MassageM () collectCC cc - = MassageM $ \mod_name _scope_cc _us _ids (local_ccs, extern_ccs, ccss) + = MassageM $ \mod_name (local_ccs, extern_ccs, ccss) -> ASSERT(not (noCCAttached cc)) if (cc `ccFromThisModule` mod_name) then ((cc : local_ccs, extern_ccs, ccss), ()) @@ -401,13 +256,13 @@ collectCC cc -- test prof001,prof002. collectNewCC :: CostCentre -> MassageM () collectNewCC cc - = MassageM $ \_mod_name _scope_cc _us _ids (local_ccs, extern_ccs, ccss) + = MassageM $ \_mod_name (local_ccs, extern_ccs, ccss) -> ((cc : local_ccs, extern_ccs, ccss), ()) collectCCS :: CostCentreStack -> MassageM () collectCCS ccs - = MassageM $ \_mod_name _scope_cc _us _ids (local_ccs, extern_ccs, ccss) + = MassageM $ \_mod_name (local_ccs, extern_ccs, ccss) -> ASSERT(not (noCCSAttached ccs)) ((local_ccs, extern_ccs, ccs : ccss), ()) \end{code} diff --git a/compiler/simplCore/CSE.lhs b/compiler/simplCore/CSE.lhs index 2f5c38e387..4dc8f875e8 100644 --- a/compiler/simplCore/CSE.lhs +++ b/compiler/simplCore/CSE.lhs @@ -233,7 +233,7 @@ cseExpr env (Coercion c) = Coercion (substCo (csEnvSubst env) c) cseExpr _ (Lit lit) = Lit lit cseExpr env (Var v) = lookupSubst env v cseExpr env (App f a) = App (cseExpr env f) (tryForCSE env a) -cseExpr env (Note n e) = Note n (cseExpr env e) +cseExpr env (Tick t e) = Tick t (cseExpr env e) cseExpr env (Cast e co) = Cast (cseExpr env e) (substCo (csEnvSubst env) co) cseExpr env (Lam b e) = let (env', b') = addBinder env b in Lam b' (cseExpr env' e) diff --git a/compiler/simplCore/FloatIn.lhs b/compiler/simplCore/FloatIn.lhs index 0e49f160e6..ea915ed68a 100644 --- a/compiler/simplCore/FloatIn.lhs +++ b/compiler/simplCore/FloatIn.lhs @@ -210,12 +210,13 @@ We don't float lets inwards past an SCC. cc, change current cc to the new one and float binds into expr. \begin{code} -fiExpr to_drop (_, AnnNote note@(SCC _) expr) - = -- Wimp out for now - mkCoLets' to_drop (Note note (fiExpr [] expr)) +fiExpr to_drop (_, AnnTick tickish expr) + | tickishScoped tickish + = -- Wimp out for now - we could push values in + mkCoLets' to_drop (Tick tickish (fiExpr [] expr)) -fiExpr to_drop (_, AnnNote note@(CoreNote _) expr) - = Note note (fiExpr to_drop expr) + | otherwise + = Tick tickish (fiExpr to_drop expr) \end{code} For @Lets@, the possible ``drop points'' for the \tr{to_drop} diff --git a/compiler/simplCore/FloatOut.lhs b/compiler/simplCore/FloatOut.lhs index bcd118a12c..3a51c4f659 100644 --- a/compiler/simplCore/FloatOut.lhs +++ b/compiler/simplCore/FloatOut.lhs @@ -15,8 +15,7 @@ import CoreMonad ( FloatOutSwitches(..) ) import DynFlags ( DynFlags, DynFlag(..) ) import ErrUtils ( dumpIfSet_dyn ) -import CostCentre ( dupifyCC, CostCentre ) -import DataCon ( DataCon ) +import DataCon ( DataCon ) import Id ( Id, idArity, isBottomingId ) import Var ( Var ) import SetLevels @@ -195,7 +194,6 @@ installUnderLambdas floats e | otherwise = go e where go (Lam b e) = Lam b (go e) - go (Note n e) | notSccNote n = Note n (go e) go e = install floats e --------------- @@ -278,18 +276,19 @@ floatExpr lam@(Lam (TB _ lam_spec) _) case (floatBody bndr_lvl body) of { (fs, floats, body') -> (add_to_stats fs floats, floats, mkLams bndrs body') } -floatExpr (Note note@(SCC cc) expr) +floatExpr (Tick tickish expr) + | tickishScoped tickish = case (floatExpr expr) of { (fs, floating_defns, expr') -> let -- Annotate bindings floated outwards past an scc expression -- with the cc. We mark that cc as "duplicated", though. - annotated_defns = wrapCostCentre (dupifyCC cc) floating_defns + annotated_defns = wrapTick (mkNoTick tickish) floating_defns in - (fs, annotated_defns, Note note expr') } + (fs, annotated_defns, Tick tickish expr') } -floatExpr (Note note expr) -- Other than SCCs + | otherwise -- not scoped, can just float = case (floatExpr expr) of { (fs, floating_defns, expr') -> - (fs, floating_defns, Note note expr') } + (fs, floating_defns, Tick tickish expr') } floatExpr (Cast expr co) = case (floatExpr expr) of { (fs, floating_defns, expr') -> @@ -555,15 +554,15 @@ partitionByLevel (Level major minor) (FB tops defns) Just min_defns -> M.splitLookup minor min_defns here_min = mb_here_min `orElse` emptyBag -wrapCostCentre :: CostCentre -> FloatBinds -> FloatBinds -wrapCostCentre cc (FB tops defns) +wrapTick :: Tickish Id -> FloatBinds -> FloatBinds +wrapTick t (FB tops defns) = FB (mapBag wrap_bind tops) (M.map (M.map wrap_defns) defns) where wrap_defns = mapBag wrap_one - wrap_bind (NonRec binder rhs) = NonRec binder (mkSCC cc rhs) - wrap_bind (Rec pairs) = Rec (mapSnd (mkSCC cc) pairs) + wrap_bind (NonRec binder rhs) = NonRec binder (mkTick t rhs) + wrap_bind (Rec pairs) = Rec (mapSnd (mkTick t) pairs) wrap_one (FloatLet bind) = FloatLet (wrap_bind bind) - wrap_one (FloatCase e b c bs) = FloatCase (mkSCC cc e) b c bs + wrap_one (FloatCase e b c bs) = FloatCase (mkTick t e) b c bs \end{code} diff --git a/compiler/simplCore/LiberateCase.lhs b/compiler/simplCore/LiberateCase.lhs index 8caa29a568..5388b4210c 100644 --- a/compiler/simplCore/LiberateCase.lhs +++ b/compiler/simplCore/LiberateCase.lhs @@ -201,7 +201,7 @@ libCase _ (Lit lit) = Lit lit libCase _ (Type ty) = Type ty libCase _ (Coercion co) = Coercion co libCase env (App fun arg) = App (libCase env fun) (libCase env arg) -libCase env (Note note body) = Note note (libCase env body) +libCase env (Tick tickish body) = Tick tickish (libCase env body) libCase env (Cast e co) = Cast (libCase env e) co libCase env (Lam binder body) diff --git a/compiler/simplCore/OccurAnal.lhs b/compiler/simplCore/OccurAnal.lhs index b53c98f6ea..df6c76be33 100644 --- a/compiler/simplCore/OccurAnal.lhs +++ b/compiler/simplCore/OccurAnal.lhs @@ -1,3 +1,4 @@ + % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % @@ -11,6 +12,7 @@ The occurrence analyser re-typechecks a core expression, returning a new core expression with (hopefully) improved usage information. \begin{code} +{-# LANGUAGE BangPatterns #-} module OccurAnal ( occurAnalysePgm, occurAnalyseExpr ) where @@ -827,7 +829,7 @@ reOrderNodes depth bndr_set weak_fvs (node : nodes) binds is_con_app (Var v) = isConLikeId v is_con_app (App f _) = is_con_app f is_con_app (Lam _ e) = is_con_app e - is_con_app (Note _ e) = is_con_app e + is_con_app (Tick _ e) = is_con_app e is_con_app _ = False \end{code} @@ -1072,18 +1074,19 @@ We need to gather info about what coercion variables appear, so that we can sort them into the right place when doing dependency analysis. \begin{code} -\end{code} +occAnal env (Tick tickish body) + | Breakpoint _ ids <- tickish + = (mapVarEnv markInsideSCC usage + +++ mkVarEnv (zip ids (repeat NoOccInfo)), Tick tickish body') + -- never substitute for any of the Ids in a Breakpoint -\begin{code} -occAnal env (Note note@(SCC _) body) - = case occAnal env body of { (usage, body') -> - (mapVarEnv markInsideSCC usage, Note note body') - } + | tickishScoped tickish + = (mapVarEnv markInsideSCC usage, Tick tickish body') -occAnal env (Note note body) - = case occAnal env body of { (usage, body') -> - (usage, Note note body') - } + | otherwise + = (usage, Tick tickish body') + where + !(usage,body') = occAnal env body occAnal env (Cast expr co) = case occAnal env expr of { (usage, expr') -> @@ -1896,7 +1899,8 @@ markMany, markInsideLam, markInsideSCC :: OccInfo -> OccInfo markMany _ = NoOccInfo -markInsideSCC occ = markMany occ +markInsideSCC occ = markInsideLam occ + -- inside an SCC, we can inline lambdas only. markInsideLam (OneOcc _ one_br int_cxt) = OneOcc True one_br int_cxt markInsideLam occ = occ diff --git a/compiler/simplCore/SAT.lhs b/compiler/simplCore/SAT.lhs index acc11ca81b..b11411ce1b 100644 --- a/compiler/simplCore/SAT.lhs +++ b/compiler/simplCore/SAT.lhs @@ -227,9 +227,9 @@ satExpr (Let bind body) interesting_ids = do (bind', sat_info_bind) <- satBind bind interesting_ids return (Let bind' body', mergeIdSATInfo sat_info_body sat_info_bind, body_app) -satExpr (Note note expr) interesting_ids = do +satExpr (Tick tickish expr) interesting_ids = do (expr', sat_info_expr, expr_app) <- satExpr expr interesting_ids - return (Note note expr', sat_info_expr, expr_app) + return (Tick tickish expr', sat_info_expr, expr_app) satExpr ty@(Type _) _ = do return (ty, emptyIdSATInfo, Nothing) diff --git a/compiler/simplCore/SetLevels.lhs b/compiler/simplCore/SetLevels.lhs index 50e547db06..76a44f4128 100644 --- a/compiler/simplCore/SetLevels.lhs +++ b/compiler/simplCore/SetLevels.lhs @@ -309,9 +309,9 @@ lvlExpr ctxt_lvl env expr@(_, AnnApp _ _) = do fun' <- lvlExpr ctxt_lvl env fun return (foldl App fun' args') -lvlExpr ctxt_lvl env (_, AnnNote note expr) = do +lvlExpr ctxt_lvl env (_, AnnTick tickish expr) = do expr' <- lvlExpr ctxt_lvl env expr - return (Note note expr') + return (Tick tickish expr') lvlExpr ctxt_lvl env (_, AnnCast expr (_, co)) = do expr' <- lvlExpr ctxt_lvl env expr @@ -446,9 +446,9 @@ lvlMFE _ _ env (_, AnnType ty) -- If we do we'll transform lvl = e |> co -- to lvl' = e; lvl = lvl' |> co -- and then inline lvl. Better just to float out the payload. -lvlMFE strict_ctxt ctxt_lvl env (_, AnnNote n e) +lvlMFE strict_ctxt ctxt_lvl env (_, AnnTick t e) = do { e' <- lvlMFE strict_ctxt ctxt_lvl env e - ; return (Note n e') } + ; return (Tick t e') } lvlMFE strict_ctxt ctxt_lvl env (_, AnnCast e (_, co)) = do { e' <- lvlMFE strict_ctxt ctxt_lvl env e @@ -838,7 +838,7 @@ isFunction :: CoreExprWithFVs -> Bool -- constructors. So the simple thing is just to look for lambdas isFunction (_, AnnLam b e) | isId b = True | otherwise = isFunction e -isFunction (_, AnnNote _ e) = isFunction e +-- isFunction (_, AnnTick _ e) = isFunction e -- dubious isFunction _ = False countFreeIds :: VarSet -> Int diff --git a/compiler/simplCore/SimplCore.lhs b/compiler/simplCore/SimplCore.lhs index 6237bd5705..9c0cc822cc 100644 --- a/compiler/simplCore/SimplCore.lhs +++ b/compiler/simplCore/SimplCore.lhs @@ -628,7 +628,7 @@ simplifyPgmIO pass@(CoreDoSimplify max_iterations mode) case initSmpl dflags rule_base2 fam_envs us1 sz simpl_binds of { (env1, counts1) -> do { - let { binds1 = getFloats env1 + let { binds1 = getFloatBinds env1 ; rules1 = substRulesForImportedIds (mkCoreSubst (text "imp-rules") env1) rules } ; diff --git a/compiler/simplCore/SimplEnv.lhs b/compiler/simplCore/SimplEnv.lhs index 862bc8dccc..298cc63597 100644 --- a/compiler/simplCore/SimplEnv.lhs +++ b/compiler/simplCore/SimplEnv.lhs @@ -12,9 +12,7 @@ module SimplEnv ( -- The simplifier mode setMode, getMode, updMode, - setEnclosingCC, getEnclosingCC, - - -- Environments + -- Environments SimplEnv(..), StaticEnv, pprSimplEnv, -- Temp not abstract mkSimplEnv, extendIdSubst, SimplEnv.extendTvSubst, SimplEnv.extendCvSubst, zapSubstEnv, setSubstEnv, @@ -32,7 +30,7 @@ module SimplEnv ( -- Floats Floats, emptyFloats, isEmptyFloats, addNonRec, addFloats, extendFloats, wrapFloats, floatBinds, setFloats, zapFloats, addRecFloats, - doFloatFromRhs, getFloats + doFloatFromRhs, getFloatBinds, getFloats, mapFloatRhss ) where #include "HsVersions.h" @@ -42,7 +40,6 @@ import CoreMonad ( SimplifierMode(..) ) import IdInfo import CoreSyn import CoreUtils -import CostCentre import Var import VarEnv import VarSet @@ -59,6 +56,7 @@ import BasicTypes import MonadUtils import Outputable import FastString +import Util import Data.List \end{code} @@ -107,7 +105,6 @@ data SimplEnv -- wrt the original expression seMode :: SimplifierMode, - seCC :: CostCentreStack, -- The enclosing CCS (when profiling) -- The current substitution seTvSubst :: TvSubstEnv, -- InTyVar |--> OutType @@ -227,8 +224,7 @@ seIdSubst: \begin{code} mkSimplEnv :: SimplifierMode -> SimplEnv mkSimplEnv mode - = SimplEnv { seCC = subsumedCCS - , seMode = mode + = SimplEnv { seMode = mode , seInScope = init_in_scope , seFloats = emptyFloats , seTvSubst = emptyVarEnv @@ -270,13 +266,6 @@ updMode :: (SimplifierMode -> SimplifierMode) -> SimplEnv -> SimplEnv updMode upd env = env { seMode = upd (seMode env) } --------------------- -getEnclosingCC :: SimplEnv -> CostCentreStack -getEnclosingCC env = seCC env - -setEnclosingCC :: SimplEnv -> CostCentreStack -> SimplEnv -setEnclosingCC env cc = env {seCC = cc} - ---------------------- extendIdSubst :: SimplEnv -> Id -> SimplSR -> SimplEnv extendIdSubst env@(SimplEnv {seIdSubst = subst}) var res = ASSERT2( isId var && not (isCoVar var), ppr var ) @@ -432,6 +421,13 @@ addNonRec env id rhs env { seFloats = seFloats env `addFlts` unitFloat (NonRec id rhs), seInScope = extendInScopeSet (seInScope env) id } +mapFloatRhss :: SimplEnv -> (CoreExpr -> CoreExpr) -> SimplEnv +mapFloatRhss env@SimplEnv { seFloats = Floats fs ff } fun + = env { seFloats = Floats (mapOL app fs) ff } + where + app (NonRec b e) = NonRec b (fun e) + app (Rec bs) = Rec (mapSnd fun bs) + extendFloats :: SimplEnv -> OutBind -> SimplEnv -- Add these bindings to the floats, and extend the in-scope env too extendFloats env bind @@ -474,8 +470,11 @@ wrapFlts (Floats bs _) body = foldrOL wrap body bs wrap (Rec prs) body = Let (Rec prs) body wrap (NonRec b r) body = bindNonRec b r body -getFloats :: SimplEnv -> [CoreBind] -getFloats (SimplEnv {seFloats = Floats bs _}) = fromOL bs +getFloatBinds :: SimplEnv -> [CoreBind] +getFloatBinds env = floatBinds (seFloats env) + +getFloats :: SimplEnv -> Floats +getFloats env = seFloats env isEmptyFloats :: SimplEnv -> Bool isEmptyFloats env = isEmptyFlts (seFloats env) diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs index d521def489..553e64ec65 100644 --- a/compiler/simplCore/SimplUtils.lhs +++ b/compiler/simplCore/SimplUtils.lhs @@ -124,7 +124,11 @@ data SimplCont CallCtxt -- Whether *this* argument position is interesting SimplCont -data ArgInfo + | TickIt + (Tickish Id) -- Tick tickish [] + SimplCont + +data ArgInfo = ArgInfo { ai_fun :: Id, -- The function ai_args :: [OutExpr], -- ...applied to these args (which are in *reverse* order) @@ -154,6 +158,7 @@ instance Outputable SimplCont where ppr (Select dup bndr alts se cont) = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$ (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont ppr (CoerceIt co cont) = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont + ppr (TickIt t cont) = (ptext (sLit "TickIt") <+> ppr t) $$ ppr cont data DupFlag = NoDup -- Unsimplified, might be big | Simplified -- Simplified @@ -227,6 +232,7 @@ contResultType env ty cont go (StrictArg ai _ cont) _ = go cont (funResultTy (argInfoResultTy ai)) go (Select _ _ alts se cont) _ = go cont (subst_ty se (coreAltsType alts)) go (ApplyTo _ arg se cont) ty = go cont (apply_to_arg ty arg se) + go (TickIt _ cont) ty = go cont ty apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg) apply_to_arg ty (Coercion co_arg) se = applyCo ty (subst_co se co_arg) @@ -331,6 +337,7 @@ interestingCallContext cont interesting (StrictArg _ cci _) = cci interesting (StrictBind {}) = BoringCtxt interesting (Stop cci) = cci + interesting (TickIt _ cci) = interesting cci interesting (CoerceIt _ cont) = interesting cont -- If this call is the arg of a strict function, the context -- is a bit interesting. If we inline here, we may get useful @@ -453,6 +460,7 @@ interestingArgContext rules call_cont go (StrictBind {}) = False -- ?? go (CoerceIt _ c) = go c go (Stop cci) = interesting cci + go (TickIt _ c) = go c interesting (ArgCtxt rules) = rules interesting _ = False @@ -829,7 +837,7 @@ preInlineUnconditionally env top_lvl bndr rhs -- E.g. let f = \ab.BIG in \y. map f xs -- Don't want to substitute for f, because then we allocate -- its closure every time the \y is called - -- But: let f = \ab.BIG in \y. map (f y) xs + -- But: let f = \ab.BIG in \y. map (f y) xs -- Now we do want to substitute for f, even though it's not -- saturated, because we're going to allocate a closure for -- (f y) every time round the loop anyhow. @@ -839,8 +847,9 @@ preInlineUnconditionally env top_lvl bndr rhs -- Sadly, not quite the same as exprIsHNF. canInlineInLam (Lit _) = True canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e - canInlineInLam (Note _ e) = canInlineInLam e - canInlineInLam _ = False + canInlineInLam _ = False + -- not ticks. Counting ticks cannot be duplicated, and non-counting + -- ticks around a Lam will disappear anyway. early_phase = case sm_phase mode of Phase 0 -> False @@ -1356,7 +1365,7 @@ abstractFloats main_tvs body_env body ; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) } where main_tv_set = mkVarSet main_tvs - body_floats = getFloats body_env + body_floats = getFloatBinds body_env empty_subst = CoreSubst.mkEmptySubst (seInScope body_env) abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind) @@ -1846,4 +1855,4 @@ without getting changed to c1=I# c2. I don't think this is worth fixing, even if I knew how. It'll all come out in the next pass anyway. -
\ No newline at end of file + diff --git a/compiler/simplCore/Simplify.lhs b/compiler/simplCore/Simplify.lhs index bc04d4878c..d4108d20fd 100644 --- a/compiler/simplCore/Simplify.lhs +++ b/compiler/simplCore/Simplify.lhs @@ -33,7 +33,6 @@ import qualified CoreSubst import CoreArity import Rules ( lookupRule, getRules ) import BasicTypes ( isMarkedStrict, Arity ) -import CostCentre ( currentCCS, pushCCisNop ) import TysPrim ( realWorldStatePrimTy ) import BasicTypes ( TopLevelFlag(..), isTopLevel, RecFlag(..) ) import MonadUtils ( foldlM, mapAccumLM ) @@ -648,7 +647,9 @@ completeBind env top_lvl old_bndr new_bndr new_rhs -- Simplify the unfolding ; new_unfolding <- simplUnfolding env top_lvl old_bndr final_rhs old_unf - ; if postInlineUnconditionally env top_lvl new_bndr occ_info final_rhs new_unfolding + ; if postInlineUnconditionally env top_lvl new_bndr occ_info + final_rhs new_unfolding + -- Inline and discard the binding then do { tick (PostInlineUnconditionally old_bndr) ; return (extendIdSubst env old_bndr (DoneEx final_rhs)) } @@ -902,7 +903,7 @@ simplExprF1 :: SimplEnv -> InExpr -> SimplCont -> SimplM (SimplEnv, OutExpr) simplExprF1 env (Var v) cont = simplIdF env v cont simplExprF1 env (Lit lit) cont = rebuild env (Lit lit) cont -simplExprF1 env (Note n expr) cont = simplNote env n expr cont +simplExprF1 env (Tick t expr) cont = simplTick env t expr cont simplExprF1 env (Cast body co) cont = simplCast env body co cont simplExprF1 env (Coercion co) cont = simplCoercionF env co cont simplExprF1 env (Type ty) cont = ASSERT( contIsRhsOrArg cont ) @@ -989,6 +990,77 @@ simplCoercion :: SimplEnv -> InCoercion -> SimplM OutCoercion simplCoercion env co = let opt_co = optCoercion (getCvSubst env) co in opt_co `seq` return opt_co + +----------------------------------- +-- | Push a TickIt context outwards past applications and cases, as +-- long as this is a non-scoping tick, to let case and application +-- optimisations apply. + +simplTick :: SimplEnv -> Tickish Id -> InExpr -> SimplCont + -> SimplM (SimplEnv, OutExpr) +simplTick env tickish expr cont + -- A scoped tick turns into a continuation, so that we can spot + -- (scc t (\x . e)) in simplLam and eliminate the scc. If we didn't do + -- it this way, then it would take two passes of the simplifier to + -- reduce ((scc t (\x . e)) e'). + -- NB, don't do this with counting ticks, because if the expr is + -- bottom, then rebuildCall will discard the continuation. + +-- XXX: we cannot do this, because the simplifier assumes that +-- the context can be pushed into a case with a single branch. e.g. +-- scc<f> case expensive of p -> e +-- becomes +-- case expensive of p -> scc<f> e +-- +-- So I'm disabling this for now. It just means we will do more +-- simplifier iterations that necessary in some cases. + +-- | tickishScoped tickish && not (tickishCounts tickish) +-- = simplExprF env expr (TickIt tickish cont) + + -- For non-scoped ticks, we push the continuation inside the + -- tick. This has the effect of moving the tick to the outside of a + -- case or application context, allowing the normal case and + -- application optimisations to fire. + | not (tickishScoped tickish) + = do { (env', expr') <- simplExprF env expr cont + ; return (env', mkTick tickish expr') + } + + -- the last case handles scoped/counting ticks, where all we + -- can do is simplify the inner expression and then rebuild. + -- + -- NB. float handling here is tricky. We have some floats already + -- in the env, and there may be floats arising from the inner + -- expression. We must be careful to wrap any floats arising from + -- the inner expression with a non-counting tick, but not those from + -- the env passed in. + -- + + -- For breakpoints, we cannot do any floating of bindings around the + -- tick. So + | Breakpoint{} <- tickish + = do { (env', expr') <- simplExprF (zapFloats env) expr mkBoringStop + ; let tickish' = simplTickish env tickish + ; (env'', expr'') <- rebuild (zapFloats env') (wrapFloats env' expr') (TickIt tickish' cont) + ; return (env'', wrapFloats env expr'') + } + + | otherwise + = do { (env', expr') <- simplExprF (zapFloats env) expr mkBoringStop + ; let tickish' = simplTickish env tickish + ; let env'' = addFloats env (mapFloatRhss env' (mkTick (mkNoTick tickish'))) + ; rebuild env'' expr' (TickIt tickish' cont) + } + where + simplTickish env tickish + | Breakpoint n ids <- tickish + = Breakpoint n (map (getDoneId . substId env) ids) + | otherwise = tickish + + getDoneId (DoneId id) = id + getDoneId (DoneEx e) = getIdFromTrivialExpr e -- Note [substTickish] in CoreSubst + getDoneId other = pprPanic "getDoneId" (ppr other) \end{code} @@ -1014,6 +1086,7 @@ rebuild env expr cont | isSimplified dup_flag -> rebuild env (App expr arg) cont | otherwise -> do { arg' <- simplExpr (se `setInScope` env) arg ; rebuild env (App expr arg') cont } + TickIt t cont -> rebuild env (mkTick t expr) cont \end{code} @@ -1127,6 +1200,14 @@ simplLam env (bndr:bndrs) body (ApplyTo _ arg arg_se cont) = setIdUnfolding bndr NoUnfolding | otherwise = bndr + -- discard a non-counting tick on a lambda. This may change the + -- cost attribution slightly (moving the allocation of the + -- lambda elsewhere), but we don't care: optimisation changes + -- cost attribution all the time. +simplLam env bndrs body (TickIt tickish cont) + | not (tickishCounts tickish) + = simplLam env bndrs body cont + -- Not enough args, so there are real lambdas left to put in the result simplLam env bndrs body cont = do { (env', bndrs') <- simplLamBndrs env bndrs @@ -1168,7 +1249,7 @@ simplNonRecE env bndr (rhs, rhs_se) (bndrs, body) cont ; -- pprTrace "preInlineUncond" (ppr bndr <+> ppr rhs) $ simplLam (extendIdSubst env bndr (mkContEx rhs_se rhs)) bndrs body cont } - | isStrictId bndr -- Includes coercions + | isStrictId bndr -- Includes coercions = do { simplExprF (rhs_se `setFloats` env) rhs (StrictBind bndr bndrs body env cont) } @@ -1180,31 +1261,6 @@ simplNonRecE env bndr (rhs, rhs_se) (bndrs, body) cont ; simplLam env3 bndrs body cont } \end{code} - -%************************************************************************ -%* * -\subsection{Notes} -%* * -%************************************************************************ - -\begin{code} --- Hack alert: we only distinguish subsumed cost centre stacks for the --- purposes of inlining. All other CCCSs are mapped to currentCCS. -simplNote :: SimplEnv -> Note -> CoreExpr -> SimplCont - -> SimplM (SimplEnv, OutExpr) -simplNote env (SCC cc) e cont - | pushCCisNop cc (getEnclosingCC env) -- scc "f" (...(scc "f" e)...) - = simplExprF env e cont -- ==> scc "f" (...e...) - | otherwise - = do { e' <- simplExpr (setEnclosingCC env currentCCS) e - ; rebuild env (mkSCC cc e') cont } - -simplNote env (CoreNote s) e cont - = do { e' <- simplExpr env e - ; rebuild env (Note (CoreNote s) e') cont } -\end{code} - - %************************************************************************ %* * Variables @@ -1643,9 +1699,7 @@ rebuildCase env scrut case_bndr [(_, bndrs, rhs)] cont strict_case_bndr = isStrictDmd (idDemandInfo case_bndr) scrut_is_var (Cast s _) = scrut_is_var s - scrut_is_var (Var v) = not (isTickBoxOp v) - -- ugly hack; covering this case is what - -- exprOkForSpeculation was intended for. + scrut_is_var (Var _) = True scrut_is_var _ = False @@ -2082,6 +2136,10 @@ mkDupableCont env (CoerceIt ty cont) = do { (env', dup, nodup) <- mkDupableCont env cont ; return (env', CoerceIt ty dup, nodup) } +-- Duplicating ticks for now, not sure if this is good or not +mkDupableCont env cont@(TickIt{}) + = return (env, mkBoringStop, cont) + mkDupableCont env cont@(StrictBind {}) = return (env, mkBoringStop, cont) -- See Note [Duplicating StrictBind] diff --git a/compiler/simplStg/SRT.lhs b/compiler/simplStg/SRT.lhs index abad3ce48d..c9eabfd1a1 100644 --- a/compiler/simplStg/SRT.lhs +++ b/compiler/simplStg/SRT.lhs @@ -116,7 +116,7 @@ srtExpr _ e@(StgLit _) = e srtExpr _ e@(StgConApp _ _) = e srtExpr _ e@(StgOpApp _ _ _) = e -srtExpr table (StgSCC cc expr) = StgSCC cc $! srtExpr table expr +srtExpr table (StgSCC cc tick push expr) = StgSCC cc tick push $! srtExpr table expr srtExpr table (StgTick m n expr) = StgTick m n $! srtExpr table expr diff --git a/compiler/simplStg/StgStats.lhs b/compiler/simplStg/StgStats.lhs index 74a4fc3cbf..aca29740ac 100644 --- a/compiler/simplStg/StgStats.lhs +++ b/compiler/simplStg/StgStats.lhs @@ -159,7 +159,7 @@ statExpr (StgApp _ _) = countOne Applications statExpr (StgLit _) = countOne Literals statExpr (StgConApp _ _) = countOne ConstructorApps statExpr (StgOpApp _ _ _) = countOne PrimitiveApps -statExpr (StgSCC _ e) = statExpr e +statExpr (StgSCC _ _ _ e) = statExpr e statExpr (StgTick _ _ e) = statExpr e statExpr (StgLetNoEscape _ _ binds body) diff --git a/compiler/specialise/Rules.lhs b/compiler/specialise/Rules.lhs index a439f2ac27..ef29eb58eb 100644 --- a/compiler/specialise/Rules.lhs +++ b/compiler/specialise/Rules.lhs @@ -626,10 +626,6 @@ match :: RuleEnv -- The Var case follows closely what happens in Unify.match match renv subst (Var v1) e2 = match_var renv subst v1 e2 -match renv subst (Note _ e1) e2 = match renv subst e1 e2 -match renv subst e1 (Note _ e2) = match renv subst e1 e2 - -- Ignore notes in both template and thing to be matched - -- See Note [Notes in RULE matching] match renv subst e1 (Var v2) -- Note [Expanding variables] | not (inRnEnvR rn_env v2) -- Note [Do not expand locally-bound variables] @@ -885,13 +881,12 @@ the entire match. Hence, (a) the guard (not (isLocallyBoundR v2)) (b) when we expand we nuke the renaming envt (nukeRnEnvR). -Note [Notes in RULE matching] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Look through Notes in both template and expression being matched. In -particular, we don't want to be confused by InlineMe notes. Maybe we -should be more careful about profiling notes, but for now I'm just -riding roughshod over them. cf Note [Notes in call patterns] in -SpecConstr +Note [Tick annotations in RULE matching] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We used to look through Notes in both template and expression being +matched. This would be incorrect for ticks, which we cannot discard, +so we do not look through Ticks at all. cf Note [Notes in call +patterns] in SpecConstr Note [Matching lets] ~~~~~~~~~~~~~~~~~~~~ @@ -1051,7 +1046,7 @@ ruleCheck _ (Lit _) = emptyBag ruleCheck _ (Type _) = emptyBag ruleCheck _ (Coercion _) = emptyBag ruleCheck env (App f a) = ruleCheckApp env (App f a) [] -ruleCheck env (Note _ e) = ruleCheck env e +ruleCheck env (Tick _ e) = ruleCheck env e ruleCheck env (Cast e _) = ruleCheck env e ruleCheck env (Let bd e) = ruleCheckBind env bd `unionBags` ruleCheck env e ruleCheck env (Lam _ e) = ruleCheck env e diff --git a/compiler/specialise/SpecConstr.lhs b/compiler/specialise/SpecConstr.lhs index 34cfc9c90b..0959425e8f 100644 --- a/compiler/specialise/SpecConstr.lhs +++ b/compiler/specialise/SpecConstr.lhs @@ -970,8 +970,8 @@ combineOccs xs ys = zipWithEqual "combineOccs" combineOcc xs ys setScrutOcc :: ScEnv -> ScUsage -> OutExpr -> ArgOcc -> ScUsage -- _Overwrite_ the occurrence info for the scrutinee, if the scrutinee -- is a variable, and an interesting variable -setScrutOcc env usg (Cast e _) occ = setScrutOcc env usg e occ -setScrutOcc env usg (Note _ e) occ = setScrutOcc env usg e occ +setScrutOcc env usg (Cast e _) occ = setScrutOcc env usg e occ +setScrutOcc env usg (Tick _ e) occ = setScrutOcc env usg e occ setScrutOcc env usg (Var v) occ | Just RecArg <- lookupHowBound env v = usg { scu_occs = extendVarEnv (scu_occs usg) v occ } | otherwise = usg @@ -1003,8 +1003,8 @@ scExpr' env (Var v) = case scSubstId env v of scExpr' env (Type t) = return (nullUsage, Type (scSubstTy env t)) scExpr' env (Coercion c) = return (nullUsage, Coercion (scSubstCo env c)) scExpr' _ e@(Lit {}) = return (nullUsage, e) -scExpr' env (Note n e) = do (usg,e') <- scExpr env e - return (usg, Note n e') +scExpr' env (Tick t e) = do (usg,e') <- scExpr env e + return (usg, Tick t e') scExpr' env (Cast e co) = do (usg, e') <- scExpr env e return (usg, Cast e' (scSubstCo env co)) scExpr' env e@(App _ _) = scApp env (collectArgs e) @@ -1583,7 +1583,7 @@ argToPat _env _in_scope _val_env arg@(Type {}) _arg_occ argToPat _env _in_scope _val_env arg@(Coercion {}) _arg_occ = return (False, arg) -argToPat env in_scope val_env (Note _ arg) arg_occ +argToPat env in_scope val_env (Tick _ arg) arg_occ = argToPat env in_scope val_env arg arg_occ -- Note [Notes in call patterns] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1765,9 +1765,9 @@ samePat (vs1, as1) (vs2, as2) same (Type {}) (Type {}) = True -- Note [Ignore type differences] same (Coercion {}) (Coercion {}) = True - same (Note _ e1) e2 = same e1 e2 -- Ignore casts and notes + same (Tick _ e1) e2 = same e1 e2 -- Ignore casts and notes same (Cast e1 _) e2 = same e1 e2 - same e1 (Note _ e2) = same e1 e2 + same e1 (Tick _ e2) = same e1 e2 same e1 (Cast e2 _) = same e1 e2 same e1 e2 = WARN( bad e1 || bad e2, ppr e1 $$ ppr e2) diff --git a/compiler/specialise/Specialise.lhs b/compiler/specialise/Specialise.lhs index 68d023b52c..24f9d080db 100644 --- a/compiler/specialise/Specialise.lhs +++ b/compiler/specialise/Specialise.lhs @@ -716,9 +716,9 @@ specExpr _ (Lit lit) = return (Lit lit, emptyUDs) specExpr subst (Cast e co) = do (e', uds) <- specExpr subst e return ((Cast e' (CoreSubst.substCo subst co)), uds) -specExpr subst (Note note body) = do +specExpr subst (Tick tickish body) = do (body', uds) <- specExpr subst body - return (Note (specNote subst note) body', uds) + return (Tick (specTickish subst tickish) body', uds) ---------------- Applications might generate a call instance -------------------- @@ -766,10 +766,12 @@ specExpr subst (Let bind body) = do -- All done return (foldr Let body' binds', uds) --- Must apply the type substitution to coerceions -specNote :: Subst -> Note -> Note -specNote _ note = note - +specTickish :: Subst -> Tickish Id -> Tickish Id +specTickish subst (Breakpoint ix ids) + = Breakpoint ix [ id' | id <- ids, Var id' <- [specVar subst id]] + -- drop vars from the list if they have a non-variable substitution. + -- should never happen, but it's harmless to drop them anyway. +specTickish _ other_tickish = other_tickish specCase :: Subst -> CoreExpr -- Scrutinee, already done @@ -1611,7 +1613,7 @@ interestingDict (Type _) = False interestingDict (Coercion _) = False interestingDict (App fn (Type _)) = interestingDict fn interestingDict (App fn (Coercion _)) = interestingDict fn -interestingDict (Note _ a) = interestingDict a +interestingDict (Tick _ a) = interestingDict a interestingDict (Cast e _) = interestingDict e interestingDict _ = True \end{code} diff --git a/compiler/stgSyn/CoreToStg.lhs b/compiler/stgSyn/CoreToStg.lhs index 2a72489ebf..3194974c8f 100644 --- a/compiler/stgSyn/CoreToStg.lhs +++ b/compiler/stgSyn/CoreToStg.lhs @@ -340,17 +340,16 @@ coreToStgExpr expr@(Lam _ _) return (result_expr, fvs, escs) -coreToStgExpr (Note (SCC cc) expr) = do - (expr2, fvs, escs) <- coreToStgExpr expr - return (StgSCC cc expr2, fvs, escs) +coreToStgExpr (Tick (HpcTick m n) expr) + = do (expr2, fvs, escs) <- coreToStgExpr expr + return (StgTick m n expr2, fvs, escs) -coreToStgExpr (Case (Var id) _bndr _ty [(DEFAULT,[],expr)]) - | Just (TickBox m n) <- isTickBoxOp_maybe id = do - (expr2, fvs, escs) <- coreToStgExpr expr - return (StgTick m n expr2, fvs, escs) +coreToStgExpr (Tick (ProfNote cc tick push) expr) + = do (expr2, fvs, escs) <- coreToStgExpr expr + return (StgSCC cc tick push expr2, fvs, escs) -coreToStgExpr (Note _ expr) - = coreToStgExpr expr +coreToStgExpr (Tick Breakpoint{} _expr) + = panic "coreToStgExpr: breakpoint should not happen" coreToStgExpr (Cast expr _) = coreToStgExpr expr @@ -1108,15 +1107,16 @@ filterStgBinders bndrs = filter isId bndrs \begin{code} - -- Ignore all notes except SCC myCollectBinders :: Expr Var -> ([Var], Expr Var) myCollectBinders expr = go [] expr where go bs (Lam b e) = go (b:bs) e - go bs e@(Note (SCC _) _) = (reverse bs, e) + go bs e@(Tick t e') + | tickishIsCode t = (reverse bs, e) + | otherwise = go bs e' + -- Ignore only non-code source annotations go bs (Cast e _) = go bs e - go bs (Note _ e) = go bs e go bs e = (reverse bs, e) myCollectArgs :: CoreExpr -> (Id, [CoreArg]) @@ -1127,9 +1127,8 @@ myCollectArgs expr where go (Var v) as = (v, as) go (App f a) as = go f (a:as) - go (Note (SCC _) _) _ = pprPanic "CoreToStg.myCollectArgs" (ppr expr) + go (Tick _ _) _ = pprPanic "CoreToStg.myCollectArgs" (ppr expr) go (Cast e _) as = go e as - go (Note _ e) as = go e as go (Lam b e) as | isTyVar b = go e as -- Note [Collect args] go _ _ = pprPanic "CoreToStg.myCollectArgs" (ppr expr) diff --git a/compiler/stgSyn/StgLint.lhs b/compiler/stgSyn/StgLint.lhs index 945d6c96d6..98b2de1444 100644 --- a/compiler/stgSyn/StgLint.lhs +++ b/compiler/stgSyn/StgLint.lhs @@ -191,7 +191,7 @@ lintStgExpr (StgLetNoEscape _ _ binds body) = do addInScopeVars binders $ lintStgExpr body -lintStgExpr (StgSCC _ expr) = lintStgExpr expr +lintStgExpr (StgSCC _ _ _ expr) = lintStgExpr expr lintStgExpr (StgCase scrut _ _ bndr _ alts_type alts) = runMaybeT $ do _ <- MaybeT $ lintStgExpr scrut diff --git a/compiler/stgSyn/StgSyn.lhs b/compiler/stgSyn/StgSyn.lhs index d332a4e279..c6a6762a15 100644 --- a/compiler/stgSyn/StgSyn.lhs +++ b/compiler/stgSyn/StgSyn.lhs @@ -374,7 +374,9 @@ Finally for @scc@ expressions we introduce a new STG construct. \begin{code} | StgSCC CostCentre -- label of SCC expression - (GenStgExpr bndr occ) -- scc expression + !Bool -- bump the entry count? + !Bool -- push the cost centre? + (GenStgExpr bndr occ) -- scc expression \end{code} %************************************************************************ @@ -761,9 +763,12 @@ pprStgExpr (StgLetNoEscape lvs_whole lvs_rhss bind expr) char ']'])))) 2 (ppr expr)] -pprStgExpr (StgSCC cc expr) - = sep [ hsep [ptext (sLit "_scc_"), ppr cc], - pprStgExpr expr ] +pprStgExpr (StgSCC cc tick push expr) + = sep [ hsep [scc, ppr cc], pprStgExpr expr ] + where + scc | tick && push = ptext (sLit "_scc_") + | tick = ptext (sLit "_tick_") + | otherwise = ptext (sLit "_push_") pprStgExpr (StgTick m n expr) = sep [ hsep [ptext (sLit "_tick_"), pprModule m,text (show n)], diff --git a/compiler/stranal/DmdAnal.lhs b/compiler/stranal/DmdAnal.lhs index 9955490c25..f52ab78180 100644 --- a/compiler/stranal/DmdAnal.lhs +++ b/compiler/stranal/DmdAnal.lhs @@ -166,8 +166,8 @@ dmdAnal env dmd (Cast e co) -- inside recursive products -- we might not reach -- a fixpoint. So revert to a vanilla Eval demand -dmdAnal env dmd (Note n e) - = (dmd_ty, Note n e') +dmdAnal env dmd (Tick t e) + = (dmd_ty, Tick t e') where (dmd_ty, e') = dmdAnal env dmd e diff --git a/compiler/stranal/WorkWrap.lhs b/compiler/stranal/WorkWrap.lhs index c1e9c48390..c947388d18 100644 --- a/compiler/stranal/WorkWrap.lhs +++ b/compiler/stranal/WorkWrap.lhs @@ -110,8 +110,8 @@ wwExpr (Lam binder expr) wwExpr (App f a) = App <$> wwExpr f <*> wwExpr a -wwExpr (Note note expr) - = Note note <$> wwExpr expr +wwExpr (Tick note expr) + = Tick note <$> wwExpr expr wwExpr (Cast expr co) = do new_expr <- wwExpr expr @@ -379,7 +379,7 @@ get_one_shots :: Expr Var -> [Bool] get_one_shots (Lam b e) | isId b = isOneShotLambda b : get_one_shots e | otherwise = get_one_shots e -get_one_shots (Note _ e) = get_one_shots e +get_one_shots (Tick _ e) = get_one_shots e get_one_shots _ = noOneShotInfo \end{code} diff --git a/compiler/stranal/WwLib.lhs b/compiler/stranal/WwLib.lhs index 223cb81e8c..b18ded3118 100644 --- a/compiler/stranal/WwLib.lhs +++ b/compiler/stranal/WwLib.lhs @@ -475,10 +475,18 @@ mkWWcpr body_ty _other -- No CPR info -- \ x -> case (_scc_ "foo" E) of I# x -> x) -- -- This transform doesn't move work or allocation --- from one cost centre to another +-- from one cost centre to another. +-- +-- Later [SDM]: presumably this is because we want the simplifier to +-- eliminate the case, and the scc would get in the way? I'm ok with +-- including the case itself in the cost centre, since it is morally +-- part of the function (post transformation) anyway. + workerCase :: Id -> CoreExpr -> [Id] -> DataCon -> CoreExpr -> CoreExpr -workerCase bndr (Note (SCC cc) e) args con body = Note (SCC cc) (mkUnpackCase bndr e args con body) -workerCase bndr e args con body = mkUnpackCase bndr e args con body +workerCase bndr (Tick tickish e) args con body + = Tick tickish (mkUnpackCase bndr e args con body) +workerCase bndr e args con body + = mkUnpackCase bndr e args con body \end{code} diff --git a/compiler/typecheck/TcBinds.lhs b/compiler/typecheck/TcBinds.lhs index 543df90b33..ac2fe8c11b 100644 --- a/compiler/typecheck/TcBinds.lhs +++ b/compiler/typecheck/TcBinds.lhs @@ -889,7 +889,8 @@ tcRhs (TcPatBind _ pat' grhss pat_ty) ; grhss' <- addErrCtxt (patMonoBindsCtxt pat' grhss) $ tcGRHSsPat grhss pat_ty ; return (PatBind { pat_lhs = pat', pat_rhs = grhss', pat_rhs_ty = pat_ty - , bind_fvs = placeHolderNames }) } + , bind_fvs = placeHolderNames + , pat_ticks = (Nothing,[]) }) } --------------------- diff --git a/compiler/vectorise/Vectorise/Exp.hs b/compiler/vectorise/Vectorise/Exp.hs index ee3dfdfefa..3959a947bd 100644 --- a/compiler/vectorise/Vectorise/Exp.hs +++ b/compiler/vectorise/Vectorise/Exp.hs @@ -43,14 +43,15 @@ import Data.List -- | Vectorise a polymorphic expression. -- -vectPolyExpr :: Bool -- ^ When vectorising the RHS of a binding, whether that - -- binding is a loop breaker. +vectPolyExpr :: Bool -- ^ When vectorising the RHS of a + -- binding, whether that binding is a + -- loop breaker. -> [Var] -> CoreExprWithFVs -> VM (Inline, Bool, VExpr) -vectPolyExpr loop_breaker recFns (_, AnnNote note expr) +vectPolyExpr loop_breaker recFns (_, AnnTick tickish expr) = do (inline, isScalarFn, expr') <- vectPolyExpr loop_breaker recFns expr - return (inline, isScalarFn, vNote note expr') + return (inline, isScalarFn, vTick tickish expr') vectPolyExpr loop_breaker recFns expr = do arity <- polyArity tvs @@ -75,8 +76,8 @@ vectExpr (_, AnnVar v) vectExpr (_, AnnLit lit) = vectLiteral lit -vectExpr (_, AnnNote note expr) - = liftM (vNote note) (vectExpr expr) +vectExpr (_, AnnTick tickish expr) + = liftM (vTick tickish) (vectExpr expr) -- SPECIAL CASE: Vectorise/lift 'patError @ ty err' by only vectorising/lifting the type 'ty'; -- its only purpose is to abort the program, but we need to adjust the type to keep CoreLint @@ -247,7 +248,7 @@ vectScalarFun forceScalar recFns expr where scalars' = scalars `extendVarSet` var is_scalar scalars isScalarTC (Cast e _coe) = is_scalar scalars isScalarTC e - is_scalar scalars isScalarTC (Note _ e ) = is_scalar scalars isScalarTC e + is_scalar scalars isScalarTC (Tick _ e ) = is_scalar scalars isScalarTC e is_scalar _scalars _isScalarTC (Type {}) = True is_scalar _scalars _isScalarTC (Coercion {}) = True diff --git a/compiler/vectorise/Vectorise/Vect.hs b/compiler/vectorise/Vectorise/Vect.hs index 6dcffa2509..156d540bc6 100644 --- a/compiler/vectorise/Vectorise/Vect.hs +++ b/compiler/vectorise/Vectorise/Vect.hs @@ -12,7 +12,7 @@ module Vectorise.Vect ( vRec, vVar, vType, - vNote, + vTick, vLet, vLams, vLamsWithoutLC, @@ -66,8 +66,8 @@ vType ty = (Type ty, Type ty) -- | Make a vectorised note. -vNote :: Note -> VExpr -> VExpr -vNote = mapVect . Note +vTick :: Tickish Id -> VExpr -> VExpr +vTick = mapVect . Tick -- | Make a vectorised non-recursive binding. diff --git a/docs/users_guide/flags.xml b/docs/users_guide/flags.xml index 244bbf64a6..e765525c13 100644 --- a/docs/users_guide/flags.xml +++ b/docs/users_guide/flags.xml @@ -1668,32 +1668,45 @@ phase <replaceable>n</replaceable></entry> </thead> <tbody> <row> - <entry><option>-auto</option></entry> - <entry>Auto-add <literal>_scc_</literal>s to all - exported functions not marked INLINE</entry> - <entry>dynamic</entry> - <entry><option>-no-auto</option></entry> + <entry><option>-prof</option></entry> + <entry>Turn on profiling</entry> + <entry>static</entry> + <entry>-</entry> </row> <row> - <entry><option>-auto-all</option></entry> - <entry>Auto-add <literal>_scc_</literal>s to all - top-level functions not marked INLINE</entry> + <entry><option>-fprof-auto</option></entry> + <entry>Auto-add <literal>SCC</literal>s to all bindings + not marked INLINE</entry> <entry>dynamic</entry> - <entry><option>-no-auto-all</option></entry> + <entry><option>-fno-prof-auto</option></entry> </row> <row> - <entry><option>-caf-all</option></entry> - <entry>Auto-add <literal>_scc_</literal>s to all CAFs</entry> - <entry>dynamic</entry> - <entry><option>-no-caf-all</option></entry> + <entry><option>-fprof-auto-top</option></entry> + <entry>Auto-add <literal>SCC</literal>s to all top-level + bindings not marked INLINE</entry> + <entry>dynamic</entry> + <entry><option>-fno-prof-auto</option></entry> </row> <row> - <entry><option>-prof</option></entry> - <entry>Turn on profiling</entry> - <entry>static</entry> - <entry>-</entry> + <entry><option>-fprof-auto-exported</option></entry> + <entry>Auto-add <literal>SCC</literal>s to all exported + bindings not marked INLINE</entry> + <entry>dynamic</entry> + <entry><option>-fno-prof-auto</option></entry> </row> <row> + <entry><option>-fprof-cafs</option></entry> + <entry>Auto-add <literal>SCC</literal>s to all CAFs</entry> + <entry>dynamic</entry> + <entry><option>-fno-prof-cafs</option></entry> + </row> + <row> + <entry><option>-fno-prof-count-entries</option></entry> + <entry>Do not collect entry counts</entry> + <entry>dynamic</entry> + <entry><option>-fprof-count-entries</option></entry> + </row> + <row> <entry><option>-ticky</option></entry> <entry><link linkend="ticky-ticky">Turn on ticky-ticky profiling</link></entry> <entry>static</entry> diff --git a/docs/users_guide/profiling.xml b/docs/users_guide/profiling.xml index 01c7576b63..16dead80ae 100644 --- a/docs/users_guide/profiling.xml +++ b/docs/users_guide/profiling.xml @@ -5,45 +5,58 @@ </indexterm> <indexterm><primary>cost-centre profiling</primary></indexterm> - <para> Glasgow Haskell comes with a time and space profiling - system. Its purpose is to help you improve your understanding of - your program's execution behaviour, so you can improve it.</para> + <para>GHC comes with a time and space profiling system, so that you + can answer questions like "why is my program so slow?", or "why is + my program using so much memory?".</para> - <para> Any comments, suggestions and/or improvements you have are - welcome. Recommended “profiling tricks” would be - especially cool! </para> + <para>Note that multi-processor execution (e.g. <literal>+RTS + -N2</literal>) is not currently supported with GHC's time and space + profiling. However, there is a separate tool specifically for + profiling concurrent and parallel programs: <ulink + url="http://www.haskell.org/haskellwiki/ThreadScope">ThreadScope</ulink>.</para> <para>Profiling a program is a three-step process:</para> <orderedlist> <listitem> - <para> Re-compile your program for profiling with the - <literal>-prof</literal> option, and probably one of the - <literal>-auto</literal> or <literal>-auto-all</literal> - options. These options are described in more detail in <xref - linkend="prof-compiler-options"/> </para> - <indexterm><primary><literal>-prof</literal></primary> - </indexterm> - <indexterm><primary><literal>-auto</literal></primary> - </indexterm> - <indexterm><primary><literal>-auto-all</literal></primary> - </indexterm> + <para>Re-compile your program for profiling with the + <literal>-prof</literal> option, and probably one of the options + for adding automatic annotations: + <literal>-fprof-auto</literal> is the most common<footnote><para><option>-fprof-auto</option> was known as <option>-auto-all</option><indexterm><primary><literal>-auto-all</literal></primary> + </indexterm> prior to GHC 7.4.1.</para></footnote>. + <indexterm><primary><literal>-fprof-auto</literal></primary> + </indexterm></para> + + <para>If you are using external packages with + <literal>cabal</literal>, you may need to reinstall these + packages with profiling support; typically this is done with + <literal>cabal install -p <replaceble>package</replaceble> + --reinstall</literal>.</para> </listitem> <listitem> - <para> Run your program with one of the profiling options, eg. - <literal>+RTS -p -RTS</literal>. This generates a file of - profiling information. Note that multi-processor execution - (e.g. <literal>+RTS -N2</literal>) is not supported while - profiling.</para> - <indexterm><primary><option>-p</option></primary><secondary>RTS - option</secondary></indexterm> + <para>Having compiled the program for profiling, you now need to + run it to generate the profile. For example, a simple time + profile can be generated by running the program with + <option>+RTS + -p</option><indexterm><primary><option>-p</option></primary><secondary>RTS + option</secondary></indexterm>, which generates a file named + <literal><replaceable>prog</replaceable>.prof</literal> where + <replaceable>prog</replaceable> is the name of your program + (without the <literal>.exe</literal> extension, if you are on + Windows).</para> + + <para>There are many different kinds of profile that can be + generated, selected by different RTS options. We will be + describing the various kinds of profile throughout the rest of + this chapter. Some profiles require further processing using + additional tools after running the program.</para> </listitem> <listitem> - <para> Examine the generated profiling information, using one of - GHC's profiling tools. The tool to use will depend on the kind - of profiling information generated.</para> + <para>Examine the generated profiling information, use the + information to optimise your program, and repeat as + necessary.</para> </listitem> </orderedlist> @@ -53,24 +66,24 @@ <para>GHC's profiling system assigns <firstterm>costs</firstterm> to <firstterm>cost centres</firstterm>. A cost is simply the time - or space required to evaluate an expression. Cost centres are + or space (memory) required to evaluate an expression. Cost centres are program annotations around expressions; all costs incurred by the annotated expression are assigned to the enclosing cost centre. Furthermore, GHC will remember the stack of enclosing cost centres - for any given expression at run-time and generate a call-graph of + for any given expression at run-time and generate a call-tree of cost attributions.</para> <para>Let's take a look at an example:</para> <programlisting> -main = print (nfib 25) -nfib n = if n < 2 then 1 else nfib (n-1) + nfib (n-2) +main = print (fib 30) +fib n = if n < 2 then 1 else fib (n-1) + fib (n-2) </programlisting> <para>Compile and run this program as follows:</para> <screen> -$ ghc -prof -auto-all -o Main Main.hs +$ ghc -prof -fprof-auto -rtsopts Main.hs $ ./Main +RTS -p 121393 $ @@ -78,53 +91,52 @@ $ <para>When a GHC-compiled program is run with the <option>-p</option> RTS option, it generates a file called - <filename><prog>.prof</filename>. In this case, the file + <filename><replaceable>prog</replaceable>.prof</filename>. In this case, the file will contain something like this:</para> <screen> - Fri May 12 14:06 2000 Time and Allocation Profiling Report (Final) + Wed Oct 12 16:14 2011 Time and Allocation Profiling Report (Final) Main +RTS -p -RTS - total time = 0.14 secs (7 ticks @ 20 ms) - total alloc = 8,741,204 bytes (excludes profiling overheads) + total time = 0.68 secs (34 ticks @ 20 ms) + total alloc = 204,677,844 bytes (excludes profiling overheads) -COST CENTRE MODULE %time %alloc +COST CENTRE MODULE %time %alloc -nfib Main 100.0 100.0 +fib Main 100.0 100.0 - individual inherited -COST CENTRE MODULE entries %time %alloc %time %alloc + individual inherited +COST CENTRE MODULE no. entries %time %alloc %time %alloc -MAIN MAIN 0 0.0 0.0 100.0 100.0 - main Main 0 0.0 0.0 0.0 0.0 - CAF PrelHandle 3 0.0 0.0 0.0 0.0 - CAF PrelAddr 1 0.0 0.0 0.0 0.0 - CAF Main 6 0.0 0.0 100.0 100.0 - main Main 1 0.0 0.0 100.0 100.0 - nfib Main 242785 100.0 100.0 100.0 100.0 +MAIN MAIN 102 0 0.0 0.0 100.0 100.0 + CAF GHC.IO.Handle.FD 128 0 0.0 0.0 0.0 0.0 + CAF GHC.IO.Encoding.Iconv 120 0 0.0 0.0 0.0 0.0 + CAF GHC.Conc.Signal 110 0 0.0 0.0 0.0 0.0 + CAF Main 108 0 0.0 0.0 100.0 100.0 + main Main 204 1 0.0 0.0 100.0 100.0 + fib Main 205 2692537 100.0 100.0 100.0 100.0 </screen> - <para>The first part of the file gives the program name and options, and the total time and total memory allocation measured during the run of the program (note that the total memory allocation figure isn't the same as the amount of <emphasis>live</emphasis> memory needed by the program at any one time; the latter can be determined using heap profiling, which we - will describe shortly).</para> + will describe later in <xref linkend="prof-heap" />).</para> <para>The second part of the file is a break-down by cost centre of the most costly functions in the program. In this case, there was only one significant function in the program, namely - <function>nfib</function>, and it was responsible for 100% + <function>fib</function>, and it was responsible for 100% of both the time and allocation costs of the program.</para> <para>The third and final section of the file gives a profile - break-down by cost-centre stack. This is roughly a call-graph + break-down by cost-centre stack. This is roughly a call-tree profile of the program. In the example above, it is clear that - the costly call to <function>nfib</function> came from + the costly call to <function>fib</function> came from <function>main</function>.</para> <para>The time and allocation incurred by a given part of the @@ -137,33 +149,39 @@ MAIN MAIN 0 0.0 0.0 100.0 100.0 by modifying the example slightly:</para> <programlisting> -main = print (f 25 + g 25) -f n = nfib n -g n = nfib (n `div` 2) -nfib n = if n < 2 then 1 else nfib (n-1) + nfib (n-2) +main = print (f 30 + g 30) + where + f n = fib n + g n = fib (n `div` 2) + +fib n = if n < 2 then 1 else fib (n-1) + fib (n-2) </programlisting> <para>Compile and run this program as before, and take a look at the new profiling results:</para> <screen> -COST CENTRE MODULE scc %time %alloc %time %alloc - -MAIN MAIN 0 0.0 0.0 100.0 100.0 - main Main 0 0.0 0.0 0.0 0.0 - CAF PrelHandle 3 0.0 0.0 0.0 0.0 - CAF PrelAddr 1 0.0 0.0 0.0 0.0 - CAF Main 9 0.0 0.0 100.0 100.0 - main Main 1 0.0 0.0 100.0 100.0 - g Main 1 0.0 0.0 0.0 0.2 - nfib Main 465 0.0 0.2 0.0 0.2 - f Main 1 0.0 0.0 100.0 99.8 - nfib Main 242785 100.0 99.8 100.0 99.8 +COST CENTRE MODULE no. entries %time %alloc %time %alloc + +MAIN MAIN 102 0 0.0 0.0 100.0 100.0 + CAF GHC.IO.Handle.FD 128 0 0.0 0.0 0.0 0.0 + CAF GHC.IO.Encoding.Iconv 120 0 0.0 0.0 0.0 0.0 + CAF GHC.Conc.Signal 110 0 0.0 0.0 0.0 0.0 + CAF Main 108 0 0.0 0.0 100.0 100.0 + main Main 204 1 0.0 0.0 100.0 100.0 + main.g Main 207 1 0.0 0.0 0.0 0.1 + fib Main 208 1973 0.0 0.1 0.0 0.1 + main.f Main 205 1 0.0 0.0 100.0 99.9 + fib Main 206 2692537 100.0 99.9 100.0 99.9 </screen> - <para>Now although we had two calls to <function>nfib</function> - in the program, it is immediately clear that it was the call from - <function>f</function> which took all the time.</para> + <para>Now although we had two calls to <function>fib</function> in + the program, it is immediately clear that it was the call from + <function>f</function> which took all the time. The functions + <literal>f</literal> and <literal>g</literal> which are defined in + the <literal>where</literal> clause in <literal>main</literal> are + given their own cost centres, <literal>main.f</literal> and + <literal>main.g</literal> respectively.</para> <para>The actual meaning of the various columns in the output is:</para> @@ -172,7 +190,7 @@ MAIN MAIN 0 0.0 0.0 100.0 100.0 <term>entries</term> <listitem> <para>The number of times this particular point in the call - graph was entered.</para> + tree was entered.</para> </listitem> </varlistentry> @@ -180,7 +198,7 @@ MAIN MAIN 0 0.0 0.0 100.0 100.0 <term>individual %time</term> <listitem> <para>The percentage of the total run time of the program - spent at this point in the call graph.</para> + spent at this point in the call tree.</para> </listitem> </varlistentry> @@ -197,7 +215,7 @@ MAIN MAIN 0 0.0 0.0 100.0 100.0 <term>inherited %time</term> <listitem> <para>The percentage of the total run time of the program - spent below this point in the call graph.</para> + spent below this point in the call tree.</para> </listitem> </varlistentry> @@ -242,19 +260,24 @@ MAIN MAIN 0 0.0 0.0 100.0 100.0 although GHC does keep information about which groups of functions called each other recursively, this information isn't displayed in the basic time and allocation profile, instead the call-graph is - flattened into a tree.</para> + flattened into a tree as follows: a call to a function that occurs + elsewhere on the current stack does not push another entry on the + stack, instead the costs for this call are aggregated into the + caller<footnote><para>Note that this policy has changed slightly + in GHC 7.4.1 relative to earlier versions, and may yet change + further, feedback is welcome.</para></footnote>.</para> - <sect2><title>Inserting cost centres by hand</title> + <sect2 id="scc-pragma"><title>Inserting cost centres by hand</title> <para>Cost centres are just program annotations. When you say - <option>-auto-all</option> to the compiler, it automatically - inserts a cost centre annotation around every top-level function - not marked INLINE in your program, but you are entirely free to - add the cost centre annotations yourself.</para> + <option>-fprof-auto</option> to the compiler, it automatically + inserts a cost centre annotation around every binding not marked + INLINE in your program, but you are entirely free to add cost + centre annotations yourself.</para> <para>The syntax of a cost centre annotation is</para> - <programlisting> +<programlisting> {-# SCC "name" #-} <expression> </programlisting> @@ -270,92 +293,72 @@ MAIN MAIN 0 0.0 0.0 100.0 100.0 <programlisting> main :: IO () -main = do let xs = {-# SCC "X" #-} [1..1000000] - let ys = {-# SCC "Y" #-} [1..2000000] - print $ last xs - print $ last $ init xs - print $ last ys - print $ last $ init ys +main = do let xs = [1..1000000] + let ys = [1..2000000] + print $ {-# SCC "last_xs" #-} last xs + print $ {-# SCC "last_init_xs" #-} last $ init xs + print $ {-# SCC "last_ys" #-} last ys + print $ {-# SCC "last_init_ys" #-}last $ init ys </programlisting> - <para>which gives this heap profile when run:</para> + <para>which gives this profile when run:</para> - <!-- contentwidth/contentheight don't appear to have any effect - other than making the PS file generation work, rather than - falling over. The result seems to be broken PS on the page - with the image. --> - <imagedata fileref="prof_scc" contentwidth="645px" - contentdepth="428px"/> +<screen> +COST CENTRE MODULE no. entries %time %alloc %time %alloc + +MAIN MAIN 102 0 0.0 0.0 100.0 100.0 + CAF GHC.IO.Handle.FD 130 0 0.0 0.0 0.0 0.0 + CAF GHC.IO.Encoding.Iconv 122 0 0.0 0.0 0.0 0.0 + CAF GHC.Conc.Signal 111 0 0.0 0.0 0.0 0.0 + CAF Main 108 0 0.0 0.0 100.0 100.0 + main Main 204 1 0.0 0.0 100.0 100.0 + last_init_ys Main 210 1 25.0 27.4 25.0 27.4 + main.ys Main 209 1 25.0 39.2 25.0 39.2 + last_ys Main 208 1 12.5 0.0 12.5 0.0 + last_init_xs Main 207 1 12.5 13.7 12.5 13.7 + main.xs Main 206 1 18.8 19.6 18.8 19.6 + last_xs Main 205 1 6.2 0.0 6.2 0.0 +</screen> </sect2> <sect2 id="prof-rules"> <title>Rules for attributing costs</title> - <para>The cost of evaluating any expression in your program is - attributed to a cost-centre stack using the following rules:</para> - - <itemizedlist> - <listitem> - <para>If the expression is part of the - <firstterm>one-off</firstterm> costs of evaluating the - enclosing top-level definition, then costs are attributed to - the stack of lexically enclosing <literal>SCC</literal> - annotations on top of the special <literal>CAF</literal> - cost-centre. </para> - </listitem> - - <listitem> - <para>Otherwise, costs are attributed to the stack of - lexically-enclosing <literal>SCC</literal> annotations, - appended to the cost-centre stack in effect at the - <firstterm>call site</firstterm> of the current top-level - definition<footnote> <para>The call-site is just the place - in the source code which mentions the particular function or - variable.</para></footnote>. Notice that this is a recursive - definition.</para> - </listitem> - - <listitem> - <para>Time spent in foreign code (see <xref linkend="ffi"/>) - is always attributed to the cost centre in force at the - Haskell call-site of the foreign function.</para> - </listitem> - </itemizedlist> - - <para>What do we mean by one-off costs? Well, Haskell is a lazy - language, and certain expressions are only ever evaluated once. - For example, if we write:</para> - - <programlisting> -x = nfib 25 -</programlisting> - - <para>then <varname>x</varname> will only be evaluated once (if - at all), and subsequent demands for <varname>x</varname> will - immediately get to see the cached result. The definition - <varname>x</varname> is called a CAF (Constant Applicative - Form), because it has no arguments.</para> - - <para>For the purposes of profiling, we say that the expression - <literal>nfib 25</literal> belongs to the one-off costs of - evaluating <varname>x</varname>.</para> - - <para>Since one-off costs aren't strictly speaking part of the - call-graph of the program, they are attributed to a special - top-level cost centre, <literal>CAF</literal>. There may be one - <literal>CAF</literal> cost centre for each module (the - default), or one for each top-level definition with any one-off - costs (this behaviour can be selected by giving GHC the - <option>-caf-all</option> flag).</para> - - <indexterm><primary><literal>-caf-all</literal></primary> - </indexterm> - - <para>If you think you have a weird profile, or the call-graph - doesn't look like you expect it to, feel free to send it (and - your program) to us at - <email>glasgow-haskell-bugs@haskell.org</email>.</para> + <para>While running a program with profiling turned on, GHC + maintains a cost-centre stack behind the scenes, and attributes + any costs (memory allocation and time) to whatever the current + cost-centre stack is at the time the cost is incurred.</para> + + <para>The mechanism is simple: whenever the program evaluates an + expression with an SCC annotation, <literal>{-# SCC c -#} + E</literal>, the cost centre <literal>c</literal> is pushed on + the current stack, and the entry count for this stack is + incremented by one. The stack also sometimes has to be saved + and restored; in particular when the program creates a + <firstterm>thunk</firstterm> (a lazy suspension), the current + cost-centre stack is stored in the thunk, and restored when the + thunk is evaluated. In this way, the cost-centre stack is + independent of the actual evaluation order used by GHC at + runtime.</para> + + <para>At a function call, GHC takes the stack stored in the + function being called (which for a top-level function will be + empty), and <emphasis>appends</emphasis> it to the current + stack, ignoring any prefix that is identical to a prefix of the + current stack.</para> + + <para>We mentioned earlier that lazy computations, i.e. thunks, + capture the current stack when they are created, and restore + this stack when they are evaluated. What about top-level + thunks? They are "created" when the program is compiled, so + what stack should we give them? The technical name for a + top-level thunk is a CAF ("Constant Applicative Form"). GHC + assigns every CAF in a module a stack consisting of the single + cost centre <literal>M.CAF</literal>, where <literal>M</literal> + is the name of the module. It is also possible to give each CAF + a different stack, using the option + <option>-fprof-cafs</option><indexterm><primary><option>-fprof-cafs</option></primary></indexterm>.</para> </sect2> </sect1> @@ -372,13 +375,13 @@ x = nfib 25 <indexterm><primary><option>-prof</option></primary></indexterm> </term> <listitem> - <para> To make use of the profiling system + <para>To make use of the profiling system <emphasis>all</emphasis> modules must be compiled and linked with the <option>-prof</option> option. Any <literal>SCC</literal> annotations you've put in your source will spring to life.</para> - <para> Without a <option>-prof</option> option, your + <para>Without a <option>-prof</option> option, your <literal>SCC</literal>s are ignored; so you can compile <literal>SCC</literal>-laden code without changing it.</para> @@ -394,40 +397,52 @@ x = nfib 25 <variablelist> <varlistentry> <term> - <option>-auto</option>: - <indexterm><primary><option>-auto</option></primary></indexterm> + <option>-fprof-auto</option>: + <indexterm><primary><option>-fprof-auto</option></primary></indexterm> + </term> + <listitem> + <para><emphasis>All</emphasis> bindings not marked INLINE, + whether exported or not, top level or nested, will be given + automatic <literal>SCC</literal> annotations. Functions + marked INLINE must be given a cost centre manually.</para> + </listitem> + </varlistentry> + + <varlistentry> + <term> + <option>-fprof-auto-top</option>: + <indexterm><primary><option>-fprof-auto-top</option></primary></indexterm> <indexterm><primary>cost centres</primary><secondary>automatically inserting</secondary></indexterm> </term> <listitem> - <para> GHC will automatically add - <function>_scc_</function> constructs for all - top-level, exported functions not marked INLINE. If you - want a cost centre on an INLINE function, you have to add - it manually.</para> + <para>GHC will automatically add <literal>SCC</literal> + annotations for all top-level bindings not marked INLINE. If + you want a cost centre on an INLINE function, you have to + add it manually.</para> </listitem> </varlistentry> <varlistentry> <term> - <option>-auto-all</option>: - <indexterm><primary><option>-auto-all</option></primary></indexterm> + <option>-fprof-auto-exported</option>: + <indexterm><primary><option>-fprof-auto-top</option></primary></indexterm> + <indexterm><primary>cost centres</primary><secondary>automatically inserting</secondary></indexterm> </term> <listitem> - <para> <emphasis>All</emphasis> top-level functions - not marked INLINE, exported or not, will be automatically - <function>_scc_</function>'d. - The functions marked INLINE must be given a cost centre - manually.</para> + <para>GHC will automatically add <literal>SCC</literal> + annotations for all exported functions not marked + INLINE. If you want a cost centre on an INLINE function, you + have to add it manually.</para> </listitem> </varlistentry> <varlistentry> <term> - <option>-caf-all</option>: - <indexterm><primary><option>-caf-all</option></primary></indexterm> + <option>-fprof-cafs</option>: + <indexterm><primary><option>-fprof-cafs</option></primary></indexterm> </term> <listitem> - <para> The costs of all CAFs in a module are usually + <para>The costs of all CAFs in a module are usually attributed to one “big” CAF cost-centre. With this option, all CAFs get their own cost-centre. An “if all else fails” option…</para> @@ -436,17 +451,46 @@ x = nfib 25 <varlistentry> <term> - <option>-ignore-scc</option>: - <indexterm><primary><option>-ignore-scc</option></primary></indexterm> + <option>-fno-prof-auto</option>: + <indexterm><primary><option>-no-fprof-auto</option></primary></indexterm> </term> <listitem> - <para>Ignore any <function>_scc_</function> - constructs, so a module which already has - <function>_scc_</function>s can be compiled - for profiling with the annotations ignored.</para> - </listitem> + <para>Disables any previous <option>-fprof-auto</option>, + <option>-fprof-auto-top</option>, or + <option>-fprof-auto-exported</option> options. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term> + <option>-fno-prof-cafs</option>: + <indexterm><primary><option>-fno-prof-cafs</option></primary></indexterm> + </term> + <listitem> + <para>Disables any previous <option>-fprof-cafs</option> option. + </para> + </listitem> </varlistentry> + <varlistentry> + <term> + <option>-fno-prof-count-entries</option>: + <indexterm><primary><option>-fno-prof-count-entries</option></primary></indexterm> + </term> + <listitem> + <para>Tells GHC not to collect information about how often + functions are entered at runtime (the "entries" column of + the time profile), for this module. This tends to make the + profiled code run faster, and hence closer to the speed of + the unprofiled code, because GHC is able to optimise more + aggressively if it doesn't have to maintain correct entry + counts. This option can be useful if you aren't interested + in the entry counts (for example, if you only intend to do + heap profiling). + </para> + </listitem> + </varlistentry> </variablelist> </sect1> @@ -491,7 +535,7 @@ x = nfib 25 <listitem> <para>Sets the interval that the RTS clock ticks at, which is also the sampling interval of the time and allocation profile. - The default is 0.02 second.</para> + The default is 0.02 seconds.</para> </listitem> </varlistentry> @@ -501,13 +545,14 @@ x = nfib 25 <indexterm><primary><option>-xc</option></primary><secondary>RTS option</secondary></indexterm> </term> <listitem> - <para>This option makes use of the extra information - maintained by the cost-centre-stack profiler to provide - useful information about the location of runtime errors. - See <xref linkend="rts-options-debugging"/>.</para> - </listitem> + <para>This option causes the runtime to print out the + current cost-centre stack whenever an exception is raised. + This can be particularly useful for debugging the location + of exceptions, such as the notorious <literal>Prelude.head: + empty list</literal> error. See <xref + linkend="rts-options-debugging"/>.</para> + </listitem> </varlistentry> - </variablelist> </sect1> @@ -520,7 +565,7 @@ x = nfib 25 over time. This is useful for detecting the causes of <firstterm>space leaks</firstterm>, when your program holds on to more memory at run-time that it needs to. Space leaks lead to - longer run-times due to heavy garbage collector activity, and may + slower execution due to heavy garbage collector activity, and may even cause the program to run out of memory altogether.</para> <para>To generate a heap profile from your program:</para> @@ -532,7 +577,7 @@ x = nfib 25 </listitem> <listitem> <para>Run it with one of the heap profiling options described - below (eg. <option>-hc</option> for a basic producer profile). + below (eg. <option>-h</option> for a basic producer profile). This generates the file <filename><replaceable>prog</replaceable>.hp</filename>.</para> </listitem> @@ -550,6 +595,16 @@ x = nfib 25 </listitem> </orderedlist> + <para>For example, here is a heap profile produced for the program given above in <xref linkend="scc-pragma" />:</para> + + <!-- + contentwidth/contentheight don't appear to have any effect + other than making the PS file generation work, rather than + falling over. The result seems to be broken PS on the page + with the image. --> + <imagedata fileref="prof_scc.png" contentwidth="645px" + contentdepth="428px"/> + <para>You might also want to take a look at <ulink url="http://www.haskell.org/haskellwiki/Hp2any">hp2any</ulink>, a more advanced suite of tools (not distributed with GHC) for @@ -571,7 +626,7 @@ x = nfib 25 <indexterm><primary><option>-hc</option></primary><secondary>RTS option</secondary></indexterm> </term> <listitem> - <para>Breaks down the graph by the cost-centre stack which + <para>(can be shortened to <option>-h</option>). Breaks down the graph by the cost-centre stack which produced the data.</para> </listitem> </varlistentry> @@ -782,8 +837,9 @@ x = nfib 25 space the program is using.</para> <para>Memory occupied by threads and their stacks is - labelled as “TSO” when displaying the profile - by closure description or type description.</para> + labelled as “TSO” and “STACK” + respectively when displaying the profile by closure + description or type description.</para> </listitem> </varlistentry> @@ -1305,14 +1361,13 @@ to re-read its input file: <indexterm><primary>hpc</primary></indexterm> <para> - Code coverage tools allow a programmer to determine what parts of - their code have been actually executed, and which parts have + Code coverage tools allow a programmer to determine what parts + of their code have been actually executed, and which parts have never actually been invoked. GHC has an option for generating instrumented code that records code coverage as part of the - <ulink url="http://www.haskell.org/hpc">Haskell Program Coverage - </ulink>(HPC) toolkit, which is included with GHC. HPC tools can - be used to render the generated code coverage information into - human understandable format. </para> + Haskell Program Coverage (HPC) toolkit, which is included with + GHC. HPC tools can be used to render the generated code coverage + information into human understandable format. </para> <para> Correctly instrumented code provides coverage information of two @@ -1327,8 +1382,8 @@ to re-read its input file: <para> HPC displays both kinds of information in two primary ways: - textual reports with summary statistics (hpc report) and sources - with color mark-up (hpc markup). For boolean coverage, there + textual reports with summary statistics (<literal>hpc report</literal>) and sources + with color mark-up (<literal>hpc markup</literal>). For boolean coverage, there are four possible outcomes for each guard, condition or qualifier: both True and False values occur; only True; only False; never evaluated. In hpc-markup output, highlighting with @@ -1340,7 +1395,7 @@ to re-read its input file: <sect2><title>A small example: Reciprocation</title> <para> - For an example we have a program, called Recip.hs, which computes exact decimal + For an example we have a program, called <filename>Recip.hs</filename>, which computes exact decimal representations of reciprocals, with recurring parts indicated in brackets. </para> @@ -1377,21 +1432,35 @@ main = do main </programlisting> - <para>The HPC instrumentation is enabled using the -fhpc flag. + <para>HPC instrumentation is enabled with the -fhpc flag: </para> <screen> -$ ghc -fhpc Recip.hs --make +$ ghc -fhpc Recip.hs </screen> - <para>HPC index (.mix) files are placed in .hpc subdirectory. These can be considered like - the .hi files for HPC. - </para> + <para>GHC creates a subdirectory <filename>.hpc</filename> in the + current directory, and puts HPC index (<filename>.mix</filename>) + files in there, one for each module compiled. You don't need to + worry about these files: they contain information needed by the + <literal>hpc</literal> tool to generate the coverage data for + compiled modules after the program is run.</para> <screen> $ ./Recip 1/3 = 0.(3) </screen> - <para>We can generate a textual summary of coverage:</para> + <para>Running the program generates a file with the + <literal>.tix</literal> suffix, in this case + <filename>Recip.tix</filename>, which contains the coverage data + for this run of the program. The program may be run multiple + times (e.g. with different test data), and the coverage data from + the separate runs is accumulated in the <filename>.tix</filename> + file. To reset the coverage data and start again, just remove the + <filename>.tix</filename> file. + </para> + + <para>Having run the program, we can generate a textual summary of + coverage:</para> <screen> $ hpc report Recip 80% expressions used (81/101) @@ -1418,20 +1487,32 @@ writing Recip.hs.html </sect2> <sect2><title>Options for instrumenting code for coverage</title> - <para> - Turning on code coverage is easy, use the -fhpc flag. - Instrumented and non-instrumented can be freely mixed. - When compiling the Main module GHC automatically detects when there - is an hpc compiled file, and adds the correct initialization code. - </para> + + <variablelist> + <varlistentry> + <term><option>-fhpc</option></term> + <indexterm><primary><option>-fhpc</option></primary></indexterm> + <listitem> + <para>Enable code coverage for the current module or modules + being compiled.</para> + + <para>Modules compiled with this option can be freely mixed + with modules compiled without it; indeed, most libraries + will typically be compiled without <option>-fhpc</option>. + When the program is run, coverage data will only be + generated for those modules that were compiled with + <option>-fhpc</option>, and the <literal>hpc</literal> tool + will only show information about those modules. + </para> + </listitem> + </varlistentry> + </variablelist> </sect2> <sect2><title>The hpc toolkit</title> - <para> - The hpc toolkit uses a cvs/svn/darcs-like interface, where a - single binary contains many function units.</para> + <para>The hpc command has several sub-commands:</para> <screen> $ hpc Usage: hpc COMMAND ... @@ -1453,18 +1534,17 @@ Others: version Display version for hpc </screen> - <para>In general, these options act on .tix file after an - instrumented binary has generated it, which hpc acting as a - conduit between the raw .tix file, and the more detailed reports - produced. + <para>In general, these options act on a + <filename>.tix</filename> file after an instrumented binary has + generated it. </para> <para> The hpc tool assumes you are in the top-level directory of - the location where you built your application, and the .tix + the location where you built your application, and the <filename>.tix</filename> file is in the same top-level directory. You can use the - flag --srcdir to use hpc for any other directory, and use - --srcdir multiple times to analyse programs compiled from + flag <option>--srcdir</option> to use <literal>hpc</literal> for any other directory, and use + <option>--srcdir</option> multiple times to analyse programs compiled from difference locations, as is typical for packages. </para> @@ -1473,10 +1553,10 @@ Others: </para> <sect3><title>hpc report</title> - <para>hpc report gives a textual report of coverage. By default, + <para><literal>hpc report</literal> gives a textual report of coverage. By default, all modules and packages are considered in generating report, unless include or exclude are used. The report is a summary - unless the --per-module flag is used. The --xml-output option + unless the <option>--per-module</option> flag is used. The <option>--xml-output</option> option allows for tools to use hpc to glean coverage. </para> <screen> @@ -1497,7 +1577,7 @@ Options: </screen> </sect3> <sect3><title>hpc markup</title> - <para>hpc markup marks up source files into colored html. + <para><literal>hpc markup</literal> marks up source files into colored html. </para> <screen> $ hpc help markup @@ -1518,8 +1598,8 @@ Options: </sect3> <sect3><title>hpc sum</title> - <para>hpc sum adds together any number of .tix files into a single - .tix file. hpc sum does not change the original .tix file; it generates a new .tix file. + <para><literal>hpc sum</literal> adds together any number of <filename>.tix</filename> files into a single + <filename>.tix</filename> file. <literal>hpc sum</literal> does not change the original <filename>.tix</filename> file; it generates a new <filename>.tix</filename> file. </para> <screen> $ hpc help sum @@ -1535,10 +1615,10 @@ Options: </screen> </sect3> <sect3><title>hpc combine</title> - <para>hpc combine is the swiss army knife of hpc. It can be - used to take the difference between .tix files, to subtract one - .tix file from another, or to add two .tix files. hpc combine does not - change the original .tix file; it generates a new .tix file. + <para><literal>hpc combine</literal> is the swiss army knife of <literal>hpc</literal>. It can be + used to take the difference between <filename>.tix</filename> files, to subtract one + <filename>.tix</filename> file from another, or to add two <filename>.tix</filename> files. hpc combine does not + change the original <filename>.tix</filename> file; it generates a new <filename>.tix</filename> file. </para> <screen> $ hpc help combine @@ -1556,8 +1636,8 @@ Options: </screen> </sect3> <sect3><title>hpc map</title> - <para>hpc map inverts or zeros a .tix file. hpc map does not - change the original .tix file; it generates a new .tix file. + <para>hpc map inverts or zeros a <filename>.tix</filename> file. hpc map does not + change the original <filename>.tix</filename> file; it generates a new <filename>.tix</filename> file. </para> <screen> $ hpc help map @@ -1608,9 +1688,9 @@ Options: </sect2> <sect2><title>Caveats and Shortcomings of Haskell Program Coverage</title> <para> - HPC does not attempt to lock the .tix file, so multiple concurrently running + HPC does not attempt to lock the <filename>.tix</filename> file, so multiple concurrently running binaries in the same directory will exhibit a race condition. There is no way - to change the name of the .tix file generated, apart from renaming the binary. + to change the name of the <filename>.tix</filename> file generated, apart from renaming the binary. HPC does not work with GHCi. </para> </sect2> diff --git a/docs/users_guide/runtime_control.xml b/docs/users_guide/runtime_control.xml index 2af87bcc41..20aca07cd3 100644 --- a/docs/users_guide/runtime_control.xml +++ b/docs/users_guide/runtime_control.xml @@ -1068,7 +1068,7 @@ char *ghc_rts_opts = "-H128m -K1m"; option</secondary></indexterm> </term> <listitem> - <para>Generates a basic heap profile, in the + <para>(can be shortened to <option>-h</option>.) Generates a basic heap profile, in the file <literal><replaceable>prog</replaceable>.hp</literal>. To produce the heap profile graph, use <command>hp2ps</command> (see <xref linkend="hp2ps" @@ -1101,7 +1101,7 @@ char *ghc_rts_opts = "-H128m -K1m"; <para> In binary format to a file for later analysis by a variety of tools. One such tool - is <ulink url="http://hackage.haskell.org/package/ThreadScope">ThreadScope</ulink><indexterm><primary>ThreadScope</primary></indexterm>, + is <ulink url="http://www.haskell.org/haskellwiki/ThreadScope">ThreadScope</ulink><indexterm><primary>ThreadScope</primary></indexterm>, which interprets the event log to produce a visual parallel execution profile of the program. </para> @@ -1314,33 +1314,53 @@ char *ghc_rts_opts = "-H128m -K1m"; <para>(Only available when the program is compiled for profiling.) When an exception is raised in the program, this option causes the current cost-centre-stack to be - dumped to <literal>stderr</literal>.</para> + dumped to <literal>stderr</literal>.</para> <para>This can be particularly useful for debugging: if your program is complaining about a <literal>head []</literal> error and you haven't got a clue which bit of code is causing it, compiling with <literal>-prof - -auto-all</literal> and running with <literal>+RTS -xc + -fprof-auto</literal> and running with <literal>+RTS -xc -RTS</literal> will tell you exactly the call stack at the point the error was raised.</para> - <para>The output contains one line for each exception raised - in the program (the program might raise and catch several - exceptions during its execution), where each line is of the - form:</para> + <para>The output contains one report for each exception + raised in the program (the program might raise and catch + several exceptions during its execution), where each report + looks something like this: + </para> <screen> -< cc<subscript>1</subscript>, ..., cc<subscript>n</subscript> > +*** Exception raised (reporting due to +RTS -xc), stack trace: + GHC.List.CAF + --> evaluated by: Main.polynomial.table_search, + called from Main.polynomial.theta_index, + called from Main.polynomial, + called from Main.zonal_pressure, + called from Main.make_pressure.p, + called from Main.make_pressure, + called from Main.compute_initial_state.p, + called from Main.compute_initial_state, + called from Main.CAF + ... </screen> - <para>each <literal>cc</literal><subscript>i</subscript> is - a cost centre in the program (see <xref - linkend="cost-centres"/>), and the sequence represents the - “call stack” at the point the exception was - raised. The leftmost item is the innermost function in the - call stack, and the rightmost item is the outermost - function.</para> - - </listitem> + <para>The stack trace may often begin with something + uninformative like <literal>GHC.List.CAF</literal>; this is + an artifact of GHC's optimiser, which lifts out exceptions + to the top-level where the profiling system assigns them to + the cost centre "CAF". However, <literal>+RTS -xc</literal> + doesn't just print the current stack, it looks deeper and + reports the stack at the time the CAF was evaluated, and it + may report further stacks until a non-CAF stack is found. In + the example above, the next stack (after <literal>--> + evaluated by</literal>) contains plenty of information about + what the program was doing when it evaluated <literal>head + []</literal>.</para> + + <para>Implementation details aside, the function names in + the stack should hopefully give you enough clues to track + down the bug.</para> + </listitem> </varlistentry> <varlistentry> diff --git a/includes/rts/prof/CCS.h b/includes/rts/prof/CCS.h index b210991017..4692d166b5 100644 --- a/includes/rts/prof/CCS.h +++ b/includes/rts/prof/CCS.h @@ -21,6 +21,7 @@ int rts_isProfiled(void); * Data Structures * ---------------------------------------------------------------------------*/ /* + * Note [struct alignment] * NB. be careful to avoid unwanted padding between fields, by * putting the 8-byte fields on an 8-byte boundary. Padding can * vary between C compilers, and we don't take into account any @@ -29,35 +30,49 @@ int rts_isProfiled(void); */ typedef struct _CostCentre { - StgInt ccID; + StgInt ccID; // Unique Id, allocated by the RTS - char * label; - char * module; - - /* used for accumulating costs at the end of the run... */ - StgWord time_ticks; - StgWord64 mem_alloc; /* align 8 (see above) */ + char * label; + char * module; + + // used for accumulating costs at the end of the run... + StgWord time_ticks; + StgWord64 mem_alloc; // align 8 (Note [struct alignment]) - StgInt is_caf; + StgInt is_caf; // non-zero for a CAF cost centre - struct _CostCentre *link; + struct _CostCentre *link; } CostCentre; typedef struct _CostCentreStack { - StgInt ccsID; + StgInt ccsID; // unique ID, allocated by the RTS + + CostCentre *cc; // Cost centre at the top of the stack + + struct _CostCentreStack *prevStack; // parent + struct _IndexTable *indexTable; // children + struct _CostCentreStack *root; // root of stack + StgWord depth; // number of items in the stack - CostCentre *cc; - struct _CostCentreStack *prevStack; - struct _IndexTable *indexTable; + StgWord64 scc_count; // Count of times this CCS is entered + // align 8 (Note [struct alignment]) - StgWord64 scc_count; /* align 8 (see above) */ - StgWord selected; - StgWord time_ticks; - StgWord64 mem_alloc; /* align 8 (see above) */ - StgWord64 inherited_alloc; /* align 8 (see above) */ - StgWord inherited_ticks; + StgWord selected; // is this CCS shown in the heap + // profile? (zero if excluded via -hc + // -hm etc.) - CostCentre *root; + StgWord time_ticks; // number of time ticks accumulated by + // this CCS + + StgWord64 mem_alloc; // mem allocated by this CCS + // align 8 (Note [struct alignment]) + + StgWord64 inherited_alloc; // sum of mem_alloc over all children + // (calculated at the end) + // align 8 (Note [struct alignment]) + + StgWord inherited_ticks; // sum of time_ticks over all children + // (calculated at the end) } CostCentreStack; @@ -74,21 +89,24 @@ typedef struct _CostCentreStack { #define EMPTY_STACK NULL #define EMPTY_TABLE NULL -/* Constants used to set sumbsumed flag on CostCentres */ - +/* Constants used to set is_caf flag on CostCentres */ #define CC_IS_CAF 'c' /* 'c' => *is* a CAF cc */ -#define CC_IS_BORING 'B' /* 'B' => *not* a CAF/sub cc */ - +#define CC_NOT_CAF 0 /* ----------------------------------------------------------------------------- * Data Structures * ---------------------------------------------------------------------------*/ +// IndexTable is the list of children of a CCS. (Alternatively it is a +// cache of the results of pushing onto a CCS, so that the second and +// subsequent times we push a certain CC on a CCS we get the same +// result). + typedef struct _IndexTable { - CostCentre *cc; - CostCentreStack *ccs; - struct _IndexTable *next; - unsigned int back_edge; + CostCentre *cc; + CostCentreStack *ccs; + struct _IndexTable *next; + unsigned int back_edge; } IndexTable; @@ -101,48 +119,44 @@ extern CostCentreStack * RTS_VAR(CCCS); /* current CCS */ #if IN_STG_CODE extern StgWord CC_MAIN[]; -extern StgWord CCS_MAIN[]; /* Top CCS */ +extern StgWord CCS_MAIN[]; // Top CCS extern StgWord CC_SYSTEM[]; -extern StgWord CCS_SYSTEM[]; /* RTS costs */ +extern StgWord CCS_SYSTEM[]; // RTS costs extern StgWord CC_GC[]; -extern StgWord CCS_GC[]; /* Garbage collector costs */ - -extern StgWord CC_SUBSUMED[]; -extern StgWord CCS_SUBSUMED[]; /* Costs are subsumed by caller */ +extern StgWord CCS_GC[]; // Garbage collector costs extern StgWord CC_OVERHEAD[]; -extern StgWord CCS_OVERHEAD[]; /* Profiling overhead */ +extern StgWord CCS_OVERHEAD[]; // Profiling overhead extern StgWord CC_DONT_CARE[]; -extern StgWord CCS_DONT_CARE[]; /* shouldn't ever get set */ +extern StgWord CCS_DONT_CARE[]; // CCS attached to static constructors #else extern CostCentre CC_MAIN[]; -extern CostCentreStack CCS_MAIN[]; /* Top CCS */ +extern CostCentreStack CCS_MAIN[]; // Top CCS extern CostCentre CC_SYSTEM[]; -extern CostCentreStack CCS_SYSTEM[]; /* RTS costs */ +extern CostCentreStack CCS_SYSTEM[]; // RTS costs extern CostCentre CC_GC[]; -extern CostCentreStack CCS_GC[]; /* Garbage collector costs */ - -extern CostCentre CC_SUBSUMED[]; -extern CostCentreStack CCS_SUBSUMED[]; /* Costs are subsumed by caller */ +extern CostCentreStack CCS_GC[]; // Garbage collector costs extern CostCentre CC_OVERHEAD[]; -extern CostCentreStack CCS_OVERHEAD[]; /* Profiling overhead */ +extern CostCentreStack CCS_OVERHEAD[]; // Profiling overhead extern CostCentre CC_DONT_CARE[]; -extern CostCentreStack CCS_DONT_CARE[]; /* shouldn't ever get set */ +extern CostCentreStack CCS_DONT_CARE[]; // shouldn't ever get set + +extern CostCentre CC_PINNED[]; +extern CostCentreStack CCS_PINNED[]; // pinned memory #endif /* IN_STG_CODE */ -extern unsigned int RTS_VAR(CC_ID); /* global ids */ +extern unsigned int RTS_VAR(CC_ID); // global ids extern unsigned int RTS_VAR(CCS_ID); -extern unsigned int RTS_VAR(HP_ID); extern unsigned int RTS_VAR(era); @@ -150,37 +164,23 @@ extern unsigned int RTS_VAR(era); * Functions * ---------------------------------------------------------------------------*/ -void EnterFunCCS ( CostCentreStack *ccsfn ); -CostCentreStack *PushCostCentre ( CostCentreStack *, CostCentre * ); -CostCentreStack *AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ); - -extern unsigned int RTS_VAR(entering_PAP); +CostCentreStack * pushCostCentre (CostCentreStack *, CostCentre *); +void enterFunCCS (CostCentreStack *); /* ----------------------------------------------------------------------------- - * Registering CCs - - Cost centres are registered at startup by calling a registering - routine in each module. Each module registers its cost centres and - calls the registering routine for all imported modules. The RTS calls - the registering routine for the module Main. This registering must be - done before initialisation since the evaluation required for - initialisation may use the cost centres. - - As the code for each module uses tail calls we use an auxiliary stack - (in the heap) to record imported modules still to be registered. At - the bottom of the stack is NULL which indicates that - @miniInterpretEnd@ should be resumed. + Registering CCs and CCSs - @START_REGISTER@ and @END_REGISTER@ are special macros used to - delimit the function. @END_REGISTER@ pops the next registering - routine off the stack and jumps to it. @REGISTER_CC@ registers a cost - centre. @REGISTER_IMPORT@ pushes a modules registering routine onto - the register stack. + Registering a CC or CCS consists of + - assigning it a unique ID + - linking it onto the list of registered CCs/CCSs + Cost centres are registered at startup by a C constructor function + generated by the compiler in the _stub.c file for each module. The + macros below are invoked by that C code to register CCs and CCSs. -------------------------------------------------------------------------- */ -extern CostCentre * RTS_VAR(CC_LIST); /* registered CC list */ -extern CostCentreStack * RTS_VAR(CCS_LIST); /* registered CCS list */ +extern CostCentre * RTS_VAR(CC_LIST); // registered CC list +extern CostCentreStack * RTS_VAR(CCS_LIST); // registered CCS list #define REGISTER_CC(cc) \ do { \ @@ -202,29 +202,31 @@ extern CostCentreStack * RTS_VAR(CCS_LIST); /* registered CCS list */ * Declaring Cost Centres & Cost Centre Stacks. * -------------------------------------------------------------------------- */ -# define CC_DECLARE(cc_ident,name,module,caf,is_local) \ - is_local CostCentre cc_ident[1] \ - = {{ 0, \ - name, \ - module, \ - 0, \ - 0, \ - caf, \ - 0 }}; - -# define CCS_DECLARE(ccs_ident,cc_ident,is_local) \ - is_local CostCentreStack ccs_ident[1] \ - = {{ ccsID : 0, \ - cc : cc_ident, \ - prevStack : NULL, \ - indexTable : NULL, \ - selected : 0, \ - scc_count : 0, \ - time_ticks : 0, \ - mem_alloc : 0, \ - inherited_ticks : 0, \ - inherited_alloc : 0, \ - root : 0, \ +# define CC_DECLARE(cc_ident,name,mod,caf,is_local) \ + is_local CostCentre cc_ident[1] \ + = {{ ccID : 0, \ + label : name, \ + module : mod, \ + time_ticks : 0, \ + mem_alloc : 0, \ + link : 0, \ + is_caf : caf \ + }}; + +# define CCS_DECLARE(ccs_ident,cc_ident,is_local) \ + is_local CostCentreStack ccs_ident[1] \ + = {{ ccsID : 0, \ + cc : cc_ident, \ + prevStack : NULL, \ + indexTable : NULL, \ + root : NULL, \ + depth : 0, \ + selected : 0, \ + scc_count : 0, \ + time_ticks : 0, \ + mem_alloc : 0, \ + inherited_ticks : 0, \ + inherited_alloc : 0 \ }}; /* ----------------------------------------------------------------------------- diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index ed0bf655e1..45dc8369c7 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -62,6 +62,7 @@ RTS_RET(stg_unmaskAsyncExceptionszh_ret); RTS_RET(stg_maskUninterruptiblezh_ret); RTS_RET(stg_maskAsyncExceptionszh_ret); RTS_RET(stg_stack_underflow_frame); +RTS_RET(stg_restore_cccs); // RTS_FUN(stg_interp_constr_entry); // diff --git a/rts/Apply.cmm b/rts/Apply.cmm index f9ac3b353c..5397fc55df 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -85,8 +85,9 @@ stg_PAP_apply // profiling TICK_ENT_PAP(); LDV_ENTER(pap); - // Enter PAP cost centre - ENTER_CCS_PAP_CL(pap); +#ifdef PROFILING + foreign "C" enterFunCCS(StgHeader_ccs(pap)); +#endif // Reload the stack W_ i; @@ -175,11 +176,9 @@ INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP") TICK_ENT_AP(); LDV_ENTER(ap); + ENTER_CCS_THUNK(ap); - // Enter PAP cost centre - ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL - - // Reload the stack + // Reload the stack W_ i; W_ p; p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload; @@ -245,11 +244,9 @@ INFO_TABLE(stg_AP_NOUPD,/*special layout*/0,0,AP,"AP_NOUPD","AP_NOUPD") TICK_ENT_AP(); LDV_ENTER(ap); + ENTER_CCS_THUNK(ap); - // Enter PAP cost centre - ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL - - // Reload the stack + // Reload the stack W_ i; W_ p; p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload; @@ -326,9 +323,7 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK") TICK_ENT_AP(); LDV_ENTER(ap); - - // Enter PAP cost centre - ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL + ENTER_CCS_THUNK(ap); // Reload the stack W_ i; @@ -379,9 +374,7 @@ INFO_TABLE(stg_AP_STACK_NOUPD,/*special layout*/0,0,AP_STACK, TICK_ENT_AP(); LDV_ENTER(ap); - - // Enter PAP cost centre - ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL + ENTER_CCS_THUNK(ap); // Reload the stack W_ i; diff --git a/rts/AutoApply.h b/rts/AutoApply.h index bbec1224ff..547c5d2f28 100644 --- a/rts/AutoApply.h +++ b/rts/AutoApply.h @@ -76,5 +76,16 @@ Sp_adj(n+1); \ jump %ENTRY_CODE(Sp(0)); +// Jump to target, saving CCCS and restoring it on return +#if defined(PROFILING) +#define jump_SAVE_CCCS(target) \ + Sp(-1) = W_[CCCS]; \ + Sp(-2) = stg_restore_cccs_info; \ + Sp_adj(-2); \ + jump (target) +#else +#define jump_SAVE_CCCS(target) jump (target) +#endif + #endif /* APPLY_H */ diff --git a/rts/Exception.cmm b/rts/Exception.cmm index 591fa7ab9b..9f48f5d8f5 100644 --- a/rts/Exception.cmm +++ b/rts/Exception.cmm @@ -426,7 +426,9 @@ stg_raisezh * the info was only displayed for an *uncaught* exception. */ if (RtsFlags_ProfFlags_showCCSOnException(RtsFlags) != 0::I32) { - foreign "C" fprintCCS_stderr(W_[CCCS] "ptr") []; + SAVE_THREAD_STATE(); + foreign "C" fprintCCS_stderr(W_[CCCS] "ptr", CurrentTSO "ptr") []; + LOAD_THREAD_STATE(); } #endif diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index bb4f73bbab..c96e459975 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1204,6 +1204,8 @@ stg_takeMVarzh // into the heap check generated by the code generator, so we // have to do it in stg_gc_gen (see HeapStackCheck.cmm). HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh); + TICK_ALLOC_PRIM(SIZEOF_StgMVarTSOQueue, 0, 0); + CCCS_ALLOC(SIZEOF_StgMVarTSOQueue); q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); @@ -1369,6 +1371,8 @@ stg_putMVarzh // see Note [mvar-heap-check] above HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh); + TICK_ALLOC_PRIM(SIZEOF_StgMVarTSOQueue, 0, 0); + CCCS_ALLOC(SIZEOF_StgMVarTSOQueue); q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c index 9d95b4ccc0..56c44519fb 100644 --- a/rts/ProfHeap.c +++ b/rts/ProfHeap.c @@ -919,12 +919,12 @@ heapCensusChain( Census *census, bdescr *bd ) for (; bd != NULL; bd = bd->link) { // HACK: pretend a pinned block is just one big ARR_WORDS - // owned by CCS_SYSTEM. These blocks can be full of holes due + // owned by CCS_PINNED. These blocks can be full of holes due // to alignment constraints so we can't traverse the memory // and do a proper census. if (bd->flags & BF_PINNED) { StgClosure arr; - SET_HDR(&arr, &stg_ARR_WORDS_info, CCS_SYSTEM); + SET_HDR(&arr, &stg_ARR_WORDS_info, CCS_PINNED); heapProfObject(census, &arr, bd->blocks * BLOCK_SIZE_W, rtsTrue); continue; } diff --git a/rts/Profiling.c b/rts/Profiling.c index 5648f31e00..55495cdf94 100644 --- a/rts/Profiling.c +++ b/rts/Profiling.c @@ -36,12 +36,11 @@ Arena *prof_arena; unsigned int CC_ID = 1; unsigned int CCS_ID = 1; -unsigned int HP_ID = 1; /* figures for the profiling report. */ static StgWord64 total_alloc; -static lnat total_prof_ticks; +static lnat total_prof_ticks; /* Globals for opening the profiling log file(s) */ @@ -55,7 +54,7 @@ FILE *hp_file; */ CostCentreStack *CCCS; -/* Linked lists to keep track of cc's and ccs's that haven't +/* Linked lists to keep track of CCs and CCSs that haven't * been declared in the log file yet */ CostCentre *CC_LIST = NULL; @@ -78,67 +77,59 @@ CostCentreStack *CCS_LIST = NULL; * itself. These are costs that would not be incurred * during non-profiled execution of the program. * - * SUBSUMED is the one-and-only CCS placed on top-level functions. - * It indicates that all costs are to be attributed to the - * enclosing cost centre stack. SUBSUMED never accumulates - * any costs. The is_caf flag is set on the subsumed cost - * centre. - * * DONT_CARE is a placeholder cost-centre we assign to static * constructors. It should *never* accumulate any costs. + * + * PINNED accumulates memory allocated to pinned objects, which + * cannot be profiled separately because we cannot reliably + * traverse pinned memory. */ -CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, ); -CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, ); -CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, ); -CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, ); -CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, ); -CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, ); +CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_NOT_CAF, ); +CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", CC_NOT_CAF, ); +CC_DECLARE(CC_GC, "GC", "GC", CC_NOT_CAF, ); +CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_NOT_CAF, ); +CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_NOT_CAF, ); +CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", CC_NOT_CAF, ); CCS_DECLARE(CCS_MAIN, CC_MAIN, ); CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, ); CCS_DECLARE(CCS_GC, CC_GC, ); CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, ); -CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, ); -CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, ); +CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, ); +CCS_DECLARE(CCS_PINNED, CC_PINNED, ); -/* - * Uniques for the XML log-file format - */ -#define CC_UQ 1 -#define CCS_UQ 2 -#define TC_UQ 3 -#define HEAP_OBJ_UQ 4 -#define TIME_UPD_UQ 5 -#define HEAP_UPD_UQ 6 - -/* +/* * Static Functions */ -static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, - CostCentreStack *new_ccs ); -static rtsBool ccs_to_ignore ( CostCentreStack *ccs ); -static void count_ticks ( CostCentreStack *ccs ); -static void inherit_costs ( CostCentreStack *ccs ); -static void findCCSMaxLens ( CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len ); -static void logCCS ( CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len ); +static CostCentreStack * appendCCS ( CostCentreStack *ccs1, + CostCentreStack *ccs2 ); +static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc, + CostCentreStack *new_ccs ); +static rtsBool ignoreCCS ( CostCentreStack *ccs ); +static void countTickss ( CostCentreStack *ccs ); +static void inheritCosts ( CostCentreStack *ccs ); +static void findCCSMaxLens ( CostCentreStack *ccs, + nat indent, + nat *max_label_len, + nat *max_module_len ); +static void logCCS ( CostCentreStack *ccs, + nat indent, + nat max_label_len, + nat max_module_len ); static void reportCCS ( CostCentreStack *ccs ); -static void DecCCS ( CostCentreStack *ccs ); -static void DecBackEdge ( CostCentreStack *ccs, - CostCentreStack *oldccs ); -static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc ); +static CostCentreStack * checkLoop ( CostCentreStack *ccs, + CostCentre *cc ); static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs ); -static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * ); -static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * ); -static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *, +static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * ); +static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * ); +static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *, CostCentre *, unsigned int ); static void ccsSetSelected ( CostCentreStack *ccs ); -static void initTimeProfiling ( void ); -static void initProfilingLogFile( void ); - -static void reportCCS_XML ( CostCentreStack *ccs ); +static void initTimeProfiling ( void ); +static void initProfilingLogFile ( void ); /* ----------------------------------------------------------------------------- Initialise the profiling environment @@ -147,11 +138,11 @@ static void reportCCS_XML ( CostCentreStack *ccs ); void initProfiling1 (void) { - // initialise our arena - prof_arena = newArena(); + // initialise our arena + prof_arena = newArena(); - /* for the benefit of allocate()... */ - CCCS = CCS_SYSTEM; + /* for the benefit of allocate()... */ + CCCS = CCS_SYSTEM; } void @@ -163,93 +154,57 @@ freeProfiling (void) void initProfiling2 (void) { - CostCentreStack *ccs, *next; - - CCCS = CCS_SYSTEM; - - /* Set up the log file, and dump the header and cost centre - * information into it. */ - initProfilingLogFile(); - - /* Register all the cost centres / stacks in the program - * CC_MAIN gets link = 0, all others have non-zero link. - */ - REGISTER_CC(CC_MAIN); - REGISTER_CC(CC_SYSTEM); - REGISTER_CC(CC_GC); - REGISTER_CC(CC_OVERHEAD); - REGISTER_CC(CC_SUBSUMED); - REGISTER_CC(CC_DONT_CARE); - - REGISTER_CCS(CCS_SYSTEM); - REGISTER_CCS(CCS_GC); - REGISTER_CCS(CCS_OVERHEAD); - REGISTER_CCS(CCS_SUBSUMED); - REGISTER_CCS(CCS_DONT_CARE); - REGISTER_CCS(CCS_MAIN); - - /* find all the "special" cost centre stacks, and make them children - * of CCS_MAIN. - */ - ASSERT(CCS_LIST == CCS_MAIN); - CCS_LIST = CCS_LIST->prevStack; - CCS_MAIN->prevStack = NULL; - CCS_MAIN->root = CC_MAIN; - ccsSetSelected(CCS_MAIN); - DecCCS(CCS_MAIN); - - for (ccs = CCS_LIST; ccs != NULL; ) { - next = ccs->prevStack; - ccs->prevStack = NULL; - ActualPush_(CCS_MAIN,ccs->cc,ccs); - ccs->root = ccs->cc; - ccs = next; - } - - if (RtsFlags.CcFlags.doCostCentres) { - initTimeProfiling(); - } - - if (RtsFlags.ProfFlags.doHeapProfile) { - initHeapProfiling(); - } -} - -// Decide whether closures with this CCS should contribute to the heap -// profile. -static void -ccsSetSelected( CostCentreStack *ccs ) -{ - if (RtsFlags.ProfFlags.modSelector) { - if (! strMatchesSelector( ccs->cc->module, - RtsFlags.ProfFlags.modSelector ) ) { - ccs->selected = 0; - return; - } + CostCentreStack *ccs, *next; + + CCCS = CCS_SYSTEM; + + /* Set up the log file, and dump the header and cost centre + * information into it. + */ + initProfilingLogFile(); + + /* Register all the cost centres / stacks in the program + * CC_MAIN gets link = 0, all others have non-zero link. + */ + REGISTER_CC(CC_MAIN); + REGISTER_CC(CC_SYSTEM); + REGISTER_CC(CC_GC); + REGISTER_CC(CC_OVERHEAD); + REGISTER_CC(CC_DONT_CARE); + REGISTER_CC(CC_PINNED); + + REGISTER_CCS(CCS_SYSTEM); + REGISTER_CCS(CCS_GC); + REGISTER_CCS(CCS_OVERHEAD); + REGISTER_CCS(CCS_DONT_CARE); + REGISTER_CCS(CCS_PINNED); + REGISTER_CCS(CCS_MAIN); + + /* find all the registered cost centre stacks, and make them + * children of CCS_MAIN. + */ + ASSERT(CCS_LIST == CCS_MAIN); + CCS_LIST = CCS_LIST->prevStack; + CCS_MAIN->prevStack = NULL; + CCS_MAIN->root = CCS_MAIN; + ccsSetSelected(CCS_MAIN); + + // make CCS_MAIN the parent of all the pre-defined CCSs. + for (ccs = CCS_LIST; ccs != NULL; ) { + next = ccs->prevStack; + ccs->prevStack = NULL; + actualPush_(CCS_MAIN,ccs->cc,ccs); + ccs->root = ccs; + ccs = next; } - if (RtsFlags.ProfFlags.ccSelector) { - if (! strMatchesSelector( ccs->cc->label, - RtsFlags.ProfFlags.ccSelector ) ) { - ccs->selected = 0; - return; - } - } - if (RtsFlags.ProfFlags.ccsSelector) { - CostCentreStack *c; - for (c = ccs; c != NULL; c = c->prevStack) { - if ( strMatchesSelector( c->cc->label, - RtsFlags.ProfFlags.ccsSelector )) { - break; - } - } - if (c == NULL) { - ccs->selected = 0; - return; - } + + if (RtsFlags.CcFlags.doCostCentres) { + initTimeProfiling(); } - ccs->selected = 1; - return; + if (RtsFlags.ProfFlags.doHeapProfile) { + initHeapProfiling(); + } } @@ -294,21 +249,6 @@ initProfilingLogFile(void) RtsFlags.ProfFlags.doHeapProfile = 0; return; } - - if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) { - /* dump the time, and the profiling interval */ - fprintf(prof_file, "\"%s\"\n", time_str()); - fprintf(prof_file, "\"%d ms\"\n", RtsFlags.MiscFlags.tickInterval); - - /* declare all the cost centres */ - { - CostCentre *cc; - for (cc = CC_LIST; cc != NULL; cc = cc->link) { - fprintf(prof_file, "%d %ld \"%s\" \"%s\"\n", - CC_UQ, cc->ccID, cc->label, cc->module); - } - } - } } if (RtsFlags.ProfFlags.doHeapProfile) { @@ -329,40 +269,151 @@ initProfilingLogFile(void) void initTimeProfiling(void) { - /* Start ticking */ - startProfTimer(); + /* Start ticking */ + startProfTimer(); }; void endProfiling ( void ) { - if (RtsFlags.CcFlags.doCostCentres) { - stopProfTimer(); - } - if (RtsFlags.ProfFlags.doHeapProfile) { - endHeapProfiling(); - } + if (RtsFlags.CcFlags.doCostCentres) { + stopProfTimer(); + } + if (RtsFlags.ProfFlags.doHeapProfile) { + endHeapProfiling(); + } } /* ----------------------------------------------------------------------------- - Set cost centre stack when entering a function. + Set CCCS when entering a function. + + The algorithm is as follows. + + ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn + + where + + dropCommonPrefix A B + -- returns the suffix of B after removing any prefix common + -- to both A and B. + + e.g. + + <a,b,c> ++> <> = <a,b,c> + <a,b,c> ++> <d> = <a,b,c,d> + <a,b,c> ++> <a,b> = <a,b,c> + <a,b> ++> <a,b,c> = <a,b,c> + <a,b,c> ++> <a,b,d> = <a,b,c,d> + -------------------------------------------------------------------------- */ -rtsBool entering_PAP; -void -EnterFunCCS ( CostCentreStack *ccsfn ) +// implements c1 ++> c2, where c1 and c2 are equal depth +// +static void enterFunEqualStacks (CostCentreStack *ccs, CostCentreStack *ccsfn) { - /* PAP_entry has already set CCCS for us */ - if (entering_PAP) { - entering_PAP = rtsFalse; - return; - } + ASSERT(ccs->depth == ccsfn->depth); + if (ccs == ccsfn) return; + enterFunEqualStacks(ccs->prevStack, ccsfn->prevStack); + CCCS = pushCostCentre(CCCS, ccsfn->cc); +} + +// implements c1 ++> c2, where c2 is deeper than c1. +// Drop elements of c2 until we have equal stacks, call +// enterFunEqualStacks(), and then push on the elements that we +// dropped in reverse order. +// +static void enterFunCurShorter (CostCentreStack *ccsfn, StgWord n) +{ + if (n == 0) { + ASSERT(ccsfn->depth == CCCS->depth); + enterFunEqualStacks(CCCS,ccsfn); + return; + } + enterFunCurShorter(ccsfn->prevStack, n-1); + CCCS = pushCostCentre(CCCS, ccsfn->cc); +} + +void enterFunCCS ( CostCentreStack *ccsfn ) +{ + // common case 1: both stacks are the same + if (ccsfn == CCCS) { + return; + } + + // common case 2: the function stack is empty, or just CAF + if (ccsfn->prevStack == CCS_MAIN) { + return; + } + + // common case 3: the stacks are completely different (e.g. one is a + // descendent of MAIN and the other of a CAF): we append the whole + // of the function stack to the current CCS. + if (ccsfn->root != CCCS->root) { + CCCS = appendCCS(CCCS,ccsfn); + return; + } + + // uncommon case 4: CCCS is deeper than ccsfn + if (CCCS->depth > ccsfn->depth) { + nat i, n; + CostCentreStack *tmp = CCCS; + n = CCCS->depth - ccsfn->depth; + for (i = 0; i < n; i++) { + tmp = tmp->prevStack; + } + enterFunEqualStacks(tmp,ccsfn); + return; + } + + // uncommon case 5: ccsfn is deeper than CCCS + if (ccsfn->depth > CCCS->depth) { + enterFunCurShorter(ccsfn, ccsfn->depth - CCCS->depth); + return; + } - if (ccsfn->root->is_caf == CC_IS_CAF) { - CCCS = AppendCCS(CCCS,ccsfn); - } else { - CCCS = ccsfn; - } + // uncommon case 6: stacks are equal depth, but different + enterFunEqualStacks(CCCS,ccsfn); +} + +/* ----------------------------------------------------------------------------- + Decide whether closures with this CCS should contribute to the heap + profile. + -------------------------------------------------------------------------- */ + +static void +ccsSetSelected (CostCentreStack *ccs) +{ + if (RtsFlags.ProfFlags.modSelector) { + if (! strMatchesSelector (ccs->cc->module, + RtsFlags.ProfFlags.modSelector) ) { + ccs->selected = 0; + return; + } + } + if (RtsFlags.ProfFlags.ccSelector) { + if (! strMatchesSelector (ccs->cc->label, + RtsFlags.ProfFlags.ccSelector) ) { + ccs->selected = 0; + return; + } + } + if (RtsFlags.ProfFlags.ccsSelector) { + CostCentreStack *c; + for (c = ccs; c != NULL; c = c->prevStack) + { + if ( strMatchesSelector (c->cc->label, + RtsFlags.ProfFlags.ccsSelector) ) { + break; + } + } + if (c == NULL) { + ccs->selected = 0; + return; + } + } + + ccs->selected = 1; + return; } /* ----------------------------------------------------------------------------- @@ -370,211 +421,192 @@ EnterFunCCS ( CostCentreStack *ccsfn ) -------------------------------------------------------------------------- */ #ifdef DEBUG -CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc ); +CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc ); CostCentreStack * -PushCostCentre ( CostCentreStack *ccs, CostCentre *cc ) -#define PushCostCentre _PushCostCentre +pushCostCentre ( CostCentreStack *ccs, CostCentre *cc ) +#define pushCostCentre _pushCostCentre { IF_DEBUG(prof, traceBegin("pushing %s on ", cc->label); debugCCS(ccs); traceEnd();); - return PushCostCentre(ccs,cc); + return pushCostCentre(ccs,cc); } #endif -CostCentreStack * -PushCostCentre ( CostCentreStack *ccs, CostCentre *cc ) -{ - CostCentreStack *temp_ccs; - - if (ccs == EMPTY_STACK) - return ActualPush(ccs,cc); - else { - if (ccs->cc == cc) - return ccs; - else { - /* check if we've already memoized this stack */ - temp_ccs = IsInIndexTable(ccs->indexTable,cc); - - if (temp_ccs != EMPTY_STACK) - return temp_ccs; - else { - temp_ccs = CheckLoop(ccs,cc); - if (temp_ccs != NULL) { - /* we have recursed to an older CCS. Mark this in - * the index table, and emit a "back edge" into the - * log file. - */ - ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1); - DecBackEdge(temp_ccs,ccs); - return temp_ccs; - } else { - return ActualPush(ccs,cc); - } - } - } - } -} - -static CostCentreStack * -CheckLoop ( CostCentreStack *ccs, CostCentre *cc ) -{ - while (ccs != EMPTY_STACK) { - if (ccs->cc == cc) - return ccs; - ccs = ccs->prevStack; - } - return NULL; -} - /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */ #ifdef DEBUG -CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ); +CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ); CostCentreStack * -AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ) -#define AppendCCS _AppendCCS -{ - IF_DEBUG(prof, - if (ccs1 != ccs2) { - debugBelch("Appending "); - debugCCS(ccs1); - debugBelch(" to "); - debugCCS(ccs2); - debugBelch("\n");}); - return AppendCCS(ccs1,ccs2); +appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ) +#define appendCCS _appendCCS +{ + IF_DEBUG(prof, + if (ccs1 != ccs2) { + debugBelch("Appending "); + debugCCS(ccs1); + debugBelch(" to "); + debugCCS(ccs2); + debugBelch("\n");}); + return appendCCS(ccs1,ccs2); } #endif CostCentreStack * -AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ) +appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ) { - CostCentreStack *ccs = NULL; + if (ccs1 == ccs2) { + return ccs1; + } + + if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) { + // stop at a CAF element + return ccs1; + } - if (ccs1 == ccs2) { - return ccs1; - } + return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc); +} - if (ccs2->cc->is_caf == CC_IS_CAF) { - return ccs1; - } - - if (ccs2->prevStack != NULL) { - ccs = AppendCCS(ccs1, ccs2->prevStack); - } +// Pick one: +// #define RECURSION_DROPS +#define RECURSION_TRUNCATES - return PushCostCentre(ccs,ccs2->cc); +CostCentreStack * +pushCostCentre (CostCentreStack *ccs, CostCentre *cc) +{ + CostCentreStack *temp_ccs; + + if (ccs == EMPTY_STACK) + return actualPush(ccs,cc); + else { + if (ccs->cc == cc) + return ccs; + else { + // check if we've already memoized this stack + temp_ccs = isInIndexTable(ccs->indexTable,cc); + + if (temp_ccs != EMPTY_STACK) + return temp_ccs; + else { + temp_ccs = checkLoop(ccs,cc); + if (temp_ccs != NULL) { + // This CC is already in the stack somewhere. + // This could be recursion, or just calling + // another function with the same CC. + // A number of policies are possible at this + // point, we implement two here: + // - truncate the stack to the previous instance + // of this CC + // - ignore this push, return the same stack. + // + CostCentreStack *new_ccs; +#if defined(RECURSION_TRUNCATES) + new_ccs = temp_ccs; +#else // defined(RECURSION_DROPS) + new_ccs = ccs; +#endif + ccs->indexTable = addToIndexTable (ccs->indexTable, + new_ccs, cc, 1); + return new_ccs; + } else { + return actualPush (ccs,cc); + } + } + } + } } static CostCentreStack * -ActualPush ( CostCentreStack *ccs, CostCentre *cc ) +checkLoop (CostCentreStack *ccs, CostCentre *cc) { - CostCentreStack *new_ccs; - - /* allocate space for a new CostCentreStack */ - new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack)); - - return ActualPush_(ccs, cc, new_ccs); + while (ccs != EMPTY_STACK) { + if (ccs->cc == cc) + return ccs; + ccs = ccs->prevStack; + } + return NULL; } static CostCentreStack * -ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs ) +actualPush (CostCentreStack *ccs, CostCentre *cc) { - /* assign values to each member of the structure */ - new_ccs->ccsID = CCS_ID++; - new_ccs->cc = cc; - new_ccs->prevStack = ccs; - - new_ccs->indexTable = EMPTY_TABLE; - - /* Initialise the various _scc_ counters to zero - */ - new_ccs->scc_count = 0; - - /* Initialize all other stats here. There should be a quick way - * that's easily used elsewhere too - */ - new_ccs->time_ticks = 0; - new_ccs->mem_alloc = 0; - new_ccs->inherited_ticks = 0; - new_ccs->inherited_alloc = 0; - - new_ccs->root = ccs->root; + CostCentreStack *new_ccs; - // Set the selected field. - ccsSetSelected(new_ccs); + // allocate space for a new CostCentreStack + new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack)); - /* update the memoization table for the parent stack */ - if (ccs != EMPTY_STACK) - ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc, - 0/*not a back edge*/); - - /* make sure this CC is declared at the next heap/time sample */ - DecCCS(new_ccs); - - /* return a pointer to the new stack */ - return new_ccs; + return actualPush_(ccs, cc, new_ccs); } - static CostCentreStack * -IsInIndexTable(IndexTable *it, CostCentre *cc) +actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs) { - while (it!=EMPTY_TABLE) - { - if (it->cc==cc) - return it->ccs; - else - it = it->next; + /* assign values to each member of the structure */ + new_ccs->ccsID = CCS_ID++; + new_ccs->cc = cc; + new_ccs->prevStack = ccs; + new_ccs->root = ccs->root; + new_ccs->depth = ccs->depth + 1; + + new_ccs->indexTable = EMPTY_TABLE; + + /* Initialise the various _scc_ counters to zero + */ + new_ccs->scc_count = 0; + + /* Initialize all other stats here. There should be a quick way + * that's easily used elsewhere too + */ + new_ccs->time_ticks = 0; + new_ccs->mem_alloc = 0; + new_ccs->inherited_ticks = 0; + new_ccs->inherited_alloc = 0; + + // Set the selected field. + ccsSetSelected(new_ccs); + + /* update the memoization table for the parent stack */ + if (ccs != EMPTY_STACK) { + ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc, + 0/*not a back edge*/); } - - /* otherwise we never found it so return EMPTY_TABLE */ - return EMPTY_TABLE; + + /* return a pointer to the new stack */ + return new_ccs; } -static IndexTable * -AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, - CostCentre *cc, unsigned int back_edge) +static CostCentreStack * +isInIndexTable(IndexTable *it, CostCentre *cc) { - IndexTable *new_it; - - new_it = arenaAlloc(prof_arena, sizeof(IndexTable)); + while (it!=EMPTY_TABLE) + { + if (it->cc == cc) + return it->ccs; + else + it = it->next; + } - new_it->cc = cc; - new_it->ccs = new_ccs; - new_it->next = it; - new_it->back_edge = back_edge; - return new_it; + /* otherwise we never found it so return EMPTY_TABLE */ + return EMPTY_TABLE; } -static void -DecCCS(CostCentreStack *ccs) +static IndexTable * +addToIndexTable (IndexTable *it, CostCentreStack *new_ccs, + CostCentre *cc, unsigned int back_edge) { - if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) { - if (ccs->prevStack == EMPTY_STACK) - fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ, - ccs->ccsID, ccs->cc->ccID); - else - fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ, - ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID); - } -} + IndexTable *new_it; -static void -DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs ) -{ - if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) { - if (ccs->prevStack == EMPTY_STACK) - fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ, - ccs->ccsID, ccs->cc->ccID); - else - fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ, - ccs->ccsID, ccs->cc->ccID, oldccs->ccsID); - } + new_it = arenaAlloc(prof_arena, sizeof(IndexTable)); + + new_it->cc = cc; + new_it->ccs = new_ccs; + new_it->next = it; + new_it->back_edge = back_edge; + return new_it; } /* ----------------------------------------------------------------------------- @@ -585,12 +617,13 @@ DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs ) * reports, so as not to cause confusion. */ static rtsBool -cc_to_ignore (CostCentre *cc) +ignoreCC (CostCentre *cc) { - if ( cc == CC_OVERHEAD + if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL && + ( cc == CC_OVERHEAD || cc == CC_DONT_CARE || cc == CC_GC - || cc == CC_SYSTEM) { + || cc == CC_SYSTEM)) { return rtsTrue; } else { return rtsFalse; @@ -598,13 +631,14 @@ cc_to_ignore (CostCentre *cc) } static rtsBool -ccs_to_ignore (CostCentreStack *ccs) +ignoreCCS (CostCentreStack *ccs) { - if ( ccs == CCS_OVERHEAD - || ccs == CCS_DONT_CARE - || ccs == CCS_GC - || ccs == CCS_SYSTEM) { - return rtsTrue; + if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL && + ( ccs == CCS_OVERHEAD + || ccs == CCS_DONT_CARE + || ccs == CCS_GC + || ccs == CCS_SYSTEM)) { + return rtsTrue; } else { return rtsFalse; } @@ -617,88 +651,89 @@ ccs_to_ignore (CostCentreStack *ccs) static CostCentre *sorted_cc_list; static void -aggregate_cc_costs( CostCentreStack *ccs ) +aggregateCCCosts( CostCentreStack *ccs ) { - IndexTable *i; + IndexTable *i; - ccs->cc->mem_alloc += ccs->mem_alloc; - ccs->cc->time_ticks += ccs->time_ticks; + ccs->cc->mem_alloc += ccs->mem_alloc; + ccs->cc->time_ticks += ccs->time_ticks; - for (i = ccs->indexTable; i != 0; i = i->next) { - if (!i->back_edge) { - aggregate_cc_costs(i->ccs); + for (i = ccs->indexTable; i != 0; i = i->next) { + if (!i->back_edge) { + aggregateCCCosts(i->ccs); + } } - } } static void -insert_cc_in_sorted_list( CostCentre *new_cc ) +insertCCInSortedList( CostCentre *new_cc ) { - CostCentre **prev, *cc; + CostCentre **prev, *cc; - prev = &sorted_cc_list; - for (cc = sorted_cc_list; cc != NULL; cc = cc->link) { - if (new_cc->time_ticks > cc->time_ticks) { - new_cc->link = cc; - *prev = new_cc; - return; - } else { - prev = &(cc->link); + prev = &sorted_cc_list; + for (cc = sorted_cc_list; cc != NULL; cc = cc->link) { + if (new_cc->time_ticks > cc->time_ticks) { + new_cc->link = cc; + *prev = new_cc; + return; + } else { + prev = &(cc->link); + } } - } - new_cc->link = NULL; - *prev = new_cc; + new_cc->link = NULL; + *prev = new_cc; } static void -report_per_cc_costs( void ) +reportPerCCCosts( void ) { - CostCentre *cc, *next; - nat max_label_len, max_module_len; + CostCentre *cc, *next; + nat max_label_len, max_module_len; - aggregate_cc_costs(CCS_MAIN); - sorted_cc_list = NULL; + aggregateCCCosts(CCS_MAIN); + sorted_cc_list = NULL; - max_label_len = max_module_len = 0; + max_label_len = 11; // no shorter than the "COST CENTRE" header + max_module_len = 7; // no shorter than the "MODULE" header - for (cc = CC_LIST; cc != NULL; cc = next) { - next = cc->link; - if (cc->time_ticks > total_prof_ticks/100 - || cc->mem_alloc > total_alloc/100 - || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) { - insert_cc_in_sorted_list(cc); - - max_label_len = stg_max(strlen(cc->label), max_label_len); - max_module_len = stg_max(strlen(cc->module), max_module_len); + for (cc = CC_LIST; cc != NULL; cc = next) { + next = cc->link; + if (cc->time_ticks > total_prof_ticks/100 + || cc->mem_alloc > total_alloc/100 + || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) { + insertCCInSortedList(cc); + + max_label_len = stg_max(strlen(cc->label), max_label_len); + max_module_len = stg_max(strlen(cc->module), max_module_len); + } } - } - - fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE"); - fprintf(prof_file, "%6s %6s", "%time", "%alloc"); - if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { - fprintf(prof_file, " %5s %9s", "ticks", "bytes"); - } - fprintf(prof_file, "\n\n"); - - for (cc = sorted_cc_list; cc != NULL; cc = cc->link) { - if (cc_to_ignore(cc)) { - continue; - } - fprintf(prof_file, "%-*s %-*s", max_label_len, cc->label, max_module_len, cc->module); - fprintf(prof_file, "%6.1f %6.1f", - total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100), - total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat) - total_alloc * 100) - ); - - if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { - fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64, - (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_)); - } - fprintf(prof_file, "\n"); - } - fprintf(prof_file,"\n\n"); + fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE"); + fprintf(prof_file, "%6s %6s", "%time", "%alloc"); + if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { + fprintf(prof_file, " %5s %9s", "ticks", "bytes"); + } + fprintf(prof_file, "\n\n"); + + for (cc = sorted_cc_list; cc != NULL; cc = cc->link) { + if (ignoreCC(cc)) { + continue; + } + fprintf(prof_file, "%-*s %-*s", max_label_len, cc->label, max_module_len, cc->module); + fprintf(prof_file, "%6.1f %6.1f", + total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100), + total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat) + total_alloc * 100) + ); + + if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { + fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64, + (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_)); + } + fprintf(prof_file, "\n"); + } + + fprintf(prof_file,"\n\n"); } /* ----------------------------------------------------------------------------- @@ -706,22 +741,18 @@ report_per_cc_costs( void ) -------------------------------------------------------------------------- */ static void -fprint_header( nat max_label_len, nat max_module_len ) +fprintHeader( nat max_label_len, nat max_module_len ) { - fprintf(prof_file, "%-24s %-10s individual inherited\n", "", ""); + fprintf(prof_file, "%-*s %-*s%6s %11s %11s %11s\n", max_label_len, "", max_module_len, "", "", "", "individual", "inherited"); - fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE"); - fprintf(prof_file, "%6s %10s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc"); + fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE"); + fprintf(prof_file, "%6s %11s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc"); - if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { - fprintf(prof_file, " %5s %9s", "ticks", "bytes"); -#if defined(PROFILING_DETAIL_COUNTS) - fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s", - "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub"); -#endif - } + if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { + fprintf(prof_file, " %5s %9s", "ticks", "bytes"); + } - fprintf(prof_file, "\n\n"); + fprintf(prof_file, "\n\n"); } void @@ -734,17 +765,9 @@ reportCCSProfiling( void ) total_prof_ticks = 0; total_alloc = 0; - count_ticks(CCS_MAIN); + countTickss(CCS_MAIN); - switch (RtsFlags.CcFlags.doCostCentres) { - case 0: - return; - case COST_CENTRES_XML: - gen_XML_logfile(); - return; - default: - break; - } + if (RtsFlags.CcFlags.doCostCentres == 0) return; fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n", time_str(), "Final"); @@ -769,92 +792,83 @@ reportCCSProfiling( void ) showStgWord64(total_alloc * sizeof(W_), temp, rtsTrue/*commas*/)); -#if defined(PROFILING_DETAIL_COUNTS) - fprintf(prof_file, " (%lu closures)", total_allocs); -#endif fprintf(prof_file, " (excludes profiling overheads)\n\n"); - report_per_cc_costs(); + reportPerCCCosts(); - inherit_costs(CCS_MAIN); + inheritCosts(CCS_MAIN); reportCCS(pruneCCSTree(CCS_MAIN)); } static void findCCSMaxLens(CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len) { - CostCentre *cc; - IndexTable *i; - - cc = ccs->cc; - - *max_label_len = stg_max(*max_label_len, indent + strlen(cc->label)); - *max_module_len = stg_max(*max_module_len, strlen(cc->module)); - - for (i = ccs->indexTable; i != 0; i = i->next) { - if (!i->back_edge) { - findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len); + CostCentre *cc; + IndexTable *i; + + cc = ccs->cc; + + *max_label_len = stg_max(*max_label_len, indent + strlen(cc->label)); + *max_module_len = stg_max(*max_module_len, strlen(cc->module)); + + for (i = ccs->indexTable; i != 0; i = i->next) { + if (!i->back_edge) { + findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len); + } } - } } static void logCCS(CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len) { - CostCentre *cc; - IndexTable *i; + CostCentre *cc; + IndexTable *i; - cc = ccs->cc; - - /* Only print cost centres with non 0 data ! */ - - if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL || - ! ccs_to_ignore(ccs)) - /* force printing of *all* cost centres if -P -P */ + cc = ccs->cc; + + /* Only print cost centres with non 0 data ! */ + + if (!ignoreCCS(ccs)) + /* force printing of *all* cost centres if -Pa */ { - fprintf(prof_file, "%-*s%-*s %-*s", - indent, "", max_label_len-indent, cc->label, max_module_len, cc->module); + fprintf(prof_file, "%-*s%-*s %-*s", + indent, "", max_label_len-indent, cc->label, max_module_len, cc->module); - fprintf(prof_file, "%6ld %11.0f %5.1f %5.1f %5.1f %5.1f", - ccs->ccsID, (double) ccs->scc_count, - total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0), - total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0), - total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0), - total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0) + fprintf(prof_file, "%6ld %11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f", + ccs->ccsID, ccs->scc_count, + total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0), + total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0), + total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0), + total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0) ); - if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { - fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64, - (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_)); -#if defined(PROFILING_DETAIL_COUNTS) - fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld", - ccs->mem_allocs, ccs->thunk_count, - ccs->function_count, ccs->pap_count, - ccs->subsumed_fun_count, ccs->subsumed_caf_count, - ccs->caffun_subsumed); -#endif + if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { + fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64, + (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_)); + } + fprintf(prof_file, "\n"); } - fprintf(prof_file, "\n"); - } - for (i = ccs->indexTable; i != 0; i = i->next) { - if (!i->back_edge) { - logCCS(i->ccs, indent+1, max_label_len, max_module_len); + for (i = ccs->indexTable; i != 0; i = i->next) { + if (!i->back_edge) { + logCCS(i->ccs, indent+1, max_label_len, max_module_len); + } } - } } static void reportCCS(CostCentreStack *ccs) { - nat max_label_len, max_module_len; - max_label_len = max_module_len = 0; - - findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len); - - fprint_header(max_label_len, max_module_len); - logCCS(ccs, 0, max_label_len, max_module_len); + nat max_label_len, max_module_len; + + max_label_len = 11; // no shorter than "COST CENTRE" header + max_module_len = 7; // no shorter than "MODULE" header + + findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len); + + fprintHeader(max_label_len, max_module_len); + logCCS(ccs, 0, max_label_len, max_module_len); } @@ -862,138 +876,176 @@ reportCCS(CostCentreStack *ccs) * ticks/allocations. */ static void -count_ticks(CostCentreStack *ccs) +countTickss(CostCentreStack *ccs) { - IndexTable *i; - - if (!ccs_to_ignore(ccs)) { - total_alloc += ccs->mem_alloc; - total_prof_ticks += ccs->time_ticks; - } - for (i = ccs->indexTable; i != NULL; i = i->next) - if (!i->back_edge) { - count_ticks(i->ccs); + IndexTable *i; + + if (!ignoreCCS(ccs)) { + total_alloc += ccs->mem_alloc; + total_prof_ticks += ccs->time_ticks; } + for (i = ccs->indexTable; i != NULL; i = i->next) + if (!i->back_edge) { + countTickss(i->ccs); + } } /* Traverse the cost centre stack tree and inherit ticks & allocs. */ static void -inherit_costs(CostCentreStack *ccs) +inheritCosts(CostCentreStack *ccs) { - IndexTable *i; + IndexTable *i; - if (ccs_to_ignore(ccs)) { return; } + if (ignoreCCS(ccs)) { return; } - ccs->inherited_ticks += ccs->time_ticks; - ccs->inherited_alloc += ccs->mem_alloc; + ccs->inherited_ticks += ccs->time_ticks; + ccs->inherited_alloc += ccs->mem_alloc; - for (i = ccs->indexTable; i != NULL; i = i->next) - if (!i->back_edge) { - inherit_costs(i->ccs); - ccs->inherited_ticks += i->ccs->inherited_ticks; - ccs->inherited_alloc += i->ccs->inherited_alloc; - } - - return; + for (i = ccs->indexTable; i != NULL; i = i->next) + if (!i->back_edge) { + inheritCosts(i->ccs); + ccs->inherited_ticks += i->ccs->inherited_ticks; + ccs->inherited_alloc += i->ccs->inherited_alloc; + } + + return; } +// +// Prune CCSs with zero entries, zero ticks or zero allocation from +// the tree, unless COST_CENTRES_ALL is on. +// static CostCentreStack * -pruneCCSTree( CostCentreStack *ccs ) +pruneCCSTree (CostCentreStack *ccs) { - CostCentreStack *ccs1; - IndexTable *i, **prev; - - prev = &ccs->indexTable; - for (i = ccs->indexTable; i != 0; i = i->next) { - if (i->back_edge) { continue; } + CostCentreStack *ccs1; + IndexTable *i, **prev; + + prev = &ccs->indexTable; + for (i = ccs->indexTable; i != 0; i = i->next) { + if (i->back_edge) { continue; } + + ccs1 = pruneCCSTree(i->ccs); + if (ccs1 == NULL) { + *prev = i->next; + } else { + prev = &(i->next); + } + } + + if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL + /* force printing of *all* cost centres if -P -P */ ) - ccs1 = pruneCCSTree(i->ccs); - if (ccs1 == NULL) { - *prev = i->next; + || ( ccs->indexTable != 0 ) + || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc ) + ) { + return ccs; } else { - prev = &(i->next); + return NULL; } - } - - if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL - /* force printing of *all* cost centres if -P -P */ ) - - || ( ccs->indexTable != 0 ) - || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc ) - ) { - return ccs; - } else { - return NULL; - } } -/* ----------------------------------------------------------------------------- - Generate the XML time/allocation profile - -------------------------------------------------------------------------- */ - void -gen_XML_logfile( void ) +fprintCCS( FILE *f, CostCentreStack *ccs ) { - fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks); - - reportCCS_XML(pruneCCSTree(CCS_MAIN)); - - fprintf(prof_file, " 0\n"); + fprintf(f,"<"); + for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) { + fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label); + if (ccs->prevStack && ccs->prevStack != CCS_MAIN) { + fprintf(f,","); + } + } + fprintf(f,">"); } -static void -reportCCS_XML(CostCentreStack *ccs) +// Returns: True if the call stack ended with CAF +static rtsBool fprintCallStack (CostCentreStack *ccs) { - CostCentre *cc; - IndexTable *i; - - if (ccs_to_ignore(ccs)) { return; } - - cc = ccs->cc; - - fprintf(prof_file, " 1 %ld %" FMT_Word64 " %" FMT_Word64 " %" FMT_Word64, - ccs->ccsID, ccs->scc_count, (StgWord64)(ccs->time_ticks), ccs->mem_alloc); - - for (i = ccs->indexTable; i != 0; i = i->next) { - if (!i->back_edge) { - reportCCS_XML(i->ccs); + CostCentreStack *prev; + + fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label); + prev = ccs->prevStack; + while (prev && prev != CCS_MAIN) { + ccs = prev; + fprintf(stderr, ",\n called from %s.%s", + ccs->cc->module, ccs->cc->label); + prev = ccs->prevStack; } - } -} + fprintf(stderr, "\n"); -void -fprintCCS( FILE *f, CostCentreStack *ccs ) -{ - fprintf(f,"<"); - for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) { - fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label); - if (ccs->prevStack && ccs->prevStack != CCS_MAIN) { - fprintf(f,","); - } - } - fprintf(f,">"); + return (!strncmp(ccs->cc->label, "CAF", 3)); } /* For calling from .cmm code, where we can't reliably refer to stderr */ void -fprintCCS_stderr( CostCentreStack *ccs ) +fprintCCS_stderr (CostCentreStack *ccs, StgTSO *tso) { - fprintCCS(stderr, ccs); + rtsBool is_caf; + StgPtr frame; + StgStack *stack; + CostCentreStack *prev_ccs; + nat depth = 0; + const nat MAX_DEPTH = 10; // don't print gigantic chains of stacks + + fprintf(stderr, "*** Exception raised (reporting due to +RTS -xc), stack trace:\n "); + is_caf = fprintCallStack(ccs); + + // traverse the stack down to the enclosing update frame to + // find out where this CCS was evaluated from... + + stack = tso->stackobj; + frame = stack->sp; + prev_ccs = ccs; + + for (; is_caf && depth < MAX_DEPTH; depth++) + { + switch (get_itbl((StgClosure*)frame)->type) + { + case UPDATE_FRAME: + ccs = ((StgUpdateFrame*)frame)->header.prof.ccs; + frame += sizeofW(StgUpdateFrame); + if (ccs == CCS_MAIN) { + goto done; + } + if (ccs == prev_ccs) { + // ignore if this is the same as the previous stack, + // we're probably in library code and haven't + // accumulated any more interesting stack items + // since the last update frame. + break; + } + prev_ccs = ccs; + fprintf(stderr, " --> evaluated by: "); + is_caf = fprintCallStack(ccs); + break; + case UNDERFLOW_FRAME: + stack = ((StgUnderflowFrame*)frame)->next_chunk; + frame = stack->sp; + break; + case STOP_FRAME: + goto done; + default: + frame += stack_frame_sizeW((StgClosure*)frame); + break; + } + } +done: + return; } #ifdef DEBUG void debugCCS( CostCentreStack *ccs ) { - debugBelch("<"); - for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) { - debugBelch("%s.%s", ccs->cc->module, ccs->cc->label); - if (ccs->prevStack && ccs->prevStack != CCS_MAIN) { - debugBelch(","); - } - } - debugBelch(">"); + debugBelch("<"); + for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) { + debugBelch("%s.%s", ccs->cc->module, ccs->cc->label); + if (ccs->prevStack && ccs->prevStack != CCS_MAIN) { + debugBelch(","); + } + } + debugBelch(">"); } #endif /* DEBUG */ diff --git a/rts/Profiling.h b/rts/Profiling.h index 3e365fe536..2ee3311c81 100644 --- a/rts/Profiling.h +++ b/rts/Profiling.h @@ -30,13 +30,12 @@ extern FILE *hp_file; #ifdef PROFILING -void gen_XML_logfile ( void ); void reportCCSProfiling ( void ); void PrintNewStackDecls ( void ); void fprintCCS( FILE *f, CostCentreStack *ccs ); -void fprintCCS_stderr( CostCentreStack *ccs ); +void fprintCCS_stderr (CostCentreStack *ccs, StgTSO *tso); #ifdef DEBUG void debugCCS( CostCentreStack *ccs ); diff --git a/rts/Proftimer.c b/rts/Proftimer.c index dfcc709625..82838184b7 100644 --- a/rts/Proftimer.c +++ b/rts/Proftimer.c @@ -65,11 +65,13 @@ initProfTimer( void ) startHeapProfTimer(); } +nat total_ticks = 0; void handleProfTick(void) { #ifdef PROFILING + total_ticks++; if (do_prof_ticks) { CCCS->time_ticks++; } diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index 775505f887..acc87b1938 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -739,7 +739,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, */ if (RtsFlags.ProfFlags.showCCSOnException) { - fprintCCS_stderr(tso->prof.CCCS); + fprintCCS_stderr(tso->prof.CCCS,tso); } #endif // ASSUMES: the thread is not already complete or dead diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index 7009ea23a6..d2b4945c19 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -258,14 +258,12 @@ usage_text[] = { " -B Sound the bell at the start of each garbage collection", #if defined(PROFILING) "", -" -px Time/allocation profile (XML) (output file <program>.prof)", " -p Time/allocation profile (output file <program>.prof)", " -P More detailed Time/Allocation profile", " -Pa Give information about *all* cost centres", # if defined(PROFILING) "", -" -hx Heap residency profile (XML) (output file <program>.prof)", " -h<break-down> Heap residency profile (hp2ps) (output file <program>.hp)", " break-down: c = cost centre stack (default)", " m = module", @@ -936,10 +934,7 @@ error = rtsTrue; OPTION_SAFE; PROFILING_BUILD_ONLY( switch (rts_argv[arg][2]) { - case 'x': - RtsFlags.CcFlags.doCostCentres = COST_CENTRES_XML; - break; - case 'a': + case 'a': RtsFlags.CcFlags.doCostCentres = COST_CENTRES_ALL; break; default: diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index b4a037d5d6..26f24f6f39 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -36,6 +36,19 @@ INFO_TABLE_RET (stg_stack_underflow_frame, UNDERFLOW_FRAME, P_ unused) } /* ---------------------------------------------------------------------------- + Restore a saved cost centre + ------------------------------------------------------------------------- */ + +INFO_TABLE_RET (stg_restore_cccs, RET_SMALL, W_ cccs) +{ +#if defined(PROFILING) + W_[CCCS] = Sp(1); +#endif + Sp_adj(2); + jump %ENTRY_CODE(Sp(0)); +} + +/* ---------------------------------------------------------------------------- Support for the bytecode interpreter. ------------------------------------------------------------------------- */ @@ -226,9 +239,6 @@ INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM") LDV_ENTER(R1); - /* Enter PAP cost centre */ - ENTER_CCS_PAP_CL(R1); - /* For ticky-ticky, change the perm_ind to a normal ind on first * entry, so the number of ent_perm_inds is the number of *thunks* * entered again, not the number of subsequent entries. diff --git a/rts/Updates.cmm b/rts/Updates.cmm index 789bdd5e53..0b43b9cdf1 100644 --- a/rts/Updates.cmm +++ b/rts/Updates.cmm @@ -41,7 +41,7 @@ INFO_TABLE_RET( stg_upd_frame, UPDATE_FRAME, UPD_FRAME_PARAMS) W_ updatee; updatee = StgUpdateFrame_updatee(Sp); - + /* remove the update frame from the stack */ Sp = Sp + SIZEOF_StgUpdateFrame; diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 82e89a5470..1dad6c8df0 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -607,6 +607,9 @@ allocate (Capability *cap, lnat n) bdescr *bd; StgPtr p; + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; @@ -638,9 +641,6 @@ allocate (Capability *cap, lnat n) /* small allocation (<LARGE_OBJECT_THRESHOLD) */ - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); - bd = cap->r.rCurrentAlloc; if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs index d9e6041a61..2ffa81bb76 100644 --- a/utils/genapply/GenApply.hs +++ b/utils/genapply/GenApply.hs @@ -230,8 +230,10 @@ genMkPAP regstatus macro jump ticker disamb else empty, if is_fun_case then mb_tag_node arity else empty, - text "jump " <> text jump <> semi - ]) $$ + if overflow_regs + then text "jump_SAVE_CCCS" <> parens (text jump) <> semi + else text "jump " <> text jump <> semi + ]) $$ text "}" where @@ -280,18 +282,37 @@ genMkPAP regstatus macro jump ticker disamb loadSpWordOff "W_" 0 <> text " = " <> mkApplyInfoName rest_args <> semi - shuffle_extra_args - = vcat (map shuffle_down - [sp_stk_args .. sp_stk_args+stack_args_size-1]) $$ - loadSpWordOff "W_" (sp_stk_args+stack_args_size-1) - <> text " = " - <> mkApplyInfoName rest_args <> semi $$ - text "Sp_adj(" <> int (sp_stk_args - 1) <> text ");" - - shuffle_down i = - loadSpWordOff "W_" (i-1) <> text " = " <> + shuffle_extra_args + = vcat [text "#ifdef PROFILING", + shuffle True, + text "#else", + shuffle False, + text "#endif"] + where + -- Sadly here we have to insert an stg_restore_cccs frame + -- just underneath the stg_ap_*_info frame if we're + -- profiling; see Note [jump_SAVE_CCCS] + shuffle prof = + let offset = if prof then 2 else 0 in + vcat (map (shuffle_down (offset+1)) + [sp_stk_args .. sp_stk_args+stack_args_size-1]) $$ + (if prof + then + loadSpWordOff "W_" (sp_stk_args+stack_args_size-3) + <> text " = stg_restore_cccs_info;" $$ + loadSpWordOff "W_" (sp_stk_args+stack_args_size-2) + <> text " = W_[CCCS];" + else empty) $$ + loadSpWordOff "W_" (sp_stk_args+stack_args_size-1) + <> text " = " + <> mkApplyInfoName rest_args <> semi $$ + text "Sp_adj(" <> int (sp_stk_args - 1 - offset) <> text ");" + + shuffle_down j i = + loadSpWordOff "W_" (i-j) <> text " = " <> loadSpWordOff "W_" i <> semi + -- The EXACT ARITY case -- -- if (arity == 1) { @@ -357,6 +378,21 @@ genMkPAP regstatus macro jump ticker disamb = assignRegs regstatus stk_args_slow_offset args -- BUILD_PAP assumes args start at offset 1 +-- Note [jump_SAVE_CCCS] + +-- when profiling, if we have some extra arguments to apply that we +-- save to the stack, we must also save the current cost centre stack +-- and restore it when applying the extra arguments. This is all +-- handled by the macro jump_SAVE_CCCS(target), defined in +-- rts/AutoApply.h. +-- +-- At the jump, the stack will look like this: +-- +-- ... extra args ... +-- stg_ap_pp_info +-- CCCS +-- stg_restore_cccs_info + -- -------------------------------------- -- Examine tag bits of function pointer and enter it -- directly if needed. @@ -579,8 +615,9 @@ genApply regstatus args = -- overwritten by an indirection, so we must enter the original -- info pointer we read, don't read it again, because it might -- not be enterable any more. - text "jump %ENTRY_CODE(info);", - text "" + text "jump_SAVE_CCCS(%ENTRY_CODE(info));", + -- see Note [jump_SAVE_CCCS] + text "" ]), text "}", |