diff options
-rw-r--r-- | compiler/cmm/CLabel.hs | 51 | ||||
-rw-r--r-- | compiler/cmm/CmmType.hs | 6 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmArgRep.hs | 135 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmBind.hs | 43 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmClosure.hs | 5 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmEnv.hs | 6 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 10 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmHeap.hs | 5 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmLayout.hs | 84 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmTicky.hs | 669 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmUtils.hs | 5 | ||||
-rw-r--r-- | compiler/ghc.cabal.in | 1 | ||||
-rw-r--r-- | compiler/main/DynFlags.hs | 6 | ||||
-rw-r--r-- | docs/users_guide/profiling.xml | 164 | ||||
-rw-r--r-- | includes/Cmm.h | 41 | ||||
-rw-r--r-- | includes/rts/Ticky.h | 2 | ||||
-rw-r--r-- | includes/stg/Ticky.h | 48 | ||||
-rw-r--r-- | mk/ways.mk | 4 | ||||
-rw-r--r-- | rts/AutoApply.h | 6 | ||||
-rw-r--r-- | rts/Exception.cmm | 10 | ||||
-rw-r--r-- | rts/Linker.c | 34 | ||||
-rw-r--r-- | rts/RaiseAsync.c | 6 | ||||
-rw-r--r-- | rts/Ticky.c | 125 | ||||
-rw-r--r-- | rts/sm/Storage.c | 4 | ||||
-rw-r--r-- | utils/deriveConstants/DeriveConstants.hs | 1 |
25 files changed, 829 insertions, 642 deletions
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index ebc9e53c72..8fe8c3c874 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -61,7 +61,7 @@ module CLabel ( mkCAFBlackHoleInfoTableLabel, mkCAFBlackHoleEntryLabel, mkRtsPrimOpLabel, - mkRtsSlowTickyCtrLabel, + mkRtsSlowFastTickyCtrLabel, mkSelectorInfoLabel, mkSelectorEntryLabel, @@ -99,7 +99,7 @@ module CLabel ( isCFunctionLabel, isGcPtrLabel, labelDynamic, -- * Conversions - toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, toRednCountsLbl, + toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, toRednCountsLbl, hasHaskellName, pprCLabel ) where @@ -313,7 +313,7 @@ data RtsLabelInfo | RtsPrimOp PrimOp | RtsApFast FastString -- ^ _fast versions of generic apply - | RtsSlowTickyCtr String + | RtsSlowFastTickyCtr String deriving (Eq, Ord) -- NOTE: Eq on LitString compares the pointer only, so this isn't @@ -356,9 +356,10 @@ mkTopSRTLabel :: Unique -> CLabel mkTopSRTLabel u = SRTLabel u mkSRTLabel :: Name -> CafInfo -> CLabel -mkRednCountsLabel :: Name -> CafInfo -> CLabel +mkRednCountsLabel :: Name -> CLabel mkSRTLabel name c = IdLabel name c SRT -mkRednCountsLabel name c = IdLabel name c RednCounts +mkRednCountsLabel name = + IdLabel name NoCafRefs RednCounts -- Note [ticky for LNE] -- These have local & (possibly) external variants: mkLocalClosureLabel :: Name -> CafInfo -> CLabel @@ -503,8 +504,8 @@ mkCCSLabel ccs = CCS_Label ccs mkRtsApFastLabel :: FastString -> CLabel mkRtsApFastLabel str = RtsLabel (RtsApFast str) -mkRtsSlowTickyCtrLabel :: String -> CLabel -mkRtsSlowTickyCtrLabel pat = RtsLabel (RtsSlowTickyCtr pat) +mkRtsSlowFastTickyCtrLabel :: String -> CLabel +mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat) -- Constructing Code Coverage Labels @@ -549,10 +550,6 @@ toSlowEntryLbl :: CLabel -> CLabel toSlowEntryLbl (IdLabel n c _) = IdLabel n c Slow toSlowEntryLbl l = pprPanic "toSlowEntryLbl" (ppr l) -toRednCountsLbl :: CLabel -> CLabel -toRednCountsLbl (IdLabel n c _) = IdLabel n c RednCounts -toRednCountsLbl l = pprPanic "toRednCountsLbl" (ppr l) - toEntryLbl :: CLabel -> CLabel toEntryLbl (IdLabel n c LocalInfoTable) = IdLabel n c LocalEntry toEntryLbl (IdLabel n c ConInfoTable) = IdLabel n c ConEntry @@ -574,12 +571,38 @@ toInfoLbl (CmmLabel m str CmmEntry) = CmmLabel m str CmmInfo toInfoLbl (CmmLabel m str CmmRet) = CmmLabel m str CmmRetInfo toInfoLbl l = pprPanic "CLabel.toInfoLbl" (ppr l) +toRednCountsLbl :: CLabel -> Maybe CLabel +toRednCountsLbl = fmap mkRednCountsLabel . hasHaskellName + +hasHaskellName :: CLabel -> Maybe Name +hasHaskellName (IdLabel n _ _) = Just n +hasHaskellName _ = Nothing + -- ----------------------------------------------------------------------------- --- Does a CLabel refer to a CAF? +-- Does a CLabel's referent itself refer to a CAF? hasCAF :: CLabel -> Bool +hasCAF (IdLabel _ _ RednCounts) = False -- Note [ticky for LNE] hasCAF (IdLabel _ MayHaveCafRefs _) = True hasCAF _ = False +-- Note [ticky for LNE] +-- ~~~~~~~~~~~~~~~~~~~~~ + +-- Until 14 Feb 2013, every ticky counter was associated with a +-- closure. Thus, ticky labels used IdLabel. It is odd that +-- CmmBuildInfoTables.cafTransfers would consider such a ticky label +-- reason to add the name to the CAFEnv (and thus eventually the SRT), +-- but it was harmless because the ticky was only used if the closure +-- was also. +-- +-- Since we now have ticky counters for LNEs, it is no longer the case +-- that every ticky counter has an actual closure. So I changed the +-- generation of ticky counters' CLabels to not result in their +-- associated id ending up in the SRT. +-- +-- NB IdLabel is still appropriate for ticky ids (as opposed to +-- CmmLabel) because the LNE's counter is still related to an .hs Id, +-- that Id just isn't for a proper closure. -- ----------------------------------------------------------------------------- -- Does a CLabel need declaring before use or not? @@ -1051,8 +1074,8 @@ pprCLbl (CmmLabel _ fs CmmClosure) pprCLbl (RtsLabel (RtsPrimOp primop)) = ptext (sLit "stg_") <> ppr primop -pprCLbl (RtsLabel (RtsSlowTickyCtr pat)) - = ptext (sLit "SLOW_CALL_") <> text pat <> ptext (sLit "_ctr") +pprCLbl (RtsLabel (RtsSlowFastTickyCtr pat)) + = ptext (sLit "SLOW_CALL_fast_") <> text pat <> ptext (sLit "_ctr") pprCLbl (ForeignLabel str _ _ _) = ftext str diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs index 49a2dc1107..8eac8c9e85 100644 --- a/compiler/cmm/CmmType.hs +++ b/compiler/cmm/CmmType.hs @@ -15,6 +15,7 @@ module CmmType , rEP_CostCentreStack_mem_alloc , rEP_CostCentreStack_scc_count , rEP_StgEntCounter_allocs + , rEP_StgEntCounter_allocd , ForeignHint(..) @@ -337,6 +338,11 @@ rEP_StgEntCounter_allocs dflags = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocs pc)) where pc = sPlatformConstants (settings dflags) +rEP_StgEntCounter_allocd :: DynFlags -> CmmType +rEP_StgEntCounter_allocd dflags + = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocd pc)) + where pc = sPlatformConstants (settings dflags) + ------------------------------------------------------------------------- {- Note [Signed vs unsigned] ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/codeGen/StgCmmArgRep.hs b/compiler/codeGen/StgCmmArgRep.hs new file mode 100644 index 0000000000..bd228d4617 --- /dev/null +++ b/compiler/codeGen/StgCmmArgRep.hs @@ -0,0 +1,135 @@ +----------------------------------------------------------------------------- +-- +-- Argument representations used in StgCmmLayout. +-- +-- (c) The University of Glasgow 2013 +-- +----------------------------------------------------------------------------- + +module StgCmmArgRep ( + ArgRep(..), toArgRep, argRepSizeW, + + argRepString, isNonV, idArgRep, + + slowCallPattern, + + ) where + +import StgCmmClosure ( idPrimRep ) + +import SMRep ( WordOff ) +import Id ( Id ) +import TyCon ( PrimRep(..), primElemRepSizeB ) +import BasicTypes ( RepArity ) +import Constants ( wORD64_SIZE ) +import DynFlags + +import Outputable +import FastString + +-- I extricated this code as this new module in order to avoid a +-- cyclic dependency between StgCmmLayout and StgCmmTicky. +-- +-- NSF 18 Feb 2013 + +------------------------------------------------------------------------- +-- Classifying arguments: ArgRep +------------------------------------------------------------------------- + +-- ArgRep is re-exported by StgCmmLayout, but only for use in the +-- byte-code generator which also needs to know about the +-- classification of arguments. + +data ArgRep = P -- GC Ptr + | N -- Word-sized non-ptr + | L -- 64-bit non-ptr (long) + | V -- Void + | F -- Float + | D -- Double + | V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc. +instance Outputable ArgRep where ppr = text . argRepString + +argRepString :: ArgRep -> String +argRepString P = "P" +argRepString N = "N" +argRepString L = "L" +argRepString V = "V" +argRepString F = "F" +argRepString D = "D" +argRepString V16 = "V16" + +toArgRep :: PrimRep -> ArgRep +toArgRep VoidRep = V +toArgRep PtrRep = P +toArgRep IntRep = N +toArgRep WordRep = N +toArgRep AddrRep = N +toArgRep Int64Rep = L +toArgRep Word64Rep = L +toArgRep FloatRep = F +toArgRep DoubleRep = D +toArgRep (VecRep len elem) + | len*primElemRepSizeB elem == 16 = V16 + | otherwise = error "toArgRep: bad vector primrep" + +isNonV :: ArgRep -> Bool +isNonV V = False +isNonV _ = True + +argRepSizeW :: DynFlags -> ArgRep -> WordOff -- Size in words +argRepSizeW _ N = 1 +argRepSizeW _ P = 1 +argRepSizeW _ F = 1 +argRepSizeW dflags L = wORD64_SIZE `quot` wORD_SIZE dflags +argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags +argRepSizeW _ V = 0 +argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags + +idArgRep :: Id -> ArgRep +idArgRep = toArgRep . idPrimRep + +-- This list of argument patterns should be kept in sync with at least +-- the following: +-- +-- * StgCmmLayout.stdPattern maybe to some degree? +-- +-- * the RTS_RET(stg_ap_*) and RTS_FUN_DECL(stg_ap_*_fast) +-- declarations in includes/stg/MiscClosures.h +-- +-- * the SLOW_CALL_*_ctr declarations in includes/stg/Ticky.h, +-- +-- * the TICK_SLOW_CALL_*() #defines in includes/Cmm.h, +-- +-- * the PR_CTR(SLOW_CALL_*_ctr) calls in rts/Ticky.c, +-- +-- * and the SymI_HasProto(stg_ap_*_{ret,info,fast}) calls and +-- SymI_HasProto(SLOW_CALL_*_ctr) calls in rts/Linker.c +-- +-- There may be more places that I haven't found; I merely igrep'd for +-- pppppp and excluded things that seemed ghci-specific. +-- +-- Also, it seems at the moment that ticky counters with void +-- arguments will never be bumped, but I'm still declaring those +-- counters, defensively. +-- +-- NSF 6 Mar 2013 + +-- These cases were found to cover about 99% of all slow calls: +slowCallPattern :: [ArgRep] -> (FastString, RepArity) +-- Returns the generic apply function and arity +slowCallPattern (P: P: P: P: P: P: _) = (fsLit "stg_ap_pppppp", 6) +slowCallPattern (P: P: P: P: P: _) = (fsLit "stg_ap_ppppp", 5) +slowCallPattern (P: P: P: P: _) = (fsLit "stg_ap_pppp", 4) +slowCallPattern (P: P: P: V: _) = (fsLit "stg_ap_pppv", 4) +slowCallPattern (P: P: P: _) = (fsLit "stg_ap_ppp", 3) +slowCallPattern (P: P: V: _) = (fsLit "stg_ap_ppv", 3) +slowCallPattern (P: P: _) = (fsLit "stg_ap_pp", 2) +slowCallPattern (P: V: _) = (fsLit "stg_ap_pv", 2) +slowCallPattern (P: _) = (fsLit "stg_ap_p", 1) +slowCallPattern (V: _) = (fsLit "stg_ap_v", 1) +slowCallPattern (N: _) = (fsLit "stg_ap_n", 1) +slowCallPattern (F: _) = (fsLit "stg_ap_f", 1) +slowCallPattern (D: _) = (fsLit "stg_ap_d", 1) +slowCallPattern (L: _) = (fsLit "stg_ap_l", 1) +slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1) +slowCallPattern [] = (fsLit "stg_ap_0", 0) diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 136bb52b07..1e5d6b9f4f 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -296,7 +296,7 @@ mkRhsClosure dflags bndr _cc _bi (StgApp fun_id args) | args `lengthIs` (arity-1) - && all (isGcPtrRep . idPrimRep . stripNV) fvs + && all (isGcPtrRep . idPrimRep . unsafe_stripNV) fvs && isUpdatable upd_flag && arity <= mAX_SPEC_AP_SIZE dflags && not (gopt Opt_SccProfilingOn dflags) @@ -344,7 +344,7 @@ mkRhsClosure _ bndr cc _ fvs upd_flag args body fv_details :: [(NonVoid Id, VirtualHpOffset)] (tot_wds, ptr_wds, fv_details) = mkVirtHeapOffsets dflags (isLFThunk lf_info) - (addIdReps (map stripNV reduced_fvs)) + (addIdReps (map unsafe_stripNV reduced_fvs)) closure_info = mkClosureInfo dflags False -- Not static bndr lf_info tot_wds ptr_wds descr @@ -369,11 +369,6 @@ mkRhsClosure _ bndr cc _ fvs upd_flag args body -- RETURN ; return (mkRhsInit dflags reg lf_info hp_plus_n) } - --- Use with care; if used inappropriately, it could break invariants. -stripNV :: NonVoid a -> a -stripNV (NonVoid a) = a - ------------------------- cgRhsStdThunk :: Id @@ -418,10 +413,10 @@ mkClosureLFInfo :: Id -- The binder -> [Id] -- Args -> FCode LambdaFormInfo mkClosureLFInfo bndr top fvs upd_flag args - | null args = return (mkLFThunk (idType bndr) top (map stripNV fvs) upd_flag) + | null args = return (mkLFThunk (idType bndr) top (map unsafe_stripNV fvs) upd_flag) | otherwise = do { arg_descr <- mkArgDescr (idName bndr) args - ; return (mkLFReEntrant top (map stripNV fvs) args arg_descr) } + ; return (mkLFReEntrant top (map unsafe_stripNV fvs) args arg_descr) } ------------------------------------------------------------------------ @@ -453,7 +448,8 @@ closureCodeBody :: Bool -- whether this is a top-level binding closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details | arity == 0 -- No args i.e. thunk - = emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $ + = withNewTickyCounterThunk cl_info $ + emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $ \(_, node, _) -> thunkCode cl_info fv_details cc node arity body where lf_info = closureLFInfo cl_info @@ -461,12 +457,7 @@ closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details closureCodeBody top_lvl bndr cl_info cc args arity body fv_details = -- Note: args may be [], if all args are Void - do { -- Allocate the global ticky counter, - -- and establish the ticky-counter - -- label for this block - let ticky_ctr_lbl = closureRednCountsLabel cl_info - ; emitTickyCounter cl_info (map stripNV args) - ; setTickyCtrLabel ticky_ctr_lbl $ do + withNewTickyCounterFun (closureName cl_info) args $ do { ; let lf_info = closureLFInfo cl_info @@ -479,20 +470,20 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details { mkSlowEntryCode bndr cl_info arg_regs ; dflags <- getDynFlags - ; let lf_info = closureLFInfo cl_info - node_points = nodeMustPointToIt dflags lf_info + ; let node_points = nodeMustPointToIt dflags lf_info node' = if node_points then Just node else Nothing - ; tickyEnterFun cl_info - ; enterCostCentreFun cc - (CmmMachOp (mo_wordSub dflags) - [ CmmReg nodeReg - , mkIntExpr dflags (funTag dflags cl_info) ]) ; when node_points (ldvEnterClosure cl_info) ; granYield arg_regs node_points -- Main payload ; entryHeapCheck cl_info node' arity arg_regs $ do - { fv_bindings <- mapM bind_fv fv_details + { -- ticky after heap check to avoid double counting + tickyEnterFun cl_info + ; enterCostCentreFun cc + (CmmMachOp (mo_wordSub dflags) + [ CmmReg nodeReg + , mkIntExpr dflags (funTag dflags cl_info) ]) + ; fv_bindings <- mapM bind_fv fv_details -- Load free vars out of closure *after* -- heap check, to reduce live vars over check ; when node_points $ load_fvs node lf_info fv_bindings @@ -545,7 +536,6 @@ thunkCode cl_info fv_details _cc node arity body = do { dflags <- getDynFlags ; let node_points = nodeMustPointToIt dflags (closureLFInfo cl_info) node' = if node_points then Just node else Nothing - ; tickyEnterThunk cl_info ; ldvEnterClosure cl_info -- NB: Node always points when profiling ; granThunk node_points @@ -562,7 +552,8 @@ thunkCode cl_info fv_details _cc node arity body -- that cc of enclosing scope will be recorded -- in update frame CAF/DICT functions will be -- subsumed by this enclosing cc - do { enterCostCentreThunk (CmmReg nodeReg) + do { tickyEnterThunk cl_info + ; enterCostCentreThunk (CmmReg nodeReg) ; let lf_info = closureLFInfo cl_info ; fv_bindings <- mapM bind_fv fv_details ; load_fvs node lf_info fv_bindings diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index 7f44f67ff7..a057484d39 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -49,7 +49,7 @@ module StgCmmClosure ( -- ** Labels -- These just need the info table label closureInfoLabel, staticClosureLabel, - closureRednCountsLabel, closureSlowEntryLabel, closureLocalEntryLabel, + closureSlowEntryLabel, closureLocalEntryLabel, -- ** Predicates -- These are really just functions on LambdaFormInfo @@ -772,9 +772,6 @@ isToplevClosure (ClosureInfo { closureLFInfo = lf_info }) staticClosureLabel :: ClosureInfo -> CLabel staticClosureLabel = toClosureLbl . closureInfoLabel -closureRednCountsLabel :: ClosureInfo -> CLabel -closureRednCountsLabel = toRednCountsLbl . closureInfoLabel - closureSlowEntryLabel :: ClosureInfo -> CLabel closureSlowEntryLabel = toSlowEntryLbl . closureInfoLabel diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs index 42e4da0e1f..1fdb364b56 100644 --- a/compiler/codeGen/StgCmmEnv.hs +++ b/compiler/codeGen/StgCmmEnv.hs @@ -13,7 +13,7 @@ module StgCmmEnv ( litIdInfo, lneIdInfo, rhsIdInfo, mkRhsInit, idInfoToAmode, - NonVoid(..), isVoidId, nonVoidIds, + NonVoid(..), unsafe_stripNV, isVoidId, nonVoidIds, addBindC, addBindsC, @@ -55,6 +55,10 @@ import Outputable newtype NonVoid a = NonVoid a deriving (Eq, Show) +-- Use with care; if used inappropriately, it could break invariants. +unsafe_stripNV :: NonVoid a -> a +unsafe_stripNV (NonVoid a) = a + instance (Outputable a) => Outputable (NonVoid a) where ppr (NonVoid a) = ppr a diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index f4186f7b9b..78080218f8 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -161,10 +161,11 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body return ( lneIdInfo dflags bndr args , code ) where - code = forkProc $ do - { restoreCurrentCostCentre cc_slot - ; arg_regs <- bindArgsToRegs args - ; void $ noEscapeHeapCheck arg_regs (cgExpr body) } + code = forkProc $ do { + ; withNewTickyCounterLNE (idName bndr) args $ do + ; restoreCurrentCostCentre cc_slot + ; arg_regs <- bindArgsToRegs args + ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) } ------------------------------------------------------------------------ @@ -416,6 +417,7 @@ cgCase scrut bndr alt_type alts | isSingleton alts = False | up_hp_usg > 0 = False | otherwise = True + -- cf Note [Compiling case expressions] gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts ; mb_cc <- maybeSaveCostCentre simple_scrut diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index b1cddbe5a4..50fcfdc812 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -100,7 +100,7 @@ allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets -- SAY WHAT WE ARE ABOUT TO DO ; let rep = cit_rep info_tbl - ; tickyDynAlloc rep lf_info + ; tickyDynAlloc (toRednCountsLbl $ cit_lbl info_tbl) rep lf_info ; profDynAlloc rep use_cc -- FIND THE OFFSET OF THE INFO-PTR WORD @@ -215,7 +215,6 @@ mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info = staticGranHdr ++ staticParHdr ++ staticProfHdr dflags ccs - ++ staticTickyHdr -- JD: Simon had ellided this padding, but without it the C back end asserts -- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary? @@ -527,7 +526,7 @@ heapCheck checkStack checkYield do_gc code stk_hwm | checkStack = Just (CmmLit CmmHighStackMark) | otherwise = Nothing ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc - ; tickyAllocHeap hpHw + ; tickyAllocHeap True hpHw ; doGranAllocate hpHw ; setRealHp hpHw ; code } diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index a3bbefeb44..06a47c151b 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -24,7 +24,7 @@ module StgCmmLayout ( mkVirtHeapOffsets, mkVirtConstrOffsets, getHpRelOffset, hpRel, - ArgRep(..), toArgRep, argRepSizeW + ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep ) where @@ -32,6 +32,7 @@ module StgCmmLayout ( import StgCmmClosure import StgCmmEnv +import StgCmmArgRep -- notably: ( slowCallPattern ) import StgCmmTicky import StgCmmMonad import StgCmmUtils @@ -46,12 +47,11 @@ import CLabel import StgSyn import Id import Name -import TyCon ( PrimRep(..), primElemRepSizeB ) +import TyCon ( PrimRep(..) ) import BasicTypes ( RepArity ) import DynFlags import Module -import Constants import Util import Data.List import Outputable @@ -148,7 +148,7 @@ adjustHpBackwards then mkNop else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp - ; tickyAllocHeap adjust_words -- ...ditto + ; tickyAllocHeap False adjust_words -- ...ditto ; setRealHp vHp } @@ -298,82 +298,6 @@ slowArgs dflags args -- careful: reps contains voids (V), but args does not save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)] save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs") - - --- These cases were found to cover about 99% of all slow calls: -slowCallPattern :: [ArgRep] -> (FastString, RepArity) --- Returns the generic apply function and arity -slowCallPattern (P: P: P: P: P: P: _) = (fsLit "stg_ap_pppppp", 6) -slowCallPattern (P: P: P: P: P: _) = (fsLit "stg_ap_ppppp", 5) -slowCallPattern (P: P: P: P: _) = (fsLit "stg_ap_pppp", 4) -slowCallPattern (P: P: P: V: _) = (fsLit "stg_ap_pppv", 4) -slowCallPattern (P: P: P: _) = (fsLit "stg_ap_ppp", 3) -slowCallPattern (P: P: V: _) = (fsLit "stg_ap_ppv", 3) -slowCallPattern (P: P: _) = (fsLit "stg_ap_pp", 2) -slowCallPattern (P: V: _) = (fsLit "stg_ap_pv", 2) -slowCallPattern (P: _) = (fsLit "stg_ap_p", 1) -slowCallPattern (V: _) = (fsLit "stg_ap_v", 1) -slowCallPattern (N: _) = (fsLit "stg_ap_n", 1) -slowCallPattern (F: _) = (fsLit "stg_ap_f", 1) -slowCallPattern (D: _) = (fsLit "stg_ap_d", 1) -slowCallPattern (L: _) = (fsLit "stg_ap_l", 1) -slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1) -slowCallPattern [] = (fsLit "stg_ap_0", 0) - - -------------------------------------------------------------------------- --- Classifying arguments: ArgRep -------------------------------------------------------------------------- - --- ArgRep is exported, but only for use in the byte-code generator which --- also needs to know about the classification of arguments. - -data ArgRep = P -- GC Ptr - | N -- Word-sized non-ptr - | L -- 64-bit non-ptr (long) - | V -- Void - | F -- Float - | D -- Double - | V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc. -instance Outputable ArgRep where - ppr P = text "P" - ppr N = text "N" - ppr L = text "L" - ppr V = text "V" - ppr F = text "F" - ppr D = text "D" - ppr V16 = text "V16" - -toArgRep :: PrimRep -> ArgRep -toArgRep VoidRep = V -toArgRep PtrRep = P -toArgRep IntRep = N -toArgRep WordRep = N -toArgRep AddrRep = N -toArgRep Int64Rep = L -toArgRep Word64Rep = L -toArgRep FloatRep = F -toArgRep DoubleRep = D -toArgRep (VecRep len elem) - | len*primElemRepSizeB elem == 16 = V16 - | otherwise = error "toArgRep: bad vector primrep" - -isNonV :: ArgRep -> Bool -isNonV V = False -isNonV _ = True - -argRepSizeW :: DynFlags -> ArgRep -> WordOff -- Size in words -argRepSizeW _ N = 1 -argRepSizeW _ P = 1 -argRepSizeW _ F = 1 -argRepSizeW dflags L = wORD64_SIZE `quot` wORD_SIZE dflags -argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags -argRepSizeW _ V = 0 -argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags - -idArgRep :: Id -> ArgRep -idArgRep = toArgRep . idPrimRep - ------------------------------------------------------------------------- ---- Laying out objects on the heap and stack ------------------------------------------------------------------------- diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs index 2bca544ac8..09938a6704 100644 --- a/compiler/codeGen/StgCmmTicky.hs +++ b/compiler/codeGen/StgCmmTicky.hs @@ -6,47 +6,100 @@ -- ----------------------------------------------------------------------------- -{-# OPTIONS -fno-warn-tabs #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and --- detab the module (please do the detabbing in a separate patch). See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces --- for details +{- OVERVIEW: ticky ticky profiling + +Please see +http://hackage.haskell.org/trac/ghc/wiki/Debugging/TickyTicky and also +edit it and the rest of this comment to keep them up-to-date if you +change ticky-ticky. Thanks! + + *** All allocation ticky numbers are in bytes. *** + +Some of the relevant source files: + + ***not necessarily an exhaustive list*** + + * some codeGen/ modules import this one + + * this module imports cmm/CLabel.hs to manage labels + + * cmm/CmmParse.y expands some macros using generators defined in + this module + + * includes/stg/Ticky.h declares all of the global counters + + * includes/rts/Ticky.h declares the C data type for an + STG-declaration's counters + + * some macros defined in includes/Cmm.h (and used within the RTS's + CMM code) update the global ticky counters + + * at the end of execution rts/Ticky.c generates the final report + +RTS -r<report-file> -RTS + +The rts/Ticky.c function that generates the report includes an +STG-declaration's ticky counters if + + * that declaration was entered, or + + * it was allocated (if -ticky-allocd) + +On either of those events, the counter is "registered" by adding it to +a linked list; cf the CMM generated by registerTickyCtr. + +Ticky-ticky profiling has evolved over many years. Many of the +counters from its most sophisticated days are no longer +active/accurate. As the RTS has changed, sometimes the ticky code for +relevant counters was not accordingly updated. Unfortunately, neither +were the comments. + +As of March 2013, there still exist deprecated code and comments in +the code generator as well as the RTS because: + + * I don't know what is out-of-date versus merely commented out for + momentary convenience, and + + * someone else might know how to repair it! + +-} module StgCmmTicky ( - emitTickyCounter, + withNewTickyCounterFun, + withNewTickyCounterThunk, + withNewTickyCounterLNE, - tickyDynAlloc, - tickyAllocHeap, - tickyAllocPrim, - tickyAllocThunk, - tickyAllocPAP, - - tickySlowCall, tickyDirectCall, + tickyDynAlloc, + tickyAllocHeap, + tickyAllocPrim, + tickyAllocThunk, + tickyAllocPAP, - tickyPushUpdateFrame, - tickyUpdateFrameOmitted, + tickyUnknownCall, tickyDirectCall, - tickyEnterDynCon, - tickyEnterStaticCon, - tickyEnterViaNode, + tickyPushUpdateFrame, + tickyUpdateFrameOmitted, - tickyEnterFun, - tickyEnterThunk, + tickyEnterDynCon, + tickyEnterStaticCon, + tickyEnterViaNode, - tickyUpdateBhCaf, - tickyBlackHole, - tickyUnboxedTupleReturn, tickyVectoredReturn, - tickyReturnOldCon, tickyReturnNewCon, + tickyEnterFun, + tickyEnterThunk, + tickyEnterLNE, - tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, - tickyUnknownCall, tickySlowCallPat, + tickyUpdateBhCaf, + tickyBlackHole, + tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyReturnOldCon, tickyReturnNewCon, - staticTickyHdr, + tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, + tickySlowCall, tickySlowCallPat, ) where #include "HsVersions.h" +import StgCmmArgRep ( slowCallPattern , toArgRep , argRepString ) +import StgCmmEnv ( NonVoid, unsafe_stripNV ) import StgCmmClosure import StgCmmUtils import StgCmmMonad @@ -74,52 +127,87 @@ import Type import TyCon import Data.Maybe +import qualified Data.Char +import Control.Monad ( when ) ----------------------------------------------------------------------------- -- --- Ticky-ticky profiling +-- Ticky-ticky profiling -- ----------------------------------------------------------------------------- -staticTickyHdr :: [CmmLit] --- krc: not using this right now -- --- in the new version of ticky-ticky, we --- don't change the closure layout. --- leave it defined, though, to avoid breaking --- other things. -staticTickyHdr = [] - -emitTickyCounter :: ClosureInfo -> [Id] -> FCode () -emitTickyCounter cl_info args - = ifTicky $ - do { dflags <- getDynFlags +data TickyClosureType = TickyFun | TickyThunk | TickyLNE + +withNewTickyCounterFun, withNewTickyCounterLNE :: Name -> [NonVoid Id] -> FCode () -> FCode () +withNewTickyCounterFun = withNewTickyCounter TickyFun + +withNewTickyCounterLNE nm args code = do + b <- tickyLNEIsOn + if not b then code else withNewTickyCounter TickyLNE nm args code + +withNewTickyCounterThunk :: ClosureInfo -> FCode () -> FCode () +withNewTickyCounterThunk cl_info code + | isStaticClosure cl_info = code -- static thunks are uninteresting + | otherwise = do + b <- tickyDynThunkIsOn + if not b then code else withNewTickyCounter TickyThunk (closureName cl_info) [] code + +-- args does not include the void arguments +withNewTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode () -> FCode () +withNewTickyCounter cloType name args m = do + lbl <- emitTickyCounter cloType name args + setTickyCtrLabel lbl m + +emitTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode CLabel +emitTickyCounter cloType name args + = let ctr_lbl = mkRednCountsLabel name in + (>> return ctr_lbl) $ + ifTicky $ do + { dflags <- getDynFlags + ; parent <- getTickyCtrLabel ; mod_name <- getModuleName - ; let ticky_ctr_label = closureRednCountsLabel cl_info - arg_descr = map (showTypeCategory . idType) args - fun_descr mod_name = ppr_for_ticky_name dflags mod_name (closureName cl_info) - ; fun_descr_lit <- newStringCLit (fun_descr mod_name) - ; arg_descr_lit <- newStringCLit arg_descr - ; emitDataLits ticky_ctr_label -- Must match layout of StgEntCounter --- krc: note that all the fields are I32 now; some were I16 before, --- but the code generator wasn't handling that properly and it led to chaos, --- panic and disorder. - [ mkIntCLit dflags 0, - mkIntCLit dflags (length args), -- Arity - mkIntCLit dflags 0, -- XXX: we no longer know this! Words passed on stack - fun_descr_lit, - arg_descr_lit, - zeroCLit dflags, -- Entry count - zeroCLit dflags, -- Allocs - zeroCLit dflags -- Link - ] } - --- When printing the name of a thing in a ticky file, we want to --- give the module name even for *local* things. We print --- just "x (M)" rather that "M.x" to distinguish them from the global kind. -ppr_for_ticky_name :: DynFlags -> Module -> Name -> String -ppr_for_ticky_name dflags mod_name name - | isInternalName name = showSDocDebug dflags (ppr name <+> (parens (ppr mod_name))) - | otherwise = showSDocDebug dflags (ppr name) + + -- When printing the name of a thing in a ticky file, we + -- want to give the module name even for *local* things. We + -- print just "x (M)" rather that "M.x" to distinguish them + -- from the global kind. + ; let ppr_for_ticky_name :: SDoc + ppr_for_ticky_name = + let n = ppr name + p = case hasHaskellName parent of + -- NB the default "top" ticky ctr does not + -- have a Haskell name + Just pname -> text "in" <+> ppr (nameUnique pname) + _ -> empty + in (<+> p) $ if isInternalName name + then let s = n <+> (parens (ppr mod_name)) + in case cloType of + TickyFun -> s + TickyThunk -> s <+> parens (text "thk") + TickyLNE -> s <+> parens (text "LNE") + else case cloType of + TickyFun -> n + TickyThunk -> n <+> parens (text "thk") + TickyLNE -> panic "emitTickyCounter: how is this an external LNE?" + + ; fun_descr_lit <- newStringCLit $ showSDocDebug dflags ppr_for_ticky_name + ; arg_descr_lit <- newStringCLit $ map (showTypeCategory . idType . unsafe_stripNV) args + ; emitDataLits ctr_lbl + -- Must match layout of includes/rts/Ticky.h's StgEntCounter + -- + -- krc: note that all the fields are I32 now; some were I16 + -- before, but the code generator wasn't handling that + -- properly and it led to chaos, panic and disorder. + [ mkIntCLit dflags 0, -- registered? + mkIntCLit dflags (length args), -- Arity + mkIntCLit dflags 0, -- Heap allocated for this thing + fun_descr_lit, + arg_descr_lit, + zeroCLit dflags, -- Entries into this thing + zeroCLit dflags, -- Heap allocated by this thing + zeroCLit dflags -- Link to next StgEntCounter + ] + } -- ----------------------------------------------------------------------------- -- Ticky stack frames @@ -131,10 +219,9 @@ tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter (fsLit "UPDF_OMITTED_ctr") -- ----------------------------------------------------------------------------- -- Ticky entries -tickyEnterDynCon, tickyEnterDynThunk, tickyEnterStaticCon, +tickyEnterDynCon, tickyEnterStaticCon, tickyEnterStaticThunk, tickyEnterViaNode :: FCode () tickyEnterDynCon = ifTicky $ bumpTickyCounter (fsLit "ENT_DYN_CON_ctr") -tickyEnterDynThunk = ifTicky $ bumpTickyCounter (fsLit "ENT_DYN_THK_ctr") tickyEnterStaticCon = ifTicky $ bumpTickyCounter (fsLit "ENT_STATIC_CON_ctr") tickyEnterStaticThunk = ifTicky $ bumpTickyCounter (fsLit "ENT_STATIC_THK_ctr") tickyEnterViaNode = ifTicky $ bumpTickyCounter (fsLit "ENT_VIA_NODE_ctr") @@ -142,41 +229,65 @@ tickyEnterViaNode = ifTicky $ bumpTickyCounter (fsLit "ENT_VIA_NODE_ctr") tickyEnterThunk :: ClosureInfo -> FCode () tickyEnterThunk cl_info | isStaticClosure cl_info = tickyEnterStaticThunk - | otherwise = tickyEnterDynThunk + | otherwise = ifTicky $ do + bumpTickyCounter (fsLit "ENT_DYN_THK_ctr") + ifTickyDynThunk $ do + ticky_ctr_lbl <- getTickyCtrLabel + registerTickyCtrAtEntryDyn ticky_ctr_lbl + bumpTickyEntryCount ticky_ctr_lbl tickyBlackHole :: Bool{-updatable-} -> FCode () tickyBlackHole updatable = ifTicky (bumpTickyCounter ctr) where ctr | updatable = (fsLit "UPD_BH_SINGLE_ENTRY_ctr") - | otherwise = (fsLit "UPD_BH_UPDATABLE_ctr") + | otherwise = (fsLit "UPD_BH_UPDATABLE_ctr") tickyUpdateBhCaf :: ClosureInfo -> FCode () tickyUpdateBhCaf cl_info = ifTicky (bumpTickyCounter ctr) where ctr | closureUpdReqd cl_info = (fsLit "UPD_CAF_BH_SINGLE_ENTRY_ctr") - | otherwise = (fsLit "UPD_CAF_BH_UPDATABLE_ctr") + | otherwise = (fsLit "UPD_CAF_BH_UPDATABLE_ctr") tickyEnterFun :: ClosureInfo -> FCode () -tickyEnterFun cl_info - = ifTicky $ - do { dflags <- getDynFlags - ; bumpTickyCounter ctr - ; fun_ctr_lbl <- getTickyCtrLabel - ; registerTickyCtr fun_ctr_lbl - ; bumpTickyCounter' (cmmLabelOffB fun_ctr_lbl (oFFSET_StgEntCounter_entry_count dflags)) - } - where - ctr | isStaticClosure cl_info = (fsLit "ENT_STATIC_FUN_DIRECT_ctr") - | otherwise = (fsLit "ENT_DYN_FUN_DIRECT_ctr") +tickyEnterFun cl_info = ifTicky $ do + ctr_lbl <- getTickyCtrLabel + + if isStaticClosure cl_info + then do bumpTickyCounter (fsLit "ENT_STATIC_FUN_DIRECT_ctr") + registerTickyCtr ctr_lbl + else do bumpTickyCounter (fsLit "ENT_DYN_FUN_DIRECT_ctr") + registerTickyCtrAtEntryDyn ctr_lbl + + bumpTickyEntryCount ctr_lbl + +tickyEnterLNE :: FCode () +tickyEnterLNE = ifTicky $ do + bumpTickyCounter (fsLit "ENT_LNE_ctr") + ifTickyLNE $ do + ctr_lbl <- getTickyCtrLabel + registerTickyCtr ctr_lbl + bumpTickyEntryCount ctr_lbl + +-- needn't register a counter upon entry if +-- +-- 1) it's for a dynamic closure, and +-- +-- 2) -ticky-allocd is on +-- +-- since the counter was registered already upon being alloc'd +registerTickyCtrAtEntryDyn :: CLabel -> FCode () +registerTickyCtrAtEntryDyn ctr_lbl = do + already_registered <- tickyAllocdIsOn + when (not already_registered) $ registerTickyCtr ctr_lbl registerTickyCtr :: CLabel -> FCode () -- Register a ticky counter -- if ( ! f_ct.registeredp ) { --- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */ --- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */ --- f_ct.registeredp = 1 } +-- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */ +-- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */ +-- f_ct.registeredp = 1 } registerTickyCtr ctr_lbl = do dflags <- getDynFlags let @@ -196,22 +307,22 @@ registerTickyCtr ctr_lbl = do emit =<< mkCmmIfThen test (catAGraphs register_stmts) tickyReturnOldCon, tickyReturnNewCon :: RepArity -> FCode () -tickyReturnOldCon arity +tickyReturnOldCon arity = ifTicky $ do { bumpTickyCounter (fsLit "RET_OLD_ctr") - ; bumpHistogram (fsLit "RET_OLD_hst") arity } -tickyReturnNewCon arity + ; bumpHistogram (fsLit "RET_OLD_hst") arity } +tickyReturnNewCon arity = ifTicky $ do { bumpTickyCounter (fsLit "RET_NEW_ctr") - ; bumpHistogram (fsLit "RET_NEW_hst") arity } + ; bumpHistogram (fsLit "RET_NEW_hst") arity } tickyUnboxedTupleReturn :: RepArity -> FCode () tickyUnboxedTupleReturn arity = ifTicky $ do { bumpTickyCounter (fsLit "RET_UNBOXED_TUP_ctr") - ; bumpHistogram (fsLit "RET_UNBOXED_TUP_hst") arity } + ; bumpHistogram (fsLit "RET_UNBOXED_TUP_hst") arity } tickyVectoredReturn :: Int -> FCode () -tickyVectoredReturn family_size +tickyVectoredReturn family_size = ifTicky $ do { bumpTickyCounter (fsLit "VEC_RETURN_ctr") - ; bumpHistogram (fsLit "RET_VEC_RETURN_hst") family_size } + ; bumpHistogram (fsLit "RET_VEC_RETURN_hst") family_size } -- ----------------------------------------------------------------------------- -- Ticky calls @@ -221,7 +332,7 @@ tickyDirectCall :: RepArity -> [StgArg] -> FCode () tickyDirectCall arity args | arity == length args = tickyKnownCallExact | otherwise = do tickyKnownCallExtraArgs - tickySlowCallPat (map argPrimRep (drop arity args)) + tickySlowCallPat (map argPrimRep (drop arity args)) tickyKnownCallTooFewArgs :: FCode () tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_TOO_FEW_ARGS_ctr") @@ -238,130 +349,224 @@ tickyUnknownCall = ifTicky $ bumpTickyCounter (fsLit "UNKNOWN_CALL_ctr") -- Tick for the call pattern at slow call site (i.e. in addition to -- tickyUnknownCall, tickyKnownCallExtraArgs, etc.) tickySlowCall :: LambdaFormInfo -> [StgArg] -> FCode () -tickySlowCall _ [] - = return () -tickySlowCall lf_info args - = do { if (isKnownFun lf_info) - then tickyKnownCallTooFewArgs - else tickyUnknownCall - ; tickySlowCallPat (map argPrimRep args) } +tickySlowCall _ [] = return () +tickySlowCall lf_info args = do + -- see Note [Ticky for slow calls] + if isKnownFun lf_info + then tickyKnownCallTooFewArgs + else tickyUnknownCall + tickySlowCallPat (map argPrimRep args) tickySlowCallPat :: [PrimRep] -> FCode () -tickySlowCallPat _args = return () -{- LATER: (introduces recursive module dependency now). - case callPattern args of - (str, True) -> bumpTickyCounter' (mkRtsSlowTickyCtrLabel pat) - (str, False) -> bumpTickyCounter (sLit "TICK_SLOW_CALL_OTHER") - --- Don't use CgRep; put this function in StgCmmLayout -callPattern :: [CgRep] -> (String,Bool) -callPattern reps - | match == length reps = (chars, True) - | otherwise = (chars, False) - where (_,match) = findMatch reps - chars = map argChar reps - -argChar VoidArg = 'v' -argChar PtrArg = 'p' -argChar NonPtrArg = 'n' -argChar LongArg = 'l' -argChar FloatArg = 'f' -argChar DoubleArg = 'd' --} +tickySlowCallPat args = ifTicky $ + let argReps = map toArgRep args + (_, n_matched) = slowCallPattern argReps + in if n_matched > 0 && n_matched == length args + then bumpTickyLbl $ mkRtsSlowFastTickyCtrLabel $ concatMap (map Data.Char.toLower . argRepString) argReps + else bumpTickyCounter $ fsLit "VERY_SLOW_CALL_ctr" --- ----------------------------------------------------------------------------- --- Ticky allocation - -tickyDynAlloc :: SMRep -> LambdaFormInfo -> FCode () --- Called when doing a dynamic heap allocation --- LambdaFormInfo only needed to distinguish between updatable/non-updatable thunks -tickyDynAlloc rep lf - = ifTicky $ - case () of - _ | isConRep rep -> tick_alloc_con - | isThunkRep rep -> tick_alloc_thk - | isFunRep rep -> tick_alloc_fun - | otherwise -> return () - where - -- will be needed when we fill in stubs --- _cl_size = heapClosureSize rep --- _slop_size = slopSize cl_info +{- - tick_alloc_thk - | lfUpdatable lf = tick_alloc_up_thk - | otherwise = tick_alloc_se_thk +Note [Ticky for slow calls] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Terminology is unfortunately a bit mixed up for these calls. codeGen +uses "slow call" to refer to unknown calls and under-saturated known +calls. - -- krc: changed from panic to return () - -- just to get something working - tick_alloc_con = return () - tick_alloc_fun = return () - tick_alloc_up_thk = return () - tick_alloc_se_thk = return () +Nowadays, though (ie as of the eval/apply paper), the significantly +slower calls are actually just a subset of these: the ones with no +built-in argument pattern (cf StgCmmArgRep.slowCallPattern) +So for ticky profiling, we split slow calls into +"SLOW_CALL_fast_<pattern>_ctr" (those matching a built-in pattern) and +VERY_SLOW_CALL_ctr (those without a built-in pattern; these are very +bad for both space and time). -tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () -tickyAllocPrim _hdr _goods _slop = ifTicky $ pprTrace "ToDo: tickyAllocPrim" empty (return ()) - -tickyAllocThunk :: CmmExpr -> CmmExpr -> FCode () -tickyAllocThunk _goods _slop = ifTicky $ pprTrace "ToDo: tickyAllocThunk" empty (return ()) +-} -tickyAllocPAP :: CmmExpr -> CmmExpr -> FCode () -tickyAllocPAP _goods _slop = ifTicky $ pprTrace "ToDo: tickyAllocPAP" empty (return ()) +-- ----------------------------------------------------------------------------- +-- Ticky allocation -tickyAllocHeap :: VirtualHpOffset -> FCode () +tickyDynAlloc :: Maybe CLabel -> SMRep -> LambdaFormInfo -> FCode () +-- Called when doing a dynamic heap allocation; the LambdaFormInfo +-- used to distinguish between closure types +-- +-- TODO what else to count while we're here? +tickyDynAlloc mb_ctr_lbl rep lf = ifTicky $ getDynFlags >>= \dflags -> + let bytes = wORD_SIZE dflags * heapClosureSize dflags rep + + countGlobal tot ctr = do + bumpTickyCounterBy tot bytes + bumpTickyCounter ctr + countSpecific = ifTickyAllocd $ case mb_ctr_lbl of + Nothing -> return () + Just ctr_lbl -> do + registerTickyCtr ctr_lbl + bumpTickyAllocd ctr_lbl bytes + + -- TODO are we still tracking "good stuff" (_gds) versus + -- administrative (_adm) versus slop (_slp)? I'm going with all _gds + -- for now, since I don't currently know neither if we do nor how to + -- distinguish. NSF Mar 2013 + + in case () of + _ | isConRep rep -> + countGlobal (fsLit "ALLOC_CON_gds") (fsLit "ALLOC_CON_ctr") + | isThunkRep rep -> + ifTickyDynThunk countSpecific >> + if lfUpdatable lf + then countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_UP_THK_ctr") + else countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_SE_THK_ctr") + | isFunRep rep -> + countSpecific >> + countGlobal (fsLit "ALLOC_FUN_gds") (fsLit "ALLOC_FUN_ctr") + | otherwise -> panic "How is this heap object not a con, thunk, or fun?" + + + +tickyAllocHeap :: + Bool -> -- is this a genuine allocation? As opposed to + -- StgCmmLayout.adjustHpBackwards + VirtualHpOffset -> FCode () -- Called when doing a heap check [TICK_ALLOC_HEAP] -- Must be lazy in the amount of allocation! -tickyAllocHeap hp +tickyAllocHeap genuine hp = ifTicky $ do { dflags <- getDynFlags ; ticky_ctr <- getTickyCtrLabel - ; emit $ catAGraphs $ - if hp == 0 then [] -- Inside the emitMiddle to avoid control - else [ -- dependency on the argument - -- Bump the allcoation count in the StgEntCounter - addToMem (rEP_StgEntCounter_allocs dflags) - (CmmLit (cmmLabelOffB ticky_ctr - (oFFSET_StgEntCounter_allocs dflags))) hp, - -- Bump ALLOC_HEAP_ctr - addToMemLbl (cLong dflags) (mkCmmDataLabel rtsPackageId (fsLit "ALLOC_HEAP_ctr")) 1, - -- Bump ALLOC_HEAP_tot - addToMemLbl (cLong dflags) (mkCmmDataLabel rtsPackageId (fsLit "ALLOC_HEAP_tot")) hp] } + ; emit $ catAGraphs $ + -- only test hp from within the emit so that the monadic + -- computation itself is not strict in hp (cf knot in + -- StgCmmMonad.getHeapUsage) + if hp == 0 then [] + else let !bytes = wORD_SIZE dflags * hp in [ + -- Bump the allocation total in the closure's StgEntCounter + addToMem (rEP_StgEntCounter_allocs dflags) + (CmmLit (cmmLabelOffB ticky_ctr (oFFSET_StgEntCounter_allocs dflags))) + bytes, + -- Bump the global allocation total ALLOC_HEAP_tot + addToMemLbl (cLong dflags) + (mkCmmDataLabel rtsPackageId (fsLit "ALLOC_HEAP_tot")) + bytes, + -- Bump the global allocation counter ALLOC_HEAP_ctr + if not genuine then mkNop + else addToMemLbl (cLong dflags) + (mkCmmDataLabel rtsPackageId (fsLit "ALLOC_HEAP_ctr")) + 1 + ]} + + +-------------------------------------------------------------------------------- +-- these three are only called from CmmParse.y (ie ultimately from the RTS) + +-- the units are bytes + +tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +tickyAllocPrim _hdr _goods _slop = ifTicky $ do + bumpTickyCounter (fsLit "ALLOC_PRIM_ctr") + bumpTickyCounterByE (fsLit "ALLOC_PRIM_adm") _hdr + bumpTickyCounterByE (fsLit "ALLOC_PRIM_gds") _goods + bumpTickyCounterByE (fsLit "ALLOC_PRIM_slp") _slop + +tickyAllocThunk :: CmmExpr -> CmmExpr -> FCode () +tickyAllocThunk _goods _slop = ifTicky $ do + -- TODO is it ever called with a Single-Entry thunk? + bumpTickyCounter (fsLit "ALLOC_UP_THK_ctr") + bumpTickyCounterByE (fsLit "ALLOC_THK_gds") _goods + bumpTickyCounterByE (fsLit "ALLOC_THK_slp") _slop + +tickyAllocPAP :: CmmExpr -> CmmExpr -> FCode () +tickyAllocPAP _goods _slop = ifTicky $ do + bumpTickyCounter (fsLit "ALLOC_PAP_ctr") + bumpTickyCounterByE (fsLit "ALLOC_PAP_gds") _goods + bumpTickyCounterByE (fsLit "ALLOC_PAP_slp") _slop -- ----------------------------------------------------------------------------- -- Ticky utils ifTicky :: FCode () -> FCode () -ifTicky code = do dflags <- getDynFlags - if gopt Opt_Ticky dflags then code - else return () +ifTicky code = + getDynFlags >>= \dflags -> when (gopt Opt_Ticky dflags) code + +tickyAllocdIsOn :: FCode Bool +tickyAllocdIsOn = gopt Opt_Ticky_Allocd `fmap` getDynFlags + +tickyLNEIsOn :: FCode Bool +tickyLNEIsOn = gopt Opt_Ticky_LNE `fmap` getDynFlags + +tickyDynThunkIsOn :: FCode Bool +tickyDynThunkIsOn = gopt Opt_Ticky_Dyn_Thunk `fmap` getDynFlags + +ifTickyAllocd :: FCode () -> FCode () +ifTickyAllocd code = tickyAllocdIsOn >>= \b -> when b code + +ifTickyLNE :: FCode () -> FCode () +ifTickyLNE code = tickyLNEIsOn >>= \b -> when b code + +ifTickyDynThunk :: FCode () -> FCode () +ifTickyDynThunk code = tickyDynThunkIsOn >>= \b -> when b code --- All the ticky-ticky counters are declared "unsigned long" in C bumpTickyCounter :: FastString -> FCode () -bumpTickyCounter lbl = bumpTickyCounter' (cmmLabelOffB (mkCmmDataLabel rtsPackageId lbl) 0) +bumpTickyCounter lbl = bumpTickyLbl (mkCmmDataLabel rtsPackageId lbl) + +bumpTickyCounterBy :: FastString -> Int -> FCode () +bumpTickyCounterBy lbl = bumpTickyLblBy (mkCmmDataLabel rtsPackageId lbl) -bumpTickyCounter' :: CmmLit -> FCode () --- krc: note that we're incrementing the _entry_count_ field of the ticky counter -bumpTickyCounter' lhs = do dflags <- getDynFlags - emit (addToMem (cLong dflags) (CmmLit lhs) 1) +bumpTickyCounterByE :: FastString -> CmmExpr -> FCode () +bumpTickyCounterByE lbl = bumpTickyLblByE (mkCmmDataLabel rtsPackageId lbl) + +bumpTickyEntryCount :: CLabel -> FCode () +bumpTickyEntryCount lbl = do + dflags <- getDynFlags + bumpTickyLit (cmmLabelOffB lbl (oFFSET_StgEntCounter_entry_count dflags)) + +bumpTickyAllocd :: CLabel -> Int -> FCode () +bumpTickyAllocd lbl bytes = do + dflags <- getDynFlags + bumpTickyLitBy (cmmLabelOffB lbl (oFFSET_StgEntCounter_allocd dflags)) bytes + +bumpTickyLbl :: CLabel -> FCode () +bumpTickyLbl lhs = bumpTickyLitBy (cmmLabelOffB lhs 0) 1 + +bumpTickyLblBy :: CLabel -> Int -> FCode () +bumpTickyLblBy lhs = bumpTickyLitBy (cmmLabelOffB lhs 0) + +bumpTickyLblByE :: CLabel -> CmmExpr -> FCode () +bumpTickyLblByE lhs = bumpTickyLitByE (cmmLabelOffB lhs 0) + +bumpTickyLit :: CmmLit -> FCode () +bumpTickyLit lhs = bumpTickyLitBy lhs 1 + +bumpTickyLitBy :: CmmLit -> Int -> FCode () +bumpTickyLitBy lhs n = do + dflags <- getDynFlags + -- All the ticky-ticky counters are declared "unsigned long" in C + emit (addToMem (cLong dflags) (CmmLit lhs) n) + +bumpTickyLitByE :: CmmLit -> CmmExpr -> FCode () +bumpTickyLitByE lhs e = do + dflags <- getDynFlags + -- All the ticky-ticky counters are declared "unsigned long" in C + emit (addToMemE (cLong dflags) (CmmLit lhs) e) bumpHistogram :: FastString -> Int -> FCode () bumpHistogram _lbl _n -- = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongWidth)) - = return () -- TEMP SPJ Apr 07 + = return () -- TEMP SPJ Apr 07 {- bumpHistogramE :: LitString -> CmmExpr -> FCode () -bumpHistogramE lbl n +bumpHistogramE lbl n = do t <- newTemp cLong emitAssign (CmmLocal t) n - emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight]) - (mkAssign (CmmLocal t) eight)) - emit (addToMem cLong - (cmmIndexExpr cLongWidth - (CmmLit (CmmLabel (mkRtsDataLabel lbl))) - (CmmReg (CmmLocal t))) - 1) - where + emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight]) + (mkAssign (CmmLocal t) eight)) + emit (addToMem cLong + (cmmIndexExpr cLongWidth + (CmmLit (CmmLabel (mkRtsDataLabel lbl))) + (CmmReg (CmmLocal t))) + 1) + where eight = CmmLit (CmmInt 8 cLongWidth) -} @@ -369,47 +574,53 @@ bumpHistogramE lbl n -- Showing the "type category" for ticky-ticky profiling showTypeCategory :: Type -> Char - {- {C,I,F,D} char, int, float, double - T tuple - S other single-constructor type - {c,i,f,d} unboxed ditto - t *unpacked* tuple - s *unpacked" single-cons... - - v void# - a primitive array - - E enumeration type - + dictionary, unless it's a ... - L List - > function - M other (multi-constructor) data-con type - . other type - - reserved for others to mark as "uninteresting" + {- + + dictionary + + > function + + {C,I,F,D,W} char, int, float, double, word + {c,i,f,d,w} unboxed ditto + + T tuple + + P other primitive type + p unboxed ditto + + L list + E enumeration type + S other single-constructor type + M other multi-constructor data-con type + + . other type + + - reserved for others to mark as "uninteresting" + + Accurate as of Mar 2013, but I eliminated the Array category instead + of updating it, for simplicity. It's in P/p, I think --NSF + -} showTypeCategory ty - = if isDictTy ty - then '+' - else - case tcSplitTyConApp_maybe ty of - Nothing -> if isJust (tcSplitFunTy_maybe ty) - then '>' - else '.' - - Just (tycon, _) -> - let utc = getUnique tycon in - if utc == charDataConKey then 'C' - else if utc == intDataConKey then 'I' - else if utc == floatDataConKey then 'F' - else if utc == doubleDataConKey then 'D' - else if utc == charPrimTyConKey then 'c' - else if (utc == intPrimTyConKey || utc == wordPrimTyConKey - || utc == addrPrimTyConKey) then 'i' - else if utc == floatPrimTyConKey then 'f' - else if utc == doublePrimTyConKey then 'd' - else if isPrimTyCon tycon {- array, we hope -} then 'A' -- Bogus - else if isEnumerationTyCon tycon then 'E' - else if isTupleTyCon tycon then 'T' - else if isJust (tyConSingleDataCon_maybe tycon) then 'S' - else if utc == listTyConKey then 'L' - else 'M' -- oh, well... + | isDictTy ty = '+' + | otherwise = case tcSplitTyConApp_maybe ty of + Nothing -> '.' + Just (tycon, _) -> + (if isUnLiftedTyCon tycon then Data.Char.toLower else \x -> x) $ + let anyOf us = getUnique tycon `elem` us in + case () of + _ | anyOf [funTyConKey] -> '>' + | anyOf [charPrimTyConKey, charTyConKey] -> 'C' + | anyOf [doublePrimTyConKey, doubleTyConKey] -> 'D' + | anyOf [floatPrimTyConKey, floatTyConKey] -> 'F' + | anyOf [intPrimTyConKey, int32PrimTyConKey, int64PrimTyConKey, + intTyConKey, int8TyConKey, int16TyConKey, int32TyConKey, int64TyConKey + ] -> 'I' + | anyOf [wordPrimTyConKey, word32PrimTyConKey, word64PrimTyConKey, wordTyConKey, + word8TyConKey, word16TyConKey, word32TyConKey, word64TyConKey + ] -> 'W' + | anyOf [listTyConKey] -> 'L' + | isTupleTyCon tycon -> 'T' + | isPrimTyCon tycon -> 'P' + | isEnumerationTyCon tycon -> 'E' + | isJust (tyConSingleDataCon_maybe tycon) -> 'S' + | otherwise -> 'M' -- oh, well... diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index af5e9577b6..3df75ceaa2 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -33,7 +33,7 @@ module StgCmmUtils ( cmmUntag, cmmIsTagged, - addToMem, addToMemE, addToMemLbl, + addToMem, addToMemE, addToMemLblE, addToMemLbl, mkWordCLit, newStringCLit, newByteStringCLit, blankWord @@ -118,6 +118,9 @@ mkSimpleLit _ other = pprPanic "mkSimpleLit" (ppr other) addToMemLbl :: CmmType -> CLabel -> Int -> CmmAGraph addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n +addToMemLblE :: CmmType -> CLabel -> CmmExpr -> CmmAGraph +addToMemLblE rep lbl = addToMemE rep (CmmLit (CmmLabel lbl)) + addToMem :: CmmType -- rep of the counter -> CmmExpr -- Address -> Int -- What to add (a word) diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index a69e25eb7a..cf105a0405 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -209,6 +209,7 @@ Library StgCmmGran StgCmmHeap StgCmmHpc + StgCmmArgRep StgCmmLayout StgCmmMonad StgCmmPrim diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 2f76c359df..ba860622ed 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -341,6 +341,9 @@ data GeneralFlag | Opt_PIC | Opt_SccProfilingOn | Opt_Ticky + | Opt_Ticky_Allocd + | Opt_Ticky_LNE + | Opt_Ticky_Dyn_Thunk | Opt_Static | Opt_RPath | Opt_RelativeDynlibPaths @@ -2086,6 +2089,9 @@ dynamic_flags = [ , Flag "hpcdir" (SepArg setOptHpcDir) , Flag "ghci-script" (hasArg addGhciScript) , Flag "interactive-print" (hasArg setInteractivePrint) + , Flag "ticky-allocd" (NoArg (setGeneralFlag Opt_Ticky_Allocd)) + , Flag "ticky-LNE" (NoArg (setGeneralFlag Opt_Ticky_LNE)) + , Flag "ticky-dyn-thunk" (NoArg (setGeneralFlag Opt_Ticky_Dyn_Thunk)) ------- recompilation checker -------------------------------------- , Flag "recomp" (NoArg (do unSetGeneralFlag Opt_ForceRecomp deprecate "Use -fno-force-recomp instead")) diff --git a/docs/users_guide/profiling.xml b/docs/users_guide/profiling.xml index 3937642a06..a776382645 100644 --- a/docs/users_guide/profiling.xml +++ b/docs/users_guide/profiling.xml @@ -1769,164 +1769,12 @@ Options: <title>Using “ticky-ticky” profiling (for implementors)</title> <indexterm><primary>ticky-ticky profiling</primary></indexterm> - <para>(ToDo: document properly.)</para> - - <para>It is possible to compile Haskell programs so that - they will count lots and lots of interesting things, e.g., number - of updates, number of data constructors entered, etc., etc. We - call this “ticky-ticky” - profiling,<indexterm><primary>ticky-ticky - profiling</primary></indexterm> <indexterm><primary>profiling, - ticky-ticky</primary></indexterm> because that's the sound a CPU - makes when it is running up all those counters - (<emphasis>slowly</emphasis>).</para> - - <para>Ticky-ticky profiling is mainly intended for implementors; - it is quite separate from the main “cost-centre” - profiling system, intended for all users everywhere.</para> - - <para> - You don't need to build GHC, the libraries, or the RTS a special - way in order to use ticky-ticky profiling. You can decide on a - module-by-module basis which parts of a program have the - counters compiled in, using the - compile-time <option>-ticky</option> option. Those modules that - were not compiled with <option>-ticky</option> won't contribute - to the ticky-ticky profiling results, and that will normally - include all the pre-compiled packages that your program links - with. - </para> - - <para> - To get your compiled program to spit out the ticky-ticky - numbers: - - <itemizedlist> - <listitem> - <para> - Link the program with <option>-debug</option> - (<option>-ticky</option> is a synonym - for <option>-debug</option> at link-time). This links in - the debug version of the RTS, which includes the code for - aggregating and reporting the results of ticky-ticky - profiling. - </para> - </listitem> - <listitem> - <para> - Run the program with the <option>-r</option> RTS - option<indexterm><primary>-r RTS option</primary></indexterm>. - See <xref linkend="runtime-control"/>. - </para> - </listitem> - </itemizedlist> - </para> - - <para> - Here is a sample ticky-ticky statistics file, generated by - the invocation - <command>foo +RTS -rfoo.ticky</command>. - </para> - -<screen> - foo +RTS -rfoo.ticky - -ALLOCATIONS: 3964631 (11330900 words total: 3999476 admin, 6098829 goods, 1232595 slop) - total words: 2 3 4 5 6+ - 69647 ( 1.8%) function values 50.0 50.0 0.0 0.0 0.0 -2382937 ( 60.1%) thunks 0.0 83.9 16.1 0.0 0.0 -1477218 ( 37.3%) data values 66.8 33.2 0.0 0.0 0.0 - 0 ( 0.0%) big tuples - 2 ( 0.0%) black holes 0.0 100.0 0.0 0.0 0.0 - 0 ( 0.0%) prim things - 34825 ( 0.9%) partial applications 0.0 0.0 0.0 100.0 0.0 - 2 ( 0.0%) thread state objects 0.0 0.0 0.0 0.0 100.0 - -Total storage-manager allocations: 3647137 (11882004 words) - [551104 words lost to speculative heap-checks] - -STACK USAGE: - -ENTERS: 9400092 of which 2005772 (21.3%) direct to the entry code - [the rest indirected via Node's info ptr] -1860318 ( 19.8%) thunks -3733184 ( 39.7%) data values -3149544 ( 33.5%) function values - [of which 1999880 (63.5%) bypassed arg-satisfaction chk] - 348140 ( 3.7%) partial applications - 308906 ( 3.3%) normal indirections - 0 ( 0.0%) permanent indirections - -RETURNS: 5870443 -2137257 ( 36.4%) from entering a new constructor - [the rest from entering an existing constructor] -2349219 ( 40.0%) vectored [the rest unvectored] - -RET_NEW: 2137257: 32.5% 46.2% 21.3% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% -RET_OLD: 3733184: 2.8% 67.9% 29.3% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% -RET_UNBOXED_TUP: 2: 0.0% 0.0%100.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% - -RET_VEC_RETURN : 2349219: 0.0% 0.0%100.0% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% - -UPDATE FRAMES: 2241725 (0 omitted from thunks) -SEQ FRAMES: 1 -CATCH FRAMES: 1 -UPDATES: 2241725 - 0 ( 0.0%) data values - 34827 ( 1.6%) partial applications - [2 in place, 34825 allocated new space] -2206898 ( 98.4%) updates to existing heap objects (46 by squeezing) -UPD_CON_IN_NEW: 0: 0 0 0 0 0 0 0 0 0 -UPD_PAP_IN_NEW: 34825: 0 0 0 34825 0 0 0 0 0 - -NEW GEN UPDATES: 2274700 ( 99.9%) - -OLD GEN UPDATES: 1852 ( 0.1%) - -Total bytes copied during GC: 190096 - -************************************************** -3647137 ALLOC_HEAP_ctr -11882004 ALLOC_HEAP_tot - 69647 ALLOC_FUN_ctr - 69647 ALLOC_FUN_adm - 69644 ALLOC_FUN_gds - 34819 ALLOC_FUN_slp - 34831 ALLOC_FUN_hst_0 - 34816 ALLOC_FUN_hst_1 - 0 ALLOC_FUN_hst_2 - 0 ALLOC_FUN_hst_3 - 0 ALLOC_FUN_hst_4 -2382937 ALLOC_UP_THK_ctr - 0 ALLOC_SE_THK_ctr - 308906 ENT_IND_ctr - 0 E!NT_PERM_IND_ctr requires +RTS -Z -[... lots more info omitted ...] - 0 GC_SEL_ABANDONED_ctr - 0 GC_SEL_MINOR_ctr - 0 GC_SEL_MAJOR_ctr - 0 GC_FAILED_PROMOTION_ctr - 47524 GC_WORDS_COPIED_ctr -</screen> - - <para>The formatting of the information above the row of asterisks - is subject to change, but hopefully provides a useful - human-readable summary. Below the asterisks <emphasis>all - counters</emphasis> maintained by the ticky-ticky system are - dumped, in a format intended to be machine-readable: zero or more - spaces, an integer, a space, the counter name, and a newline.</para> - - <para>In fact, not <emphasis>all</emphasis> counters are - necessarily dumped; compile- or run-time flags can render certain - counters invalid. In this case, either the counter will simply - not appear, or it will appear with a modified counter name, - possibly along with an explanation for the omission (notice - <literal>ENT_PERM_IND_ctr</literal> appears - with an inserted <literal>!</literal> above). Software analysing - this output should always check that it has the counters it - expects. Also, beware: some of the counters can have - <emphasis>large</emphasis> values!</para> - + <para>Because ticky-ticky profiling requires a certain familiarity + with GHC internals, we have moved the documentation to the + wiki. Take a look at its <ulink + url="http://hackage.haskell.org/trac/ghc/wiki/Commentary/Profiling">overview + of the profiling options</ulink>, which includeds a link to the + ticky-ticky profiling page.</para> </sect1> </chapter> diff --git a/includes/Cmm.h b/includes/Cmm.h index ca8e51af78..7e051c1a30 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -394,16 +394,18 @@ /* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */ #define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS) -#define HP_CHK_GEN_TICKY(alloc) \ - HP_CHK_GEN(alloc); \ - TICK_ALLOC_HEAP_NOCTR(alloc); +#define HP_CHK_GEN_TICKY(bytes) \ + HP_CHK_GEN(bytes); \ + TICK_ALLOC_HEAP_NOCTR(bytes); #define HP_CHK_P(bytes, fun, arg) \ HEAP_CHECK(bytes, GC_PRIM_P(fun,arg)) -#define ALLOC_P_TICKY(alloc, fun, arg) \ - HP_CHK_P(alloc); \ - TICK_ALLOC_HEAP_NOCTR(alloc); +// TODO I'm not seeing where ALLOC_P_TICKY is used; can it be removed? +// -NSF March 2013 +#define ALLOC_P_TICKY(bytes, fun, arg) \ + HP_CHK_P(bytes); \ + TICK_ALLOC_HEAP_NOCTR(bytes); #define CHECK_GC() \ (bdescr_link(CurrentNursery) == NULL || \ @@ -610,6 +612,7 @@ #define TICK_ENT_AP() TICK_BUMP(ENT_AP_ctr) #define TICK_ENT_AP_STACK() TICK_BUMP(ENT_AP_STACK_ctr) #define TICK_ENT_BH() TICK_BUMP(ENT_BH_ctr) +#define TICK_ENT_LNE() TICK_BUMP(ENT_LNE_ctr) #define TICK_UNKNOWN_CALL() TICK_BUMP(UNKNOWN_CALL_ctr) #define TICK_UPDF_PUSHED() TICK_BUMP(UPDF_PUSHED_ctr) #define TICK_CATCHF_PUSHED() TICK_BUMP(CATCHF_PUSHED_ctr) @@ -626,14 +629,18 @@ #define TICK_SLOW_CALL_PAP_CORRECT() TICK_BUMP(SLOW_CALL_PAP_CORRECT_ctr) #define TICK_SLOW_CALL_PAP_TOO_MANY() TICK_BUMP(SLOW_CALL_PAP_TOO_MANY_ctr) -#define TICK_SLOW_CALL_v() TICK_BUMP(SLOW_CALL_v_ctr) -#define TICK_SLOW_CALL_p() TICK_BUMP(SLOW_CALL_p_ctr) -#define TICK_SLOW_CALL_pv() TICK_BUMP(SLOW_CALL_pv_ctr) -#define TICK_SLOW_CALL_pp() TICK_BUMP(SLOW_CALL_pp_ctr) -#define TICK_SLOW_CALL_ppp() TICK_BUMP(SLOW_CALL_ppp_ctr) -#define TICK_SLOW_CALL_pppp() TICK_BUMP(SLOW_CALL_pppp_ctr) -#define TICK_SLOW_CALL_ppppp() TICK_BUMP(SLOW_CALL_ppppp_ctr) -#define TICK_SLOW_CALL_pppppp() TICK_BUMP(SLOW_CALL_pppppp_ctr) +#define TICK_SLOW_CALL_fast_v16() TICK_BUMP(SLOW_CALL_fast_v16_ctr) +#define TICK_SLOW_CALL_fast_v() TICK_BUMP(SLOW_CALL_fast_v_ctr) +#define TICK_SLOW_CALL_fast_p() TICK_BUMP(SLOW_CALL_fast_p_ctr) +#define TICK_SLOW_CALL_fast_pv() TICK_BUMP(SLOW_CALL_fast_pv_ctr) +#define TICK_SLOW_CALL_fast_pp() TICK_BUMP(SLOW_CALL_fast_pp_ctr) +#define TICK_SLOW_CALL_fast_ppv() TICK_BUMP(SLOW_CALL_fast_ppv_ctr) +#define TICK_SLOW_CALL_fast_ppp() TICK_BUMP(SLOW_CALL_fast_ppp_ctr) +#define TICK_SLOW_CALL_fast_pppv() TICK_BUMP(SLOW_CALL_fast_pppv_ctr) +#define TICK_SLOW_CALL_fast_pppp() TICK_BUMP(SLOW_CALL_fast_pppp_ctr) +#define TICK_SLOW_CALL_fast_ppppp() TICK_BUMP(SLOW_CALL_fast_ppppp_ctr) +#define TICK_SLOW_CALL_fast_pppppp() TICK_BUMP(SLOW_CALL_fast_pppppp_ctr) +#define TICK_VERY_SLOW_CALL() TICK_BUMP(VERY_SLOW_CALL_ctr) /* NOTE: TICK_HISTO_BY and TICK_HISTO currently have no effect. @@ -672,9 +679,9 @@ TICK_BUMP(UPD_CON_IN_NEW_ctr); \ TICK_HISTO(UPD_CON_IN_NEW,n) -#define TICK_ALLOC_HEAP_NOCTR(n) \ - TICK_BUMP(ALLOC_HEAP_ctr); \ - TICK_BUMP_BY(ALLOC_HEAP_tot,n) +#define TICK_ALLOC_HEAP_NOCTR(bytes) \ + TICK_BUMP(ALLOC_RTS_ctr); \ + TICK_BUMP_BY(ALLOC_RTS_tot,bytes) /* ----------------------------------------------------------------------------- Saving and restoring STG registers diff --git a/includes/rts/Ticky.h b/includes/rts/Ticky.h index 7045854cde..304baf7592 100644 --- a/includes/rts/Ticky.h +++ b/includes/rts/Ticky.h @@ -23,7 +23,7 @@ typedef struct _StgEntCounter { generators make trouble if you try to pack things tighter */ StgWord registeredp; /* 0 == no, 1 == yes */ StgInt arity; /* arity (static info) */ - StgInt stk_args; /* # of args off stack */ + StgInt allocd; /* # allocation of this closure */ /* (rest of args are in registers) */ char *str; /* name of the thing */ char *arg_kinds; /* info about the args types */ diff --git a/includes/stg/Ticky.h b/includes/stg/Ticky.h index a811aec4eb..32a7f2006d 100644 --- a/includes/stg/Ticky.h +++ b/includes/stg/Ticky.h @@ -59,24 +59,26 @@ EXTERN StgInt ENT_PAP_ctr INIT(0); EXTERN StgInt ENT_AP_ctr INIT(0); EXTERN StgInt ENT_AP_STACK_ctr INIT(0); EXTERN StgInt ENT_BH_ctr INIT(0); +EXTERN StgInt ENT_LNE_ctr INIT(0); EXTERN StgInt UNKNOWN_CALL_ctr INIT(0); -EXTERN StgInt SLOW_CALL_v_ctr INIT(0); -EXTERN StgInt SLOW_CALL_f_ctr INIT(0); -EXTERN StgInt SLOW_CALL_d_ctr INIT(0); -EXTERN StgInt SLOW_CALL_l_ctr INIT(0); -EXTERN StgInt SLOW_CALL_n_ctr INIT(0); -EXTERN StgInt SLOW_CALL_p_ctr INIT(0); -EXTERN StgInt SLOW_CALL_pv_ctr INIT(0); -EXTERN StgInt SLOW_CALL_pp_ctr INIT(0); -EXTERN StgInt SLOW_CALL_ppv_ctr INIT(0); -EXTERN StgInt SLOW_CALL_ppp_ctr INIT(0); -EXTERN StgInt SLOW_CALL_pppv_ctr INIT(0); -EXTERN StgInt SLOW_CALL_pppp_ctr INIT(0); -EXTERN StgInt SLOW_CALL_ppppp_ctr INIT(0); -EXTERN StgInt SLOW_CALL_pppppp_ctr INIT(0); -EXTERN StgInt SLOW_CALL_OTHER_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_v16_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_v_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_f_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_d_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_l_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_n_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_p_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_pv_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_pp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_ppv_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_ppp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_pppv_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_pppp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_ppppp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_pppppp_ctr INIT(0); +EXTERN StgInt VERY_SLOW_CALL_ctr INIT(0); EXTERN StgInt ticky_slow_call_unevald; EXTERN StgInt SLOW_CALL_ctr INIT(0); @@ -107,12 +109,15 @@ EXTERN StgInt UPD_PAP_IN_NEW_ctr INIT(0); EXTERN StgInt UPD_PAP_IN_PLACE_ctr INIT(0); EXTERN StgInt ALLOC_HEAP_ctr INIT(0); -EXTERN StgInt ALLOC_HEAP_tot; +EXTERN StgInt ALLOC_HEAP_tot INIT(0); + +EXTERN StgInt ALLOC_RTS_ctr INIT(0); +EXTERN StgInt ALLOC_RTS_tot INIT(0); EXTERN StgInt ALLOC_FUN_ctr INIT(0); -EXTERN StgInt ALLOC_FUN_adm; -EXTERN StgInt ALLOC_FUN_gds; -EXTERN StgInt ALLOC_FUN_slp; +EXTERN StgInt ALLOC_FUN_adm INIT(0); +EXTERN StgInt ALLOC_FUN_gds INIT(0); +EXTERN StgInt ALLOC_FUN_slp INIT(0); EXTERN StgInt UPD_NEW_IND_ctr INIT(0); EXTERN StgInt UPD_NEW_PERM_IND_ctr INIT(0); @@ -184,11 +189,14 @@ EXTERN StgInt RET_SEMI_loads_avoided INIT(0); TICKY_TICKY is defined or not. */ #ifndef CMINUSMINUS +#define TICK_BUMP_BY(ctr,n) ctr = (StgInt) ctr + n +#define TICK_BUMP(ctr) TICK_BUMP_BY(ctr,1) + #define TICK_ALLOC_PRIM(x,y,z) #define TICK_UPD_OLD_IND() #define TICK_UPD_NEW_IND() #define TICK_UPD_SQUEEZED() -#define TICK_ALLOC_HEAP_NOCTR(x) +#define TICK_ALLOC_HEAP_NOCTR(bytes) #define TICK_GC_FAILED_PROMOTION() #define TICK_ALLOC_TSO() #define TICK_ALLOC_STACK(g) diff --git a/mk/ways.mk b/mk/ways.mk index 5608dbd9ef..cbe035b4fb 100644 --- a/mk/ways.mk +++ b/mk/ways.mk @@ -70,7 +70,7 @@ WAY_thr_l_HC_OPTS= -static -optc-DTHREADED_RTS -eventlog # Way 'debug': WAY_debug_NAME=debug -WAY_debug_HC_OPTS= -static -optc-DDEBUG +WAY_debug_HC_OPTS= -static -optc-DDEBUG -ticky -DTICKY_TICKY # Way 'debug_p': WAY_debug_p_NAME=debug profiled @@ -104,7 +104,7 @@ WAY_thr_debug_dyn_HC_OPTS=-fPIC -dynamic -optc-DTHREADED_RTS -optc-DDEBUG # Way 'debug_dyn': WAY_debug_dyn_NAME=debug_dyn -WAY_debug_dyn_HC_OPTS=-fPIC -dynamic -optc-DDEBUG +WAY_debug_dyn_HC_OPTS=-fPIC -dynamic -optc-DDEBUG -ticky -DTICKY_TICKY # Way 'l_dyn': WAY_l_dyn_NAME=event logging dynamic diff --git a/rts/AutoApply.h b/rts/AutoApply.h index c5dbbcd344..c48bdf4701 100644 --- a/rts/AutoApply.h +++ b/rts/AutoApply.h @@ -19,8 +19,7 @@ W_ i; \ size = SIZEOF_StgPAP + WDS(n); \ HP_CHK_NP_ASSIGN_SP0(size,f); \ - TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \ - TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \ + TICK_ALLOC_PAP(size, 0); \ pap = Hp + WDS(1) - size; \ SET_HDR(pap, stg_PAP_info, CCCS); \ StgPAP_arity(pap) = HALF_W_(arity - m); \ @@ -49,8 +48,7 @@ pap = R1; \ size = SIZEOF_StgPAP + WDS(TO_W_(StgPAP_n_args(pap))) + WDS(n); \ HP_CHK_NP_ASSIGN_SP0(size,f); \ - TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \ - TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \ + TICK_ALLOC_PAP(size, 0); \ new_pap = Hp + WDS(1) - size; \ SET_HDR(new_pap, stg_PAP_info, CCCS); \ StgPAP_arity(new_pap) = HALF_W_(arity - m); \ diff --git a/rts/Exception.cmm b/rts/Exception.cmm index 5b656fab5c..25da0d6d80 100644 --- a/rts/Exception.cmm +++ b/rts/Exception.cmm @@ -150,7 +150,7 @@ stg_maskAsyncExceptionszh /* explicit stack */ TO_W_(StgTSO_flags(CurrentTSO)) | TSO_BLOCKEX | TSO_INTERRUPTIBLE); TICK_UNKNOWN_CALL(); - TICK_SLOW_CALL_v(); + TICK_SLOW_CALL_fast_v(); jump stg_ap_v_fast [R1]; } @@ -178,7 +178,7 @@ stg_maskUninterruptiblezh /* explicit stack */ (TO_W_(StgTSO_flags(CurrentTSO)) | TSO_BLOCKEX) & ~TSO_INTERRUPTIBLE); TICK_UNKNOWN_CALL(); - TICK_SLOW_CALL_v(); + TICK_SLOW_CALL_fast_v(); jump stg_ap_v_fast [R1]; } @@ -253,7 +253,7 @@ stg_unmaskAsyncExceptionszh /* explicit stack */ } TICK_UNKNOWN_CALL(); - TICK_SLOW_CALL_v(); + TICK_SLOW_CALL_fast_v(); R1 = io; jump stg_ap_v_fast [R1]; } @@ -394,7 +394,7 @@ stg_catchzh ( P_ io, /* :: IO a */ /* Apply R1 to the realworld token */ TICK_UNKNOWN_CALL(); - TICK_SLOW_CALL_v(); + TICK_SLOW_CALL_fast_v(); jump stg_ap_v_fast (CATCH_FRAME_FIELDS(,,stg_catch_frame_info, CCCS, 0, @@ -612,7 +612,7 @@ retry_pop_stack: R1 = handler; Sp_adj(-1); TICK_UNKNOWN_CALL(); - TICK_SLOW_CALL_pv(); + TICK_SLOW_CALL_fast_pv(); jump RET_LBL(stg_ap_pv) [R1]; } diff --git a/rts/Linker.c b/rts/Linker.c index db27c3f300..3f66313b76 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -914,22 +914,24 @@ typedef struct _RtsSymbolVal { SymI_HasProto(ENT_AP_ctr) \ SymI_HasProto(ENT_AP_STACK_ctr) \ SymI_HasProto(ENT_BH_ctr) \ + SymI_HasProto(ENT_LNE_ctr) \ SymI_HasProto(UNKNOWN_CALL_ctr) \ - SymI_HasProto(SLOW_CALL_v_ctr) \ - SymI_HasProto(SLOW_CALL_f_ctr) \ - SymI_HasProto(SLOW_CALL_d_ctr) \ - SymI_HasProto(SLOW_CALL_l_ctr) \ - SymI_HasProto(SLOW_CALL_n_ctr) \ - SymI_HasProto(SLOW_CALL_p_ctr) \ - SymI_HasProto(SLOW_CALL_pv_ctr) \ - SymI_HasProto(SLOW_CALL_pp_ctr) \ - SymI_HasProto(SLOW_CALL_ppv_ctr) \ - SymI_HasProto(SLOW_CALL_ppp_ctr) \ - SymI_HasProto(SLOW_CALL_pppv_ctr) \ - SymI_HasProto(SLOW_CALL_pppp_ctr) \ - SymI_HasProto(SLOW_CALL_ppppp_ctr) \ - SymI_HasProto(SLOW_CALL_pppppp_ctr) \ - SymI_HasProto(SLOW_CALL_OTHER_ctr) \ + SymI_HasProto(SLOW_CALL_fast_v16_ctr) \ + SymI_HasProto(SLOW_CALL_fast_v_ctr) \ + SymI_HasProto(SLOW_CALL_fast_f_ctr) \ + SymI_HasProto(SLOW_CALL_fast_d_ctr) \ + SymI_HasProto(SLOW_CALL_fast_l_ctr) \ + SymI_HasProto(SLOW_CALL_fast_n_ctr) \ + SymI_HasProto(SLOW_CALL_fast_p_ctr) \ + SymI_HasProto(SLOW_CALL_fast_pv_ctr) \ + SymI_HasProto(SLOW_CALL_fast_pp_ctr) \ + SymI_HasProto(SLOW_CALL_fast_ppv_ctr) \ + SymI_HasProto(SLOW_CALL_fast_ppp_ctr) \ + SymI_HasProto(SLOW_CALL_fast_pppv_ctr) \ + SymI_HasProto(SLOW_CALL_fast_pppp_ctr) \ + SymI_HasProto(SLOW_CALL_fast_ppppp_ctr) \ + SymI_HasProto(SLOW_CALL_fast_pppppp_ctr) \ + SymI_HasProto(VERY_SLOW_CALL_ctr) \ SymI_HasProto(ticky_slow_call_unevald) \ SymI_HasProto(SLOW_CALL_ctr) \ SymI_HasProto(MULTI_CHUNK_SLOW_CALL_ctr) \ @@ -956,6 +958,8 @@ typedef struct _RtsSymbolVal { SymI_HasProto(UPD_PAP_IN_PLACE_ctr) \ SymI_HasProto(ALLOC_HEAP_ctr) \ SymI_HasProto(ALLOC_HEAP_tot) \ + SymI_HasProto(ALLOC_RTS_ctr) \ + SymI_HasProto(ALLOC_RTS_tot) \ SymI_HasProto(ALLOC_FUN_ctr) \ SymI_HasProto(ALLOC_FUN_adm) \ SymI_HasProto(ALLOC_FUN_gds) \ diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index f5669cb8ec..11f518a87d 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -829,7 +829,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, SET_HDR(ap,&stg_AP_STACK_info, ((StgClosure *)frame)->header.prof.ccs /* ToDo */); - TICK_ALLOC_UP_THK(words+1,0); + TICK_ALLOC_UP_THK(WDS(words+1),0); //IF_DEBUG(scheduler, // debugBelch("sched: Updating "); @@ -880,7 +880,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, SET_HDR(ap,&stg_AP_STACK_NOUPD_info, ((StgClosure *)frame)->header.prof.ccs /* ToDo */); - TICK_ALLOC_SE_THK(words+1,0); + TICK_ALLOC_SE_THK(WDS(words+1),0); stack->sp = sp; threadStackUnderflow(cap,tso); @@ -916,7 +916,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, // handler in this frame. // raise = (StgThunk *)allocate(cap,sizeofW(StgThunk)+1); - TICK_ALLOC_SE_THK(1,0); + TICK_ALLOC_SE_THK(WDS(1),0); SET_HDR(raise,&stg_raise_info,cf->header.prof.ccs); raise->payload[0] = exception; diff --git a/rts/Ticky.c b/rts/Ticky.c index af33805d69..243897fbe4 100644 --- a/rts/Ticky.c +++ b/rts/Ticky.c @@ -142,14 +142,14 @@ PrintTickyInfo(void) (PC(INTAVG(ALLOC_##categ##_hst[3], ALLOC_##categ##_ctr))), \ (PC(INTAVG(ALLOC_##categ##_hst[4], ALLOC_##categ##_ctr))) - fprintf(tf,"%7ld (%5.1f%%) function values", + fprintf(tf,"%11ld (%5.1f%%) function values", ALLOC_FUN_ctr, PC(INTAVG(ALLOC_FUN_ctr, tot_allocs))); if (ALLOC_FUN_ctr != 0) fprintf(tf,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(FUN)); - fprintf(tf,"\n%7ld (%5.1f%%) thunks", + fprintf(tf,"\n%11ld (%5.1f%%) thunks", ALLOC_SE_THK_ctr + ALLOC_UP_THK_ctr, PC(INTAVG(ALLOC_SE_THK_ctr + ALLOC_UP_THK_ctr, tot_allocs))); @@ -159,37 +159,37 @@ PrintTickyInfo(void) fprintf(tf,"\t\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(THK)); #undef ALLOC_THK_ctr - fprintf(tf,"\n%7ld (%5.1f%%) data values", + fprintf(tf,"\n%11ld (%5.1f%%) data values", ALLOC_CON_ctr, PC(INTAVG(ALLOC_CON_ctr, tot_allocs))); if (ALLOC_CON_ctr != 0) fprintf(tf,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(CON)); - fprintf(tf,"\n%7ld (%5.1f%%) big tuples", + fprintf(tf,"\n%11ld (%5.1f%%) big tuples", ALLOC_TUP_ctr, PC(INTAVG(ALLOC_TUP_ctr, tot_allocs))); if (ALLOC_TUP_ctr != 0) fprintf(tf,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(TUP)); - fprintf(tf,"\n%7ld (%5.1f%%) black holes", + fprintf(tf,"\n%11ld (%5.1f%%) black holes", ALLOC_BH_ctr, PC(INTAVG(ALLOC_BH_ctr, tot_allocs))); if (ALLOC_BH_ctr != 0) fprintf(tf,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(BH)); - fprintf(tf,"\n%7ld (%5.1f%%) prim things", + fprintf(tf,"\n%11ld (%5.1f%%) prim things", ALLOC_PRIM_ctr, PC(INTAVG(ALLOC_PRIM_ctr, tot_allocs))); if (ALLOC_PRIM_ctr != 0) fprintf(tf,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(PRIM)); - fprintf(tf,"\n%7ld (%5.1f%%) partial applications", + fprintf(tf,"\n%11ld (%5.1f%%) partial applications", ALLOC_PAP_ctr, PC(INTAVG(ALLOC_PAP_ctr, tot_allocs))); if (ALLOC_PAP_ctr != 0) fprintf(tf,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(PAP)); - fprintf(tf,"\n%7ld (%5.1f%%) thread state objects", + fprintf(tf,"\n%11ld (%5.1f%%) thread state objects", ALLOC_TSO_ctr, PC(INTAVG(ALLOC_TSO_ctr, tot_allocs))); if (ALLOC_TSO_ctr != 0) @@ -208,16 +208,16 @@ PrintTickyInfo(void) tot_enters, jump_direct_enters, PC(INTAVG(jump_direct_enters,tot_enters))); - fprintf(tf,"%7ld (%5.1f%%) thunks\n", + fprintf(tf,"%11ld (%5.1f%%) thunks\n", tot_thk_enters, PC(INTAVG(tot_thk_enters,tot_enters))); - fprintf(tf,"%7ld (%5.1f%%) data values\n", + fprintf(tf,"%11ld (%5.1f%%) data values\n", tot_con_enters, PC(INTAVG(tot_con_enters,tot_enters))); - fprintf(tf,"%7ld (%5.1f%%) normal indirections\n", + fprintf(tf,"%11ld (%5.1f%%) normal indirections\n", tot_ind_enters, PC(INTAVG(tot_ind_enters,tot_enters))); - fprintf(tf,"%7" FMT_Int " (%5.1f%%) permanent indirections\n", + fprintf(tf,"%11" FMT_Int " (%5.1f%%) permanent indirections\n", ENT_PERM_IND_ctr, PC(INTAVG(ENT_PERM_IND_ctr,tot_enters))); @@ -239,22 +239,22 @@ PrintTickyInfo(void) fprintf(tf, "\n"); fprintf(tf,"\nRETURNS: %ld\n", tot_returns); - fprintf(tf,"%7ld (%5.1f%%) from entering a new constructor\n\t\t [the rest from entering an existing constructor]\n", + fprintf(tf,"%11ld (%5.1f%%) from entering a new constructor\n\t\t [the rest from entering an existing constructor]\n", tot_returns_of_new, PC(INTAVG(tot_returns_of_new,tot_returns))); /* krc: comment out some of this stuff temporarily */ /* - fprintf(tf, "\nRET_NEW: %7ld: ", RET_NEW_ctr); + fprintf(tf, "\nRET_NEW: %11ld: ", RET_NEW_ctr); for (i = 0; i < 9; i++) { fprintf(tf, "%5.1f%%", PC(INTAVG(RET_NEW_hst[i],RET_NEW_ctr))); } fprintf(tf, "\n"); - fprintf(tf, "RET_OLD: %7ld: ", RET_OLD_ctr); + fprintf(tf, "RET_OLD: %11ld: ", RET_OLD_ctr); for (i = 0; i < 9; i++) { fprintf(tf, "%5.1f%%", PC(INTAVG(RET_OLD_hst[i],RET_OLD_ctr))); } fprintf(tf, "\n"); - fprintf(tf, "RET_UNBOXED_TUP: %7ld: ", RET_UNBOXED_TUP_ctr); + fprintf(tf, "RET_UNBOXED_TUP: %11ld: ", RET_UNBOXED_TUP_ctr); for (i = 0; i < 9; i++) { fprintf(tf, "%5.1f%%", PC(INTAVG(RET_UNBOXED_TUP_hst[i], RET_UNBOXED_TUP_ctr))); } @@ -268,33 +268,33 @@ PrintTickyInfo(void) fprintf(tf,"\nCATCH FRAMES: %" FMT_Int "", CATCHF_PUSHED_ctr); if (UPDF_RCC_PUSHED_ctr != 0) - fprintf(tf,"%7" FMT_Int " restore cost centre frames (%" FMT_Int " omitted)\n", + fprintf(tf,"%11" FMT_Int " restore cost centre frames (%" FMT_Int " omitted)\n", UPDF_RCC_PUSHED_ctr, UPDF_RCC_OMITTED_ctr); fprintf(tf,"\nUPDATES: %ld\n", tot_updates); - fprintf(tf,"%7ld (%5.1f%%) data values\n\t\t [%" FMT_Int " in place, %" FMT_Int " allocated new space]\n", + fprintf(tf,"%11ld (%5.1f%%) data values\n\t\t [%" FMT_Int " in place, %" FMT_Int " allocated new space]\n", con_updates, PC(INTAVG(con_updates,tot_updates)), UPD_CON_IN_PLACE_ctr, UPD_CON_IN_NEW_ctr); - fprintf(tf,"%7ld (%5.1f%%) partial applications\n\t\t [%" FMT_Int " in place, %" FMT_Int " allocated new space]\n", + fprintf(tf,"%11ld (%5.1f%%) partial applications\n\t\t [%" FMT_Int " in place, %" FMT_Int " allocated new space]\n", pap_updates, PC(INTAVG(pap_updates,tot_updates)), UPD_PAP_IN_PLACE_ctr, UPD_PAP_IN_NEW_ctr); - fprintf(tf,"%7" FMT_Int " (%5.1f%%) updates by squeezing\n", + fprintf(tf,"%11" FMT_Int " (%5.1f%%) updates by squeezing\n", UPD_SQUEEZED_ctr, PC(INTAVG(UPD_SQUEEZED_ctr, tot_updates))); /* krc: also avoid dealing with this for now */ #if FALSE - fprintf(tf, "\nUPD_CON_IN_NEW: %7ld: ", UPD_CON_IN_NEW_ctr); - for (i = 0; i < 9; i++) { fprintf(tf, "%7ld", UPD_CON_IN_NEW_hst[i]); } + fprintf(tf, "\nUPD_CON_IN_NEW: %11ld: ", UPD_CON_IN_NEW_ctr); + for (i = 0; i < 9; i++) { fprintf(tf, "%11ld", UPD_CON_IN_NEW_hst[i]); } fprintf(tf, "\n"); - fprintf(tf, "UPD_CON_IN_PLACE: %7ld: ", UPD_CON_IN_PLACE_ctr); - for (i = 0; i < 9; i++) { fprintf(tf, "%7ld", UPD_CON_IN_PLACE_hst[i]); } + fprintf(tf, "UPD_CON_IN_PLACE: %11ld: ", UPD_CON_IN_PLACE_ctr); + for (i = 0; i < 9; i++) { fprintf(tf, "%11ld", UPD_CON_IN_PLACE_hst[i]); } fprintf(tf, "\n"); - fprintf(tf, "UPD_PAP_IN_NEW: %7ld: ", UPD_PAP_IN_NEW_ctr); - for (i = 0; i < 9; i++) { fprintf(tf, "%7ld", UPD_PAP_IN_NEW_hst[i]); } + fprintf(tf, "UPD_PAP_IN_NEW: %11ld: ", UPD_PAP_IN_NEW_ctr); + for (i = 0; i < 9; i++) { fprintf(tf, "%11ld", UPD_PAP_IN_NEW_hst[i]); } fprintf(tf, "\n"); #endif @@ -317,20 +317,39 @@ PrintTickyInfo(void) */ #define PR_CTR(ctr) \ - do { fprintf(tf,"%7" FMT_Int " " #ctr "\n", ctr); } while(0) + do { fprintf(tf,"%11" FMT_Int " " #ctr "\n", ctr); } while(0) /* COND_PR_CTR takes a boolean; if false then msg is the printname rather than ctr */ #define COND_PR_CTR(ctr,b,msg) \ - if (b) { fprintf(tf,"%7" FMT_Int " " #ctr "\n", ctr); } else { fprintf(tf,"%7" FMT_Int " " msg "\n", ctr); } + if (b) { fprintf(tf,"%11" FMT_Int " " #ctr "\n", ctr); } else { fprintf(tf,"%11" FMT_Int " " msg "\n", ctr); } #define PR_HST(hst,i) \ - do { fprintf(tf,"%7ld " #hst "_" #i "\n", hst[i]); } while(0) + do { fprintf(tf,"%11ld " #hst "_" #i "\n", hst[i]); } while(0) + + ALLOC_HEAP_ctr = (StgInt)ALLOC_HEAP_ctr + (StgInt)ALLOC_RTS_ctr; + ALLOC_HEAP_tot = (StgInt)ALLOC_HEAP_tot + (StgInt)ALLOC_RTS_tot; PR_CTR(ALLOC_HEAP_ctr); PR_CTR(ALLOC_HEAP_tot); + PR_CTR(ALLOC_RTS_ctr); + PR_CTR(ALLOC_RTS_tot); + PR_CTR(ALLOC_FUN_ctr); - PR_CTR(ALLOC_FUN_adm); PR_CTR(ALLOC_FUN_gds); - PR_CTR(ALLOC_FUN_slp); + + PR_CTR(ALLOC_PAP_ctr); + PR_CTR(ALLOC_PAP_adm); + PR_CTR(ALLOC_PAP_gds); + + PR_CTR(ALLOC_UP_THK_ctr); + PR_CTR(ALLOC_SE_THK_ctr); + PR_CTR(ALLOC_THK_gds); + + PR_CTR(ALLOC_CON_ctr); + PR_CTR(ALLOC_CON_gds); + + PR_CTR(ALLOC_PRIM_ctr); + PR_CTR(ALLOC_PRIM_gds); + PR_CTR(ALLOC_PRIM_slp); /* krc: comment out some of this stuff temporarily PR_HST(ALLOC_FUN_hst,0); @@ -384,9 +403,6 @@ PrintTickyInfo(void) PR_HST(ALLOC_PRIM_hst,2); PR_HST(ALLOC_PRIM_hst,3); PR_HST(ALLOC_PRIM_hst,4); - PR_CTR(ALLOC_PAP_ctr); - PR_CTR(ALLOC_PAP_adm); - PR_CTR(ALLOC_PAP_gds); PR_CTR(ALLOC_PAP_slp); PR_HST(ALLOC_PAP_hst,0); PR_HST(ALLOC_PAP_hst,1); @@ -410,6 +426,7 @@ PrintTickyInfo(void) PR_CTR(ENT_DYN_CON_ctr); PR_CTR(ENT_STATIC_FUN_DIRECT_ctr); PR_CTR(ENT_DYN_FUN_DIRECT_ctr); + PR_CTR(ENT_LNE_ctr); PR_CTR(ENT_STATIC_IND_ctr); PR_CTR(ENT_DYN_IND_ctr); @@ -435,21 +452,22 @@ PrintTickyInfo(void) PR_CTR(ENT_STATIC_THK_ctr); PR_CTR(ENT_DYN_THK_ctr); - PR_CTR(SLOW_CALL_v_ctr); - PR_CTR(SLOW_CALL_f_ctr); - PR_CTR(SLOW_CALL_d_ctr); - PR_CTR(SLOW_CALL_l_ctr); - PR_CTR(SLOW_CALL_n_ctr); - PR_CTR(SLOW_CALL_p_ctr); - PR_CTR(SLOW_CALL_pv_ctr); - PR_CTR(SLOW_CALL_pp_ctr); - PR_CTR(SLOW_CALL_ppv_ctr); - PR_CTR(SLOW_CALL_ppp_ctr); - PR_CTR(SLOW_CALL_pppv_ctr); - PR_CTR(SLOW_CALL_pppp_ctr); - PR_CTR(SLOW_CALL_ppppp_ctr); - PR_CTR(SLOW_CALL_pppppp_ctr); - PR_CTR(SLOW_CALL_OTHER_ctr); + PR_CTR(SLOW_CALL_fast_v16_ctr); + PR_CTR(SLOW_CALL_fast_v_ctr); + PR_CTR(SLOW_CALL_fast_f_ctr); + PR_CTR(SLOW_CALL_fast_d_ctr); + PR_CTR(SLOW_CALL_fast_l_ctr); + PR_CTR(SLOW_CALL_fast_n_ctr); + PR_CTR(SLOW_CALL_fast_p_ctr); + PR_CTR(SLOW_CALL_fast_pv_ctr); + PR_CTR(SLOW_CALL_fast_pp_ctr); + PR_CTR(SLOW_CALL_fast_ppv_ctr); + PR_CTR(SLOW_CALL_fast_ppp_ctr); + PR_CTR(SLOW_CALL_fast_pppv_ctr); + PR_CTR(SLOW_CALL_fast_pppp_ctr); + PR_CTR(SLOW_CALL_fast_ppppp_ctr); + PR_CTR(SLOW_CALL_fast_pppppp_ctr); + PR_CTR(VERY_SLOW_CALL_ctr); PR_CTR(UNKNOWN_CALL_ctr); PR_CTR(KNOWN_CALL_ctr); @@ -574,19 +592,20 @@ printRegisteredCounterInfo (FILE *tf) StgEntCounter *p; if ( ticky_entry_ctrs != NULL ) { + fprintf(tf,"\nThe following table is explained by http://hackage.haskell.org/trac/ghc/wiki/Debugging/TickyTicky\nAll allocation numbers are in bytes.\n"); fprintf(tf,"\n**************************************************\n\n"); } - fprintf(tf, "%11s%11s %6s%6s %-11s%-30s\n", - "Entries", "Allocs", "Arity", "Stack", "Kinds", "Function"); + fprintf(tf, "%11s%11s%11s %-23s %s\n", + "Entries", "Alloc", "Alloc'd", "Non-void Arguments", "STG Name"); fprintf(tf, "--------------------------------------------------------------------------------\n"); /* Function name at the end so it doesn't mess up the tabulation */ for (p = ticky_entry_ctrs; p != NULL; p = p->link) { - fprintf(tf, "%11" FMT_Int "%11" FMT_Int " %6lu%6lu %-11s%-30s", + fprintf(tf, "%11" FMT_Int "%11" FMT_Int "%11" FMT_Int " %3lu %-20.20s %s", p->entry_count, p->allocs, + p->allocd, (unsigned long)p->arity, - (unsigned long)p->stk_args, p->arg_kinds, p->str); diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index f14b3b0c9d..5c4e54f98d 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -638,7 +638,7 @@ allocate (Capability *cap, W_ n) bdescr *bd; StgPtr p; - TICK_ALLOC_HEAP_NOCTR(n); + TICK_ALLOC_HEAP_NOCTR(WDS(n)); CCS_ALLOC(cap->r.rCCCS,n); if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { @@ -753,7 +753,7 @@ allocatePinned (Capability *cap, W_ n) return p; } - TICK_ALLOC_HEAP_NOCTR(n); + TICK_ALLOC_HEAP_NOCTR(WDS(n)); CCS_ALLOC(cap->r.rCCCS,n); bd = cap->pinned_object_block; diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs index 77daf5c790..78233a5b98 100644 --- a/utils/deriveConstants/DeriveConstants.hs +++ b/utils/deriveConstants/DeriveConstants.hs @@ -365,6 +365,7 @@ wanteds = concat ,closurePayload C "StgClosure" "payload" ,structFieldH Both "StgEntCounter" "allocs" + ,structFieldH Both "StgEntCounter" "allocd" ,structField Both "StgEntCounter" "registeredp" ,structField Both "StgEntCounter" "link" ,structField Both "StgEntCounter" "entry_count" |