summaryrefslogtreecommitdiff
path: root/compiler/codeGen
diff options
context:
space:
mode:
authorNicolas Frisby <nicolas.frisby@gmail.com>2013-03-06 21:46:14 +0000
committerNicolas Frisby <nicolas.frisby@gmail.com>2013-03-29 15:20:50 +0000
commit460abd75c4f99d813ed226d2ff6aa592d62fafd4 (patch)
tree9e602d6733d90c1b26fccb2509497454bf619766 /compiler/codeGen
parentc7d80c6524390551b64e9c1d651e1a03ed3c7617 (diff)
downloadhaskell-460abd75c4f99d813ed226d2ff6aa592d62fafd4.tar.gz
ticky enhancements
* the new StgCmmArgRep module breaks a dependency cycle; I also untabified it, but made no real changes * updated the documentation in the wiki and change the user guide to point there * moved the allocation enters for ticky and CCS to after the heap check * I left LDV where it was, which was before the heap check at least once, since I have no idea what it is * standardized all (active?) ticky alloc totals to bytes * in order to avoid double counting StgCmmLayout.adjustHpBackwards no longer bumps ALLOC_HEAP_ctr * I resurrected the SLOW_CALL counters * the new module StgCmmArgRep breaks cyclic dependency between Layout and Ticky (which the SLOW_CALL counters cause) * renamed them SLOW_CALL_fast_<pattern> and VERY_SLOW_CALL * added ALLOC_RTS_ctr and _tot ticky counters * eg allocation by Storage.c:allocate or a BUILD_PAP in stg_ap_*_info * resurrected ticky counters for ALLOC_THK, ALLOC_PAP, and ALLOC_PRIM * added -ticky and -DTICKY_TICKY in ways.mk for debug ways * added a ticky counter for total LNE entries * new flags for ticky: -ticky-allocd -ticky-dyn-thunk -ticky-LNE * all off by default * -ticky-allocd: tracks allocation *of* closure in addition to allocation *by* that closure * -ticky-dyn-thunk tracks dynamic thunks as if they were functions * -ticky-LNE tracks LNEs as if they were functions * updated the ticky report format, including making the argument categories (more?) accurate again * the printed name for things in the report include the unique of their ticky parent as well as if they are not top-level
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--compiler/codeGen/StgCmmArgRep.hs135
-rw-r--r--compiler/codeGen/StgCmmBind.hs43
-rw-r--r--compiler/codeGen/StgCmmClosure.hs5
-rw-r--r--compiler/codeGen/StgCmmEnv.hs6
-rw-r--r--compiler/codeGen/StgCmmExpr.hs10
-rw-r--r--compiler/codeGen/StgCmmHeap.hs5
-rw-r--r--compiler/codeGen/StgCmmLayout.hs84
-rw-r--r--compiler/codeGen/StgCmmTicky.hs669
-rw-r--r--compiler/codeGen/StgCmmUtils.hs5
9 files changed, 614 insertions, 348 deletions
diff --git a/compiler/codeGen/StgCmmArgRep.hs b/compiler/codeGen/StgCmmArgRep.hs
new file mode 100644
index 0000000000..bd228d4617
--- /dev/null
+++ b/compiler/codeGen/StgCmmArgRep.hs
@@ -0,0 +1,135 @@
+-----------------------------------------------------------------------------
+--
+-- Argument representations used in StgCmmLayout.
+--
+-- (c) The University of Glasgow 2013
+--
+-----------------------------------------------------------------------------
+
+module StgCmmArgRep (
+ ArgRep(..), toArgRep, argRepSizeW,
+
+ argRepString, isNonV, idArgRep,
+
+ slowCallPattern,
+
+ ) where
+
+import StgCmmClosure ( idPrimRep )
+
+import SMRep ( WordOff )
+import Id ( Id )
+import TyCon ( PrimRep(..), primElemRepSizeB )
+import BasicTypes ( RepArity )
+import Constants ( wORD64_SIZE )
+import DynFlags
+
+import Outputable
+import FastString
+
+-- I extricated this code as this new module in order to avoid a
+-- cyclic dependency between StgCmmLayout and StgCmmTicky.
+--
+-- NSF 18 Feb 2013
+
+-------------------------------------------------------------------------
+-- Classifying arguments: ArgRep
+-------------------------------------------------------------------------
+
+-- ArgRep is re-exported by StgCmmLayout, but only for use in the
+-- byte-code generator which also needs to know about the
+-- classification of arguments.
+
+data ArgRep = P -- GC Ptr
+ | N -- Word-sized non-ptr
+ | L -- 64-bit non-ptr (long)
+ | V -- Void
+ | F -- Float
+ | D -- Double
+ | V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc.
+instance Outputable ArgRep where ppr = text . argRepString
+
+argRepString :: ArgRep -> String
+argRepString P = "P"
+argRepString N = "N"
+argRepString L = "L"
+argRepString V = "V"
+argRepString F = "F"
+argRepString D = "D"
+argRepString V16 = "V16"
+
+toArgRep :: PrimRep -> ArgRep
+toArgRep VoidRep = V
+toArgRep PtrRep = P
+toArgRep IntRep = N
+toArgRep WordRep = N
+toArgRep AddrRep = N
+toArgRep Int64Rep = L
+toArgRep Word64Rep = L
+toArgRep FloatRep = F
+toArgRep DoubleRep = D
+toArgRep (VecRep len elem)
+ | len*primElemRepSizeB elem == 16 = V16
+ | otherwise = error "toArgRep: bad vector primrep"
+
+isNonV :: ArgRep -> Bool
+isNonV V = False
+isNonV _ = True
+
+argRepSizeW :: DynFlags -> ArgRep -> WordOff -- Size in words
+argRepSizeW _ N = 1
+argRepSizeW _ P = 1
+argRepSizeW _ F = 1
+argRepSizeW dflags L = wORD64_SIZE `quot` wORD_SIZE dflags
+argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags
+argRepSizeW _ V = 0
+argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags
+
+idArgRep :: Id -> ArgRep
+idArgRep = toArgRep . idPrimRep
+
+-- This list of argument patterns should be kept in sync with at least
+-- the following:
+--
+-- * StgCmmLayout.stdPattern maybe to some degree?
+--
+-- * the RTS_RET(stg_ap_*) and RTS_FUN_DECL(stg_ap_*_fast)
+-- declarations in includes/stg/MiscClosures.h
+--
+-- * the SLOW_CALL_*_ctr declarations in includes/stg/Ticky.h,
+--
+-- * the TICK_SLOW_CALL_*() #defines in includes/Cmm.h,
+--
+-- * the PR_CTR(SLOW_CALL_*_ctr) calls in rts/Ticky.c,
+--
+-- * and the SymI_HasProto(stg_ap_*_{ret,info,fast}) calls and
+-- SymI_HasProto(SLOW_CALL_*_ctr) calls in rts/Linker.c
+--
+-- There may be more places that I haven't found; I merely igrep'd for
+-- pppppp and excluded things that seemed ghci-specific.
+--
+-- Also, it seems at the moment that ticky counters with void
+-- arguments will never be bumped, but I'm still declaring those
+-- counters, defensively.
+--
+-- NSF 6 Mar 2013
+
+-- These cases were found to cover about 99% of all slow calls:
+slowCallPattern :: [ArgRep] -> (FastString, RepArity)
+-- Returns the generic apply function and arity
+slowCallPattern (P: P: P: P: P: P: _) = (fsLit "stg_ap_pppppp", 6)
+slowCallPattern (P: P: P: P: P: _) = (fsLit "stg_ap_ppppp", 5)
+slowCallPattern (P: P: P: P: _) = (fsLit "stg_ap_pppp", 4)
+slowCallPattern (P: P: P: V: _) = (fsLit "stg_ap_pppv", 4)
+slowCallPattern (P: P: P: _) = (fsLit "stg_ap_ppp", 3)
+slowCallPattern (P: P: V: _) = (fsLit "stg_ap_ppv", 3)
+slowCallPattern (P: P: _) = (fsLit "stg_ap_pp", 2)
+slowCallPattern (P: V: _) = (fsLit "stg_ap_pv", 2)
+slowCallPattern (P: _) = (fsLit "stg_ap_p", 1)
+slowCallPattern (V: _) = (fsLit "stg_ap_v", 1)
+slowCallPattern (N: _) = (fsLit "stg_ap_n", 1)
+slowCallPattern (F: _) = (fsLit "stg_ap_f", 1)
+slowCallPattern (D: _) = (fsLit "stg_ap_d", 1)
+slowCallPattern (L: _) = (fsLit "stg_ap_l", 1)
+slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1)
+slowCallPattern [] = (fsLit "stg_ap_0", 0)
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index 136bb52b07..1e5d6b9f4f 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -296,7 +296,7 @@ mkRhsClosure dflags bndr _cc _bi
(StgApp fun_id args)
| args `lengthIs` (arity-1)
- && all (isGcPtrRep . idPrimRep . stripNV) fvs
+ && all (isGcPtrRep . idPrimRep . unsafe_stripNV) fvs
&& isUpdatable upd_flag
&& arity <= mAX_SPEC_AP_SIZE dflags
&& not (gopt Opt_SccProfilingOn dflags)
@@ -344,7 +344,7 @@ mkRhsClosure _ bndr cc _ fvs upd_flag args body
fv_details :: [(NonVoid Id, VirtualHpOffset)]
(tot_wds, ptr_wds, fv_details)
= mkVirtHeapOffsets dflags (isLFThunk lf_info)
- (addIdReps (map stripNV reduced_fvs))
+ (addIdReps (map unsafe_stripNV reduced_fvs))
closure_info = mkClosureInfo dflags False -- Not static
bndr lf_info tot_wds ptr_wds
descr
@@ -369,11 +369,6 @@ mkRhsClosure _ bndr cc _ fvs upd_flag args body
-- RETURN
; return (mkRhsInit dflags reg lf_info hp_plus_n) }
-
--- Use with care; if used inappropriately, it could break invariants.
-stripNV :: NonVoid a -> a
-stripNV (NonVoid a) = a
-
-------------------------
cgRhsStdThunk
:: Id
@@ -418,10 +413,10 @@ mkClosureLFInfo :: Id -- The binder
-> [Id] -- Args
-> FCode LambdaFormInfo
mkClosureLFInfo bndr top fvs upd_flag args
- | null args = return (mkLFThunk (idType bndr) top (map stripNV fvs) upd_flag)
+ | null args = return (mkLFThunk (idType bndr) top (map unsafe_stripNV fvs) upd_flag)
| otherwise =
do { arg_descr <- mkArgDescr (idName bndr) args
- ; return (mkLFReEntrant top (map stripNV fvs) args arg_descr) }
+ ; return (mkLFReEntrant top (map unsafe_stripNV fvs) args arg_descr) }
------------------------------------------------------------------------
@@ -453,7 +448,8 @@ closureCodeBody :: Bool -- whether this is a top-level binding
closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details
| arity == 0 -- No args i.e. thunk
- = emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $
+ = withNewTickyCounterThunk cl_info $
+ emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $
\(_, node, _) -> thunkCode cl_info fv_details cc node arity body
where
lf_info = closureLFInfo cl_info
@@ -461,12 +457,7 @@ closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details
closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
= -- Note: args may be [], if all args are Void
- do { -- Allocate the global ticky counter,
- -- and establish the ticky-counter
- -- label for this block
- let ticky_ctr_lbl = closureRednCountsLabel cl_info
- ; emitTickyCounter cl_info (map stripNV args)
- ; setTickyCtrLabel ticky_ctr_lbl $ do
+ withNewTickyCounterFun (closureName cl_info) args $ do {
; let
lf_info = closureLFInfo cl_info
@@ -479,20 +470,20 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
{ mkSlowEntryCode bndr cl_info arg_regs
; dflags <- getDynFlags
- ; let lf_info = closureLFInfo cl_info
- node_points = nodeMustPointToIt dflags lf_info
+ ; let node_points = nodeMustPointToIt dflags lf_info
node' = if node_points then Just node else Nothing
- ; tickyEnterFun cl_info
- ; enterCostCentreFun cc
- (CmmMachOp (mo_wordSub dflags)
- [ CmmReg nodeReg
- , mkIntExpr dflags (funTag dflags cl_info) ])
; when node_points (ldvEnterClosure cl_info)
; granYield arg_regs node_points
-- Main payload
; entryHeapCheck cl_info node' arity arg_regs $ do
- { fv_bindings <- mapM bind_fv fv_details
+ { -- ticky after heap check to avoid double counting
+ tickyEnterFun cl_info
+ ; enterCostCentreFun cc
+ (CmmMachOp (mo_wordSub dflags)
+ [ CmmReg nodeReg
+ , mkIntExpr dflags (funTag dflags cl_info) ])
+ ; fv_bindings <- mapM bind_fv fv_details
-- Load free vars out of closure *after*
-- heap check, to reduce live vars over check
; when node_points $ load_fvs node lf_info fv_bindings
@@ -545,7 +536,6 @@ thunkCode cl_info fv_details _cc node arity body
= do { dflags <- getDynFlags
; let node_points = nodeMustPointToIt dflags (closureLFInfo cl_info)
node' = if node_points then Just node else Nothing
- ; tickyEnterThunk cl_info
; ldvEnterClosure cl_info -- NB: Node always points when profiling
; granThunk node_points
@@ -562,7 +552,8 @@ thunkCode cl_info fv_details _cc node arity body
-- that cc of enclosing scope will be recorded
-- in update frame CAF/DICT functions will be
-- subsumed by this enclosing cc
- do { enterCostCentreThunk (CmmReg nodeReg)
+ do { tickyEnterThunk cl_info
+ ; enterCostCentreThunk (CmmReg nodeReg)
; let lf_info = closureLFInfo cl_info
; fv_bindings <- mapM bind_fv fv_details
; load_fvs node lf_info fv_bindings
diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs
index 7f44f67ff7..a057484d39 100644
--- a/compiler/codeGen/StgCmmClosure.hs
+++ b/compiler/codeGen/StgCmmClosure.hs
@@ -49,7 +49,7 @@ module StgCmmClosure (
-- ** Labels
-- These just need the info table label
closureInfoLabel, staticClosureLabel,
- closureRednCountsLabel, closureSlowEntryLabel, closureLocalEntryLabel,
+ closureSlowEntryLabel, closureLocalEntryLabel,
-- ** Predicates
-- These are really just functions on LambdaFormInfo
@@ -772,9 +772,6 @@ isToplevClosure (ClosureInfo { closureLFInfo = lf_info })
staticClosureLabel :: ClosureInfo -> CLabel
staticClosureLabel = toClosureLbl . closureInfoLabel
-closureRednCountsLabel :: ClosureInfo -> CLabel
-closureRednCountsLabel = toRednCountsLbl . closureInfoLabel
-
closureSlowEntryLabel :: ClosureInfo -> CLabel
closureSlowEntryLabel = toSlowEntryLbl . closureInfoLabel
diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs
index 42e4da0e1f..1fdb364b56 100644
--- a/compiler/codeGen/StgCmmEnv.hs
+++ b/compiler/codeGen/StgCmmEnv.hs
@@ -13,7 +13,7 @@ module StgCmmEnv (
litIdInfo, lneIdInfo, rhsIdInfo, mkRhsInit,
idInfoToAmode,
- NonVoid(..), isVoidId, nonVoidIds,
+ NonVoid(..), unsafe_stripNV, isVoidId, nonVoidIds,
addBindC, addBindsC,
@@ -55,6 +55,10 @@ import Outputable
newtype NonVoid a = NonVoid a
deriving (Eq, Show)
+-- Use with care; if used inappropriately, it could break invariants.
+unsafe_stripNV :: NonVoid a -> a
+unsafe_stripNV (NonVoid a) = a
+
instance (Outputable a) => Outputable (NonVoid a) where
ppr (NonVoid a) = ppr a
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index f4186f7b9b..78080218f8 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -161,10 +161,11 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
return ( lneIdInfo dflags bndr args
, code )
where
- code = forkProc $ do
- { restoreCurrentCostCentre cc_slot
- ; arg_regs <- bindArgsToRegs args
- ; void $ noEscapeHeapCheck arg_regs (cgExpr body) }
+ code = forkProc $ do {
+ ; withNewTickyCounterLNE (idName bndr) args $ do
+ ; restoreCurrentCostCentre cc_slot
+ ; arg_regs <- bindArgsToRegs args
+ ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
------------------------------------------------------------------------
@@ -416,6 +417,7 @@ cgCase scrut bndr alt_type alts
| isSingleton alts = False
| up_hp_usg > 0 = False
| otherwise = True
+ -- cf Note [Compiling case expressions]
gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts
; mb_cc <- maybeSaveCostCentre simple_scrut
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index b1cddbe5a4..50fcfdc812 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -100,7 +100,7 @@ allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets
-- SAY WHAT WE ARE ABOUT TO DO
; let rep = cit_rep info_tbl
- ; tickyDynAlloc rep lf_info
+ ; tickyDynAlloc (toRednCountsLbl $ cit_lbl info_tbl) rep lf_info
; profDynAlloc rep use_cc
-- FIND THE OFFSET OF THE INFO-PTR WORD
@@ -215,7 +215,6 @@ mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info
= staticGranHdr
++ staticParHdr
++ staticProfHdr dflags ccs
- ++ staticTickyHdr
-- JD: Simon had ellided this padding, but without it the C back end asserts
-- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary?
@@ -527,7 +526,7 @@ heapCheck checkStack checkYield do_gc code
stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
| otherwise = Nothing
; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
- ; tickyAllocHeap hpHw
+ ; tickyAllocHeap True hpHw
; doGranAllocate hpHw
; setRealHp hpHw
; code }
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index a3bbefeb44..06a47c151b 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -24,7 +24,7 @@ module StgCmmLayout (
mkVirtHeapOffsets, mkVirtConstrOffsets, getHpRelOffset, hpRel,
- ArgRep(..), toArgRep, argRepSizeW
+ ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep
) where
@@ -32,6 +32,7 @@ module StgCmmLayout (
import StgCmmClosure
import StgCmmEnv
+import StgCmmArgRep -- notably: ( slowCallPattern )
import StgCmmTicky
import StgCmmMonad
import StgCmmUtils
@@ -46,12 +47,11 @@ import CLabel
import StgSyn
import Id
import Name
-import TyCon ( PrimRep(..), primElemRepSizeB )
+import TyCon ( PrimRep(..) )
import BasicTypes ( RepArity )
import DynFlags
import Module
-import Constants
import Util
import Data.List
import Outputable
@@ -148,7 +148,7 @@ adjustHpBackwards
then mkNop
else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp
- ; tickyAllocHeap adjust_words -- ...ditto
+ ; tickyAllocHeap False adjust_words -- ...ditto
; setRealHp vHp
}
@@ -298,82 +298,6 @@ slowArgs dflags args -- careful: reps contains voids (V), but args does not
save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
-
-
--- These cases were found to cover about 99% of all slow calls:
-slowCallPattern :: [ArgRep] -> (FastString, RepArity)
--- Returns the generic apply function and arity
-slowCallPattern (P: P: P: P: P: P: _) = (fsLit "stg_ap_pppppp", 6)
-slowCallPattern (P: P: P: P: P: _) = (fsLit "stg_ap_ppppp", 5)
-slowCallPattern (P: P: P: P: _) = (fsLit "stg_ap_pppp", 4)
-slowCallPattern (P: P: P: V: _) = (fsLit "stg_ap_pppv", 4)
-slowCallPattern (P: P: P: _) = (fsLit "stg_ap_ppp", 3)
-slowCallPattern (P: P: V: _) = (fsLit "stg_ap_ppv", 3)
-slowCallPattern (P: P: _) = (fsLit "stg_ap_pp", 2)
-slowCallPattern (P: V: _) = (fsLit "stg_ap_pv", 2)
-slowCallPattern (P: _) = (fsLit "stg_ap_p", 1)
-slowCallPattern (V: _) = (fsLit "stg_ap_v", 1)
-slowCallPattern (N: _) = (fsLit "stg_ap_n", 1)
-slowCallPattern (F: _) = (fsLit "stg_ap_f", 1)
-slowCallPattern (D: _) = (fsLit "stg_ap_d", 1)
-slowCallPattern (L: _) = (fsLit "stg_ap_l", 1)
-slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1)
-slowCallPattern [] = (fsLit "stg_ap_0", 0)
-
-
--------------------------------------------------------------------------
--- Classifying arguments: ArgRep
--------------------------------------------------------------------------
-
--- ArgRep is exported, but only for use in the byte-code generator which
--- also needs to know about the classification of arguments.
-
-data ArgRep = P -- GC Ptr
- | N -- Word-sized non-ptr
- | L -- 64-bit non-ptr (long)
- | V -- Void
- | F -- Float
- | D -- Double
- | V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc.
-instance Outputable ArgRep where
- ppr P = text "P"
- ppr N = text "N"
- ppr L = text "L"
- ppr V = text "V"
- ppr F = text "F"
- ppr D = text "D"
- ppr V16 = text "V16"
-
-toArgRep :: PrimRep -> ArgRep
-toArgRep VoidRep = V
-toArgRep PtrRep = P
-toArgRep IntRep = N
-toArgRep WordRep = N
-toArgRep AddrRep = N
-toArgRep Int64Rep = L
-toArgRep Word64Rep = L
-toArgRep FloatRep = F
-toArgRep DoubleRep = D
-toArgRep (VecRep len elem)
- | len*primElemRepSizeB elem == 16 = V16
- | otherwise = error "toArgRep: bad vector primrep"
-
-isNonV :: ArgRep -> Bool
-isNonV V = False
-isNonV _ = True
-
-argRepSizeW :: DynFlags -> ArgRep -> WordOff -- Size in words
-argRepSizeW _ N = 1
-argRepSizeW _ P = 1
-argRepSizeW _ F = 1
-argRepSizeW dflags L = wORD64_SIZE `quot` wORD_SIZE dflags
-argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags
-argRepSizeW _ V = 0
-argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags
-
-idArgRep :: Id -> ArgRep
-idArgRep = toArgRep . idPrimRep
-
-------------------------------------------------------------------------
---- Laying out objects on the heap and stack
-------------------------------------------------------------------------
diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs
index 2bca544ac8..09938a6704 100644
--- a/compiler/codeGen/StgCmmTicky.hs
+++ b/compiler/codeGen/StgCmmTicky.hs
@@ -6,47 +6,100 @@
--
-----------------------------------------------------------------------------
-{-# OPTIONS -fno-warn-tabs #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and
--- detab the module (please do the detabbing in a separate patch). See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
--- for details
+{- OVERVIEW: ticky ticky profiling
+
+Please see
+http://hackage.haskell.org/trac/ghc/wiki/Debugging/TickyTicky and also
+edit it and the rest of this comment to keep them up-to-date if you
+change ticky-ticky. Thanks!
+
+ *** All allocation ticky numbers are in bytes. ***
+
+Some of the relevant source files:
+
+ ***not necessarily an exhaustive list***
+
+ * some codeGen/ modules import this one
+
+ * this module imports cmm/CLabel.hs to manage labels
+
+ * cmm/CmmParse.y expands some macros using generators defined in
+ this module
+
+ * includes/stg/Ticky.h declares all of the global counters
+
+ * includes/rts/Ticky.h declares the C data type for an
+ STG-declaration's counters
+
+ * some macros defined in includes/Cmm.h (and used within the RTS's
+ CMM code) update the global ticky counters
+
+ * at the end of execution rts/Ticky.c generates the final report
+ +RTS -r<report-file> -RTS
+
+The rts/Ticky.c function that generates the report includes an
+STG-declaration's ticky counters if
+
+ * that declaration was entered, or
+
+ * it was allocated (if -ticky-allocd)
+
+On either of those events, the counter is "registered" by adding it to
+a linked list; cf the CMM generated by registerTickyCtr.
+
+Ticky-ticky profiling has evolved over many years. Many of the
+counters from its most sophisticated days are no longer
+active/accurate. As the RTS has changed, sometimes the ticky code for
+relevant counters was not accordingly updated. Unfortunately, neither
+were the comments.
+
+As of March 2013, there still exist deprecated code and comments in
+the code generator as well as the RTS because:
+
+ * I don't know what is out-of-date versus merely commented out for
+ momentary convenience, and
+
+ * someone else might know how to repair it!
+
+-}
module StgCmmTicky (
- emitTickyCounter,
+ withNewTickyCounterFun,
+ withNewTickyCounterThunk,
+ withNewTickyCounterLNE,
- tickyDynAlloc,
- tickyAllocHeap,
- tickyAllocPrim,
- tickyAllocThunk,
- tickyAllocPAP,
-
- tickySlowCall, tickyDirectCall,
+ tickyDynAlloc,
+ tickyAllocHeap,
+ tickyAllocPrim,
+ tickyAllocThunk,
+ tickyAllocPAP,
- tickyPushUpdateFrame,
- tickyUpdateFrameOmitted,
+ tickyUnknownCall, tickyDirectCall,
- tickyEnterDynCon,
- tickyEnterStaticCon,
- tickyEnterViaNode,
+ tickyPushUpdateFrame,
+ tickyUpdateFrameOmitted,
- tickyEnterFun,
- tickyEnterThunk,
+ tickyEnterDynCon,
+ tickyEnterStaticCon,
+ tickyEnterViaNode,
- tickyUpdateBhCaf,
- tickyBlackHole,
- tickyUnboxedTupleReturn, tickyVectoredReturn,
- tickyReturnOldCon, tickyReturnNewCon,
+ tickyEnterFun,
+ tickyEnterThunk,
+ tickyEnterLNE,
- tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs,
- tickyUnknownCall, tickySlowCallPat,
+ tickyUpdateBhCaf,
+ tickyBlackHole,
+ tickyUnboxedTupleReturn, tickyVectoredReturn,
+ tickyReturnOldCon, tickyReturnNewCon,
- staticTickyHdr,
+ tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs,
+ tickySlowCall, tickySlowCallPat,
) where
#include "HsVersions.h"
+import StgCmmArgRep ( slowCallPattern , toArgRep , argRepString )
+import StgCmmEnv ( NonVoid, unsafe_stripNV )
import StgCmmClosure
import StgCmmUtils
import StgCmmMonad
@@ -74,52 +127,87 @@ import Type
import TyCon
import Data.Maybe
+import qualified Data.Char
+import Control.Monad ( when )
-----------------------------------------------------------------------------
--
--- Ticky-ticky profiling
+-- Ticky-ticky profiling
--
-----------------------------------------------------------------------------
-staticTickyHdr :: [CmmLit]
--- krc: not using this right now --
--- in the new version of ticky-ticky, we
--- don't change the closure layout.
--- leave it defined, though, to avoid breaking
--- other things.
-staticTickyHdr = []
-
-emitTickyCounter :: ClosureInfo -> [Id] -> FCode ()
-emitTickyCounter cl_info args
- = ifTicky $
- do { dflags <- getDynFlags
+data TickyClosureType = TickyFun | TickyThunk | TickyLNE
+
+withNewTickyCounterFun, withNewTickyCounterLNE :: Name -> [NonVoid Id] -> FCode () -> FCode ()
+withNewTickyCounterFun = withNewTickyCounter TickyFun
+
+withNewTickyCounterLNE nm args code = do
+ b <- tickyLNEIsOn
+ if not b then code else withNewTickyCounter TickyLNE nm args code
+
+withNewTickyCounterThunk :: ClosureInfo -> FCode () -> FCode ()
+withNewTickyCounterThunk cl_info code
+ | isStaticClosure cl_info = code -- static thunks are uninteresting
+ | otherwise = do
+ b <- tickyDynThunkIsOn
+ if not b then code else withNewTickyCounter TickyThunk (closureName cl_info) [] code
+
+-- args does not include the void arguments
+withNewTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode () -> FCode ()
+withNewTickyCounter cloType name args m = do
+ lbl <- emitTickyCounter cloType name args
+ setTickyCtrLabel lbl m
+
+emitTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode CLabel
+emitTickyCounter cloType name args
+ = let ctr_lbl = mkRednCountsLabel name in
+ (>> return ctr_lbl) $
+ ifTicky $ do
+ { dflags <- getDynFlags
+ ; parent <- getTickyCtrLabel
; mod_name <- getModuleName
- ; let ticky_ctr_label = closureRednCountsLabel cl_info
- arg_descr = map (showTypeCategory . idType) args
- fun_descr mod_name = ppr_for_ticky_name dflags mod_name (closureName cl_info)
- ; fun_descr_lit <- newStringCLit (fun_descr mod_name)
- ; arg_descr_lit <- newStringCLit arg_descr
- ; emitDataLits ticky_ctr_label -- Must match layout of StgEntCounter
--- krc: note that all the fields are I32 now; some were I16 before,
--- but the code generator wasn't handling that properly and it led to chaos,
--- panic and disorder.
- [ mkIntCLit dflags 0,
- mkIntCLit dflags (length args), -- Arity
- mkIntCLit dflags 0, -- XXX: we no longer know this! Words passed on stack
- fun_descr_lit,
- arg_descr_lit,
- zeroCLit dflags, -- Entry count
- zeroCLit dflags, -- Allocs
- zeroCLit dflags -- Link
- ] }
-
--- When printing the name of a thing in a ticky file, we want to
--- give the module name even for *local* things. We print
--- just "x (M)" rather that "M.x" to distinguish them from the global kind.
-ppr_for_ticky_name :: DynFlags -> Module -> Name -> String
-ppr_for_ticky_name dflags mod_name name
- | isInternalName name = showSDocDebug dflags (ppr name <+> (parens (ppr mod_name)))
- | otherwise = showSDocDebug dflags (ppr name)
+
+ -- When printing the name of a thing in a ticky file, we
+ -- want to give the module name even for *local* things. We
+ -- print just "x (M)" rather that "M.x" to distinguish them
+ -- from the global kind.
+ ; let ppr_for_ticky_name :: SDoc
+ ppr_for_ticky_name =
+ let n = ppr name
+ p = case hasHaskellName parent of
+ -- NB the default "top" ticky ctr does not
+ -- have a Haskell name
+ Just pname -> text "in" <+> ppr (nameUnique pname)
+ _ -> empty
+ in (<+> p) $ if isInternalName name
+ then let s = n <+> (parens (ppr mod_name))
+ in case cloType of
+ TickyFun -> s
+ TickyThunk -> s <+> parens (text "thk")
+ TickyLNE -> s <+> parens (text "LNE")
+ else case cloType of
+ TickyFun -> n
+ TickyThunk -> n <+> parens (text "thk")
+ TickyLNE -> panic "emitTickyCounter: how is this an external LNE?"
+
+ ; fun_descr_lit <- newStringCLit $ showSDocDebug dflags ppr_for_ticky_name
+ ; arg_descr_lit <- newStringCLit $ map (showTypeCategory . idType . unsafe_stripNV) args
+ ; emitDataLits ctr_lbl
+ -- Must match layout of includes/rts/Ticky.h's StgEntCounter
+ --
+ -- krc: note that all the fields are I32 now; some were I16
+ -- before, but the code generator wasn't handling that
+ -- properly and it led to chaos, panic and disorder.
+ [ mkIntCLit dflags 0, -- registered?
+ mkIntCLit dflags (length args), -- Arity
+ mkIntCLit dflags 0, -- Heap allocated for this thing
+ fun_descr_lit,
+ arg_descr_lit,
+ zeroCLit dflags, -- Entries into this thing
+ zeroCLit dflags, -- Heap allocated by this thing
+ zeroCLit dflags -- Link to next StgEntCounter
+ ]
+ }
-- -----------------------------------------------------------------------------
-- Ticky stack frames
@@ -131,10 +219,9 @@ tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter (fsLit "UPDF_OMITTED_ctr")
-- -----------------------------------------------------------------------------
-- Ticky entries
-tickyEnterDynCon, tickyEnterDynThunk, tickyEnterStaticCon,
+tickyEnterDynCon, tickyEnterStaticCon,
tickyEnterStaticThunk, tickyEnterViaNode :: FCode ()
tickyEnterDynCon = ifTicky $ bumpTickyCounter (fsLit "ENT_DYN_CON_ctr")
-tickyEnterDynThunk = ifTicky $ bumpTickyCounter (fsLit "ENT_DYN_THK_ctr")
tickyEnterStaticCon = ifTicky $ bumpTickyCounter (fsLit "ENT_STATIC_CON_ctr")
tickyEnterStaticThunk = ifTicky $ bumpTickyCounter (fsLit "ENT_STATIC_THK_ctr")
tickyEnterViaNode = ifTicky $ bumpTickyCounter (fsLit "ENT_VIA_NODE_ctr")
@@ -142,41 +229,65 @@ tickyEnterViaNode = ifTicky $ bumpTickyCounter (fsLit "ENT_VIA_NODE_ctr")
tickyEnterThunk :: ClosureInfo -> FCode ()
tickyEnterThunk cl_info
| isStaticClosure cl_info = tickyEnterStaticThunk
- | otherwise = tickyEnterDynThunk
+ | otherwise = ifTicky $ do
+ bumpTickyCounter (fsLit "ENT_DYN_THK_ctr")
+ ifTickyDynThunk $ do
+ ticky_ctr_lbl <- getTickyCtrLabel
+ registerTickyCtrAtEntryDyn ticky_ctr_lbl
+ bumpTickyEntryCount ticky_ctr_lbl
tickyBlackHole :: Bool{-updatable-} -> FCode ()
tickyBlackHole updatable
= ifTicky (bumpTickyCounter ctr)
where
ctr | updatable = (fsLit "UPD_BH_SINGLE_ENTRY_ctr")
- | otherwise = (fsLit "UPD_BH_UPDATABLE_ctr")
+ | otherwise = (fsLit "UPD_BH_UPDATABLE_ctr")
tickyUpdateBhCaf :: ClosureInfo -> FCode ()
tickyUpdateBhCaf cl_info
= ifTicky (bumpTickyCounter ctr)
where
ctr | closureUpdReqd cl_info = (fsLit "UPD_CAF_BH_SINGLE_ENTRY_ctr")
- | otherwise = (fsLit "UPD_CAF_BH_UPDATABLE_ctr")
+ | otherwise = (fsLit "UPD_CAF_BH_UPDATABLE_ctr")
tickyEnterFun :: ClosureInfo -> FCode ()
-tickyEnterFun cl_info
- = ifTicky $
- do { dflags <- getDynFlags
- ; bumpTickyCounter ctr
- ; fun_ctr_lbl <- getTickyCtrLabel
- ; registerTickyCtr fun_ctr_lbl
- ; bumpTickyCounter' (cmmLabelOffB fun_ctr_lbl (oFFSET_StgEntCounter_entry_count dflags))
- }
- where
- ctr | isStaticClosure cl_info = (fsLit "ENT_STATIC_FUN_DIRECT_ctr")
- | otherwise = (fsLit "ENT_DYN_FUN_DIRECT_ctr")
+tickyEnterFun cl_info = ifTicky $ do
+ ctr_lbl <- getTickyCtrLabel
+
+ if isStaticClosure cl_info
+ then do bumpTickyCounter (fsLit "ENT_STATIC_FUN_DIRECT_ctr")
+ registerTickyCtr ctr_lbl
+ else do bumpTickyCounter (fsLit "ENT_DYN_FUN_DIRECT_ctr")
+ registerTickyCtrAtEntryDyn ctr_lbl
+
+ bumpTickyEntryCount ctr_lbl
+
+tickyEnterLNE :: FCode ()
+tickyEnterLNE = ifTicky $ do
+ bumpTickyCounter (fsLit "ENT_LNE_ctr")
+ ifTickyLNE $ do
+ ctr_lbl <- getTickyCtrLabel
+ registerTickyCtr ctr_lbl
+ bumpTickyEntryCount ctr_lbl
+
+-- needn't register a counter upon entry if
+--
+-- 1) it's for a dynamic closure, and
+--
+-- 2) -ticky-allocd is on
+--
+-- since the counter was registered already upon being alloc'd
+registerTickyCtrAtEntryDyn :: CLabel -> FCode ()
+registerTickyCtrAtEntryDyn ctr_lbl = do
+ already_registered <- tickyAllocdIsOn
+ when (not already_registered) $ registerTickyCtr ctr_lbl
registerTickyCtr :: CLabel -> FCode ()
-- Register a ticky counter
-- if ( ! f_ct.registeredp ) {
--- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */
--- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */
--- f_ct.registeredp = 1 }
+-- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */
+-- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */
+-- f_ct.registeredp = 1 }
registerTickyCtr ctr_lbl = do
dflags <- getDynFlags
let
@@ -196,22 +307,22 @@ registerTickyCtr ctr_lbl = do
emit =<< mkCmmIfThen test (catAGraphs register_stmts)
tickyReturnOldCon, tickyReturnNewCon :: RepArity -> FCode ()
-tickyReturnOldCon arity
+tickyReturnOldCon arity
= ifTicky $ do { bumpTickyCounter (fsLit "RET_OLD_ctr")
- ; bumpHistogram (fsLit "RET_OLD_hst") arity }
-tickyReturnNewCon arity
+ ; bumpHistogram (fsLit "RET_OLD_hst") arity }
+tickyReturnNewCon arity
= ifTicky $ do { bumpTickyCounter (fsLit "RET_NEW_ctr")
- ; bumpHistogram (fsLit "RET_NEW_hst") arity }
+ ; bumpHistogram (fsLit "RET_NEW_hst") arity }
tickyUnboxedTupleReturn :: RepArity -> FCode ()
tickyUnboxedTupleReturn arity
= ifTicky $ do { bumpTickyCounter (fsLit "RET_UNBOXED_TUP_ctr")
- ; bumpHistogram (fsLit "RET_UNBOXED_TUP_hst") arity }
+ ; bumpHistogram (fsLit "RET_UNBOXED_TUP_hst") arity }
tickyVectoredReturn :: Int -> FCode ()
-tickyVectoredReturn family_size
+tickyVectoredReturn family_size
= ifTicky $ do { bumpTickyCounter (fsLit "VEC_RETURN_ctr")
- ; bumpHistogram (fsLit "RET_VEC_RETURN_hst") family_size }
+ ; bumpHistogram (fsLit "RET_VEC_RETURN_hst") family_size }
-- -----------------------------------------------------------------------------
-- Ticky calls
@@ -221,7 +332,7 @@ tickyDirectCall :: RepArity -> [StgArg] -> FCode ()
tickyDirectCall arity args
| arity == length args = tickyKnownCallExact
| otherwise = do tickyKnownCallExtraArgs
- tickySlowCallPat (map argPrimRep (drop arity args))
+ tickySlowCallPat (map argPrimRep (drop arity args))
tickyKnownCallTooFewArgs :: FCode ()
tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_TOO_FEW_ARGS_ctr")
@@ -238,130 +349,224 @@ tickyUnknownCall = ifTicky $ bumpTickyCounter (fsLit "UNKNOWN_CALL_ctr")
-- Tick for the call pattern at slow call site (i.e. in addition to
-- tickyUnknownCall, tickyKnownCallExtraArgs, etc.)
tickySlowCall :: LambdaFormInfo -> [StgArg] -> FCode ()
-tickySlowCall _ []
- = return ()
-tickySlowCall lf_info args
- = do { if (isKnownFun lf_info)
- then tickyKnownCallTooFewArgs
- else tickyUnknownCall
- ; tickySlowCallPat (map argPrimRep args) }
+tickySlowCall _ [] = return ()
+tickySlowCall lf_info args = do
+ -- see Note [Ticky for slow calls]
+ if isKnownFun lf_info
+ then tickyKnownCallTooFewArgs
+ else tickyUnknownCall
+ tickySlowCallPat (map argPrimRep args)
tickySlowCallPat :: [PrimRep] -> FCode ()
-tickySlowCallPat _args = return ()
-{- LATER: (introduces recursive module dependency now).
- case callPattern args of
- (str, True) -> bumpTickyCounter' (mkRtsSlowTickyCtrLabel pat)
- (str, False) -> bumpTickyCounter (sLit "TICK_SLOW_CALL_OTHER")
-
--- Don't use CgRep; put this function in StgCmmLayout
-callPattern :: [CgRep] -> (String,Bool)
-callPattern reps
- | match == length reps = (chars, True)
- | otherwise = (chars, False)
- where (_,match) = findMatch reps
- chars = map argChar reps
-
-argChar VoidArg = 'v'
-argChar PtrArg = 'p'
-argChar NonPtrArg = 'n'
-argChar LongArg = 'l'
-argChar FloatArg = 'f'
-argChar DoubleArg = 'd'
--}
+tickySlowCallPat args = ifTicky $
+ let argReps = map toArgRep args
+ (_, n_matched) = slowCallPattern argReps
+ in if n_matched > 0 && n_matched == length args
+ then bumpTickyLbl $ mkRtsSlowFastTickyCtrLabel $ concatMap (map Data.Char.toLower . argRepString) argReps
+ else bumpTickyCounter $ fsLit "VERY_SLOW_CALL_ctr"
--- -----------------------------------------------------------------------------
--- Ticky allocation
-
-tickyDynAlloc :: SMRep -> LambdaFormInfo -> FCode ()
--- Called when doing a dynamic heap allocation
--- LambdaFormInfo only needed to distinguish between updatable/non-updatable thunks
-tickyDynAlloc rep lf
- = ifTicky $
- case () of
- _ | isConRep rep -> tick_alloc_con
- | isThunkRep rep -> tick_alloc_thk
- | isFunRep rep -> tick_alloc_fun
- | otherwise -> return ()
- where
- -- will be needed when we fill in stubs
--- _cl_size = heapClosureSize rep
--- _slop_size = slopSize cl_info
+{-
- tick_alloc_thk
- | lfUpdatable lf = tick_alloc_up_thk
- | otherwise = tick_alloc_se_thk
+Note [Ticky for slow calls]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Terminology is unfortunately a bit mixed up for these calls. codeGen
+uses "slow call" to refer to unknown calls and under-saturated known
+calls.
- -- krc: changed from panic to return ()
- -- just to get something working
- tick_alloc_con = return ()
- tick_alloc_fun = return ()
- tick_alloc_up_thk = return ()
- tick_alloc_se_thk = return ()
+Nowadays, though (ie as of the eval/apply paper), the significantly
+slower calls are actually just a subset of these: the ones with no
+built-in argument pattern (cf StgCmmArgRep.slowCallPattern)
+So for ticky profiling, we split slow calls into
+"SLOW_CALL_fast_<pattern>_ctr" (those matching a built-in pattern) and
+VERY_SLOW_CALL_ctr (those without a built-in pattern; these are very
+bad for both space and time).
-tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
-tickyAllocPrim _hdr _goods _slop = ifTicky $ pprTrace "ToDo: tickyAllocPrim" empty (return ())
-
-tickyAllocThunk :: CmmExpr -> CmmExpr -> FCode ()
-tickyAllocThunk _goods _slop = ifTicky $ pprTrace "ToDo: tickyAllocThunk" empty (return ())
+-}
-tickyAllocPAP :: CmmExpr -> CmmExpr -> FCode ()
-tickyAllocPAP _goods _slop = ifTicky $ pprTrace "ToDo: tickyAllocPAP" empty (return ())
+-- -----------------------------------------------------------------------------
+-- Ticky allocation
-tickyAllocHeap :: VirtualHpOffset -> FCode ()
+tickyDynAlloc :: Maybe CLabel -> SMRep -> LambdaFormInfo -> FCode ()
+-- Called when doing a dynamic heap allocation; the LambdaFormInfo
+-- used to distinguish between closure types
+--
+-- TODO what else to count while we're here?
+tickyDynAlloc mb_ctr_lbl rep lf = ifTicky $ getDynFlags >>= \dflags ->
+ let bytes = wORD_SIZE dflags * heapClosureSize dflags rep
+
+ countGlobal tot ctr = do
+ bumpTickyCounterBy tot bytes
+ bumpTickyCounter ctr
+ countSpecific = ifTickyAllocd $ case mb_ctr_lbl of
+ Nothing -> return ()
+ Just ctr_lbl -> do
+ registerTickyCtr ctr_lbl
+ bumpTickyAllocd ctr_lbl bytes
+
+ -- TODO are we still tracking "good stuff" (_gds) versus
+ -- administrative (_adm) versus slop (_slp)? I'm going with all _gds
+ -- for now, since I don't currently know neither if we do nor how to
+ -- distinguish. NSF Mar 2013
+
+ in case () of
+ _ | isConRep rep ->
+ countGlobal (fsLit "ALLOC_CON_gds") (fsLit "ALLOC_CON_ctr")
+ | isThunkRep rep ->
+ ifTickyDynThunk countSpecific >>
+ if lfUpdatable lf
+ then countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_UP_THK_ctr")
+ else countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_SE_THK_ctr")
+ | isFunRep rep ->
+ countSpecific >>
+ countGlobal (fsLit "ALLOC_FUN_gds") (fsLit "ALLOC_FUN_ctr")
+ | otherwise -> panic "How is this heap object not a con, thunk, or fun?"
+
+
+
+tickyAllocHeap ::
+ Bool -> -- is this a genuine allocation? As opposed to
+ -- StgCmmLayout.adjustHpBackwards
+ VirtualHpOffset -> FCode ()
-- Called when doing a heap check [TICK_ALLOC_HEAP]
-- Must be lazy in the amount of allocation!
-tickyAllocHeap hp
+tickyAllocHeap genuine hp
= ifTicky $
do { dflags <- getDynFlags
; ticky_ctr <- getTickyCtrLabel
- ; emit $ catAGraphs $
- if hp == 0 then [] -- Inside the emitMiddle to avoid control
- else [ -- dependency on the argument
- -- Bump the allcoation count in the StgEntCounter
- addToMem (rEP_StgEntCounter_allocs dflags)
- (CmmLit (cmmLabelOffB ticky_ctr
- (oFFSET_StgEntCounter_allocs dflags))) hp,
- -- Bump ALLOC_HEAP_ctr
- addToMemLbl (cLong dflags) (mkCmmDataLabel rtsPackageId (fsLit "ALLOC_HEAP_ctr")) 1,
- -- Bump ALLOC_HEAP_tot
- addToMemLbl (cLong dflags) (mkCmmDataLabel rtsPackageId (fsLit "ALLOC_HEAP_tot")) hp] }
+ ; emit $ catAGraphs $
+ -- only test hp from within the emit so that the monadic
+ -- computation itself is not strict in hp (cf knot in
+ -- StgCmmMonad.getHeapUsage)
+ if hp == 0 then []
+ else let !bytes = wORD_SIZE dflags * hp in [
+ -- Bump the allocation total in the closure's StgEntCounter
+ addToMem (rEP_StgEntCounter_allocs dflags)
+ (CmmLit (cmmLabelOffB ticky_ctr (oFFSET_StgEntCounter_allocs dflags)))
+ bytes,
+ -- Bump the global allocation total ALLOC_HEAP_tot
+ addToMemLbl (cLong dflags)
+ (mkCmmDataLabel rtsPackageId (fsLit "ALLOC_HEAP_tot"))
+ bytes,
+ -- Bump the global allocation counter ALLOC_HEAP_ctr
+ if not genuine then mkNop
+ else addToMemLbl (cLong dflags)
+ (mkCmmDataLabel rtsPackageId (fsLit "ALLOC_HEAP_ctr"))
+ 1
+ ]}
+
+
+--------------------------------------------------------------------------------
+-- these three are only called from CmmParse.y (ie ultimately from the RTS)
+
+-- the units are bytes
+
+tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+tickyAllocPrim _hdr _goods _slop = ifTicky $ do
+ bumpTickyCounter (fsLit "ALLOC_PRIM_ctr")
+ bumpTickyCounterByE (fsLit "ALLOC_PRIM_adm") _hdr
+ bumpTickyCounterByE (fsLit "ALLOC_PRIM_gds") _goods
+ bumpTickyCounterByE (fsLit "ALLOC_PRIM_slp") _slop
+
+tickyAllocThunk :: CmmExpr -> CmmExpr -> FCode ()
+tickyAllocThunk _goods _slop = ifTicky $ do
+ -- TODO is it ever called with a Single-Entry thunk?
+ bumpTickyCounter (fsLit "ALLOC_UP_THK_ctr")
+ bumpTickyCounterByE (fsLit "ALLOC_THK_gds") _goods
+ bumpTickyCounterByE (fsLit "ALLOC_THK_slp") _slop
+
+tickyAllocPAP :: CmmExpr -> CmmExpr -> FCode ()
+tickyAllocPAP _goods _slop = ifTicky $ do
+ bumpTickyCounter (fsLit "ALLOC_PAP_ctr")
+ bumpTickyCounterByE (fsLit "ALLOC_PAP_gds") _goods
+ bumpTickyCounterByE (fsLit "ALLOC_PAP_slp") _slop
-- -----------------------------------------------------------------------------
-- Ticky utils
ifTicky :: FCode () -> FCode ()
-ifTicky code = do dflags <- getDynFlags
- if gopt Opt_Ticky dflags then code
- else return ()
+ifTicky code =
+ getDynFlags >>= \dflags -> when (gopt Opt_Ticky dflags) code
+
+tickyAllocdIsOn :: FCode Bool
+tickyAllocdIsOn = gopt Opt_Ticky_Allocd `fmap` getDynFlags
+
+tickyLNEIsOn :: FCode Bool
+tickyLNEIsOn = gopt Opt_Ticky_LNE `fmap` getDynFlags
+
+tickyDynThunkIsOn :: FCode Bool
+tickyDynThunkIsOn = gopt Opt_Ticky_Dyn_Thunk `fmap` getDynFlags
+
+ifTickyAllocd :: FCode () -> FCode ()
+ifTickyAllocd code = tickyAllocdIsOn >>= \b -> when b code
+
+ifTickyLNE :: FCode () -> FCode ()
+ifTickyLNE code = tickyLNEIsOn >>= \b -> when b code
+
+ifTickyDynThunk :: FCode () -> FCode ()
+ifTickyDynThunk code = tickyDynThunkIsOn >>= \b -> when b code
--- All the ticky-ticky counters are declared "unsigned long" in C
bumpTickyCounter :: FastString -> FCode ()
-bumpTickyCounter lbl = bumpTickyCounter' (cmmLabelOffB (mkCmmDataLabel rtsPackageId lbl) 0)
+bumpTickyCounter lbl = bumpTickyLbl (mkCmmDataLabel rtsPackageId lbl)
+
+bumpTickyCounterBy :: FastString -> Int -> FCode ()
+bumpTickyCounterBy lbl = bumpTickyLblBy (mkCmmDataLabel rtsPackageId lbl)
-bumpTickyCounter' :: CmmLit -> FCode ()
--- krc: note that we're incrementing the _entry_count_ field of the ticky counter
-bumpTickyCounter' lhs = do dflags <- getDynFlags
- emit (addToMem (cLong dflags) (CmmLit lhs) 1)
+bumpTickyCounterByE :: FastString -> CmmExpr -> FCode ()
+bumpTickyCounterByE lbl = bumpTickyLblByE (mkCmmDataLabel rtsPackageId lbl)
+
+bumpTickyEntryCount :: CLabel -> FCode ()
+bumpTickyEntryCount lbl = do
+ dflags <- getDynFlags
+ bumpTickyLit (cmmLabelOffB lbl (oFFSET_StgEntCounter_entry_count dflags))
+
+bumpTickyAllocd :: CLabel -> Int -> FCode ()
+bumpTickyAllocd lbl bytes = do
+ dflags <- getDynFlags
+ bumpTickyLitBy (cmmLabelOffB lbl (oFFSET_StgEntCounter_allocd dflags)) bytes
+
+bumpTickyLbl :: CLabel -> FCode ()
+bumpTickyLbl lhs = bumpTickyLitBy (cmmLabelOffB lhs 0) 1
+
+bumpTickyLblBy :: CLabel -> Int -> FCode ()
+bumpTickyLblBy lhs = bumpTickyLitBy (cmmLabelOffB lhs 0)
+
+bumpTickyLblByE :: CLabel -> CmmExpr -> FCode ()
+bumpTickyLblByE lhs = bumpTickyLitByE (cmmLabelOffB lhs 0)
+
+bumpTickyLit :: CmmLit -> FCode ()
+bumpTickyLit lhs = bumpTickyLitBy lhs 1
+
+bumpTickyLitBy :: CmmLit -> Int -> FCode ()
+bumpTickyLitBy lhs n = do
+ dflags <- getDynFlags
+ -- All the ticky-ticky counters are declared "unsigned long" in C
+ emit (addToMem (cLong dflags) (CmmLit lhs) n)
+
+bumpTickyLitByE :: CmmLit -> CmmExpr -> FCode ()
+bumpTickyLitByE lhs e = do
+ dflags <- getDynFlags
+ -- All the ticky-ticky counters are declared "unsigned long" in C
+ emit (addToMemE (cLong dflags) (CmmLit lhs) e)
bumpHistogram :: FastString -> Int -> FCode ()
bumpHistogram _lbl _n
-- = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongWidth))
- = return () -- TEMP SPJ Apr 07
+ = return () -- TEMP SPJ Apr 07
{-
bumpHistogramE :: LitString -> CmmExpr -> FCode ()
-bumpHistogramE lbl n
+bumpHistogramE lbl n
= do t <- newTemp cLong
emitAssign (CmmLocal t) n
- emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight])
- (mkAssign (CmmLocal t) eight))
- emit (addToMem cLong
- (cmmIndexExpr cLongWidth
- (CmmLit (CmmLabel (mkRtsDataLabel lbl)))
- (CmmReg (CmmLocal t)))
- 1)
- where
+ emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight])
+ (mkAssign (CmmLocal t) eight))
+ emit (addToMem cLong
+ (cmmIndexExpr cLongWidth
+ (CmmLit (CmmLabel (mkRtsDataLabel lbl)))
+ (CmmReg (CmmLocal t)))
+ 1)
+ where
eight = CmmLit (CmmInt 8 cLongWidth)
-}
@@ -369,47 +574,53 @@ bumpHistogramE lbl n
-- Showing the "type category" for ticky-ticky profiling
showTypeCategory :: Type -> Char
- {- {C,I,F,D} char, int, float, double
- T tuple
- S other single-constructor type
- {c,i,f,d} unboxed ditto
- t *unpacked* tuple
- s *unpacked" single-cons...
-
- v void#
- a primitive array
-
- E enumeration type
- + dictionary, unless it's a ...
- L List
- > function
- M other (multi-constructor) data-con type
- . other type
- - reserved for others to mark as "uninteresting"
+ {-
+ + dictionary
+
+ > function
+
+ {C,I,F,D,W} char, int, float, double, word
+ {c,i,f,d,w} unboxed ditto
+
+ T tuple
+
+ P other primitive type
+ p unboxed ditto
+
+ L list
+ E enumeration type
+ S other single-constructor type
+ M other multi-constructor data-con type
+
+ . other type
+
+ - reserved for others to mark as "uninteresting"
+
+ Accurate as of Mar 2013, but I eliminated the Array category instead
+ of updating it, for simplicity. It's in P/p, I think --NSF
+
-}
showTypeCategory ty
- = if isDictTy ty
- then '+'
- else
- case tcSplitTyConApp_maybe ty of
- Nothing -> if isJust (tcSplitFunTy_maybe ty)
- then '>'
- else '.'
-
- Just (tycon, _) ->
- let utc = getUnique tycon in
- if utc == charDataConKey then 'C'
- else if utc == intDataConKey then 'I'
- else if utc == floatDataConKey then 'F'
- else if utc == doubleDataConKey then 'D'
- else if utc == charPrimTyConKey then 'c'
- else if (utc == intPrimTyConKey || utc == wordPrimTyConKey
- || utc == addrPrimTyConKey) then 'i'
- else if utc == floatPrimTyConKey then 'f'
- else if utc == doublePrimTyConKey then 'd'
- else if isPrimTyCon tycon {- array, we hope -} then 'A' -- Bogus
- else if isEnumerationTyCon tycon then 'E'
- else if isTupleTyCon tycon then 'T'
- else if isJust (tyConSingleDataCon_maybe tycon) then 'S'
- else if utc == listTyConKey then 'L'
- else 'M' -- oh, well...
+ | isDictTy ty = '+'
+ | otherwise = case tcSplitTyConApp_maybe ty of
+ Nothing -> '.'
+ Just (tycon, _) ->
+ (if isUnLiftedTyCon tycon then Data.Char.toLower else \x -> x) $
+ let anyOf us = getUnique tycon `elem` us in
+ case () of
+ _ | anyOf [funTyConKey] -> '>'
+ | anyOf [charPrimTyConKey, charTyConKey] -> 'C'
+ | anyOf [doublePrimTyConKey, doubleTyConKey] -> 'D'
+ | anyOf [floatPrimTyConKey, floatTyConKey] -> 'F'
+ | anyOf [intPrimTyConKey, int32PrimTyConKey, int64PrimTyConKey,
+ intTyConKey, int8TyConKey, int16TyConKey, int32TyConKey, int64TyConKey
+ ] -> 'I'
+ | anyOf [wordPrimTyConKey, word32PrimTyConKey, word64PrimTyConKey, wordTyConKey,
+ word8TyConKey, word16TyConKey, word32TyConKey, word64TyConKey
+ ] -> 'W'
+ | anyOf [listTyConKey] -> 'L'
+ | isTupleTyCon tycon -> 'T'
+ | isPrimTyCon tycon -> 'P'
+ | isEnumerationTyCon tycon -> 'E'
+ | isJust (tyConSingleDataCon_maybe tycon) -> 'S'
+ | otherwise -> 'M' -- oh, well...
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index af5e9577b6..3df75ceaa2 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -33,7 +33,7 @@ module StgCmmUtils (
cmmUntag, cmmIsTagged,
- addToMem, addToMemE, addToMemLbl,
+ addToMem, addToMemE, addToMemLblE, addToMemLbl,
mkWordCLit,
newStringCLit, newByteStringCLit,
blankWord
@@ -118,6 +118,9 @@ mkSimpleLit _ other = pprPanic "mkSimpleLit" (ppr other)
addToMemLbl :: CmmType -> CLabel -> Int -> CmmAGraph
addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n
+addToMemLblE :: CmmType -> CLabel -> CmmExpr -> CmmAGraph
+addToMemLblE rep lbl = addToMemE rep (CmmLit (CmmLabel lbl))
+
addToMem :: CmmType -- rep of the counter
-> CmmExpr -- Address
-> Int -- What to add (a word)