summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Pickering <matthewtpickering@gmail.com>2021-11-25 17:03:08 +0000
committerAndreas Klebinger <klebinger.andreas@gmx.at>2022-03-01 14:03:25 +0100
commit1a23d2008ed90b33924ffe35f5c69eb9e3391456 (patch)
tree4ed576840047bfbada7e3e8575c2c57edeb5b05e
parent80f9133e128abc61913d264ecd8b102517b266f5 (diff)
downloadhaskell-wip/ticky-eventlog-rebase.tar.gz
Ticky profiling improvements.wip/ticky-eventlog-rebase
This adds a number of changes to ticky-ticky profiling. When an executable is profiled with IPE profiling it's now possible to associate id-related ticky counters to their source location. This works by emitting the info table address as part of the counter which can be looked up in the IPE table. Add a `-ticky-ap-thunk` flag. This flag prevents the use of some standard thunks which are precompiled into the RTS. This means reduced cache locality and increased code size. But it allows better attribution of execution cost to specific source locations instead of simple attributing it to the standard thunk. ticky-ticky now uses the `arg` field to emit additional information about counters in json format. When ticky-ticky is used in combination with the eventlog eventlog2html can be used to generate a html table from the eventlog similar to the old text output for ticky-ticky.
-rw-r--r--compiler/GHC/Cmm/Parser.y1
-rw-r--r--compiler/GHC/Driver/Config/StgToCmm.hs1
-rw-r--r--compiler/GHC/Driver/Flags.hs1
-rw-r--r--compiler/GHC/Driver/Session.hs2
-rw-r--r--compiler/GHC/Iface/Tidy.hs2
-rw-r--r--compiler/GHC/StgToCmm/Bind.hs32
-rw-r--r--compiler/GHC/StgToCmm/Closure.hs7
-rw-r--r--compiler/GHC/StgToCmm/Config.hs1
-rw-r--r--compiler/GHC/StgToCmm/Env.hs11
-rw-r--r--compiler/GHC/StgToCmm/Expr.hs2
-rw-r--r--compiler/GHC/StgToCmm/Ticky.hs142
-rw-r--r--compiler/GHC/Types/Var/Env.hs3
-rw-r--r--compiler/GHC/Utils/Json.hs10
-rw-r--r--docs/users_guide/profiling.rst148
-rw-r--r--rts/Ticky.c6
-rw-r--r--rts/eventlog/EventLog.c3
-rw-r--r--rts/include/rts/Ticky.h1
-rw-r--r--testsuite/tests/driver/T16167.stdout2
-rw-r--r--testsuite/tests/driver/json.stderr2
-rw-r--r--testsuite/tests/driver/json2.stderr2
20 files changed, 325 insertions, 54 deletions
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
index 6e6a130233..100e4f9b65 100644
--- a/compiler/GHC/Cmm/Parser.y
+++ b/compiler/GHC/Cmm/Parser.y
@@ -1519,6 +1519,7 @@ parseCmmFile dflags this_mod home_unit filename = do
let fstate = F.initFCodeState (profilePlatform $ targetProfile dflags)
let fcode = do
((), cmm) <- getCmm $ unEC code "global" (initEnv (targetProfile dflags)) [] >> return ()
+ -- See Note [Mapping Info Tables to Source Positions] (IPE Maps)
let used_info = map (cmmInfoTableToInfoProvEnt this_mod)
(mapMaybe topInfoTable cmm)
((), cmm2) <- getCmm $ mapM_ emitInfoTableProv used_info
diff --git a/compiler/GHC/Driver/Config/StgToCmm.hs b/compiler/GHC/Driver/Config/StgToCmm.hs
index 4b0e126f98..9896fed3bc 100644
--- a/compiler/GHC/Driver/Config/StgToCmm.hs
+++ b/compiler/GHC/Driver/Config/StgToCmm.hs
@@ -57,6 +57,7 @@ initStgToCmmConfig dflags mod = StgToCmmConfig
, stgToCmmAvx = isAvxEnabled dflags
, stgToCmmAvx2 = isAvx2Enabled dflags
, stgToCmmAvx512f = isAvx512fEnabled dflags
+ , stgToCmmTickyAP = gopt Opt_Ticky_AP dflags
} where profile = targetProfile dflags
platform = profilePlatform profile
bk_end = backend dflags
diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs
index b8abc2f9a2..21530048f2 100644
--- a/compiler/GHC/Driver/Flags.hs
+++ b/compiler/GHC/Driver/Flags.hs
@@ -296,6 +296,7 @@ data GeneralFlag
| Opt_Ticky_LNE
| Opt_Ticky_Dyn_Thunk
| Opt_Ticky_Tag
+ | Opt_Ticky_AP -- ^ Use regular thunks even when we could use std ap thunks in order to get entry counts
| Opt_RPath
| Opt_RelativeDynlibPaths
| Opt_CompactUnwind -- ^ @-fcompact-unwind@
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index bf4eaae513..d4d41accf6 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -2332,6 +2332,8 @@ dynamic_flags_deps = [
(NoArg (setGeneralFlag Opt_Ticky_Allocd))
, make_ord_flag defGhcFlag "ticky-LNE"
(NoArg (setGeneralFlag Opt_Ticky_LNE))
+ , make_ord_flag defGhcFlag "ticky-ap-thunk"
+ (NoArg (setGeneralFlag Opt_Ticky_AP))
, make_ord_flag defGhcFlag "ticky-dyn-thunk"
(NoArg (setGeneralFlag Opt_Ticky_Dyn_Thunk))
, make_ord_flag defGhcFlag "ticky-tag-checks"
diff --git a/compiler/GHC/Iface/Tidy.hs b/compiler/GHC/Iface/Tidy.hs
index c453cc5336..268c43945e 100644
--- a/compiler/GHC/Iface/Tidy.hs
+++ b/compiler/GHC/Iface/Tidy.hs
@@ -370,7 +370,7 @@ tidyProgram hsc_env (ModGuts { mg_module = mod
= Err.withTiming logger
(text "CoreTidy"<+>brackets (ppr mod))
- (const ()) $
+ (const ()) $!
do { let { omit_prags = gopt Opt_OmitInterfacePragmas dflags
; expose_all = gopt Opt_ExposeAllUnfoldings dflags
; print_unqual = mkPrintUnqualified (hsc_unit_env hsc_env) rdr_env
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs
index 17d8556b15..5507173dc7 100644
--- a/compiler/GHC/StgToCmm/Bind.hs
+++ b/compiler/GHC/StgToCmm/Bind.hs
@@ -212,7 +212,7 @@ cgRhs :: Id
)
cgRhs id (StgRhsCon cc con mn _ts args)
- = withNewTickyCounterCon (idName id) con $
+ = withNewTickyCounterCon id con mn $
buildDynCon id mn True cc con (assertNonVoidStgArgs args)
-- con args are always non-void,
-- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
@@ -223,14 +223,16 @@ cgRhs id (StgRhsClosure fvs cc upd_flag args body)
checkFunctionArgTags (text "TagCheck Failed: Rhs of" <> ppr id) id args
profile <- getProfile
check_tags <- stgToCmmDoTagCheck <$> getStgToCmmConfig
- mkRhsClosure profile check_tags id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body
-
+ use_std_ap_thunk <- stgToCmmTickyAP <$> getStgToCmmConfig
+ mkRhsClosure profile use_std_ap_thunk check_tags id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body
------------------------------------------------------------------------
-- Non-constructor right hand sides
------------------------------------------------------------------------
-mkRhsClosure :: Profile -> Bool
+mkRhsClosure :: Profile
+ -> Bool -- Omit AP Thunks to improve profiling
+ -> Bool -- Lint tag inference checks
-> Id -> CostCentreStack
-> [NonVoid Id] -- Free vars
-> UpdateFlag
@@ -274,7 +276,7 @@ for semi-obvious reasons.
-}
---------- See Note [Selectors] ------------------
-mkRhsClosure profile _check_tags bndr _cc
+mkRhsClosure profile _ _check_tags bndr _cc
[NonVoid the_fv] -- Just one free var
upd_flag -- Updatable thunk
[] -- A thunk
@@ -307,7 +309,7 @@ mkRhsClosure profile _check_tags bndr _cc
in cgRhsStdThunk bndr lf_info [StgVarArg the_fv]
---------- See Note [Ap thunks] ------------------
-mkRhsClosure profile check_tags bndr _cc
+mkRhsClosure profile use_std_ap check_tags bndr _cc
fvs
upd_flag
[] -- No args; a thunk
@@ -316,7 +318,8 @@ mkRhsClosure profile check_tags bndr _cc
-- We are looking for an "ApThunk"; see data con ApThunk in GHC.StgToCmm.Closure
-- of form (x1 x2 .... xn), where all the xi are locals (not top-level)
-- So the xi will all be free variables
- | args `lengthIs` (n_fvs-1) -- This happens only if the fun_id and
+ | use_std_ap
+ , args `lengthIs` (n_fvs-1) -- This happens only if the fun_id and
-- args are all distinct local variables
-- The "-1" is for fun_id
-- Missed opportunity: (f x x) is not detected
@@ -340,7 +343,7 @@ mkRhsClosure profile check_tags bndr _cc
payload = StgVarArg fun_id : args
---------- Default case ------------------
-mkRhsClosure profile _check_tags bndr cc fvs upd_flag args body
+mkRhsClosure profile _use_ap _check_tags bndr cc fvs upd_flag args body
= do { let lf_info = mkClosureLFInfo (profilePlatform profile) bndr NotTopLevel fvs upd_flag args
; (id_info, reg) <- rhsIdInfo bndr lf_info
; return (id_info, gen_code lf_info reg) }
@@ -404,13 +407,13 @@ cgRhsStdThunk bndr lf_info payload
}
where
gen_code reg -- AHA! A STANDARD-FORM THUNK
- = withNewTickyCounterStdThunk (lfUpdatable lf_info) (idName bndr) $
+ = withNewTickyCounterStdThunk (lfUpdatable lf_info) (bndr) payload $
do
{ -- LAY OUT THE OBJECT
mod_name <- getModuleName
- ; cfg <- getStgToCmmConfig
- ; let profile = stgToCmmProfile cfg
- ; let platform = stgToCmmPlatform cfg
+ ; profile <- getProfile
+ ; platform <- getPlatform
+ ; let
header = if isLFThunk lf_info then ThunkHeader else StdHeader
(tot_wds, ptr_wds, payload_w_offsets)
= mkVirtHeapOffsets profile header
@@ -476,7 +479,8 @@ closureCodeBody top_lvl bndr cl_info cc [] body fv_details
= withNewTickyCounterThunk
(isStaticClosure cl_info)
(closureUpdReqd cl_info)
- (closureName cl_info) $
+ (closureName cl_info)
+ (map fst fv_details) $
emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $
\(_, node, _) -> thunkCode cl_info fv_details cc node body
where
@@ -488,7 +492,7 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details
arity = length args
in
-- See Note [OneShotInfo overview] in GHC.Types.Basic.
- withNewTickyCounterFun (isOneShotBndr arg0) (closureName cl_info)
+ withNewTickyCounterFun (isOneShotBndr arg0) (closureName cl_info) (map fst fv_details)
nv_args $ do {
; let
diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs
index 2609606292..7c1b5250e4 100644
--- a/compiler/GHC/StgToCmm/Closure.hs
+++ b/compiler/GHC/StgToCmm/Closure.hs
@@ -65,6 +65,7 @@ module GHC.StgToCmm.Closure (
cafBlackHoleInfoTable,
indStaticInfoTable,
staticClosureNeedsLink,
+ mkClosureInfoTableLabel
) where
import GHC.Prelude
@@ -642,7 +643,7 @@ getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method"
data ClosureInfo
= ClosureInfo {
- closureName :: !Name, -- The thing bound to this closure
+ closureName :: !Id, -- The thing bound to this closure
-- we don't really need this field: it's only used in generating
-- code for ticky and profiling, and we could pass the information
-- around separately, but it doesn't do much harm to keep it here.
@@ -679,13 +680,12 @@ mkClosureInfo :: Profile
-> String -- String descriptor
-> ClosureInfo
mkClosureInfo profile is_static id lf_info tot_wds ptr_wds val_descr
- = ClosureInfo { closureName = name
+ = ClosureInfo { closureName = id
, closureLFInfo = lf_info
, closureInfoLabel = info_lbl -- These three fields are
, closureSMRep = sm_rep -- (almost) an info table
, closureProf = prof } -- (we don't have an SRT yet)
where
- name = idName id
sm_rep = mkHeapRep profile is_static ptr_wds nonptr_wds (lfClosureType lf_info)
prof = mkProfilingInfo profile id val_descr
nonptr_wds = tot_wds - ptr_wds
@@ -839,6 +839,7 @@ closureLocalEntryLabel platform
| platformTablesNextToCode platform = toInfoLbl platform . closureInfoLabel
| otherwise = toEntryLbl platform . closureInfoLabel
+-- | Get the info table label for a *thunk*.
mkClosureInfoTableLabel :: Platform -> Id -> LambdaFormInfo -> CLabel
mkClosureInfoTableLabel platform id lf_info
= case lf_info of
diff --git a/compiler/GHC/StgToCmm/Config.hs b/compiler/GHC/StgToCmm/Config.hs
index 623a7b8f0a..b3014fd302 100644
--- a/compiler/GHC/StgToCmm/Config.hs
+++ b/compiler/GHC/StgToCmm/Config.hs
@@ -61,6 +61,7 @@ data StgToCmmConfig = StgToCmmConfig
, stgToCmmAllowExtendedAddSubInstrs :: !Bool -- ^ Allowed to generate AddWordC, SubWordC, Add2, etc.
, stgToCmmAllowIntMul2Instr :: !Bool -- ^ Allowed to generate IntMul2 instruction
, stgToCmmAllowFabsInstrs :: !Bool -- ^ Allowed to generate Fabs instructions
+ , stgToCmmTickyAP :: !Bool -- ^ Disable use of precomputed standard thunks.
------------------------------ SIMD flags ------------------------------------
-- Each of these flags checks vector compatibility with the backend requested
-- during compilation. In essence, this means checking for @-fllvm@ which is
diff --git a/compiler/GHC/StgToCmm/Env.hs b/compiler/GHC/StgToCmm/Env.hs
index eef1420a72..4d1fff30df 100644
--- a/compiler/GHC/StgToCmm/Env.hs
+++ b/compiler/GHC/StgToCmm/Env.hs
@@ -17,7 +17,7 @@ module GHC.StgToCmm.Env (
bindArgsToRegs, bindToReg, rebindToReg,
bindArgToReg, idToReg,
- getCgIdInfo,
+ getCgIdInfo, getCgInfo_maybe,
maybeLetNoEscape,
) where
@@ -44,6 +44,9 @@ import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
+import GHC.Builtin.Names (getUnique)
+
+
-------------------------------------
-- Manipulating CgIdInfo
-------------------------------------
@@ -150,6 +153,12 @@ getCgIdInfo id
cgLookupPanic id -- Bug
}}}
+-- | Retrieve cg info for a name if it already exists.
+getCgInfo_maybe :: Name -> FCode (Maybe CgIdInfo)
+getCgInfo_maybe name
+ = do { local_binds <- getBinds -- Try local bindings first
+ ; return $ lookupVarEnv_Directly local_binds (getUnique name) }
+
cgLookupPanic :: Id -> FCode a
cgLookupPanic id
= do local_binds <- getBinds
diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs
index 5129a45b1c..55892b8789 100644
--- a/compiler/GHC/StgToCmm/Expr.hs
+++ b/compiler/GHC/StgToCmm/Expr.hs
@@ -216,7 +216,7 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
= do platform <- getPlatform
return ( lneIdInfo platform bndr args, code )
where
- code = forkLneBody $ withNewTickyCounterLNE (idName bndr) args $ do
+ code = forkLneBody $ withNewTickyCounterLNE bndr args $ do
{ restoreCurrentCostCentre cc_slot
; arg_regs <- bindArgsToRegs args
; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
diff --git a/compiler/GHC/StgToCmm/Ticky.hs b/compiler/GHC/StgToCmm/Ticky.hs
index 4aec817412..a538ff2f4d 100644
--- a/compiler/GHC/StgToCmm/Ticky.hs
+++ b/compiler/GHC/StgToCmm/Ticky.hs
@@ -135,6 +135,7 @@ import GHC.Cmm.Utils
import GHC.Cmm.CLabel
import GHC.Runtime.Heap.Layout
+
import GHC.Types.Name
import GHC.Types.Id
import GHC.Types.Basic
@@ -154,6 +155,11 @@ import GHC.Core.Predicate
import Data.Maybe
import qualified Data.Char
import Control.Monad ( when, unless )
+import GHC.Types.Id.Info
+import GHC.Utils.Trace
+import GHC.StgToCmm.Env (getCgInfo_maybe)
+import Data.Coerce (coerce)
+import GHC.Utils.Json
-----------------------------------------------------------------------------
--
@@ -161,23 +167,73 @@ import Control.Monad ( when, unless )
--
-----------------------------------------------------------------------------
+-- | Number of arguments for a ticky counter.
+--
+-- Ticky currently treats args to constructor allocations differently than those for functions/LNE bindings.
+tickyArgArity :: TickyClosureType -> Int
+tickyArgArity (TickyFun _ _fvs args) = length args
+tickyArgArity (TickyLNE args) = length args
+tickyArgArity (TickyCon{}) = 0
+tickyArgArity (TickyThunk{}) = 0
+
+tickyArgDesc :: TickyClosureType -> String
+tickyArgDesc arg_info =
+ case arg_info of
+ TickyFun _ _fvs args -> map (showTypeCategory . idType . fromNonVoid) args
+ TickyLNE args -> map (showTypeCategory . idType . fromNonVoid) args
+ TickyThunk{} -> ""
+ TickyCon{} -> ""
+
+tickyFvDesc :: TickyClosureType -> String
+tickyFvDesc arg_info =
+ case arg_info of
+ TickyFun _ fvs _args -> map (showTypeCategory . idType . fromNonVoid) fvs
+ TickyLNE{} -> ""
+ TickyThunk _ _ fvs -> map (showTypeCategory . stgArgType) fvs
+ TickyCon{} -> ""
+
+instance ToJson TickyClosureType where
+ json info = case info of
+ (TickyFun {}) -> mkInfo (tickyFvDesc info) (tickyArgDesc info) "fun"
+ (TickyLNE {}) -> mkInfo [] (tickyArgDesc info) "lne"
+ (TickyThunk uf _ _) -> mkInfo (tickyFvDesc info) [] ("thk" ++ if uf then "_u" else "")
+ (TickyCon{}) -> mkInfo [] [] "con"
+ where
+ mkInfo :: String -> String -> String -> JsonDoc
+ mkInfo fvs args ty =
+ JSObject
+ [("type", json "entCntr")
+ ,("subTy", json ty)
+ ,("fvs_c", json (length fvs))
+ ,("fvs" , json fvs)
+ ,("args", json args)
+ ]
+
+tickyEntryDesc :: (SDocContext -> TickyClosureType -> String)
+tickyEntryDesc ctxt = renderWithContext ctxt . renderJSON . json
+
data TickyClosureType
= TickyFun
Bool -- True <-> single entry
+ [NonVoid Id] -- ^ FVs
+ [NonVoid Id] -- ^ Args
| TickyCon
DataCon -- the allocated constructor
+ ConstructorNumber
| TickyThunk
Bool -- True <-> updateable
Bool -- True <-> standard thunk (AP or selector), has no entry counter
+ [StgArg] -- ^ FVS, StgArg because for thunks these can also be literals.
| TickyLNE
+ [NonVoid Id] -- ^ Args
-withNewTickyCounterFun :: Bool -> Name -> [NonVoid Id] -> FCode a -> FCode a
-withNewTickyCounterFun single_entry = withNewTickyCounter (TickyFun single_entry)
+withNewTickyCounterFun :: Bool -> Id -> [NonVoid Id] -> [NonVoid Id] -> FCode a -> FCode a
+withNewTickyCounterFun single_entry f fvs args = withNewTickyCounter (TickyFun single_entry fvs args) f
-withNewTickyCounterLNE :: Name -> [NonVoid Id] -> FCode a -> FCode a
+withNewTickyCounterLNE :: Id -> [NonVoid Id] -> FCode a -> FCode a
withNewTickyCounterLNE nm args code = do
b <- isEnabled stgToCmmTickyLNE
- if not b then code else withNewTickyCounter TickyLNE nm args code
+ if not b then code else withNewTickyCounter (TickyLNE args) nm code
thunkHasCounter :: Bool -> FCode Bool
thunkHasCounter isStatic = (not isStatic &&) <$> isEnabled stgToCmmTickyDynThunk
@@ -185,46 +241,50 @@ thunkHasCounter isStatic = (not isStatic &&) <$> isEnabled stgToCmmTickyDynThunk
withNewTickyCounterThunk
:: Bool -- ^ static
-> Bool -- ^ updateable
- -> Name
+ -> Id
+ -> [NonVoid Id] -- ^ Free vars
-> FCode a
-> FCode a
-withNewTickyCounterThunk isStatic isUpdatable name code = do
+withNewTickyCounterThunk isStatic isUpdatable name fvs code = do
has_ctr <- thunkHasCounter isStatic
if not has_ctr
then code
- else withNewTickyCounter (TickyThunk isUpdatable False) name [] code
+ else withNewTickyCounter (TickyThunk isUpdatable False (map StgVarArg $ coerce fvs)) name code
withNewTickyCounterStdThunk
:: Bool -- ^ updateable
- -> Name
+ -> Id
+ -> [StgArg] -- ^ Free vars + function
-> FCode a
-> FCode a
-withNewTickyCounterStdThunk isUpdatable name code = do
+withNewTickyCounterStdThunk isUpdatable name fvs code = do
has_ctr <- thunkHasCounter False
if not has_ctr
then code
- else withNewTickyCounter (TickyThunk isUpdatable True) name [] code
+ else withNewTickyCounter (TickyThunk isUpdatable True fvs) name code
withNewTickyCounterCon
- :: Name
+ :: Id
-> DataCon
+ -> ConstructorNumber
-> FCode a
-> FCode a
-withNewTickyCounterCon name datacon code = do
+withNewTickyCounterCon name datacon info code = do
has_ctr <- thunkHasCounter False
if not has_ctr
then code
- else withNewTickyCounter (TickyCon datacon) name [] code
+ else withNewTickyCounter (TickyCon datacon info) name code
-- args does not include the void arguments
-withNewTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode a -> FCode a
-withNewTickyCounter cloType name args m = do
- lbl <- emitTickyCounter cloType name args
+withNewTickyCounter :: TickyClosureType -> Id -> FCode a -> FCode a
+withNewTickyCounter cloType name m = do
+ lbl <- emitTickyCounter cloType name
setTickyCtrLabel lbl m
-emitTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode CLabel
-emitTickyCounter cloType name args
- = let ctr_lbl = mkRednCountsLabel name in
+emitTickyCounter :: TickyClosureType -> Id -> FCode CLabel
+emitTickyCounter cloType tickee
+ = let name = idName tickee in
+ let ctr_lbl = mkRednCountsLabel name in
(>> return ctr_lbl) $
ifTicky $ do
{ cfg <- getStgToCmmConfig
@@ -239,23 +299,52 @@ emitTickyCounter cloType name args
ppr_for_ticky_name :: SDoc
ppr_for_ticky_name =
let ext = case cloType of
- TickyFun single_entry -> parens $ hcat $ punctuate comma $
+ TickyFun single_entry _ _-> parens $ hcat $ punctuate comma $
[text "fun"] ++ [text "se"|single_entry]
- TickyCon datacon -> parens (text "con:" <+> ppr (dataConName datacon))
- TickyThunk upd std -> parens $ hcat $ punctuate comma $
+ TickyCon datacon _cn -> parens (text "con:" <+> ppr (dataConName datacon))
+ TickyThunk upd std _-> parens $ hcat $ punctuate comma $
[text "thk"] ++ [text "se"|not upd] ++ [text "std"|std]
- TickyLNE | isInternalName name -> parens (text "LNE")
- | otherwise -> panic "emitTickyCounter: how is this an external LNE?"
+ TickyLNE _ | isInternalName name -> parens (text "LNE")
+ | otherwise -> panic "emitTickyCounter: how is this an external LNE?"
p = case hasHaskellName parent of
-- NB the default "top" ticky ctr does not
-- have a Haskell name
Just pname -> text "in" <+> ppr (nameUnique pname)
_ -> empty
in pprTickyName mod_name name <+> ext <+> p
+ ; this_mod <- getModuleName
+ ; let t = case cloType of
+ TickyCon {} -> "C"
+ TickyFun {} -> "F"
+ TickyThunk {} -> "T"
+ TickyLNE {} -> "L"
+ ; info_lbl <- case cloType of
+ TickyCon dc mn -> case mn of
+ NoNumber -> return $! CmmLabel $ mkConInfoTableLabel (dataConName dc) DefinitionSite
+ (Numbered n) -> return $! CmmLabel $ mkConInfoTableLabel (dataConName dc) (UsageSite this_mod n)
+ TickyFun {} ->
+ return $! CmmLabel $ mkInfoTableLabel name NoCafRefs
+
+ TickyThunk _ std_thunk _fvs
+ | not std_thunk
+ -> return $! CmmLabel $ mkInfoTableLabel name NoCafRefs
+ -- IPE Maps have no entry for std thunks.
+ | otherwise
+ -> do
+ lf_info <- getCgInfo_maybe name
+ profile <- getProfile
+ case lf_info of
+ Just (CgIdInfo { cg_lf = cg_lf })
+ | isLFThunk cg_lf
+ -> return $! CmmLabel $ mkClosureInfoTableLabel (profilePlatform profile) tickee cg_lf
+ _ -> pprTraceDebug "tickyThunkUnknown" (text t <> colon <> ppr name <+> ppr (mkInfoTableLabel name NoCafRefs))
+ return $! zeroCLit platform
+
+ TickyLNE {} -> return $! zeroCLit platform
; let ctx = defaultSDocContext {sdocPprDebug = True}
; fun_descr_lit <- newStringCLit $ renderWithContext ctx ppr_for_ticky_name
- ; arg_descr_lit <- newStringCLit $ map (showTypeCategory . idType . fromNonVoid) args
+ ; arg_descr_lit <- newStringCLit $ tickyEntryDesc ctx cloType
; emitDataLits ctr_lbl
-- Must match layout of rts/include/rts/Ticky.h's StgEntCounter
--
@@ -263,10 +352,11 @@ emitTickyCounter cloType name args
-- before, but the code generator wasn't handling that
-- properly and it led to chaos, panic and disorder.
[ mkIntCLit platform 0, -- registered?
- mkIntCLit platform (length args), -- Arity
+ mkIntCLit platform (tickyArgArity cloType), -- Arity
mkIntCLit platform 0, -- Heap allocated for this thing
fun_descr_lit,
arg_descr_lit,
+ info_lbl,
zeroCLit platform, -- Entries into this thing
zeroCLit platform, -- Heap allocated by this thing
zeroCLit platform -- Link to next StgEntCounter
diff --git a/compiler/GHC/Types/Var/Env.hs b/compiler/GHC/Types/Var/Env.hs
index 3ad0f10156..e70e89f8ea 100644
--- a/compiler/GHC/Types/Var/Env.hs
+++ b/compiler/GHC/Types/Var/Env.hs
@@ -17,6 +17,7 @@ module GHC.Types.Var.Env (
delVarEnvList, delVarEnv,
minusVarEnv,
lookupVarEnv, lookupVarEnv_NF, lookupWithDefaultVarEnv,
+ lookupVarEnv_Directly,
mapVarEnv, zipVarEnv,
modifyVarEnv, modifyVarEnv_Directly,
isEmptyVarEnv,
@@ -506,6 +507,7 @@ modifyVarEnv :: (a -> a) -> VarEnv a -> Var -> VarEnv a
isEmptyVarEnv :: VarEnv a -> Bool
lookupVarEnv :: VarEnv a -> Var -> Maybe a
+lookupVarEnv_Directly :: VarEnv a -> Unique -> Maybe a
filterVarEnv :: (a -> Bool) -> VarEnv a -> VarEnv a
lookupVarEnv_NF :: VarEnv a -> Var -> a
lookupWithDefaultVarEnv :: VarEnv a -> a -> Var -> a
@@ -530,6 +532,7 @@ minusVarEnv = minusUFM
plusVarEnv = plusUFM
plusVarEnvList = plusUFMList
lookupVarEnv = lookupUFM
+lookupVarEnv_Directly = lookupUFM_Directly
filterVarEnv = filterUFM
lookupWithDefaultVarEnv = lookupWithDefaultUFM
mapVarEnv = mapUFM
diff --git a/compiler/GHC/Utils/Json.hs b/compiler/GHC/Utils/Json.hs
index 21358847c0..65880fe76e 100644
--- a/compiler/GHC/Utils/Json.hs
+++ b/compiler/GHC/Utils/Json.hs
@@ -1,4 +1,6 @@
{-# LANGUAGE GADTs #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE FlexibleInstances #-}
module GHC.Utils.Json where
import GHC.Prelude
@@ -29,7 +31,7 @@ renderJSON d =
JSObject fs -> braces $ pprList renderField fs
where
renderField :: (String, JsonDoc) -> SDoc
- renderField (s, j) = doubleQuotes (text s) <> colon <+> renderJSON j
+ renderField (s, j) = doubleQuotes (text s) <> colon <> renderJSON j
pprList pp xs = hcat (punctuate comma (map pp xs))
@@ -54,3 +56,9 @@ escapeJsonString = concatMap escapeChar
class ToJson a where
json :: a -> JsonDoc
+
+instance ToJson String where
+ json = JSString . escapeJsonString
+
+instance ToJson Int where
+ json = JSInt
diff --git a/docs/users_guide/profiling.rst b/docs/users_guide/profiling.rst
index dcfd42b84e..9a905ba3de 100644
--- a/docs/users_guide/profiling.rst
+++ b/docs/users_guide/profiling.rst
@@ -1726,6 +1726,37 @@ Using “ticky-ticky” profiling (for implementors)
*by* each closure type. See :ghc-flag:`-ticky-allocd` to keep track of
allocations *of* each closure type as well.
+
+GHC's ticky-ticky profiler provides a low-level facility for tracking
+entry and allocation counts of particular individual closures.
+Ticky-ticky profiling requires a certain familiarity with GHC
+internals, so it is best suited for expert users, but can provide an invaluable
+precise insight into the allocation behaviour of your programs.
+
+Getting started with ticky profiling consists of three steps.
+
+1. Add the ``-ticky`` flag when compiling a Haskell module to enable "ticky-ticky" profiling of that module. This makes GHC emit performance-counting instructions in every STG function.
+
+2. Add ``-ticky`` to the command line when linking, so that you link against a version of the runtime system that allows you to display the results. In fact, in the link phase -ticky implies -debug, so you get the debug version of the runtime system too.
+
+3. Then when running your program you can collect the results of the profiling in two ways.
+
+ * Using the eventlog, the :rts-flag:`-lT <-l ⟨flags⟩>` flag will emit ticky samples
+ to the eventlog periodically.
+ This has the advantage of being able to resolve dynamic behaviors over the program's
+ lifetime. See :ref:`ticky-event-format` for details on the event types
+ reported. The ticky information can be rendered into an interactive table
+ using eventlog2html.
+ * A legacy textual format is emitted using the :rts-flag:`-r ⟨file⟩` flag. This
+ produces a textual table containing information about how much each counter
+ ticked throughout the duration of the program.
+
+Additional Ticky Flags
+----------------------
+
+There are some additional flags which can be used to increase the number of
+ticky counters and the quality of the profile.
+
.. ghc-flag:: -ticky-allocd
:shortdesc: Track the number of times each closure type is allocated.
:type: dynamic
@@ -1765,6 +1796,14 @@ Using “ticky-ticky” profiling (for implementors)
Note that these counters are currently not processed well be eventlog2html. So if you want to check them you will have to use the text based interface.
+.. ghc-flag:: -ticky-ap-thunk
+ :shortdesc: Don't use standard AP thunks on order to get more reliable entry counters.
+ :type: dynamic
+ :category:
+
+ This allows us to get accurate entry counters for code like `f x y` at the cost of code size.
+ We do this but not using the precomputed standard AP thunk code.
+
GHC's ticky-ticky profiler provides a low-level facility for tracking
entry and allocation counts of particular individual closures.
Because ticky-ticky profiling requires a certain familiarity with GHC
@@ -1780,6 +1819,115 @@ advantage of being able to resolve dynamic behaviors over the program's
lifetime. See :ref:`ticky-event-format` for details on the event types
reported.
+Understanding the Output of Ticky-Ticky profiles
+------------------------------------------------
+
+Once you have your rendered profile then you can begin to understand the allocation
+behaviour of your program. There are two classes of ticky-ticky counters.
+
+Name-specific counters
+
+ Each "name-specific counter" is associated with a name that is defined in the
+ result of the optimiser. For each such name, there are three possible counters:
+ entries, heap allocation by the named thing, and heap used to allocate that
+ named thing.
+
+Global counters
+
+ Each "global counter" describes some aspect of the entire program execution.
+ For example, one global counter tracks total heap allocation; another tracks allocation for PAPs.
+
+In general you are probably interested mostly in the name-specific counters as these
+can provided detailed information about where allocates how much in your program.
+
+Information about name-specific counters
+----------------------------------------
+
+Name-specific counters provide the following information about a closure.
+
+* Entries - How many times the closure was entered.
+* Allocs - How much (in bytes) is allocated *by* that closure.
+* Allod - How often the closure is allocated.
+* FVs - The free variables captured by that closure.
+* Args - The arguments that closure takes.
+
+The FVs and Args information is encoded using a small DSL.
+
++------------------+---------------------------------------------------+
+|Classification |Description |
++==================+===================================================+
+|+ |dictionary |
++------------------+---------------------------------------------------+
+|\> |function |
++------------------+---------------------------------------------------+
+|{C,I,F,D,W} | char, int, float, double, word |
++------------------+---------------------------------------------------+
+|{c,i,f,d,w} | unboxed ditto |
++------------------+---------------------------------------------------+
+|T | tuple |
++------------------+---------------------------------------------------+
+|P | other primitive type |
++------------------+---------------------------------------------------+
+|p | unboxed primitive type |
++------------------+---------------------------------------------------+
+|L | list |
++------------------+---------------------------------------------------+
+|E | enumeration type |
++------------------+---------------------------------------------------+
+|S | Single constructor type |
++------------------+---------------------------------------------------+
+|M | Multi constructor type |
++------------------+---------------------------------------------------+
+|. |other type |
++------------------+---------------------------------------------------+
+|- |reserved for others to mark as "uninteresting" |
++------------------+---------------------------------------------------+
+
+In particular note that you can use the ticky profiler to see any function
+calls to dictionary arguments by searching the profile for the ``+`` classifier.
+This indicates that the function has failed to specialise for one reason or another.
+
+Examples
+--------
+
+A typical use of ticky-ticky would be to generate a ticky report using the eventlog by evoking an
+application with RTS arguments like this:
+
+``app <args> +RTS -l-augT``
+
+This will produce an eventlog file which contains results from ticky counters. This file can
+be manually inspected like any regular eventlog. However for ticky-ticky eventlog2html has
+good support for producing tables from these logs.
+
+With an up to date version of eventlog2html this can be simply done by invoking eventlog2html
+on the produced eventlog. In the example above the invocation would then be ``eventlog2html app.eventlog``
+Which will produce a searchable and sortable table containing all the ticky counters in the log.
+
+Notes about ticky profiling
+---------------------------
+
+* You can mix together modules compiled with and without ``-ticky`` but you will
+ miss out on allocations and counts from uninstrumented modules in the profile.
+
+* Linking with the ``-ticky`` has a quite severe performance impact on your program.
+ ``-ticky`` implies using the unoptimised ``-debug`` RTS. Therefore ``-ticky``
+ shouldn't be used for production builds.
+
+* Building with ``-ticky`` doesn't affect core optimisations of your program as the
+ counters are inserted after the STG pipeline. At which point most optimizations have
+ already been run.
+
+* When using the eventlog it is possible to combine together ticky-ticky and IPE
+ based profiling as each ticky counter definition has an associated info table.
+ This address can be looked up in the IPE map so that further information (such
+ as source location) can be determined about that closure.
+
+* Global ticky counters are only available in the textual ticky output (``+RTS -r``).
+ But this mode has some limitations (e.g. on column widths) and will contain raw json output
+ in some columns. For this reason using an eventlog-based approach should be prefered if
+ possible.
+
+
.. [1]
:ghc-flag:`-fprof-auto` was known as ``-auto-all`` prior to
GHC 7.4.1.
diff --git a/rts/Ticky.c b/rts/Ticky.c
index 2ce4aab658..f82cd27104 100644
--- a/rts/Ticky.c
+++ b/rts/Ticky.c
@@ -17,7 +17,7 @@
*/
StgEntCounter top_ct
= { 0, 0, 0,
- "TOP", "",
+ "TOP", "", NULL,
0, 0, NULL };
/* Data structure used in ``registering'' one of these counters. */
@@ -377,13 +377,13 @@ printRegisteredCounterInfo (FILE *tf)
fprintf(tf,"\nThe following table is explained by https://gitlab.haskell.org/ghc/ghc/wikis/debugging/ticky-ticky\nAll allocation numbers are in bytes.\n");
fprintf(tf,"\n**************************************************\n\n");
}
- fprintf(tf, "%11s%11s%11s %-23s %s\n",
+ fprintf(tf, "%11s%12s%12s %-63s %s\n",
"Entries", "Alloc", "Alloc'd", "Non-void Arguments", "STG Name");
fprintf(tf, "--------------------------------------------------------------------------------\n");
/* Function name at the end so it doesn't mess up the tabulation */
for (p = ticky_entry_ctrs; p != NULL; p = p->link) {
- fprintf(tf, "%11" FMT_Int "%11" FMT_Int "%11" FMT_Int " %3lu %-20.20s %s",
+ fprintf(tf, "%11" FMT_Int "%12" FMT_Int "%12" FMT_Int " %3lu %-60.60s %s",
p->entry_count,
p->allocs,
p->allocd,
diff --git a/rts/eventlog/EventLog.c b/rts/eventlog/EventLog.c
index ca37b441c6..d67f9e7bb1 100644
--- a/rts/eventlog/EventLog.c
+++ b/rts/eventlog/EventLog.c
@@ -1355,7 +1355,7 @@ void postProfBegin(void)
#if defined(TICKY_TICKY)
static void postTickyCounterDef(EventsBuf *eb, StgEntCounter *p)
{
- StgWord len = 8 + 2 + strlen(p->arg_kinds)+1 + strlen(p->str)+1;
+ StgWord len = 8 + 2 + strlen(p->arg_kinds)+1 + strlen(p->str)+1 + 8;
ensureRoomForVariableEvent(eb, len);
postEventHeader(eb, EVENT_TICKY_COUNTER_DEF);
postPayloadSize(eb, len);
@@ -1364,6 +1364,7 @@ static void postTickyCounterDef(EventsBuf *eb, StgEntCounter *p)
postWord16(eb, (uint16_t) p->arity);
postString(eb, p->arg_kinds);
postString(eb, p->str);
+ postWord64(eb, (W_) (INFO_PTR_TO_STRUCT(p->info)));
}
void postTickyCounterDefs(StgEntCounter *counters)
diff --git a/rts/include/rts/Ticky.h b/rts/include/rts/Ticky.h
index 93043d8514..7658e3c08a 100644
--- a/rts/include/rts/Ticky.h
+++ b/rts/include/rts/Ticky.h
@@ -26,6 +26,7 @@ typedef struct _StgEntCounter {
/* (rest of args are in registers) */
char *str; /* name of the thing */
char *arg_kinds; /* info about the args types */
+ StgInfoTable *info; /* Info table corresponding to this closure */
StgInt entry_count; /* Trips to fast entry code */
StgInt allocs; /* number of allocations by this fun */
struct _StgEntCounter *link;/* link to chain them all together */
diff --git a/testsuite/tests/driver/T16167.stdout b/testsuite/tests/driver/T16167.stdout
index 5416b95894..3da369eb8b 100644
--- a/testsuite/tests/driver/T16167.stdout
+++ b/testsuite/tests/driver/T16167.stdout
@@ -1 +1 @@
-{"span": {"file": "T16167.hs","startLine": 1,"startCol": 8,"endLine": 1,"endCol": 9},"doc": "parse error on input \u2018f\u2019","messageClass": "MCDiagnostic SevError ErrorWithoutFlag"}
+{"span":{"file":"T16167.hs","startLine":1,"startCol":8,"endLine":1,"endCol":9},"doc":"parse error on input \u2018f\u2019","messageClass":"MCDiagnostic SevError ErrorWithoutFlag"}
diff --git a/testsuite/tests/driver/json.stderr b/testsuite/tests/driver/json.stderr
index cb34abd7ac..ec28dbb206 100644
--- a/testsuite/tests/driver/json.stderr
+++ b/testsuite/tests/driver/json.stderr
@@ -1 +1 @@
-{"span": {"file": "json.hs","startLine": 6,"startCol": 7,"endLine": 6,"endCol": 8},"doc": "\u2022 No instance for (Num (a -> a)) arising from the literal \u20185\u2019\n (maybe you haven't applied a function to enough arguments?)\n\u2022 In the expression: 5\n In an equation for \u2018id1\u2019: id1 = 5","messageClass": "MCDiagnostic SevError ErrorWithoutFlag"}
+{"span":{"file":"json.hs","startLine":6,"startCol":7,"endLine":6,"endCol":8},"doc":"\u2022 No instance for (Num (a -> a)) arising from the literal \u20185\u2019\n (maybe you haven't applied a function to enough arguments?)\n\u2022 In the expression: 5\n In an equation for \u2018id1\u2019: id1 = 5","messageClass":"MCDiagnostic SevError ErrorWithoutFlag"}
diff --git a/testsuite/tests/driver/json2.stderr b/testsuite/tests/driver/json2.stderr
index 71d7f5edfa..6c69d32881 100644
--- a/testsuite/tests/driver/json2.stderr
+++ b/testsuite/tests/driver/json2.stderr
@@ -1 +1 @@
-{"span": null,"doc": "TYPE SIGNATURES\n foo :: forall a. a -> a\nDependent modules: []\nDependent packages: [base-4.16.0.0]","messageClass": "MCOutput"}
+{"span":null,"doc":"TYPE SIGNATURES\n foo :: forall a. a -> a\nDependent modules: []\nDependent packages: [base-4.16.0.0]","messageClass":"MCOutput"}