diff options
-rw-r--r-- | compiler/GHC/Cmm/Parser.y | 1 | ||||
-rw-r--r-- | compiler/GHC/Driver/Config/StgToCmm.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/Driver/Flags.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/Driver/Session.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/Iface/Tidy.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Bind.hs | 32 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Closure.hs | 7 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Config.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Env.hs | 11 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Expr.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Ticky.hs | 142 | ||||
-rw-r--r-- | compiler/GHC/Types/Var/Env.hs | 3 | ||||
-rw-r--r-- | compiler/GHC/Utils/Json.hs | 10 | ||||
-rw-r--r-- | docs/users_guide/profiling.rst | 148 | ||||
-rw-r--r-- | rts/Ticky.c | 6 | ||||
-rw-r--r-- | rts/eventlog/EventLog.c | 3 | ||||
-rw-r--r-- | rts/include/rts/Ticky.h | 1 | ||||
-rw-r--r-- | testsuite/tests/driver/T16167.stdout | 2 | ||||
-rw-r--r-- | testsuite/tests/driver/json.stderr | 2 | ||||
-rw-r--r-- | testsuite/tests/driver/json2.stderr | 2 |
20 files changed, 325 insertions, 54 deletions
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y index 6e6a130233..100e4f9b65 100644 --- a/compiler/GHC/Cmm/Parser.y +++ b/compiler/GHC/Cmm/Parser.y @@ -1519,6 +1519,7 @@ parseCmmFile dflags this_mod home_unit filename = do let fstate = F.initFCodeState (profilePlatform $ targetProfile dflags) let fcode = do ((), cmm) <- getCmm $ unEC code "global" (initEnv (targetProfile dflags)) [] >> return () + -- See Note [Mapping Info Tables to Source Positions] (IPE Maps) let used_info = map (cmmInfoTableToInfoProvEnt this_mod) (mapMaybe topInfoTable cmm) ((), cmm2) <- getCmm $ mapM_ emitInfoTableProv used_info diff --git a/compiler/GHC/Driver/Config/StgToCmm.hs b/compiler/GHC/Driver/Config/StgToCmm.hs index 4b0e126f98..9896fed3bc 100644 --- a/compiler/GHC/Driver/Config/StgToCmm.hs +++ b/compiler/GHC/Driver/Config/StgToCmm.hs @@ -57,6 +57,7 @@ initStgToCmmConfig dflags mod = StgToCmmConfig , stgToCmmAvx = isAvxEnabled dflags , stgToCmmAvx2 = isAvx2Enabled dflags , stgToCmmAvx512f = isAvx512fEnabled dflags + , stgToCmmTickyAP = gopt Opt_Ticky_AP dflags } where profile = targetProfile dflags platform = profilePlatform profile bk_end = backend dflags diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs index b8abc2f9a2..21530048f2 100644 --- a/compiler/GHC/Driver/Flags.hs +++ b/compiler/GHC/Driver/Flags.hs @@ -296,6 +296,7 @@ data GeneralFlag | Opt_Ticky_LNE | Opt_Ticky_Dyn_Thunk | Opt_Ticky_Tag + | Opt_Ticky_AP -- ^ Use regular thunks even when we could use std ap thunks in order to get entry counts | Opt_RPath | Opt_RelativeDynlibPaths | Opt_CompactUnwind -- ^ @-fcompact-unwind@ diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs index bf4eaae513..d4d41accf6 100644 --- a/compiler/GHC/Driver/Session.hs +++ b/compiler/GHC/Driver/Session.hs @@ -2332,6 +2332,8 @@ dynamic_flags_deps = [ (NoArg (setGeneralFlag Opt_Ticky_Allocd)) , make_ord_flag defGhcFlag "ticky-LNE" (NoArg (setGeneralFlag Opt_Ticky_LNE)) + , make_ord_flag defGhcFlag "ticky-ap-thunk" + (NoArg (setGeneralFlag Opt_Ticky_AP)) , make_ord_flag defGhcFlag "ticky-dyn-thunk" (NoArg (setGeneralFlag Opt_Ticky_Dyn_Thunk)) , make_ord_flag defGhcFlag "ticky-tag-checks" diff --git a/compiler/GHC/Iface/Tidy.hs b/compiler/GHC/Iface/Tidy.hs index c453cc5336..268c43945e 100644 --- a/compiler/GHC/Iface/Tidy.hs +++ b/compiler/GHC/Iface/Tidy.hs @@ -370,7 +370,7 @@ tidyProgram hsc_env (ModGuts { mg_module = mod = Err.withTiming logger (text "CoreTidy"<+>brackets (ppr mod)) - (const ()) $ + (const ()) $! do { let { omit_prags = gopt Opt_OmitInterfacePragmas dflags ; expose_all = gopt Opt_ExposeAllUnfoldings dflags ; print_unqual = mkPrintUnqualified (hsc_unit_env hsc_env) rdr_env diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs index 17d8556b15..5507173dc7 100644 --- a/compiler/GHC/StgToCmm/Bind.hs +++ b/compiler/GHC/StgToCmm/Bind.hs @@ -212,7 +212,7 @@ cgRhs :: Id ) cgRhs id (StgRhsCon cc con mn _ts args) - = withNewTickyCounterCon (idName id) con $ + = withNewTickyCounterCon id con mn $ buildDynCon id mn True cc con (assertNonVoidStgArgs args) -- con args are always non-void, -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise @@ -223,14 +223,16 @@ cgRhs id (StgRhsClosure fvs cc upd_flag args body) checkFunctionArgTags (text "TagCheck Failed: Rhs of" <> ppr id) id args profile <- getProfile check_tags <- stgToCmmDoTagCheck <$> getStgToCmmConfig - mkRhsClosure profile check_tags id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body - + use_std_ap_thunk <- stgToCmmTickyAP <$> getStgToCmmConfig + mkRhsClosure profile use_std_ap_thunk check_tags id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body ------------------------------------------------------------------------ -- Non-constructor right hand sides ------------------------------------------------------------------------ -mkRhsClosure :: Profile -> Bool +mkRhsClosure :: Profile + -> Bool -- Omit AP Thunks to improve profiling + -> Bool -- Lint tag inference checks -> Id -> CostCentreStack -> [NonVoid Id] -- Free vars -> UpdateFlag @@ -274,7 +276,7 @@ for semi-obvious reasons. -} ---------- See Note [Selectors] ------------------ -mkRhsClosure profile _check_tags bndr _cc +mkRhsClosure profile _ _check_tags bndr _cc [NonVoid the_fv] -- Just one free var upd_flag -- Updatable thunk [] -- A thunk @@ -307,7 +309,7 @@ mkRhsClosure profile _check_tags bndr _cc in cgRhsStdThunk bndr lf_info [StgVarArg the_fv] ---------- See Note [Ap thunks] ------------------ -mkRhsClosure profile check_tags bndr _cc +mkRhsClosure profile use_std_ap check_tags bndr _cc fvs upd_flag [] -- No args; a thunk @@ -316,7 +318,8 @@ mkRhsClosure profile check_tags bndr _cc -- We are looking for an "ApThunk"; see data con ApThunk in GHC.StgToCmm.Closure -- of form (x1 x2 .... xn), where all the xi are locals (not top-level) -- So the xi will all be free variables - | args `lengthIs` (n_fvs-1) -- This happens only if the fun_id and + | use_std_ap + , args `lengthIs` (n_fvs-1) -- This happens only if the fun_id and -- args are all distinct local variables -- The "-1" is for fun_id -- Missed opportunity: (f x x) is not detected @@ -340,7 +343,7 @@ mkRhsClosure profile check_tags bndr _cc payload = StgVarArg fun_id : args ---------- Default case ------------------ -mkRhsClosure profile _check_tags bndr cc fvs upd_flag args body +mkRhsClosure profile _use_ap _check_tags bndr cc fvs upd_flag args body = do { let lf_info = mkClosureLFInfo (profilePlatform profile) bndr NotTopLevel fvs upd_flag args ; (id_info, reg) <- rhsIdInfo bndr lf_info ; return (id_info, gen_code lf_info reg) } @@ -404,13 +407,13 @@ cgRhsStdThunk bndr lf_info payload } where gen_code reg -- AHA! A STANDARD-FORM THUNK - = withNewTickyCounterStdThunk (lfUpdatable lf_info) (idName bndr) $ + = withNewTickyCounterStdThunk (lfUpdatable lf_info) (bndr) payload $ do { -- LAY OUT THE OBJECT mod_name <- getModuleName - ; cfg <- getStgToCmmConfig - ; let profile = stgToCmmProfile cfg - ; let platform = stgToCmmPlatform cfg + ; profile <- getProfile + ; platform <- getPlatform + ; let header = if isLFThunk lf_info then ThunkHeader else StdHeader (tot_wds, ptr_wds, payload_w_offsets) = mkVirtHeapOffsets profile header @@ -476,7 +479,8 @@ closureCodeBody top_lvl bndr cl_info cc [] body fv_details = withNewTickyCounterThunk (isStaticClosure cl_info) (closureUpdReqd cl_info) - (closureName cl_info) $ + (closureName cl_info) + (map fst fv_details) $ emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $ \(_, node, _) -> thunkCode cl_info fv_details cc node body where @@ -488,7 +492,7 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details arity = length args in -- See Note [OneShotInfo overview] in GHC.Types.Basic. - withNewTickyCounterFun (isOneShotBndr arg0) (closureName cl_info) + withNewTickyCounterFun (isOneShotBndr arg0) (closureName cl_info) (map fst fv_details) nv_args $ do { ; let diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs index 2609606292..7c1b5250e4 100644 --- a/compiler/GHC/StgToCmm/Closure.hs +++ b/compiler/GHC/StgToCmm/Closure.hs @@ -65,6 +65,7 @@ module GHC.StgToCmm.Closure ( cafBlackHoleInfoTable, indStaticInfoTable, staticClosureNeedsLink, + mkClosureInfoTableLabel ) where import GHC.Prelude @@ -642,7 +643,7 @@ getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method" data ClosureInfo = ClosureInfo { - closureName :: !Name, -- The thing bound to this closure + closureName :: !Id, -- The thing bound to this closure -- we don't really need this field: it's only used in generating -- code for ticky and profiling, and we could pass the information -- around separately, but it doesn't do much harm to keep it here. @@ -679,13 +680,12 @@ mkClosureInfo :: Profile -> String -- String descriptor -> ClosureInfo mkClosureInfo profile is_static id lf_info tot_wds ptr_wds val_descr - = ClosureInfo { closureName = name + = ClosureInfo { closureName = id , closureLFInfo = lf_info , closureInfoLabel = info_lbl -- These three fields are , closureSMRep = sm_rep -- (almost) an info table , closureProf = prof } -- (we don't have an SRT yet) where - name = idName id sm_rep = mkHeapRep profile is_static ptr_wds nonptr_wds (lfClosureType lf_info) prof = mkProfilingInfo profile id val_descr nonptr_wds = tot_wds - ptr_wds @@ -839,6 +839,7 @@ closureLocalEntryLabel platform | platformTablesNextToCode platform = toInfoLbl platform . closureInfoLabel | otherwise = toEntryLbl platform . closureInfoLabel +-- | Get the info table label for a *thunk*. mkClosureInfoTableLabel :: Platform -> Id -> LambdaFormInfo -> CLabel mkClosureInfoTableLabel platform id lf_info = case lf_info of diff --git a/compiler/GHC/StgToCmm/Config.hs b/compiler/GHC/StgToCmm/Config.hs index 623a7b8f0a..b3014fd302 100644 --- a/compiler/GHC/StgToCmm/Config.hs +++ b/compiler/GHC/StgToCmm/Config.hs @@ -61,6 +61,7 @@ data StgToCmmConfig = StgToCmmConfig , stgToCmmAllowExtendedAddSubInstrs :: !Bool -- ^ Allowed to generate AddWordC, SubWordC, Add2, etc. , stgToCmmAllowIntMul2Instr :: !Bool -- ^ Allowed to generate IntMul2 instruction , stgToCmmAllowFabsInstrs :: !Bool -- ^ Allowed to generate Fabs instructions + , stgToCmmTickyAP :: !Bool -- ^ Disable use of precomputed standard thunks. ------------------------------ SIMD flags ------------------------------------ -- Each of these flags checks vector compatibility with the backend requested -- during compilation. In essence, this means checking for @-fllvm@ which is diff --git a/compiler/GHC/StgToCmm/Env.hs b/compiler/GHC/StgToCmm/Env.hs index eef1420a72..4d1fff30df 100644 --- a/compiler/GHC/StgToCmm/Env.hs +++ b/compiler/GHC/StgToCmm/Env.hs @@ -17,7 +17,7 @@ module GHC.StgToCmm.Env ( bindArgsToRegs, bindToReg, rebindToReg, bindArgToReg, idToReg, - getCgIdInfo, + getCgIdInfo, getCgInfo_maybe, maybeLetNoEscape, ) where @@ -44,6 +44,9 @@ import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Utils.Panic.Plain +import GHC.Builtin.Names (getUnique) + + ------------------------------------- -- Manipulating CgIdInfo ------------------------------------- @@ -150,6 +153,12 @@ getCgIdInfo id cgLookupPanic id -- Bug }}} +-- | Retrieve cg info for a name if it already exists. +getCgInfo_maybe :: Name -> FCode (Maybe CgIdInfo) +getCgInfo_maybe name + = do { local_binds <- getBinds -- Try local bindings first + ; return $ lookupVarEnv_Directly local_binds (getUnique name) } + cgLookupPanic :: Id -> FCode a cgLookupPanic id = do local_binds <- getBinds diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs index 5129a45b1c..55892b8789 100644 --- a/compiler/GHC/StgToCmm/Expr.hs +++ b/compiler/GHC/StgToCmm/Expr.hs @@ -216,7 +216,7 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body = do platform <- getPlatform return ( lneIdInfo platform bndr args, code ) where - code = forkLneBody $ withNewTickyCounterLNE (idName bndr) args $ do + code = forkLneBody $ withNewTickyCounterLNE bndr args $ do { restoreCurrentCostCentre cc_slot ; arg_regs <- bindArgsToRegs args ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) } diff --git a/compiler/GHC/StgToCmm/Ticky.hs b/compiler/GHC/StgToCmm/Ticky.hs index 4aec817412..a538ff2f4d 100644 --- a/compiler/GHC/StgToCmm/Ticky.hs +++ b/compiler/GHC/StgToCmm/Ticky.hs @@ -135,6 +135,7 @@ import GHC.Cmm.Utils import GHC.Cmm.CLabel import GHC.Runtime.Heap.Layout + import GHC.Types.Name import GHC.Types.Id import GHC.Types.Basic @@ -154,6 +155,11 @@ import GHC.Core.Predicate import Data.Maybe import qualified Data.Char import Control.Monad ( when, unless ) +import GHC.Types.Id.Info +import GHC.Utils.Trace +import GHC.StgToCmm.Env (getCgInfo_maybe) +import Data.Coerce (coerce) +import GHC.Utils.Json ----------------------------------------------------------------------------- -- @@ -161,23 +167,73 @@ import Control.Monad ( when, unless ) -- ----------------------------------------------------------------------------- +-- | Number of arguments for a ticky counter. +-- +-- Ticky currently treats args to constructor allocations differently than those for functions/LNE bindings. +tickyArgArity :: TickyClosureType -> Int +tickyArgArity (TickyFun _ _fvs args) = length args +tickyArgArity (TickyLNE args) = length args +tickyArgArity (TickyCon{}) = 0 +tickyArgArity (TickyThunk{}) = 0 + +tickyArgDesc :: TickyClosureType -> String +tickyArgDesc arg_info = + case arg_info of + TickyFun _ _fvs args -> map (showTypeCategory . idType . fromNonVoid) args + TickyLNE args -> map (showTypeCategory . idType . fromNonVoid) args + TickyThunk{} -> "" + TickyCon{} -> "" + +tickyFvDesc :: TickyClosureType -> String +tickyFvDesc arg_info = + case arg_info of + TickyFun _ fvs _args -> map (showTypeCategory . idType . fromNonVoid) fvs + TickyLNE{} -> "" + TickyThunk _ _ fvs -> map (showTypeCategory . stgArgType) fvs + TickyCon{} -> "" + +instance ToJson TickyClosureType where + json info = case info of + (TickyFun {}) -> mkInfo (tickyFvDesc info) (tickyArgDesc info) "fun" + (TickyLNE {}) -> mkInfo [] (tickyArgDesc info) "lne" + (TickyThunk uf _ _) -> mkInfo (tickyFvDesc info) [] ("thk" ++ if uf then "_u" else "") + (TickyCon{}) -> mkInfo [] [] "con" + where + mkInfo :: String -> String -> String -> JsonDoc + mkInfo fvs args ty = + JSObject + [("type", json "entCntr") + ,("subTy", json ty) + ,("fvs_c", json (length fvs)) + ,("fvs" , json fvs) + ,("args", json args) + ] + +tickyEntryDesc :: (SDocContext -> TickyClosureType -> String) +tickyEntryDesc ctxt = renderWithContext ctxt . renderJSON . json + data TickyClosureType = TickyFun Bool -- True <-> single entry + [NonVoid Id] -- ^ FVs + [NonVoid Id] -- ^ Args | TickyCon DataCon -- the allocated constructor + ConstructorNumber | TickyThunk Bool -- True <-> updateable Bool -- True <-> standard thunk (AP or selector), has no entry counter + [StgArg] -- ^ FVS, StgArg because for thunks these can also be literals. | TickyLNE + [NonVoid Id] -- ^ Args -withNewTickyCounterFun :: Bool -> Name -> [NonVoid Id] -> FCode a -> FCode a -withNewTickyCounterFun single_entry = withNewTickyCounter (TickyFun single_entry) +withNewTickyCounterFun :: Bool -> Id -> [NonVoid Id] -> [NonVoid Id] -> FCode a -> FCode a +withNewTickyCounterFun single_entry f fvs args = withNewTickyCounter (TickyFun single_entry fvs args) f -withNewTickyCounterLNE :: Name -> [NonVoid Id] -> FCode a -> FCode a +withNewTickyCounterLNE :: Id -> [NonVoid Id] -> FCode a -> FCode a withNewTickyCounterLNE nm args code = do b <- isEnabled stgToCmmTickyLNE - if not b then code else withNewTickyCounter TickyLNE nm args code + if not b then code else withNewTickyCounter (TickyLNE args) nm code thunkHasCounter :: Bool -> FCode Bool thunkHasCounter isStatic = (not isStatic &&) <$> isEnabled stgToCmmTickyDynThunk @@ -185,46 +241,50 @@ thunkHasCounter isStatic = (not isStatic &&) <$> isEnabled stgToCmmTickyDynThunk withNewTickyCounterThunk :: Bool -- ^ static -> Bool -- ^ updateable - -> Name + -> Id + -> [NonVoid Id] -- ^ Free vars -> FCode a -> FCode a -withNewTickyCounterThunk isStatic isUpdatable name code = do +withNewTickyCounterThunk isStatic isUpdatable name fvs code = do has_ctr <- thunkHasCounter isStatic if not has_ctr then code - else withNewTickyCounter (TickyThunk isUpdatable False) name [] code + else withNewTickyCounter (TickyThunk isUpdatable False (map StgVarArg $ coerce fvs)) name code withNewTickyCounterStdThunk :: Bool -- ^ updateable - -> Name + -> Id + -> [StgArg] -- ^ Free vars + function -> FCode a -> FCode a -withNewTickyCounterStdThunk isUpdatable name code = do +withNewTickyCounterStdThunk isUpdatable name fvs code = do has_ctr <- thunkHasCounter False if not has_ctr then code - else withNewTickyCounter (TickyThunk isUpdatable True) name [] code + else withNewTickyCounter (TickyThunk isUpdatable True fvs) name code withNewTickyCounterCon - :: Name + :: Id -> DataCon + -> ConstructorNumber -> FCode a -> FCode a -withNewTickyCounterCon name datacon code = do +withNewTickyCounterCon name datacon info code = do has_ctr <- thunkHasCounter False if not has_ctr then code - else withNewTickyCounter (TickyCon datacon) name [] code + else withNewTickyCounter (TickyCon datacon info) name code -- args does not include the void arguments -withNewTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode a -> FCode a -withNewTickyCounter cloType name args m = do - lbl <- emitTickyCounter cloType name args +withNewTickyCounter :: TickyClosureType -> Id -> FCode a -> FCode a +withNewTickyCounter cloType name m = do + lbl <- emitTickyCounter cloType name setTickyCtrLabel lbl m -emitTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode CLabel -emitTickyCounter cloType name args - = let ctr_lbl = mkRednCountsLabel name in +emitTickyCounter :: TickyClosureType -> Id -> FCode CLabel +emitTickyCounter cloType tickee + = let name = idName tickee in + let ctr_lbl = mkRednCountsLabel name in (>> return ctr_lbl) $ ifTicky $ do { cfg <- getStgToCmmConfig @@ -239,23 +299,52 @@ emitTickyCounter cloType name args ppr_for_ticky_name :: SDoc ppr_for_ticky_name = let ext = case cloType of - TickyFun single_entry -> parens $ hcat $ punctuate comma $ + TickyFun single_entry _ _-> parens $ hcat $ punctuate comma $ [text "fun"] ++ [text "se"|single_entry] - TickyCon datacon -> parens (text "con:" <+> ppr (dataConName datacon)) - TickyThunk upd std -> parens $ hcat $ punctuate comma $ + TickyCon datacon _cn -> parens (text "con:" <+> ppr (dataConName datacon)) + TickyThunk upd std _-> parens $ hcat $ punctuate comma $ [text "thk"] ++ [text "se"|not upd] ++ [text "std"|std] - TickyLNE | isInternalName name -> parens (text "LNE") - | otherwise -> panic "emitTickyCounter: how is this an external LNE?" + TickyLNE _ | isInternalName name -> parens (text "LNE") + | otherwise -> panic "emitTickyCounter: how is this an external LNE?" p = case hasHaskellName parent of -- NB the default "top" ticky ctr does not -- have a Haskell name Just pname -> text "in" <+> ppr (nameUnique pname) _ -> empty in pprTickyName mod_name name <+> ext <+> p + ; this_mod <- getModuleName + ; let t = case cloType of + TickyCon {} -> "C" + TickyFun {} -> "F" + TickyThunk {} -> "T" + TickyLNE {} -> "L" + ; info_lbl <- case cloType of + TickyCon dc mn -> case mn of + NoNumber -> return $! CmmLabel $ mkConInfoTableLabel (dataConName dc) DefinitionSite + (Numbered n) -> return $! CmmLabel $ mkConInfoTableLabel (dataConName dc) (UsageSite this_mod n) + TickyFun {} -> + return $! CmmLabel $ mkInfoTableLabel name NoCafRefs + + TickyThunk _ std_thunk _fvs + | not std_thunk + -> return $! CmmLabel $ mkInfoTableLabel name NoCafRefs + -- IPE Maps have no entry for std thunks. + | otherwise + -> do + lf_info <- getCgInfo_maybe name + profile <- getProfile + case lf_info of + Just (CgIdInfo { cg_lf = cg_lf }) + | isLFThunk cg_lf + -> return $! CmmLabel $ mkClosureInfoTableLabel (profilePlatform profile) tickee cg_lf + _ -> pprTraceDebug "tickyThunkUnknown" (text t <> colon <> ppr name <+> ppr (mkInfoTableLabel name NoCafRefs)) + return $! zeroCLit platform + + TickyLNE {} -> return $! zeroCLit platform ; let ctx = defaultSDocContext {sdocPprDebug = True} ; fun_descr_lit <- newStringCLit $ renderWithContext ctx ppr_for_ticky_name - ; arg_descr_lit <- newStringCLit $ map (showTypeCategory . idType . fromNonVoid) args + ; arg_descr_lit <- newStringCLit $ tickyEntryDesc ctx cloType ; emitDataLits ctr_lbl -- Must match layout of rts/include/rts/Ticky.h's StgEntCounter -- @@ -263,10 +352,11 @@ emitTickyCounter cloType name args -- before, but the code generator wasn't handling that -- properly and it led to chaos, panic and disorder. [ mkIntCLit platform 0, -- registered? - mkIntCLit platform (length args), -- Arity + mkIntCLit platform (tickyArgArity cloType), -- Arity mkIntCLit platform 0, -- Heap allocated for this thing fun_descr_lit, arg_descr_lit, + info_lbl, zeroCLit platform, -- Entries into this thing zeroCLit platform, -- Heap allocated by this thing zeroCLit platform -- Link to next StgEntCounter diff --git a/compiler/GHC/Types/Var/Env.hs b/compiler/GHC/Types/Var/Env.hs index 3ad0f10156..e70e89f8ea 100644 --- a/compiler/GHC/Types/Var/Env.hs +++ b/compiler/GHC/Types/Var/Env.hs @@ -17,6 +17,7 @@ module GHC.Types.Var.Env ( delVarEnvList, delVarEnv, minusVarEnv, lookupVarEnv, lookupVarEnv_NF, lookupWithDefaultVarEnv, + lookupVarEnv_Directly, mapVarEnv, zipVarEnv, modifyVarEnv, modifyVarEnv_Directly, isEmptyVarEnv, @@ -506,6 +507,7 @@ modifyVarEnv :: (a -> a) -> VarEnv a -> Var -> VarEnv a isEmptyVarEnv :: VarEnv a -> Bool lookupVarEnv :: VarEnv a -> Var -> Maybe a +lookupVarEnv_Directly :: VarEnv a -> Unique -> Maybe a filterVarEnv :: (a -> Bool) -> VarEnv a -> VarEnv a lookupVarEnv_NF :: VarEnv a -> Var -> a lookupWithDefaultVarEnv :: VarEnv a -> a -> Var -> a @@ -530,6 +532,7 @@ minusVarEnv = minusUFM plusVarEnv = plusUFM plusVarEnvList = plusUFMList lookupVarEnv = lookupUFM +lookupVarEnv_Directly = lookupUFM_Directly filterVarEnv = filterUFM lookupWithDefaultVarEnv = lookupWithDefaultUFM mapVarEnv = mapUFM diff --git a/compiler/GHC/Utils/Json.hs b/compiler/GHC/Utils/Json.hs index 21358847c0..65880fe76e 100644 --- a/compiler/GHC/Utils/Json.hs +++ b/compiler/GHC/Utils/Json.hs @@ -1,4 +1,6 @@ {-# LANGUAGE GADTs #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE FlexibleInstances #-} module GHC.Utils.Json where import GHC.Prelude @@ -29,7 +31,7 @@ renderJSON d = JSObject fs -> braces $ pprList renderField fs where renderField :: (String, JsonDoc) -> SDoc - renderField (s, j) = doubleQuotes (text s) <> colon <+> renderJSON j + renderField (s, j) = doubleQuotes (text s) <> colon <> renderJSON j pprList pp xs = hcat (punctuate comma (map pp xs)) @@ -54,3 +56,9 @@ escapeJsonString = concatMap escapeChar class ToJson a where json :: a -> JsonDoc + +instance ToJson String where + json = JSString . escapeJsonString + +instance ToJson Int where + json = JSInt diff --git a/docs/users_guide/profiling.rst b/docs/users_guide/profiling.rst index dcfd42b84e..9a905ba3de 100644 --- a/docs/users_guide/profiling.rst +++ b/docs/users_guide/profiling.rst @@ -1726,6 +1726,37 @@ Using “ticky-ticky” profiling (for implementors) *by* each closure type. See :ghc-flag:`-ticky-allocd` to keep track of allocations *of* each closure type as well. + +GHC's ticky-ticky profiler provides a low-level facility for tracking +entry and allocation counts of particular individual closures. +Ticky-ticky profiling requires a certain familiarity with GHC +internals, so it is best suited for expert users, but can provide an invaluable +precise insight into the allocation behaviour of your programs. + +Getting started with ticky profiling consists of three steps. + +1. Add the ``-ticky`` flag when compiling a Haskell module to enable "ticky-ticky" profiling of that module. This makes GHC emit performance-counting instructions in every STG function. + +2. Add ``-ticky`` to the command line when linking, so that you link against a version of the runtime system that allows you to display the results. In fact, in the link phase -ticky implies -debug, so you get the debug version of the runtime system too. + +3. Then when running your program you can collect the results of the profiling in two ways. + + * Using the eventlog, the :rts-flag:`-lT <-l ⟨flags⟩>` flag will emit ticky samples + to the eventlog periodically. + This has the advantage of being able to resolve dynamic behaviors over the program's + lifetime. See :ref:`ticky-event-format` for details on the event types + reported. The ticky information can be rendered into an interactive table + using eventlog2html. + * A legacy textual format is emitted using the :rts-flag:`-r ⟨file⟩` flag. This + produces a textual table containing information about how much each counter + ticked throughout the duration of the program. + +Additional Ticky Flags +---------------------- + +There are some additional flags which can be used to increase the number of +ticky counters and the quality of the profile. + .. ghc-flag:: -ticky-allocd :shortdesc: Track the number of times each closure type is allocated. :type: dynamic @@ -1765,6 +1796,14 @@ Using “ticky-ticky” profiling (for implementors) Note that these counters are currently not processed well be eventlog2html. So if you want to check them you will have to use the text based interface. +.. ghc-flag:: -ticky-ap-thunk + :shortdesc: Don't use standard AP thunks on order to get more reliable entry counters. + :type: dynamic + :category: + + This allows us to get accurate entry counters for code like `f x y` at the cost of code size. + We do this but not using the precomputed standard AP thunk code. + GHC's ticky-ticky profiler provides a low-level facility for tracking entry and allocation counts of particular individual closures. Because ticky-ticky profiling requires a certain familiarity with GHC @@ -1780,6 +1819,115 @@ advantage of being able to resolve dynamic behaviors over the program's lifetime. See :ref:`ticky-event-format` for details on the event types reported. +Understanding the Output of Ticky-Ticky profiles +------------------------------------------------ + +Once you have your rendered profile then you can begin to understand the allocation +behaviour of your program. There are two classes of ticky-ticky counters. + +Name-specific counters + + Each "name-specific counter" is associated with a name that is defined in the + result of the optimiser. For each such name, there are three possible counters: + entries, heap allocation by the named thing, and heap used to allocate that + named thing. + +Global counters + + Each "global counter" describes some aspect of the entire program execution. + For example, one global counter tracks total heap allocation; another tracks allocation for PAPs. + +In general you are probably interested mostly in the name-specific counters as these +can provided detailed information about where allocates how much in your program. + +Information about name-specific counters +---------------------------------------- + +Name-specific counters provide the following information about a closure. + +* Entries - How many times the closure was entered. +* Allocs - How much (in bytes) is allocated *by* that closure. +* Allod - How often the closure is allocated. +* FVs - The free variables captured by that closure. +* Args - The arguments that closure takes. + +The FVs and Args information is encoded using a small DSL. + ++------------------+---------------------------------------------------+ +|Classification |Description | ++==================+===================================================+ +|+ |dictionary | ++------------------+---------------------------------------------------+ +|\> |function | ++------------------+---------------------------------------------------+ +|{C,I,F,D,W} | char, int, float, double, word | ++------------------+---------------------------------------------------+ +|{c,i,f,d,w} | unboxed ditto | ++------------------+---------------------------------------------------+ +|T | tuple | ++------------------+---------------------------------------------------+ +|P | other primitive type | ++------------------+---------------------------------------------------+ +|p | unboxed primitive type | ++------------------+---------------------------------------------------+ +|L | list | ++------------------+---------------------------------------------------+ +|E | enumeration type | ++------------------+---------------------------------------------------+ +|S | Single constructor type | ++------------------+---------------------------------------------------+ +|M | Multi constructor type | ++------------------+---------------------------------------------------+ +|. |other type | ++------------------+---------------------------------------------------+ +|- |reserved for others to mark as "uninteresting" | ++------------------+---------------------------------------------------+ + +In particular note that you can use the ticky profiler to see any function +calls to dictionary arguments by searching the profile for the ``+`` classifier. +This indicates that the function has failed to specialise for one reason or another. + +Examples +-------- + +A typical use of ticky-ticky would be to generate a ticky report using the eventlog by evoking an +application with RTS arguments like this: + +``app <args> +RTS -l-augT`` + +This will produce an eventlog file which contains results from ticky counters. This file can +be manually inspected like any regular eventlog. However for ticky-ticky eventlog2html has +good support for producing tables from these logs. + +With an up to date version of eventlog2html this can be simply done by invoking eventlog2html +on the produced eventlog. In the example above the invocation would then be ``eventlog2html app.eventlog`` +Which will produce a searchable and sortable table containing all the ticky counters in the log. + +Notes about ticky profiling +--------------------------- + +* You can mix together modules compiled with and without ``-ticky`` but you will + miss out on allocations and counts from uninstrumented modules in the profile. + +* Linking with the ``-ticky`` has a quite severe performance impact on your program. + ``-ticky`` implies using the unoptimised ``-debug`` RTS. Therefore ``-ticky`` + shouldn't be used for production builds. + +* Building with ``-ticky`` doesn't affect core optimisations of your program as the + counters are inserted after the STG pipeline. At which point most optimizations have + already been run. + +* When using the eventlog it is possible to combine together ticky-ticky and IPE + based profiling as each ticky counter definition has an associated info table. + This address can be looked up in the IPE map so that further information (such + as source location) can be determined about that closure. + +* Global ticky counters are only available in the textual ticky output (``+RTS -r``). + But this mode has some limitations (e.g. on column widths) and will contain raw json output + in some columns. For this reason using an eventlog-based approach should be prefered if + possible. + + .. [1] :ghc-flag:`-fprof-auto` was known as ``-auto-all`` prior to GHC 7.4.1. diff --git a/rts/Ticky.c b/rts/Ticky.c index 2ce4aab658..f82cd27104 100644 --- a/rts/Ticky.c +++ b/rts/Ticky.c @@ -17,7 +17,7 @@ */ StgEntCounter top_ct = { 0, 0, 0, - "TOP", "", + "TOP", "", NULL, 0, 0, NULL }; /* Data structure used in ``registering'' one of these counters. */ @@ -377,13 +377,13 @@ printRegisteredCounterInfo (FILE *tf) fprintf(tf,"\nThe following table is explained by https://gitlab.haskell.org/ghc/ghc/wikis/debugging/ticky-ticky\nAll allocation numbers are in bytes.\n"); fprintf(tf,"\n**************************************************\n\n"); } - fprintf(tf, "%11s%11s%11s %-23s %s\n", + fprintf(tf, "%11s%12s%12s %-63s %s\n", "Entries", "Alloc", "Alloc'd", "Non-void Arguments", "STG Name"); fprintf(tf, "--------------------------------------------------------------------------------\n"); /* Function name at the end so it doesn't mess up the tabulation */ for (p = ticky_entry_ctrs; p != NULL; p = p->link) { - fprintf(tf, "%11" FMT_Int "%11" FMT_Int "%11" FMT_Int " %3lu %-20.20s %s", + fprintf(tf, "%11" FMT_Int "%12" FMT_Int "%12" FMT_Int " %3lu %-60.60s %s", p->entry_count, p->allocs, p->allocd, diff --git a/rts/eventlog/EventLog.c b/rts/eventlog/EventLog.c index ca37b441c6..d67f9e7bb1 100644 --- a/rts/eventlog/EventLog.c +++ b/rts/eventlog/EventLog.c @@ -1355,7 +1355,7 @@ void postProfBegin(void) #if defined(TICKY_TICKY) static void postTickyCounterDef(EventsBuf *eb, StgEntCounter *p) { - StgWord len = 8 + 2 + strlen(p->arg_kinds)+1 + strlen(p->str)+1; + StgWord len = 8 + 2 + strlen(p->arg_kinds)+1 + strlen(p->str)+1 + 8; ensureRoomForVariableEvent(eb, len); postEventHeader(eb, EVENT_TICKY_COUNTER_DEF); postPayloadSize(eb, len); @@ -1364,6 +1364,7 @@ static void postTickyCounterDef(EventsBuf *eb, StgEntCounter *p) postWord16(eb, (uint16_t) p->arity); postString(eb, p->arg_kinds); postString(eb, p->str); + postWord64(eb, (W_) (INFO_PTR_TO_STRUCT(p->info))); } void postTickyCounterDefs(StgEntCounter *counters) diff --git a/rts/include/rts/Ticky.h b/rts/include/rts/Ticky.h index 93043d8514..7658e3c08a 100644 --- a/rts/include/rts/Ticky.h +++ b/rts/include/rts/Ticky.h @@ -26,6 +26,7 @@ typedef struct _StgEntCounter { /* (rest of args are in registers) */ char *str; /* name of the thing */ char *arg_kinds; /* info about the args types */ + StgInfoTable *info; /* Info table corresponding to this closure */ StgInt entry_count; /* Trips to fast entry code */ StgInt allocs; /* number of allocations by this fun */ struct _StgEntCounter *link;/* link to chain them all together */ diff --git a/testsuite/tests/driver/T16167.stdout b/testsuite/tests/driver/T16167.stdout index 5416b95894..3da369eb8b 100644 --- a/testsuite/tests/driver/T16167.stdout +++ b/testsuite/tests/driver/T16167.stdout @@ -1 +1 @@ -{"span": {"file": "T16167.hs","startLine": 1,"startCol": 8,"endLine": 1,"endCol": 9},"doc": "parse error on input \u2018f\u2019","messageClass": "MCDiagnostic SevError ErrorWithoutFlag"} +{"span":{"file":"T16167.hs","startLine":1,"startCol":8,"endLine":1,"endCol":9},"doc":"parse error on input \u2018f\u2019","messageClass":"MCDiagnostic SevError ErrorWithoutFlag"} diff --git a/testsuite/tests/driver/json.stderr b/testsuite/tests/driver/json.stderr index cb34abd7ac..ec28dbb206 100644 --- a/testsuite/tests/driver/json.stderr +++ b/testsuite/tests/driver/json.stderr @@ -1 +1 @@ -{"span": {"file": "json.hs","startLine": 6,"startCol": 7,"endLine": 6,"endCol": 8},"doc": "\u2022 No instance for (Num (a -> a)) arising from the literal \u20185\u2019\n (maybe you haven't applied a function to enough arguments?)\n\u2022 In the expression: 5\n In an equation for \u2018id1\u2019: id1 = 5","messageClass": "MCDiagnostic SevError ErrorWithoutFlag"} +{"span":{"file":"json.hs","startLine":6,"startCol":7,"endLine":6,"endCol":8},"doc":"\u2022 No instance for (Num (a -> a)) arising from the literal \u20185\u2019\n (maybe you haven't applied a function to enough arguments?)\n\u2022 In the expression: 5\n In an equation for \u2018id1\u2019: id1 = 5","messageClass":"MCDiagnostic SevError ErrorWithoutFlag"} diff --git a/testsuite/tests/driver/json2.stderr b/testsuite/tests/driver/json2.stderr index 71d7f5edfa..6c69d32881 100644 --- a/testsuite/tests/driver/json2.stderr +++ b/testsuite/tests/driver/json2.stderr @@ -1 +1 @@ -{"span": null,"doc": "TYPE SIGNATURES\n foo :: forall a. a -> a\nDependent modules: []\nDependent packages: [base-4.16.0.0]","messageClass": "MCOutput"} +{"span":null,"doc":"TYPE SIGNATURES\n foo :: forall a. a -> a\nDependent modules: []\nDependent packages: [base-4.16.0.0]","messageClass":"MCOutput"} |