summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Klebinger <klebinger.andreas@gmx.at>2021-12-23 15:46:18 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-02-10 04:35:35 -0500
commit48f257151f30f6b4d2f8b1a6ea4185ccd00ebd95 (patch)
treec440e41d67f5c4d2f62deaed0b9dbb6991e06338
parentac2d18a7353cd3ac1ba4b5993f2776fe0c5eedc9 (diff)
downloadhaskell-48f257151f30f6b4d2f8b1a6ea4185ccd00ebd95.tar.gz
Add late cost centre support
This allows cost centres to be inserted after the core optimization pipeline has run.
-rw-r--r--compiler/GHC/Core/LateCC.hs82
-rw-r--r--compiler/GHC/Core/Lint.hs1
-rw-r--r--compiler/GHC/Core/Opt/Monad.hs2
-rw-r--r--compiler/GHC/Core/Opt/Pipeline.hs10
-rw-r--r--compiler/GHC/Core/Type.hs2
-rw-r--r--compiler/GHC/Core/Utils.hs2
-rw-r--r--compiler/GHC/Driver/Flags.hs1
-rw-r--r--compiler/GHC/Driver/Session.hs1
-rw-r--r--compiler/GHC/Hs/Dump.hs7
-rw-r--r--compiler/GHC/StgToCmm/Prim.hs5
-rw-r--r--compiler/GHC/Types/Tickish.hs9
-rw-r--r--compiler/ghc.cabal.in1
-rw-r--r--docs/users_guide/profiling.rst13
-rw-r--r--testsuite/tests/profiling/should_compile/all.T2
-rw-r--r--testsuite/tests/profiling/should_compile/prof-late-cc.hs12
-rw-r--r--testsuite/tests/profiling/should_compile/prof-late-cc2.hs12
-rw-r--r--testsuite/tests/profiling/should_compile/prof-late-cc2.stdout1
17 files changed, 157 insertions, 6 deletions
diff --git a/compiler/GHC/Core/LateCC.hs b/compiler/GHC/Core/LateCC.hs
new file mode 100644
index 0000000000..d7a3b0cd8d
--- /dev/null
+++ b/compiler/GHC/Core/LateCC.hs
@@ -0,0 +1,82 @@
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DerivingStrategies #-}
+{-# LANGUAGE TupleSections #-}
+
+-- | Adds cost-centers after the core piple has run.
+module GHC.Core.LateCC
+ ( addLateCostCentres
+ ) where
+
+import Control.Applicative
+import GHC.Utils.Monad.State.Strict
+import Control.Monad
+
+import GHC.Prelude
+import GHC.Driver.Session
+import GHC.Types.CostCentre
+import GHC.Types.CostCentre.State
+import GHC.Types.Name hiding (varName)
+import GHC.Types.Tickish
+import GHC.Unit.Module.ModGuts
+import GHC.Types.Var
+import GHC.Unit.Types
+import GHC.Data.FastString
+import GHC.Core
+import GHC.Core.Opt.Monad
+import GHC.Types.Id
+import GHC.Core.Utils (mkTick)
+
+addLateCostCentres :: ModGuts -> CoreM ModGuts
+addLateCostCentres guts = do
+ dflags <- getDynFlags
+ let env :: Env
+ env = Env
+ { thisModule = mg_module guts
+ , ccState = newCostCentreState
+ , dflags = dflags
+ }
+ let guts' = guts { mg_binds = doCoreProgram env (mg_binds guts)
+ }
+ return guts'
+
+doCoreProgram :: Env -> CoreProgram -> CoreProgram
+doCoreProgram env binds = flip evalState newCostCentreState $ do
+ mapM (doBind env) binds
+
+doBind :: Env -> CoreBind -> M CoreBind
+doBind env (NonRec b rhs) = NonRec b <$> doBndr env b rhs
+doBind env (Rec bs) = Rec <$> mapM doPair bs
+ where
+ doPair :: ((Id, CoreExpr) -> M (Id, CoreExpr))
+ doPair (b,rhs) = (b,) <$> doBndr env b rhs
+
+doBndr :: Env -> Id -> CoreExpr -> M CoreExpr
+doBndr env bndr rhs = do
+ let name = idName bndr
+ name_loc = nameSrcSpan name
+ cc_name = getOccFS name
+ count = gopt Opt_ProfCountEntries (dflags env)
+ cc_flavour <- getCCExprFlavour cc_name
+ let cc_mod = thisModule env
+ bndrCC = NormalCC cc_flavour cc_name cc_mod name_loc
+ note = ProfNote bndrCC count True
+ return $ mkTick note rhs
+
+type M = State CostCentreState
+
+getCCExprFlavour :: FastString -> M CCFlavour
+getCCExprFlavour name = ExprCC <$> getCCIndex' name
+
+getCCIndex' :: FastString -> M CostCentreIndex
+getCCIndex' name = state (getCCIndex name)
+
+data Env = Env
+ { thisModule :: Module
+ , dflags :: DynFlags
+ , ccState :: CostCentreState
+ }
+
diff --git a/compiler/GHC/Core/Lint.hs b/compiler/GHC/Core/Lint.hs
index b93df23356..1200a4102a 100644
--- a/compiler/GHC/Core/Lint.hs
+++ b/compiler/GHC/Core/Lint.hs
@@ -352,6 +352,7 @@ coreDumpFlag CorePrep = Just Opt_D_dump_prep
coreDumpFlag CoreOccurAnal = Just Opt_D_dump_occur_anal
coreDumpFlag CoreAddCallerCcs = Nothing
+coreDumpFlag CoreAddLateCcs = Nothing
coreDumpFlag CoreDoPrintCore = Nothing
coreDumpFlag (CoreDoRuleCheck {}) = Nothing
coreDumpFlag CoreDoNothing = Nothing
diff --git a/compiler/GHC/Core/Opt/Monad.hs b/compiler/GHC/Core/Opt/Monad.hs
index c7454cba91..ef9f851e61 100644
--- a/compiler/GHC/Core/Opt/Monad.hs
+++ b/compiler/GHC/Core/Opt/Monad.hs
@@ -130,6 +130,7 @@ data CoreToDo -- These are diff core-to-core passes,
| CoreTidy
| CorePrep
| CoreAddCallerCcs
+ | CoreAddLateCcs
| CoreOccurAnal
instance Outputable CoreToDo where
@@ -151,6 +152,7 @@ instance Outputable CoreToDo where
ppr CoreDesugarOpt = text "Desugar (after optimization)"
ppr CoreTidy = text "Tidy Core"
ppr CoreAddCallerCcs = text "Add caller cost-centres"
+ ppr CoreAddLateCcs = text "Add late core cost-centres"
ppr CorePrep = text "CorePrep"
ppr CoreOccurAnal = text "Occurrence analysis"
ppr CoreDoPrintCore = text "Print core"
diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs
index 1320d9d3ec..741552f815 100644
--- a/compiler/GHC/Core/Opt/Pipeline.hs
+++ b/compiler/GHC/Core/Opt/Pipeline.hs
@@ -43,6 +43,7 @@ import GHC.Core.Opt.CallArity ( callArityAnalProgram )
import GHC.Core.Opt.Exitify ( exitifyProgram )
import GHC.Core.Opt.WorkWrap ( wwTopBinds )
import GHC.Core.Opt.CallerCC ( addCallerCostCentres )
+import GHC.Core.LateCC (addLateCostCentres)
import GHC.Core.Seq (seqBinds)
import GHC.Core.FamInstEnv
@@ -225,6 +226,9 @@ getCoreToDo logger dflags
add_caller_ccs =
runWhen (profiling && not (null $ callerCcFilters dflags)) CoreAddCallerCcs
+ add_late_ccs =
+ runWhen (profiling && gopt Opt_ProfLateCcs dflags) $ CoreAddLateCcs
+
core_todo =
[
-- We want to do the static argument transform before full laziness as it
@@ -369,7 +373,8 @@ getCoreToDo logger dflags
maybe_rule_check FinalPhase,
- add_caller_ccs
+ add_caller_ccs,
+ add_late_ccs
]
-- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
@@ -523,6 +528,9 @@ doCorePass pass guts = do
CoreAddCallerCcs -> {-# SCC "AddCallerCcs" #-}
addCallerCostCentres guts
+ CoreAddLateCcs -> {-# SCC "AddLateCcs" #-}
+ addLateCostCentres guts
+
CoreDoPrintCore -> {-# SCC "PrintCore" #-}
liftIO $ printCore logger (mg_binds guts) >> return guts
diff --git a/compiler/GHC/Core/Type.hs b/compiler/GHC/Core/Type.hs
index e3ad6e989c..bd3b0122d5 100644
--- a/compiler/GHC/Core/Type.hs
+++ b/compiler/GHC/Core/Type.hs
@@ -1498,7 +1498,7 @@ tyConAppTyCon_maybe ty = case coreFullView ty of
FunTy {} -> Just funTyCon
_ -> Nothing
-tyConAppTyCon :: Type -> TyCon
+tyConAppTyCon :: HasDebugCallStack => Type -> TyCon
tyConAppTyCon ty = tyConAppTyCon_maybe ty `orElse` pprPanic "tyConAppTyCon" (ppr ty)
-- | The same as @snd . splitTyConApp@
diff --git a/compiler/GHC/Core/Utils.hs b/compiler/GHC/Core/Utils.hs
index 24ab87ac06..411628c261 100644
--- a/compiler/GHC/Core/Utils.hs
+++ b/compiler/GHC/Core/Utils.hs
@@ -28,7 +28,7 @@ module GHC.Core.Utils (
exprIsCheap, exprIsExpandable, exprIsCheapX, CheapAppFun,
exprIsHNF, exprOkForSpeculation, exprOkForSideEffects, exprIsWorkFree,
exprIsConLike,
- isCheapApp, isExpandableApp,
+ isCheapApp, isExpandableApp, isSaturatedConApp,
exprIsTickedString, exprIsTickedString_maybe,
exprIsTopLevelBindable,
altsAreExhaustive,
diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs
index b8a82656e8..e95b181743 100644
--- a/compiler/GHC/Driver/Flags.hs
+++ b/compiler/GHC/Driver/Flags.hs
@@ -245,6 +245,7 @@ data GeneralFlag
-- profiling opts
| Opt_AutoSccsOnIndividualCafs
| Opt_ProfCountEntries
+ | Opt_ProfLateCcs
-- misc opts
| Opt_Pp
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index 9640c13a66..1f5626cec8 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -3409,6 +3409,7 @@ fFlagsDeps = [
flagSpec "print-typechecker-elaboration" Opt_PrintTypecheckerElaboration,
flagSpec "prof-cafs" Opt_AutoSccsOnIndividualCafs,
flagSpec "prof-count-entries" Opt_ProfCountEntries,
+ flagSpec "prof-late-ccs" Opt_ProfLateCcs,
flagSpec "regs-graph" Opt_RegsGraph,
flagSpec "regs-iterative" Opt_RegsIterative,
depFlagSpec' "rewrite-rules" Opt_EnableRewriteRules
diff --git a/compiler/GHC/Hs/Dump.hs b/compiler/GHC/Hs/Dump.hs
index 5ba1df580b..de9848e139 100644
--- a/compiler/GHC/Hs/Dump.hs
+++ b/compiler/GHC/Hs/Dump.hs
@@ -12,6 +12,7 @@
module GHC.Hs.Dump (
-- * Dumping ASTs
showAstData,
+ showAstDataFull,
BlankSrcSpan(..),
BlankEpAnnotations(..),
) where
@@ -35,12 +36,18 @@ import GHC.Utils.Outputable
import Data.Data hiding (Fixity)
import qualified Data.ByteString as B
+-- | Should source spans be removed from output.
data BlankSrcSpan = BlankSrcSpan | BlankSrcSpanFile | NoBlankSrcSpan
deriving (Eq,Show)
+-- | Should EpAnnotations be removed from output.
data BlankEpAnnotations = BlankEpAnnotations | NoBlankEpAnnotations
deriving (Eq,Show)
+-- | Show the full AST as the compiler sees it.
+showAstDataFull :: Data a => a -> SDoc
+showAstDataFull = showAstData NoBlankSrcSpan NoBlankEpAnnotations
+
-- | Show a GHC syntax tree. This parameterised because it is also used for
-- comparing ASTs in ppr roundtripping tests, where the SrcSpan's are blanked
-- out, to avoid comparing locations, only structure
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index adb870801c..4e1e24e93d 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -35,7 +35,7 @@ import GHC.Cmm.Graph
import GHC.Stg.Syntax
import GHC.Cmm
import GHC.Unit ( rtsUnit )
-import GHC.Core.Type ( Type, tyConAppTyCon )
+import GHC.Core.Type ( Type, tyConAppTyCon_maybe )
import GHC.Core.TyCon
import GHC.Cmm.CLabel
import GHC.Cmm.Info ( closureInfoPtr )
@@ -49,6 +49,7 @@ import GHC.Utils.Panic.Plain
import Data.Maybe
import Control.Monad (liftM, when, unless)
+import GHC.Utils.Outputable
------------------------------------------------------------------------
-- Primitive operations and foreign calls
@@ -1542,7 +1543,7 @@ emitPrimOp cfg primop =
-- you used tagToEnum# in a non-monomorphic setting, e.g.,
-- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
-- That won't work.
- let tycon = tyConAppTyCon res_ty
+ let tycon = fromMaybe (pprPanic "tagToEnum#: Applied to non-concrete type" (ppr res_ty)) (tyConAppTyCon_maybe res_ty)
massert (isEnumerationTyCon tycon)
platform <- getPlatform
pure [tagToClosure platform tycon amode]
diff --git a/compiler/GHC/Types/Tickish.hs b/compiler/GHC/Types/Tickish.hs
index 480bb2befd..c1f745870d 100644
--- a/compiler/GHC/Types/Tickish.hs
+++ b/compiler/GHC/Types/Tickish.hs
@@ -39,6 +39,7 @@ import GHC.Utils.Panic
import Language.Haskell.Syntax.Extension ( NoExtField )
import Data.Data
+import GHC.Utils.Outputable (Outputable (ppr), text)
{- *********************************************************************
* *
@@ -334,6 +335,9 @@ data TickishPlacement =
-- restrictive placement rule for ticks, as all tickishs have in
-- common that they want to track runtime processes. The only
-- legal placement rule for counting ticks.
+ -- NB: We generally try to move these as close to the relevant
+ -- runtime expression as possible. This means they get pushed through
+ -- tyoe arguments. E.g. we create `(tick f) @Bool` instead of `tick (f @Bool)`.
PlaceRuntime
-- | As @PlaceRuntime@, but we float the tick through all
@@ -354,7 +358,10 @@ data TickishPlacement =
-- above example is safe.
| PlaceCostCentre
- deriving (Eq)
+ deriving (Eq,Show)
+
+instance Outputable TickishPlacement where
+ ppr = text . show
-- | Placement behaviour we want for the ticks
tickishPlace :: GenTickish pass -> TickishPlacement
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 8dc7de62f7..efc4639b96 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -280,6 +280,7 @@ Library
GHC.Core.FVs
GHC.Core.InstEnv
GHC.Core.Lint
+ GHC.Core.LateCC
GHC.Core.Make
GHC.Core.Map.Expr
GHC.Core.Map.Type
diff --git a/docs/users_guide/profiling.rst b/docs/users_guide/profiling.rst
index a1b1de819f..87509d6331 100644
--- a/docs/users_guide/profiling.rst
+++ b/docs/users_guide/profiling.rst
@@ -438,6 +438,19 @@ compiled program.
or the :rts-flag:`-xc` RTS flag (:ref:`rts-options-debugging`) for more
details.
+.. ghc-flag:: -fprof-late-ccs
+ :shortdesc: Auto-add ``SCC``\\ s to all top level bindings *after* the optimizer has run.
+ :type: dynamic
+ :reverse: -fno-prof-auto
+ :category:
+
+ Adds an automatic ``SCC`` annotation to all top level bindings late in the core pipeline after
+ the optimizer has run. This means these cost centres will not interfere with core-level optimizations
+ and the resulting profile will be closer to the performance profile of an optimized non-profiled
+ executable.
+ While the results of this are generally very good some of the compiler internal names
+ will leak into the profile.
+
.. ghc-flag:: -fprof-cafs
:shortdesc: Auto-add ``SCC``\\ s to all CAFs
:type: dynamic
diff --git a/testsuite/tests/profiling/should_compile/all.T b/testsuite/tests/profiling/should_compile/all.T
index de565de40b..630b692f10 100644
--- a/testsuite/tests/profiling/should_compile/all.T
+++ b/testsuite/tests/profiling/should_compile/all.T
@@ -2,6 +2,8 @@
# added for the profiling ways makes it pass
test('prof001', [only_ways(['normal']), req_profiling], compile_and_run, ['-prof -fprof-cafs'])
test('prof002', [only_ways(['normal']), req_profiling], compile_and_run, ['-prof -fprof-cafs'])
+test('prof-late-cc', [only_ways(['normal']), req_profiling], compile, ['-prof -fprof-late-ccs'])
+test('prof-late-cc2', [only_ways(['normal']), req_profiling], compile_and_run, ['-prof -fprof-late-ccs'])
test('T2410', [only_ways(['normal']), req_profiling], compile, ['-O2 -prof -fprof-cafs'])
test('T5889', [only_ways(['normal']), req_profiling, extra_files(['T5889/A.hs', 'T5889/B.hs'])], multimod_compile, ['A B', '-O -prof -fno-prof-count-entries -v0'])
diff --git a/testsuite/tests/profiling/should_compile/prof-late-cc.hs b/testsuite/tests/profiling/should_compile/prof-late-cc.hs
new file mode 100644
index 0000000000..9ae777e023
--- /dev/null
+++ b/testsuite/tests/profiling/should_compile/prof-late-cc.hs
@@ -0,0 +1,12 @@
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+module Test where
+
+import GHC.Prim
+import GHC.Types
+
+-- This caused problems during implementation.
+-- See #20938
+foo x = tagToEnum# x :: Bool
+
+bar x = ({-# SCC scc_bar #-} tagToEnum#) x :: Bool
diff --git a/testsuite/tests/profiling/should_compile/prof-late-cc2.hs b/testsuite/tests/profiling/should_compile/prof-late-cc2.hs
new file mode 100644
index 0000000000..cfe09687d8
--- /dev/null
+++ b/testsuite/tests/profiling/should_compile/prof-late-cc2.hs
@@ -0,0 +1,12 @@
+module Main(main) where
+
+{-# NOINLINE doStuff #-}
+doStuff x = do
+ print x
+ return x
+
+{-# NOINLINE main #-}
+main :: IO Int
+main = doStuff 42
+
+
diff --git a/testsuite/tests/profiling/should_compile/prof-late-cc2.stdout b/testsuite/tests/profiling/should_compile/prof-late-cc2.stdout
new file mode 100644
index 0000000000..d81cc0710e
--- /dev/null
+++ b/testsuite/tests/profiling/should_compile/prof-late-cc2.stdout
@@ -0,0 +1 @@
+42