summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2011-11-28 16:48:43 +0000
committerSimon Marlow <marlowsd@gmail.com>2011-11-29 12:21:18 +0000
commit50de6034343abc93a7b01daccff34121042c0e7c (patch)
tree24496a5fc6bc39c6baaa574608e53c5d76c169f6 /compiler
parent1c2b838131134d44004dfdff18c302131478390d (diff)
downloadhaskell-50de6034343abc93a7b01daccff34121042c0e7c.tar.gz
Make profiling work with multiple capabilities (+RTS -N)
This means that both time and heap profiling work for parallel programs. Main internal changes: - CCCS is no longer a global variable; it is now another pseudo-register in the StgRegTable struct. Thus every Capability has its own CCCS. - There is a new built-in CCS called "IDLE", which records ticks for Capabilities in the idle state. If you profile a single-threaded program with +RTS -N2, you'll see about 50% of time in "IDLE". - There is appropriate locking in rts/Profiling.c to protect the shared cost-centre-stack data structures. This patch does enough to get it working, I have cut one big corner: the cost-centre-stack data structure is still shared amongst all Capabilities, which means that multiple Capabilities will race when updating the "allocations" and "entries" fields of a CCS. Not only does this give unpredictable results, but it runs very slowly due to cache line bouncing. It is strongly recommended that you use -fno-prof-count-entries to disable the "entries" count when profiling parallel programs. (I shall add a note to this effect to the docs).
Diffstat (limited to 'compiler')
-rw-r--r--compiler/cmm/CmmExpr.hs6
-rw-r--r--compiler/cmm/CmmLex.x5
-rw-r--r--compiler/cmm/CmmParse.y5
-rw-r--r--compiler/cmm/PprCmmExpr.hs1
-rw-r--r--compiler/codeGen/CgCase.lhs2
-rw-r--r--compiler/codeGen/CgClosure.lhs7
-rw-r--r--compiler/codeGen/CgForeignCall.hs4
-rw-r--r--compiler/codeGen/CgProf.hs22
-rw-r--r--compiler/codeGen/CgUtils.hs6
-rw-r--r--compiler/codeGen/StgCmmForeign.hs4
-rw-r--r--compiler/codeGen/StgCmmProf.hs15
-rw-r--r--compiler/codeGen/StgCmmUtils.hs8
12 files changed, 50 insertions, 35 deletions
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index ef97a82aa9..885639b874 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -343,7 +343,8 @@ data GlobalReg
| SpLim -- Stack limit
| Hp -- Heap ptr; points to last occupied heap location.
| HpLim -- Heap limit register
- | CurrentTSO -- pointer to current thread's TSO
+ | CCCS -- Current cost-centre stack
+ | CurrentTSO -- pointer to current thread's TSO
| CurrentNursery -- pointer to allocation area
| HpAlloc -- allocation count for heap check failure
@@ -395,6 +396,7 @@ instance Ord GlobalReg where
compare SpLim SpLim = EQ
compare Hp Hp = EQ
compare HpLim HpLim = EQ
+ compare CCCS CCCS = EQ
compare CurrentTSO CurrentTSO = EQ
compare CurrentNursery CurrentNursery = EQ
compare HpAlloc HpAlloc = EQ
@@ -419,6 +421,8 @@ instance Ord GlobalReg where
compare _ Hp = GT
compare HpLim _ = LT
compare _ HpLim = GT
+ compare CCCS _ = LT
+ compare _ CCCS = GT
compare CurrentTSO _ = LT
compare _ CurrentTSO = GT
compare CurrentNursery _ = LT
diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x
index 1e2b20d4b3..ddd681d25e 100644
--- a/compiler/cmm/CmmLex.x
+++ b/compiler/cmm/CmmLex.x
@@ -106,8 +106,9 @@ $white_no_nl+ ;
SpLim { global_reg SpLim }
Hp { global_reg Hp }
HpLim { global_reg HpLim }
- CurrentTSO { global_reg CurrentTSO }
- CurrentNursery { global_reg CurrentNursery }
+ CCCS { global_reg CCCS }
+ CurrentTSO { global_reg CurrentTSO }
+ CurrentNursery { global_reg CurrentNursery }
HpAlloc { global_reg HpAlloc }
BaseReg { global_reg BaseReg }
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index 0a50f60b2c..4e315ddbdf 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -775,8 +775,9 @@ isPtrGlobalReg Sp = True
isPtrGlobalReg SpLim = True
isPtrGlobalReg Hp = True
isPtrGlobalReg HpLim = True
-isPtrGlobalReg CurrentTSO = True
-isPtrGlobalReg CurrentNursery = True
+isPtrGlobalReg CCCS = True
+isPtrGlobalReg CurrentTSO = True
+isPtrGlobalReg CurrentNursery = True
isPtrGlobalReg (VanillaReg _ VGcPtr) = True
isPtrGlobalReg _ = False
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index 76fbdcec8d..81ce84c264 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -271,6 +271,7 @@ pprGlobalReg gr
SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp")
HpLim -> ptext (sLit "HpLim")
+ CCCS -> ptext (sLit "CCCS")
CurrentTSO -> ptext (sLit "CurrentTSO")
CurrentNursery -> ptext (sLit "CurrentNursery")
HpAlloc -> ptext (sLit "HpAlloc")
diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs
index e4fe386043..a36621bdaf 100644
--- a/compiler/codeGen/CgCase.lhs
+++ b/compiler/codeGen/CgCase.lhs
@@ -670,6 +670,6 @@ restoreCurrentCostCentre Nothing _freeit = nopC
restoreCurrentCostCentre (Just slot) freeit
= do { sp_rel <- getSpRelOffset slot
; whenC freeit (freeStackSlots [slot])
- ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel bWord)) }
+ ; stmtC (storeCurCCS (CmmLoad sp_rel bWord)) }
\end{code}
diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs
index 243d59f5db..7bad8516d9 100644
--- a/compiler/codeGen/CgClosure.lhs
+++ b/compiler/codeGen/CgClosure.lhs
@@ -316,9 +316,10 @@ mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do
-- Do the business
; funWrapper cl_info reg_args reg_save_code $ do
{ tickyEnterFun cl_info
- ; enterCostCentreFun cc $
- CmmMachOp mo_wordSub [ CmmReg nodeReg
- , CmmLit (mkIntCLit (funTag cl_info)) ]
+ ; enterCostCentreFun cc
+ (CmmMachOp mo_wordSub [ CmmReg nodeReg
+ , CmmLit (mkIntCLit (funTag cl_info)) ])
+ (node : map snd reg_args) -- live regs
; cgExpr body }
}
diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs
index 7d67132fcf..8d8b97d76a 100644
--- a/compiler/codeGen/CgForeignCall.hs
+++ b/compiler/codeGen/CgForeignCall.hs
@@ -240,8 +240,8 @@ emitLoadThreadState = do
emitOpenNursery
-- and load the current cost centre stack from the TSO when profiling:
when opt_SccProfilingOn $
- stmtC (CmmStore curCCSAddr
- (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord))
+ stmtC $ storeCurCCS $
+ CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord
emitOpenNursery :: Code
emitOpenNursery = stmtsC [
diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs
index 13667c399a..3e247ff4d6 100644
--- a/compiler/codeGen/CgProf.hs
+++ b/compiler/codeGen/CgProf.hs
@@ -21,7 +21,7 @@ module CgProf (
enterCostCentreThunk,
enterCostCentreFun,
costCentreFrom,
- curCCS, curCCSAddr,
+ curCCS, storeCurCCS,
emitCostCentreDecl, emitCostCentreStackDecl,
emitSetCCC,
@@ -66,11 +66,10 @@ import Control.Monad
-- Expression representing the current cost centre stack
curCCS :: CmmExpr
-curCCS = CmmLoad curCCSAddr bWord
+curCCS = CmmReg (CmmGlobal CCCS)
--- Address of current CCS variable, for storing into
-curCCSAddr :: CmmExpr
-curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS")))
+storeCurCCS :: CmmExpr -> CmmStmt
+storeCurCCS e = CmmAssign (CmmGlobal CCCS) e
mkCCostCentre :: CostCentre -> CmmLit
mkCCostCentre cc = CmmLabel (mkCCLabel cc)
@@ -135,14 +134,15 @@ profAlloc words ccs
enterCostCentreThunk :: CmmExpr -> Code
enterCostCentreThunk closure =
ifProfiling $ do
- stmtC $ CmmStore curCCSAddr (costCentreFrom closure)
+ stmtC $ storeCurCCS (costCentreFrom closure)
-enterCostCentreFun :: CostCentreStack -> CmmExpr -> Code
-enterCostCentreFun ccs closure =
+enterCostCentreFun :: CostCentreStack -> CmmExpr -> [GlobalReg] -> Code
+enterCostCentreFun ccs closure vols =
ifProfiling $ do
if isCurrentCCS ccs
- then emitRtsCall rtsPackageId (fsLit "enterFunCCS")
- [CmmHinted (costCentreFrom closure) AddrHint]
+ then emitRtsCallWithVols rtsPackageId (fsLit "enterFunCCS")
+ [CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint,
+ CmmHinted (costCentreFrom closure) AddrHint] vols
else return () -- top-level function, nothing to do
ifProfiling :: Code -> Code
@@ -226,7 +226,7 @@ emitSetCCC cc tick push
tmp <- newTemp bWord -- TODO FIXME NOW
pushCostCentre tmp curCCS cc
when tick $ stmtC (bumpSccCount (CmmReg (CmmLocal tmp)))
- when push $ stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp)))
+ when push $ stmtC (storeCurCCS (CmmReg (CmmLocal tmp)))
pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code
pushCostCentre result ccs cc
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index 85957e81b9..5274a176a0 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -286,7 +286,7 @@ callerSaveVolatileRegs vols = (caller_save, caller_load)
caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save)
caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save)
- system_regs = [Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery,
+ system_regs = [Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery,
{-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ]
regs_to_save = system_regs ++ vol_list
@@ -384,6 +384,9 @@ callerSaves Hp = True
#ifdef CALLER_SAVES_HpLim
callerSaves HpLim = True
#endif
+#ifdef CALLER_SAVES_CCCS
+callerSaves CCCS = True
+#endif
#ifdef CALLER_SAVES_CurrentTSO
callerSaves CurrentTSO = True
#endif
@@ -423,6 +426,7 @@ baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
baseRegOffset (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")")
baseRegOffset Hp = oFFSET_StgRegTable_rHp
baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
+baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS
baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index 78aabd82ce..7c739c61b6 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -210,8 +210,8 @@ loadThreadState tso stack = do
openNursery,
-- and load the current cost centre stack from the TSO when profiling:
if opt_SccProfilingOn then
- mkStore curCCSAddr
- (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType)
+ storeCurCCS
+ (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType)
else mkNop]
emitLoadThreadState :: LocalReg -> LocalReg -> FCode ()
emitLoadThreadState tso stack = emit $ loadThreadState tso stack
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs
index 13c1be7f42..d9b3583382 100644
--- a/compiler/codeGen/StgCmmProf.hs
+++ b/compiler/codeGen/StgCmmProf.hs
@@ -21,7 +21,7 @@ module StgCmmProf (
dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
enterCostCentreThunk,
costCentreFrom,
- curCCS, curCCSAddr,
+ curCCS, storeCurCCS,
emitSetCCC,
saveCurrentCostCentre, restoreCurrentCostCentre,
@@ -73,11 +73,10 @@ ccType :: CmmType -- Type of a cost centre
ccType = bWord
curCCS :: CmmExpr
-curCCS = CmmLoad curCCSAddr ccsType
+curCCS = CmmReg (CmmGlobal CCCS)
--- Address of current CCS variable, for storing into
-curCCSAddr :: CmmExpr
-curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS")))
+storeCurCCS :: CmmExpr -> CmmAGraph
+storeCurCCS e = mkAssign (CmmGlobal CCCS) e
mkCCostCentre :: CostCentre -> CmmLit
mkCCostCentre cc = CmmLabel (mkCCLabel cc)
@@ -150,7 +149,7 @@ restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
restoreCurrentCostCentre Nothing
= return ()
restoreCurrentCostCentre (Just local_cc)
- = emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc)))
+ = emit (storeCurCCS (CmmReg (CmmLocal local_cc)))
-------------------------------------------------------------------------------
@@ -186,7 +185,7 @@ profAlloc words ccs
enterCostCentreThunk :: CmmExpr -> FCode ()
enterCostCentreThunk closure =
ifProfiling $ do
- emit $ mkStore curCCSAddr (costCentreFrom closure)
+ emit $ storeCurCCS (costCentreFrom closure)
ifProfiling :: FCode () -> FCode ()
ifProfiling code
@@ -269,7 +268,7 @@ emitSetCCC cc tick push
tmp <- newTemp ccsType -- TODO FIXME NOW
pushCostCentre tmp curCCS cc
when tick $ emit (bumpSccCount (CmmReg (CmmLocal tmp)))
- when push $ emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp)))
+ when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp)))
pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
pushCostCentre result ccs cc
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index f209005108..c3327138b3 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -253,7 +253,7 @@ callerSaveVolatileRegs = (caller_save, caller_load)
caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save)
caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save)
- system_regs = [ Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery
+ system_regs = [ Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery
{- ,SparkHd,SparkTl,SparkBase,SparkLim -}
, BaseReg ]
@@ -366,6 +366,9 @@ callerSaves Hp = True
#ifdef CALLER_SAVES_HpLim
callerSaves HpLim = True
#endif
+#ifdef CALLER_SAVES_CCCS
+callerSaves CCCS = True
+#endif
#ifdef CALLER_SAVES_CurrentTSO
callerSaves CurrentTSO = True
#endif
@@ -385,7 +388,8 @@ baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim
baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
baseRegOffset Hp = oFFSET_StgRegTable_rHp
baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
-baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
+baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS
+baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
baseRegOffset GCEnter1 = oFFSET_stgGCEnter1