diff options
author | Simon Marlow <marlowsd@gmail.com> | 2011-11-28 16:48:43 +0000 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2011-11-29 12:21:18 +0000 |
commit | 50de6034343abc93a7b01daccff34121042c0e7c (patch) | |
tree | 24496a5fc6bc39c6baaa574608e53c5d76c169f6 /compiler | |
parent | 1c2b838131134d44004dfdff18c302131478390d (diff) | |
download | haskell-50de6034343abc93a7b01daccff34121042c0e7c.tar.gz |
Make profiling work with multiple capabilities (+RTS -N)
This means that both time and heap profiling work for parallel
programs. Main internal changes:
- CCCS is no longer a global variable; it is now another
pseudo-register in the StgRegTable struct. Thus every
Capability has its own CCCS.
- There is a new built-in CCS called "IDLE", which records ticks for
Capabilities in the idle state. If you profile a single-threaded
program with +RTS -N2, you'll see about 50% of time in "IDLE".
- There is appropriate locking in rts/Profiling.c to protect the
shared cost-centre-stack data structures.
This patch does enough to get it working, I have cut one big corner:
the cost-centre-stack data structure is still shared amongst all
Capabilities, which means that multiple Capabilities will race when
updating the "allocations" and "entries" fields of a CCS. Not only
does this give unpredictable results, but it runs very slowly due to
cache line bouncing.
It is strongly recommended that you use -fno-prof-count-entries to
disable the "entries" count when profiling parallel programs. (I shall
add a note to this effect to the docs).
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/cmm/CmmExpr.hs | 6 | ||||
-rw-r--r-- | compiler/cmm/CmmLex.x | 5 | ||||
-rw-r--r-- | compiler/cmm/CmmParse.y | 5 | ||||
-rw-r--r-- | compiler/cmm/PprCmmExpr.hs | 1 | ||||
-rw-r--r-- | compiler/codeGen/CgCase.lhs | 2 | ||||
-rw-r--r-- | compiler/codeGen/CgClosure.lhs | 7 | ||||
-rw-r--r-- | compiler/codeGen/CgForeignCall.hs | 4 | ||||
-rw-r--r-- | compiler/codeGen/CgProf.hs | 22 | ||||
-rw-r--r-- | compiler/codeGen/CgUtils.hs | 6 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmForeign.hs | 4 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmProf.hs | 15 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmUtils.hs | 8 |
12 files changed, 50 insertions, 35 deletions
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index ef97a82aa9..885639b874 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -343,7 +343,8 @@ data GlobalReg | SpLim -- Stack limit | Hp -- Heap ptr; points to last occupied heap location. | HpLim -- Heap limit register - | CurrentTSO -- pointer to current thread's TSO + | CCCS -- Current cost-centre stack + | CurrentTSO -- pointer to current thread's TSO | CurrentNursery -- pointer to allocation area | HpAlloc -- allocation count for heap check failure @@ -395,6 +396,7 @@ instance Ord GlobalReg where compare SpLim SpLim = EQ compare Hp Hp = EQ compare HpLim HpLim = EQ + compare CCCS CCCS = EQ compare CurrentTSO CurrentTSO = EQ compare CurrentNursery CurrentNursery = EQ compare HpAlloc HpAlloc = EQ @@ -419,6 +421,8 @@ instance Ord GlobalReg where compare _ Hp = GT compare HpLim _ = LT compare _ HpLim = GT + compare CCCS _ = LT + compare _ CCCS = GT compare CurrentTSO _ = LT compare _ CurrentTSO = GT compare CurrentNursery _ = LT diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x index 1e2b20d4b3..ddd681d25e 100644 --- a/compiler/cmm/CmmLex.x +++ b/compiler/cmm/CmmLex.x @@ -106,8 +106,9 @@ $white_no_nl+ ; SpLim { global_reg SpLim } Hp { global_reg Hp } HpLim { global_reg HpLim } - CurrentTSO { global_reg CurrentTSO } - CurrentNursery { global_reg CurrentNursery } + CCCS { global_reg CCCS } + CurrentTSO { global_reg CurrentTSO } + CurrentNursery { global_reg CurrentNursery } HpAlloc { global_reg HpAlloc } BaseReg { global_reg BaseReg } diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index 0a50f60b2c..4e315ddbdf 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -775,8 +775,9 @@ isPtrGlobalReg Sp = True isPtrGlobalReg SpLim = True isPtrGlobalReg Hp = True isPtrGlobalReg HpLim = True -isPtrGlobalReg CurrentTSO = True -isPtrGlobalReg CurrentNursery = True +isPtrGlobalReg CCCS = True +isPtrGlobalReg CurrentTSO = True +isPtrGlobalReg CurrentNursery = True isPtrGlobalReg (VanillaReg _ VGcPtr) = True isPtrGlobalReg _ = False diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index 76fbdcec8d..81ce84c264 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -271,6 +271,7 @@ pprGlobalReg gr SpLim -> ptext (sLit "SpLim") Hp -> ptext (sLit "Hp") HpLim -> ptext (sLit "HpLim") + CCCS -> ptext (sLit "CCCS") CurrentTSO -> ptext (sLit "CurrentTSO") CurrentNursery -> ptext (sLit "CurrentNursery") HpAlloc -> ptext (sLit "HpAlloc") diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs index e4fe386043..a36621bdaf 100644 --- a/compiler/codeGen/CgCase.lhs +++ b/compiler/codeGen/CgCase.lhs @@ -670,6 +670,6 @@ restoreCurrentCostCentre Nothing _freeit = nopC restoreCurrentCostCentre (Just slot) freeit = do { sp_rel <- getSpRelOffset slot ; whenC freeit (freeStackSlots [slot]) - ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel bWord)) } + ; stmtC (storeCurCCS (CmmLoad sp_rel bWord)) } \end{code} diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index 243d59f5db..7bad8516d9 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -316,9 +316,10 @@ mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do -- Do the business ; funWrapper cl_info reg_args reg_save_code $ do { tickyEnterFun cl_info - ; enterCostCentreFun cc $ - CmmMachOp mo_wordSub [ CmmReg nodeReg - , CmmLit (mkIntCLit (funTag cl_info)) ] + ; enterCostCentreFun cc + (CmmMachOp mo_wordSub [ CmmReg nodeReg + , CmmLit (mkIntCLit (funTag cl_info)) ]) + (node : map snd reg_args) -- live regs ; cgExpr body } } diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs index 7d67132fcf..8d8b97d76a 100644 --- a/compiler/codeGen/CgForeignCall.hs +++ b/compiler/codeGen/CgForeignCall.hs @@ -240,8 +240,8 @@ emitLoadThreadState = do emitOpenNursery -- and load the current cost centre stack from the TSO when profiling: when opt_SccProfilingOn $ - stmtC (CmmStore curCCSAddr - (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord)) + stmtC $ storeCurCCS $ + CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord emitOpenNursery :: Code emitOpenNursery = stmtsC [ diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs index 13667c399a..3e247ff4d6 100644 --- a/compiler/codeGen/CgProf.hs +++ b/compiler/codeGen/CgProf.hs @@ -21,7 +21,7 @@ module CgProf ( enterCostCentreThunk, enterCostCentreFun, costCentreFrom, - curCCS, curCCSAddr, + curCCS, storeCurCCS, emitCostCentreDecl, emitCostCentreStackDecl, emitSetCCC, @@ -66,11 +66,10 @@ import Control.Monad -- Expression representing the current cost centre stack curCCS :: CmmExpr -curCCS = CmmLoad curCCSAddr bWord +curCCS = CmmReg (CmmGlobal CCCS) --- Address of current CCS variable, for storing into -curCCSAddr :: CmmExpr -curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS"))) +storeCurCCS :: CmmExpr -> CmmStmt +storeCurCCS e = CmmAssign (CmmGlobal CCCS) e mkCCostCentre :: CostCentre -> CmmLit mkCCostCentre cc = CmmLabel (mkCCLabel cc) @@ -135,14 +134,15 @@ profAlloc words ccs enterCostCentreThunk :: CmmExpr -> Code enterCostCentreThunk closure = ifProfiling $ do - stmtC $ CmmStore curCCSAddr (costCentreFrom closure) + stmtC $ storeCurCCS (costCentreFrom closure) -enterCostCentreFun :: CostCentreStack -> CmmExpr -> Code -enterCostCentreFun ccs closure = +enterCostCentreFun :: CostCentreStack -> CmmExpr -> [GlobalReg] -> Code +enterCostCentreFun ccs closure vols = ifProfiling $ do if isCurrentCCS ccs - then emitRtsCall rtsPackageId (fsLit "enterFunCCS") - [CmmHinted (costCentreFrom closure) AddrHint] + then emitRtsCallWithVols rtsPackageId (fsLit "enterFunCCS") + [CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint, + CmmHinted (costCentreFrom closure) AddrHint] vols else return () -- top-level function, nothing to do ifProfiling :: Code -> Code @@ -226,7 +226,7 @@ emitSetCCC cc tick push tmp <- newTemp bWord -- TODO FIXME NOW pushCostCentre tmp curCCS cc when tick $ stmtC (bumpSccCount (CmmReg (CmmLocal tmp))) - when push $ stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp))) + when push $ stmtC (storeCurCCS (CmmReg (CmmLocal tmp))) pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code pushCostCentre result ccs cc diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 85957e81b9..5274a176a0 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -286,7 +286,7 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save) caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save) - system_regs = [Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery, + system_regs = [Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery, {-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ] regs_to_save = system_regs ++ vol_list @@ -384,6 +384,9 @@ callerSaves Hp = True #ifdef CALLER_SAVES_HpLim callerSaves HpLim = True #endif +#ifdef CALLER_SAVES_CCCS +callerSaves CCCS = True +#endif #ifdef CALLER_SAVES_CurrentTSO callerSaves CurrentTSO = True #endif @@ -423,6 +426,7 @@ baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1 baseRegOffset (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")") baseRegOffset Hp = oFFSET_StgRegTable_rHp baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim +baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index 78aabd82ce..7c739c61b6 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -210,8 +210,8 @@ loadThreadState tso stack = do openNursery, -- and load the current cost centre stack from the TSO when profiling: if opt_SccProfilingOn then - mkStore curCCSAddr - (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType) + storeCurCCS + (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType) else mkNop] emitLoadThreadState :: LocalReg -> LocalReg -> FCode () emitLoadThreadState tso stack = emit $ loadThreadState tso stack diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index 13c1be7f42..d9b3583382 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -21,7 +21,7 @@ module StgCmmProf ( dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, enterCostCentreThunk, costCentreFrom, - curCCS, curCCSAddr, + curCCS, storeCurCCS, emitSetCCC, saveCurrentCostCentre, restoreCurrentCostCentre, @@ -73,11 +73,10 @@ ccType :: CmmType -- Type of a cost centre ccType = bWord curCCS :: CmmExpr -curCCS = CmmLoad curCCSAddr ccsType +curCCS = CmmReg (CmmGlobal CCCS) --- Address of current CCS variable, for storing into -curCCSAddr :: CmmExpr -curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS"))) +storeCurCCS :: CmmExpr -> CmmAGraph +storeCurCCS e = mkAssign (CmmGlobal CCCS) e mkCCostCentre :: CostCentre -> CmmLit mkCCostCentre cc = CmmLabel (mkCCLabel cc) @@ -150,7 +149,7 @@ restoreCurrentCostCentre :: Maybe LocalReg -> FCode () restoreCurrentCostCentre Nothing = return () restoreCurrentCostCentre (Just local_cc) - = emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc))) + = emit (storeCurCCS (CmmReg (CmmLocal local_cc))) ------------------------------------------------------------------------------- @@ -186,7 +185,7 @@ profAlloc words ccs enterCostCentreThunk :: CmmExpr -> FCode () enterCostCentreThunk closure = ifProfiling $ do - emit $ mkStore curCCSAddr (costCentreFrom closure) + emit $ storeCurCCS (costCentreFrom closure) ifProfiling :: FCode () -> FCode () ifProfiling code @@ -269,7 +268,7 @@ emitSetCCC cc tick push tmp <- newTemp ccsType -- TODO FIXME NOW pushCostCentre tmp curCCS cc when tick $ emit (bumpSccCount (CmmReg (CmmLocal tmp))) - when push $ emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp))) + when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp))) pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () pushCostCentre result ccs cc diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index f209005108..c3327138b3 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -253,7 +253,7 @@ callerSaveVolatileRegs = (caller_save, caller_load) caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save) caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save) - system_regs = [ Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery + system_regs = [ Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery {- ,SparkHd,SparkTl,SparkBase,SparkLim -} , BaseReg ] @@ -366,6 +366,9 @@ callerSaves Hp = True #ifdef CALLER_SAVES_HpLim callerSaves HpLim = True #endif +#ifdef CALLER_SAVES_CCCS +callerSaves CCCS = True +#endif #ifdef CALLER_SAVES_CurrentTSO callerSaves CurrentTSO = True #endif @@ -385,7 +388,8 @@ baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1 baseRegOffset Hp = oFFSET_StgRegTable_rHp baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim -baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO +baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS +baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc baseRegOffset GCEnter1 = oFFSET_stgGCEnter1 |