diff options
author | Simon Marlow <marlowsd@gmail.com> | 2014-04-28 16:55:47 +0100 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2014-05-02 14:49:22 +0100 |
commit | b0534f78a73f972e279eed4447a5687bd6a8308e (patch) | |
tree | 02d52756620bf27b9df9db45c57dacf55f190842 /compiler/codeGen | |
parent | 34db5ccf52ec2a1b5e953c282d0c52a7fc82c02a (diff) | |
download | haskell-b0534f78a73f972e279eed4447a5687bd6a8308e.tar.gz |
Per-thread allocation counters and limits
This tracks the amount of memory allocation by each thread in a
counter stored in the TSO. Optionally, when the counter drops below
zero (it counts down), the thread can be sent an asynchronous
exception: AllocationLimitExceeded. When this happens, given a small
additional limit so that it can handle the exception. See
documentation in GHC.Conc for more details.
Allocation limits are similar to timeouts, but
- timeouts use real time, not CPU time. Allocation limits do not
count anything while the thread is blocked or in foreign code.
- timeouts don't re-trigger if the thread catches the exception,
allocation limits do.
- timeouts can catch non-allocating loops, if you use
-fno-omit-yields. This doesn't work for allocation limits.
I couldn't measure any impact on benchmarks with these changes, even
for nofib/smp.
Diffstat (limited to 'compiler/codeGen')
-rw-r--r-- | compiler/codeGen/StgCmmForeign.hs | 268 |
1 files changed, 196 insertions, 72 deletions
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index bf88f1ccb3..2730275d34 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -7,12 +7,15 @@ ----------------------------------------------------------------------------- module StgCmmForeign ( - cgForeignCall, loadThreadState, saveThreadState, + cgForeignCall, emitPrimCall, emitCCall, emitForeignCall, -- For CmmParse - emitSaveThreadState, -- will be needed by the Cmm parser - emitLoadThreadState, -- ditto - emitCloseNursery, emitOpenNursery + emitSaveThreadState, + saveThreadState, + emitLoadThreadState, + loadThreadState, + emitOpenNursery, + emitCloseNursery, ) where #include "HsVersions.h" @@ -264,94 +267,215 @@ maybe_assign_temp e = do -- This stuff can't be done in suspendThread/resumeThread, because it -- refers to global registers which aren't available in the C world. -saveThreadState :: DynFlags -> CmmAGraph -saveThreadState dflags = - -- CurrentTSO->stackobj->sp = Sp; - mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags stgCurrentTSO (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp - <*> closeNursery dflags - -- and save the current cost centre stack in the TSO when profiling: - <*> if gopt Opt_SccProfilingOn dflags then - mkStore (cmmOffset dflags stgCurrentTSO (tso_CCCS dflags)) curCCS - else mkNop - emitSaveThreadState :: FCode () emitSaveThreadState = do dflags <- getDynFlags - emit (saveThreadState dflags) + tso <- newTemp (gcWord dflags) + cn <- newTemp (bWord dflags) + emit $ saveThreadState dflags tso cn + + +-- saveThreadState must be usable from the stack layout pass, where we +-- don't have FCode. Therefore it takes LocalRegs as arguments, so +-- the caller can create these. +saveThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph +saveThreadState dflags tso cn = + catAGraphs [ + -- tso = CurrentTSO; + mkAssign (CmmLocal tso) stgCurrentTSO, + -- tso->stackobj->sp = Sp; + mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp, + closeNursery dflags tso cn, + -- and save the current cost centre stack in the TSO when profiling: + if gopt Opt_SccProfilingOn dflags then + mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) curCCS + else mkNop + ] emitCloseNursery :: FCode () emitCloseNursery = do - df <- getDynFlags - emit (closeNursery df) + dflags <- getDynFlags + tso <- newTemp (gcWord dflags) + cn <- newTemp (bWord dflags) + emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> + closeNursery dflags tso cn + +{- +Closing the nursery corresponds to the following code: + + tso = CurrentTSO; + cn = CurrentNuresry; - -- CurrentNursery->free = Hp+1; -closeNursery :: DynFlags -> CmmAGraph -closeNursery dflags = mkStore (nursery_bdescr_free dflags) (cmmOffsetW dflags stgHp 1) + // Update the allocation limit for the current thread. We don't + // check to see whether it has overflowed at this point, that check is + // made when we run out of space in the current heap block (stg_gc_noregs) + // and in the scheduler when context switching (schedulePostRunThread). + tso->alloc_limit -= Hp + WDS(1) - cn->start; -loadThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph -loadThreadState dflags tso stack = do + // Set cn->free to the next unoccupied word in the block + cn->free = Hp + WDS(1); +-} + +closeNursery :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph +closeNursery df tso cn = + let + tsoreg = CmmLocal tso + cnreg = CmmLocal cn + in catAGraphs [ - -- tso = CurrentTSO; - mkAssign (CmmLocal tso) stgCurrentTSO, - -- stack = tso->stackobj; - mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)), - -- Sp = stack->sp; - mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)), - -- SpLim = stack->stack + RESERVED_STACK_WORDS; - mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags)) - (rESERVED_STACK_WORDS dflags)), - -- HpAlloc = 0; - -- HpAlloc is assumed to be set to non-zero only by a failed - -- a heap check, see HeapStackCheck.cmm:GC_GENERIC - mkAssign hpAlloc (zeroExpr dflags), - - openNursery dflags, - -- and load the current cost centre stack from the TSO when profiling: - if gopt Opt_SccProfilingOn dflags then - storeCurCCS - (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) (ccsType dflags)) - else mkNop] + mkAssign cnreg stgCurrentNursery, + + let alloc = + CmmMachOp (mo_wordSub df) + [ cmmOffsetW df stgHp 1 + , CmmLoad (nursery_bdescr_start df cnreg) (bWord df) + ] + + alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df) + in + + -- tso->alloc_limit += alloc + mkStore alloc_limit (CmmMachOp (mo_wordSub df) + [ CmmLoad alloc_limit b64 + , alloc ]), + + -- CurrentNursery->free = Hp+1; + mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df stgHp 1) + ] emitLoadThreadState :: FCode () emitLoadThreadState = do dflags <- getDynFlags - load_tso <- newTemp (gcWord dflags) - load_stack <- newTemp (gcWord dflags) - emit $ loadThreadState dflags load_tso load_stack + tso <- newTemp (gcWord dflags) + stack <- newTemp (gcWord dflags) + cn <- newTemp (bWord dflags) + bdfree <- newTemp (bWord dflags) + bdstart <- newTemp (bWord dflags) + emit $ loadThreadState dflags tso stack cn bdfree bdstart + +-- loadThreadState must be usable from the stack layout pass, where we +-- don't have FCode. Therefore it takes LocalRegs as arguments, so +-- the caller can create these. +loadThreadState :: DynFlags + -> LocalReg -> LocalReg -> LocalReg -> LocalReg -> LocalReg + -> CmmAGraph +loadThreadState dflags tso stack cn bdfree bdstart = + catAGraphs [ + -- tso = CurrentTSO; + mkAssign (CmmLocal tso) stgCurrentTSO, + -- stack = tso->stackobj; + mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)), + -- Sp = stack->sp; + mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)), + -- SpLim = stack->stack + RESERVED_STACK_WORDS; + mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags)) + (rESERVED_STACK_WORDS dflags)), + -- HpAlloc = 0; + -- HpAlloc is assumed to be set to non-zero only by a failed + -- a heap check, see HeapStackCheck.cmm:GC_GENERIC + mkAssign hpAlloc (zeroExpr dflags), + openNursery dflags tso cn bdfree bdstart, + -- and load the current cost centre stack from the TSO when profiling: + if gopt Opt_SccProfilingOn dflags + then storeCurCCS + (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) + (tso_CCCS dflags)) (ccsType dflags)) + else mkNop + ] + emitOpenNursery :: FCode () emitOpenNursery = do - df <- getDynFlags - emit (openNursery df) - -openNursery :: DynFlags -> CmmAGraph -openNursery dflags = catAGraphs [ - -- Hp = CurrentNursery->free - 1; - mkAssign hp (cmmOffsetW dflags (CmmLoad (nursery_bdescr_free dflags) (bWord dflags)) (-1)), - - -- HpLim = CurrentNursery->start + - -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; - mkAssign hpLim - (cmmOffsetExpr dflags - (CmmLoad (nursery_bdescr_start dflags) (bWord dflags)) - (cmmOffset dflags - (CmmMachOp (mo_wordMul dflags) [ - CmmMachOp (MO_SS_Conv W32 (wordWidth dflags)) - [CmmLoad (nursery_bdescr_blocks dflags) b32], - mkIntExpr dflags (bLOCK_SIZE dflags) - ]) - (-1) - ) - ) + dflags <- getDynFlags + tso <- newTemp (gcWord dflags) + cn <- newTemp (bWord dflags) + bdfree <- newTemp (bWord dflags) + bdstart <- newTemp (bWord dflags) + emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> + openNursery dflags tso cn bdfree bdstart + +{- +Opening the nursery corresponds to the following code: + + tso = CurrentTSO; + cn = CurrentNursery; + bdfree = CurrentNuresry->free; + bdstart = CurrentNuresry->start; + + // We *add* the currently occupied portion of the nursery block to + // the allocation limit, because we will subtract it again in + // closeNursery. + tso->alloc_limit += bdfree - bdstart; + + // Set Hp to the last occupied word of the heap block. Why not the + // next unocupied word? Doing it this way means that we get to use + // an offset of zero more often, which might lead to slightly smaller + // code on some architectures. + Hp = bdfree - WDS(1); + + // Set HpLim to the end of the current nursery block (note that this block + // might be a block group, consisting of several adjacent blocks. + HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1; +-} + +openNursery :: DynFlags + -> LocalReg -> LocalReg -> LocalReg -> LocalReg + -> CmmAGraph +openNursery df tso cn bdfree bdstart = + let + tsoreg = CmmLocal tso + cnreg = CmmLocal cn + bdfreereg = CmmLocal bdfree + bdstartreg = CmmLocal bdstart + in + catAGraphs [ + mkAssign cnreg stgCurrentNursery, + mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)), + mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)), + + -- alloc = bd->free - bd->start + let alloc = + CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg] + + alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df) + in + + -- tso->alloc_limit += alloc + mkStore alloc_limit (CmmMachOp (mo_wordAdd df) + [ CmmLoad alloc_limit b64 + , alloc ]), + + -- Hp = CurrentNursery->free - 1; + mkAssign hp (cmmOffsetW df (CmmReg bdfreereg) (-1)), + + -- HpLim = CurrentNursery->start + + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; + mkAssign hpLim + (cmmOffsetExpr df + (CmmReg bdstartreg) + (cmmOffset df + (CmmMachOp (mo_wordMul df) [ + CmmMachOp (MO_SS_Conv W32 (wordWidth df)) + [CmmLoad (nursery_bdescr_blocks df cnreg) b32], + mkIntExpr df (bLOCK_SIZE df) + ]) + (-1) + ) + ) ] -nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks :: DynFlags -> CmmExpr -nursery_bdescr_free dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_free dflags) -nursery_bdescr_start dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_start dflags) -nursery_bdescr_blocks dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_blocks dflags) +nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks + :: DynFlags -> CmmReg -> CmmExpr +nursery_bdescr_free dflags cn = + cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags) +nursery_bdescr_start dflags cn = + cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags) +nursery_bdescr_blocks dflags cn = + cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags) -tso_stackobj, tso_CCCS, stack_STACK, stack_SP :: DynFlags -> ByteOff +tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags) +tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags) tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags) stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags) stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags) |