diff options
37 files changed, 675 insertions, 168 deletions
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs index 188233d1ea..c9399b3ba1 100644 --- a/compiler/cmm/CmmLayoutStack.hs +++ b/compiler/cmm/CmmLayoutStack.hs @@ -992,9 +992,12 @@ lowerSafeForeignCall dflags block id <- newTemp (bWord dflags) new_base <- newTemp (cmmRegType dflags (CmmGlobal BaseReg)) let (caller_save, caller_load) = callerSaveVolatileRegs dflags - load_tso <- newTemp (gcWord dflags) load_stack <- newTemp (gcWord dflags) - let suspend = saveThreadState dflags <*> + tso <- newTemp (gcWord dflags) + cn <- newTemp (bWord dflags) + bdfree <- newTemp (bWord dflags) + bdstart <- newTemp (bWord dflags) + let suspend = saveThreadState dflags tso cn <*> caller_save <*> mkMiddle (callSuspendThread dflags id intrbl) midCall = mkUnsafeCall tgt res args @@ -1003,7 +1006,7 @@ lowerSafeForeignCall dflags block -- might now have a different Capability! mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*> caller_load <*> - loadThreadState dflags load_tso load_stack + loadThreadState dflags tso load_stack cn bdfree bdstart (_, regs, copyout) = copyOutOflow dflags NativeReturn Jump (Young succ) diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index b84cb40c69..e9215d5021 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -3,7 +3,7 @@ module CmmMachOp ( MachOp(..) 
, pprMachOp, isCommutableMachOp, isAssociativeMachOp - , isComparisonMachOp, machOpResultType + , isComparisonMachOp, maybeIntComparison, machOpResultType , machOpArgReps, maybeInvertComparison -- MachOp builders @@ -11,9 +11,11 @@ module CmmMachOp , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe , mo_wordULe, mo_wordUGt, mo_wordULt - , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot + , mo_wordShl, mo_wordSShr, mo_wordUShr , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 - , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord + , mo_u_32ToWord, mo_s_32ToWord , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64 -- CallishMachOp @@ -260,6 +262,7 @@ isAssociativeMachOp mop = MO_Xor {} -> True _other -> False + -- ---------------------------------------------------------------------------- -- isComparisonMachOp @@ -290,6 +293,25 @@ isComparisonMachOp mop = MO_F_Lt {} -> True _other -> False +{- | +Returns @Just w@ if the operation is an integer comparison with width +@w@, or @Nothing@ otherwise. 
+-} +maybeIntComparison :: MachOp -> Maybe Width +maybeIntComparison mop = + case mop of + MO_Eq w -> Just w + MO_Ne w -> Just w + MO_S_Ge w -> Just w + MO_S_Le w -> Just w + MO_S_Gt w -> Just w + MO_S_Lt w -> Just w + MO_U_Ge w -> Just w + MO_U_Le w -> Just w + MO_U_Gt w -> Just w + MO_U_Lt w -> Just w + _ -> Nothing + -- ----------------------------------------------------------------------------- -- Inverting conditions diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index eb1c7da76d..c2e276ed0b 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -9,12 +9,15 @@ ----------------------------------------------------------------------------- module StgCmmForeign ( - cgForeignCall, loadThreadState, saveThreadState, + cgForeignCall, emitPrimCall, emitCCall, emitForeignCall, -- For CmmParse - emitSaveThreadState, -- will be needed by the Cmm parser - emitLoadThreadState, -- ditto - emitCloseNursery, emitOpenNursery + emitSaveThreadState, + saveThreadState, + emitLoadThreadState, + loadThreadState, + emitOpenNursery, + emitCloseNursery, ) where #include "HsVersions.h" @@ -271,94 +274,221 @@ maybe_assign_temp e = do -- This stuff can't be done in suspendThread/resumeThread, because it -- refers to global registers which aren't available in the C world. 
-saveThreadState :: DynFlags -> CmmAGraph -saveThreadState dflags = - -- CurrentTSO->stackobj->sp = Sp; - mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags stgCurrentTSO (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp - <*> closeNursery dflags - -- and save the current cost centre stack in the TSO when profiling: - <*> if gopt Opt_SccProfilingOn dflags then - mkStore (cmmOffset dflags stgCurrentTSO (tso_CCCS dflags)) curCCS - else mkNop - emitSaveThreadState :: FCode () emitSaveThreadState = do dflags <- getDynFlags - emit (saveThreadState dflags) + tso <- newTemp (gcWord dflags) + cn <- newTemp (bWord dflags) + emit $ saveThreadState dflags tso cn + + +-- saveThreadState must be usable from the stack layout pass, where we +-- don't have FCode. Therefore it takes LocalRegs as arguments, so +-- the caller can create these. +saveThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph +saveThreadState dflags tso cn = + catAGraphs [ + -- tso = CurrentTSO; + mkAssign (CmmLocal tso) stgCurrentTSO, + -- tso->stackobj->sp = Sp; + mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp, + closeNursery dflags tso cn, + -- and save the current cost centre stack in the TSO when profiling: + if gopt Opt_SccProfilingOn dflags then + mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) curCCS + else mkNop + ] emitCloseNursery :: FCode () emitCloseNursery = do - df <- getDynFlags - emit (closeNursery df) + dflags <- getDynFlags + tso <- newTemp (gcWord dflags) + cn <- newTemp (bWord dflags) + emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> + closeNursery dflags tso cn + +{- +Closing the nursery corresponds to the following code: + + tso = CurrentTSO; + cn = CurrentNursery; - -- CurrentNursery->free = Hp+1; -closeNursery :: DynFlags -> CmmAGraph -closeNursery dflags = mkStore (nursery_bdescr_free dflags) (cmmOffsetW dflags stgHp 1) + // Update the allocation
limit for the current thread. We don't + // check to see whether it has overflowed at this point, that check is + // made when we run out of space in the current heap block (stg_gc_noregs) + // and in the scheduler when context switching (schedulePostRunThread). + tso->alloc_limit -= Hp + WDS(1) - cn->start; -loadThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph -loadThreadState dflags tso stack = do + // Set cn->free to the next unoccupied word in the block + cn->free = Hp + WDS(1); +-} + +closeNursery :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph +closeNursery df tso cn = + let + tsoreg = CmmLocal tso + cnreg = CmmLocal cn + in catAGraphs [ - -- tso = CurrentTSO; - mkAssign (CmmLocal tso) stgCurrentTSO, - -- stack = tso->stackobj; - mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)), - -- Sp = stack->sp; - mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)), - -- SpLim = stack->stack + RESERVED_STACK_WORDS; - mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags)) - (rESERVED_STACK_WORDS dflags)), - -- HpAlloc = 0; - -- HpAlloc is assumed to be set to non-zero only by a failed - -- a heap check, see HeapStackCheck.cmm:GC_GENERIC - mkAssign hpAlloc (zeroExpr dflags), - - openNursery dflags, - -- and load the current cost centre stack from the TSO when profiling: - if gopt Opt_SccProfilingOn dflags then - storeCurCCS - (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) (ccsType dflags)) - else mkNop] + mkAssign cnreg stgCurrentNursery, + + -- CurrentNursery->free = Hp+1; + mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df stgHp 1), + + let alloc = + CmmMachOp (mo_wordSub df) + [ cmmOffsetW df stgHp 1 + , CmmLoad (nursery_bdescr_start df cnreg) (bWord df) + ] + + alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df) + in + + -- tso->alloc_limit -= alloc + mkStore 
alloc_limit (CmmMachOp (MO_Sub W64) + [ CmmLoad alloc_limit b64 + , CmmMachOp (mo_WordTo64 df) [alloc] ]) + ] emitLoadThreadState :: FCode () emitLoadThreadState = do dflags <- getDynFlags - load_tso <- newTemp (gcWord dflags) - load_stack <- newTemp (gcWord dflags) - emit $ loadThreadState dflags load_tso load_stack + tso <- newTemp (gcWord dflags) + stack <- newTemp (gcWord dflags) + cn <- newTemp (bWord dflags) + bdfree <- newTemp (bWord dflags) + bdstart <- newTemp (bWord dflags) + emit $ loadThreadState dflags tso stack cn bdfree bdstart + +-- loadThreadState must be usable from the stack layout pass, where we +-- don't have FCode. Therefore it takes LocalRegs as arguments, so +-- the caller can create these. +loadThreadState :: DynFlags + -> LocalReg -> LocalReg -> LocalReg -> LocalReg -> LocalReg + -> CmmAGraph +loadThreadState dflags tso stack cn bdfree bdstart = + catAGraphs [ + -- tso = CurrentTSO; + mkAssign (CmmLocal tso) stgCurrentTSO, + -- stack = tso->stackobj; + mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)), + -- Sp = stack->sp; + mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)), + -- SpLim = stack->stack + RESERVED_STACK_WORDS; + mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags)) + (rESERVED_STACK_WORDS dflags)), + -- HpAlloc = 0; + -- HpAlloc is assumed to be set to non-zero only by a failed + -- a heap check, see HeapStackCheck.cmm:GC_GENERIC + mkAssign hpAlloc (zeroExpr dflags), + openNursery dflags tso cn bdfree bdstart, + -- and load the current cost centre stack from the TSO when profiling: + if gopt Opt_SccProfilingOn dflags + then storeCurCCS + (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) + (tso_CCCS dflags)) (ccsType dflags)) + else mkNop + ] + emitOpenNursery :: FCode () emitOpenNursery = do - df <- getDynFlags - emit (openNursery df) - -openNursery :: DynFlags -> 
CmmAGraph -openNursery dflags = catAGraphs [ - -- Hp = CurrentNursery->free - 1; - mkAssign hp (cmmOffsetW dflags (CmmLoad (nursery_bdescr_free dflags) (bWord dflags)) (-1)), - - -- HpLim = CurrentNursery->start + - -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; - mkAssign hpLim - (cmmOffsetExpr dflags - (CmmLoad (nursery_bdescr_start dflags) (bWord dflags)) - (cmmOffset dflags - (CmmMachOp (mo_wordMul dflags) [ - CmmMachOp (MO_SS_Conv W32 (wordWidth dflags)) - [CmmLoad (nursery_bdescr_blocks dflags) b32], - mkIntExpr dflags (bLOCK_SIZE dflags) - ]) - (-1) - ) - ) + dflags <- getDynFlags + tso <- newTemp (gcWord dflags) + cn <- newTemp (bWord dflags) + bdfree <- newTemp (bWord dflags) + bdstart <- newTemp (bWord dflags) + emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> + openNursery dflags tso cn bdfree bdstart + +{- +Opening the nursery corresponds to the following code: + + tso = CurrentTSO; + cn = CurrentNursery; + bdfree = CurrentNursery->free; + bdstart = CurrentNursery->start; + + // We *add* the currently occupied portion of the nursery block to + // the allocation limit, because we will subtract it again in + // closeNursery. + tso->alloc_limit += bdfree - bdstart; + + // Set Hp to the last occupied word of the heap block. Why not the + // next unoccupied word? Doing it this way means that we get to use + // an offset of zero more often, which might lead to slightly smaller + // code on some architectures. + Hp = bdfree - WDS(1); + + // Set HpLim to the end of the current nursery block (note that this block + // might be a block group, consisting of several adjacent blocks).
+ HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1; +-} + +openNursery :: DynFlags + -> LocalReg -> LocalReg -> LocalReg -> LocalReg + -> CmmAGraph +openNursery df tso cn bdfree bdstart = + let + tsoreg = CmmLocal tso + cnreg = CmmLocal cn + bdfreereg = CmmLocal bdfree + bdstartreg = CmmLocal bdstart + in + -- These assignments are carefully ordered to reduce register + -- pressure and generate not completely awful code on x86. To see + -- what code we generate, look at the assembly for + -- stg_returnToStackTop in rts/StgStartup.cmm. + catAGraphs [ + mkAssign cnreg stgCurrentNursery, + mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)), + + -- Hp = CurrentNursery->free - 1; + mkAssign hp (cmmOffsetW df (CmmReg bdfreereg) (-1)), + + mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)), + + -- HpLim = CurrentNursery->start + + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; + mkAssign hpLim + (cmmOffsetExpr df + (CmmReg bdstartreg) + (cmmOffset df + (CmmMachOp (mo_wordMul df) [ + CmmMachOp (MO_SS_Conv W32 (wordWidth df)) + [CmmLoad (nursery_bdescr_blocks df cnreg) b32], + mkIntExpr df (bLOCK_SIZE df) + ]) + (-1) + ) + ), + + -- alloc = bd->free - bd->start + let alloc = + CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg] + + alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df) + in + + -- tso->alloc_limit += alloc + mkStore alloc_limit (CmmMachOp (MO_Add W64) + [ CmmLoad alloc_limit b64 + , CmmMachOp (mo_WordTo64 df) [alloc] ]) + ] -nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks :: DynFlags -> CmmExpr -nursery_bdescr_free dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_free dflags) -nursery_bdescr_start dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_start dflags) -nursery_bdescr_blocks dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_blocks dflags) +nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks + :: DynFlags -> CmmReg -> CmmExpr 
+nursery_bdescr_free dflags cn = + cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags) +nursery_bdescr_start dflags cn = + cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags) +nursery_bdescr_blocks dflags cn = + cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags) -tso_stackobj, tso_CCCS, stack_STACK, stack_SP :: DynFlags -> ByteOff +tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags) +tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags) tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags) stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags) stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags) diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index abd87ed087..a4115a0b6d 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -391,6 +391,21 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do ADC II32 (OpReg r2hi) (OpReg rhi) ] return (ChildCode64 code rlo) +iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do + ChildCode64 code1 r1lo <- iselExpr64 e1 + ChildCode64 code2 r2lo <- iselExpr64 e2 + (rlo,rhi) <- getNewRegPairNat II32 + let + r1hi = getHiVRegFromLo r1lo + r2hi = getHiVRegFromLo r2lo + code = code1 `appOL` + code2 `appOL` + toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), + SUB II32 (OpReg r2lo) (OpReg rlo), + MOV II32 (OpReg r1hi) (OpReg rhi), + SBB II32 (OpReg r2hi) (OpReg rhi) ] + return (ChildCode64 code rlo) + iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do fn <- getAnyReg expr r_dst_lo <- getNewRegNat II32 @@ -1272,24 +1287,23 @@ getCondCode (CmmMachOp mop [x, y]) MO_F_Lt W64 -> condFltCode LTT x y MO_F_Le W64 -> condFltCode LE x y - MO_Eq _ -> condIntCode EQQ x y - MO_Ne _ -> condIntCode NE x y - - MO_S_Gt _ -> condIntCode GTT x y - MO_S_Ge _ -> condIntCode GE x y - MO_S_Lt _ -> condIntCode LTT x y 
- MO_S_Le _ -> condIntCode LE x y - - MO_U_Gt _ -> condIntCode GU x y - MO_U_Ge _ -> condIntCode GEU x y - MO_U_Lt _ -> condIntCode LU x y - MO_U_Le _ -> condIntCode LEU x y - - _other -> pprPanic "getCondCode(x86,x86_64)" (ppr (CmmMachOp mop [x,y])) + _ -> condIntCode (machOpToCond mop) x y getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other) - +machOpToCond :: MachOp -> Cond +machOpToCond mo = case mo of + MO_Eq _ -> EQQ + MO_Ne _ -> NE + MO_S_Gt _ -> GTT + MO_S_Ge _ -> GE + MO_S_Lt _ -> LTT + MO_S_Le _ -> LE + MO_U_Gt _ -> GU + MO_U_Ge _ -> GEU + MO_U_Lt _ -> LU + MO_U_Le _ -> LEU + _other -> pprPanic "machOpToCond" (pprMachOp mo) -- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be @@ -1538,7 +1552,31 @@ genCondJump -> CmmExpr -- the condition on which to branch -> NatM InstrBlock -genCondJump id bool = do +genCondJump id expr = do + is32Bit <- is32BitPlatform + genCondJump' is32Bit id expr + +genCondJump' :: Bool -> BlockId -> CmmExpr -> NatM InstrBlock + +-- 64-bit integer comparisons on 32-bit +genCondJump' is32Bit true (CmmMachOp mop [e1,e2]) + | is32Bit, Just W64 <- maybeIntComparison mop = do + ChildCode64 code1 r1_lo <- iselExpr64 e1 + ChildCode64 code2 r2_lo <- iselExpr64 e2 + let r1_hi = getHiVRegFromLo r1_lo + r2_hi = getHiVRegFromLo r2_lo + cond = machOpToCond mop + Just cond' = maybeFlipCond cond + false <- getBlockIdNat + return $ code1 `appOL` code2 `appOL` toOL [ + CMP II32 (OpReg r2_hi) (OpReg r1_hi), + JXX cond true, + JXX cond' false, + CMP II32 (OpReg r2_lo) (OpReg r1_lo), + JXX cond true, + NEWBLOCK false ] + +genCondJump' _ id bool = do CondCode is_float cond cond_code <- getCondCode bool use_sse2 <- sse2Enabled if not is_float || not use_sse2 @@ -1569,7 +1607,6 @@ genCondJump id bool = do ] return (cond_code `appOL` code) - -- ----------------------------------------------------------------------------- -- Generating C calls diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs 
index 2f6196227b..0d85376868 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -196,6 +196,7 @@ data Instr | ADD Size Operand Operand | ADC Size Operand Operand | SUB Size Operand Operand + | SBB Size Operand Operand | MUL Size Operand Operand | MUL2 Size Operand -- %edx:%eax = operand * %rax @@ -365,6 +366,7 @@ x86_regUsageOfInstr platform instr ADD _ src dst -> usageRM src dst ADC _ src dst -> usageRM src dst SUB _ src dst -> usageRM src dst + SBB _ src dst -> usageRM src dst IMUL _ src dst -> usageRM src dst IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx] MUL _ src dst -> usageRM src dst @@ -543,6 +545,7 @@ x86_patchRegsOfInstr instr env ADD sz src dst -> patch2 (ADD sz) src dst ADC sz src dst -> patch2 (ADC sz) src dst SUB sz src dst -> patch2 (SUB sz) src dst + SBB sz src dst -> patch2 (SBB sz) src dst IMUL sz src dst -> patch2 (IMUL sz) src dst IMUL2 sz src -> patch1 (IMUL2 sz) src MUL sz src dst -> patch2 (MUL sz) src dst diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index cc39557f1d..2b3711751c 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -570,11 +570,10 @@ pprInstr (ADD size (OpImm (ImmInt (-1))) dst) = pprSizeOp (sLit "dec") size dst pprInstr (ADD size (OpImm (ImmInt 1)) dst) = pprSizeOp (sLit "inc") size dst -pprInstr (ADD size src dst) - = pprSizeOpOp (sLit "add") size src dst -pprInstr (ADC size src dst) - = pprSizeOpOp (sLit "adc") size src dst +pprInstr (ADD size src dst) = pprSizeOpOp (sLit "add") size src dst +pprInstr (ADC size src dst) = pprSizeOpOp (sLit "adc") size src dst pprInstr (SUB size src dst) = pprSizeOpOp (sLit "sub") size src dst +pprInstr (SBB size src dst) = pprSizeOpOp (sLit "sbb") size src dst pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2 pprInstr (ADD_CC size src dst) diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h index 6fd0dc0dfc..02cb63210d 100644 --- a/includes/rts/Constants.h +++ 
b/includes/rts/Constants.h @@ -277,6 +277,12 @@ #define TSO_SQUEEZED 128 /* + * Enables the AllocationLimitExceeded exception when the thread's + * allocation limit goes negative. + */ +#define TSO_ALLOC_LIMIT 256 + +/* * The number of times we spin in a spin lock before yielding (see * #3758). To tune this value, use the benchmark in #3758: run the * server with -N2 and the client both on a dual-core. Also make sure diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h index bf6a7f3c5c..ec542701df 100644 --- a/includes/rts/Flags.h +++ b/includes/rts/Flags.h @@ -56,6 +56,14 @@ struct GC_FLAGS { rtsBool doIdleGC; StgWord heapBase; /* address to ask the OS for memory */ + + StgWord allocLimitGrace; /* units: *blocks* + * After an AllocationLimitExceeded + * exception has been raised, how much + * extra space is given to the thread + * to handle the exception before we + * raise it again. + */ }; struct DEBUG_FLAGS { diff --git a/includes/rts/Threads.h b/includes/rts/Threads.h index 941f6daf65..fc8ae6e089 100644 --- a/includes/rts/Threads.h +++ b/includes/rts/Threads.h @@ -42,8 +42,12 @@ StgRegTable * resumeThread (void *); // // Thread operations from Threads.c // -int cmp_thread (StgPtr tso1, StgPtr tso2); -int rts_getThreadId (StgPtr tso); +int cmp_thread (StgPtr tso1, StgPtr tso2); +int rts_getThreadId (StgPtr tso); +HsInt64 rts_getThreadAllocationCounter (StgPtr tso); +void rts_setThreadAllocationCounter (StgPtr tso, HsInt64 i); +void rts_enableThreadAllocationLimit (StgPtr tso); +void rts_disableThreadAllocationLimit (StgPtr tso); #if !defined(mingw32_HOST_OS) pid_t forkProcess (HsStablePtr *entry); diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h index 6dbcec2595..06056fe716 100644 --- a/includes/rts/storage/TSO.h +++ b/includes/rts/storage/TSO.h @@ -145,15 +145,18 @@ typedef struct StgTSO_ { */ struct StgBlockingQueue_ *bq; -#ifdef TICKY_TICKY - /* TICKY-specific stuff would go here. 
*/ -#endif -#ifdef PROFILING - StgTSOProfInfo prof; -#endif -#ifdef mingw32_HOST_OS - StgWord32 saved_winerror; -#endif + /* + * The allocation limit for this thread, which is updated as the + * thread allocates. If the value drops below zero, and + * TSO_ALLOC_LIMIT is set in flags, we raise an exception in the + * thread, and give the thread a little more space to handle the + * exception before we raise the exception again. + * + * This is an integer, because we might update it in a place where + * it isn't convenient to raise the exception, so we want it to + * stay negative until we get around to checking it. + */ + StgInt64 alloc_limit; /* in bytes */ /* * sum of the sizes of all stack chunks (in words), used to decide @@ -168,6 +171,16 @@ typedef struct StgTSO_ { */ StgWord32 tot_stack_size; +#ifdef TICKY_TICKY + /* TICKY-specific stuff would go here. */ +#endif +#ifdef PROFILING + StgTSOProfInfo prof; +#endif +#ifdef mingw32_HOST_OS + StgWord32 saved_winerror; +#endif + } *StgTSOPtr; typedef struct StgStack_ { diff --git a/libraries/base/Control/Exception.hs b/libraries/base/Control/Exception.hs index fa505750f2..0bcbdca942 100644 --- a/libraries/base/Control/Exception.hs +++ b/libraries/base/Control/Exception.hs @@ -48,6 +48,7 @@ module Control.Exception ( NestedAtomically(..), BlockedIndefinitelyOnMVar(..), BlockedIndefinitelyOnSTM(..), + AllocationLimitExceeded(..), Deadlock(..), NoMethodError(..), PatternMatchFail(..), diff --git a/libraries/base/Control/Exception/Base.hs b/libraries/base/Control/Exception/Base.hs index c581d1a5c4..f7779d6f9c 100644 --- a/libraries/base/Control/Exception/Base.hs +++ b/libraries/base/Control/Exception/Base.hs @@ -31,6 +31,7 @@ module Control.Exception.Base ( NestedAtomically(..), BlockedIndefinitelyOnMVar(..), BlockedIndefinitelyOnSTM(..), + AllocationLimitExceeded(..), Deadlock(..), NoMethodError(..), PatternMatchFail(..), diff --git a/libraries/base/GHC/Conc.hs b/libraries/base/GHC/Conc.hs index f1708b33d4..68182a11e4 
100644 --- a/libraries/base/GHC/Conc.hs +++ b/libraries/base/GHC/Conc.hs @@ -59,6 +59,12 @@ module GHC.Conc , threadWaitWriteSTM , closeFdWith + -- * Allocation counter and limit + , setAllocationCounter + , getAllocationCounter + , enableAllocationLimit + , disableAllocationLimit + -- * TVars , STM(..) , atomically diff --git a/libraries/base/GHC/Conc/Sync.hs b/libraries/base/GHC/Conc/Sync.hs index 6d2e772b5a..777fb71e20 100644 --- a/libraries/base/GHC/Conc/Sync.hs +++ b/libraries/base/GHC/Conc/Sync.hs @@ -60,6 +60,12 @@ module GHC.Conc.Sync , threadStatus , threadCapability + -- * Allocation counter and quota + , setAllocationCounter + , getAllocationCounter + , enableAllocationLimit + , disableAllocationLimit + -- * TVars , STM(..) , atomically @@ -171,16 +177,92 @@ instance Eq ThreadId where instance Ord ThreadId where compare = cmpThread +-- | Every thread has an allocation counter that tracks how much +-- memory has been allocated by the thread. The counter is +-- initialized to zero, and 'setAllocationCounter' sets the current +-- value. The allocation counter counts *down*, so in the absence of +-- a call to 'setAllocationCounter' its value is the negation of the +-- number of bytes of memory allocated by the thread. +-- +-- There are two things that you can do with this counter: +-- +-- * Use it as a simple profiling mechanism, with +-- 'getAllocationCounter'. +-- +-- * Use it as a resource limit. See 'enableAllocationLimit'. +-- +-- Allocation accounting is accurate only to about 4Kbytes. +-- +setAllocationCounter :: Int64 -> IO () +setAllocationCounter i = do + ThreadId t <- myThreadId + rts_setThreadAllocationCounter t i + +-- | Return the current value of the allocation counter for the +-- current thread. +getAllocationCounter :: IO Int64 +getAllocationCounter = do + ThreadId t <- myThreadId + rts_getThreadAllocationCounter t + +-- | Enables the allocation counter to be treated as a limit for the +-- current thread. 
When the allocation limit is enabled, if the +-- allocation counter counts down below zero, the thread will be sent +-- the 'AllocationLimitExceeded' asynchronous exception. When this +-- happens, the counter is reinitialised (by default +-- to 100K, but tunable with the @+RTS -xq@ option) so that it can handle +-- the exception and perform any necessary clean up. If it exhausts +-- this additional allowance, another 'AllocationLimitExceeded' exception +-- is sent, and so forth. +-- +-- Note that memory allocation is unrelated to /live memory/, also +-- known as /heap residency/. A thread can allocate a large amount of +-- memory and retain anything between none and all of it. It is +-- better to think of the allocation limit as a limit on +-- /CPU time/, rather than a limit on memory. +-- +-- Compared to using timeouts, allocation limits don't count time +-- spent blocked or in foreign calls. +-- +enableAllocationLimit :: IO () +enableAllocationLimit = do + ThreadId t <- myThreadId + rts_enableThreadAllocationLimit t + +-- | Disable allocation limit processing for the current thread. +disableAllocationLimit :: IO () +disableAllocationLimit = do + ThreadId t <- myThreadId + rts_disableThreadAllocationLimit t + +-- We cannot do these operations safely on another thread, because on +-- a 32-bit machine we cannot do atomic operations on a 64-bit value. +-- Therefore, we only expose APIs that allow getting and setting the +-- limit of the current thread. 
+foreign import ccall unsafe "rts_setThreadAllocationCounter" + rts_setThreadAllocationCounter :: ThreadId# -> Int64 -> IO () + +foreign import ccall unsafe "rts_getThreadAllocationCounter" + rts_getThreadAllocationCounter :: ThreadId# -> IO Int64 + +foreign import ccall unsafe "rts_enableThreadAllocationLimit" + rts_enableThreadAllocationLimit :: ThreadId# -> IO () + +foreign import ccall unsafe "rts_disableThreadAllocationLimit" + rts_disableThreadAllocationLimit :: ThreadId# -> IO () + {- | -Sparks off a new thread to run the 'IO' computation passed as the +Creates a new thread to run the 'IO' computation passed as the first argument, and returns the 'ThreadId' of the newly created thread. -The new thread will be a lightweight thread; if you want to use a foreign -library that uses thread-local storage, use 'Control.Concurrent.forkOS' instead. +The new thread will be a lightweight, /unbound/ thread. Foreign calls +made by this thread are not guaranteed to be made by any particular OS +thread; if you need foreign calls to be made by a particular OS +thread, then use 'Control.Concurrent.forkOS' instead. -GHC note: the new thread inherits the /masked/ state of the parent -(see 'Control.Exception.mask'). +The new thread inherits the /masked/ state of the parent (see +'Control.Exception.mask'). 
The newly created thread has an exception handler that discards the exceptions 'BlockedIndefinitelyOnMVar', 'BlockedIndefinitelyOnSTM', and diff --git a/libraries/base/GHC/IO/Exception.hs b/libraries/base/GHC/IO/Exception.hs index 0f351f0382..d0a21b2744 100644 --- a/libraries/base/GHC/IO/Exception.hs +++ b/libraries/base/GHC/IO/Exception.hs @@ -22,6 +22,7 @@ module GHC.IO.Exception ( BlockedIndefinitelyOnMVar(..), blockedIndefinitelyOnMVar, BlockedIndefinitelyOnSTM(..), blockedIndefinitelyOnSTM, Deadlock(..), + AllocationLimitExceeded(..), allocationLimitExceeded, AssertionFailed(..), SomeAsyncException(..), @@ -98,6 +99,23 @@ instance Show Deadlock where ----- +-- |This thread has exceeded its allocation limit. See +-- 'GHC.Conc.setAllocationCounter' and +-- 'GHC.Conc.enableAllocationLimit'. +data AllocationLimitExceeded = AllocationLimitExceeded + deriving Typeable + +instance Exception AllocationLimitExceeded + +instance Show AllocationLimitExceeded where + showsPrec _ AllocationLimitExceeded = + showString "allocation limit exceeded" + +allocationLimitExceeded :: SomeException -- for the RTS +allocationLimitExceeded = toException AllocationLimitExceeded + +----- + -- |'assert' was applied to 'False'. data AssertionFailed = AssertionFailed String deriving Typeable @@ -174,7 +192,8 @@ data ArrayException instance Exception ArrayException -stackOverflow, heapOverflow :: SomeException -- for the RTS +-- for the RTS +stackOverflow, heapOverflow :: SomeException stackOverflow = toException StackOverflow heapOverflow = toException HeapOverflow diff --git a/rts/Capability.c b/rts/Capability.c index 289eeb2c5b..21f63f39d9 100644 --- a/rts/Capability.c +++ b/rts/Capability.c @@ -297,6 +297,10 @@ initCapability( Capability *cap, nat i ) cap->r.rCCCS = NULL; #endif + // cap->r.rCurrentTSO is charged for calls to allocate(), so we + // don't want it set when not running a Haskell thread. 
+ cap->r.rCurrentTSO = NULL; + traceCapCreate(cap); traceCapsetAssignCap(CAPSET_OSPROCESS_DEFAULT, i); traceCapsetAssignCap(CAPSET_CLOCKDOMAIN_DEFAULT, i); diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm index 0659fed89f..a1fb5d446d 100644 --- a/rts/HeapStackCheck.cmm +++ b/rts/HeapStackCheck.cmm @@ -100,7 +100,9 @@ stg_gc_noregs CurrentNursery = bdescr_link(CurrentNursery); OPEN_NURSERY(); if (Capability_context_switch(MyCapability()) != 0 :: CInt || - Capability_interrupt(MyCapability()) != 0 :: CInt) { + Capability_interrupt(MyCapability()) != 0 :: CInt || + (StgTSO_alloc_limit(CurrentTSO) `lt` (0::I64) && + (TO_W_(StgTSO_flags(CurrentTSO)) & TSO_ALLOC_LIMIT) != 0)) { ret = ThreadYielding; goto sched; } else { diff --git a/rts/Linker.c b/rts/Linker.c index 7d029c62ac..2c74a0dd35 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -1264,6 +1264,10 @@ typedef struct _RtsSymbolVal { SymI_HasProto(rtsSupportsBoundThreads) \ SymI_HasProto(rts_isProfiled) \ SymI_HasProto(rts_isDynamic) \ + SymI_HasProto(rts_getThreadAllocationCounter) \ + SymI_HasProto(rts_setThreadAllocationCounter) \ + SymI_HasProto(rts_enableThreadAllocationLimit) \ + SymI_HasProto(rts_disableThreadAllocationLimit) \ SymI_HasProto(setProgArgv) \ SymI_HasProto(startupHaskell) \ SymI_HasProto(shutdownHaskell) \ diff --git a/rts/Prelude.h b/rts/Prelude.h index 0c54148ba2..614c255af5 100644 --- a/rts/Prelude.h +++ b/rts/Prelude.h @@ -37,6 +37,7 @@ extern StgClosure ZCMain_main_closure; PRELUDE_CLOSURE(base_GHCziIOziException_stackOverflow_closure); PRELUDE_CLOSURE(base_GHCziIOziException_heapOverflow_closure); +PRELUDE_CLOSURE(base_GHCziIOziException_allocationLimitExceeded_closure); PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnThrowTo_closure); PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnMVar_closure); PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnSTM_closure); @@ -101,6 +102,7 @@ PRELUDE_INFO(base_GHCziStable_StablePtr_con_info); #define 
stackOverflow_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_stackOverflow_closure) #define heapOverflow_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_heapOverflow_closure) +#define allocationLimitExceeded_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_allocationLimitExceeded_closure) #define blockedIndefinitelyOnMVar_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_blockedIndefinitelyOnMVar_closure) #define blockedIndefinitelyOnSTM_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_blockedIndefinitelyOnSTM_closure) #define nonTermination_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nonTermination_closure) diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index 10585c89fa..3b206ffa7e 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -89,6 +89,60 @@ suspendComputation (Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here) } /* ----------------------------------------------------------------------------- + throwToSelf + + Useful for throwing an async exception in a thread from the + runtime. It handles unlocking the throwto message returned by + throwTo(). + + Note [Throw to self when masked] + + When a StackOverflow occurs when the thread is masked, we want to + defer the exception to when the thread becomes unmasked/hits an + interruptible point. We already have a mechanism for doing this, + the blocked_exceptions list, but the use here is a bit unusual, + because an exception is normally only added to this list upon + an asynchronous 'throwTo' call (with all of the relevant + multithreaded nonsense). Morally, a stack overflow should be an + asynchronous exception sent by a thread to itself, and it should + have the same semantics. But there are a few key differences: + + - If you actually tried to send an asynchronous exception to + yourself using throwTo, the exception would actually immediately + be delivered. This is because throwTo itself is considered an + interruptible point, so the exception is always deliverable. 
Thus, + ordinarily, we never end up with a message to onesself in the + blocked_exceptions queue. + + - In the case of a StackOverflow, we don't actually care about the + wakeup semantics; when an exception is delivered, the thread that + originally threw the exception should be woken up, since throwTo + blocks until the exception is successfully thrown. Fortunately, + it is harmless to wakeup a thread that doesn't actually need waking + up, e.g. ourselves. + + - No synchronization is necessary, because we own the TSO and the + capability. You can observe this by tracing through the execution + of throwTo. We skip synchronizing the message and inter-capability + communication. + + We think this doesn't break any invariants, but do be careful! + -------------------------------------------------------------------------- */ + +void +throwToSelf (Capability *cap, StgTSO *tso, StgClosure *exception) +{ + MessageThrowTo *m; + + m = throwTo(cap, tso, tso, exception); + + if (m != NULL) { + // throwTo leaves it locked + unlockClosure((StgClosure*)m, &stg_MSG_THROWTO_info); + } +} + +/* ----------------------------------------------------------------------------- throwTo This function may be used to throw an exception from one thread to diff --git a/rts/RaiseAsync.h b/rts/RaiseAsync.h index e2763d0cb8..6bfed8d6ca 100644 --- a/rts/RaiseAsync.h +++ b/rts/RaiseAsync.h @@ -28,6 +28,10 @@ void throwToSingleThreaded_ (Capability *cap, StgClosure *exception, rtsBool stop_at_atomically); +void throwToSelf (Capability *cap, + StgTSO *tso, + StgClosure *exception); + void suspendComputation (Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here); diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index 44c05cec3b..82e90e5b0e 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -137,6 +137,7 @@ void initRtsFlagsDefaults(void) #else RtsFlags.GcFlags.heapBase = 0; /* means don't care */ #endif + RtsFlags.GcFlags.allocLimitGrace = (100*1024) / BLOCK_SIZE; #ifdef DEBUG 
RtsFlags.DebugFlags.scheduler = rtsFalse; @@ -403,6 +404,8 @@ usage_text[] = { " +PAPI_EVENT - collect papi preset event PAPI_EVENT", " #NATIVE_EVENT - collect native event NATIVE_EVENT (in hex)", #endif +" -xq The allocation limit given to a thread after it receives", +" an AllocationLimitExceeded exception. (default: 100k)", "", "RTS options may also be specified using the GHCRTS environment variable.", "", @@ -1361,6 +1364,13 @@ error = rtsTrue; /* The option prefix '-xx' is reserved for future extension. KSW 1999-11. */ + case 'q': + OPTION_UNSAFE; + RtsFlags.GcFlags.allocLimitGrace + = decodeSize(rts_argv[arg], 3, BLOCK_SIZE, HS_INT_MAX) + / BLOCK_SIZE; + break; + default: OPTION_SAFE; errorBelch("unknown RTS option: %s",rts_argv[arg]); diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c index 32bed5af8f..b8201e1651 100644 --- a/rts/RtsStartup.c +++ b/rts/RtsStartup.c @@ -214,6 +214,7 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config) getStablePtr((StgPtr)blockedIndefinitelyOnMVar_closure); getStablePtr((StgPtr)nonTermination_closure); getStablePtr((StgPtr)blockedIndefinitelyOnSTM_closure); + getStablePtr((StgPtr)allocationLimitExceeded_closure); getStablePtr((StgPtr)nestedAtomically_closure); getStablePtr((StgPtr)runSparks_closure); diff --git a/rts/Schedule.c b/rts/Schedule.c index b11270832d..c2260f0282 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -481,6 +481,10 @@ run_thread: // happened. So find the new location: t = cap->r.rCurrentTSO; + // cap->r.rCurrentTSO is charged for calls to allocate(), so we + // don't want it set when not running a Haskell thread. + cap->r.rCurrentTSO = NULL; + // And save the current errno in this thread. // XXX: possibly bogus for SMP because this thread might already // be running again, see code below. @@ -1078,6 +1082,21 @@ schedulePostRunThread (Capability *cap, StgTSO *t) } } + // + // If the current thread's allocation limit has run out, send it + // the AllocationLimitExceeded exception. 
+ + if (t->alloc_limit < 0 && (t->flags & TSO_ALLOC_LIMIT)) { + // Use a throwToSelf rather than a throwToSingleThreaded, because + // it correctly handles the case where the thread is currently + // inside mask. Also the thread might be blocked (e.g. on an + // MVar), and throwToSingleThreaded doesn't unblock it + // correctly in that case. + throwToSelf(cap, t, allocationLimitExceeded_closure); + t->alloc_limit = (StgInt64)RtsFlags.GcFlags.allocLimitGrace + * BLOCK_SIZE; + } + /* some statistics gathering in the parallel case */ } diff --git a/rts/Threads.c b/rts/Threads.c index 76e844a3f6..90efd9ce4e 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -110,6 +110,8 @@ createThread(Capability *cap, W_ size) tso->stackobj = stack; tso->tot_stack_size = stack->stack_size; + tso->alloc_limit = 0; + tso->trec = NO_TREC; #ifdef PROFILING @@ -164,6 +166,31 @@ rts_getThreadId(StgPtr tso) return ((StgTSO *)tso)->id; } +/* --------------------------------------------------------------------------- + * Getting & setting the thread allocation limit + * ------------------------------------------------------------------------ */ +HsInt64 rts_getThreadAllocationCounter(StgPtr tso) +{ + // NB. doesn't take into account allocation in the current nursery + // block, so it might be off by up to 4k. + return ((StgTSO *)tso)->alloc_limit; +} + +void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i) +{ + ((StgTSO *)tso)->alloc_limit = i; +} + +void rts_enableThreadAllocationLimit(StgPtr tso) +{ + ((StgTSO *)tso)->flags |= TSO_ALLOC_LIMIT; +} + +void rts_disableThreadAllocationLimit(StgPtr tso) +{ + ((StgTSO *)tso)->flags &= ~TSO_ALLOC_LIMIT; +} + /* ----------------------------------------------------------------------------- Remove a thread from a queue. Fails fatally if the TSO is not on the queue. 
@@ -524,21 +551,8 @@ threadStackOverflow (Capability *cap, StgTSO *tso) stg_min(tso->stackobj->stack + tso->stackobj->stack_size, tso->stackobj->sp+64))); - if (tso->flags & TSO_BLOCKEX) { - // NB. StackOverflow exceptions must be deferred if the thread is - // inside Control.Exception.mask. See bug #767 and bug #8303. - // This implementation is a minor hack, see Note [Throw to self when masked] - MessageThrowTo *msg = (MessageThrowTo*)allocate(cap, sizeofW(MessageThrowTo)); - SET_HDR(msg, &stg_MSG_THROWTO_info, CCS_SYSTEM); - msg->source = tso; - msg->target = tso; - msg->exception = (StgClosure *)stackOverflow_closure; - blockedThrowTo(cap, tso, msg); - } else { - // Send this thread the StackOverflow exception - throwToSingleThreaded(cap, tso, (StgClosure *)stackOverflow_closure); - return; - } + // Note [Throw to self when masked], also #767 and #8303. + throwToSelf(cap, tso, (StgClosure *)stackOverflow_closure); } @@ -669,39 +683,6 @@ threadStackOverflow (Capability *cap, StgTSO *tso) // IF_DEBUG(scheduler,printTSO(new_tso)); } -/* Note [Throw to self when masked] - * - * When a StackOverflow occurs when the thread is masked, we want to - * defer the exception to when the thread becomes unmasked/hits an - * interruptible point. We already have a mechanism for doing this, - * the blocked_exceptions list, but the use here is a bit unusual, - * because an exception is normally only added to this list upon - * an asynchronous 'throwTo' call (with all of the relevant - * multithreaded nonsense). Morally, a stack overflow should be an - * asynchronous exception sent by a thread to itself, and it should - * have the same semantics. But there are a few key differences: - * - * - If you actually tried to send an asynchronous exception to - * yourself using throwTo, the exception would actually immediately - * be delivered. This is because throwTo itself is considered an - * interruptible point, so the exception is always deliverable. 
Thus, - * ordinarily, we never end up with a message to onesself in the - * blocked_exceptions queue. - * - * - In the case of a StackOverflow, we don't actually care about the - * wakeup semantics; when an exception is delivered, the thread that - * originally threw the exception should be woken up, since throwTo - * blocks until the exception is successfully thrown. Fortunately, - * it is harmless to wakeup a thread that doesn't actually need waking - * up, e.g. ourselves. - * - * - No synchronization is necessary, because we own the TSO and the - * capability. You can observe this by tracing through the execution - * of throwTo. We skip synchronizing the message and inter-capability - * communication. - * - * We think this doesn't break any invariants, but do be careful! - */ /* --------------------------------------------------------------------------- diff --git a/rts/package.conf.in b/rts/package.conf.in index 82d2870cde..ce44a09651 100644 --- a/rts/package.conf.in +++ b/rts/package.conf.in @@ -99,6 +99,7 @@ ld-options: , "-Wl,-u,_base_ControlziExceptionziBase_nonTermination_closure" , "-Wl,-u,_base_GHCziIOziException_blockedIndefinitelyOnMVar_closure" , "-Wl,-u,_base_GHCziIOziException_blockedIndefinitelyOnSTM_closure" + , "-Wl,-u,_base_GHCziIOziException_allocationLimitExceeded_closure" , "-Wl,-u,_base_ControlziExceptionziBase_nestedAtomically_closure" , "-Wl,-u,_base_GHCziEventziThread_blockedOnBadFD_closure" , "-Wl,-u,_base_GHCziWeak_runFinalizzerBatch_closure" @@ -140,6 +141,7 @@ ld-options: , "-Wl,-u,base_ControlziExceptionziBase_nonTermination_closure" , "-Wl,-u,base_GHCziIOziException_blockedIndefinitelyOnMVar_closure" , "-Wl,-u,base_GHCziIOziException_blockedIndefinitelyOnSTM_closure" + , "-Wl,-u,base_GHCziIOziException_allocationLimitExceeded_closure" , "-Wl,-u,base_ControlziExceptionziBase_nestedAtomically_closure" , "-Wl,-u,base_GHCziEventziThread_blockedOnBadFD_closure" , "-Wl,-u,base_GHCziWeak_runFinalizzerBatch_closure" diff --git 
a/rts/sm/Storage.c b/rts/sm/Storage.c index 379d9da769..afb171b568 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -684,7 +684,10 @@ StgPtr allocate (Capability *cap, W_ n) TICK_ALLOC_HEAP_NOCTR(WDS(n)); CCS_ALLOC(cap->r.rCCCS,n); - + if (cap->r.rCurrentTSO != NULL) { + cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_); + } + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { // The largest number of words such that // the computation of req_blocks will not overflow. @@ -829,6 +832,9 @@ allocatePinned (Capability *cap, W_ n) TICK_ALLOC_HEAP_NOCTR(WDS(n)); CCS_ALLOC(cap->r.rCCCS,n); + if (cap->r.rCurrentTSO != NULL) { + cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_); + } bd = cap->pinned_object_block; diff --git a/rts/win32/libHSbase.def b/rts/win32/libHSbase.def index 8140528c70..2091e85c9c 100644 --- a/rts/win32/libHSbase.def +++ b/rts/win32/libHSbase.def @@ -32,11 +32,12 @@ EXPORTS base_GHCziTopHandler_flushStdHandles_closure - base_GHCziWeak_runFinalizzerBatch_closure + base_GHCziWeak_runFinalizzerBatch_closure base_GHCziPack_unpackCString_closure base_GHCziIOziException_blockedIndefinitelyOnMVar_closure base_GHCziIOziException_blockedIndefinitelyOnSTM_closure - base_GHCziIOziException_stackOverflow_closure + base_GHCziIOziException_allocationLimitExceeded_closure + base_GHCziIOziException_stackOverflow_closure base_ControlziExceptionziBase_nonTermination_closure base_ControlziExceptionziBase_nestedAtomically_closure diff --git a/testsuite/tests/concurrent/should_run/all.T b/testsuite/tests/concurrent/should_run/all.T index 166c232766..b77d9aca87 100644 --- a/testsuite/tests/concurrent/should_run/all.T +++ b/testsuite/tests/concurrent/should_run/all.T @@ -88,6 +88,12 @@ test('threadstatus-9333', [omit_ways(['threaded2'])], compile_and_run, ['']) test('T9379', normal, compile_and_run, ['']) +test('allocLimit1', exit_code(1), compile_and_run, ['']) +test('allocLimit2', normal, compile_and_run, ['']) +test('allocLimit3', exit_code(1), compile_and_run, ['']) 
+test('allocLimit4', [ extra_run_opts('+RTS -xq300k -RTS') ], + compile_and_run, ['']) + # ----------------------------------------------------------------------------- # These tests we only do for a full run @@ -252,3 +258,4 @@ test('setnumcapabilities001', # omit ghci, which can't handle unboxed tuples: test('compareAndSwap', [omit_ways(['ghci','hpc']), reqlib('primitive')], compile_and_run, ['']) + diff --git a/testsuite/tests/concurrent/should_run/allocLimit1.hs b/testsuite/tests/concurrent/should_run/allocLimit1.hs new file mode 100644 index 0000000000..b1c8fa6035 --- /dev/null +++ b/testsuite/tests/concurrent/should_run/allocLimit1.hs @@ -0,0 +1,9 @@ +module Main (main) where + +import GHC.Conc + +main = do + setAllocationCounter (10*1024) + enableAllocationLimit + print (length [1..]) + diff --git a/testsuite/tests/concurrent/should_run/allocLimit1.stderr b/testsuite/tests/concurrent/should_run/allocLimit1.stderr new file mode 100644 index 0000000000..2133e14ce1 --- /dev/null +++ b/testsuite/tests/concurrent/should_run/allocLimit1.stderr @@ -0,0 +1 @@ +allocLimit1: allocation limit exceeded diff --git a/testsuite/tests/concurrent/should_run/allocLimit2.hs b/testsuite/tests/concurrent/should_run/allocLimit2.hs new file mode 100644 index 0000000000..4fd117b615 --- /dev/null +++ b/testsuite/tests/concurrent/should_run/allocLimit2.hs @@ -0,0 +1,17 @@ +module Main (main) where + +import GHC.Conc +import Control.Concurrent +import Control.Exception +import System.Exit + +main = do + m <- newEmptyMVar + let action = do setAllocationCounter (10*1024) + enableAllocationLimit + print (length [1..]) + forkFinally action (putMVar m) + r <- takeMVar m + case r of + Left e | Just AllocationLimitExceeded <- fromException e -> return () + _ -> print r >> exitFailure diff --git a/testsuite/tests/concurrent/should_run/allocLimit3.hs b/testsuite/tests/concurrent/should_run/allocLimit3.hs new file mode 100644 index 0000000000..28881dc016 --- /dev/null +++ 
b/testsuite/tests/concurrent/should_run/allocLimit3.hs @@ -0,0 +1,15 @@ +module Main (main) where + +import GHC.Conc +import Control.Concurrent +import Control.Exception + +main = do + setAllocationCounter (10*1024) + enableAllocationLimit + + -- alloc limit overflow while masked: should successfully print the + -- result, and then immediately raise the exception + r <- mask_ $ try $ print (length [1..100000]) + + print (r :: Either SomeException ()) diff --git a/testsuite/tests/concurrent/should_run/allocLimit3.stderr b/testsuite/tests/concurrent/should_run/allocLimit3.stderr new file mode 100644 index 0000000000..27ae0a9480 --- /dev/null +++ b/testsuite/tests/concurrent/should_run/allocLimit3.stderr @@ -0,0 +1 @@ +allocLimit3: allocation limit exceeded diff --git a/testsuite/tests/concurrent/should_run/allocLimit3.stdout b/testsuite/tests/concurrent/should_run/allocLimit3.stdout new file mode 100644 index 0000000000..f7393e847d --- /dev/null +++ b/testsuite/tests/concurrent/should_run/allocLimit3.stdout @@ -0,0 +1 @@ +100000 diff --git a/testsuite/tests/concurrent/should_run/allocLimit4.hs b/testsuite/tests/concurrent/should_run/allocLimit4.hs new file mode 100644 index 0000000000..b589ffa4af --- /dev/null +++ b/testsuite/tests/concurrent/should_run/allocLimit4.hs @@ -0,0 +1,31 @@ +module Main (main) where + +import GHC.Conc +import Control.Concurrent +import Control.Exception +import System.Exit +import Control.Monad + +-- check that +RTS -xq is doing the right thing: the test requires +-- +RTS -xq300k + +main = do + m <- newEmptyMVar + let action = do + e <- try $ do + setAllocationCounter (10*1024) + enableAllocationLimit + print (length [1..]) + case e of + Left AllocationLimitExceeded{} -> do + c <- getAllocationCounter + when (c < 250*1024 || c > 350*1024) $ fail "wrong limit grace" + print (length [2..]) + Right _ -> + fail "didn't catch AllocationLimitExceeded" + + forkFinally action (putMVar m) + r <- takeMVar m + case r of + Left e | Just 
AllocationLimitExceeded <- fromException e -> return () + _ -> print r >> exitFailure diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs index 72605d755e..486f497572 100644 --- a/utils/deriveConstants/DeriveConstants.hs +++ b/utils/deriveConstants/DeriveConstants.hs @@ -413,6 +413,7 @@ wanteds = concat ,closureField C "StgTSO" "flags" ,closureField C "StgTSO" "dirty" ,closureField C "StgTSO" "bq" + ,closureField Both "StgTSO" "alloc_limit" ,closureField_ Both "StgTSO_cccs" "StgTSO" "prof.cccs" ,closureField Both "StgTSO" "stackobj" |