summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2014-10-15 00:03:15 +0100
committerSimon Marlow <marlowsd@gmail.com>2014-11-12 15:11:10 +0000
commitd70b19bfb5ed79b22c2ac31e22f46782fc47a117 (patch)
treed7bc2ad5a6d50bf351dfd97779030dae63e5d7cf
parentc774b28f76ee4c220f7c1c9fd81585e0e3af0e8a (diff)
downloadhaskell-d70b19bfb5ed79b22c2ac31e22f46782fc47a117.tar.gz
Per-thread allocation counters and limits
This reverts commit f0fcc41d755876a1b02d1c7c79f57515059f6417. New changes: now works on 32-bit platforms too. I added some basic support for 64-bit subtraction and comparison operations to the x86 NCG.
-rw-r--r--compiler/cmm/CmmLayoutStack.hs9
-rw-r--r--compiler/cmm/CmmMachOp.hs28
-rw-r--r--compiler/codeGen/StgCmmForeign.hs274
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs71
-rw-r--r--compiler/nativeGen/X86/Instr.hs3
-rw-r--r--compiler/nativeGen/X86/Ppr.hs7
-rw-r--r--includes/rts/Constants.h6
-rw-r--r--includes/rts/Flags.h8
-rw-r--r--includes/rts/Threads.h8
-rw-r--r--includes/rts/storage/TSO.h31
-rw-r--r--libraries/base/Control/Exception.hs1
-rw-r--r--libraries/base/Control/Exception/Base.hs1
-rw-r--r--libraries/base/GHC/Conc.hs6
-rw-r--r--libraries/base/GHC/Conc/Sync.hs92
-rw-r--r--libraries/base/GHC/IO/Exception.hs21
-rw-r--r--rts/Capability.c4
-rw-r--r--rts/HeapStackCheck.cmm4
-rw-r--r--rts/Linker.c4
-rw-r--r--rts/Prelude.h2
-rw-r--r--rts/RaiseAsync.c54
-rw-r--r--rts/RaiseAsync.h4
-rw-r--r--rts/RtsFlags.c10
-rw-r--r--rts/RtsStartup.c1
-rw-r--r--rts/Schedule.c19
-rw-r--r--rts/Threads.c77
-rw-r--r--rts/package.conf.in2
-rw-r--r--rts/sm/Storage.c8
-rw-r--r--rts/win32/libHSbase.def5
-rw-r--r--testsuite/tests/concurrent/should_run/all.T7
-rw-r--r--testsuite/tests/concurrent/should_run/allocLimit1.hs9
-rw-r--r--testsuite/tests/concurrent/should_run/allocLimit1.stderr1
-rw-r--r--testsuite/tests/concurrent/should_run/allocLimit2.hs17
-rw-r--r--testsuite/tests/concurrent/should_run/allocLimit3.hs15
-rw-r--r--testsuite/tests/concurrent/should_run/allocLimit3.stderr1
-rw-r--r--testsuite/tests/concurrent/should_run/allocLimit3.stdout1
-rw-r--r--testsuite/tests/concurrent/should_run/allocLimit4.hs31
-rw-r--r--utils/deriveConstants/DeriveConstants.hs1
37 files changed, 675 insertions, 168 deletions
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
index 188233d1ea..c9399b3ba1 100644
--- a/compiler/cmm/CmmLayoutStack.hs
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -992,9 +992,12 @@ lowerSafeForeignCall dflags block
id <- newTemp (bWord dflags)
new_base <- newTemp (cmmRegType dflags (CmmGlobal BaseReg))
let (caller_save, caller_load) = callerSaveVolatileRegs dflags
- load_tso <- newTemp (gcWord dflags)
load_stack <- newTemp (gcWord dflags)
- let suspend = saveThreadState dflags <*>
+ tso <- newTemp (gcWord dflags)
+ cn <- newTemp (bWord dflags)
+ bdfree <- newTemp (bWord dflags)
+ bdstart <- newTemp (bWord dflags)
+ let suspend = saveThreadState dflags tso cn <*>
caller_save <*>
mkMiddle (callSuspendThread dflags id intrbl)
midCall = mkUnsafeCall tgt res args
@@ -1003,7 +1006,7 @@ lowerSafeForeignCall dflags block
-- might now have a different Capability!
mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
caller_load <*>
- loadThreadState dflags load_tso load_stack
+ loadThreadState dflags tso load_stack cn bdfree bdstart
(_, regs, copyout) =
copyOutOflow dflags NativeReturn Jump (Young succ)
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index b84cb40c69..e9215d5021 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -3,7 +3,7 @@
module CmmMachOp
( MachOp(..)
, pprMachOp, isCommutableMachOp, isAssociativeMachOp
- , isComparisonMachOp, machOpResultType
+ , isComparisonMachOp, maybeIntComparison, machOpResultType
, machOpArgReps, maybeInvertComparison
-- MachOp builders
@@ -11,9 +11,11 @@ module CmmMachOp
, mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
, mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
, mo_wordULe, mo_wordUGt, mo_wordULt
- , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr
+ , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot
+ , mo_wordShl, mo_wordSShr, mo_wordUShr
, mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
- , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord
+ , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord
+ , mo_u_32ToWord, mo_s_32ToWord
, mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64
-- CallishMachOp
@@ -260,6 +262,7 @@ isAssociativeMachOp mop =
MO_Xor {} -> True
_other -> False
+
-- ----------------------------------------------------------------------------
-- isComparisonMachOp
@@ -290,6 +293,25 @@ isComparisonMachOp mop =
MO_F_Lt {} -> True
_other -> False
+{- |
+Returns @Just w@ if the operation is an integer comparison with width
+@w@, or @Nothing@ otherwise.
+-}
+maybeIntComparison :: MachOp -> Maybe Width
+maybeIntComparison mop =
+ case mop of
+ MO_Eq w -> Just w
+ MO_Ne w -> Just w
+ MO_S_Ge w -> Just w
+ MO_S_Le w -> Just w
+ MO_S_Gt w -> Just w
+ MO_S_Lt w -> Just w
+ MO_U_Ge w -> Just w
+ MO_U_Le w -> Just w
+ MO_U_Gt w -> Just w
+ MO_U_Lt w -> Just w
+ _ -> Nothing
+
-- -----------------------------------------------------------------------------
-- Inverting conditions
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index eb1c7da76d..c2e276ed0b 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -9,12 +9,15 @@
-----------------------------------------------------------------------------
module StgCmmForeign (
- cgForeignCall, loadThreadState, saveThreadState,
+ cgForeignCall,
emitPrimCall, emitCCall,
emitForeignCall, -- For CmmParse
- emitSaveThreadState, -- will be needed by the Cmm parser
- emitLoadThreadState, -- ditto
- emitCloseNursery, emitOpenNursery
+ emitSaveThreadState,
+ saveThreadState,
+ emitLoadThreadState,
+ loadThreadState,
+ emitOpenNursery,
+ emitCloseNursery,
) where
#include "HsVersions.h"
@@ -271,94 +274,221 @@ maybe_assign_temp e = do
-- This stuff can't be done in suspendThread/resumeThread, because it
-- refers to global registers which aren't available in the C world.
-saveThreadState :: DynFlags -> CmmAGraph
-saveThreadState dflags =
- -- CurrentTSO->stackobj->sp = Sp;
- mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags stgCurrentTSO (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp
- <*> closeNursery dflags
- -- and save the current cost centre stack in the TSO when profiling:
- <*> if gopt Opt_SccProfilingOn dflags then
- mkStore (cmmOffset dflags stgCurrentTSO (tso_CCCS dflags)) curCCS
- else mkNop
-
emitSaveThreadState :: FCode ()
emitSaveThreadState = do
dflags <- getDynFlags
- emit (saveThreadState dflags)
+ tso <- newTemp (gcWord dflags)
+ cn <- newTemp (bWord dflags)
+ emit $ saveThreadState dflags tso cn
+
+
+-- saveThreadState must be usable from the stack layout pass, where we
+-- don't have FCode. Therefore it takes LocalRegs as arguments, so
+-- the caller can create these.
+saveThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
+saveThreadState dflags tso cn =
+ catAGraphs [
+ -- tso = CurrentTSO;
+ mkAssign (CmmLocal tso) stgCurrentTSO,
+ -- tso->stackobj->sp = Sp;
+ mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) stgSp,
+ closeNursery dflags tso cn,
+ -- and save the current cost centre stack in the TSO when profiling:
+ if gopt Opt_SccProfilingOn dflags then
+ mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) curCCS
+ else mkNop
+ ]
emitCloseNursery :: FCode ()
emitCloseNursery = do
- df <- getDynFlags
- emit (closeNursery df)
+ dflags <- getDynFlags
+ tso <- newTemp (gcWord dflags)
+ cn <- newTemp (bWord dflags)
+ emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*>
+ closeNursery dflags tso cn
+
+{-
+Closing the nursery corresponds to the following code:
+
+ tso = CurrentTSO;
+ cn = CurrentNursery;
- -- CurrentNursery->free = Hp+1;
-closeNursery :: DynFlags -> CmmAGraph
-closeNursery dflags = mkStore (nursery_bdescr_free dflags) (cmmOffsetW dflags stgHp 1)
+ // Update the allocation limit for the current thread. We don't
+ // check to see whether it has overflowed at this point, that check is
+ // made when we run out of space in the current heap block (stg_gc_noregs)
+ // and in the scheduler when context switching (schedulePostRunThread).
+ tso->alloc_limit -= Hp + WDS(1) - cn->start;
-loadThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
-loadThreadState dflags tso stack = do
+ // Set cn->free to the next unoccupied word in the block
+ cn->free = Hp + WDS(1);
+-}
+
+closeNursery :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph
+closeNursery df tso cn =
+ let
+ tsoreg = CmmLocal tso
+ cnreg = CmmLocal cn
+ in
catAGraphs [
- -- tso = CurrentTSO;
- mkAssign (CmmLocal tso) stgCurrentTSO,
- -- stack = tso->stackobj;
- mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
- -- Sp = stack->sp;
- mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
- -- SpLim = stack->stack + RESERVED_STACK_WORDS;
- mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
- (rESERVED_STACK_WORDS dflags)),
- -- HpAlloc = 0;
- -- HpAlloc is assumed to be set to non-zero only by a failed
- -- a heap check, see HeapStackCheck.cmm:GC_GENERIC
- mkAssign hpAlloc (zeroExpr dflags),
-
- openNursery dflags,
- -- and load the current cost centre stack from the TSO when profiling:
- if gopt Opt_SccProfilingOn dflags then
- storeCurCCS
- (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) (ccsType dflags))
- else mkNop]
+ mkAssign cnreg stgCurrentNursery,
+
+ -- CurrentNursery->free = Hp+1;
+ mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df stgHp 1),
+
+ let alloc =
+ CmmMachOp (mo_wordSub df)
+ [ cmmOffsetW df stgHp 1
+ , CmmLoad (nursery_bdescr_start df cnreg) (bWord df)
+ ]
+
+ alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
+ in
+
+ -- tso->alloc_limit -= alloc
+ mkStore alloc_limit (CmmMachOp (MO_Sub W64)
+ [ CmmLoad alloc_limit b64
+ , CmmMachOp (mo_WordTo64 df) [alloc] ])
+ ]
emitLoadThreadState :: FCode ()
emitLoadThreadState = do
dflags <- getDynFlags
- load_tso <- newTemp (gcWord dflags)
- load_stack <- newTemp (gcWord dflags)
- emit $ loadThreadState dflags load_tso load_stack
+ tso <- newTemp (gcWord dflags)
+ stack <- newTemp (gcWord dflags)
+ cn <- newTemp (bWord dflags)
+ bdfree <- newTemp (bWord dflags)
+ bdstart <- newTemp (bWord dflags)
+ emit $ loadThreadState dflags tso stack cn bdfree bdstart
+
+-- loadThreadState must be usable from the stack layout pass, where we
+-- don't have FCode. Therefore it takes LocalRegs as arguments, so
+-- the caller can create these.
+loadThreadState :: DynFlags
+ -> LocalReg -> LocalReg -> LocalReg -> LocalReg -> LocalReg
+ -> CmmAGraph
+loadThreadState dflags tso stack cn bdfree bdstart =
+ catAGraphs [
+ -- tso = CurrentTSO;
+ mkAssign (CmmLocal tso) stgCurrentTSO,
+ -- stack = tso->stackobj;
+ mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
+ -- Sp = stack->sp;
+ mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
+ -- SpLim = stack->stack + RESERVED_STACK_WORDS;
+ mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
+ (rESERVED_STACK_WORDS dflags)),
+ -- HpAlloc = 0;
+ -- HpAlloc is assumed to be set to non-zero only by a failed
+ -- heap check, see HeapStackCheck.cmm:GC_GENERIC
+ mkAssign hpAlloc (zeroExpr dflags),
+ openNursery dflags tso cn bdfree bdstart,
+ -- and load the current cost centre stack from the TSO when profiling:
+ if gopt Opt_SccProfilingOn dflags
+ then storeCurCCS
+ (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso))
+ (tso_CCCS dflags)) (ccsType dflags))
+ else mkNop
+ ]
+
emitOpenNursery :: FCode ()
emitOpenNursery = do
- df <- getDynFlags
- emit (openNursery df)
-
-openNursery :: DynFlags -> CmmAGraph
-openNursery dflags = catAGraphs [
- -- Hp = CurrentNursery->free - 1;
- mkAssign hp (cmmOffsetW dflags (CmmLoad (nursery_bdescr_free dflags) (bWord dflags)) (-1)),
-
- -- HpLim = CurrentNursery->start +
- -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
- mkAssign hpLim
- (cmmOffsetExpr dflags
- (CmmLoad (nursery_bdescr_start dflags) (bWord dflags))
- (cmmOffset dflags
- (CmmMachOp (mo_wordMul dflags) [
- CmmMachOp (MO_SS_Conv W32 (wordWidth dflags))
- [CmmLoad (nursery_bdescr_blocks dflags) b32],
- mkIntExpr dflags (bLOCK_SIZE dflags)
- ])
- (-1)
- )
- )
+ dflags <- getDynFlags
+ tso <- newTemp (gcWord dflags)
+ cn <- newTemp (bWord dflags)
+ bdfree <- newTemp (bWord dflags)
+ bdstart <- newTemp (bWord dflags)
+ emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*>
+ openNursery dflags tso cn bdfree bdstart
+
+{-
+Opening the nursery corresponds to the following code:
+
+ tso = CurrentTSO;
+ cn = CurrentNursery;
+ bdfree = CurrentNursery->free;
+ bdstart = CurrentNursery->start;
+
+ // We *add* the currently occupied portion of the nursery block to
+ // the allocation limit, because we will subtract it again in
+ // closeNursery.
+ tso->alloc_limit += bdfree - bdstart;
+
+ // Set Hp to the last occupied word of the heap block. Why not the
+ // next unoccupied word? Doing it this way means that we get to use
+ // an offset of zero more often, which might lead to slightly smaller
+ // code on some architectures.
+ Hp = bdfree - WDS(1);
+
+ // Set HpLim to the end of the current nursery block (note that this block
+ // might be a block group, consisting of several adjacent blocks).
+ HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
+-}
+
+openNursery :: DynFlags
+ -> LocalReg -> LocalReg -> LocalReg -> LocalReg
+ -> CmmAGraph
+openNursery df tso cn bdfree bdstart =
+ let
+ tsoreg = CmmLocal tso
+ cnreg = CmmLocal cn
+ bdfreereg = CmmLocal bdfree
+ bdstartreg = CmmLocal bdstart
+ in
+ -- These assignments are carefully ordered to reduce register
+ -- pressure and generate not completely awful code on x86. To see
+ -- what code we generate, look at the assembly for
+ -- stg_returnToStackTop in rts/StgStartup.cmm.
+ catAGraphs [
+ mkAssign cnreg stgCurrentNursery,
+ mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)),
+
+ -- Hp = CurrentNursery->free - 1;
+ mkAssign hp (cmmOffsetW df (CmmReg bdfreereg) (-1)),
+
+ mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)),
+
+ -- HpLim = CurrentNursery->start +
+ -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
+ mkAssign hpLim
+ (cmmOffsetExpr df
+ (CmmReg bdstartreg)
+ (cmmOffset df
+ (CmmMachOp (mo_wordMul df) [
+ CmmMachOp (MO_SS_Conv W32 (wordWidth df))
+ [CmmLoad (nursery_bdescr_blocks df cnreg) b32],
+ mkIntExpr df (bLOCK_SIZE df)
+ ])
+ (-1)
+ )
+ ),
+
+ -- alloc = bd->free - bd->start
+ let alloc =
+ CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]
+
+ alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
+ in
+
+ -- tso->alloc_limit += alloc
+ mkStore alloc_limit (CmmMachOp (MO_Add W64)
+ [ CmmLoad alloc_limit b64
+ , CmmMachOp (mo_WordTo64 df) [alloc] ])
+
]
-nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks :: DynFlags -> CmmExpr
-nursery_bdescr_free dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_free dflags)
-nursery_bdescr_start dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_start dflags)
-nursery_bdescr_blocks dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_blocks dflags)
+nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
+ :: DynFlags -> CmmReg -> CmmExpr
+nursery_bdescr_free dflags cn =
+ cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
+nursery_bdescr_start dflags cn =
+ cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags)
+nursery_bdescr_blocks dflags cn =
+ cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
-tso_stackobj, tso_CCCS, stack_STACK, stack_SP :: DynFlags -> ByteOff
+tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff
tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
+tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags)
tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags)
stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index abd87ed087..a4115a0b6d 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -391,6 +391,21 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
ADC II32 (OpReg r2hi) (OpReg rhi) ]
return (ChildCode64 code rlo)
+iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
+ ChildCode64 code1 r1lo <- iselExpr64 e1
+ ChildCode64 code2 r2lo <- iselExpr64 e2
+ (rlo,rhi) <- getNewRegPairNat II32
+ let
+ r1hi = getHiVRegFromLo r1lo
+ r2hi = getHiVRegFromLo r2lo
+ code = code1 `appOL`
+ code2 `appOL`
+ toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
+ SUB II32 (OpReg r2lo) (OpReg rlo),
+ MOV II32 (OpReg r1hi) (OpReg rhi),
+ SBB II32 (OpReg r2hi) (OpReg rhi) ]
+ return (ChildCode64 code rlo)
+
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
fn <- getAnyReg expr
r_dst_lo <- getNewRegNat II32
@@ -1272,24 +1287,23 @@ getCondCode (CmmMachOp mop [x, y])
MO_F_Lt W64 -> condFltCode LTT x y
MO_F_Le W64 -> condFltCode LE x y
- MO_Eq _ -> condIntCode EQQ x y
- MO_Ne _ -> condIntCode NE x y
-
- MO_S_Gt _ -> condIntCode GTT x y
- MO_S_Ge _ -> condIntCode GE x y
- MO_S_Lt _ -> condIntCode LTT x y
- MO_S_Le _ -> condIntCode LE x y
-
- MO_U_Gt _ -> condIntCode GU x y
- MO_U_Ge _ -> condIntCode GEU x y
- MO_U_Lt _ -> condIntCode LU x y
- MO_U_Le _ -> condIntCode LEU x y
-
- _other -> pprPanic "getCondCode(x86,x86_64)" (ppr (CmmMachOp mop [x,y]))
+ _ -> condIntCode (machOpToCond mop) x y
getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)
-
+machOpToCond :: MachOp -> Cond
+machOpToCond mo = case mo of
+ MO_Eq _ -> EQQ
+ MO_Ne _ -> NE
+ MO_S_Gt _ -> GTT
+ MO_S_Ge _ -> GE
+ MO_S_Lt _ -> LTT
+ MO_S_Le _ -> LE
+ MO_U_Gt _ -> GU
+ MO_U_Ge _ -> GEU
+ MO_U_Lt _ -> LU
+ MO_U_Le _ -> LEU
+ _other -> pprPanic "machOpToCond" (pprMachOp mo)
-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
@@ -1538,7 +1552,31 @@ genCondJump
-> CmmExpr -- the condition on which to branch
-> NatM InstrBlock
-genCondJump id bool = do
+genCondJump id expr = do
+ is32Bit <- is32BitPlatform
+ genCondJump' is32Bit id expr
+
+genCondJump' :: Bool -> BlockId -> CmmExpr -> NatM InstrBlock
+
+-- 64-bit integer comparisons on 32-bit
+genCondJump' is32Bit true (CmmMachOp mop [e1,e2])
+ | is32Bit, Just W64 <- maybeIntComparison mop = do
+ ChildCode64 code1 r1_lo <- iselExpr64 e1
+ ChildCode64 code2 r2_lo <- iselExpr64 e2
+ let r1_hi = getHiVRegFromLo r1_lo
+ r2_hi = getHiVRegFromLo r2_lo
+ cond = machOpToCond mop
+ Just cond' = maybeFlipCond cond
+ false <- getBlockIdNat
+ return $ code1 `appOL` code2 `appOL` toOL [
+ CMP II32 (OpReg r2_hi) (OpReg r1_hi),
+ JXX cond true,
+ JXX cond' false,
+ CMP II32 (OpReg r2_lo) (OpReg r1_lo),
+ JXX cond true,
+ NEWBLOCK false ]
+
+genCondJump' _ id bool = do
CondCode is_float cond cond_code <- getCondCode bool
use_sse2 <- sse2Enabled
if not is_float || not use_sse2
@@ -1569,7 +1607,6 @@ genCondJump id bool = do
]
return (cond_code `appOL` code)
-
-- -----------------------------------------------------------------------------
-- Generating C calls
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 2f6196227b..0d85376868 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -196,6 +196,7 @@ data Instr
| ADD Size Operand Operand
| ADC Size Operand Operand
| SUB Size Operand Operand
+ | SBB Size Operand Operand
| MUL Size Operand Operand
| MUL2 Size Operand -- %edx:%eax = operand * %rax
@@ -365,6 +366,7 @@ x86_regUsageOfInstr platform instr
ADD _ src dst -> usageRM src dst
ADC _ src dst -> usageRM src dst
SUB _ src dst -> usageRM src dst
+ SBB _ src dst -> usageRM src dst
IMUL _ src dst -> usageRM src dst
IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
MUL _ src dst -> usageRM src dst
@@ -543,6 +545,7 @@ x86_patchRegsOfInstr instr env
ADD sz src dst -> patch2 (ADD sz) src dst
ADC sz src dst -> patch2 (ADC sz) src dst
SUB sz src dst -> patch2 (SUB sz) src dst
+ SBB sz src dst -> patch2 (SBB sz) src dst
IMUL sz src dst -> patch2 (IMUL sz) src dst
IMUL2 sz src -> patch1 (IMUL2 sz) src
MUL sz src dst -> patch2 (MUL sz) src dst
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index cc39557f1d..2b3711751c 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -570,11 +570,10 @@ pprInstr (ADD size (OpImm (ImmInt (-1))) dst)
= pprSizeOp (sLit "dec") size dst
pprInstr (ADD size (OpImm (ImmInt 1)) dst)
= pprSizeOp (sLit "inc") size dst
-pprInstr (ADD size src dst)
- = pprSizeOpOp (sLit "add") size src dst
-pprInstr (ADC size src dst)
- = pprSizeOpOp (sLit "adc") size src dst
+pprInstr (ADD size src dst) = pprSizeOpOp (sLit "add") size src dst
+pprInstr (ADC size src dst) = pprSizeOpOp (sLit "adc") size src dst
pprInstr (SUB size src dst) = pprSizeOpOp (sLit "sub") size src dst
+pprInstr (SBB size src dst) = pprSizeOpOp (sLit "sbb") size src dst
pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2
pprInstr (ADD_CC size src dst)
diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h
index 6fd0dc0dfc..02cb63210d 100644
--- a/includes/rts/Constants.h
+++ b/includes/rts/Constants.h
@@ -277,6 +277,12 @@
#define TSO_SQUEEZED 128
/*
+ * Enables the AllocationLimitExceeded exception when the thread's
+ * allocation limit goes negative.
+ */
+#define TSO_ALLOC_LIMIT 256
+
+/*
* The number of times we spin in a spin lock before yielding (see
* #3758). To tune this value, use the benchmark in #3758: run the
* server with -N2 and the client both on a dual-core. Also make sure
diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h
index bf6a7f3c5c..ec542701df 100644
--- a/includes/rts/Flags.h
+++ b/includes/rts/Flags.h
@@ -56,6 +56,14 @@ struct GC_FLAGS {
rtsBool doIdleGC;
StgWord heapBase; /* address to ask the OS for memory */
+
+ StgWord allocLimitGrace; /* units: *blocks*
+ * After an AllocationLimitExceeded
+ * exception has been raised, how much
+ * extra space is given to the thread
+ * to handle the exception before we
+ * raise it again.
+ */
};
struct DEBUG_FLAGS {
diff --git a/includes/rts/Threads.h b/includes/rts/Threads.h
index 941f6daf65..fc8ae6e089 100644
--- a/includes/rts/Threads.h
+++ b/includes/rts/Threads.h
@@ -42,8 +42,12 @@ StgRegTable * resumeThread (void *);
//
// Thread operations from Threads.c
//
-int cmp_thread (StgPtr tso1, StgPtr tso2);
-int rts_getThreadId (StgPtr tso);
+int cmp_thread (StgPtr tso1, StgPtr tso2);
+int rts_getThreadId (StgPtr tso);
+HsInt64 rts_getThreadAllocationCounter (StgPtr tso);
+void rts_setThreadAllocationCounter (StgPtr tso, HsInt64 i);
+void rts_enableThreadAllocationLimit (StgPtr tso);
+void rts_disableThreadAllocationLimit (StgPtr tso);
#if !defined(mingw32_HOST_OS)
pid_t forkProcess (HsStablePtr *entry);
diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h
index 6dbcec2595..06056fe716 100644
--- a/includes/rts/storage/TSO.h
+++ b/includes/rts/storage/TSO.h
@@ -145,15 +145,18 @@ typedef struct StgTSO_ {
*/
struct StgBlockingQueue_ *bq;
-#ifdef TICKY_TICKY
- /* TICKY-specific stuff would go here. */
-#endif
-#ifdef PROFILING
- StgTSOProfInfo prof;
-#endif
-#ifdef mingw32_HOST_OS
- StgWord32 saved_winerror;
-#endif
+ /*
+ * The allocation limit for this thread, which is updated as the
+ * thread allocates. If the value drops below zero, and
+ * TSO_ALLOC_LIMIT is set in flags, we raise an exception in the
+ * thread, and give the thread a little more space to handle the
+ * exception before we raise the exception again.
+ *
+ * This is an integer, because we might update it in a place where
+ * it isn't convenient to raise the exception, so we want it to
+ * stay negative until we get around to checking it.
+ */
+ StgInt64 alloc_limit; /* in bytes */
/*
* sum of the sizes of all stack chunks (in words), used to decide
@@ -168,6 +171,16 @@ typedef struct StgTSO_ {
*/
StgWord32 tot_stack_size;
+#ifdef TICKY_TICKY
+ /* TICKY-specific stuff would go here. */
+#endif
+#ifdef PROFILING
+ StgTSOProfInfo prof;
+#endif
+#ifdef mingw32_HOST_OS
+ StgWord32 saved_winerror;
+#endif
+
} *StgTSOPtr;
typedef struct StgStack_ {
diff --git a/libraries/base/Control/Exception.hs b/libraries/base/Control/Exception.hs
index fa505750f2..0bcbdca942 100644
--- a/libraries/base/Control/Exception.hs
+++ b/libraries/base/Control/Exception.hs
@@ -48,6 +48,7 @@ module Control.Exception (
NestedAtomically(..),
BlockedIndefinitelyOnMVar(..),
BlockedIndefinitelyOnSTM(..),
+ AllocationLimitExceeded(..),
Deadlock(..),
NoMethodError(..),
PatternMatchFail(..),
diff --git a/libraries/base/Control/Exception/Base.hs b/libraries/base/Control/Exception/Base.hs
index c581d1a5c4..f7779d6f9c 100644
--- a/libraries/base/Control/Exception/Base.hs
+++ b/libraries/base/Control/Exception/Base.hs
@@ -31,6 +31,7 @@ module Control.Exception.Base (
NestedAtomically(..),
BlockedIndefinitelyOnMVar(..),
BlockedIndefinitelyOnSTM(..),
+ AllocationLimitExceeded(..),
Deadlock(..),
NoMethodError(..),
PatternMatchFail(..),
diff --git a/libraries/base/GHC/Conc.hs b/libraries/base/GHC/Conc.hs
index f1708b33d4..68182a11e4 100644
--- a/libraries/base/GHC/Conc.hs
+++ b/libraries/base/GHC/Conc.hs
@@ -59,6 +59,12 @@ module GHC.Conc
, threadWaitWriteSTM
, closeFdWith
+ -- * Allocation counter and limit
+ , setAllocationCounter
+ , getAllocationCounter
+ , enableAllocationLimit
+ , disableAllocationLimit
+
-- * TVars
, STM(..)
, atomically
diff --git a/libraries/base/GHC/Conc/Sync.hs b/libraries/base/GHC/Conc/Sync.hs
index 6d2e772b5a..777fb71e20 100644
--- a/libraries/base/GHC/Conc/Sync.hs
+++ b/libraries/base/GHC/Conc/Sync.hs
@@ -60,6 +60,12 @@ module GHC.Conc.Sync
, threadStatus
, threadCapability
+ -- * Allocation counter and quota
+ , setAllocationCounter
+ , getAllocationCounter
+ , enableAllocationLimit
+ , disableAllocationLimit
+
-- * TVars
, STM(..)
, atomically
@@ -171,16 +177,92 @@ instance Eq ThreadId where
instance Ord ThreadId where
compare = cmpThread
+-- | Every thread has an allocation counter that tracks how much
+-- memory has been allocated by the thread. The counter is
+-- initialized to zero, and 'setAllocationCounter' sets the current
+-- value. The allocation counter counts *down*, so in the absence of
+-- a call to 'setAllocationCounter' its value is the negation of the
+-- number of bytes of memory allocated by the thread.
+--
+-- There are two things that you can do with this counter:
+--
+-- * Use it as a simple profiling mechanism, with
+-- 'getAllocationCounter'.
+--
+-- * Use it as a resource limit. See 'enableAllocationLimit'.
+--
+-- Allocation accounting is accurate only to about 4Kbytes.
+--
+setAllocationCounter :: Int64 -> IO ()
+setAllocationCounter i = do
+ ThreadId t <- myThreadId
+ rts_setThreadAllocationCounter t i
+
+-- | Return the current value of the allocation counter for the
+-- current thread.
+getAllocationCounter :: IO Int64
+getAllocationCounter = do
+ ThreadId t <- myThreadId
+ rts_getThreadAllocationCounter t
+
+-- | Enables the allocation counter to be treated as a limit for the
+-- current thread. When the allocation limit is enabled, if the
+-- allocation counter counts down below zero, the thread will be sent
+-- the 'AllocationLimitExceeded' asynchronous exception. When this
+-- happens, the counter is reinitialised (by default
+-- to 100K, but tunable with the @+RTS -xq@ option) so that it can handle
+-- the exception and perform any necessary clean up. If it exhausts
+-- this additional allowance, another 'AllocationLimitExceeded' exception
+-- is sent, and so forth.
+--
+-- Note that memory allocation is unrelated to /live memory/, also
+-- known as /heap residency/. A thread can allocate a large amount of
+-- memory and retain anything between none and all of it. It is
+-- better to think of the allocation limit as a limit on
+-- /CPU time/, rather than a limit on memory.
+--
+-- Compared to using timeouts, allocation limits don't count time
+-- spent blocked or in foreign calls.
+--
+enableAllocationLimit :: IO ()
+enableAllocationLimit = do
+ ThreadId t <- myThreadId
+ rts_enableThreadAllocationLimit t
+
+-- | Disable allocation limit processing for the current thread.
+disableAllocationLimit :: IO ()
+disableAllocationLimit = do
+ ThreadId t <- myThreadId
+ rts_disableThreadAllocationLimit t
+
+-- We cannot do these operations safely on another thread, because on
+-- a 32-bit machine we cannot do atomic operations on a 64-bit value.
+-- Therefore, we only expose APIs that allow getting and setting the
+-- limit of the current thread.
+foreign import ccall unsafe "rts_setThreadAllocationCounter"
+ rts_setThreadAllocationCounter :: ThreadId# -> Int64 -> IO ()
+
+foreign import ccall unsafe "rts_getThreadAllocationCounter"
+ rts_getThreadAllocationCounter :: ThreadId# -> IO Int64
+
+foreign import ccall unsafe "rts_enableThreadAllocationLimit"
+ rts_enableThreadAllocationLimit :: ThreadId# -> IO ()
+
+foreign import ccall unsafe "rts_disableThreadAllocationLimit"
+ rts_disableThreadAllocationLimit :: ThreadId# -> IO ()
+
{- |
-Sparks off a new thread to run the 'IO' computation passed as the
+Creates a new thread to run the 'IO' computation passed as the
first argument, and returns the 'ThreadId' of the newly created
thread.
-The new thread will be a lightweight thread; if you want to use a foreign
-library that uses thread-local storage, use 'Control.Concurrent.forkOS' instead.
+The new thread will be a lightweight, /unbound/ thread. Foreign calls
+made by this thread are not guaranteed to be made by any particular OS
+thread; if you need foreign calls to be made by a particular OS
+thread, then use 'Control.Concurrent.forkOS' instead.
-GHC note: the new thread inherits the /masked/ state of the parent
-(see 'Control.Exception.mask').
+The new thread inherits the /masked/ state of the parent (see
+'Control.Exception.mask').
The newly created thread has an exception handler that discards the
exceptions 'BlockedIndefinitelyOnMVar', 'BlockedIndefinitelyOnSTM', and
diff --git a/libraries/base/GHC/IO/Exception.hs b/libraries/base/GHC/IO/Exception.hs
index 0f351f0382..d0a21b2744 100644
--- a/libraries/base/GHC/IO/Exception.hs
+++ b/libraries/base/GHC/IO/Exception.hs
@@ -22,6 +22,7 @@ module GHC.IO.Exception (
BlockedIndefinitelyOnMVar(..), blockedIndefinitelyOnMVar,
BlockedIndefinitelyOnSTM(..), blockedIndefinitelyOnSTM,
Deadlock(..),
+ AllocationLimitExceeded(..), allocationLimitExceeded,
AssertionFailed(..),
SomeAsyncException(..),
@@ -98,6 +99,23 @@ instance Show Deadlock where
-----
+-- |This thread has exceeded its allocation limit. See
+-- 'GHC.Conc.setAllocationCounter' and
+-- 'GHC.Conc.enableAllocationLimit'.
+data AllocationLimitExceeded = AllocationLimitExceeded
+ deriving Typeable
+
+instance Exception AllocationLimitExceeded
+
+instance Show AllocationLimitExceeded where
+ showsPrec _ AllocationLimitExceeded =
+ showString "allocation limit exceeded"
+
+allocationLimitExceeded :: SomeException -- for the RTS
+allocationLimitExceeded = toException AllocationLimitExceeded
+
+-----
+
-- |'assert' was applied to 'False'.
data AssertionFailed = AssertionFailed String
deriving Typeable
@@ -174,7 +192,8 @@ data ArrayException
instance Exception ArrayException
-stackOverflow, heapOverflow :: SomeException -- for the RTS
+-- for the RTS
+stackOverflow, heapOverflow :: SomeException
stackOverflow = toException StackOverflow
heapOverflow = toException HeapOverflow
diff --git a/rts/Capability.c b/rts/Capability.c
index 289eeb2c5b..21f63f39d9 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -297,6 +297,10 @@ initCapability( Capability *cap, nat i )
cap->r.rCCCS = NULL;
#endif
+ // cap->r.rCurrentTSO is charged for calls to allocate(), so we
+ // don't want it set when not running a Haskell thread.
+ cap->r.rCurrentTSO = NULL;
+
traceCapCreate(cap);
traceCapsetAssignCap(CAPSET_OSPROCESS_DEFAULT, i);
traceCapsetAssignCap(CAPSET_CLOCKDOMAIN_DEFAULT, i);
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
index 0659fed89f..a1fb5d446d 100644
--- a/rts/HeapStackCheck.cmm
+++ b/rts/HeapStackCheck.cmm
@@ -100,7 +100,9 @@ stg_gc_noregs
CurrentNursery = bdescr_link(CurrentNursery);
OPEN_NURSERY();
if (Capability_context_switch(MyCapability()) != 0 :: CInt ||
- Capability_interrupt(MyCapability()) != 0 :: CInt) {
+ Capability_interrupt(MyCapability()) != 0 :: CInt ||
+ (StgTSO_alloc_limit(CurrentTSO) `lt` (0::I64) &&
+ (TO_W_(StgTSO_flags(CurrentTSO)) & TSO_ALLOC_LIMIT) != 0)) {
ret = ThreadYielding;
goto sched;
} else {
diff --git a/rts/Linker.c b/rts/Linker.c
index 7d029c62ac..2c74a0dd35 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -1264,6 +1264,10 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(rtsSupportsBoundThreads) \
SymI_HasProto(rts_isProfiled) \
SymI_HasProto(rts_isDynamic) \
+ SymI_HasProto(rts_getThreadAllocationCounter) \
+ SymI_HasProto(rts_setThreadAllocationCounter) \
+ SymI_HasProto(rts_enableThreadAllocationLimit) \
+ SymI_HasProto(rts_disableThreadAllocationLimit) \
SymI_HasProto(setProgArgv) \
SymI_HasProto(startupHaskell) \
SymI_HasProto(shutdownHaskell) \
diff --git a/rts/Prelude.h b/rts/Prelude.h
index 0c54148ba2..614c255af5 100644
--- a/rts/Prelude.h
+++ b/rts/Prelude.h
@@ -37,6 +37,7 @@ extern StgClosure ZCMain_main_closure;
PRELUDE_CLOSURE(base_GHCziIOziException_stackOverflow_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_heapOverflow_closure);
+PRELUDE_CLOSURE(base_GHCziIOziException_allocationLimitExceeded_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnThrowTo_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnMVar_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnSTM_closure);
@@ -101,6 +102,7 @@ PRELUDE_INFO(base_GHCziStable_StablePtr_con_info);
#define stackOverflow_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_stackOverflow_closure)
#define heapOverflow_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_heapOverflow_closure)
+#define allocationLimitExceeded_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_allocationLimitExceeded_closure)
#define blockedIndefinitelyOnMVar_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_blockedIndefinitelyOnMVar_closure)
#define blockedIndefinitelyOnSTM_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_blockedIndefinitelyOnSTM_closure)
#define nonTermination_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nonTermination_closure)
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index 10585c89fa..3b206ffa7e 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -89,6 +89,60 @@ suspendComputation (Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here)
}
/* -----------------------------------------------------------------------------
+ throwToSelf
+
+ Useful for throwing an async exception in a thread from the
+ runtime. It handles unlocking the throwto message returned by
+ throwTo().
+
+ Note [Throw to self when masked]
+
+ When a StackOverflow occurs when the thread is masked, we want to
+ defer the exception to when the thread becomes unmasked/hits an
+ interruptible point. We already have a mechanism for doing this,
+ the blocked_exceptions list, but the use here is a bit unusual,
+ because an exception is normally only added to this list upon
+ an asynchronous 'throwTo' call (with all of the relevant
+ multithreaded nonsense). Morally, a stack overflow should be an
+ asynchronous exception sent by a thread to itself, and it should
+ have the same semantics. But there are a few key differences:
+
+ - If you actually tried to send an asynchronous exception to
+ yourself using throwTo, the exception would actually immediately
+ be delivered. This is because throwTo itself is considered an
+ interruptible point, so the exception is always deliverable. Thus,
+ ordinarily, we never end up with a message to oneself in the
+ blocked_exceptions queue.
+
+ - In the case of a StackOverflow, we don't actually care about the
+ wakeup semantics; when an exception is delivered, the thread that
+ originally threw the exception should be woken up, since throwTo
+ blocks until the exception is successfully thrown. Fortunately,
+ it is harmless to wake up a thread that doesn't actually need waking
+ up, e.g. ourselves.
+
+ - No synchronization is necessary, because we own the TSO and the
+ capability. You can observe this by tracing through the execution
+ of throwTo. We skip synchronizing the message and inter-capability
+ communication.
+
+ We think this doesn't break any invariants, but do be careful!
+ -------------------------------------------------------------------------- */
+
+void
+throwToSelf (Capability *cap, StgTSO *tso, StgClosure *exception)
+{
+ MessageThrowTo *m;
+
+ m = throwTo(cap, tso, tso, exception);
+
+ if (m != NULL) {
+ // throwTo leaves it locked
+ unlockClosure((StgClosure*)m, &stg_MSG_THROWTO_info);
+ }
+}
+
+/* -----------------------------------------------------------------------------
throwTo
This function may be used to throw an exception from one thread to
diff --git a/rts/RaiseAsync.h b/rts/RaiseAsync.h
index e2763d0cb8..6bfed8d6ca 100644
--- a/rts/RaiseAsync.h
+++ b/rts/RaiseAsync.h
@@ -28,6 +28,10 @@ void throwToSingleThreaded_ (Capability *cap,
StgClosure *exception,
rtsBool stop_at_atomically);
+void throwToSelf (Capability *cap,
+ StgTSO *tso,
+ StgClosure *exception);
+
void suspendComputation (Capability *cap,
StgTSO *tso,
StgUpdateFrame *stop_here);
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 44c05cec3b..82e90e5b0e 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -137,6 +137,7 @@ void initRtsFlagsDefaults(void)
#else
RtsFlags.GcFlags.heapBase = 0; /* means don't care */
#endif
+ RtsFlags.GcFlags.allocLimitGrace = (100*1024) / BLOCK_SIZE;
#ifdef DEBUG
RtsFlags.DebugFlags.scheduler = rtsFalse;
@@ -403,6 +404,8 @@ usage_text[] = {
" +PAPI_EVENT - collect papi preset event PAPI_EVENT",
" #NATIVE_EVENT - collect native event NATIVE_EVENT (in hex)",
#endif
+" -xq The allocation limit given to a thread after it receives",
+" an AllocationLimitExceeded exception. (default: 100k)",
"",
"RTS options may also be specified using the GHCRTS environment variable.",
"",
@@ -1361,6 +1364,13 @@ error = rtsTrue;
/* The option prefix '-xx' is reserved for future extension. KSW 1999-11. */
+ case 'q':
+ OPTION_UNSAFE;
+ RtsFlags.GcFlags.allocLimitGrace
+ = decodeSize(rts_argv[arg], 3, BLOCK_SIZE, HS_INT_MAX)
+ / BLOCK_SIZE;
+ break;
+
default:
OPTION_SAFE;
errorBelch("unknown RTS option: %s",rts_argv[arg]);
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index 32bed5af8f..b8201e1651 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -214,6 +214,7 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
getStablePtr((StgPtr)blockedIndefinitelyOnMVar_closure);
getStablePtr((StgPtr)nonTermination_closure);
getStablePtr((StgPtr)blockedIndefinitelyOnSTM_closure);
+ getStablePtr((StgPtr)allocationLimitExceeded_closure);
getStablePtr((StgPtr)nestedAtomically_closure);
getStablePtr((StgPtr)runSparks_closure);
diff --git a/rts/Schedule.c b/rts/Schedule.c
index b11270832d..c2260f0282 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -481,6 +481,10 @@ run_thread:
// happened. So find the new location:
t = cap->r.rCurrentTSO;
+ // cap->r.rCurrentTSO is charged for calls to allocate(), so we
+ // don't want it set when not running a Haskell thread.
+ cap->r.rCurrentTSO = NULL;
+
// And save the current errno in this thread.
// XXX: possibly bogus for SMP because this thread might already
// be running again, see code below.
@@ -1078,6 +1082,21 @@ schedulePostRunThread (Capability *cap, StgTSO *t)
}
}
+ //
+ // If the current thread's allocation limit has run out, send it
+ // the AllocationLimitExceeded exception.
+
+ if (t->alloc_limit < 0 && (t->flags & TSO_ALLOC_LIMIT)) {
+ // Use a throwToSelf rather than a throwToSingleThreaded, because
+ // it correctly handles the case where the thread is currently
+ // inside mask. Also the thread might be blocked (e.g. on an
+ // MVar), and throwToSingleThreaded doesn't unblock it
+ // correctly in that case.
+ throwToSelf(cap, t, allocationLimitExceeded_closure);
+ t->alloc_limit = (StgInt64)RtsFlags.GcFlags.allocLimitGrace
+ * BLOCK_SIZE;
+ }
+
/* some statistics gathering in the parallel case */
}
diff --git a/rts/Threads.c b/rts/Threads.c
index 76e844a3f6..90efd9ce4e 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -110,6 +110,8 @@ createThread(Capability *cap, W_ size)
tso->stackobj = stack;
tso->tot_stack_size = stack->stack_size;
+ tso->alloc_limit = 0;
+
tso->trec = NO_TREC;
#ifdef PROFILING
@@ -164,6 +166,31 @@ rts_getThreadId(StgPtr tso)
return ((StgTSO *)tso)->id;
}
+/* ---------------------------------------------------------------------------
+ * Getting & setting the thread allocation limit
+ * ------------------------------------------------------------------------ */
+HsInt64 rts_getThreadAllocationCounter(StgPtr tso)
+{
+ // NB. doesn't take into account allocation in the current nursery
+ // block, so it might be off by up to 4k.
+ return ((StgTSO *)tso)->alloc_limit;
+}
+
+void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i)
+{
+ ((StgTSO *)tso)->alloc_limit = i;
+}
+
+void rts_enableThreadAllocationLimit(StgPtr tso)
+{
+ ((StgTSO *)tso)->flags |= TSO_ALLOC_LIMIT;
+}
+
+void rts_disableThreadAllocationLimit(StgPtr tso)
+{
+ ((StgTSO *)tso)->flags &= ~TSO_ALLOC_LIMIT;
+}
+
/* -----------------------------------------------------------------------------
Remove a thread from a queue.
Fails fatally if the TSO is not on the queue.
@@ -524,21 +551,8 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
stg_min(tso->stackobj->stack + tso->stackobj->stack_size,
tso->stackobj->sp+64)));
- if (tso->flags & TSO_BLOCKEX) {
- // NB. StackOverflow exceptions must be deferred if the thread is
- // inside Control.Exception.mask. See bug #767 and bug #8303.
- // This implementation is a minor hack, see Note [Throw to self when masked]
- MessageThrowTo *msg = (MessageThrowTo*)allocate(cap, sizeofW(MessageThrowTo));
- SET_HDR(msg, &stg_MSG_THROWTO_info, CCS_SYSTEM);
- msg->source = tso;
- msg->target = tso;
- msg->exception = (StgClosure *)stackOverflow_closure;
- blockedThrowTo(cap, tso, msg);
- } else {
- // Send this thread the StackOverflow exception
- throwToSingleThreaded(cap, tso, (StgClosure *)stackOverflow_closure);
- return;
- }
+ // Note [Throw to self when masked], also #767 and #8303.
+ throwToSelf(cap, tso, (StgClosure *)stackOverflow_closure);
}
@@ -669,39 +683,6 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
// IF_DEBUG(scheduler,printTSO(new_tso));
}
-/* Note [Throw to self when masked]
- *
- * When a StackOverflow occurs when the thread is masked, we want to
- * defer the exception to when the thread becomes unmasked/hits an
- * interruptible point. We already have a mechanism for doing this,
- * the blocked_exceptions list, but the use here is a bit unusual,
- * because an exception is normally only added to this list upon
- * an asynchronous 'throwTo' call (with all of the relevant
- * multithreaded nonsense). Morally, a stack overflow should be an
- * asynchronous exception sent by a thread to itself, and it should
- * have the same semantics. But there are a few key differences:
- *
- * - If you actually tried to send an asynchronous exception to
- * yourself using throwTo, the exception would actually immediately
- * be delivered. This is because throwTo itself is considered an
- * interruptible point, so the exception is always deliverable. Thus,
- * ordinarily, we never end up with a message to onesself in the
- * blocked_exceptions queue.
- *
- * - In the case of a StackOverflow, we don't actually care about the
- * wakeup semantics; when an exception is delivered, the thread that
- * originally threw the exception should be woken up, since throwTo
- * blocks until the exception is successfully thrown. Fortunately,
- * it is harmless to wakeup a thread that doesn't actually need waking
- * up, e.g. ourselves.
- *
- * - No synchronization is necessary, because we own the TSO and the
- * capability. You can observe this by tracing through the execution
- * of throwTo. We skip synchronizing the message and inter-capability
- * communication.
- *
- * We think this doesn't break any invariants, but do be careful!
- */
/* ---------------------------------------------------------------------------
diff --git a/rts/package.conf.in b/rts/package.conf.in
index 82d2870cde..ce44a09651 100644
--- a/rts/package.conf.in
+++ b/rts/package.conf.in
@@ -99,6 +99,7 @@ ld-options:
, "-Wl,-u,_base_ControlziExceptionziBase_nonTermination_closure"
, "-Wl,-u,_base_GHCziIOziException_blockedIndefinitelyOnMVar_closure"
, "-Wl,-u,_base_GHCziIOziException_blockedIndefinitelyOnSTM_closure"
+ , "-Wl,-u,_base_GHCziIOziException_allocationLimitExceeded_closure"
, "-Wl,-u,_base_ControlziExceptionziBase_nestedAtomically_closure"
, "-Wl,-u,_base_GHCziEventziThread_blockedOnBadFD_closure"
, "-Wl,-u,_base_GHCziWeak_runFinalizzerBatch_closure"
@@ -140,6 +141,7 @@ ld-options:
, "-Wl,-u,base_ControlziExceptionziBase_nonTermination_closure"
, "-Wl,-u,base_GHCziIOziException_blockedIndefinitelyOnMVar_closure"
, "-Wl,-u,base_GHCziIOziException_blockedIndefinitelyOnSTM_closure"
+ , "-Wl,-u,base_GHCziIOziException_allocationLimitExceeded_closure"
, "-Wl,-u,base_ControlziExceptionziBase_nestedAtomically_closure"
, "-Wl,-u,base_GHCziEventziThread_blockedOnBadFD_closure"
, "-Wl,-u,base_GHCziWeak_runFinalizzerBatch_closure"
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 379d9da769..afb171b568 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -684,7 +684,10 @@ StgPtr allocate (Capability *cap, W_ n)
TICK_ALLOC_HEAP_NOCTR(WDS(n));
CCS_ALLOC(cap->r.rCCCS,n);
-
+ if (cap->r.rCurrentTSO != NULL) {
+ cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
+ }
+
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
// The largest number of words such that
// the computation of req_blocks will not overflow.
@@ -829,6 +832,9 @@ allocatePinned (Capability *cap, W_ n)
TICK_ALLOC_HEAP_NOCTR(WDS(n));
CCS_ALLOC(cap->r.rCCCS,n);
+ if (cap->r.rCurrentTSO != NULL) {
+ cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
+ }
bd = cap->pinned_object_block;
diff --git a/rts/win32/libHSbase.def b/rts/win32/libHSbase.def
index 8140528c70..2091e85c9c 100644
--- a/rts/win32/libHSbase.def
+++ b/rts/win32/libHSbase.def
@@ -32,11 +32,12 @@ EXPORTS
base_GHCziTopHandler_flushStdHandles_closure
- base_GHCziWeak_runFinalizzerBatch_closure
+ base_GHCziWeak_runFinalizzerBatch_closure
base_GHCziPack_unpackCString_closure
base_GHCziIOziException_blockedIndefinitelyOnMVar_closure
base_GHCziIOziException_blockedIndefinitelyOnSTM_closure
- base_GHCziIOziException_stackOverflow_closure
+ base_GHCziIOziException_allocationLimitExceeded_closure
+ base_GHCziIOziException_stackOverflow_closure
base_ControlziExceptionziBase_nonTermination_closure
base_ControlziExceptionziBase_nestedAtomically_closure
diff --git a/testsuite/tests/concurrent/should_run/all.T b/testsuite/tests/concurrent/should_run/all.T
index 166c232766..b77d9aca87 100644
--- a/testsuite/tests/concurrent/should_run/all.T
+++ b/testsuite/tests/concurrent/should_run/all.T
@@ -88,6 +88,12 @@ test('threadstatus-9333', [omit_ways(['threaded2'])], compile_and_run, [''])
test('T9379', normal, compile_and_run, [''])
+test('allocLimit1', exit_code(1), compile_and_run, [''])
+test('allocLimit2', normal, compile_and_run, [''])
+test('allocLimit3', exit_code(1), compile_and_run, [''])
+test('allocLimit4', [ extra_run_opts('+RTS -xq300k -RTS') ],
+ compile_and_run, [''])
+
# -----------------------------------------------------------------------------
# These tests we only do for a full run
@@ -252,3 +258,4 @@ test('setnumcapabilities001',
# omit ghci, which can't handle unboxed tuples:
test('compareAndSwap', [omit_ways(['ghci','hpc']), reqlib('primitive')], compile_and_run, [''])
+
diff --git a/testsuite/tests/concurrent/should_run/allocLimit1.hs b/testsuite/tests/concurrent/should_run/allocLimit1.hs
new file mode 100644
index 0000000000..b1c8fa6035
--- /dev/null
+++ b/testsuite/tests/concurrent/should_run/allocLimit1.hs
@@ -0,0 +1,9 @@
+module Main (main) where
+
+import GHC.Conc
+
+main = do
+ setAllocationCounter (10*1024)
+ enableAllocationLimit
+ print (length [1..])
+
diff --git a/testsuite/tests/concurrent/should_run/allocLimit1.stderr b/testsuite/tests/concurrent/should_run/allocLimit1.stderr
new file mode 100644
index 0000000000..2133e14ce1
--- /dev/null
+++ b/testsuite/tests/concurrent/should_run/allocLimit1.stderr
@@ -0,0 +1 @@
+allocLimit1: allocation limit exceeded
diff --git a/testsuite/tests/concurrent/should_run/allocLimit2.hs b/testsuite/tests/concurrent/should_run/allocLimit2.hs
new file mode 100644
index 0000000000..4fd117b615
--- /dev/null
+++ b/testsuite/tests/concurrent/should_run/allocLimit2.hs
@@ -0,0 +1,17 @@
+module Main (main) where
+
+import GHC.Conc
+import Control.Concurrent
+import Control.Exception
+import System.Exit
+
+main = do
+ m <- newEmptyMVar
+ let action = do setAllocationCounter (10*1024)
+ enableAllocationLimit
+ print (length [1..])
+ forkFinally action (putMVar m)
+ r <- takeMVar m
+ case r of
+ Left e | Just AllocationLimitExceeded <- fromException e -> return ()
+ _ -> print r >> exitFailure
diff --git a/testsuite/tests/concurrent/should_run/allocLimit3.hs b/testsuite/tests/concurrent/should_run/allocLimit3.hs
new file mode 100644
index 0000000000..28881dc016
--- /dev/null
+++ b/testsuite/tests/concurrent/should_run/allocLimit3.hs
@@ -0,0 +1,15 @@
+module Main (main) where
+
+import GHC.Conc
+import Control.Concurrent
+import Control.Exception
+
+main = do
+ setAllocationCounter (10*1024)
+ enableAllocationLimit
+
+ -- alloc limit overflow while masked: should successfully print the
+ -- result, and then immediately raise the exception
+ r <- mask_ $ try $ print (length [1..100000])
+
+ print (r :: Either SomeException ())
diff --git a/testsuite/tests/concurrent/should_run/allocLimit3.stderr b/testsuite/tests/concurrent/should_run/allocLimit3.stderr
new file mode 100644
index 0000000000..27ae0a9480
--- /dev/null
+++ b/testsuite/tests/concurrent/should_run/allocLimit3.stderr
@@ -0,0 +1 @@
+allocLimit3: allocation limit exceeded
diff --git a/testsuite/tests/concurrent/should_run/allocLimit3.stdout b/testsuite/tests/concurrent/should_run/allocLimit3.stdout
new file mode 100644
index 0000000000..f7393e847d
--- /dev/null
+++ b/testsuite/tests/concurrent/should_run/allocLimit3.stdout
@@ -0,0 +1 @@
+100000
diff --git a/testsuite/tests/concurrent/should_run/allocLimit4.hs b/testsuite/tests/concurrent/should_run/allocLimit4.hs
new file mode 100644
index 0000000000..b589ffa4af
--- /dev/null
+++ b/testsuite/tests/concurrent/should_run/allocLimit4.hs
@@ -0,0 +1,31 @@
+module Main (main) where
+
+import GHC.Conc
+import Control.Concurrent
+import Control.Exception
+import System.Exit
+import Control.Monad
+
+-- check that +RTS -xq is doing the right thing: the test requires
+-- +RTS -xq300k
+
+main = do
+ m <- newEmptyMVar
+ let action = do
+ e <- try $ do
+ setAllocationCounter (10*1024)
+ enableAllocationLimit
+ print (length [1..])
+ case e of
+ Left AllocationLimitExceeded{} -> do
+ c <- getAllocationCounter
+ when (c < 250*1024 || c > 350*1024) $ fail "wrong limit grace"
+ print (length [2..])
+ Right _ ->
+ fail "didn't catch AllocationLimitExceeded"
+
+ forkFinally action (putMVar m)
+ r <- takeMVar m
+ case r of
+ Left e | Just AllocationLimitExceeded <- fromException e -> return ()
+ _ -> print r >> exitFailure
diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs
index 72605d755e..486f497572 100644
--- a/utils/deriveConstants/DeriveConstants.hs
+++ b/utils/deriveConstants/DeriveConstants.hs
@@ -413,6 +413,7 @@ wanteds = concat
,closureField C "StgTSO" "flags"
,closureField C "StgTSO" "dirty"
,closureField C "StgTSO" "bq"
+ ,closureField Both "StgTSO" "alloc_limit"
,closureField_ Both "StgTSO_cccs" "StgTSO" "prof.cccs"
,closureField Both "StgTSO" "stackobj"