path: root/rts
author     Simon Marlow <marlowsd@gmail.com>    2014-04-28 16:55:47 +0100
committer  Simon Marlow <marlowsd@gmail.com>    2014-05-02 14:49:22 +0100
commit     b0534f78a73f972e279eed4447a5687bd6a8308e (patch)
tree       02d52756620bf27b9df9db45c57dacf55f190842 /rts
parent     34db5ccf52ec2a1b5e953c282d0c52a7fc82c02a (diff)
download   haskell-b0534f78a73f972e279eed4447a5687bd6a8308e.tar.gz
Per-thread allocation counters and limits
This tracks the amount of memory allocated by each thread in a counter stored in the TSO. Optionally, when the counter drops below zero (it counts down), the thread can be sent an asynchronous exception: AllocationLimitExceeded. When this happens, the thread is given a small additional limit so that it can handle the exception. See the documentation in GHC.Conc for more details.

Allocation limits are similar to timeouts, but:

- timeouts use real time, not CPU time. Allocation limits do not count anything while the thread is blocked or in foreign code.
- timeouts don't re-trigger if the thread catches the exception; allocation limits do.
- timeouts can catch non-allocating loops, if you use -fno-omit-yields. This doesn't work for allocation limits.

I couldn't measure any impact on benchmarks with these changes, even for nofib/smp.
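For a sense of what the feature looks like from the Haskell side, here is a minimal sketch. It assumes the GHC.Conc API this message points to (setAllocationCounter and enableAllocationLimit) and the AllocationLimitExceeded exception as re-exported from Control.Exception:

    import Control.Concurrent (forkIO, threadDelay)
    import Control.Exception (AllocationLimitExceeded (..), evaluate, try)
    import GHC.Conc (enableAllocationLimit, setAllocationCounter)

    main :: IO ()
    main = do
      _ <- forkIO $ do
        -- The counter counts down; the exception becomes deliverable
        -- once it drops below zero.
        setAllocationCounter (1024 * 1024)   -- roughly 1 MB of allocation
        enableAllocationLimit
        r <- try (evaluate (sum [1 .. 1000000 :: Integer]))
        case r of
          Left AllocationLimitExceeded -> putStrLn "allocation limit hit"
          Right n                      -> print n
      threadDelay 1000000   -- crudely wait for the child thread

Because the limit re-triggers, a handler that wants to keep allocating indefinitely has to disable the limit or top the counter back up.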
Diffstat (limited to 'rts')
-rw-r--r--  rts/HeapStackCheck.cmm    4
-rw-r--r--  rts/Linker.c              4
-rw-r--r--  rts/Prelude.h             2
-rw-r--r--  rts/RaiseAsync.c         54
-rw-r--r--  rts/RaiseAsync.h          6
-rw-r--r--  rts/RtsFlags.c           10
-rw-r--r--  rts/RtsStartup.c          1
-rw-r--r--  rts/Schedule.c           19
-rw-r--r--  rts/Threads.c            77
-rw-r--r--  rts/package.conf.in       2
-rw-r--r--  rts/sm/Storage.c          8
11 files changed, 136 insertions, 51 deletions
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
index 12bcfb26df..280820334a 100644
--- a/rts/HeapStackCheck.cmm
+++ b/rts/HeapStackCheck.cmm
@@ -100,7 +100,9 @@ stg_gc_noregs
CurrentNursery = bdescr_link(CurrentNursery);
OPEN_NURSERY();
if (Capability_context_switch(MyCapability()) != 0 :: CInt ||
- Capability_interrupt(MyCapability()) != 0 :: CInt) {
+ Capability_interrupt(MyCapability()) != 0 :: CInt ||
+ (StgTSO_alloc_limit(CurrentTSO) `lt` 0 &&
+ (TO_W_(StgTSO_flags(CurrentTSO)) & TSO_ALLOC_LIMIT) != 0)) {
ret = ThreadYielding;
goto sched;
} else {
diff --git a/rts/Linker.c b/rts/Linker.c
index ea7c1c67b9..6ddf4bef9d 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -1230,6 +1230,10 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(rts_getFunPtr) \
SymI_HasProto(rts_getStablePtr) \
SymI_HasProto(rts_getThreadId) \
+ SymI_HasProto(rts_getThreadAllocationCounter) \
+ SymI_HasProto(rts_setThreadAllocationCounter) \
+ SymI_HasProto(rts_enableThreadAllocationLimit) \
+ SymI_HasProto(rts_disableThreadAllocationLimit) \
SymI_HasProto(rts_getWord) \
SymI_HasProto(rts_getWord8) \
SymI_HasProto(rts_getWord16) \
diff --git a/rts/Prelude.h b/rts/Prelude.h
index 89e80a0a3d..ca08e2c84e 100644
--- a/rts/Prelude.h
+++ b/rts/Prelude.h
@@ -37,6 +37,7 @@ extern StgClosure ZCMain_main_closure;
PRELUDE_CLOSURE(base_GHCziIOziException_stackOverflow_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_heapOverflow_closure);
+PRELUDE_CLOSURE(base_GHCziIOziException_allocationLimitExceeded_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnThrowTo_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnMVar_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnSTM_closure);
@@ -100,6 +101,7 @@ PRELUDE_INFO(base_GHCziStable_StablePtr_con_info);
#define stackOverflow_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_stackOverflow_closure)
#define heapOverflow_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_heapOverflow_closure)
+#define allocationLimitExceeded_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_allocationLimitExceeded_closure)
#define blockedIndefinitelyOnMVar_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_blockedIndefinitelyOnMVar_closure)
#define blockedIndefinitelyOnSTM_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_blockedIndefinitelyOnSTM_closure)
#define nonTermination_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nonTermination_closure)
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index a5440e40ad..847076bc6f 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -89,6 +89,60 @@ suspendComputation (Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here)
}
/* -----------------------------------------------------------------------------
+ throwToSelf
+
+ Useful for throwing an async exception in a thread from the
+ runtime. It handles unlocking the throwto message returned by
+ throwTo().
+
+ Note [Throw to self when masked]
+
+ When a StackOverflow occurs when the thread is masked, we want to
+ defer the exception to when the thread becomes unmasked/hits an
+ interruptible point. We already have a mechanism for doing this,
+ the blocked_exceptions list, but the use here is a bit unusual,
+ because an exception is normally only added to this list upon
+ an asynchronous 'throwTo' call (with all of the relevant
+ multithreaded nonsense). Morally, a stack overflow should be an
+ asynchronous exception sent by a thread to itself, and it should
+ have the same semantics. But there are a few key differences:
+
+ - If you actually tried to send an asynchronous exception to
+ yourself using throwTo, the exception would actually immediately
+ be delivered. This is because throwTo itself is considered an
+ interruptible point, so the exception is always deliverable. Thus,
+ ordinarily, we never end up with a message to oneself in the
+ blocked_exceptions queue.
+
+ - In the case of a StackOverflow, we don't actually care about the
+ wakeup semantics; when an exception is delivered, the thread that
+ originally threw the exception should be woken up, since throwTo
+ blocks until the exception is successfully thrown. Fortunately,
+ it is harmless to wake up a thread that doesn't actually need waking
+ up, e.g. ourselves.
+
+ - No synchronization is necessary, because we own the TSO and the
+ capability. You can observe this by tracing through the execution
+ of throwTo. We skip synchronizing the message and inter-capability
+ communication.
+
+ We think this doesn't break any invariants, but do be careful!
+ -------------------------------------------------------------------------- */
+
+void
+throwToSelf (Capability *cap, StgTSO *tso, StgClosure *exception)
+{
+ MessageThrowTo *m;
+
+ m = throwTo(cap, tso, tso, exception);
+
+ if (m != NULL) {
+ // throwTo leaves it locked
+ unlockClosure((StgClosure*)m, &stg_MSG_THROWTO_info);
+ }
+}
+
+/* -----------------------------------------------------------------------------
throwTo
This function may be used to throw an exception from one thread to
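The first bullet of Note [Throw to self when masked] above is directly observable from Haskell. A small sketch, using an arbitrary ErrorCall as the payload:

    import Control.Concurrent (myThreadId)
    import Control.Exception (ErrorCall (..), mask_, throwTo)

    main :: IO ()
    main = mask_ $ do
      tid <- myThreadId
      -- throwTo is itself an interruptible point, so an exception
      -- thrown to oneself is delivered immediately even under mask_;
      -- this is why a self-throw normally never leaves a message on
      -- the blocked_exceptions queue.
      throwTo tid (ErrorCall "delivered immediately")
      putStrLn "never reached"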
diff --git a/rts/RaiseAsync.h b/rts/RaiseAsync.h
index 1f61b8c72d..65ca4f5103 100644
--- a/rts/RaiseAsync.h
+++ b/rts/RaiseAsync.h
@@ -28,7 +28,11 @@ void throwToSingleThreaded_ (Capability *cap,
StgClosure *exception,
rtsBool stop_at_atomically);
-void suspendComputation (Capability *cap,
+void throwToSelf (Capability *cap,
+ StgTSO *tso,
+ StgClosure *exception);
+
+void suspendComputation (Capability *cap,
StgTSO *tso,
StgUpdateFrame *stop_here);
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index af1b2049f6..fb1e2ec07b 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -137,6 +137,7 @@ void initRtsFlagsDefaults(void)
#else
RtsFlags.GcFlags.heapBase = 0; /* means don't care */
#endif
+ RtsFlags.GcFlags.allocLimitGrace = (100*1024) / BLOCK_SIZE;
#ifdef DEBUG
RtsFlags.DebugFlags.scheduler = rtsFalse;
@@ -402,6 +403,8 @@ usage_text[] = {
" +PAPI_EVENT - collect papi preset event PAPI_EVENT",
" #NATIVE_EVENT - collect native event NATIVE_EVENT (in hex)",
#endif
+" -xq The allocation limit given to a thread after it receives",
+" an AllocationLimitExceeded exception. (default: 100k)",
"",
"RTS options may also be specified using the GHCRTS environment variable.",
"",
@@ -1360,6 +1363,13 @@ error = rtsTrue;
/* The option prefix '-xx' is reserved for future extension. KSW 1999-11. */
+ case 'q':
+ OPTION_UNSAFE;
+ RtsFlags.GcFlags.allocLimitGrace
+ = decodeSize(rts_argv[arg], 3, BLOCK_SIZE, HS_INT_MAX)
+ / BLOCK_SIZE;
+ break;
+
default:
OPTION_SAFE;
errorBelch("unknown RTS option: %s",rts_argv[arg]);
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index aa7306f88a..640811ff17 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -208,6 +208,7 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
getStablePtr((StgPtr)blockedIndefinitelyOnMVar_closure);
getStablePtr((StgPtr)nonTermination_closure);
getStablePtr((StgPtr)blockedIndefinitelyOnSTM_closure);
+ getStablePtr((StgPtr)allocationLimitExceeded_closure);
getStablePtr((StgPtr)nestedAtomically_closure);
getStablePtr((StgPtr)runSparks_closure);
diff --git a/rts/Schedule.c b/rts/Schedule.c
index adf2b5cb39..b1b489a6d1 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -481,6 +481,10 @@ run_thread:
// happened. So find the new location:
t = cap->r.rCurrentTSO;
+ // cap->r.rCurrentTSO is charged for calls to allocate(), so we
+ // don't want it set during scheduler operations.
+ cap->r.rCurrentTSO = NULL;
+
// And save the current errno in this thread.
// XXX: possibly bogus for SMP because this thread might already
// be running again, see code below.
@@ -1078,6 +1082,21 @@ schedulePostRunThread (Capability *cap, StgTSO *t)
}
}
+ //
+ // If the current thread's allocation limit has run out, send it
+ // the AllocationLimitExceeded exception.
+
+ if (t->alloc_limit < 0 && (t->flags & TSO_ALLOC_LIMIT)) {
+ // Use a throwToSelf rather than a throwToSingleThreaded, because
+ // it correctly handles the case where the thread is currently
+ // inside mask. Also the thread might be blocked (e.g. on an
+ // MVar), and throwToSingleThreaded doesn't unblock it
+ // correctly in that case.
+ throwToSelf(cap, t, allocationLimitExceeded_closure);
+ t->alloc_limit = (StgInt64)RtsFlags.GcFlags.allocLimitGrace
+ * BLOCK_SIZE;
+ }
+
/* some statistics gathering in the parallel case */
}
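This hook is also what gives limits their re-triggering behaviour: each delivery refills the counter with the -xq grace (100k by default), so a thread that catches the exception and keeps allocating is hit again. A sketch of that behaviour, reusing the GHC.Conc names assumed above:

    import Control.Exception (AllocationLimitExceeded (..), evaluate, handle)
    import Control.Monad (forever, void)
    import GHC.Conc (enableAllocationLimit, setAllocationCounter)

    main :: IO ()
    main = do
      setAllocationCounter (512 * 1024)
      enableAllocationLimit
      -- Each handler run starts from the refilled grace allowance, so
      -- the next burst of allocation trips the exception once more.
      forever $
        handle (\AllocationLimitExceeded -> putStrLn "limit hit again") $
          void (evaluate (length [1 .. 1000000 :: Int]))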
diff --git a/rts/Threads.c b/rts/Threads.c
index af4353fc49..b82295284b 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -110,6 +110,8 @@ createThread(Capability *cap, W_ size)
tso->stackobj = stack;
tso->tot_stack_size = stack->stack_size;
+ tso->alloc_limit = 0;
+
tso->trec = NO_TREC;
#ifdef PROFILING
@@ -164,6 +166,31 @@ rts_getThreadId(StgPtr tso)
return ((StgTSO *)tso)->id;
}
+/* ---------------------------------------------------------------------------
+ * Getting & setting the thread allocation limit
+ * ------------------------------------------------------------------------ */
+HsInt64 rts_getThreadAllocationCounter(StgPtr tso)
+{
+ // NB. doesn't take into account allocation in the current nursery
+ // block, so it might be off by up to 4k.
+ return ((StgTSO *)tso)->alloc_limit;
+}
+
+void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i)
+{
+ ((StgTSO *)tso)->alloc_limit = i;
+}
+
+void rts_enableThreadAllocationLimit(StgPtr tso)
+{
+ ((StgTSO *)tso)->flags |= TSO_ALLOC_LIMIT;
+}
+
+void rts_disableThreadAllocationLimit(StgPtr tso)
+{
+ ((StgTSO *)tso)->flags &= ~TSO_ALLOC_LIMIT;
+}
+
/* -----------------------------------------------------------------------------
Remove a thread from a queue.
Fails fatally if the TSO is not on the queue.
@@ -524,21 +551,8 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
stg_min(tso->stackobj->stack + tso->stackobj->stack_size,
tso->stackobj->sp+64)));
- if (tso->flags & TSO_BLOCKEX) {
- // NB. StackOverflow exceptions must be deferred if the thread is
- // inside Control.Exception.mask. See bug #767 and bug #8303.
- // This implementation is a minor hack, see Note [Throw to self when masked]
- MessageThrowTo *msg = (MessageThrowTo*)allocate(cap, sizeofW(MessageThrowTo));
- SET_HDR(msg, &stg_MSG_THROWTO_info, CCS_SYSTEM);
- msg->source = tso;
- msg->target = tso;
- msg->exception = (StgClosure *)stackOverflow_closure;
- blockedThrowTo(cap, tso, msg);
- } else {
- // Send this thread the StackOverflow exception
- throwToSingleThreaded(cap, tso, (StgClosure *)stackOverflow_closure);
- return;
- }
+ // Note [Throw to self when masked], also #767 and #8303.
+ throwToSelf(cap, tso, (StgClosure *)stackOverflow_closure);
}
@@ -669,39 +683,6 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
// IF_DEBUG(scheduler,printTSO(new_tso));
}
-/* Note [Throw to self when masked]
- *
- * When a StackOverflow occurs when the thread is masked, we want to
- * defer the exception to when the thread becomes unmasked/hits an
- * interruptible point. We already have a mechanism for doing this,
- * the blocked_exceptions list, but the use here is a bit unusual,
- * because an exception is normally only added to this list upon
- * an asynchronous 'throwTo' call (with all of the relevant
- * multithreaded nonsense). Morally, a stack overflow should be an
- * asynchronous exception sent by a thread to itself, and it should
- * have the same semantics. But there are a few key differences:
- *
- * - If you actually tried to send an asynchronous exception to
- * yourself using throwTo, the exception would actually immediately
- * be delivered. This is because throwTo itself is considered an
- * interruptible point, so the exception is always deliverable. Thus,
- * ordinarily, we never end up with a message to oneself in the
- * blocked_exceptions queue.
- *
- * - In the case of a StackOverflow, we don't actually care about the
- * wakeup semantics; when an exception is delivered, the thread that
- * originally threw the exception should be woken up, since throwTo
- * blocks until the exception is successfully thrown. Fortunately,
- * it is harmless to wake up a thread that doesn't actually need waking
- * up, e.g. ourselves.
- *
- * - No synchronization is necessary, because we own the TSO and the
- * capability. You can observe this by tracing through the execution
- * of throwTo. We skip synchronizing the message and inter-capability
- * communication.
- *
- * We think this doesn't break any invariants, but do be careful!
- */
/* ---------------------------------------------------------------------------
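Because the counter counts down and, as the comment on rts_getThreadAllocationCounter notes, omits the current nursery block, bracketing an action with two reads measures its allocation to within about 4k. A sketch, assuming GHC.Conc also wraps the getter as getAllocationCounter:

    import Data.Int (Int64)
    import GHC.Conc (getAllocationCounter)

    -- Approximate bytes allocated by an action on the calling thread.
    -- The counter counts down, hence (before - after); the result may
    -- be off by up to 4k of unaccounted current-nursery allocation.
    measureAlloc :: IO a -> IO (Int64, a)
    measureAlloc act = do
      before <- getAllocationCounter
      r      <- act
      after  <- getAllocationCounter
      return (before - after, r)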
diff --git a/rts/package.conf.in b/rts/package.conf.in
index 4c8686f262..25fb5eb543 100644
--- a/rts/package.conf.in
+++ b/rts/package.conf.in
@@ -98,6 +98,7 @@ ld-options:
, "-Wl,-u,_base_ControlziExceptionziBase_nonTermination_closure"
, "-Wl,-u,_base_GHCziIOziException_blockedIndefinitelyOnMVar_closure"
, "-Wl,-u,_base_GHCziIOziException_blockedIndefinitelyOnSTM_closure"
+ , "-Wl,-u,_base_GHCziIOziException_allocationQuotaExceeded_closure"
, "-Wl,-u,_base_ControlziExceptionziBase_nestedAtomically_closure"
, "-Wl,-u,_base_GHCziWeak_runFinalizzerBatch_closure"
, "-Wl,-u,_base_GHCziTopHandler_flushStdHandles_closure"
@@ -138,6 +139,7 @@ ld-options:
, "-Wl,-u,base_ControlziExceptionziBase_nonTermination_closure"
, "-Wl,-u,base_GHCziIOziException_blockedIndefinitelyOnMVar_closure"
, "-Wl,-u,base_GHCziIOziException_blockedIndefinitelyOnSTM_closure"
+ , "-Wl,-u,base_GHCziIOziException_allocationQuotaExceeded_closure"
, "-Wl,-u,base_ControlziExceptionziBase_nestedAtomically_closure"
, "-Wl,-u,base_GHCziWeak_runFinalizzerBatch_closure"
, "-Wl,-u,base_GHCziTopHandler_flushStdHandles_closure"
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 86bd1c2bb3..865a890fcc 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -684,7 +684,10 @@ StgPtr allocate (Capability *cap, W_ n)
TICK_ALLOC_HEAP_NOCTR(WDS(n));
CCS_ALLOC(cap->r.rCCCS,n);
-
+ if (cap->r.rCurrentTSO != NULL) {
+ cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
+ }
+
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
W_ req_blocks = (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
@@ -821,6 +824,9 @@ allocatePinned (Capability *cap, W_ n)
TICK_ALLOC_HEAP_NOCTR(WDS(n));
CCS_ALLOC(cap->r.rCCCS,n);
+ if (cap->r.rCurrentTSO != NULL) {
+ cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
+ }
bd = cap->pinned_object_block;