path: root/rts
author     Simon Marlow <marlowsd@gmail.com>    2014-04-28 16:55:47 +0100
committer  Simon Marlow <marlowsd@gmail.com>    2014-05-02 14:49:22 +0100
commit     b0534f78a73f972e279eed4447a5687bd6a8308e (patch)
tree       02d52756620bf27b9df9db45c57dacf55f190842 /rts
parent     34db5ccf52ec2a1b5e953c282d0c52a7fc82c02a (diff)
download   haskell-b0534f78a73f972e279eed4447a5687bd6a8308e.tar.gz
Per-thread allocation counters and limits
This tracks the amount of memory allocated by each thread in a counter stored in the TSO. Optionally, when the counter drops below zero (it counts down), the thread can be sent an asynchronous exception: AllocationLimitExceeded. When this happens, the thread is given a small additional limit so that it can handle the exception. See the documentation in GHC.Conc for more details.

Allocation limits are similar to timeouts, but:

- timeouts use real time, not CPU time. Allocation limits do not count anything while the thread is blocked or in foreign code.
- timeouts don't re-trigger if the thread catches the exception; allocation limits do.
- timeouts can catch non-allocating loops, if you use -fno-omit-yields. This doesn't work for allocation limits.

I couldn't measure any impact on benchmarks with these changes, even for nofib/smp.
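For a sense of what the feature looks like from the Haskell side, here is a minimal sketch. It assumes the GHC.Conc API this message points to (setAllocationCounter and enableAllocationLimit) and the AllocationLimitExceeded exception as re-exported from Control.Exception:

    import Control.Concurrent (forkIO, threadDelay)
    import Control.Exception (AllocationLimitExceeded (..), evaluate, try)
    import GHC.Conc (enableAllocationLimit, setAllocationCounter)

    main :: IO ()
    main = do
      _ <- forkIO $ do
        -- The counter counts down; the exception becomes deliverable
        -- once it drops below zero.
        setAllocationCounter (1024 * 1024)   -- roughly 1 MB of allocation
        enableAllocationLimit
        r <- try (evaluate (sum [1 .. 1000000 :: Integer]))
        case r of
          Left AllocationLimitExceeded -> putStrLn "allocation limit hit"
          Right n                      -> print n
      threadDelay 1000000   -- crudely wait for the child thread

Because the limit re-triggers, a handler that wants to keep allocating indefinitely has to disable the limit or top the counter back up.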
Diffstat (limited to 'rts')
-rw-r--r--  rts/HeapStackCheck.cmm    4
-rw-r--r--  rts/Linker.c              4
-rw-r--r--  rts/Prelude.h             2
-rw-r--r--  rts/RaiseAsync.c         54
-rw-r--r--  rts/RaiseAsync.h          6
-rw-r--r--  rts/RtsFlags.c           10
-rw-r--r--  rts/RtsStartup.c          1
-rw-r--r--  rts/Schedule.c           19
-rw-r--r--  rts/Threads.c            77
-rw-r--r--  rts/package.conf.in       2
-rw-r--r--  rts/sm/Storage.c          8
11 files changed, 136 insertions, 51 deletions
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
index 12bcfb26df..280820334a 100644
--- a/rts/HeapStackCheck.cmm
+++ b/rts/HeapStackCheck.cmm
@@ -100,7 +100,9 @@ stg_gc_noregs
CurrentNursery = bdescr_link(CurrentNursery);
OPEN_NURSERY();
if (Capability_context_switch(MyCapability()) != 0 :: CInt ||
- Capability_interrupt(MyCapability()) != 0 :: CInt) {
+ Capability_interrupt(MyCapability()) != 0 :: CInt ||
+ (StgTSO_alloc_limit(CurrentTSO) `lt` 0 &&
+ (TO_W_(StgTSO_flags(CurrentTSO)) & TSO_ALLOC_LIMIT) != 0)) {
ret = ThreadYielding;
goto sched;
} else {
diff --git a/rts/Linker.c b/rts/Linker.c
index ea7c1c67b9..6ddf4bef9d 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -1230,6 +1230,10 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(rts_getFunPtr) \
SymI_HasProto(rts_getStablePtr) \
SymI_HasProto(rts_getThreadId) \
+ SymI_HasProto(rts_getThreadAllocationCounter) \
+ SymI_HasProto(rts_setThreadAllocationCounter) \
+ SymI_HasProto(rts_enableThreadAllocationLimit) \
+ SymI_HasProto(rts_disableThreadAllocationLimit) \
SymI_HasProto(rts_getWord) \
SymI_HasProto(rts_getWord8) \
SymI_HasProto(rts_getWord16) \
diff --git a/rts/Prelude.h b/rts/Prelude.h
index 89e80a0a3d..ca08e2c84e 100644
--- a/rts/Prelude.h
+++ b/rts/Prelude.h
@@ -37,6 +37,7 @@ extern StgClosure ZCMain_main_closure;
PRELUDE_CLOSURE(base_GHCziIOziException_stackOverflow_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_heapOverflow_closure);
+PRELUDE_CLOSURE(base_GHCziIOziException_allocationLimitExceeded_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnThrowTo_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnMVar_closure);
PRELUDE_CLOSURE(base_GHCziIOziException_blockedIndefinitelyOnSTM_closure);
@@ -100,6 +101,7 @@ PRELUDE_INFO(base_GHCziStable_StablePtr_con_info);
#define stackOverflow_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_stackOverflow_closure)
#define heapOverflow_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_heapOverflow_closure)
+#define allocationLimitExceeded_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_allocationLimitExceeded_closure)
#define blockedIndefinitelyOnMVar_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_blockedIndefinitelyOnMVar_closure)
#define blockedIndefinitelyOnSTM_closure DLL_IMPORT_DATA_REF(base_GHCziIOziException_blockedIndefinitelyOnSTM_closure)
#define nonTermination_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nonTermination_closure)
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index a5440e40ad..847076bc6f 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -89,6 +89,60 @@ suspendComputation (Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here)
}
/* -----------------------------------------------------------------------------
+ throwToSelf
+
+ Useful for throwing an async exception in a thread from the
+ runtime. It handles unlocking the throwto message returned by
+ throwTo().
+
+ Note [Throw to self when masked]
+
+ When a StackOverflow occurs when the thread is masked, we want to
+ defer the exception to when the thread becomes unmasked/hits an
+ interruptible point. We already have a mechanism for doing this,
+ the blocked_exceptions list, but the use here is a bit unusual,
+ because an exception is normally only added to this list upon
+ an asynchronous 'throwTo' call (with all of the relevant
+ multithreaded nonsense). Morally, a stack overflow should be an
+ asynchronous exception sent by a thread to itself, and it should
+ have the same semantics. But there are a few key differences:
+
+ - If you actually tried to send an asynchronous exception to
+ yourself using throwTo, the exception would actually immediately
+ be delivered. This is because throwTo itself is considered an
+ interruptible point, so the exception is always deliverable. Thus,
+ ordinarily, we never end up with a message to oneself in the
+ blocked_exceptions queue.
+
+ - In the case of a StackOverflow, we don't actually care about the
+ wakeup semantics; when an exception is delivered, the thread that
+ originally threw the exception should be woken up, since throwTo
+ blocks until the exception is successfully thrown. Fortunately,
+ it is harmless to wake up a thread that doesn't actually need waking
+ up, e.g. ourselves.
+
+ - No synchronization is necessary, because we own the TSO and the
+ capability. You can observe this by tracing through the execution
+ of throwTo. We skip synchronizing the message and inter-capability
+ communication.
+
+ We think this doesn't break any invariants, but do be careful!
+ -------------------------------------------------------------------------- */
+
+void
+throwToSelf (Capability *cap, StgTSO *tso, StgClosure *exception)
+{
+ MessageThrowTo *m;
+
+ m = throwTo(cap, tso, tso, exception);
+
+ if (m != NULL) {
+ // throwTo leaves it locked
+ unlockClosure((StgClosure*)m, &stg_MSG_THROWTO_info);
+ }
+}
+
+/* -----------------------------------------------------------------------------
throwTo
This function may be used to throw an exception from one thread to
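The first bullet of Note [Throw to self when masked] above is directly observable from Haskell. A small sketch, using an arbitrary ErrorCall as the payload:

    import Control.Concurrent (myThreadId)
    import Control.Exception (ErrorCall (..), mask_, throwTo)

    main :: IO ()
    main = mask_ $ do
      tid <- myThreadId
      -- throwTo is itself an interruptible point, so an exception
      -- thrown to oneself is delivered immediately even under mask_;
      -- this is why a self-throw normally never leaves a message on
      -- the blocked_exceptions queue.
      throwTo tid (ErrorCall "delivered immediately")
      putStrLn "never reached"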
diff --git a/rts/RaiseAsync.h b/rts/RaiseAsync.h
index 1f61b8c72d..65ca4f5103 100644
--- a/rts/RaiseAsync.h
+++ b/rts/RaiseAsync.h
@@ -28,7 +28,11 @@ void throwToSingleThreaded_ (Capability *cap,
StgClosure *exception,
rtsBool stop_at_atomically);
-void suspendComputation (Capability *cap,
+void throwToSelf (Capability *cap,
+ StgTSO *tso,
+ StgClosure *exception);
+
+void suspendComputation (Capability *cap,
StgTSO *tso,
StgUpdateFrame *stop_here);
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index af1b2049f6..fb1e2ec07b 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -137,6 +137,7 @@ void initRtsFlagsDefaults(void)
#else
RtsFlags.GcFlags.heapBase = 0; /* means don't care */
#endif
+ RtsFlags.GcFlags.allocLimitGrace = (100*1024) / BLOCK_SIZE;
#ifdef DEBUG
RtsFlags.DebugFlags.scheduler = rtsFalse;
@@ -402,6 +403,8 @@ usage_text[] = {
" +PAPI_EVENT - collect papi preset event PAPI_EVENT",
" #NATIVE_EVENT - collect native event NATIVE_EVENT (in hex)",
#endif
+" -xq The allocation limit given to a thread after it receives",
+" an AllocationLimitExceeded exception. (default: 100k)",
"",
"RTS options may also be specified using the GHCRTS environment variable.",
"",
@@ -1360,6 +1363,13 @@ error = rtsTrue;
/* The option prefix '-xx' is reserved for future extension. KSW 1999-11. */
+ case 'q':
+ OPTION_UNSAFE;
+ RtsFlags.GcFlags.allocLimitGrace
+ = decodeSize(rts_argv[arg], 3, BLOCK_SIZE, HS_INT_MAX)
+ / BLOCK_SIZE;
+ break;
+
default:
OPTION_SAFE;
errorBelch("unknown RTS option: %s",rts_argv[arg]);
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index aa7306f88a..640811ff17 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -208,6 +208,7 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
getStablePtr((StgPtr)blockedIndefinitelyOnMVar_closure);
getStablePtr((StgPtr)nonTermination_closure);
getStablePtr((StgPtr)blockedIndefinitelyOnSTM_closure);
+ getStablePtr((StgPtr)allocationLimitExceeded_closure);
getStablePtr((StgPtr)nestedAtomically_closure);
getStablePtr((StgPtr)runSparks_closure);
diff --git a/rts/Schedule.c b/rts/Schedule.c
index adf2b5cb39..b1b489a6d1 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -481,6 +481,10 @@ run_thread:
// happened. So find the new location:
t = cap->r.rCurrentTSO;
+ // cap->r.rCurrentTSO is charged for calls to allocate(), so we
+ // don't want it set during scheduler operations.
+ cap->r.rCurrentTSO = NULL;
+
// And save the current errno in this thread.
// XXX: possibly bogus for SMP because this thread might already
// be running again, see code below.
@@ -1078,6 +1082,21 @@ schedulePostRunThread (Capability *cap, StgTSO *t)
}
}
+ //
+ // If the current thread's allocation limit has run out, send it
+ // the AllocationLimitExceeded exception.
+
+ if (t->alloc_limit < 0 && (t->flags & TSO_ALLOC_LIMIT)) {
+ // Use a throwToSelf rather than a throwToSingleThreaded, because
+ // it correctly handles the case where the thread is currently
+ // inside mask. Also the thread might be blocked (e.g. on an
+ // MVar), and throwToSingleThreaded doesn't unblock it
+ // correctly in that case.
+ throwToSelf(cap, t, allocationLimitExceeded_closure);
+ t->alloc_limit = (StgInt64)RtsFlags.GcFlags.allocLimitGrace
+ * BLOCK_SIZE;
+ }
+
/* some statistics gathering in the parallel case */
}
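This hook is also what gives limits their re-triggering behaviour: each delivery refills the counter with the -xq grace (100k by default), so a thread that catches the exception and keeps allocating is hit again. A sketch of that behaviour, reusing the GHC.Conc names assumed above:

    import Control.Exception (AllocationLimitExceeded (..), evaluate, handle)
    import Control.Monad (forever, void)
    import GHC.Conc (enableAllocationLimit, setAllocationCounter)

    main :: IO ()
    main = do
      setAllocationCounter (512 * 1024)
      enableAllocationLimit
      -- Each handler run starts from the refilled grace allowance, so
      -- the next burst of allocation trips the exception once more.
      forever $
        handle (\AllocationLimitExceeded -> putStrLn "limit hit again") $
          void (evaluate (length [1 .. 1000000 :: Int]))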
diff --git a/rts/Threads.c b/rts/Threads.c
index af4353fc49..b82295284b 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -110,6 +110,8 @@ createThread(Capability *cap, W_ size)
tso->stackobj = stack;
tso->tot_stack_size = stack->stack_size;
+ tso->alloc_limit = 0;
+
tso->trec = NO_TREC;
#ifdef PROFILING
@@ -164,6 +166,31 @@ rts_getThreadId(StgPtr tso)
return ((StgTSO *)tso)->id;
}
+/* ---------------------------------------------------------------------------
+ * Getting & setting the thread allocation limit
+ * ------------------------------------------------------------------------ */
+HsInt64 rts_getThreadAllocationCounter(StgPtr tso)
+{
+ // NB. doesn't take into account allocation in the current nursery
+ // block, so it might be off by up to 4k.
+ return ((StgTSO *)tso)->alloc_limit;
+}
+
+void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i)
+{
+ ((StgTSO *)tso)->alloc_limit = i;
+}
+
+void rts_enableThreadAllocationLimit(StgPtr tso)
+{
+ ((StgTSO *)tso)->flags |= TSO_ALLOC_LIMIT;
+}
+
+void rts_disableThreadAllocationLimit(StgPtr tso)
+{
+ ((StgTSO *)tso)->flags &= ~TSO_ALLOC_LIMIT;
+}
+
/* -----------------------------------------------------------------------------
Remove a thread from a queue.
Fails fatally if the TSO is not on the queue.
@@ -524,21 +551,8 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
stg_min(tso->stackobj->stack + tso->stackobj->stack_size,
tso->stackobj->sp+64)));
- if (tso->flags & TSO_BLOCKEX) {
- // NB. StackOverflow exceptions must be deferred if the thread is
- // inside Control.Exception.mask. See bug #767 and bug #8303.
- // This implementation is a minor hack, see Note [Throw to self when masked]
- MessageThrowTo *msg = (MessageThrowTo*)allocate(cap, sizeofW(MessageThrowTo));
- SET_HDR(msg, &stg_MSG_THROWTO_info, CCS_SYSTEM);
- msg->source = tso;
- msg->target = tso;
- msg->exception = (StgClosure *)stackOverflow_closure;
- blockedThrowTo(cap, tso, msg);
- } else {
- // Send this thread the StackOverflow exception
- throwToSingleThreaded(cap, tso, (StgClosure *)stackOverflow_closure);
- return;
- }
+ // Note [Throw to self when masked], also #767 and #8303.
+ throwToSelf(cap, tso, (StgClosure *)stackOverflow_closure);
}
@@ -669,39 +683,6 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
// IF_DEBUG(scheduler,printTSO(new_tso));
}
-/* Note [Throw to self when masked]
- *
- * When a StackOverflow occurs when the thread is masked, we want to
- * defer the exception to when the thread becomes unmasked/hits an
- * interruptible point. We already have a mechanism for doing this,
- * the blocked_exceptions list, but the use here is a bit unusual,
- * because an exception is normally only added to this list upon
- * an asynchronous 'throwTo' call (with all of the relevant
- * multithreaded nonsense). Morally, a stack overflow should be an
- * asynchronous exception sent by a thread to itself, and it should
- * have the same semantics. But there are a few key differences:
- *
- * - If you actually tried to send an asynchronous exception to
- * yourself using throwTo, the exception would actually immediately
- * be delivered. This is because throwTo itself is considered an
- * interruptible point, so the exception is always deliverable. Thus,
- * ordinarily, we never end up with a message to oneself in the
- * blocked_exceptions queue.
- *
- * - In the case of a StackOverflow, we don't actually care about the
- * wakeup semantics; when an exception is delivered, the thread that
- * originally threw the exception should be woken up, since throwTo
- * blocks until the exception is successfully thrown. Fortunately,
- * it is harmless to wake up a thread that doesn't actually need waking
- * up, e.g. ourselves.
- *
- * - No synchronization is necessary, because we own the TSO and the
- * capability. You can observe this by tracing through the execution
- * of throwTo. We skip synchronizing the message and inter-capability
- * communication.
- *
- * We think this doesn't break any invariants, but do be careful!
- */
/* ---------------------------------------------------------------------------
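Because the counter counts down and, as the comment on rts_getThreadAllocationCounter notes, omits the current nursery block, bracketing an action with two reads measures its allocation to within about 4k. A sketch, assuming GHC.Conc also wraps the getter as getAllocationCounter:

    import Data.Int (Int64)
    import GHC.Conc (getAllocationCounter)

    -- Approximate bytes allocated by an action on the calling thread.
    -- The counter counts down, hence (before - after); the result may
    -- be off by up to 4k of unaccounted current-nursery allocation.
    measureAlloc :: IO a -> IO (Int64, a)
    measureAlloc act = do
      before <- getAllocationCounter
      r      <- act
      after  <- getAllocationCounter
      return (before - after, r)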
diff --git a/rts/package.conf.in b/rts/package.conf.in
index 4c8686f262..25fb5eb543 100644
--- a/rts/package.conf.in
+++ b/rts/package.conf.in
@@ -98,6 +98,7 @@ ld-options:
, "-Wl,-u,_base_ControlziExceptionziBase_nonTermination_closure"
, "-Wl,-u,_base_GHCziIOziException_blockedIndefinitelyOnMVar_closure"
, "-Wl,-u,_base_GHCziIOziException_blockedIndefinitelyOnSTM_closure"
+ , "-Wl,-u,_base_GHCziIOziException_allocationQuotaExceeded_closure"
, "-Wl,-u,_base_ControlziExceptionziBase_nestedAtomically_closure"
, "-Wl,-u,_base_GHCziWeak_runFinalizzerBatch_closure"
, "-Wl,-u,_base_GHCziTopHandler_flushStdHandles_closure"
@@ -138,6 +139,7 @@ ld-options:
, "-Wl,-u,base_ControlziExceptionziBase_nonTermination_closure"
, "-Wl,-u,base_GHCziIOziException_blockedIndefinitelyOnMVar_closure"
, "-Wl,-u,base_GHCziIOziException_blockedIndefinitelyOnSTM_closure"
+ , "-Wl,-u,base_GHCziIOziException_allocationQuotaExceeded_closure"
, "-Wl,-u,base_ControlziExceptionziBase_nestedAtomically_closure"
, "-Wl,-u,base_GHCziWeak_runFinalizzerBatch_closure"
, "-Wl,-u,base_GHCziTopHandler_flushStdHandles_closure"
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 86bd1c2bb3..865a890fcc 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -684,7 +684,10 @@ StgPtr allocate (Capability *cap, W_ n)
TICK_ALLOC_HEAP_NOCTR(WDS(n));
CCS_ALLOC(cap->r.rCCCS,n);
-
+ if (cap->r.rCurrentTSO != NULL) {
+ cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
+ }
+
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
W_ req_blocks = (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
@@ -821,6 +824,9 @@ allocatePinned (Capability *cap, W_ n)
TICK_ALLOC_HEAP_NOCTR(WDS(n));
CCS_ALLOC(cap->r.rCCCS,n);
+ if (cap->r.rCurrentTSO != NULL) {
+ cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
+ }
bd = cap->pinned_object_block;