Use mutator threads to do GC, instead of having a separate pool of GC threads

Previously, the GC had its own pool of threads to use as workers when doing parallel GC. There was a "leader", which was the mutator thread that initiated the GC, and the other threads were taken from the pool. This was simple and worked fine for sequential programs, where we did most of the benchmarking for the parallel GC, but fell down for parallel programs. When we have N mutator threads and N cores, at GC time we would have to stop N-1 mutator threads and start up N-1 GC threads, and hope that the OS schedules them all onto separate cores. In practice it doesn't, as you might expect. Now we use the mutator threads to do GC. This works quite nicely, particularly for parallel programs, where each mutator thread scans its own spark pool, which is probably in its cache anyway. There are some flag changes: -g<n> is removed (-g1 is still accepted for backwards compat). There's no way to have a different number of GC threads than mutator threads now. The new flags are: "-q1": use one OS thread for GC (turns off parallel GC); "-qg<n>": use parallel GC for generations >= <n> (default: 1). Using parallel GC only for generations >=1 works well for sequential programs. Compiling an ordinary sequential program with -threaded and running it with -N2 or more should help if you do a lot of GC. I've found that adding -qg0 (do parallel GC for generation 0 too) speeds up some parallel programs, but slows down some sequential programs. Being conservative, I left the threshold at 1. ToDo: document the new options.
author: Simon Marlow <marlowsd@gmail.com> 2008-11-21 15:12:33 +0000
committer: Simon Marlow <marlowsd@gmail.com> 2008-11-21 15:12:33 +0000
commit: 3ebcd3deb769a03f4ded0fca2cf38201048c0214 (patch)
tree: ba4f0a6fc73550425a0db988bf4fbb9651d110aa /rts/Capability.c
parent: c373ebdb90edee470ad6fa8277cbe7aa369f23f8 (diff)
download: haskell-3ebcd3deb769a03f4ded0fca2cf38201048c0214.tar.gz
1 files changed, 19 insertions, 55 deletions
diff --git a/rts/Capability.c b/rts/Capability.c
index 8dddbc5d34..7c6ceb5c66 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -26,6 +26,7 @@
 #include "Schedule.h"
 #include "Sparks.h"
 #include "Trace.h"
+#include "GC.h"
 
 // one global capability, this is the Capability for non-threaded
 // builds, and for +RTS -N1
@@ -190,6 +191,7 @@ initCapability( Capability *cap, nat i )
 
     cap->no = i;
     cap->in_haskell        = rtsFalse;
+    cap->in_gc             = rtsFalse;
 
     cap->run_queue_hd      = END_TSO_QUEUE;
     cap->run_queue_tl      = END_TSO_QUEUE;
@@ -358,14 +360,7 @@ releaseCapability_ (Capability* cap,
 	return;
     }
 
-    /* if waiting_for_gc was the reason to release the cap: thread
-       comes from yieldCap->releaseAndQueueWorker. Unconditionally set
-       cap. free and return (see default after the if-protected other
-       special cases). Thread will wait on cond.var and re-acquire the
-       same cap after GC (GC-triggering cap. calls releaseCap and
-       enters the spare_workers case)
-    */
-    if (waiting_for_gc) {
+    if (waiting_for_gc == PENDING_GC_SEQ) {
       last_free_capability = cap; // needed?
       trace(TRACE_sched | DEBUG_sched, 
 	    "GC pending, set capability %d free", cap->no);
@@ -557,6 +552,12 @@ yieldCapability (Capability** pCap, Task *task)
 {
     Capability *cap = *pCap;
 
+    if (waiting_for_gc == PENDING_GC_PAR) {
+	debugTrace(DEBUG_sched, "capability %d: becoming a GC thread", cap->no);
+        gcWorkerThread(cap);
+        return;
+    }
+
 	debugTrace(DEBUG_sched, "giving up capability %d", cap->no);
 
 	// We must now release the capability and wait to be woken up
@@ -655,58 +656,21 @@ wakeupThreadOnCapability (Capability *my_cap,
 }
 
 /* ----------------------------------------------------------------------------
- * prodCapabilities
+ * prodCapability
  *
- * Used to indicate that the interrupted flag is now set, or some
- * other global condition that might require waking up a Task on each
- * Capability.
- * ------------------------------------------------------------------------- */
-
-static void
-prodCapabilities(rtsBool all)
-{
-    nat i;
-    Capability *cap;
-    Task *task;
-
-    for (i=0; i < n_capabilities; i++) {
-	cap = &capabilities[i];
-	ACQUIRE_LOCK(&cap->lock);
-	if (!cap->running_task) {
-	    if (cap->spare_workers) {
-		trace(TRACE_sched, "resuming capability %d", cap->no);
-		task = cap->spare_workers;
-		ASSERT(!task->stopped);
-		giveCapabilityToTask(cap,task);
-		if (!all) {
-		    RELEASE_LOCK(&cap->lock);
-		    return;
-		}
-	    }
-	}
-	RELEASE_LOCK(&cap->lock);
-    }
-    return;
-}
-
-void
-prodAllCapabilities (void)
-{
-    prodCapabilities(rtsTrue);
-}
-
-/* ----------------------------------------------------------------------------
- * prodOneCapability
- *
- * Like prodAllCapabilities, but we only require a single Task to wake
- * up in order to service some global event, such as checking for
- * deadlock after some idle time has passed.
+ * If a Capability is currently idle, wake up a Task on it.  Used to 
+ * get every Capability into the GC.
  * ------------------------------------------------------------------------- */
 
 void
-prodOneCapability (void)
+prodCapability (Capability *cap, Task *task)
 {
-    prodCapabilities(rtsFalse);
+    ACQUIRE_LOCK(&cap->lock);
+    if (!cap->running_task) {
+        cap->running_task = task;
+        releaseCapability_(cap,rtsTrue);
+    }
+    RELEASE_LOCK(&cap->lock);
 }
 
 /* ----------------------------------------------------------------------------
author	Simon Marlow <marlowsd@gmail.com>	2008-11-21 15:12:33 +0000
committer	Simon Marlow <marlowsd@gmail.com>	2008-11-21 15:12:33 +0000
commit	3ebcd3deb769a03f4ded0fca2cf38201048c0214 (patch)
tree	ba4f0a6fc73550425a0db988bf4fbb9651d110aa /rts/Capability.c
parent	c373ebdb90edee470ad6fa8277cbe7aa369f23f8 (diff)
download	haskell-3ebcd3deb769a03f4ded0fca2cf38201048c0214.tar.gz