summary | refs | log | tree | commit | diff
path: root/rts
diff options
context:
space:
mode:
author: Simon Marlow <smarlow@fb.com> 2016-10-28 08:30:14 -0700
committer: Simon Marlow <marlowsd@gmail.com> 2016-10-29 19:33:52 +0100
commit4e088b497edd83f361898fa9d2d62ff310b08945 (patch)
treef0ef3a8f2443cec7dcdce15493fd8ad92e197997 /rts
parent4b300a32257c3ed272747f7e75709a26dd2d8407 (diff)
downloadhaskell-4e088b497edd83f361898fa9d2d62ff310b08945.tar.gz
Fix a bug in parallel GC synchronisation
Summary: The problem boils down to global variables: in particular
gc_threads[], which was being modified by a subsequent GC before the
previous GC had finished with it. The fix is to not use global
variables.

This was causing setnumcapabilities001 to fail (again!). It's an old
bug though.

Test Plan: Ran setnumcapabilities001 in a loop for a couple of hours.
Before this patch it had been failing after a few minutes. Not a very
scientific test, but it's the best I have.

Reviewers: bgamari, austin, fryguybob, niteria, erikd

Subscribers: thomie

Differential Revision: https://phabricator.haskell.org/D2654
Diffstat (limited to 'rts')
-rw-r--r--rts/Schedule.c30
-rw-r--r--rts/sm/GC.c39
-rw-r--r--rts/sm/GC.h8
-rw-r--r--rts/sm/GCThread.h10
4 files changed, 45 insertions, 42 deletions
diff --git a/rts/Schedule.c b/rts/Schedule.c
index a44512b3cb..7d5654fd03 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -1535,6 +1535,9 @@ scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,
uint32_t n_idle_caps = 0, n_failed_trygrab_idles = 0;
StgTSO *tso;
rtsBool *idle_cap;
+ // idle_cap is an array (allocated later) of size n_capabilities, where
+ // idle_cap[i] is rtsTrue if capability i will be idle during this GC
+ // cycle.
#endif
if (sched_state == SCHED_SHUTTING_DOWN) {
@@ -1735,23 +1738,13 @@ scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,
}
debugTrace(DEBUG_sched, "%d idle caps", n_idle_caps);
- // We set the gc_thread[i]->idle flag if that
- // capability/thread is not participating in this collection.
- // We also keep a local record of which capabilities are idle
- // in idle_cap[], because scheduleDoGC() is re-entrant:
- // another thread might start a GC as soon as we've finished
- // this one, and thus the gc_thread[]->idle flags are invalid
- // as soon as we release any threads after GC. Getting this
- // wrong leads to a rare and hard to debug deadlock!
-
for (i=0; i < n_capabilities; i++) {
- gc_threads[i]->idle = idle_cap[i];
capabilities[i]->idle++;
}
// For all capabilities participating in this GC, wait until
// they have stopped mutating and are standing by for GC.
- waitForGcThreads(cap);
+ waitForGcThreads(cap, idle_cap);
#if defined(THREADED_RTS)
// Stable point where we can do a global check on our spark counters
@@ -1819,9 +1812,9 @@ delete_threads_and_gc:
// reset pending_sync *before* GC, so that when the GC threads
// emerge they don't immediately re-enter the GC.
pending_sync = 0;
- GarbageCollect(collect_gen, heap_census, gc_type, cap);
+ GarbageCollect(collect_gen, heap_census, gc_type, cap, idle_cap);
#else
- GarbageCollect(collect_gen, heap_census, 0, cap);
+ GarbageCollect(collect_gen, heap_census, 0, cap, NULL);
#endif
traceSparkCounters(cap);
@@ -1871,7 +1864,6 @@ delete_threads_and_gc:
if (gc_type == SYNC_GC_PAR)
{
- releaseGCThreads(cap);
for (i = 0; i < n_capabilities; i++) {
if (i != cap->no) {
if (idle_cap[i]) {
@@ -1884,6 +1876,16 @@ delete_threads_and_gc:
}
}
task->cap = cap;
+
+ // releaseGCThreads() happens *after* we have released idle
+ // capabilities. Otherwise what can happen is one of the released
+ // threads starts a new GC, and finds that it can't acquire some of
+ // the disabled capabilities, because the previous GC still holds
+ // them, so those disabled capabilities will not be idle during the
+ // next GC round. However, if we release the capabilities first,
+ // then they will be free (because they're disabled) when the next
+ // GC cycle happens.
+ releaseGCThreads(cap, idle_cap);
}
#endif
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index a6a1a9a460..46530b2cd9 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -153,8 +153,8 @@ static void start_gc_threads (void);
static void scavenge_until_all_done (void);
static StgWord inc_running (void);
static StgWord dec_running (void);
-static void wakeup_gc_threads (uint32_t me);
-static void shutdown_gc_threads (uint32_t me);
+static void wakeup_gc_threads (uint32_t me, rtsBool idle_cap[]);
+static void shutdown_gc_threads (uint32_t me, rtsBool idle_cap[]);
static void collect_gct_blocks (void);
static void collect_pinned_object_blocks (void);
@@ -182,7 +182,8 @@ void
GarbageCollect (uint32_t collect_gen,
rtsBool do_heap_census,
uint32_t gc_type USED_IF_THREADS,
- Capability *cap)
+ Capability *cap,
+ rtsBool idle_cap[])
{
bdescr *bd;
generation *gen;
@@ -339,7 +340,7 @@ GarbageCollect (uint32_t collect_gen,
// NB. do this after the mutable lists have been saved above, otherwise
// the other GC threads will be writing into the old mutable lists.
inc_running();
- wakeup_gc_threads(gct->thread_index);
+ wakeup_gc_threads(gct->thread_index, idle_cap);
traceEventGcWork(gct->cap);
@@ -358,7 +359,7 @@ GarbageCollect (uint32_t collect_gen,
} else {
scavenge_capability_mut_lists(gct->cap);
for (n = 0; n < n_capabilities; n++) {
- if (gc_threads[n]->idle) {
+ if (idle_cap[n]) {
markCapability(mark_root, gct, capabilities[n],
rtsTrue/*don't mark sparks*/);
scavenge_capability_mut_lists(capabilities[n]);
@@ -416,7 +417,7 @@ GarbageCollect (uint32_t collect_gen,
break;
}
- shutdown_gc_threads(gct->thread_index);
+ shutdown_gc_threads(gct->thread_index, idle_cap);
// Now see which stable names are still alive.
gcStableTables();
@@ -428,7 +429,7 @@ GarbageCollect (uint32_t collect_gen,
}
} else {
for (n = 0; n < n_capabilities; n++) {
- if (n == cap->no || gc_threads[n]->idle) {
+ if (n == cap->no || idle_cap[n]) {
pruneSparkQueue(capabilities[n]);
}
}
@@ -814,7 +815,6 @@ new_gc_thread (uint32_t n, gc_thread *t)
#endif
t->thread_index = n;
- t->idle = rtsFalse;
t->free_blocks = NULL;
t->gc_count = 0;
@@ -1092,7 +1092,7 @@ gcWorkerThread (Capability *cap)
#if defined(THREADED_RTS)
void
-waitForGcThreads (Capability *cap USED_IF_THREADS)
+waitForGcThreads (Capability *cap USED_IF_THREADS, rtsBool idle_cap[])
{
const uint32_t n_threads = n_capabilities;
const uint32_t me = cap->no;
@@ -1101,7 +1101,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS)
while(retry) {
for (i=0; i < n_threads; i++) {
- if (i == me || gc_threads[i]->idle) continue;
+ if (i == me || idle_cap[i]) continue;
if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
prodCapability(capabilities[i], cap->running_task);
}
@@ -1109,7 +1109,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS)
for (j=0; j < 10; j++) {
retry = rtsFalse;
for (i=0; i < n_threads; i++) {
- if (i == me || gc_threads[i]->idle) continue;
+ if (i == me || idle_cap[i]) continue;
write_barrier();
interruptCapability(capabilities[i]);
if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
@@ -1133,7 +1133,8 @@ start_gc_threads (void)
}
static void
-wakeup_gc_threads (uint32_t me USED_IF_THREADS)
+wakeup_gc_threads (uint32_t me USED_IF_THREADS,
+ rtsBool idle_cap[] USED_IF_THREADS)
{
#if defined(THREADED_RTS)
uint32_t i;
@@ -1141,10 +1142,11 @@ wakeup_gc_threads (uint32_t me USED_IF_THREADS)
if (n_gc_threads == 1) return;
for (i=0; i < n_gc_threads; i++) {
- if (i == me || gc_threads[i]->idle) continue;
+ if (i == me || idle_cap[i]) continue;
inc_running();
debugTrace(DEBUG_gc, "waking up gc thread %d", i);
- if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
+ if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY)
+ barf("wakeup_gc_threads");
gc_threads[i]->wakeup = GC_THREAD_RUNNING;
ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
@@ -1157,7 +1159,8 @@ wakeup_gc_threads (uint32_t me USED_IF_THREADS)
// standby state, otherwise they may still be executing inside
// any_work(), and may even remain awake until the next GC starts.
static void
-shutdown_gc_threads (uint32_t me USED_IF_THREADS)
+shutdown_gc_threads (uint32_t me USED_IF_THREADS,
+ rtsBool idle_cap[] USED_IF_THREADS)
{
#if defined(THREADED_RTS)
uint32_t i;
@@ -1165,7 +1168,7 @@ shutdown_gc_threads (uint32_t me USED_IF_THREADS)
if (n_gc_threads == 1) return;
for (i=0; i < n_gc_threads; i++) {
- if (i == me || gc_threads[i]->idle) continue;
+ if (i == me || idle_cap[i]) continue;
while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
busy_wait_nop();
write_barrier();
@@ -1176,13 +1179,13 @@ shutdown_gc_threads (uint32_t me USED_IF_THREADS)
#if defined(THREADED_RTS)
void
-releaseGCThreads (Capability *cap USED_IF_THREADS)
+releaseGCThreads (Capability *cap USED_IF_THREADS, rtsBool idle_cap[])
{
const uint32_t n_threads = n_capabilities;
const uint32_t me = cap->no;
uint32_t i;
for (i=0; i < n_threads; i++) {
- if (i == me || gc_threads[i]->idle) continue;
+ if (i == me || idle_cap[i]) continue;
if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
barf("releaseGCThreads");
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index c32064a4dd..707d4b52d6 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -6,7 +6,7 @@
*
* Documentation on the architecture of the Garbage Collector can be
* found in the online commentary:
- *
+ *
* http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
*
* ---------------------------------------------------------------------------*/
@@ -20,7 +20,7 @@
void GarbageCollect (rtsBool force_major_gc,
rtsBool do_heap_census,
- uint32_t gc_type, Capability *cap);
+ uint32_t gc_type, Capability *cap, rtsBool idle_cap[]);
typedef void (*evac_fn)(void *user, StgClosure **root);
@@ -55,8 +55,8 @@ void initGcThreads (uint32_t from, uint32_t to);
void freeGcThreads (void);
#if defined(THREADED_RTS)
-void waitForGcThreads (Capability *cap);
-void releaseGCThreads (Capability *cap);
+void waitForGcThreads (Capability *cap, rtsBool idle_cap[]);
+void releaseGCThreads (Capability *cap, rtsBool idle_cap[]);
#endif
#define WORK_UNIT_WORDS 128
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
index b728315554..f940263665 100644
--- a/rts/sm/GCThread.h
+++ b/rts/sm/GCThread.h
@@ -6,7 +6,7 @@
*
* Documentation on the architecture of the Garbage Collector can be
* found in the online commentary:
- *
+ *
* http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
*
* ---------------------------------------------------------------------------*/
@@ -21,7 +21,7 @@
/* -----------------------------------------------------------------------------
General scheme
-
+
ToDo: move this to the wiki when the implementation is done.
We're only going to try to parallelise the copying GC for now. The
@@ -67,13 +67,13 @@
/* -----------------------------------------------------------------------------
Generation Workspace
-
+
A generation workspace exists for each generation for each GC
thread. The GC thread takes a block from the todos list of the
generation into the scanbd and then scans it. Objects referred to
by those in the scan block are copied into the todo or scavd blocks
of the relevant generation.
-
+
------------------------------------------------------------------------- */
typedef struct gen_workspace_ {
@@ -127,7 +127,6 @@ typedef struct gc_thread_ {
volatile StgWord wakeup; // NB not StgWord8; only StgWord is guaranteed atomic
#endif
uint32_t thread_index; // a zero based index identifying the thread
- rtsBool idle; // sitting out of this GC cycle
bdescr * free_blocks; // a buffer of free blocks for this thread
// during GC without accessing the block
@@ -211,4 +210,3 @@ extern ThreadLocalKey gctKey;
#include "EndPrivate.h"
#endif // SM_GCTHREAD_H
-