| author | Ben Gamari <ben@smart-cactus.org> | 2023-02-02 17:21:27 -0500 |
|---|---|---|
| committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2023-03-08 15:02:31 -0500 |
| commit | ba73a807edbb444c49e0cf21ab2ce89226a77f2e | |
| tree | d26d06d494eb37deddd66e752f7c640f66a94f12 | |
| parent | f6f12a36346e19de7eed330537350d0b7420764a | |
| download | haskell-ba73a807edbb444c49e0cf21ab2ce89226a77f2e.tar.gz | |
nonmoving: Non-concurrent collection
| file | lines changed |
|---|---|
| rts/Schedule.c | 35 |
| rts/include/rts/storage/GC.h | 1 |
| rts/sm/GC.c | 17 |
| rts/sm/GC.h | 3 |
| rts/sm/NonMoving.c | 151 |
| rts/sm/NonMoving.h | 3 |
| rts/sm/NonMovingMark.c | 4 |
| testsuite/tests/ffi/should_run/ffi023_c.c | 2 |

8 files changed, 133 insertions(+), 83 deletions(-)
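In short: the patch threads a new `nonconcurrent` flag from the scheduler's GC entry points down into the non-moving collector, so that callers who must not return while marking is still in flight (RTS shutdown, and the new `performBlockingMajorGC()` API) can force the whole mark and sweep to run synchronously on the calling thread. A minimal sketch of the resulting public entry points, assuming a file compiled against the GHC RTS headers (the wrapper function itself is hypothetical):

```c
#include "Rts.h"

// Hypothetical illustration of the RTS API after this patch.
void gc_entry_points(void)
{
    performGC();              // a collection, not forced to be major
    performMajorGC();         // forced major; under -xn, marking may continue
                              // concurrently after this returns
    performBlockingMajorGC(); // new: forced major that returns only once the
                              // non-moving mark and sweep have completed
}
```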
```diff
diff --git a/rts/Schedule.c b/rts/Schedule.c
index 5c9f75859a..685862a08b 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -157,7 +157,10 @@
 static bool scheduleHandleThreadFinished( Capability *cap, Task *task,
                                           StgTSO *t );
 static bool scheduleNeedHeapProfile(bool ready_to_gc);
 static void scheduleDoGC( Capability **pcap, Task *task,
-                          bool force_major, bool is_overflow_gc, bool deadlock_detect );
+                          bool force_major,
+                          bool is_overflow_gc,
+                          bool deadlock_detect,
+                          bool nonconcurrent );
 
 static void deleteThread (StgTSO *tso);
 static void deleteAllThreads (void);
@@ -259,7 +262,7 @@ schedule (Capability *initialCapability, Task *task)
     case SCHED_INTERRUPTING:
         debugTrace(DEBUG_sched, "SCHED_INTERRUPTING");
         /* scheduleDoGC() deletes all the threads */
-        scheduleDoGC(&cap,task,true,false,false);
+        scheduleDoGC(&cap,task,true,false,false,false);
 
         // after scheduleDoGC(), we must be shutting down.  Either some
         // other Capability did the final GC, or we did it above,
@@ -572,7 +575,7 @@ run_thread:
     }
 
     if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) {
-      scheduleDoGC(&cap,task,false,ready_to_gc,false);
+      scheduleDoGC(&cap,task,false,ready_to_gc,false,false);
     }
   } /* end of while() */
 }
@@ -966,7 +969,7 @@ scheduleDetectDeadlock (Capability **pcap, Task *task)
         // they are unreachable and will therefore be sent an
         // exception.  Any threads thus released will be immediately
         // runnable.
-        scheduleDoGC (pcap, task, true/*force major GC*/, false /* Whether it is an overflow GC */, true/*deadlock detection*/);
+        scheduleDoGC (pcap, task, true/*force major GC*/, false /* Whether it is an overflow GC */, true/*deadlock detection*/, false/*nonconcurrent*/);
         cap = *pcap;
         // when force_major == true. scheduleDoGC sets
         // recent_activity to ACTIVITY_DONE_GC and turns off the timer
@@ -1015,7 +1018,7 @@ scheduleProcessInbox (Capability **pcap USED_IF_THREADS)
     while (!emptyInbox(cap)) {
         // Executing messages might use heap, so we should check for GC.
         if (doYouWantToGC(cap)) {
-            scheduleDoGC(pcap, cap->running_task, false, false, false);
+            scheduleDoGC(pcap, cap->running_task, false, false, false, false);
             cap = *pcap;
         }
@@ -1583,7 +1586,10 @@ void releaseAllCapabilities(uint32_t n, Capability *keep_cap, Task *task)
 // behind deadlock_detect argument.
 static void
 scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,
-              bool force_major, bool is_overflow_gc, bool deadlock_detect)
+              bool force_major,
+              bool is_overflow_gc,
+              bool deadlock_detect,
+              bool nonconcurrent)
 {
     Capability *cap = *pcap;
     bool heap_census;
@@ -1878,6 +1884,7 @@ delete_threads_and_gc:
         .do_heap_census = heap_census,
         .overflow_gc = is_overflow_gc,
         .deadlock_detect = deadlock_detect,
+        .nonconcurrent = nonconcurrent
     };
 
 #if defined(THREADED_RTS)
@@ -2778,7 +2785,7 @@ exitScheduler (bool wait_foreign USED_IF_THREADS)
         nonmovingStop();
         Capability *cap = task->cap;
         waitForCapability(&cap,task);
-        scheduleDoGC(&cap,task,true,false,false);
+        scheduleDoGC(&cap,task,true,false,false,true);
         ASSERT(task->incall->tso == NULL);
         releaseCapability(cap);
     }
@@ -2823,7 +2830,7 @@ freeScheduler( void )
    -------------------------------------------------------------------------- */
 
 static void
-performGC_(bool force_major)
+performGC_(bool force_major, bool nonconcurrent)
 {
     Task *task;
     Capability *cap = NULL;
@@ -2836,7 +2843,7 @@ performGC_(bool force_major)
     // TODO: do we need to traceTask*() here?
     waitForCapability(&cap,task);
-    scheduleDoGC(&cap,task,force_major,false,false);
+    scheduleDoGC(&cap,task,force_major,false,false,nonconcurrent);
     releaseCapability(cap);
     exitMyTask();
 }
@@ -2844,13 +2851,19 @@ performGC_(bool force_major)
 void
 performGC(void)
 {
-    performGC_(false);
+    performGC_(false, false);
 }
 
 void
 performMajorGC(void)
 {
-    performGC_(true);
+    performGC_(true, false);
+}
+
+void
+performBlockingMajorGC(void)
+{
+    performGC_(true, true);
 }
 
 /* ---------------------------------------------------------------------------
```
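All pre-existing `scheduleDoGC` call sites pass `false` for the new argument, so their behaviour is unchanged; only `exitScheduler` and the new `performBlockingMajorGC` opt in. A condensed sketch of those two opt-in call paths, with argument comments added (not the full function bodies from the diff):

```c
// Sketch of the two call paths that request a non-concurrent collection.
// Both funnel into scheduleDoGC(), whose final argument is the new flag.

// 1. RTS shutdown (exitScheduler): the final GC must be complete before
//    the scheduler tears down.
scheduleDoGC(&cap, task, true /*force_major*/, false /*is_overflow_gc*/,
             false /*deadlock_detect*/, true /*nonconcurrent*/);

// 2. The new public API, via performGC_():
void performBlockingMajorGC(void)
{
    performGC_(true /*force_major*/, true /*nonconcurrent*/);
}
```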
```diff
diff --git a/rts/include/rts/storage/GC.h b/rts/include/rts/storage/GC.h
index 77f7f38d9a..4dd8a2820b 100644
--- a/rts/include/rts/storage/GC.h
+++ b/rts/include/rts/storage/GC.h
@@ -215,6 +215,7 @@ extern W_ large_alloc_lim;
 
 void performGC(void);
 void performMajorGC(void);
+void performBlockingMajorGC(void);
 
 /* -----------------------------------------------------------------------------
    The CAF table - used to let us revert CAFs in GHCi
```

```diff
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 6857c9e6c1..695131e738 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -856,6 +856,8 @@ GarbageCollect (struct GcConfig config,
     // oldest_gen->scavenged_large_objects back to oldest_gen->large_objects.
     ASSERT(oldest_gen->scavenged_large_objects == NULL);
     if (RtsFlags.GcFlags.useNonmoving && major_gc) {
+        bool concurrent = false;
+
         // All threads in non-moving heap should be found to be alive, because
         // threads in the non-moving generation's list should live in the
         // non-moving heap, and we consider non-moving objects alive during
@@ -869,18 +871,21 @@ GarbageCollect (struct GcConfig config,
         // old_weak_ptr_list should be empty.
         ASSERT(oldest_gen->old_weak_ptr_list == NULL);
 
+#if defined(THREADED_RTS)
+        concurrent = !config.nonconcurrent;
+#else
+        // In the non-threaded runtime this is the only time we push to the
+        // upd_rem_set
+        nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set);
+#endif
+
         // dead_weak_ptr_list contains weak pointers with dead keys. Those need to
         // be kept alive because we'll use them in finalizeSchedulers(). Similarly
         // resurrected_threads are also going to be used in resurrectedThreads()
         // so we need to mark those too.
         // Note that in sequential case these lists will be appended with more
         // weaks and threads found to be dead in mark.
-#if !defined(THREADED_RTS)
-        // In the non-threaded runtime this is the only time we push to the
-        // upd_rem_set
-        nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set);
-#endif
-        nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
+        nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads, concurrent);
     }
 
     // Update the max size of older generations after a major GC:
```

```diff
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index 1f3034c1ec..dc7b773e99 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -26,6 +26,9 @@ struct GcConfig {
     bool overflow_gc;
     // is this GC triggered by a deadlock?
     bool deadlock_detect;
+    // should we force non-concurrent collection if the non-moving collector is
+    // being used?
+    bool nonconcurrent;
     // should we use parallel scavenging?
     bool parallel;
 };
```
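`GarbageCollect` turns the flag into an effective mode: in the threaded RTS the non-moving collection is concurrent unless `config.nonconcurrent` is set, while the single-threaded RTS always marks in place (and pushes its update remembered set at this point, since no mutator runs during the GC). A compilable toy model of just that decision, with the RTS types reduced to a bare struct (`GcConfigModel` and `decide_concurrent` are illustrative names, not RTS identifiers):

```c
#include <stdbool.h>

// Toy model of the decision made in GarbageCollect() above.
struct GcConfigModel { bool nonconcurrent; };

static bool decide_concurrent(struct GcConfigModel config)
{
#if defined(THREADED_RTS)
    return !config.nonconcurrent;  // threaded: concurrent unless forced off
#else
    (void)config;
    return false;                  // non-threaded: marking always runs in place
#endif
}
```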
```diff
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index 114f96a548..e7ce4ea1d3 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -548,7 +548,7 @@ MarkBudget sync_phase_marking_budget = 200000;
 #if defined(THREADED_RTS)
 static void* nonmovingConcurrentMark(void *mark_queue);
 #endif
-static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads);
+static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads, bool concurrent);
 
 // Add a segment to the free list.
 void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
@@ -712,7 +712,7 @@ static void nonmovingPrepareMark(void)
 #endif
 }
 
-void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
+void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads, bool concurrent STG_UNUSED)
 {
 #if defined(THREADED_RTS)
     // We can't start a new collection until the old one has finished
@@ -799,7 +799,7 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
     }
     trace(TRACE_nonmoving_gc, "Finished nonmoving GC preparation");
 
-    // We are now safe to start concurrent marking
+    // We are now safe to start (possibly concurrent) marking
 
     // Note that in concurrent mark we can't use dead_weaks and
     // resurrected_threads from the preparation to add new weaks and threads as
     // that would cause races between minor collection and mark. So we only pass
     // those lists to mark function in sequential case. In concurrent case we
     // allocate fresh lists.
 
-#if defined(THREADED_RTS)
     // If we're interrupting or shutting down, do not let this capability go and
     // run a STW collection. Reason: we won't be able to acquire this capability
     // again for the sync if we let it go, because it'll immediately start doing
     // a major GC, because that's what we do when exiting scheduler (see
     // exitScheduler()).
-    if (getSchedState() == SCHED_RUNNING) {
+    if (getSchedState() != SCHED_RUNNING) {
+        concurrent = false;
+    }
+
+#if defined(THREADED_RTS)
+    if (concurrent) {
         RELAXED_STORE(&concurrent_coll_running, true);
         nonmoving_write_barrier_enabled = true;
         debugTrace(DEBUG_nonmoving_gc, "Starting concurrent mark thread");
@@ -823,14 +827,19 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
             barf("nonmovingCollect: failed to spawn mark thread: %s", strerror(errno));
         }
         RELAXED_STORE(&mark_thread, thread);
+        return;
     } else {
-        nonmovingConcurrentMark(mark_queue);
+        RELEASE_SM_LOCK;
     }
-#else
+#endif
+
     // Use the weak and thread lists from the preparation for any new weaks and
     // threads found to be dead in mark.
-    nonmovingMark_(mark_queue, dead_weaks, resurrected_threads);
-#endif
+    nonmovingMark_(mark_queue, dead_weaks, resurrected_threads, false);
+
+    if (!concurrent) {
+        ACQUIRE_SM_LOCK;
+    }
 }
 
 /* Mark queue, threads, and weak pointers until no more weaks have been
@@ -862,7 +871,7 @@ static void* nonmovingConcurrentMark(void *data)
     MarkQueue *mark_queue = (MarkQueue*)data;
     StgWeak *dead_weaks = NULL;
     StgTSO *resurrected_threads = (StgTSO*)&stg_END_TSO_QUEUE_closure;
-    nonmovingMark_(mark_queue, &dead_weaks, &resurrected_threads);
+    nonmovingMark_(mark_queue, &dead_weaks, &resurrected_threads, true);
     return NULL;
 }
 
@@ -876,8 +885,11 @@ static void appendWeakList( StgWeak **w1, StgWeak *w2 )
 }
 #endif
 
-static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads)
+static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads, bool concurrent)
 {
+#if !defined(THREADED_RTS)
+    ASSERT(!concurrent);
+#endif
     ACQUIRE_LOCK(&nonmoving_collection_mutex);
     debugTrace(DEBUG_nonmoving_gc, "Starting mark...");
     stat_startNonmovingGc();
@@ -920,38 +932,41 @@ concurrent_marking:
     }
 
 #if defined(THREADED_RTS)
-    Task *task = newBoundTask();
-
-    // If at this point if we've decided to exit then just return
-    if (getSchedState() > SCHED_RUNNING) {
-        // Note that we break our invariants here and leave segments in
-        // nonmovingHeap.sweep_list, don't free nonmoving_large_objects etc.
-        // However because we won't be running sweep in the final GC this
-        // is OK.
-        //
-        // However, we must move any weak pointers remaining on
-        // nonmoving_old_weak_ptr_list back to nonmoving_weak_ptr_list
-        // such that their C finalizers can be run by hs_exit_.
-        appendWeakList(&nonmoving_weak_ptr_list, nonmoving_old_weak_ptr_list);
-        goto finish;
-    }
-
-    // We're still running, request a sync
-    nonmovingBeginFlush(task);
-
-    bool all_caps_syncd;
-    MarkBudget sync_marking_budget = sync_phase_marking_budget;
-    do {
-        all_caps_syncd = nonmovingWaitForFlush();
-        if (nonmovingMarkThreadsWeaks(&sync_marking_budget, mark_queue) == false) {
-            // We ran out of budget for marking. Abort sync.
-            // See Note [Sync phase marking budget].
-            traceConcSyncEnd();
-            stat_endNonmovingGcSync();
-            releaseAllCapabilities(n_capabilities, NULL, task);
-            goto concurrent_marking;
+    Task *task = NULL;
+    if (concurrent) {
+        task = newBoundTask();
+
+        // If at this point if we've decided to exit then just return
+        if (getSchedState() > SCHED_RUNNING) {
+            // Note that we break our invariants here and leave segments in
+            // nonmovingHeap.sweep_list, don't free nonmoving_large_objects etc.
+            // However because we won't be running sweep in the final GC this
+            // is OK.
+            //
+            // However, we must move any weak pointers remaining on
+            // nonmoving_old_weak_ptr_list back to nonmoving_weak_ptr_list
+            // such that their C finalizers can be run by hs_exit_.
+            appendWeakList(&nonmoving_weak_ptr_list, nonmoving_old_weak_ptr_list);
+            goto finish;
         }
-    } while (!all_caps_syncd);
+
+        // We're still running, request a sync
+        nonmovingBeginFlush(task);
+
+        bool all_caps_syncd;
+        MarkBudget sync_marking_budget = sync_phase_marking_budget;
+        do {
+            all_caps_syncd = nonmovingWaitForFlush();
+            if (nonmovingMarkThreadsWeaks(&sync_marking_budget, mark_queue) == false) {
+                // We ran out of budget for marking. Abort sync.
+                // See Note [Sync phase marking budget].
+                traceConcSyncEnd();
+                stat_endNonmovingGcSync();
+                releaseAllCapabilities(n_capabilities, NULL, task);
+                goto concurrent_marking;
+            }
+        } while (!all_caps_syncd);
+    }
 #endif
 
     nonmovingResurrectThreads(mark_queue, resurrected_threads);
 
@@ -981,15 +996,15 @@ concurrent_marking:
 
     // Schedule finalizers and resurrect threads
-#if defined(THREADED_RTS)
-    // Just pick a random capability. Not sure if this is a good idea -- we use
-    // only one capability for all finalizers.
-    scheduleFinalizers(getCapability(0), *dead_weaks);
-    // Note that this mutates heap and causes running write barriers.
-    // See Note [Unintentional marking in resurrectThreads] in NonMovingMark.c
-    // for how we deal with this.
-    resurrectThreads(*resurrected_threads);
-#endif
+    if (concurrent) {
+        // Just pick a random capability. Not sure if this is a good idea -- we use
+        // only one capability for all finalizers.
+        scheduleFinalizers(getCapability(0), *dead_weaks);
+        // Note that this mutates heap and causes running write barriers.
+        // See Note [Unintentional marking in resurrectThreads] in NonMovingMark.c
+        // for how we deal with this.
+        resurrectThreads(*resurrected_threads);
+    }
 
 #if defined(DEBUG)
     // Zap CAFs that we will sweep
@@ -1019,15 +1034,19 @@ concurrent_marking:
     // Prune spark lists
     // See Note [Spark management under the nonmoving collector].
 #if defined(THREADED_RTS)
-    for (uint32_t n = 0; n < getNumCapabilities(); n++) {
-        pruneSparkQueue(true, getCapability(n));
+    if (concurrent) {
+        for (uint32_t n = 0; n < getNumCapabilities(); n++) {
+            pruneSparkQueue(true, getCapability(n));
+        }
     }
-#endif
 
     // Everything has been marked; allow the mutators to proceed
-#if defined(THREADED_RTS) && !defined(NONCONCURRENT_SWEEP)
-    nonmoving_write_barrier_enabled = false;
-    nonmovingFinishFlush(task);
+#if !defined(NONCONCURRENT_SWEEP)
+    if (concurrent) {
+        nonmoving_write_barrier_enabled = false;
+        nonmovingFinishFlush(task);
+    }
+#endif
 #endif
 
     current_mark_queue = NULL;
@@ -1064,24 +1083,28 @@ concurrent_marking:
     nonmovingTraceAllocatorCensus();
 #endif
 
-#if defined(THREADED_RTS) && defined(NONCONCURRENT_SWEEP)
+#if defined(NONCONCURRENT_SWEEP)
 #if defined(DEBUG)
     checkNonmovingHeap(&nonmovingHeap);
     checkSanity(true, true);
 #endif
-    nonmoving_write_barrier_enabled = false;
-    nonmovingFinishFlush(task);
+    if (concurrent) {
+        nonmoving_write_barrier_enabled = false;
+        nonmovingFinishFlush(task);
+    }
 #endif
 
     // TODO: Remainder of things done by GarbageCollect (update stats)
 
 #if defined(THREADED_RTS)
 finish:
-    exitMyTask();
+    if (concurrent) {
+        exitMyTask();
 
-    // We are done...
-    RELAXED_STORE(&mark_thread, 0);
-    stat_endNonmovingGc();
+        // We are done...
+        RELAXED_STORE(&mark_thread, 0);
+        stat_endNonmovingGc();
+    }
 
     // Signal that the concurrent collection is finished, allowing the next
     // non-moving collection to proceed
```
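The upshot in `nonmovingCollect` is a three-way split: spawn a mark thread and return immediately (concurrent), or drop the storage-manager lock, mark in place, and retake it (non-concurrent); a shutdown in progress (`getSchedState() != SCHED_RUNNING`) demotes a would-be concurrent collection to the in-place path. Condensed from the diff above (THREADED_RTS build; thread spawning and error handling elided):

```c
// Condensed control flow of the tail of nonmovingCollect().
if (getSchedState() != SCHED_RUNNING) {
    concurrent = false;            // shutting down: never hand off to a thread
}

if (concurrent) {
    RELAXED_STORE(&concurrent_coll_running, true);
    nonmoving_write_barrier_enabled = true;
    // ... spawn nonmovingConcurrentMark(mark_queue) on a new OS thread ...
    return;                        // mutators resume while marking proceeds
} else {
    RELEASE_SM_LOCK;               // the in-place mark must not hold the SM lock
}

// Non-concurrent path: mark on this thread, reusing the dead_weaks and
// resurrected_threads lists from preparation. The lock is retaken
// unconditionally here because the concurrent path already returned.
nonmovingMark_(mark_queue, dead_weaks, resurrected_threads, false);
ACQUIRE_SM_LOCK;
```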
```diff
diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h
index 02c7806345..3ee6225b3e 100644
--- a/rts/sm/NonMoving.h
+++ b/rts/sm/NonMoving.h
@@ -149,7 +149,8 @@ void nonmovingExit(void);
 // directly, but in a pause.
 //
 void nonmovingCollect(StgWeak **dead_weaks,
-                      StgTSO **resurrected_threads);
+                      StgTSO **resurrected_threads,
+                      bool concurrent);
 
 void nonmovingPushFreeSegment(struct NonmovingSegment *seg);
```

```diff
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index bd018f4ff7..fffabdd97f 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -27,8 +27,10 @@
 #include "sm/Storage.h"
 #include "CNF.h"
 
+#if defined(THREADED_RTS)
 static void nonmovingResetUpdRemSetQueue (MarkQueue *rset);
 static void nonmovingResetUpdRemSet (UpdRemSet *rset);
+#endif
 static bool check_in_nonmoving_heap(StgClosure *p);
 static void mark_closure (MarkQueue *queue, const StgClosure *p, StgClosure **origin);
 static void trace_tso (MarkQueue *queue, StgTSO *tso);
@@ -955,6 +957,7 @@ void nonmovingInitUpdRemSet (UpdRemSet *rset)
     rset->queue.is_upd_rem_set = true;
 }
 
+#if defined(THREADED_RTS)
 static void nonmovingResetUpdRemSetQueue (MarkQueue *rset)
 {
     // UpdRemSets always have one block for the mark queue. This assertion is to
@@ -968,6 +971,7 @@ void nonmovingResetUpdRemSet (UpdRemSet *rset)
 {
     nonmovingResetUpdRemSetQueue(&rset->queue);
 }
+#endif
 
 void freeMarkQueue (MarkQueue *queue)
 {
```

```diff
diff --git a/testsuite/tests/ffi/should_run/ffi023_c.c b/testsuite/tests/ffi/should_run/ffi023_c.c
index 979c378b7d..8928e99f9a 100644
--- a/testsuite/tests/ffi/should_run/ffi023_c.c
+++ b/testsuite/tests/ffi/should_run/ffi023_c.c
@@ -4,7 +4,7 @@
 HsInt out (HsInt x) {
-    performMajorGC();
+    performBlockingMajorGC();
     rts_clearMemory();
     return incall(x);
 }
```
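The testsuite change shows why the blocking variant exists: `rts_clearMemory()` requires that no collection is in progress, and with `+RTS -xn` a `performMajorGC()` can return while the non-moving mark is still running. For completeness, a minimal, hypothetical C embedding in the same spirit (not part of the patch), driving the RTS from a foreign `main` built with `-no-hs-main`:

```c
// Hypothetical embedding sketch. Link against a Haskell library built with
// the threaded RTS and run with +RTS -xn to exercise the non-moving collector.
#include "HsFFI.h"
#include "Rts.h"

int main(int argc, char *argv[])
{
    hs_init(&argc, &argv);

    // ... call exported Haskell functions, allocate on the heap ...

    // Unlike performMajorGC(), this returns only after the non-moving
    // collector's mark and sweep phases have both completed.
    performBlockingMajorGC();

    hs_exit();
    return 0;
}
```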