author     Ben Gamari <ben@smart-cactus.org>           2022-12-08 13:19:47 -0500
committer  Marge Bot <ben+marge-bot@smart-cactus.org>  2023-03-08 15:02:31 -0500
commit     7c817c0a4ab857e03d09526a481f63e313598c5b (patch)
tree       931c9e8b581a8c62578e8522e012a71caed412a7 /rts
parent     0a7eb0aa0bf7e7464e68ab9b6f4176771dcc3590 (diff)
download   haskell-7c817c0a4ab857e03d09526a481f63e313598c5b.tar.gz
nonmoving: Sync-phase mark budgeting
Here we significantly improve the bound on sync phase pause times by imposing a limit on the amount of work that we can perform during the sync. If we find that we have exceeded our marking budget then we allow the mutators to resume, return to concurrent marking, and try synchronizing again later. Fixes #22929.
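The mechanism is a per-pause marking allowance: each marked closure costs one unit, a sentinel value means "unlimited", and once the allowance is spent the marker bails out so the pause can end and be retried later. The following is a minimal standalone C sketch of that shape; all names and numbers here are illustrative stand-ins, not the RTS code.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

typedef int64_t MarkBudget;
#define UNLIMITED_MARK_BUDGET INT64_MIN

/* Mark up to *budget items from a counter standing in for a mark queue.
 * Returns true if the queue was drained, false if the budget ran out. */
static bool mark_with_budget(MarkBudget *budget, int *pending)
{
    while (*pending > 0) {
        if (*budget == 0) {
            return false;               /* out of budget: caller retries later */
        } else if (*budget != UNLIMITED_MARK_BUDGET) {
            *budget -= 1;               /* the unlimited sentinel is never decremented */
        }
        (*pending)--;                   /* "mark" one object */
    }
    return true;
}

int main(void)
{
    int pending = 1000;
    MarkBudget sync_budget = 300;       /* small bound, as in the sync phase */
    while (!mark_with_budget(&sync_budget, &pending)) {
        printf("budget exhausted, %d objects left; resuming concurrent mark\n",
               pending);
        sync_budget = 300;              /* retry the sync with a fresh budget */
    }
    printf("marking complete\n");
    return 0;
}

The same spend-and-retry loop appears in nonmovingMark_ in the diff below, with the work coming from the real mark queue rather than a counter.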
Diffstat (limited to 'rts')
-rw-r--r--  rts/sm/NonMoving.c      75
-rw-r--r--  rts/sm/NonMovingMark.c  12
-rw-r--r--  rts/sm/NonMovingMark.h  11
3 files changed, 86 insertions(+), 12 deletions(-)
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index 251151f287..320898f0b3 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -253,6 +253,9 @@ Mutex concurrent_coll_finished_lock;
* - Note [Weak pointer processing and the non-moving GC] (MarkWeak.c) describes
* how weak pointers are handled when the non-moving GC is in use.
*
+ * - Note [Sync phase marking budget] describes how we avoid long mutator
+ * pauses during the sync phase
+ *
* [ueno 2016]:
* Katsuhiro Ueno and Atsushi Ohori. 2016. A fully concurrent garbage
* collector for functional programs on multicore processors. SIGPLAN Not. 51,
@@ -504,10 +507,44 @@ Mutex concurrent_coll_finished_lock;
* remembered set during the preparatory GC. This allows us to safely skip the
* non-moving write barrier without jeopardizing the snapshot invariant.
*
+ *
+ * Note [Sync phase marking budget]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The non-moving collector is intended to provide reliably low collection
+ * latencies. These latencies are primarily due to two sources:
+ *
+ * a. the preparatory moving collection at the beginning of the major GC cycle
+ * b. the post-mark synchronization pause at the end
+ *
+ * While the cost of (a) is inherently bounded by the young generation size,
+ * (b) can in principle be unbounded since the mutator may hide large swathes
+ * of heap from the collector's concurrent mark phase via mutation. These will
+ * only become visible to the collector during the post-mark synchronization
+ * phase.
+ *
+ * Since we don't want to do unbounded marking work in the pause, we impose a
+ * limit (specifically, sync_phase_marking_budget) on the amount of work
+ * (namely, the number of marked closures) that we can do during the pause. If
+ * we deplete our marking budget during the pause then we allow the mutators to
+ * resume and return to concurrent marking (keeping the update remembered set
+ * write barrier enabled). After we have finished marking we will again
+ * attempt the post-mark synchronization.
+ *
+ * The choice of sync_phase_marking_budget was made empirically. On 2022
+ * hardware and a "typical" test program we tend to mark ~10^7 closures per
+ * second. Consequently, a sync_phase_marking_budget of 10^5 should produce
+ * ~10 ms pauses, which seems like a reasonable tradeoff.
+ *
+ * TODO: Perhaps sync_phase_marking_budget should be controllable via a
+ * command-line argument?
+ *
*/
memcount nonmoving_live_words = 0;
+// See Note [Sync phase marking budget].
+MarkBudget sync_phase_marking_budget = 200000;
+
#if defined(THREADED_RTS)
static void* nonmovingConcurrentMark(void *mark_queue);
#endif
@@ -1026,19 +1063,25 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
}
/* Mark queue, threads, and weak pointers until no more weaks have been
- * resuscitated
+ * resuscitated. If *budget is not UNLIMITED_MARK_BUDGET then we will mark no
+ * more than *budget objects. Returns true if there is no more marking work to
+ * be done, false if we exceeded our marking budget.
*/
-static void nonmovingMarkThreadsWeaks(MarkQueue *mark_queue)
+static bool nonmovingMarkThreadsWeaks(MarkBudget *budget, MarkQueue *mark_queue)
{
while (true) {
// Propagate marks
- nonmovingMark(mark_queue);
+ nonmovingMark(budget, mark_queue);
+ if (*budget == 0) {
+ return false;
+ }
// Tidy threads and weaks
nonmovingTidyThreads();
- if (! nonmovingTidyWeaks(mark_queue))
- return;
+ if (! nonmovingTidyWeaks(mark_queue)) {
+ return true;
+ }
}
}
@@ -1097,7 +1140,13 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
nonmovingMarkWeakPtrList(mark_queue);
// Do concurrent marking; most of the heap will get marked here.
- nonmovingMarkThreadsWeaks(mark_queue);
+#if defined(THREADED_RTS)
+concurrent_marking:
+#endif
+ {
+ MarkBudget budget = UNLIMITED_MARK_BUDGET;
+ nonmovingMarkThreadsWeaks(&budget, mark_queue);
+ }
#if defined(THREADED_RTS)
Task *task = newBoundTask();
@@ -1120,9 +1169,17 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
nonmovingBeginFlush(task);
bool all_caps_syncd;
+ MarkBudget sync_marking_budget = sync_phase_marking_budget;
do {
all_caps_syncd = nonmovingWaitForFlush();
- nonmovingMarkThreadsWeaks(mark_queue);
+ if (nonmovingMarkThreadsWeaks(&sync_marking_budget, mark_queue) == false) {
+ // We ran out of budget for marking. Abort sync.
+ // See Note [Sync phase marking budget].
+ traceConcSyncEnd();
+ stat_endNonmovingGcSync();
+ releaseAllCapabilities(n_capabilities, NULL, task);
+ goto concurrent_marking;
+ }
} while (!all_caps_syncd);
#endif
@@ -1133,7 +1190,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
// Do last marking of weak pointers
while (true) {
// Propagate marks
- nonmovingMark(mark_queue);
+ nonmovingMarkUnlimitedBudget(mark_queue);
if (!nonmovingTidyWeaks(mark_queue))
break;
@@ -1142,7 +1199,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
nonmovingMarkDeadWeaks(mark_queue, dead_weaks);
// Propagate marks
- nonmovingMark(mark_queue);
+ nonmovingMarkUnlimitedBudget(mark_queue);
// Now remove all dead objects from the mut_list to ensure that a younger
// generation collection doesn't attempt to look at them after we've swept.
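For context on the nonmovingMark_ changes above, the new control flow is: attempt the post-mark synchronization under a bounded budget, and if the budget is exhausted, release the capabilities and jump back to concurrent marking before retrying. The self-contained sketch below mimics that shape; every name and number is a stand-in rather than the RTS API, and the mutator activity is simulated so the retry path actually executes.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

typedef int64_t MarkBudget;
#define UNLIMITED_MARK_BUDGET INT64_MIN

static int hidden_work = 500000;   /* objects reachable but not yet marked */
static int mutator_rounds = 2;     /* how many times mutators hide more work */

/* Stand-in for nonmovingMarkThreadsWeaks: false means the budget ran out. */
static bool mark_threads_weaks(MarkBudget *budget)
{
    while (hidden_work > 0) {
        if (*budget == 0)
            return false;
        if (*budget != UNLIMITED_MARK_BUDGET)
            (*budget)--;
        hidden_work--;
    }
    return true;
}

static bool wait_for_flush(void) { return true; }  /* pretend all caps flushed */
static void release_capabilities(void) { puts("sync aborted; mutators resume"); }

static const MarkBudget sync_phase_budget = 200000;

static void mark_cycle(void)
{
concurrent_marking:
    {
        MarkBudget unlimited = UNLIMITED_MARK_BUDGET;
        mark_threads_weaks(&unlimited);        /* concurrent phase: no bound */
    }
    if (mutator_rounds-- > 0)
        hidden_work += 350000;                 /* mutators hide work while running */

    bool all_synced;
    MarkBudget sync_budget = sync_phase_budget;
    do {
        all_synced = wait_for_flush();
        if (!mark_threads_weaks(&sync_budget)) {
            /* Budget spent during the pause: back off and retry the sync. */
            release_capabilities();
            goto concurrent_marking;
        }
    } while (!all_synced);
    puts("sync complete");
}

int main(void)
{
    mark_cycle();
    return 0;
}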
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 02cecd154f..c4c43ad7dc 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -1767,15 +1767,23 @@ done:
* b. the nursery has been fully evacuated into the non-moving generation.
* c. the mark queue has been seeded with a set of roots.
*
+ * If budget is not UNLIMITED_MARK_BUDGET, then we will mark no more than the
+ * indicated number of objects and deduct the work done from the budget.
*/
GNUC_ATTR_HOT void
-nonmovingMark (MarkQueue *queue)
+nonmovingMark (MarkBudget* budget, MarkQueue *queue)
{
traceConcMarkBegin();
debugTrace(DEBUG_nonmoving_gc, "Starting mark pass");
- unsigned int count = 0;
+ uint64_t count = 0;
while (true) {
count++;
+ if (*budget == 0) {
+ return;
+ } else if (*budget != UNLIMITED_MARK_BUDGET) {
+ *budget -= 1;
+ }
+
MarkQueueEnt ent = markQueuePop(queue);
switch (nonmovingMarkQueueEntryType(&ent)) {
diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h
index 7d4fd3ccb4..1b56083113 100644
--- a/rts/sm/NonMovingMark.h
+++ b/rts/sm/NonMovingMark.h
@@ -111,6 +111,11 @@ typedef struct {
MarkQueue queue;
} UpdRemSet;
+// How much marking work we are allowed to perform
+// See Note [Sync phase marking budget] in NonMoving.c
+typedef int64_t MarkBudget;
+#define UNLIMITED_MARK_BUDGET INT64_MIN
+
// Number of blocks to allocate for a mark queue
#define MARK_QUEUE_BLOCKS 16
@@ -155,7 +160,11 @@ void markQueueAddRoot(MarkQueue* q, StgClosure** root);
void initMarkQueue(MarkQueue *queue);
void freeMarkQueue(MarkQueue *queue);
-void nonmovingMark(struct MarkQueue_ *STG_RESTRICT queue);
+void nonmovingMark(MarkBudget *budget, struct MarkQueue_ *STG_RESTRICT queue);
+INLINE_HEADER void nonmovingMarkUnlimitedBudget(struct MarkQueue_ *STG_RESTRICT queue) {
+ MarkBudget budget = UNLIMITED_MARK_BUDGET;
+ nonmovingMark(&budget, queue);
+}
void nonmovingMarkWeakPtrList(struct MarkQueue_ *queue);
bool nonmovingTidyWeaks(struct MarkQueue_ *queue);
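The header change above also keeps existing unlimited call sites of nonmovingMark tidy by routing them through an inline wrapper that supplies the sentinel budget. A small standalone sketch of that wrapper pattern, with assumed names rather than the GHC headers:

#include <stdint.h>
#include <stdio.h>

typedef int64_t MarkBudget;
#define UNLIMITED_MARK_BUDGET INT64_MIN

/* The budgeted entry point (stub body, for illustration only). */
static void mark(MarkBudget *budget, const char *queue_name)
{
    printf("marking %s, budget %s\n", queue_name,
           *budget == UNLIMITED_MARK_BUDGET ? "unlimited" : "bounded");
}

/* Wrapper supplying the sentinel, mirroring nonmovingMarkUnlimitedBudget. */
static inline void mark_unlimited(const char *queue_name)
{
    MarkBudget budget = UNLIMITED_MARK_BUDGET;
    mark(&budget, queue_name);
}

int main(void)
{
    MarkBudget sync_budget = 200000;
    mark(&sync_budget, "sync-phase queue");     /* bounded: the sync pause    */
    mark_unlimited("final weak-pointer pass");  /* unbounded: post-sync passes */
    return 0;
}

Keeping the wrapper in the header lets it inline at call sites, so the passes that never want a bound stay one-liners after the signature change.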