author     Ben Gamari <ben@smart-cactus.org>       2022-12-08 13:19:47 -0500
committer  Zubin Duggal <zubin.duggal@gmail.com>   2023-02-08 19:05:11 +0530
commit     41c8db24efaba282bbef11baec754eae7454c3c5 (patch)
tree       c5cfbe6a988d5d05636e4c4d938905cfb36df10c
parent     1276125345a361e615ee8c084324ce24e21e15d1 (diff)
download   haskell-41c8db24efaba282bbef11baec754eae7454c3c5.tar.gz
nonmoving: Sync-phase mark budgeting
Here we significantly improve the bound on sync phase pause times by imposing a limit on the amount of work that we can perform during the sync. If we find that we have exceeded our marking budget then we allow the mutators to resume, return to concurrent marking, and try synchronizing again later.

(cherry picked from commit c620669651c52fd228af61040747dd7236c4ba2b)
-rw-r--r--  rts/sm/NonMoving.c     | 76
-rw-r--r--  rts/sm/NonMovingMark.c | 12
-rw-r--r--  rts/sm/NonMovingMark.h | 11
3 files changed, 87 insertions(+), 12 deletions(-)
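In outline, the scheme is: while the mutators are paused for the post-mark synchronization, marking is charged against a finite budget; if the budget runs out, the sync is abandoned, the mutators resume, and the collector returns to concurrent marking before attempting the sync again. Below is a minimal, self-contained C sketch of that control flow, not the RTS code itself: mark_one_closure() and the outstanding counter are hypothetical stand-ins for the real mark queue, and only the MarkBudget type, the UNLIMITED_MARK_BUDGET sentinel, and the 200000 default are taken from the patch below.

/* Sketch of budgeted sync-phase marking with retry (not RTS code). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef int64_t MarkBudget;
#define UNLIMITED_MARK_BUDGET INT64_MIN

/* Hypothetical stand-in for the mark queue: a count of unmarked closures. */
static long outstanding = 1000000;

/* "Mark" one closure; returns false once nothing is left to mark. */
static bool mark_one_closure(void)
{
    if (outstanding == 0) return false;
    outstanding--;
    return true;
}

/* Mark until the queue is drained or the budget runs out.
 * Returns true if marking finished, false if the budget was exhausted. */
static bool mark_with_budget(MarkBudget *budget)
{
    while (true) {
        if (*budget == 0) {
            return false;               /* out of budget: caller must retry */
        } else if (*budget != UNLIMITED_MARK_BUDGET) {
            *budget -= 1;               /* only finite budgets are deducted */
        }
        if (!mark_one_closure())
            return true;                /* queue drained: marking complete */
    }
}

int main(void)
{
    int attempts = 0;
    for (;;) {
        /* One bounded sync attempt; the mutators would be paused here. */
        MarkBudget budget = 200000;     /* plays the role of sync_phase_marking_budget */
        attempts++;
        if (mark_with_budget(&budget))
            break;                      /* sync succeeded */
        /* Budget exhausted: in the RTS the mutators resume and marking
         * continues concurrently before the next sync attempt. */
    }
    printf("synchronized after %d bounded attempts\n", attempts);
    return 0;
}

Using INT64_MIN as the "unlimited" sentinel keeps the budget check cheap: an unlimited budget is never decremented, so it can never collide with the 0 that signals exhaustion.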
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index 7159aa7c49..fdf09f3678 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -247,6 +247,9 @@ Mutex concurrent_coll_finished_lock;
* - Note [Weak pointer processing and the non-moving GC] (MarkWeak.c) describes
* how weak pointers are handled when the non-moving GC is in use.
*
+ * - Note [Sync phase marking budget] describes how we avoid long mutator
+ * pauses during the sync phase
+ *
* [ueno 2016]:
* Katsuhiro Ueno and Atsushi Ohori. 2016. A fully concurrent garbage
* collector for functional programs on multicore processors. SIGPLAN Not. 51,
@@ -498,10 +501,44 @@ Mutex concurrent_coll_finished_lock;
* remembered set during the preparatory GC. This allows us to safely skip the
* non-moving write barrier without jeopardizing the snapshot invariant.
*
+ *
+ * Note [Sync phase marking budget]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The non-moving collector is intended to provide reliably low collection
+ * latencies. These latencies are primarily due to two sources:
+ *
+ * a. the preparatory moving collection at the beginning of the major GC cycle
+ * b. the post-mark synchronization pause at the end
+ *
+ * While the cost of (a) is inherently bounded by the young generation size,
+ * (b) can in principle be unbounded since the mutator may hide large swathes
+ * of heap from the collector's concurrent mark phase via mutation. These will
+ * only become visible to the collector during the post-mark synchronization
+ * phase.
+ *
+ * Since we don't want to do unbounded marking work in the pause, we impose a
+ * limit (specifically, sync_phase_marking_budget) on the amount of work
+ * (namely, the number of marked closures) that we can do during the pause. If
+ * we deplete our marking budget during the pause then we allow the mutators to
+ * resume and return to concurrent marking (keeping the update remembered set
+ * write barrier enabled). After we have finished marking we will again
+ * attempt the post-mark synchronization.
+ *
+ * The choice of sync_phase_marking_budget was made empirically. On 2022
+ * hardware and a "typical" test program we tend to mark ~10^7 closures per
+ * second. Consequently, a sync_phase_marking_budget of 10^5 should produce
+ * ~10 ms pauses, which seems like a reasonable tradeoff.
+ *
+ * TODO: Perhaps sync_phase_marking_budget should be controllable via a
+ * command-line argument?
+ *
*/
memcount nonmoving_live_words = 0;
+// See Note [Sync phase marking budget].
+MarkBudget sync_phase_marking_budget = 200000;
+
#if defined(THREADED_RTS)
static void* nonmovingConcurrentMark(void *mark_queue);
#endif
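As a quick check of the arithmetic in the note above: at the quoted rate of roughly 10^7 marked closures per second, a budget of B closures bounds a single sync attempt to about B / 10^7 seconds of marking, so the note's example of 10^5 corresponds to ~10 ms and the default of 200000 used here to roughly 20 ms per attempt. These figures are the note's own empirical estimates, not guarantees.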
@@ -1011,19 +1048,25 @@ void nonmovingCollect(StgWeak **dead_weaks STG_UNUSED, StgTSO **resurrected_thre
}
/* Mark queue, threads, and weak pointers until no more weaks have been
- * resuscitated
+ * resuscitated. If *budget is not UNLIMITED_MARK_BUDGET, we mark at most the
+ * indicated number of objects. Returns true if there is no more marking work
+ * to be done, false if we exceeded our marking budget.
*/
-static void nonmovingMarkThreadsWeaks(MarkQueue *mark_queue)
+static bool nonmovingMarkThreadsWeaks(MarkBudget *budget, MarkQueue *mark_queue)
{
while (true) {
// Propagate marks
- nonmovingMark(mark_queue);
+ nonmovingMark(budget, mark_queue);
+ if (*budget == 0) {
+ return false;
+ }
// Tidy threads and weaks
nonmovingTidyThreads();
- if (! nonmovingTidyWeaks(mark_queue))
- return;
+ if (! nonmovingTidyWeaks(mark_queue)) {
+ return true;
+ }
}
}
@@ -1081,7 +1124,14 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
nonmovingMarkWeakPtrList(mark_queue);
// Do concurrent marking; most of the heap will get marked here.
- nonmovingMarkThreadsWeaks(mark_queue);
+
+#if defined(THREADED_RTS)
+concurrent_marking:
+#endif
+ {
+ MarkBudget budget = UNLIMITED_MARK_BUDGET;
+ nonmovingMarkThreadsWeaks(&budget, mark_queue);
+ }
#if defined(THREADED_RTS)
Task *task = newBoundTask();
@@ -1104,9 +1154,17 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
nonmovingBeginFlush(task);
bool all_caps_syncd;
+ MarkBudget sync_marking_budget = sync_phase_marking_budget;
do {
all_caps_syncd = nonmovingWaitForFlush();
- nonmovingMarkThreadsWeaks(mark_queue);
+ if (nonmovingMarkThreadsWeaks(&sync_marking_budget, mark_queue) == false) {
+ // We ran out of budget for marking. Abort sync.
+ // See Note [Sync phase marking budget].
+ traceConcSyncEnd();
+ stat_endNonmovingGcSync();
+ releaseAllCapabilities(n_capabilities, NULL, task);
+ goto concurrent_marking;
+ }
} while (!all_caps_syncd);
#endif
@@ -1117,7 +1175,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
// Do last marking of weak pointers
while (true) {
// Propagate marks
- nonmovingMark(mark_queue);
+ nonmovingMarkUnlimitedBudget(mark_queue);
if (!nonmovingTidyWeaks(mark_queue))
break;
@@ -1126,7 +1184,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
nonmovingMarkDeadWeaks(mark_queue, dead_weaks);
// Propagate marks
- nonmovingMark(mark_queue);
+ nonmovingMarkUnlimitedBudget(mark_queue);
// Now remove all dead objects from the mut_list to ensure that a younger
// generation collection doesn't attempt to look at them after we've swept.
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 3829b016bd..2c1aa8481e 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -1733,15 +1733,23 @@ done:
* b. the nursery has been fully evacuated into the non-moving generation.
* c. the mark queue has been seeded with a set of roots.
*
+ * If budget is not UNLIMITED_MARK_BUDGET, then we will mark no more than the
+ * indicated number of objects and deduct the work done from the budget.
*/
GNUC_ATTR_HOT void
-nonmovingMark (MarkQueue *queue)
+nonmovingMark (MarkBudget* budget, MarkQueue *queue)
{
traceConcMarkBegin();
debugTrace(DEBUG_nonmoving_gc, "Starting mark pass");
- unsigned int count = 0;
+ uint64_t count = 0;
while (true) {
count++;
+ if (*budget == 0) {
+ return;
+ } else if (*budget != UNLIMITED_MARK_BUDGET) {
+ *budget -= 1;
+ }
+
MarkQueueEnt ent = markQueuePop(queue);
switch (nonmovingMarkQueueEntryType(&ent)) {
diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h
index 5a1bf93c42..970d95409b 100644
--- a/rts/sm/NonMovingMark.h
+++ b/rts/sm/NonMovingMark.h
@@ -111,6 +111,11 @@ typedef struct {
MarkQueue queue;
} UpdRemSet;
+// How much marking work we are allowed to perform
+// See Note [Sync phase marking budget] in NonMoving.c
+typedef int64_t MarkBudget;
+#define UNLIMITED_MARK_BUDGET INT64_MIN
+
// Number of blocks to allocate for a mark queue
#define MARK_QUEUE_BLOCKS 16
@@ -154,7 +159,11 @@ void markQueueAddRoot(MarkQueue* q, StgClosure** root);
void initMarkQueue(MarkQueue *queue);
void freeMarkQueue(MarkQueue *queue);
-void nonmovingMark(struct MarkQueue_ *restrict queue);
+void nonmovingMark(MarkBudget *budget, struct MarkQueue_ *restrict queue);
+INLINE_HEADER void nonmovingMarkUnlimitedBudget(struct MarkQueue_ *restrict queue) {
+ MarkBudget budget = UNLIMITED_MARK_BUDGET;
+ nonmovingMark(&budget, queue);
+}
void nonmovingMarkWeakPtrList(struct MarkQueue_ *queue);
bool nonmovingTidyWeaks(struct MarkQueue_ *queue);