| author    | Ben Gamari <ben@smart-cactus.org>                        | 2021-01-20 11:50:50 -0500 |
|-----------|----------------------------------------------------------|---------------------------|
| committer | Ben Gamari <ben@smart-cactus.org>                        | 2021-01-21 11:02:19 -0500 |
| commit    | 94f5710518c82f729938c5f19360145c404d5767 (patch)         |                           |
| tree      | 55add58fe00503046fdbae53763295789250dc3f                 |                           |
| parent    | 9eeeb85eb3c29519c3fdfdaeb5ed659b50c6bcea (diff)          |                           |
| download  | haskell-94f5710518c82f729938c5f19360145c404d5767.tar.gz  |                           |
rts: Use non-moving collector for pinned allocations while using moving collector (branch: wip/gc/nonmoving-pinned-b)
| -rw-r--r-- | includes/rts/Flags.h     | 16 |
| -rw-r--r-- | rts/RtsFlags.c           | 20 |
| -rw-r--r-- | rts/Stats.c              |  6 |
| -rw-r--r-- | rts/Weak.c               |  2 |
| -rw-r--r-- | rts/sm/CNF.c             |  2 |
| -rw-r--r-- | rts/sm/Evac.c            |  8 |
| -rw-r--r-- | rts/sm/GC.c              | 29 |
| -rw-r--r-- | rts/sm/NonMoving.c       | 53 |
| -rw-r--r-- | rts/sm/NonMoving.h       |  2 |
| -rw-r--r-- | rts/sm/NonMovingCensus.c |  4 |
| -rw-r--r-- | rts/sm/Sanity.c          | 17 |
| -rw-r--r-- | rts/sm/Scav.c            |  6 |
| -rw-r--r-- | rts/sm/Storage.c         | 16 |
13 files changed, 121 insertions, 60 deletions
```diff
diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h
index 35b45b0940..c25faa1786 100644
--- a/includes/rts/Flags.h
+++ b/includes/rts/Flags.h
@@ -52,7 +52,21 @@ typedef struct _GC_FLAGS {
     double  oldGenFactor;
     double  pcFreeHeap;
 
-    bool useNonmoving; // default = false
+    // The non-moving collector has a few modes:
+    //
+    // * if !THREADED_RTS: non-moving oldest generation, non-concurrent collection
+    //
+    // * if THREADED_RTS && useNonmovingPinned: moving oldest generation,
+    //   non-moving allocation of pinned objects, non-concurrent collection
+    //
+    // * if THREADED_RTS && concurrentNonmoving: non-moving oldest generation,
+    //   non-moving allocation of pinned objects, concurrent collection
+    //
+    // * if THREADED_RTS && useNonmovingPinned && concurrentNonmoving:
+    //   moving oldest generation, non-moving allocation of pinned objects,
+    //   non-concurrent collection
+    bool useNonmovingPinned;   // Allocate pinned objects directly into non-moving heap; default = false
+    bool concurrentNonmoving;  // Use non-moving heap and concurrent collection for oldest generation; default = false
     bool nonmovingSelectorOpt; // Do selector optimization in the
                                // non-moving heap, default = false
     uint32_t generations;
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 95656c3da6..4324366696 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -164,7 +164,8 @@ void initRtsFlagsDefaults(void)
     RtsFlags.GcFlags.heapSizeSuggestionAuto = false;
     RtsFlags.GcFlags.pcFreeHeap           = 3;    /* 3% */
     RtsFlags.GcFlags.oldGenFactor         = 2;
-    RtsFlags.GcFlags.useNonmoving         = false;
+    RtsFlags.GcFlags.useNonmovingPinned   = false;
+    RtsFlags.GcFlags.concurrentNonmoving  = false;
     RtsFlags.GcFlags.nonmovingSelectorOpt = false;
     RtsFlags.GcFlags.generations          = 2;
     RtsFlags.GcFlags.squeezeUpdFrames     = true;
@@ -981,12 +982,17 @@ error = true;
                  else if (strequal("copying-gc",
                           &rts_argv[arg][2])) {
                      OPTION_SAFE;
-                     RtsFlags.GcFlags.useNonmoving = false;
+                     RtsFlags.GcFlags.concurrentNonmoving = false;
                  }
                  else if (strequal("nonmoving-gc",
                           &rts_argv[arg][2])) {
                      OPTION_SAFE;
-                     RtsFlags.GcFlags.useNonmoving = true;
+                     RtsFlags.GcFlags.concurrentNonmoving = true;
+                 }
+                 else if (strequal("nonmoving-pinned",
+                          &rts_argv[arg][2])) {
+                     OPTION_SAFE;
+                     RtsFlags.GcFlags.useNonmovingPinned = true;
                  }
 #if defined(THREADED_RTS)
 #if defined(mingw32_HOST_OS)
@@ -1664,7 +1670,7 @@ error = true;
               case 'n':
                   OPTION_SAFE;
-                  RtsFlags.GcFlags.useNonmoving = true;
+                  RtsFlags.GcFlags.concurrentNonmoving = true;
                   unchecked_arg_start++;
                   if (rts_argv[arg][3] == 's') {
                       RtsFlags.GcFlags.nonmovingSelectorOpt = true;
@@ -1846,7 +1852,7 @@ static void normaliseRtsOpts (void)
         RtsFlags.MiscFlags.install_seh_handlers = true;
     }
 
-    if (RtsFlags.GcFlags.useNonmoving && RtsFlags.GcFlags.generations == 1) {
+    if (RtsFlags.GcFlags.concurrentNonmoving && RtsFlags.GcFlags.generations == 1) {
         barf("The non-moving collector doesn't support -G1");
     }
@@ -1861,11 +1867,11 @@ static void normaliseRtsOpts (void)
 #endif
 
     if (RtsFlags.ProfFlags.doHeapProfile != NO_HEAP_PROFILING &&
-        RtsFlags.GcFlags.useNonmoving) {
+        RtsFlags.GcFlags.concurrentNonmoving) {
         barf("The non-moving collector doesn't support profiling");
     }
 
-    if (RtsFlags.GcFlags.compact && RtsFlags.GcFlags.useNonmoving) {
+    if (RtsFlags.GcFlags.compact && RtsFlags.GcFlags.concurrentNonmoving) {
         errorBelch("The non-moving collector cannot be used in conjunction with\n"
                    "the compacting collector.");
         errorUsage();
```
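Two things are worth pulling out of the flag changes above: the old `useNonmoving` flag is split into `useNonmovingPinned` (set by the new `--nonmoving-pinned` option) and `concurrentNonmoving` (set by the existing `--nonmoving-gc` / `-xn` options), and the comment added to Flags.h spells out how the pair combine. The standalone C sketch below is not part of the patch; it merely restates that mode table with illustrative names (`struct gc_flags`, `gc_mode`), following the comment's rule that `useNonmovingPinned` implies a non-concurrent collection even when both flags are set.

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the two new fields in GC_FLAGS. */
struct gc_flags {
    bool useNonmovingPinned;   /* --nonmoving-pinned */
    bool concurrentNonmoving;  /* --nonmoving-gc / -xn */
};

/* Mirrors the mode table in the Flags.h comment (THREADED_RTS build):
 * useNonmovingPinned keeps a moving oldest generation with non-moving
 * pinned allocation and non-concurrent collection, and it takes
 * precedence when both flags are set. */
static const char *gc_mode(struct gc_flags f)
{
    if (f.useNonmovingPinned)
        return "moving oldest gen, non-moving pinned objects, non-concurrent collection";
    if (f.concurrentNonmoving)
        return "non-moving oldest gen, non-moving pinned objects, concurrent collection";
    return "moving oldest gen, moving collection only";
}

int main(void)
{
    struct gc_flags both = { .useNonmovingPinned = true, .concurrentNonmoving = true };
    printf("%s\n", gc_mode(both));  /* pinned mode wins when both are set */
    return 0;
}
```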
```diff
diff --git a/rts/Stats.c b/rts/Stats.c
index 53251bb7d6..3b34c1f856 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -855,7 +855,7 @@ static void report_summary(const RTSSummaryStats* sum)
                     TimeToSecondsDbl(gen_stats->avg_pause_ns),
                     TimeToSecondsDbl(gen_stats->max_pause_ns));
     }
-    if (RtsFlags.GcFlags.useNonmoving) {
+    if (RtsFlags.GcFlags.concurrentNonmoving) {
         const int n_major_colls = sum->gc_summary_stats[RtsFlags.GcFlags.generations-1].collections;
         statsPrintf("  Gen  1 %5d syncs"
                     ", %6.3fs %3.4fs %3.4fs\n",
@@ -906,7 +906,7 @@ static void report_summary(const RTSSummaryStats* sum)
     statsPrintf("  GC      time  %7.3fs  (%7.3fs elapsed)\n",
                 TimeToSecondsDbl(stats.gc_cpu_ns),
                 TimeToSecondsDbl(stats.gc_elapsed_ns));
-    if (RtsFlags.GcFlags.useNonmoving) {
+    if (RtsFlags.GcFlags.concurrentNonmoving) {
         statsPrintf(
                 "  CONC GC time  %7.3fs  (%7.3fs elapsed)\n",
                 TimeToSecondsDbl(stats.nonmoving_gc_cpu_ns),
@@ -1175,7 +1175,7 @@ static void report_machine_readable (const RTSSummaryStats * sum)
 #endif
     }
     // non-moving collector statistics
-    if (RtsFlags.GcFlags.useNonmoving) {
+    if (RtsFlags.GcFlags.concurrentNonmoving) {
         const int n_major_colls = sum->gc_summary_stats[RtsFlags.GcFlags.generations-1].collections;
         MR_STAT("nonmoving_sync_wall_seconds", "f",
                 TimeToSecondsDbl(stats.nonmoving_gc_sync_elapsed_ns));
diff --git a/rts/Weak.c b/rts/Weak.c
index 0adf5a8b92..82c5c146e9 100644
--- a/rts/Weak.c
+++ b/rts/Weak.c
@@ -95,7 +95,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
     // n_finalizers is not necessarily zero under non-moving collection
     // because non-moving collector does not wait for the list to be consumed
     // (by doIdleGcWork()) before appending the list with more finalizers.
-    ASSERT(RtsFlags.GcFlags.useNonmoving || SEQ_CST_LOAD(&n_finalizers) == 0);
+    ASSERT(RtsFlags.GcFlags.concurrentNonmoving || SEQ_CST_LOAD(&n_finalizers) == 0);
 
     // Append finalizer_list with the new list. TODO: Perhaps cache tail of the
     // list for faster append. NOTE: We can't append `list` here! Otherwise we
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index 31b3cb99f2..09f73cde6f 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -277,7 +277,7 @@ compactFree(StgCompactNFData *str)
     for ( ; block; block = next) {
         next = block->next;
         bd = Bdescr((StgPtr)block);
-        ASSERT(RtsFlags.GcFlags.useNonmoving || ((bd->flags & BF_EVACUATED) == 0));
+        ASSERT(RtsFlags.GcFlags.concurrentNonmoving || ((bd->flags & BF_EVACUATED) == 0));
         // When using the non-moving collector we leave compact object
         // evacuated to the oldset gen as BF_EVACUATED to avoid evacuating
         // objects in the non-moving heap.
```
```diff
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index ea40563499..bf059eddfe 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -147,7 +147,7 @@ alloc_for_copy (uint32_t size, uint32_t gen_no)
 {
     ASSERT(gen_no < RtsFlags.GcFlags.generations);
 
-    if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving)) {
+    if (RTS_UNLIKELY(RtsFlags.GcFlags.concurrentNonmoving)) {
         return alloc_for_copy_nonmoving(size, gen_no);
     }
 
@@ -437,7 +437,7 @@ evacuate_large(StgPtr p)
     new_gen = &generations[new_gen_no];
     __atomic_fetch_or(&bd->flags, BF_EVACUATED, __ATOMIC_ACQ_REL);
 
-    if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving && new_gen == oldest_gen)) {
+    if (RTS_UNLIKELY(RtsFlags.GcFlags.concurrentNonmoving && new_gen == oldest_gen)) {
         __atomic_fetch_or(&bd->flags, BF_NONMOVING, __ATOMIC_ACQ_REL);
 
         // See Note [Non-moving GC: Marking evacuated objects].
@@ -479,7 +479,7 @@ evacuate_large(StgPtr p)
 STATIC_INLINE void
 evacuate_static_object (StgClosure **link_field, StgClosure *q)
 {
-    if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving)) {
+    if (RTS_UNLIKELY(RtsFlags.GcFlags.concurrentNonmoving)) {
         // See Note [Static objects under the nonmoving collector] in Storage.c.
         if (major_gc && !deadlock_detect_gc)
             markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
@@ -596,7 +596,7 @@ evacuate_compact (StgPtr p)
     // for that - the only code touching the generation of the block is
     // in the GC, and that should never see blocks other than the first)
     bd->flags |= BF_EVACUATED;
-    if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving && new_gen == oldest_gen)) {
+    if (RTS_UNLIKELY(RtsFlags.GcFlags.concurrentNonmoving && new_gen == oldest_gen)) {
         __atomic_fetch_or(&bd->flags, BF_NONMOVING, __ATOMIC_RELAXED);
 
         // See Note [Non-moving GC: Marking evacuated objects].
```
```diff
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 4ddbcfdafd..d38a454e12 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -335,7 +335,7 @@ GarbageCollect (uint32_t collect_gen,
     deadlock_detect_gc = deadlock_detect;
 
 #if defined(THREADED_RTS)
-    if (major_gc && RtsFlags.GcFlags.useNonmoving && concurrent_coll_running) {
+    if (major_gc && RtsFlags.GcFlags.concurrentNonmoving && concurrent_coll_running) {
         /* If there is already a concurrent major collection running then
          * there is no benefit to starting another.
          * TODO: Catch heap-size runaway.
@@ -349,7 +349,7 @@ GarbageCollect (uint32_t collect_gen,
     /* N.B. The nonmoving collector works a bit differently. See
      * Note [Static objects under the nonmoving collector].
      */
-    if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
+    if (major_gc && !RtsFlags.GcFlags.concurrentNonmoving) {
         prev_static_flag = static_flag;
         static_flag =
             static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
@@ -675,7 +675,7 @@ GarbageCollect (uint32_t collect_gen,
         gen = &generations[g];
 
         // for generations we collected...
-        if (g <= N && !(RtsFlags.GcFlags.useNonmoving && gen == oldest_gen)) {
+        if (g <= N && !(RtsFlags.GcFlags.concurrentNonmoving && gen == oldest_gen)) {
 
             /* free old memory and shift to-space into from-space for all
              * the collected generations (except the allocation area).  These
@@ -815,7 +815,7 @@ GarbageCollect (uint32_t collect_gen,
     // Flush the update remembered sets. See Note [Eager update remembered set
     // flushing] in NonMovingMark.c
-    if (RtsFlags.GcFlags.useNonmoving) {
+    if (USE_NONMOVING) {
         RELEASE_SM_LOCK;
         for (n = 0; n < n_capabilities; n++) {
             nonmovingAddUpdRemSetBlocks(&capabilities[n]->upd_rem_set.queue);
@@ -827,7 +827,7 @@ GarbageCollect (uint32_t collect_gen,
     // N.B. This can only happen after we've moved
     // oldest_gen->scavenged_large_objects back to oldest_gen->large_objects.
     ASSERT(oldest_gen->scavenged_large_objects == NULL);
-    if (RtsFlags.GcFlags.useNonmoving && major_gc) {
+    if (major_gc && (RtsFlags.GcFlags.useNonmovingPinned || RtsFlags.GcFlags.concurrentNonmoving)) {
         // All threads in non-moving heap should be found to be alive, because
         // threads in the non-moving generation's list should live in the
         // non-moving heap, and we consider non-moving objects alive during
@@ -853,6 +853,9 @@ GarbageCollect (uint32_t collect_gen,
         // In the non-threaded runtime this is the only time we push to the
         // upd_rem_set
         nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
+#else
+        if (RtsFlags.GcFlags.concurrentNonmoving)
+            nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
 #endif
         nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
         ACQUIRE_SM_LOCK;
@@ -862,7 +865,7 @@ GarbageCollect (uint32_t collect_gen,
     // We can't resize here in the case of the concurrent collector since we
     // don't yet know how much live data we have. This will be instead done
     // once we finish marking.
-    if (major_gc && RtsFlags.GcFlags.generations > 1 && ! RtsFlags.GcFlags.useNonmoving)
+    if (major_gc && RtsFlags.GcFlags.generations > 1 && ! RtsFlags.GcFlags.concurrentNonmoving)
         resizeGenerations();
 
     // Free the mark stack.
@@ -888,7 +891,7 @@ GarbageCollect (uint32_t collect_gen,
 #if defined(DEBUG)
     // Mark the garbage collected CAFs as dead. Done in `nonmovingGcCafs()` when
     // non-moving GC is enabled.
-    if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
+    if (major_gc && !RtsFlags.GcFlags.concurrentNonmoving) {
         gcCAFs();
     }
 #endif
@@ -907,7 +910,7 @@ GarbageCollect (uint32_t collect_gen,
     // TODO: Similar to `nonmovingGcCafs` non-moving GC should have its own
     // collector for these objects, but that's currently not implemented, so we
     // simply don't unload object code when non-moving GC is enabled.
-    if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
+    if (major_gc && !RtsFlags.GcFlags.concurrentNonmoving) {
         checkUnload();
     }
 
@@ -930,7 +933,7 @@ GarbageCollect (uint32_t collect_gen,
     // closures, which will cause problems with THREADED where we don't
     // fill slop. If we are using the nonmoving collector then we can't claim to
     // be *after* the major GC; it's now running concurrently.
-    IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc && !RtsFlags.GcFlags.useNonmoving));
+    IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc && !RtsFlags.GcFlags.concurrentNonmoving));
 
     // If a heap census is due, we need to do it before
     // resurrectThreads(), for the same reason as checkSanity above:
@@ -1571,7 +1574,7 @@ prepare_collected_gen (generation *gen)
     g = gen->no;
 
-    if (RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no) {
+    if (RtsFlags.GcFlags.concurrentNonmoving && g == oldest_gen->no) {
         // Nonmoving heap's mutable list is always a root.
         for (i = 0; i < n_capabilities; i++) {
             stash_mut_list(capabilities[i], g);
@@ -1600,7 +1603,7 @@ prepare_collected_gen (generation *gen)
     // deprecate the existing blocks (except in the case of the nonmoving
     // collector since these will be preserved in nonmovingCollect for the
     // concurrent GC).
-    if (!(RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no)) {
+    if (!(RtsFlags.GcFlags.concurrentNonmoving && g == oldest_gen->no)) {
         gen->old_blocks   = gen->blocks;
         gen->n_old_blocks = gen->n_blocks;
         gen->blocks       = NULL;
@@ -1787,7 +1790,7 @@ collect_gct_blocks (void)
 static void
 collect_pinned_object_blocks (void)
 {
-    const bool use_nonmoving = RtsFlags.GcFlags.useNonmoving;
+    const bool use_nonmoving = RtsFlags.GcFlags.concurrentNonmoving;
     generation *const gen = (use_nonmoving && major_gc) ? oldest_gen : g0;
 
     for (uint32_t n = 0; n < n_capabilities; n++) {
@@ -1911,7 +1914,7 @@ resizeGenerations (void)
     // Auto-enable compaction when the residency reaches a
     // certain percentage of the maximum heap size (default: 30%).
     // Except when non-moving GC is enabled.
-    if (!RtsFlags.GcFlags.useNonmoving &&
+    if (!RtsFlags.GcFlags.concurrentNonmoving &&
         (RtsFlags.GcFlags.compact ||
          (max > 0 &&
           oldest_gen->n_blocks >
```
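In GC.c the update remembered set flush now fires under either mode (the `USE_NONMOVING` test, defined in NonMoving.h below), while the non-threaded build flushes only when `concurrentNonmoving` is set. As a rough picture of what that flush does, here is a standalone toy version with simplified types; `flush_cap`, `struct cap`, and `struct block` are illustrative stand-ins, not RTS API.

```c
#include <stddef.h>
#include <stdio.h>

/* A capability-local remembered-set block list, greatly simplified. */
struct block { struct block *next; };
struct cap   { struct block *upd_rem_set; };

static struct block *global_upd_rem_set = NULL;

/* Move one capability's blocks onto the global list, emptying the local
 * one, in the spirit of nonmovingAddUpdRemSetBlocks(). */
static void flush_cap(struct cap *c)
{
    while (c->upd_rem_set != NULL) {
        struct block *b = c->upd_rem_set;
        c->upd_rem_set = b->next;
        b->next = global_upd_rem_set;
        global_upd_rem_set = b;
    }
}

int main(void)
{
    struct block b1 = { NULL }, b2 = { &b1 };
    struct cap caps[2] = { { &b2 }, { NULL } };
    for (size_t i = 0; i < 2; i++)   /* mirrors the loop over n_capabilities */
        flush_cap(&caps[i]);
    int n = 0;
    for (struct block *b = global_upd_rem_set; b; b = b->next) n++;
    printf("%d blocks flushed\n", n);  /* prints: 2 blocks flushed */
    return 0;
}
```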
```diff
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index 42fa98fcbb..71a4aa50a0 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -733,7 +733,7 @@ static void free_nonmoving_allocator(struct NonmovingAllocator *alloc)
 void nonmovingInit(void)
 {
-    if (! RtsFlags.GcFlags.useNonmoving) return;
+    if (! USE_NONMOVING) return;
 #if defined(THREADED_RTS)
     initMutex(&nonmoving_collection_mutex);
     initCondition(&concurrent_coll_finished);
@@ -748,7 +748,7 @@ void nonmovingInit(void)
 // Stop any nonmoving collection in preparation for RTS shutdown.
 void nonmovingStop(void)
 {
-    if (! RtsFlags.GcFlags.useNonmoving) return;
+    if (! USE_NONMOVING) return;
 #if defined(THREADED_RTS)
     if (mark_thread) {
         debugTrace(DEBUG_nonmoving_gc,
@@ -761,7 +761,7 @@ void nonmovingStop(void)
 void nonmovingExit(void)
 {
-    if (! RtsFlags.GcFlags.useNonmoving) return;
+    if (! USE_NONMOVING) return;
 
     // First make sure collector is stopped before we tear things down.
     nonmovingStop();
@@ -819,11 +819,6 @@ void nonmovingClearBitmap(struct NonmovingSegment *seg)
 /* Prepare the heap bitmaps and snapshot metadata for a mark */
 static void nonmovingPrepareMark(void)
 {
-    // See Note [Static objects under the nonmoving collector].
-    prev_static_flag = static_flag;
-    static_flag =
-        static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
-
     // Should have been cleared by the last sweep
     ASSERT(nonmovingHeap.sweep_list == NULL);
@@ -852,6 +847,15 @@ static void nonmovingPrepareMark(void)
         bd->flags &= ~BF_MARKED;
     }
 
+    if (!RtsFlags.GcFlags.concurrentNonmoving)
+        return;
+
+    // See Note [Static objects under the nonmoving collector].
+    prev_static_flag = static_flag;
+    static_flag =
+        static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
+
+
     // Add newly promoted large objects and clear mark bits
     bdescr *next;
     ASSERT(oldest_gen->scavenged_large_objects == NULL);
@@ -933,6 +937,39 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
     }
 #endif
 
+    if (!RtsFlags.GcFlags.concurrentNonmoving) {
+        // In this case we are running in useNonmovingPinned mode. We merely
+        // mark the objects that were pushed to the update
+        // remembered set during the preparatory GC, sweep and
+        // return.
+        nonmovingPrepareMark();
+
+        for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
+            struct NonmovingSegment *filled = nonmovingHeap.allocators[alloca_idx]->saved_filled;
+            struct NonmovingSegment *seg = filled;
+            if (filled) {
+                while (true) {
+                    nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
+                    if (seg->link)
+                        seg = seg->link;
+                    else
+                        break;
+                }
+                seg->link = nonmovingHeap.sweep_list;
+                nonmovingHeap.sweep_list = filled;
+            }
+        }
+
+        MarkQueue *mark_queue = stgMallocBytes(sizeof(MarkQueue), "mark queue");
+        initMarkQueue(mark_queue);
+        nonmovingMark(mark_queue);
+        freeMarkQueue(mark_queue);
+        stgFree(mark_queue);
+        oldest_gen->live_estimate += nonmoving_live_words;
+        oldest_gen->n_blocks += nonmoving_live_words / BLOCK_SIZE_W;
+        nonmovingSweep();
+        return;
+    }
+
     trace(TRACE_nonmoving_gc, "Starting nonmoving GC preparation");
     resizeGenerations();
```
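The interesting part of NonMoving.c is the new early-exit path in nonmovingCollect for useNonmovingPinned mode: snapshot each filled segment, splice each allocator's saved_filled chain onto sweep_list, mark, sweep, and return without handing off to a concurrent mark thread. The sketch below reproduces just the list-splicing step with a toy segment type; `prepend_filled` and `struct seg` are illustrative, and the real loop also snapshots `next_free` into `next_free_snap` while walking.

```c
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-in for struct NonmovingSegment; only the link matters here. */
struct seg {
    struct seg *link;
    int id;
};

/* Splice the 'filled' chain onto the front of 'sweep_list', as the new
 * useNonmovingPinned path does per allocator: walk to the last segment of
 * 'filled', then point its link at the old sweep list. Returns the new head. */
static struct seg *prepend_filled(struct seg *filled, struct seg *sweep_list)
{
    if (filled == NULL)
        return sweep_list;
    struct seg *last = filled;
    while (last->link != NULL)   /* find the tail of the filled chain */
        last = last->link;
    last->link = sweep_list;     /* splice: filled ++ sweep_list */
    return filled;
}

int main(void)
{
    struct seg c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };
    struct seg old = { NULL, 9 };
    for (struct seg *s = prepend_filled(&a, &old); s; s = s->link)
        printf("seg %d\n", s->id);   /* prints segs 1 2 3 9 */
    return 0;
}
```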
```diff
diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h
index cd0aef1631..90b9599609 100644
--- a/rts/sm/NonMoving.h
+++ b/rts/sm/NonMoving.h
@@ -27,6 +27,8 @@
 // In blocks
 #define NONMOVING_SEGMENT_BLOCKS (NONMOVING_SEGMENT_SIZE / BLOCK_SIZE)
 
+#define USE_NONMOVING (RtsFlags.GcFlags.useNonmovingPinned || RtsFlags.GcFlags.concurrentNonmoving)
+
 _Static_assert(NONMOVING_SEGMENT_SIZE % BLOCK_SIZE == 0,
                "non-moving segment size must be multiple of block size");
diff --git a/rts/sm/NonMovingCensus.c b/rts/sm/NonMovingCensus.c
index 2dcec4b745..1ecb725ce9 100644
--- a/rts/sm/NonMovingCensus.c
+++ b/rts/sm/NonMovingCensus.c
@@ -90,7 +90,7 @@ nonmovingAllocatorCensus(struct NonmovingAllocator *alloc)
 void nonmovingPrintAllocatorCensus()
 {
-    if (!RtsFlags.GcFlags.useNonmoving)
+    if (! USE_NONMOVING)
         return;
 
     for (int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
@@ -116,7 +116,7 @@ void nonmovingPrintAllocatorCensus()
 void nonmovingTraceAllocatorCensus()
 {
 #if defined(TRACING)
-    if (!RtsFlags.GcFlags.useNonmoving && !TRACE_nonmoving_gc)
+    if (!USE_NONMOVING && !TRACE_nonmoving_gc)
         return;
 
     for (int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
```
```diff
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 193a1a884c..f3c198ae70 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -922,7 +922,7 @@ static void checkGeneration (generation *gen,
     if (!after_major_gc) return;
 #endif
 
-    if (RtsFlags.GcFlags.useNonmoving && gen == oldest_gen) {
+    if (RtsFlags.GcFlags.concurrentNonmoving && gen == oldest_gen) {
         ASSERT(countNonMovingSegments(nonmovingHeap.free) == (W_) nonmovingHeap.n_free * NONMOVING_SEGMENT_BLOCKS);
         ASSERT(countBlocks(nonmoving_large_objects) == n_nonmoving_large_blocks);
         ASSERT(countBlocks(nonmoving_marked_large_objects) == n_nonmoving_marked_large_blocks);
@@ -1041,7 +1041,7 @@ findMemoryLeak (void)
         markBlocks(capabilities[i]->upd_rem_set.queue.blocks);
     }
 
-    if (RtsFlags.GcFlags.useNonmoving) {
+    if (RtsFlags.GcFlags.useNonmovingPinned || RtsFlags.GcFlags.concurrentNonmoving) {
         markBlocks(upd_rem_set_block_list);
         markBlocks(nonmoving_large_objects);
         markBlocks(nonmoving_marked_large_objects);
@@ -1120,7 +1120,7 @@ static W_
 genBlocks (generation *gen)
 {
     W_ ret = 0;
-    if (RtsFlags.GcFlags.useNonmoving && gen == oldest_gen) {
+    if (USE_NONMOVING && gen == oldest_gen) {
         // See Note [Live data accounting in nonmoving collector].
         ASSERT(countNonMovingHeap(&nonmovingHeap) == gen->n_blocks);
         ret += countAllocdBlocks(nonmoving_large_objects);
@@ -1130,12 +1130,11 @@ genBlocks (generation *gen)
         ret += countNonMovingHeap(&nonmovingHeap);
         if (current_mark_queue)
             ret += countBlocks(current_mark_queue->blocks);
-    } else {
-        ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
-        ASSERT(countCompactBlocks(gen->compact_objects) == gen->n_compact_blocks);
-        ASSERT(countCompactBlocks(gen->compact_blocks_in_import) == gen->n_compact_blocks_in_import);
-        ret += gen->n_blocks;
     }
+    ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
+    ASSERT(countCompactBlocks(gen->compact_objects) == gen->n_compact_blocks);
+    ASSERT(countCompactBlocks(gen->compact_blocks_in_import) == gen->n_compact_blocks_in_import);
+    ret += gen->n_blocks;
 
     ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
@@ -1195,7 +1194,7 @@ memInventory (bool show)
     // Can't easily do a memory inventory: We might race with the nonmoving
     // collector. In principle we could try to take nonmoving_collection_mutex
     // and do an inventory if we have it but we don't currently implement this.
-    if (RtsFlags.GcFlags.useNonmoving)
+    if (RtsFlags.GcFlags.concurrentNonmoving)
        return;
 #endif
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index fe6dc4be54..7143a166c2 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -443,7 +443,7 @@ scavenge_block (bdescr *bd)
     // Sanity check: See Note [Deadlock detection under nonmoving collector].
 #if defined(DEBUG)
-    if (RtsFlags.GcFlags.useNonmoving && deadlock_detect_gc) {
+    if (RtsFlags.GcFlags.concurrentNonmoving && deadlock_detect_gc) {
         ASSERT(bd->gen == oldest_gen);
     }
 #endif
@@ -1675,7 +1675,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
                 ;
             }
 
-            if (RtsFlags.GcFlags.useNonmoving && major_gc && gen == oldest_gen) {
+            if (RtsFlags.GcFlags.concurrentNonmoving && major_gc && gen == oldest_gen) {
                 // We can't use scavenge_one here as we need to scavenge SRTs
                 nonmovingScavengeOne((StgClosure *)p);
             } else if (scavenge_one(p)) {
@@ -1698,7 +1698,7 @@ void
 scavenge_capability_mut_lists (Capability *cap)
 {
     // In a major GC only nonmoving heap's mut list is root
-    if (RtsFlags.GcFlags.useNonmoving && major_gc) {
+    if (RtsFlags.GcFlags.concurrentNonmoving && major_gc) {
         uint32_t g = oldest_gen->no;
         scavenge_mutable_list(cap->saved_mut_lists[g], oldest_gen);
         freeChain_sync(cap->saved_mut_lists[g]);
```
```diff
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 3aa6c560c7..44fb413abe 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -181,7 +181,7 @@ initStorage (void)
     initSpinLock(&gc_alloc_block_sync);
 #endif
 
-    if (RtsFlags.GcFlags.useNonmoving)
+    if (RtsFlags.GcFlags.concurrentNonmoving || RtsFlags.GcFlags.useNonmovingPinned)
         nonmovingAddCapabilities(n_capabilities);
 
     /* The oldest generation has one step. */
@@ -283,7 +283,7 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
     }
 
     // Initialize NonmovingAllocators and UpdRemSets
-    if (RtsFlags.GcFlags.useNonmoving) {
+    if (RtsFlags.GcFlags.concurrentNonmoving || RtsFlags.GcFlags.useNonmovingPinned) {
         nonmovingAddCapabilities(to);
         for (i = 0; i < to; ++i) {
             init_upd_rem_set(&capabilities[i]->upd_rem_set);
         }
@@ -490,7 +490,7 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
     caf->saved_info = orig_info;
 
     // Allocate the blackhole indirection closure
-    if (RtsFlags.GcFlags.useNonmoving) {
+    if (RtsFlags.GcFlags.concurrentNonmoving) {
         // See Note [Static objects under the nonmoving collector].
         ACQUIRE_SM_LOCK;
         bh = (StgInd *)nonmovingAllocate(cap, sizeofW(*bh));
@@ -542,7 +542,7 @@ newCAF(StgRegTable *reg, StgIndStatic *caf)
     // Put this CAF on the mutable list for the old generation.
     // N.B. the nonmoving collector works a bit differently: see
     // Note [Static objects under the nonmoving collector].
-    if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) {
+    if (oldest_gen->no != 0 && !RtsFlags.GcFlags.concurrentNonmoving) {
         recordMutableCap((StgClosure*)caf,
                          regTableToCapability(reg), oldest_gen->no);
     }
@@ -627,7 +627,7 @@ StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
     // Put this CAF on the mutable list for the old generation.
     // N.B. the nonmoving collector works a bit differently:
     // see Note [Static objects under the nonmoving collector].
-    if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) {
+    if (oldest_gen->no != 0 && !RtsFlags.GcFlags.concurrentNonmoving) {
         recordMutableCap((StgClosure*)caf,
                          regTableToCapability(reg), oldest_gen->no);
     }
@@ -1182,7 +1182,7 @@ allocatePinned (Capability *cap, W_ n /*words*/, W_ alignment /*bytes*/, W_ alig
     // pinned allocations are often long-lived..
     //
     // See Note [Allocating pinned objects into the non-moving heap].
-    if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving)
+    if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmovingPinned)
         && (n + alignment_w) * sizeof(W_) < NONMOVING_MAX_BLOCK_SZ)
     {
         ACQUIRE_SM_LOCK;
@@ -1191,7 +1191,7 @@ allocatePinned (Capability *cap, W_ n /*words*/, W_ alignment /*bytes*/, W_ alig
         W_ off_w = ALIGN_WITH_OFF_W(p, alignment, align_off);
         memset(p, 0, off_w * sizeof(W_));
         p += off_w;
-        MEMSET_IF_PROFILING_W(p + n, 0, alignment_w - off_w - 1);
+        MEMSET_SLOP_W(p + n, 0, alignment_w - off_w - 1);
         return p;
     }
@@ -1616,7 +1616,7 @@ calcNeeded (bool force_major, memcount *blocks_needed)
             // mark stack:
             needed += gen->n_blocks / 100;
         }
-        if (gen->compact || (RtsFlags.GcFlags.useNonmoving && gen == oldest_gen)) {
+        if (gen->compact || (RtsFlags.GcFlags.concurrentNonmoving && gen == oldest_gen)) {
            continue; // no additional space needed for compaction
         } else {
            needed += gen->n_blocks;
```
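The allocatePinned hunk is the one that actually routes user allocations: a pinned request goes to the non-moving heap only when `useNonmovingPinned` is set and the request plus alignment slop fits under the largest non-moving block size. A hedged sketch of that size gate follows; the `NONMOVING_MAX_BLOCK_SZ` value here is a placeholder, as the real constant lives in the RTS headers.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uintptr_t W_;                  /* RTS word type */
#define NONMOVING_MAX_BLOCK_SZ 2048    /* placeholder; real value comes from NonMoving.h */

/* Mirrors the guard added in allocatePinned: route a pinned request into the
 * non-moving heap only when the mode is enabled and the request (payload plus
 * alignment slop, in bytes) fits under the largest non-moving block class. */
static bool pinned_goes_nonmoving(bool useNonmovingPinned, W_ n_words, W_ alignment_w)
{
    return useNonmovingPinned
        && (n_words + alignment_w) * sizeof(W_) < NONMOVING_MAX_BLOCK_SZ;
}

int main(void)
{
    printf("%d\n", pinned_goes_nonmoving(true, 16, 2));    /* small request: 1 */
    printf("%d\n", pinned_goes_nonmoving(true, 4096, 2));  /* large request: 0 */
    return 0;
}
```

Under these assumptions a program would opt in with something like `./prog +RTS --nonmoving-pinned -RTS`, using the flag added in RtsFlags.c above; oversized pinned requests still fall through to the existing moving-heap pinned allocation path.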