Diffstat (limited to 'rts/sm/GC.c')
-rw-r--r-- | rts/sm/GC.c | 370
1 file changed, 236 insertions, 134 deletions
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 76237f35c2..83e9c97bd9 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -51,6 +51,7 @@
 #include "CheckUnload.h"
 #include "CNF.h"
 #include "RtsFlags.h"
+#include "NonMoving.h"
 
 #include <string.h> // for memset()
 #include <unistd.h>
@@ -103,6 +104,7 @@
  */
 uint32_t N;
 bool major_gc;
+bool deadlock_detect_gc;
 
 /* Data used for allocation area sizing.
  */
@@ -159,7 +161,6 @@ static void mark_root (void *user, StgClosure **root);
 static void prepare_collected_gen (generation *gen);
 static void prepare_uncollected_gen (generation *gen);
 static void init_gc_thread (gc_thread *t);
-static void resize_generations (void);
 static void resize_nursery (void);
 static void start_gc_threads (void);
 static void scavenge_until_all_done (void);
@@ -193,7 +194,8 @@ StgPtr mark_sp; // pointer to the next unallocated mark stack entry
 
 void
 GarbageCollect (uint32_t collect_gen,
-                bool do_heap_census,
+                const bool do_heap_census,
+                const bool deadlock_detect,
                 uint32_t gc_type USED_IF_THREADS,
                 Capability *cap,
                 bool idle_cap[])
@@ -263,7 +265,25 @@ GarbageCollect (uint32_t collect_gen,
   N = collect_gen;
   major_gc = (N == RtsFlags.GcFlags.generations-1);
 
-  if (major_gc) {
+  /* See Note [Deadlock detection under nonmoving collector]. */
+  deadlock_detect_gc = deadlock_detect;
+
+#if defined(THREADED_RTS)
+  if (major_gc && RtsFlags.GcFlags.useNonmoving && concurrent_coll_running) {
+      /* If there is already a concurrent major collection running then
+       * there is no benefit to starting another.
+       * TODO: Catch heap-size runaway.
+       */
+      N--;
+      collect_gen--;
+      major_gc = false;
+  }
+#endif
+
+  /* N.B. The nonmoving collector works a bit differently. See
+   * Note [Static objects under the nonmoving collector].
+   */
+  if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
       prev_static_flag = static_flag;
       static_flag =
           static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
@@ -572,7 +592,7 @@ GarbageCollect (uint32_t collect_gen,
       gen = &generations[g];
 
       // for generations we collected...
-      if (g <= N) {
+      if (g <= N && !(RtsFlags.GcFlags.useNonmoving && gen == oldest_gen)) {
 
         /* free old memory and shift to-space into from-space for all
          * the collected generations (except the allocation area).  These
@@ -710,8 +730,55 @@ GarbageCollect (uint32_t collect_gen,
       }
   } // for all generations
 
-  // update the max size of older generations after a major GC
-  resize_generations();
+  // Flush the update remembered set. See Note [Eager update remembered set
+  // flushing] in NonMovingMark.c
+  if (RtsFlags.GcFlags.useNonmoving) {
+      RELEASE_SM_LOCK;
+      nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
+      ACQUIRE_SM_LOCK;
+  }
+
+  // Mark and sweep the oldest generation.
+  // N.B. This can only happen after we've moved
+  // oldest_gen->scavenged_large_objects back to oldest_gen->large_objects.
+  ASSERT(oldest_gen->scavenged_large_objects == NULL);
+  if (RtsFlags.GcFlags.useNonmoving && major_gc) {
+      // All threads in non-moving heap should be found to be alive, because
+      // threads in the non-moving generation's list should live in the
+      // non-moving heap, and we consider non-moving objects alive during
+      // preparation.
+      ASSERT(oldest_gen->old_threads == END_TSO_QUEUE);
+      // For weaks, remember that we evacuated all weaks to the non-moving heap
+      // in markWeakPtrList(), and then moved the weak_ptr_list list to
+      // old_weak_ptr_list. We then moved weaks with live keys to the
+      // weak_ptr_list again. Then, in collectDeadWeakPtrs() we moved weaks in
+      // old_weak_ptr_list to dead_weak_ptr_list. So at this point
+      // old_weak_ptr_list should be empty.
+      ASSERT(oldest_gen->old_weak_ptr_list == NULL);
+
+      // we may need to take the lock to allocate mark queue blocks
+      RELEASE_SM_LOCK;
+      // dead_weak_ptr_list contains weak pointers with dead keys. Those need to
+      // be kept alive because we'll use them in finalizeSchedulers(). Similarly
+      // resurrected_threads are also going to be used in resurrectedThreads()
+      // so we need to mark those too.
+      // Note that in sequential case these lists will be appended with more
+      // weaks and threads found to be dead in mark.
+#if !defined(THREADED_RTS)
+      // In the non-threaded runtime this is the only time we push to the
+      // upd_rem_set
+      nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
+#endif
+      nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
+      ACQUIRE_SM_LOCK;
+  }
+
+  // Update the max size of older generations after a major GC:
+  // We can't resize here in the case of the concurrent collector since we
+  // don't yet know how much live data we have. This will be instead done
+  // once we finish marking.
+  if (major_gc && RtsFlags.GcFlags.generations > 1 && ! RtsFlags.GcFlags.useNonmoving)
+      resizeGenerations();
 
   // Free the mark stack.
   if (mark_stack_top_bd != NULL) {
@@ -735,7 +802,7 @@ GarbageCollect (uint32_t collect_gen,
 
   // mark the garbage collected CAFs as dead
 #if defined(DEBUG)
-  if (major_gc) { gcCAFs(); }
+  if (major_gc && !RtsFlags.GcFlags.useNonmoving) { gcCAFs(); }
 #endif
 
   // Update the stable name hash table
@@ -768,8 +835,9 @@ GarbageCollect (uint32_t collect_gen,
   // check sanity after GC
   // before resurrectThreads(), because that might overwrite some
   // closures, which will cause problems with THREADED where we don't
-  // fill slop.
-  IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc));
+  // fill slop. If we are using the nonmoving collector then we can't claim to
+  // be *after* the major GC; it's now running concurrently.
+  IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc && !RtsFlags.GcFlags.useNonmoving));
 
   // If a heap census is due, we need to do it before
   // resurrectThreads(), for the same reason as checkSanity above:
@@ -942,6 +1010,7 @@ new_gc_thread (uint32_t n, gc_thread *t)
         ws->todo_overflow = NULL;
         ws->n_todo_overflow = 0;
         ws->todo_large_objects = NULL;
+        ws->todo_seg = END_NONMOVING_TODO_LIST;
 
         ws->part_list = NULL;
         ws->n_part_blocks = 0;
@@ -1321,6 +1390,18 @@ releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
 #endif
 
 /* ----------------------------------------------------------------------------
+   Save the mutable lists in saved_mut_lists where it will be scavenged
+   during GC
+   ------------------------------------------------------------------------- */
+
+static void
+stash_mut_list (Capability *cap, uint32_t gen_no)
+{
+    cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
+    cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
+}
+
+/* ----------------------------------------------------------------------------
    Initialise a generation that is to be collected
    ------------------------------------------------------------------------- */
 
@@ -1331,11 +1412,17 @@ prepare_collected_gen (generation *gen)
     gen_workspace *ws;
     bdescr *bd, *next;
 
-    // Throw away the current mutable list. Invariant: the mutable
-    // list always has at least one block; this means we can avoid a
-    // check for NULL in recordMutable().
     g = gen->no;
-    if (g != 0) {
+
+    if (RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no) {
+        // Nonmoving heap's mutable list is always a root.
+        for (i = 0; i < n_capabilities; i++) {
+            stash_mut_list(capabilities[i], g);
+        }
+    } else if (g != 0) {
+        // Otherwise throw away the current mutable list. Invariant: the
+        // mutable list always has at least one block; this means we can avoid
+        // a check for NULL in recordMutable().
        for (i = 0; i < n_capabilities; i++) {
            freeChain(capabilities[i]->mut_lists[g]);
            capabilities[i]->mut_lists[g] =
@@ -1351,13 +1438,17 @@ prepare_collected_gen (generation *gen)
     gen->old_threads = gen->threads;
     gen->threads = END_TSO_QUEUE;
 
-    // deprecate the existing blocks
-    gen->old_blocks = gen->blocks;
-    gen->n_old_blocks = gen->n_blocks;
-    gen->blocks = NULL;
-    gen->n_blocks = 0;
-    gen->n_words = 0;
-    gen->live_estimate = 0;
+    // deprecate the existing blocks (except in the case of the nonmoving
+    // collector since these will be preserved in nonmovingCollect for the
+    // concurrent GC).
+    if (!(RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no)) {
+        gen->old_blocks = gen->blocks;
+        gen->n_old_blocks = gen->n_blocks;
+        gen->blocks = NULL;
+        gen->n_blocks = 0;
+        gen->n_words = 0;
+        gen->live_estimate = 0;
+    }
 
     // initialise the large object queues.
     ASSERT(gen->scavenged_large_objects == NULL);
@@ -1451,18 +1542,6 @@ prepare_collected_gen (generation *gen)
         }
     }
 
-
-/* ----------------------------------------------------------------------------
-   Save the mutable lists in saved_mut_lists
-   ------------------------------------------------------------------------- */
-
-static void
-stash_mut_list (Capability *cap, uint32_t gen_no)
-{
-    cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
-    cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
-}
-
 /* ----------------------------------------------------------------------------
    Initialise a generation that is *not* to be collected
    ------------------------------------------------------------------------- */
@@ -1531,31 +1610,57 @@ collect_gct_blocks (void)
 }
 
 /* -----------------------------------------------------------------------------
-   During mutation, any blocks that are filled by allocatePinned() are
-   stashed on the local pinned_object_blocks list, to avoid needing to
-   take a global lock. Here we collect those blocks from the
-   cap->pinned_object_blocks lists and put them on the
-   main g0->large_object list.
+   During mutation, any blocks that are filled by allocatePinned() are stashed
+   on the local pinned_object_blocks list, to avoid needing to take a global
+   lock. Here we collect those blocks from the cap->pinned_object_blocks lists
+   and put them on the g0->large_object or oldest_gen->large_objects.
+
+   How to decide which list to put them on?
+
+   - When non-moving heap is enabled and this is a major GC, we put them on
+     oldest_gen. This is because after preparation we really want no
+     old-to-young references, and we want to be able to reset mut_lists. For
+     this we need to promote every potentially live object to the oldest gen.
+
+   - Otherwise we put them on g0.
   -------------------------------------------------------------------------- */
 
 static void
 collect_pinned_object_blocks (void)
 {
-    uint32_t n;
-    bdescr *bd, *prev;
+    generation *gen;
+    const bool use_nonmoving = RtsFlags.GcFlags.useNonmoving;
+    if (use_nonmoving && major_gc) {
+        gen = oldest_gen;
+    } else {
+        gen = g0;
+    }
 
-    for (n = 0; n < n_capabilities; n++) {
-        prev = NULL;
-        for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
-            prev = bd;
+    for (uint32_t n = 0; n < n_capabilities; n++) {
+        bdescr *last = NULL;
+        if (use_nonmoving && gen == oldest_gen) {
+            // Mark objects as belonging to the nonmoving heap
+            for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
+                bd->flags |= BF_NONMOVING;
+                bd->gen = oldest_gen;
+                bd->gen_no = oldest_gen->no;
+                oldest_gen->n_large_words += bd->free - bd->start;
+                oldest_gen->n_large_blocks += bd->blocks;
+                last = bd;
+            }
+        } else {
+            for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
+                last = bd;
+            }
         }
-        if (prev != NULL) {
-            prev->link = g0->large_objects;
-            if (g0->large_objects != NULL) {
-                g0->large_objects->u.back = prev;
+
+        if (last != NULL) {
+            last->link = gen->large_objects;
+            if (gen->large_objects != NULL) {
+                gen->large_objects->u.back = last;
             }
-            g0->large_objects = capabilities[n]->pinned_object_blocks;
-            capabilities[n]->pinned_object_blocks = 0;
+            gen->large_objects = capabilities[n]->pinned_object_blocks;
+            capabilities[n]->pinned_object_blocks = NULL;
        }
    }
 }
@@ -1614,98 +1719,100 @@ mark_root(void *user USED_IF_THREADS, StgClosure **root)
    percentage of the maximum heap size available to allocate into.
    ------------------------------------------------------------------------- */
 
-static void
-resize_generations (void)
+void
+resizeGenerations (void)
 {
     uint32_t g;
+    W_ live, size, min_alloc, words;
+    const W_ max = RtsFlags.GcFlags.maxHeapSize;
+    const W_ gens = RtsFlags.GcFlags.generations;
 
-    if (major_gc && RtsFlags.GcFlags.generations > 1) {
-        W_ live, size, min_alloc, words;
-        const W_ max = RtsFlags.GcFlags.maxHeapSize;
-        const W_ gens = RtsFlags.GcFlags.generations;
-
-        // live in the oldest generations
-        if (oldest_gen->live_estimate != 0) {
-            words = oldest_gen->live_estimate;
-        } else {
-            words = oldest_gen->n_words;
-        }
-        live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
-            oldest_gen->n_large_blocks +
-            oldest_gen->n_compact_blocks;
+    // live in the oldest generations
+    if (oldest_gen->live_estimate != 0) {
+        words = oldest_gen->live_estimate;
+    } else {
+        words = oldest_gen->n_words;
+    }
+    live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
+           oldest_gen->n_large_blocks +
+           oldest_gen->n_compact_blocks;
 
-        // default max size for all generations except zero
-        size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
-                       RtsFlags.GcFlags.minOldGenSize);
+    // default max size for all generations except zero
+    size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
+                   RtsFlags.GcFlags.minOldGenSize);
 
-        if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
-            if (max > 0) {
-                RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
-            } else {
-                RtsFlags.GcFlags.heapSizeSuggestion = size;
-            }
+    if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
+        if (max > 0) {
+            RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
+        } else {
+            RtsFlags.GcFlags.heapSizeSuggestion = size;
         }
+    }
 
-        // minimum size for generation zero
-        min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
-                            RtsFlags.GcFlags.minAllocAreaSize
-                            * (W_)n_capabilities);
-
-        // Auto-enable compaction when the residency reaches a
-        // certain percentage of the maximum heap size (default: 30%).
-        if (RtsFlags.GcFlags.compact ||
-            (max > 0 &&
-             oldest_gen->n_blocks >
-             (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
-            oldest_gen->mark = 1;
-            oldest_gen->compact = 1;
+    // minimum size for generation zero
+    min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
+                        RtsFlags.GcFlags.minAllocAreaSize
+                        * (W_)n_capabilities);
+
+    // Auto-enable compaction when the residency reaches a
+    // certain percentage of the maximum heap size (default: 30%).
+    // Except when non-moving GC is enabled.
+    if (!RtsFlags.GcFlags.useNonmoving &&
+        (RtsFlags.GcFlags.compact ||
+         (max > 0 &&
+          oldest_gen->n_blocks >
+          (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
+        oldest_gen->mark = 1;
+        oldest_gen->compact = 1;
 //        debugBelch("compaction: on\n", live);
-        } else {
-            oldest_gen->mark = 0;
-            oldest_gen->compact = 0;
+    } else {
+        oldest_gen->mark = 0;
+        oldest_gen->compact = 0;
 //        debugBelch("compaction: off\n", live);
-        }
+    }
 
-        if (RtsFlags.GcFlags.sweep) {
-            oldest_gen->mark = 1;
-        }
+    if (RtsFlags.GcFlags.sweep) {
+        oldest_gen->mark = 1;
+    }
 
-        // if we're going to go over the maximum heap size, reduce the
-        // size of the generations accordingly. The calculation is
-        // different if compaction is turned on, because we don't need
-        // to double the space required to collect the old generation.
-        if (max != 0) {
+    // if we're going to go over the maximum heap size, reduce the
+    // size of the generations accordingly. The calculation is
+    // different if compaction is turned on, because we don't need
+    // to double the space required to collect the old generation.
+    if (max != 0) {
+
+        // this test is necessary to ensure that the calculations
+        // below don't have any negative results - we're working
+        // with unsigned values here.
+        if (max < min_alloc) {
+            heapOverflow();
+        }
 
-            // this test is necessary to ensure that the calculations
-            // below don't have any negative results - we're working
-            // with unsigned values here.
-            if (max < min_alloc) {
-                heapOverflow();
+        if (oldest_gen->compact) {
+            if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
+                size = (max - min_alloc) / ((gens - 1) * 2 - 1);
             }
-
-            if (oldest_gen->compact) {
-                if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
-                    size = (max - min_alloc) / ((gens - 1) * 2 - 1);
-                }
-            } else {
-                if ( (size * (gens - 1) * 2) + min_alloc > max ) {
-                    size = (max - min_alloc) / ((gens - 1) * 2);
-                }
+        } else {
+            if ( (size * (gens - 1) * 2) + min_alloc > max ) {
+                size = (max - min_alloc) / ((gens - 1) * 2);
             }
+        }
 
-            if (size < live) {
-                heapOverflow();
-            }
+        if (size < live) {
+            heapOverflow();
         }
+    }
 
 #if 0
-        debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
-                   min_alloc, size, max);
+    debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
+               min_alloc, size, max);
+    debugBelch("resize_gen: n_blocks: %lu, n_large_block: %lu, n_compact_blocks: %lu\n",
+               oldest_gen->n_blocks, oldest_gen->n_large_blocks, oldest_gen->n_compact_blocks);
+    debugBelch("resize_gen: max_blocks: %lu -> %lu\n", oldest_gen->max_blocks, oldest_gen->n_blocks);
 #endif
 
-        for (g = 0; g < gens; g++) {
-            generations[g].max_blocks = size;
-        }
+    for (g = 0; g < gens; g++) {
+        generations[g].max_blocks = size;
     }
 }
@@ -1841,21 +1948,16 @@ resize_nursery (void)
 
 #if defined(DEBUG)
 
-static void gcCAFs(void)
+void gcCAFs(void)
 {
-    StgIndStatic *p, *prev;
+    uint32_t i = 0;
+    StgIndStatic *prev = NULL;
 
-    const StgInfoTable *info;
-    uint32_t i;
-
-    i = 0;
-    p = debug_caf_list;
-    prev = NULL;
-
-    for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST;
-         p = (StgIndStatic*)p->saved_info) {
-
-        info = get_itbl((StgClosure*)p);
+    for (StgIndStatic *p = debug_caf_list;
+         p != (StgIndStatic*) END_OF_CAF_LIST;
+         p = (StgIndStatic*) p->saved_info)
+    {
+        const StgInfoTable *info = get_itbl((StgClosure*)p);
         ASSERT(info->type == IND_STATIC);
 
         // See Note [STATIC_LINK fields] in Storage.h
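The promotion rule described in the new collect_pinned_object_blocks comment can be summarised by a small standalone sketch. This is not part of the patch; the enum and function names below are illustrative only, and the real decision is made inline in the RTS code shown above.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the generation selection: pinned blocks are
 * promoted to the oldest (nonmoving) generation only when the nonmoving
 * collector is in use and this is a major GC; otherwise they stay in g0,
 * mirroring the rule in collect_pinned_object_blocks above. */
typedef enum { TARGET_G0, TARGET_OLDEST_GEN } pinned_target;

static pinned_target pinned_block_target(bool use_nonmoving, bool major_gc)
{
    if (use_nonmoving && major_gc) {
        /* Promote: after preparation there should be no old-to-young
         * references, so every potentially live object goes to oldest_gen
         * and the mutable lists can be reset. */
        return TARGET_OLDEST_GEN;
    }
    return TARGET_G0;
}

int main(void)
{
    printf("%d\n", pinned_block_target(true, true));   /* 1 -> oldest_gen */
    printf("%d\n", pinned_block_target(true, false));  /* 0 -> g0 */
    printf("%d\n", pinned_block_target(false, true));  /* 0 -> g0 */
    return 0;
}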