author | Ömer Sinan Ağacan <omer@well-typed.com> | 2019-02-05 00:18:44 -0500
committer | Ben Gamari <ben@smart-cactus.org> | 2019-10-20 21:15:37 -0400
commit | 68e0647f432f9d79ae13a23f614ef293bfd297a9 (patch)
tree | 89211783fbd4d8d9502e2777b2719da493fef851
parent | b3ef2d1a861e9b892d64f22f6a233ea331db86d1 (diff)
download | haskell-68e0647f432f9d79ae13a23f614ef293bfd297a9.tar.gz
rts: Non-concurrent mark and sweep
This implements the core heap structure and a serial mark/sweep
collector which can be used to manage the oldest-generation heap.
This is the first step towards a concurrent mark-and-sweep collector
aimed at low-latency applications.
The full design of the collector implemented here is described in detail
in a technical note:
B. Gamari. "A Concurrent Garbage Collector For the Glasgow Haskell
Compiler" (2018)
The basic heap structure used in this design is heavily inspired by
K. Ueno & A. Ohori. "A fully concurrent garbage collector for
functional programs on multicore processors." /ACM SIGPLAN Notices/
Vol. 51. No. 9 (presented at ICFP 2016)
This design is intended to allow both marking and sweeping to proceed
concurrently with the execution of a multi-core mutator. Unlike the Ueno design,
which requires no global synchronization pauses, the collector
introduced here requires a stop-the-world pause at the beginning and end
of the mark phase.
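To make the intended phase ordering concrete, the sketch below outlines one
collection cycle. The stub functions are invented stand-ins for the
preparation, mark, and sweep entry points added by this patch
(nonmovingPrepareMark, the mark loop, nonmovingSweep); the driver itself is
purely illustrative and not the RTS code.

```c
/* Hedged sketch of one collection cycle as described above. All functions
 * here are invented stubs; only the ordering and the two stop-the-world
 * points are meant to be illustrative. */
#include <stdio.h>

static void stop_the_world(void)   { puts("pause mutator threads"); }
static void resume_the_world(void) { puts("resume mutator threads"); }
static void prepare_snapshot(void) { puts("clear bitmaps, snapshot segments"); }
static void mark_from_roots(void)  { puts("push roots, drain mark queue"); }
static void sweep_segments(void)   { puts("reclaim unmarked blocks"); }

int main(void)
{
    stop_the_world();      /* pause at the beginning of the mark phase   */
    prepare_snapshot();
    resume_the_world();    /* the eventual concurrent collector marks    */
    mark_from_roots();     /* here; this patch still marks in the pause  */
    stop_the_world();      /* pause at the end of the mark phase         */
    sweep_segments();
    resume_the_world();
    return 0;
}
```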
To avoid heap fragmentation, the allocator consists of a number of
fixed-size /sub-allocators/. Each of these sub-allocators allocates into
its own set of /segments/, themselves allocated from the block
allocator. Each segment is broken into a set of fixed-size allocation
blocks (which back allocations) in addition to a bitmap (used to track
the liveness of blocks) and some additional metadata (also used to track
liveness).
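As a rough illustration of how a request is routed to a fixed-size
sub-allocator, the sketch below mirrors the size-class computation used by
nonmovingAllocate (smallest class 2^NONMOVING_ALLOCA0 = 8 bytes,
NONMOVING_ALLOCA_CNT = 12 classes); the helper and constants here are
simplified stand-ins rather than the RTS definitions.

```c
/* Sketch of routing an allocation request to a fixed-size sub-allocator.
 * The constants mirror NONMOVING_ALLOCA0 = 3 and NONMOVING_ALLOCA_CNT = 12
 * from NonMoving.h, but the code is an illustration, not the RTS version. */
#include <stdio.h>

#define ALLOCA0    3u   /* smallest block size: 2^3 = 8 bytes */
#define ALLOCA_CNT 12u  /* twelve power-of-two size classes   */

static unsigned log2_ceil(unsigned long x)
{
    unsigned log = 0;
    while ((1UL << log) < x) log++;
    return log;
}

/* Which sub-allocator (size class) serves a request of `bytes` bytes?
 * Requests smaller than the smallest class round up to class 0. */
static unsigned allocator_index(unsigned long bytes)
{
    unsigned log = log2_ceil(bytes);
    return log <= ALLOCA0 ? 0 : log - ALLOCA0;
}

int main(void)
{
    printf("24-byte closure  -> class %u (32-byte blocks)\n", allocator_index(24));
    printf("100-byte closure -> class %u (128-byte blocks)\n", allocator_index(100));
    return 0;
}
```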
This heap structure enables collection via mark-and-sweep, which can be
performed concurrently via a snapshot-at-the-beginning scheme (although
concurrent collection is not implemented in this patch).
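The snapshot itself is cheap bookkeeping: while the world is stopped, each
segment records how far allocation had progressed, and sweep later leaves
alone anything allocated past that point. The following stripped-down sketch
(with assumed field names; the real segment type lives in NonMoving.h) shows
the idea.

```c
/* Stripped-down sketch of the snapshot bookkeeping. The field names follow
 * the segment type added in NonMoving.h, but this struct and these helpers
 * are illustrative stand-ins, not the RTS code. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLOCKS_PER_SEG 256               /* arbitrary for this sketch */

struct seg_sketch {
    uint16_t next_free;                  /* index of next unallocated block */
    uint16_t next_free_snap;             /* value of next_free at snapshot  */
    uint8_t  bitmap[BLOCKS_PER_SEG];     /* one mark byte per block         */
};

/* Taken while the world is stopped: clear the marks and record how far
 * allocation had progressed. */
static void snapshot_segment(struct seg_sketch *seg)
{
    memset(seg->bitmap, 0, sizeof seg->bitmap);
    seg->next_free_snap = seg->next_free;
}

/* Sweep may only reclaim blocks below the snapshot line that were never
 * marked; blocks allocated after the snapshot are kept unconditionally. */
static int block_is_garbage(const struct seg_sketch *seg, uint16_t idx)
{
    return idx < seg->next_free_snap && seg->bitmap[idx] == 0;
}

int main(void)
{
    struct seg_sketch seg = { .next_free = 10 };
    snapshot_segment(&seg);
    seg.bitmap[3] = 1;        /* block 3 was reached by the mark phase     */
    seg.next_free = 12;       /* blocks 10 and 11 allocated after snapshot */
    printf("block 3 garbage?  %d\n", block_is_garbage(&seg, 3));  /* 0 */
    printf("block 4 garbage?  %d\n", block_is_garbage(&seg, 4));  /* 1 */
    printf("block 11 garbage? %d\n", block_is_garbage(&seg, 11)); /* 0 */
    return 0;
}
```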
The mark queue is a fairly straightforward chunked-array structure.
The representation is a bit more verbose than a typical mark queue to
accommodate a combination of two features:
* a mark FIFO, which improves the locality of marking, reducing one of
the major overheads seen in mark/sweep allocators (see [1] for
details)
* the selector optimization and indirection shortcutting, which
requires that we track where we found each reference to an object
in case we need to update the reference at a later point (e.g. when
we find that it is an indirection). See Note [Origin references in
the nonmoving collector] (in `NonMovingMark.h`) for details.
Beyond this, the mark/sweep is fairly run-of-the-mill.
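For illustration only, a much-simplified chunked mark queue carrying an origin
pointer per entry might look like the sketch below. The real MarkQueue in
NonMovingMark.h is backed by block-allocator blocks and layers the prefetch
FIFO on top, neither of which is shown here.

```c
/* Much-simplified chunked mark queue with an origin pointer per entry.
 * Names and sizes here are invented; the real MarkQueue uses block-sized
 * chunks and also keeps a small prefetch FIFO in front of this structure. */
#include <stdlib.h>

typedef struct closure closure;          /* opaque placeholder */

typedef struct mark_ent {
    closure  *obj;                       /* object still to be marked */
    closure **origin;                    /* where the reference was found, so
                                          * it can be updated later, e.g. when
                                          * obj turns out to be an indirection
                                          * or a selector thunk */
} mark_ent;

#define CHUNK_ENTS 1024

typedef struct mark_chunk {
    struct mark_chunk *next;             /* previously filled chunks   */
    size_t head;                         /* used entries in this chunk */
    mark_ent ents[CHUNK_ENTS];
} mark_chunk;

/* Push one entry, growing the queue one chunk at a time. */
void mark_push(mark_chunk **top, closure *obj, closure **origin)
{
    if (*top == NULL || (*top)->head == CHUNK_ENTS) {
        mark_chunk *c = calloc(1, sizeof *c);
        if (c == NULL) abort();
        c->next = *top;
        *top = c;
    }
    (*top)->ents[(*top)->head++] = (mark_ent){ obj, origin };
}
```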
[1] R. Garner, S.M. Blackburn, D. Frampton. "Effective Prefetch for
Mark-Sweep Garbage Collection." ISMM 2007.
Co-Authored-By: Ben Gamari <ben@well-typed.com>
-rw-r--r-- | includes/rts/storage/Block.h | 11
-rw-r--r-- | rts/Capability.c | 1
-rw-r--r-- | rts/Capability.h | 1
-rw-r--r-- | rts/RtsStartup.c | 3
-rw-r--r-- | rts/Weak.c | 18
-rw-r--r-- | rts/rts.cabal.in | 4
-rw-r--r-- | rts/sm/Evac.c | 40
-rw-r--r-- | rts/sm/GC.c | 312
-rw-r--r-- | rts/sm/GC.h | 2
-rw-r--r-- | rts/sm/GCAux.c | 8
-rw-r--r-- | rts/sm/GCThread.h | 4
-rw-r--r-- | rts/sm/NonMoving.c | 865
-rw-r--r-- | rts/sm/NonMoving.h | 285
-rw-r--r-- | rts/sm/NonMovingMark.c | 1217
-rw-r--r-- | rts/sm/NonMovingMark.h | 140
-rw-r--r-- | rts/sm/NonMovingScav.c | 366
-rw-r--r-- | rts/sm/NonMovingScav.h | 10
-rw-r--r-- | rts/sm/NonMovingSweep.c | 273
-rw-r--r-- | rts/sm/NonMovingSweep.h | 32
-rw-r--r-- | rts/sm/Sanity.c | 147
-rw-r--r-- | rts/sm/Sanity.h | 1
-rw-r--r-- | rts/sm/Scav.c | 28
-rw-r--r-- | rts/sm/Storage.c | 30
-rw-r--r-- | rts/sm/Storage.h | 1
24 files changed, 3644 insertions, 155 deletions
diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h index 792a72d717..32cf98958f 100644 --- a/includes/rts/storage/Block.h +++ b/includes/rts/storage/Block.h @@ -97,6 +97,8 @@ typedef struct bdescr_ { // block allocator. In particular, the // value (StgPtr)(-1) is used to // indicate that a block is unallocated. + // + // Unused by the non-moving allocator. struct bdescr_ *link; // used for chaining blocks together @@ -141,7 +143,8 @@ typedef struct bdescr_ { #define BF_LARGE 2 /* Block is pinned */ #define BF_PINNED 4 -/* Block is to be marked, not copied */ +/* Block is to be marked, not copied. Also used for marked large objects in + * non-moving heap. */ #define BF_MARKED 8 /* Block is executable */ #define BF_EXEC 32 @@ -153,6 +156,12 @@ typedef struct bdescr_ { #define BF_SWEPT 256 /* Block is part of a Compact */ #define BF_COMPACT 512 +/* A non-moving allocator segment (see NonMoving.c) */ +#define BF_NONMOVING 1024 +/* A large object which has been moved to off of oldest_gen->large_objects and + * onto nonmoving_large_objects. The mark phase ignores objects which aren't + * so-flagged */ +#define BF_NONMOVING_SWEEPING 2048 /* Maximum flag value (do not define anything higher than this!) */ #define BF_FLAG_MAX (1 << 15) diff --git a/rts/Capability.c b/rts/Capability.c index 8b552e0b09..23e581359e 100644 --- a/rts/Capability.c +++ b/rts/Capability.c @@ -27,6 +27,7 @@ #include "STM.h" #include "RtsUtils.h" #include "sm/OSMem.h" +#include "sm/BlockAlloc.h" // for countBlocks() #if !defined(mingw32_HOST_OS) #include "rts/IOManager.h" // for setIOManagerControlFd() diff --git a/rts/Capability.h b/rts/Capability.h index 2a5f127793..0c41456261 100644 --- a/rts/Capability.h +++ b/rts/Capability.h @@ -23,6 +23,7 @@ #include "sm/GC.h" // for evac_fn #include "Task.h" #include "Sparks.h" +#include "sm/NonMovingMark.h" // for MarkQueue #include "BeginPrivate.h" diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c index a202d53960..ce0fa2d519 100644 --- a/rts/RtsStartup.c +++ b/rts/RtsStartup.c @@ -436,6 +436,9 @@ hs_exit_(bool wait_foreign) /* shutdown the hpc support (if needed) */ exitHpc(); + /* wait for any on-going concurrent GC to finish */ + nonmovingExit(); + // clean up things from the storage manager's point of view. // also outputs the stats (+RTS -s) info. exitStorage(); diff --git a/rts/Weak.c b/rts/Weak.c index ec998c214f..fe4516794a 100644 --- a/rts/Weak.c +++ b/rts/Weak.c @@ -93,9 +93,19 @@ scheduleFinalizers(Capability *cap, StgWeak *list) StgWord size; uint32_t n, i; - ASSERT(n_finalizers == 0); - - finalizer_list = list; + // This assertion does not hold with non-moving collection because + // non-moving collector does not wait for the list to be consumed (by + // doIdleGcWork()) before appending the list with more finalizers. + ASSERT(RtsFlags.GcFlags.useNonmoving || n_finalizers == 0); + + // Append finalizer_list with the new list. TODO: Perhaps cache tail of the + // list for faster append. NOTE: We can't append `list` here! Otherwise we + // end up traversing already visited weaks in the loops below. + StgWeak **tl = &finalizer_list; + while (*tl) { + tl = &(*tl)->link; + } + *tl = list; // Traverse the list and // * count the number of Haskell finalizers @@ -130,7 +140,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list) SET_HDR(w, &stg_DEAD_WEAK_info, w->header.prof.ccs); } - n_finalizers = i; + n_finalizers += i; // No Haskell finalizers to run? 
if (n == 0) return; diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in index 99f1e7296d..7aad5e4385 100644 --- a/rts/rts.cabal.in +++ b/rts/rts.cabal.in @@ -465,6 +465,10 @@ library sm/GCUtils.c sm/MBlock.c sm/MarkWeak.c + sm/NonMoving.c + sm/NonMovingMark.c + sm/NonMovingScav.c + sm/NonMovingSweep.c sm/Sanity.c sm/Scav.c sm/Scav_thr.c diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index 666daf0e32..c2aaaacffa 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -27,6 +27,7 @@ #include "LdvProfile.h" #include "CNF.h" #include "Scav.h" +#include "NonMoving.h" #if defined(THREADED_RTS) && !defined(PARALLEL_GC) #define evacuate(p) evacuate1(p) @@ -62,9 +63,18 @@ STATIC_INLINE void evacuate_large(StgPtr p); Allocate some space in which to copy an object. -------------------------------------------------------------------------- */ +/* size is in words */ STATIC_INLINE StgPtr alloc_for_copy (uint32_t size, uint32_t gen_no) { + ASSERT(gen_no < RtsFlags.GcFlags.generations); + + if (RtsFlags.GcFlags.useNonmoving && major_gc) { + // unconditionally promote to non-moving heap in major gc + gct->copied += size; + return nonmovingAllocate(gct->cap, size); + } + StgPtr to; gen_workspace *ws; @@ -81,6 +91,11 @@ alloc_for_copy (uint32_t size, uint32_t gen_no) } } + if (RtsFlags.GcFlags.useNonmoving && gen_no == oldest_gen->no) { + gct->copied += size; + return nonmovingAllocate(gct->cap, size); + } + ws = &gct->gens[gen_no]; // zero memory references here /* chain a new block onto the to-space for the destination gen if @@ -100,6 +115,7 @@ alloc_for_copy (uint32_t size, uint32_t gen_no) The evacuate() code -------------------------------------------------------------------------- */ +/* size is in words */ STATIC_INLINE GNUC_ATTR_HOT void copy_tag(StgClosure **p, const StgInfoTable *info, StgClosure *src, uint32_t size, uint32_t gen_no, StgWord tag) @@ -296,7 +312,9 @@ evacuate_large(StgPtr p) */ new_gen_no = bd->dest_no; - if (new_gen_no < gct->evac_gen_no) { + if (RtsFlags.GcFlags.useNonmoving && major_gc) { + new_gen_no = oldest_gen->no; + } else if (new_gen_no < gct->evac_gen_no) { if (gct->eager_promotion) { new_gen_no = gct->evac_gen_no; } else { @@ -308,6 +326,9 @@ evacuate_large(StgPtr p) new_gen = &generations[new_gen_no]; bd->flags |= BF_EVACUATED; + if (RtsFlags.GcFlags.useNonmoving && new_gen == oldest_gen) { + bd->flags |= BF_NONMOVING; + } initBdescr(bd, new_gen, new_gen->to); // If this is a block of pinned or compact objects, we don't have to scan @@ -575,7 +596,16 @@ loop: bd = Bdescr((P_)q); - if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_COMPACT)) != 0) { + if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_COMPACT | BF_NONMOVING)) != 0) { + // Pointer to non-moving heap. Non-moving heap is collected using + // mark-sweep so this object should be marked and then retained in sweep. + if (bd->flags & BF_NONMOVING) { + // NOTE: large objects in nonmoving heap are also marked with + // BF_NONMOVING. Those are moved to scavenged_large_objects list in + // mark phase. + return; + } + // pointer into to-space: just return it. It might be a pointer // into a generation that we aren't collecting (> N), or it // might just be a pointer into to-space. 
The latter doesn't @@ -906,6 +936,10 @@ evacuate_BLACKHOLE(StgClosure **p) // blackholes can't be in a compact ASSERT((bd->flags & BF_COMPACT) == 0); + if (bd->flags & BF_NONMOVING) { + return; + } + // blackholes *can* be in a large object: when raiseAsync() creates an // AP_STACK the payload might be large enough to create a large object. // See #14497. @@ -1056,7 +1090,7 @@ selector_chain: // save any space in any case, and updating with an indirection is // trickier in a non-collected gen: we would have to update the // mutable list. - if (bd->flags & BF_EVACUATED) { + if ((bd->flags & BF_EVACUATED) || (bd->flags & BF_NONMOVING)) { unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p); *q = (StgClosure *)p; // shortcut, behave as for: if (evac) evacuate(q); diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 3f301aeb53..a3d0e4a164 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -51,6 +51,7 @@ #include "CheckUnload.h" #include "CNF.h" #include "RtsFlags.h" +#include "NonMoving.h" #include <string.h> // for memset() #include <unistd.h> @@ -159,7 +160,6 @@ static void mark_root (void *user, StgClosure **root); static void prepare_collected_gen (generation *gen); static void prepare_uncollected_gen (generation *gen); static void init_gc_thread (gc_thread *t); -static void resize_generations (void); static void resize_nursery (void); static void start_gc_threads (void); static void scavenge_until_all_done (void); @@ -572,7 +572,7 @@ GarbageCollect (uint32_t collect_gen, gen = &generations[g]; // for generations we collected... - if (g <= N) { + if (g <= N && !(RtsFlags.GcFlags.useNonmoving && gen == oldest_gen)) { /* free old memory and shift to-space into from-space for all * the collected generations (except the allocation area). These @@ -710,8 +710,42 @@ GarbageCollect (uint32_t collect_gen, } } // for all generations - // update the max size of older generations after a major GC - resize_generations(); + // Mark and sweep the oldest generation. + // N.B. This can only happen after we've moved + // oldest_gen->scavenged_large_objects back to oldest_gen->large_objects. + ASSERT(oldest_gen->scavenged_large_objects == NULL); + if (RtsFlags.GcFlags.useNonmoving && major_gc) { + // All threads in non-moving heap should be found to be alive, becuase + // threads in the non-moving generation's list should live in the + // non-moving heap, and we consider non-moving objects alive during + // preparation. + ASSERT(oldest_gen->old_threads == END_TSO_QUEUE); + // For weaks, remember that we evacuated all weaks to the non-moving heap + // in markWeakPtrList(), and then moved the weak_ptr_list list to + // old_weak_ptr_list. We then moved weaks with live keys to the + // weak_ptr_list again. Then, in collectDeadWeakPtrs() we moved weaks in + // old_weak_ptr_list to dead_weak_ptr_list. So at this point + // old_weak_ptr_list should be empty. + ASSERT(oldest_gen->old_weak_ptr_list == NULL); + + // we may need to take the lock to allocate mark queue blocks + RELEASE_SM_LOCK; + // dead_weak_ptr_list contains weak pointers with dead keys. Those need to + // be kept alive because we'll use them in finalizeSchedulers(). Similarly + // resurrected_threads are also going to be used in resurrectedThreads() + // so we need to mark those too. + // Note that in sequential case these lists will be appended with more + // weaks and threads found to be dead in mark. 
+ nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads); + ACQUIRE_SM_LOCK; + } + + // Update the max size of older generations after a major GC: + // We can't resize here in the case of the concurrent collector since we + // don't yet know how much live data we have. This will be instead done + // once we finish marking. + if (major_gc && RtsFlags.GcFlags.generations > 1 && ! RtsFlags.GcFlags.useNonmoving) + resizeGenerations(); // Free the mark stack. if (mark_stack_top_bd != NULL) { @@ -735,7 +769,7 @@ GarbageCollect (uint32_t collect_gen, // mark the garbage collected CAFs as dead #if defined(DEBUG) - if (major_gc) { gcCAFs(); } + if (major_gc && !RtsFlags.GcFlags.useNonmoving) { gcCAFs(); } #endif // Update the stable name hash table @@ -768,8 +802,9 @@ GarbageCollect (uint32_t collect_gen, // check sanity after GC // before resurrectThreads(), because that might overwrite some // closures, which will cause problems with THREADED where we don't - // fill slop. - IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc)); + // fill slop. If we are using the nonmoving collector then we can't claim to + // be *after* the major GC; it's now running concurrently. + IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc && !RtsFlags.GcFlags.useNonmoving)); // If a heap census is due, we need to do it before // resurrectThreads(), for the same reason as checkSanity above: @@ -942,6 +977,7 @@ new_gc_thread (uint32_t n, gc_thread *t) ws->todo_overflow = NULL; ws->n_todo_overflow = 0; ws->todo_large_objects = NULL; + ws->todo_seg = END_NONMOVING_TODO_LIST; ws->part_list = NULL; ws->n_part_blocks = 0; @@ -1321,6 +1357,18 @@ releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[]) #endif /* ---------------------------------------------------------------------------- + Save the mutable lists in saved_mut_lists where it will be scavenged + during GC + ------------------------------------------------------------------------- */ + +static void +stash_mut_list (Capability *cap, uint32_t gen_no) +{ + cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no]; + cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node); +} + +/* ---------------------------------------------------------------------------- Initialise a generation that is to be collected ------------------------------------------------------------------------- */ @@ -1331,11 +1379,17 @@ prepare_collected_gen (generation *gen) gen_workspace *ws; bdescr *bd, *next; - // Throw away the current mutable list. Invariant: the mutable - // list always has at least one block; this means we can avoid a - // check for NULL in recordMutable(). g = gen->no; - if (g != 0) { + + if (RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no) { + // Nonmoving heap's mutable list is always a root. + for (i = 0; i < n_capabilities; i++) { + stash_mut_list(capabilities[i], g); + } + } else if (g != 0) { + // Otherwise throw away the current mutable list. Invariant: the + // mutable list always has at least one block; this means we can avoid + // a check for NULL in recordMutable(). 
for (i = 0; i < n_capabilities; i++) { freeChain(capabilities[i]->mut_lists[g]); capabilities[i]->mut_lists[g] = @@ -1351,13 +1405,17 @@ prepare_collected_gen (generation *gen) gen->old_threads = gen->threads; gen->threads = END_TSO_QUEUE; - // deprecate the existing blocks - gen->old_blocks = gen->blocks; - gen->n_old_blocks = gen->n_blocks; - gen->blocks = NULL; - gen->n_blocks = 0; - gen->n_words = 0; - gen->live_estimate = 0; + // deprecate the existing blocks (except in the case of the nonmoving + // collector since these will be preserved in nonmovingCollect for the + // concurrent GC). + if (!(RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no)) { + gen->old_blocks = gen->blocks; + gen->n_old_blocks = gen->n_blocks; + gen->blocks = NULL; + gen->n_blocks = 0; + gen->n_words = 0; + gen->live_estimate = 0; + } // initialise the large object queues. ASSERT(gen->scavenged_large_objects == NULL); @@ -1451,18 +1509,6 @@ prepare_collected_gen (generation *gen) } } - -/* ---------------------------------------------------------------------------- - Save the mutable lists in saved_mut_lists - ------------------------------------------------------------------------- */ - -static void -stash_mut_list (Capability *cap, uint32_t gen_no) -{ - cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no]; - cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node); -} - /* ---------------------------------------------------------------------------- Initialise a generation that is *not* to be collected ------------------------------------------------------------------------- */ @@ -1531,31 +1577,57 @@ collect_gct_blocks (void) } /* ----------------------------------------------------------------------------- - During mutation, any blocks that are filled by allocatePinned() are - stashed on the local pinned_object_blocks list, to avoid needing to - take a global lock. Here we collect those blocks from the - cap->pinned_object_blocks lists and put them on the - main g0->large_object list. + During mutation, any blocks that are filled by allocatePinned() are stashed + on the local pinned_object_blocks list, to avoid needing to take a global + lock. Here we collect those blocks from the cap->pinned_object_blocks lists + and put them on the g0->large_object or oldest_gen->large_objects. + + How to decide which list to put them on? + + - When non-moving heap is enabled and this is a major GC, we put them on + oldest_gen. This is because after preparation we really want no + old-to-young references, and we want to be able to reset mut_lists. For + this we need to promote every potentially live object to the oldest gen. + + - Otherwise we put them on g0. 
-------------------------------------------------------------------------- */ static void collect_pinned_object_blocks (void) { - uint32_t n; - bdescr *bd, *prev; + generation *gen; + const bool use_nonmoving = RtsFlags.GcFlags.useNonmoving; + if (use_nonmoving && major_gc) { + gen = oldest_gen; + } else { + gen = g0; + } - for (n = 0; n < n_capabilities; n++) { - prev = NULL; - for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) { - prev = bd; + for (uint32_t n = 0; n < n_capabilities; n++) { + bdescr *last = NULL; + if (use_nonmoving && gen == oldest_gen) { + // Mark objects as belonging to the nonmoving heap + for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) { + bd->flags |= BF_NONMOVING; + bd->gen = oldest_gen; + bd->gen_no = oldest_gen->no; + oldest_gen->n_large_words += bd->free - bd->start; + oldest_gen->n_large_blocks += bd->blocks; + last = bd; + } + } else { + for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) { + last = bd; + } } - if (prev != NULL) { - prev->link = g0->large_objects; - if (g0->large_objects != NULL) { - g0->large_objects->u.back = prev; + + if (last != NULL) { + last->link = gen->large_objects; + if (gen->large_objects != NULL) { + gen->large_objects->u.back = last; } - g0->large_objects = capabilities[n]->pinned_object_blocks; - capabilities[n]->pinned_object_blocks = 0; + gen->large_objects = capabilities[n]->pinned_object_blocks; + capabilities[n]->pinned_object_blocks = NULL; } } } @@ -1614,98 +1686,100 @@ mark_root(void *user USED_IF_THREADS, StgClosure **root) percentage of the maximum heap size available to allocate into. ------------------------------------------------------------------------- */ -static void -resize_generations (void) +void +resizeGenerations (void) { uint32_t g; + W_ live, size, min_alloc, words; + const W_ max = RtsFlags.GcFlags.maxHeapSize; + const W_ gens = RtsFlags.GcFlags.generations; - if (major_gc && RtsFlags.GcFlags.generations > 1) { - W_ live, size, min_alloc, words; - const W_ max = RtsFlags.GcFlags.maxHeapSize; - const W_ gens = RtsFlags.GcFlags.generations; - - // live in the oldest generations - if (oldest_gen->live_estimate != 0) { - words = oldest_gen->live_estimate; - } else { - words = oldest_gen->n_words; - } - live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W + - oldest_gen->n_large_blocks + - oldest_gen->n_compact_blocks; + // live in the oldest generations + if (oldest_gen->live_estimate != 0) { + words = oldest_gen->live_estimate; + } else { + words = oldest_gen->n_words; + } + live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W + + oldest_gen->n_large_blocks + + oldest_gen->n_compact_blocks; - // default max size for all generations except zero - size = stg_max(live * RtsFlags.GcFlags.oldGenFactor, - RtsFlags.GcFlags.minOldGenSize); + // default max size for all generations except zero + size = stg_max(live * RtsFlags.GcFlags.oldGenFactor, + RtsFlags.GcFlags.minOldGenSize); - if (RtsFlags.GcFlags.heapSizeSuggestionAuto) { - if (max > 0) { - RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size); - } else { - RtsFlags.GcFlags.heapSizeSuggestion = size; - } + if (RtsFlags.GcFlags.heapSizeSuggestionAuto) { + if (max > 0) { + RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size); + } else { + RtsFlags.GcFlags.heapSizeSuggestion = size; } + } - // minimum size for generation zero - min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200, - RtsFlags.GcFlags.minAllocAreaSize - * (W_)n_capabilities); - - // Auto-enable 
compaction when the residency reaches a - // certain percentage of the maximum heap size (default: 30%). - if (RtsFlags.GcFlags.compact || - (max > 0 && - oldest_gen->n_blocks > - (RtsFlags.GcFlags.compactThreshold * max) / 100)) { - oldest_gen->mark = 1; - oldest_gen->compact = 1; + // minimum size for generation zero + min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200, + RtsFlags.GcFlags.minAllocAreaSize + * (W_)n_capabilities); + + // Auto-enable compaction when the residency reaches a + // certain percentage of the maximum heap size (default: 30%). + // Except when non-moving GC is enabled. + if (!RtsFlags.GcFlags.useNonmoving && + (RtsFlags.GcFlags.compact || + (max > 0 && + oldest_gen->n_blocks > + (RtsFlags.GcFlags.compactThreshold * max) / 100))) { + oldest_gen->mark = 1; + oldest_gen->compact = 1; // debugBelch("compaction: on\n", live); - } else { - oldest_gen->mark = 0; - oldest_gen->compact = 0; + } else { + oldest_gen->mark = 0; + oldest_gen->compact = 0; // debugBelch("compaction: off\n", live); - } + } - if (RtsFlags.GcFlags.sweep) { - oldest_gen->mark = 1; - } + if (RtsFlags.GcFlags.sweep) { + oldest_gen->mark = 1; + } - // if we're going to go over the maximum heap size, reduce the - // size of the generations accordingly. The calculation is - // different if compaction is turned on, because we don't need - // to double the space required to collect the old generation. - if (max != 0) { + // if we're going to go over the maximum heap size, reduce the + // size of the generations accordingly. The calculation is + // different if compaction is turned on, because we don't need + // to double the space required to collect the old generation. + if (max != 0) { + + // this test is necessary to ensure that the calculations + // below don't have any negative results - we're working + // with unsigned values here. + if (max < min_alloc) { + heapOverflow(); + } - // this test is necessary to ensure that the calculations - // below don't have any negative results - we're working - // with unsigned values here. 
- if (max < min_alloc) { - heapOverflow(); + if (oldest_gen->compact) { + if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) { + size = (max - min_alloc) / ((gens - 1) * 2 - 1); } - - if (oldest_gen->compact) { - if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) { - size = (max - min_alloc) / ((gens - 1) * 2 - 1); - } - } else { - if ( (size * (gens - 1) * 2) + min_alloc > max ) { - size = (max - min_alloc) / ((gens - 1) * 2); - } + } else { + if ( (size * (gens - 1) * 2) + min_alloc > max ) { + size = (max - min_alloc) / ((gens - 1) * 2); } + } - if (size < live) { - heapOverflow(); - } + if (size < live) { + heapOverflow(); } + } #if 0 - debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live, - min_alloc, size, max); + debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live, + min_alloc, size, max); + debugBelch("resize_gen: n_blocks: %lu, n_large_block: %lu, n_compact_blocks: %lu\n", + oldest_gen->n_blocks, oldest_gen->n_large_blocks, oldest_gen->n_compact_blocks); + debugBelch("resize_gen: max_blocks: %lu -> %lu\n", oldest_gen->max_blocks, oldest_gen->n_blocks); #endif - for (g = 0; g < gens; g++) { - generations[g].max_blocks = size; - } + for (g = 0; g < gens; g++) { + generations[g].max_blocks = size; } } diff --git a/rts/sm/GC.h b/rts/sm/GC.h index 43cc4ca8a1..ed19b8bddf 100644 --- a/rts/sm/GC.h +++ b/rts/sm/GC.h @@ -55,6 +55,8 @@ void gcWorkerThread (Capability *cap); void initGcThreads (uint32_t from, uint32_t to); void freeGcThreads (void); +void resizeGenerations (void); + #if defined(THREADED_RTS) void waitForGcThreads (Capability *cap, bool idle_cap[]); void releaseGCThreads (Capability *cap, bool idle_cap[]); diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c index 650dc2c1df..6076f61d6f 100644 --- a/rts/sm/GCAux.c +++ b/rts/sm/GCAux.c @@ -60,6 +60,14 @@ isAlive(StgClosure *p) // ignore closures in generations that we're not collecting. bd = Bdescr((P_)q); + // isAlive is used when scavenging moving generations, before the mark + // phase. Because we don't know alive-ness of objects before the mark phase + // we have to conservatively treat objects in the non-moving generation as + // alive here. 
+ if (bd->flags & BF_NONMOVING) { + return p; + } + // if it's a pointer into to-space, then we're done if (bd->flags & BF_EVACUATED) { return p; diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h index 66f7a7f84f..3012f52f28 100644 --- a/rts/sm/GCThread.h +++ b/rts/sm/GCThread.h @@ -83,6 +83,7 @@ typedef struct gen_workspace_ { bdescr * todo_bd; StgPtr todo_free; // free ptr for todo_bd StgPtr todo_lim; // lim for todo_bd + struct NonmovingSegment *todo_seg; // only available for oldest gen workspace WSDeque * todo_q; bdescr * todo_overflow; @@ -100,9 +101,6 @@ typedef struct gen_workspace_ { bdescr * part_list; StgWord n_part_blocks; // count of above StgWord n_part_words; - - StgWord pad[1]; - } gen_workspace ATTRIBUTE_ALIGNED(64); // align so that computing gct->gens[n] is a shift, not a multiply // fails if the size is <64, which is why we need the pad above diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c new file mode 100644 index 0000000000..f383949ebf --- /dev/null +++ b/rts/sm/NonMoving.c @@ -0,0 +1,865 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2018 + * + * Non-moving garbage collector and allocator + * + * ---------------------------------------------------------------------------*/ + +#include "Rts.h" +#include "RtsUtils.h" +#include "Capability.h" +#include "Printer.h" +#include "Storage.h" +// We call evacuate, which expects the thread-local gc_thread to be valid; +// This is sometimes declared as a register variable therefore it is necessary +// to include the declaration so that the compiler doesn't clobber the register. +#include "GCThread.h" +#include "GCTDecl.h" +#include "Schedule.h" + +#include "NonMoving.h" +#include "NonMovingMark.h" +#include "NonMovingSweep.h" +#include "StablePtr.h" // markStablePtrTable +#include "Schedule.h" // markScheduler +#include "Weak.h" // dead_weak_ptr_list + +struct NonmovingHeap nonmovingHeap; + +uint8_t nonmovingMarkEpoch = 1; + +static void nonmovingBumpEpoch(void) { + nonmovingMarkEpoch = nonmovingMarkEpoch == 1 ? 2 : 1; +} + +/* + * Note [Non-moving garbage collector] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * TODO + * + * Note [Concurrent non-moving collection] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Concurrency-control of non-moving garbage collection is a bit tricky. There + * are a few things to keep in mind: + * + * - Only one non-moving collection may be active at a time. This is enforced by the + * concurrent_coll_running flag, which is set when a collection is on-going. If + * we attempt to initiate a new collection while this is set we wait on the + * concurrent_coll_finished condition variable, which signals when the + * active collection finishes. + * + * - In between the mark and sweep phases the non-moving collector must synchronize + * with mutator threads to collect and mark their final update remembered + * sets. This is accomplished using + * stopAllCapabilitiesWith(SYNC_FLUSH_UPD_REM_SET). Capabilities are held + * the final mark has concluded. + * + * + * Note [Live data accounting in nonmoving collector] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * The nonmoving collector uses an approximate heuristic for reporting live + * data quantity. Specifically, during mark we record how much live data we + * find in nonmoving_live_words. At the end of mark we declare this amount to + * be how much live data we have on in the nonmoving heap (by setting + * oldest_gen->live_estimate). 
+ * + * In addition, we update oldest_gen->live_estimate every time we fill a + * segment. This, as well, is quite approximate: we assume that all blocks + * above next_free_next are newly-allocated. In principle we could refer to the + * bitmap to count how many blocks we actually allocated but this too would be + * approximate due to concurrent collection and ultimately seems more costly + * than the problem demands. + * + */ + +memcount nonmoving_live_words = 0; + +static void nonmovingClearBitmap(struct NonmovingSegment *seg); +static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads); + +/* Signals to mutators that they should stop to synchronize with the nonmoving + * collector so it can proceed to sweep phase. */ +bool nonmoving_syncing = false; + +static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t block_size) +{ + seg->link = NULL; + seg->todo_link = NULL; + seg->next_free = 0; + seg->next_free_snap = 0; + seg->block_size = block_size; + nonmovingClearBitmap(seg); + Bdescr((P_)seg)->u.scan = nonmovingSegmentGetBlock(seg, 0); +} + +// Add a segment to the free list. +void nonmovingPushFreeSegment(struct NonmovingSegment *seg) +{ + // See Note [Live data accounting in nonmoving collector]. + if (nonmovingHeap.n_free > NONMOVING_MAX_FREE) { + bdescr *bd = Bdescr((StgPtr) seg); + ACQUIRE_SM_LOCK; + ASSERT(oldest_gen->n_blocks >= bd->blocks); + ASSERT(oldest_gen->n_words >= BLOCK_SIZE_W * bd->blocks); + oldest_gen->n_blocks -= bd->blocks; + oldest_gen->n_words -= BLOCK_SIZE_W * bd->blocks; + freeGroup(bd); + RELEASE_SM_LOCK; + return; + } + + while (true) { + struct NonmovingSegment *old = nonmovingHeap.free; + seg->link = old; + if (cas((StgVolatilePtr) &nonmovingHeap.free, (StgWord) old, (StgWord) seg) == (StgWord) old) + break; + } + __sync_add_and_fetch(&nonmovingHeap.n_free, 1); +} + +static struct NonmovingSegment *nonmovingPopFreeSegment(void) +{ + while (true) { + struct NonmovingSegment *seg = nonmovingHeap.free; + if (seg == NULL) { + return NULL; + } + if (cas((StgVolatilePtr) &nonmovingHeap.free, + (StgWord) seg, + (StgWord) seg->link) == (StgWord) seg) { + __sync_sub_and_fetch(&nonmovingHeap.n_free, 1); + return seg; + } + } +} + +/* + * Request a fresh segment from the free segment list or allocate one of the + * given node. + * + */ +static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node) +{ + // First try taking something off of the free list + struct NonmovingSegment *ret; + ret = nonmovingPopFreeSegment(); + + // Nothing in the free list, allocate a new segment... + if (ret == NULL) { + // Take gc spinlock: another thread may be scavenging a moving + // generation and call `todo_block_full` + ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); + bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS); + // See Note [Live data accounting in nonmoving collector]. 
+ oldest_gen->n_blocks += bd->blocks; + oldest_gen->n_words += BLOCK_SIZE_W * bd->blocks; + RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + + for (StgWord32 i = 0; i < bd->blocks; ++i) { + initBdescr(&bd[i], oldest_gen, oldest_gen); + bd[i].flags = BF_NONMOVING; + } + ret = (struct NonmovingSegment *)bd->start; + } + + // Check alignment + ASSERT(((uintptr_t)ret % NONMOVING_SEGMENT_SIZE) == 0); + return ret; +} + +static inline unsigned long log2_floor(unsigned long x) +{ + return sizeof(unsigned long)*8 - 1 - __builtin_clzl(x); +} + +static inline unsigned long log2_ceil(unsigned long x) +{ + unsigned long log = log2_floor(x); + return (x - (1 << log)) ? log + 1 : log; +} + +// Advance a segment's next_free pointer. Returns true if segment if full. +static bool advance_next_free(struct NonmovingSegment *seg) +{ + uint8_t *bitmap = seg->bitmap; + unsigned int blk_count = nonmovingSegmentBlockCount(seg); + for (unsigned int i = seg->next_free+1; i < blk_count; i++) { + if (!bitmap[i]) { + seg->next_free = i; + return false; + } + } + seg->next_free = blk_count; + return true; +} + +static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *alloca) +{ + while (true) { + struct NonmovingSegment *seg = alloca->active; + if (seg == NULL) { + return NULL; + } + if (cas((StgVolatilePtr) &alloca->active, + (StgWord) seg, + (StgWord) seg->link) == (StgWord) seg) { + return seg; + } + } +} + +/* sz is in words */ +GNUC_ATTR_HOT +void *nonmovingAllocate(Capability *cap, StgWord sz) +{ + unsigned int allocator_idx = log2_ceil(sz * sizeof(StgWord)) - NONMOVING_ALLOCA0; + + // The max we ever allocate is 3276 bytes (anything larger is a large + // object and not moved) which is covered by allocator 9. + ASSERT(allocator_idx < NONMOVING_ALLOCA_CNT); + + struct NonmovingAllocator *alloca = nonmovingHeap.allocators[allocator_idx]; + + // Allocate into current segment + struct NonmovingSegment *current = alloca->current[cap->no]; + ASSERT(current); // current is never NULL + void *ret = nonmovingSegmentGetBlock(current, current->next_free); + ASSERT(GET_CLOSURE_TAG(ret) == 0); // check alignment + + // Add segment to the todo list unless it's already there + // current->todo_link == NULL means not in todo list + if (!current->todo_link) { + gen_workspace *ws = &gct->gens[oldest_gen->no]; + current->todo_link = ws->todo_seg; + ws->todo_seg = current; + } + + // Advance the current segment's next_free or allocate a new segment if full + bool full = advance_next_free(current); + if (full) { + // Current segment is full: update live data estimate link it to + // filled, take an active segment if one exists, otherwise allocate a + // new segment. + + // Update live data estimate. + // See Note [Live data accounting in nonmoving collector]. 
+ unsigned int new_blocks = nonmovingSegmentBlockCount(current) - current->next_free_snap; + atomic_inc(&oldest_gen->live_estimate, new_blocks * nonmovingSegmentBlockSize(current) / sizeof(W_)); + + // push the current segment to the filled list + nonmovingPushFilledSegment(current); + + // first look for a new segment in the active list + struct NonmovingSegment *new_current = pop_active_segment(alloca); + + // there are no active segments, allocate new segment + if (new_current == NULL) { + new_current = nonmovingAllocSegment(cap->node); + nonmovingInitSegment(new_current, NONMOVING_ALLOCA0 + allocator_idx); + } + + // make it current + new_current->link = NULL; + alloca->current[cap->no] = new_current; + } + + return ret; +} + +/* Allocate a nonmovingAllocator */ +static struct NonmovingAllocator *alloc_nonmoving_allocator(uint32_t n_caps) +{ + size_t allocator_sz = + sizeof(struct NonmovingAllocator) + + sizeof(void*) * n_caps; // current segment pointer for each capability + struct NonmovingAllocator *alloc = + stgMallocBytes(allocator_sz, "nonmovingInit"); + memset(alloc, 0, allocator_sz); + return alloc; +} + +void nonmovingInit(void) +{ + if (! RtsFlags.GcFlags.useNonmoving) return; + for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) { + nonmovingHeap.allocators[i] = alloc_nonmoving_allocator(n_capabilities); + } +} + +void nonmovingExit(void) +{ + if (! RtsFlags.GcFlags.useNonmoving) return; + for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) { + stgFree(nonmovingHeap.allocators[i]); + } +} + +/* + * Wait for any concurrent collections to finish. Called during shutdown to + * ensure we don't steal capabilities that the nonmoving collector still has yet + * to synchronize with. + */ +void nonmovingWaitUntilFinished(void) +{ +} + +/* + * Assumes that no garbage collector or mutator threads are running to safely + * resize the nonmoving_allocators. + * + * Must hold sm_mutex. 
+ */ +void nonmovingAddCapabilities(uint32_t new_n_caps) +{ + unsigned int old_n_caps = nonmovingHeap.n_caps; + struct NonmovingAllocator **allocs = nonmovingHeap.allocators; + + for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) { + struct NonmovingAllocator *old = allocs[i]; + allocs[i] = alloc_nonmoving_allocator(new_n_caps); + + // Copy the old state + allocs[i]->filled = old->filled; + allocs[i]->active = old->active; + for (unsigned int j = 0; j < old_n_caps; j++) { + allocs[i]->current[j] = old->current[j]; + } + stgFree(old); + + // Initialize current segments for the new capabilities + for (unsigned int j = old_n_caps; j < new_n_caps; j++) { + allocs[i]->current[j] = nonmovingAllocSegment(capabilities[j]->node); + nonmovingInitSegment(allocs[i]->current[j], NONMOVING_ALLOCA0 + i); + allocs[i]->current[j]->link = NULL; + } + } + nonmovingHeap.n_caps = new_n_caps; +} + +static void nonmovingClearBitmap(struct NonmovingSegment *seg) +{ + unsigned int n = nonmovingSegmentBlockCount(seg); + memset(seg->bitmap, 0, n); +} + +static void nonmovingClearSegmentBitmaps(struct NonmovingSegment *seg) +{ + while (seg) { + nonmovingClearBitmap(seg); + seg = seg->link; + } +} + +static void nonmovingClearAllBitmaps(void) +{ + for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) { + struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx]; + nonmovingClearSegmentBitmaps(alloca->filled); + } + + // Clear large object bits + for (bdescr *bd = nonmoving_large_objects; bd; bd = bd->link) { + bd->flags &= ~BF_MARKED; + } +} + +/* Prepare the heap bitmaps and snapshot metadata for a mark */ +static void nonmovingPrepareMark(void) +{ + nonmovingClearAllBitmaps(); + nonmovingBumpEpoch(); + for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) { + struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx]; + + // Update current segments' snapshot pointers + for (uint32_t cap_n = 0; cap_n < n_capabilities; ++cap_n) { + struct NonmovingSegment *seg = alloca->current[cap_n]; + seg->next_free_snap = seg->next_free; + } + + // Update filled segments' snapshot pointers + struct NonmovingSegment *seg = alloca->filled; + while (seg) { + seg->next_free_snap = seg->next_free; + seg = seg->link; + } + + // N.B. It's not necessary to update snapshot pointers of active segments; + // they were set after they were swept and haven't seen any allocation + // since. + } + + ASSERT(oldest_gen->scavenged_large_objects == NULL); + bdescr *next; + for (bdescr *bd = oldest_gen->large_objects; bd; bd = next) { + next = bd->link; + bd->flags |= BF_NONMOVING_SWEEPING; + dbl_link_onto(bd, &nonmoving_large_objects); + } + n_nonmoving_large_blocks += oldest_gen->n_large_blocks; + oldest_gen->large_objects = NULL; + oldest_gen->n_large_words = 0; + oldest_gen->n_large_blocks = 0; + nonmoving_live_words = 0; + +#if defined(DEBUG) + debug_caf_list_snapshot = debug_caf_list; + debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST; +#endif +} + +// Mark weak pointers in the non-moving heap. They'll either end up in +// dead_weak_ptr_list or stay in weak_ptr_list. Either way they need to be kept +// during sweep. See `MarkWeak.c:markWeakPtrList` for the moving heap variant +// of this. 
+static void nonmovingMarkWeakPtrList(MarkQueue *mark_queue, StgWeak *dead_weak_ptr_list) +{ + for (StgWeak *w = oldest_gen->weak_ptr_list; w; w = w->link) { + markQueuePushClosure_(mark_queue, (StgClosure*)w); + // Do not mark finalizers and values here, those fields will be marked + // in `nonmovingMarkDeadWeaks` (for dead weaks) or + // `nonmovingTidyWeaks` (for live weaks) + } + + // We need to mark dead_weak_ptr_list too. This is subtle: + // + // - By the beginning of this GC we evacuated all weaks to the non-moving + // heap (in `markWeakPtrList`) + // + // - During the scavenging of the moving heap we discovered that some of + // those weaks are dead and moved them to `dead_weak_ptr_list`. Note that + // because of the fact above _all weaks_ are in the non-moving heap at + // this point. + // + // - So, to be able to traverse `dead_weak_ptr_list` and run finalizers we + // need to mark it. + for (StgWeak *w = dead_weak_ptr_list; w; w = w->link) { + markQueuePushClosure_(mark_queue, (StgClosure*)w); + nonmovingMarkDeadWeak(mark_queue, w); + } +} + +void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads) +{ + resizeGenerations(); + + nonmovingPrepareMark(); + nonmovingPrepareSweep(); + + // N.B. These should have been cleared at the end of the last sweep. + ASSERT(nonmoving_marked_large_objects == NULL); + ASSERT(n_nonmoving_marked_large_blocks == 0); + + MarkQueue *mark_queue = stgMallocBytes(sizeof(MarkQueue), "mark queue"); + initMarkQueue(mark_queue); + current_mark_queue = mark_queue; + + // Mark roots + markCAFs((evac_fn)markQueueAddRoot, mark_queue); + for (unsigned int n = 0; n < n_capabilities; ++n) { + markCapability((evac_fn)markQueueAddRoot, mark_queue, + capabilities[n], true/*don't mark sparks*/); + } + markScheduler((evac_fn)markQueueAddRoot, mark_queue); + nonmovingMarkWeakPtrList(mark_queue, *dead_weaks); + markStablePtrTable((evac_fn)markQueueAddRoot, mark_queue); + + // Mark threads resurrected during moving heap scavenging + for (StgTSO *tso = *resurrected_threads; tso != END_TSO_QUEUE; tso = tso->global_link) { + markQueuePushClosure_(mark_queue, (StgClosure*)tso); + } + + // Roots marked, mark threads and weak pointers + + // At this point all threads are moved to threads list (from old_threads) + // and all weaks are moved to weak_ptr_list (from old_weak_ptr_list) by + // the previous scavenge step, so we need to move them to "old" lists + // again. + + // Fine to override old_threads because any live or resurrected threads are + // moved to threads or resurrected_threads lists. + ASSERT(oldest_gen->old_threads == END_TSO_QUEUE); + ASSERT(nonmoving_old_threads == END_TSO_QUEUE); + nonmoving_old_threads = oldest_gen->threads; + oldest_gen->threads = END_TSO_QUEUE; + + // Make sure we don't lose any weak ptrs here. Weaks in old_weak_ptr_list + // will either be moved to `dead_weaks` (if dead) or `weak_ptr_list` (if + // alive). + ASSERT(oldest_gen->old_weak_ptr_list == NULL); + ASSERT(nonmoving_old_weak_ptr_list == NULL); + nonmoving_old_weak_ptr_list = oldest_gen->weak_ptr_list; + oldest_gen->weak_ptr_list = NULL; + + // We are now safe to start concurrent marking + + // Note that in concurrent mark we can't use dead_weaks and + // resurrected_threads from the preparation to add new weaks and threads as + // that would cause races between minor collection and mark. So we only pass + // those lists to mark function in sequential case. In concurrent case we + // allocate fresh lists. 
+ + // Use the weak and thread lists from the preparation for any new weaks and + // threads found to be dead in mark. + nonmovingMark_(mark_queue, dead_weaks, resurrected_threads); +} + +/* Mark mark queue, threads, and weak pointers until no more weaks have been + * resuscitated + */ +static void nonmovingMarkThreadsWeaks(MarkQueue *mark_queue) +{ + while (true) { + // Propagate marks + nonmovingMark(mark_queue); + + // Tidy threads and weaks + nonmovingTidyThreads(); + + if (! nonmovingTidyWeaks(mark_queue)) + return; + } +} + +static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads) +{ + debugTrace(DEBUG_nonmoving_gc, "Starting mark..."); + + // Do concurrent marking; most of the heap will get marked here. + nonmovingMarkThreadsWeaks(mark_queue); + + nonmovingResurrectThreads(mark_queue, resurrected_threads); + + // No more resurrecting threads after this point + + // Do last marking of weak pointers + while (true) { + // Propagate marks + nonmovingMark(mark_queue); + + if (!nonmovingTidyWeaks(mark_queue)) + break; + } + + nonmovingMarkDeadWeaks(mark_queue, dead_weaks); + + // Propagate marks + nonmovingMark(mark_queue); + + // Now remove all dead objects from the mut_list to ensure that a younger + // generation collection doesn't attempt to look at them after we've swept. + nonmovingSweepMutLists(); + + debugTrace(DEBUG_nonmoving_gc, + "Done marking, resurrecting threads before releasing capabilities"); + +#if defined(DEBUG) + // Zap CAFs that we will sweep + nonmovingGcCafs(mark_queue); +#endif + + ASSERT(mark_queue->top->head == 0); + ASSERT(mark_queue->blocks->link == NULL); + + // Update oldest_gen thread and weak lists + // Note that we need to append these lists as a concurrent minor GC may have + // added stuff to them while we're doing mark-sweep concurrently + { + StgTSO **threads = &oldest_gen->threads; + while (*threads != END_TSO_QUEUE) { + threads = &(*threads)->global_link; + } + *threads = nonmoving_threads; + nonmoving_threads = END_TSO_QUEUE; + nonmoving_old_threads = END_TSO_QUEUE; + } + + { + StgWeak **weaks = &oldest_gen->weak_ptr_list; + while (*weaks) { + weaks = &(*weaks)->link; + } + *weaks = nonmoving_weak_ptr_list; + nonmoving_weak_ptr_list = NULL; + nonmoving_old_weak_ptr_list = NULL; + } + + current_mark_queue = NULL; + freeMarkQueue(mark_queue); + stgFree(mark_queue); + + oldest_gen->live_estimate = nonmoving_live_words; + oldest_gen->n_old_blocks = 0; + resizeGenerations(); + + /**************************************************** + * Sweep + ****************************************************/ + + // Because we can't mark large object blocks (no room for mark bit) we + // collect them in a map in mark_queue and we pass it here to sweep large + // objects + nonmovingSweepLargeObjects(); + nonmovingSweepStableNameTable(); + + nonmovingSweep(); + ASSERT(nonmovingHeap.sweep_list == NULL); + debugTrace(DEBUG_nonmoving_gc, "Finished sweeping."); + + // TODO: Remainder of things done by GarbageCollect (update stats) +} + +#if defined(DEBUG) + +// Use this with caution: this doesn't work correctly during scavenge phase +// when we're doing parallel scavenging. Use it in mark phase or later (where +// we don't allocate more anymore). 
+void assert_in_nonmoving_heap(StgPtr p) +{ + if (!HEAP_ALLOCED_GC(p)) + return; + + bdescr *bd = Bdescr(p); + if (bd->flags & BF_LARGE) { + // It should be in a capability (if it's not filled yet) or in non-moving heap + for (uint32_t cap = 0; cap < n_capabilities; ++cap) { + if (bd == capabilities[cap]->pinned_object_block) { + return; + } + } + ASSERT(bd->flags & BF_NONMOVING); + return; + } + + // Search snapshot segments + for (struct NonmovingSegment *seg = nonmovingHeap.sweep_list; seg; seg = seg->link) { + if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) { + return; + } + } + + for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) { + struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx]; + // Search current segments + for (uint32_t cap_idx = 0; cap_idx < n_capabilities; ++cap_idx) { + struct NonmovingSegment *seg = alloca->current[cap_idx]; + if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) { + return; + } + } + + // Search active segments + int seg_idx = 0; + struct NonmovingSegment *seg = alloca->active; + while (seg) { + if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) { + return; + } + seg_idx++; + seg = seg->link; + } + + // Search filled segments + seg_idx = 0; + seg = alloca->filled; + while (seg) { + if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) { + return; + } + seg_idx++; + seg = seg->link; + } + } + + // We don't search free segments as they're unused + + barf("%p is not in nonmoving heap\n", (void*)p); +} + +void nonmovingPrintSegment(struct NonmovingSegment *seg) +{ + int num_blocks = nonmovingSegmentBlockCount(seg); + + debugBelch("Segment with %d blocks of size 2^%d (%d bytes, %u words, scan: %p)\n", + num_blocks, + seg->block_size, + 1 << seg->block_size, + (unsigned int) ROUNDUP_BYTES_TO_WDS(1 << seg->block_size), + (void*)Bdescr((P_)seg)->u.scan); + + for (nonmoving_block_idx p_idx = 0; p_idx < seg->next_free; ++p_idx) { + StgClosure *p = (StgClosure*)nonmovingSegmentGetBlock(seg, p_idx); + if (nonmovingGetMark(seg, p_idx) != 0) { + debugBelch("%d (%p)* :\t", p_idx, (void*)p); + } else { + debugBelch("%d (%p) :\t", p_idx, (void*)p); + } + printClosure(p); + } + + debugBelch("End of segment\n\n"); +} + +void nonmovingPrintAllocator(struct NonmovingAllocator *alloc) +{ + debugBelch("Allocator at %p\n", (void*)alloc); + debugBelch("Filled segments:\n"); + for (struct NonmovingSegment *seg = alloc->filled; seg != NULL; seg = seg->link) { + debugBelch("%p ", (void*)seg); + } + debugBelch("\nActive segments:\n"); + for (struct NonmovingSegment *seg = alloc->active; seg != NULL; seg = seg->link) { + debugBelch("%p ", (void*)seg); + } + debugBelch("\nCurrent segments:\n"); + for (uint32_t i = 0; i < n_capabilities; ++i) { + debugBelch("%p ", alloc->current[i]); + } + debugBelch("\n"); +} + +void locate_object(P_ obj) +{ + // Search allocators + for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) { + struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx]; + for (uint32_t cap = 0; cap < n_capabilities; ++cap) { + struct NonmovingSegment *seg = alloca->current[cap]; + if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) { + debugBelch("%p is in current segment of capability %d of allocator %d at %p\n", obj, cap, alloca_idx, (void*)seg); + return; + } + } + int seg_idx = 0; + struct NonmovingSegment *seg = alloca->active; + while (seg) { + if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) { + 
debugBelch("%p is in active segment %d of allocator %d at %p\n", obj, seg_idx, alloca_idx, (void*)seg); + return; + } + seg_idx++; + seg = seg->link; + } + + seg_idx = 0; + seg = alloca->filled; + while (seg) { + if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) { + debugBelch("%p is in filled segment %d of allocator %d at %p\n", obj, seg_idx, alloca_idx, (void*)seg); + return; + } + seg_idx++; + seg = seg->link; + } + } + + struct NonmovingSegment *seg = nonmovingHeap.free; + int seg_idx = 0; + while (seg) { + if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) { + debugBelch("%p is in free segment %d at %p\n", obj, seg_idx, (void*)seg); + return; + } + seg_idx++; + seg = seg->link; + } + + // Search nurseries + for (uint32_t nursery_idx = 0; nursery_idx < n_nurseries; ++nursery_idx) { + for (bdescr* nursery_block = nurseries[nursery_idx].blocks; nursery_block; nursery_block = nursery_block->link) { + if (obj >= nursery_block->start && obj <= nursery_block->start + nursery_block->blocks*BLOCK_SIZE_W) { + debugBelch("%p is in nursery %d\n", obj, nursery_idx); + return; + } + } + } + + // Search generations + for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++g) { + generation *gen = &generations[g]; + for (bdescr *blk = gen->blocks; blk; blk = blk->link) { + if (obj >= blk->start && obj < blk->free) { + debugBelch("%p is in generation %" FMT_Word32 " blocks\n", obj, g); + return; + } + } + for (bdescr *blk = gen->old_blocks; blk; blk = blk->link) { + if (obj >= blk->start && obj < blk->free) { + debugBelch("%p is in generation %" FMT_Word32 " old blocks\n", obj, g); + return; + } + } + } + + // Search large objects + for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++g) { + generation *gen = &generations[g]; + for (bdescr *large_block = gen->large_objects; large_block; large_block = large_block->link) { + if ((P_)large_block->start == obj) { + debugBelch("%p is in large blocks of generation %d\n", obj, g); + return; + } + } + } + + for (bdescr *large_block = nonmoving_large_objects; large_block; large_block = large_block->link) { + if ((P_)large_block->start == obj) { + debugBelch("%p is in nonmoving_large_objects\n", obj); + return; + } + } + + for (bdescr *large_block = nonmoving_marked_large_objects; large_block; large_block = large_block->link) { + if ((P_)large_block->start == obj) { + debugBelch("%p is in nonmoving_marked_large_objects\n", obj); + return; + } + } +} + +void nonmovingPrintSweepList() +{ + debugBelch("==== SWEEP LIST =====\n"); + int i = 0; + for (struct NonmovingSegment *seg = nonmovingHeap.sweep_list; seg; seg = seg->link) { + debugBelch("%d: %p\n", i++, (void*)seg); + } + debugBelch("= END OF SWEEP LIST =\n"); +} + +void check_in_mut_list(StgClosure *p) +{ + for (uint32_t cap_n = 0; cap_n < n_capabilities; ++cap_n) { + for (bdescr *bd = capabilities[cap_n]->mut_lists[oldest_gen->no]; bd; bd = bd->link) { + for (StgPtr q = bd->start; q < bd->free; ++q) { + if (*((StgPtr**)q) == (StgPtr*)p) { + debugBelch("Object is in mut list of cap %d: %p\n", cap_n, capabilities[cap_n]->mut_lists[oldest_gen->no]); + return; + } + } + } + } + + debugBelch("Object is not in a mut list\n"); +} + +void print_block_list(bdescr* bd) +{ + while (bd) { + debugBelch("%p, ", (void*)bd); + bd = bd->link; + } + debugBelch("\n"); +} + +void print_thread_list(StgTSO* tso) +{ + while (tso != END_TSO_QUEUE) { + printClosure((StgClosure*)tso); + tso = tso->global_link; + } +} + +#endif diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h new 
file mode 100644 index 0000000000..a031f3d223 --- /dev/null +++ b/rts/sm/NonMoving.h @@ -0,0 +1,285 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2018 + * + * Non-moving garbage collector and allocator + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#if !defined(CMINUSMINUS) + +#include <string.h> +#include "HeapAlloc.h" +#include "NonMovingMark.h" + +#include "BeginPrivate.h" + +// Segments +#define NONMOVING_SEGMENT_BITS 15 // 2^15 = 32kByte +// Mask to find base of segment +#define NONMOVING_SEGMENT_MASK ((1 << NONMOVING_SEGMENT_BITS) - 1) +// In bytes +#define NONMOVING_SEGMENT_SIZE (1 << NONMOVING_SEGMENT_BITS) +// In words +#define NONMOVING_SEGMENT_SIZE_W ((1 << NONMOVING_SEGMENT_BITS) / SIZEOF_VOID_P) +// In blocks +#define NONMOVING_SEGMENT_BLOCKS (NONMOVING_SEGMENT_SIZE / BLOCK_SIZE) + +_Static_assert(NONMOVING_SEGMENT_SIZE % BLOCK_SIZE == 0, + "non-moving segment size must be multiple of block size"); + +// The index of a block within a segment +typedef uint16_t nonmoving_block_idx; + +// A non-moving heap segment +struct NonmovingSegment { + struct NonmovingSegment *link; // for linking together segments into lists + struct NonmovingSegment *todo_link; // NULL when not in todo list + nonmoving_block_idx next_free; // index of the next unallocated block + nonmoving_block_idx next_free_snap; // snapshot of next_free + uint8_t block_size; // log2 of block size in bytes + uint8_t bitmap[]; // liveness bitmap + // After the liveness bitmap comes the data blocks. Note that we need to + // ensure that the size of this struct (including the bitmap) is a multiple + // of the word size since GHC assumes that all object pointers are + // so-aligned. +}; + +// This is how we mark end of todo lists. Not NULL because todo_link == NULL +// means segment is not in list. +#define END_NONMOVING_TODO_LIST ((struct NonmovingSegment*)1) + +// A non-moving allocator for a particular block size +struct NonmovingAllocator { + struct NonmovingSegment *filled; + struct NonmovingSegment *active; + // indexed by capability number + struct NonmovingSegment *current[]; +}; + +// first allocator is of size 2^NONMOVING_ALLOCA0 (in bytes) +#define NONMOVING_ALLOCA0 3 + +// allocators cover block sizes of 2^NONMOVING_ALLOCA0 to +// 2^(NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT) (in bytes) +#define NONMOVING_ALLOCA_CNT 12 + +// maximum number of free segments to hold on to +#define NONMOVING_MAX_FREE 16 + +struct NonmovingHeap { + struct NonmovingAllocator *allocators[NONMOVING_ALLOCA_CNT]; + // free segment list. This is a cache where we keep up to + // NONMOVING_MAX_FREE segments to avoid thrashing the block allocator. + // Note that segments in this list are still counted towards + // oldest_gen->n_blocks. + struct NonmovingSegment *free; + // how many segments in free segment list? accessed atomically. + unsigned int n_free; + + // records the current length of the nonmovingAllocator.current arrays + unsigned int n_caps; + + // The set of segments being swept in this GC. Segments are moved here from + // the filled list during preparation and moved back to either the filled, + // active, or free lists during sweep. Should be NULL before mark and + // after sweep. 
+ struct NonmovingSegment *sweep_list; +}; + +extern struct NonmovingHeap nonmovingHeap; + +extern memcount nonmoving_live_words; + +void nonmovingInit(void); +void nonmovingExit(void); +void nonmovingWaitUntilFinished(void); + + +// dead_weaks and resurrected_threads lists are used for two things: +// +// - The weaks and threads in those lists are found to be dead during +// preparation, but the weaks will be used for finalization and threads will +// be scheduled again (aka. resurrection) so we need to keep them alive in the +// non-moving heap as well. So we treat them as roots and mark them. +// +// - In non-threaded runtime we add weaks and threads found to be dead in the +// non-moving heap to those lists so that they'll be finalized and scheduled +// as other weaks and threads. In threaded runtime we can't do this as that'd +// cause races between a minor collection and non-moving collection. Instead +// in non-moving heap we finalize the weaks and resurrect the threads +// directly, but in a pause. +// +void nonmovingCollect(StgWeak **dead_weaks, + StgTSO **resurrected_threads); + +void *nonmovingAllocate(Capability *cap, StgWord sz); +void nonmovingAddCapabilities(uint32_t new_n_caps); +void nonmovingPushFreeSegment(struct NonmovingSegment *seg); + +// Add a segment to the appropriate active list. +INLINE_HEADER void nonmovingPushActiveSegment(struct NonmovingSegment *seg) +{ + struct NonmovingAllocator *alloc = + nonmovingHeap.allocators[seg->block_size - NONMOVING_ALLOCA0]; + while (true) { + struct NonmovingSegment *current_active = (struct NonmovingSegment*)VOLATILE_LOAD(&alloc->active); + seg->link = current_active; + if (cas((StgVolatilePtr) &alloc->active, (StgWord) current_active, (StgWord) seg) == (StgWord) current_active) { + break; + } + } +} + +// Add a segment to the appropriate filled list. +INLINE_HEADER void nonmovingPushFilledSegment(struct NonmovingSegment *seg) +{ + struct NonmovingAllocator *alloc = + nonmovingHeap.allocators[seg->block_size - NONMOVING_ALLOCA0]; + while (true) { + struct NonmovingSegment *current_filled = (struct NonmovingSegment*)VOLATILE_LOAD(&alloc->filled); + seg->link = current_filled; + if (cas((StgVolatilePtr) &alloc->filled, (StgWord) current_filled, (StgWord) seg) == (StgWord) current_filled) { + break; + } + } +} +// Assert that the pointer can be traced by the non-moving collector (e.g. in +// mark phase). This means one of the following: +// +// - A static object +// - A large object +// - An object in the non-moving heap (e.g. in one of the segments) +// +void assert_in_nonmoving_heap(StgPtr p); + +// The block size of a given segment in bytes. +INLINE_HEADER unsigned int nonmovingSegmentBlockSize(struct NonmovingSegment *seg) +{ + return 1 << seg->block_size; +} + +// How many blocks does the given segment contain? Also the size of the bitmap. 
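+// For example: with the 32 kByte segment size defined above, 64-byte blocks
+// (block_size == 6) and, assuming a 64-bit platform where the segment header
+// pads to 24 bytes, this gives (32768 - 24) / (64 + 1) = 503 blocks backed by
+// a 503-byte bitmap.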
+INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *seg) +{ + unsigned int sz = nonmovingSegmentBlockSize(seg); + unsigned int segment_data_size = NONMOVING_SEGMENT_SIZE - sizeof(struct NonmovingSegment); + segment_data_size -= segment_data_size % SIZEOF_VOID_P; + return segment_data_size / (sz + 1); +} + +// Get a pointer to the given block index +INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmoving_block_idx i) +{ + // Block size in bytes + unsigned int blk_size = nonmovingSegmentBlockSize(seg); + // Bitmap size in bytes + W_ bitmap_size = nonmovingSegmentBlockCount(seg) * sizeof(uint8_t); + // Where the actual data starts (address of the first block). + // Use ROUNDUP_BYTES_TO_WDS to align to word size. Note that + // ROUNDUP_BYTES_TO_WDS returns in _words_, not in _bytes_, so convert it back + // back to bytes by multiplying with word size. + W_ data = ROUNDUP_BYTES_TO_WDS(((W_)seg) + sizeof(struct NonmovingSegment) + bitmap_size) * sizeof(W_); + return (void*)(data + i*blk_size); +} + +// Get the segment which a closure resides in. Assumes that pointer points into +// non-moving heap. +INLINE_HEADER struct NonmovingSegment *nonmovingGetSegment(StgPtr p) +{ + ASSERT(HEAP_ALLOCED_GC(p) && (Bdescr(p)->flags & BF_NONMOVING)); + const uintptr_t mask = ~NONMOVING_SEGMENT_MASK; + return (struct NonmovingSegment *) (((uintptr_t) p) & mask); +} + +INLINE_HEADER nonmoving_block_idx nonmovingGetBlockIdx(StgPtr p) +{ + ASSERT(HEAP_ALLOCED_GC(p) && (Bdescr(p)->flags & BF_NONMOVING)); + struct NonmovingSegment *seg = nonmovingGetSegment(p); + ptrdiff_t blk0 = (ptrdiff_t)nonmovingSegmentGetBlock(seg, 0); + ptrdiff_t offset = (ptrdiff_t)p - blk0; + return (nonmoving_block_idx) (offset >> seg->block_size); +} + +// TODO: Eliminate this +extern uint8_t nonmovingMarkEpoch; + +INLINE_HEADER void nonmovingSetMark(struct NonmovingSegment *seg, nonmoving_block_idx i) +{ + seg->bitmap[i] = nonmovingMarkEpoch; +} + +INLINE_HEADER uint8_t nonmovingGetMark(struct NonmovingSegment *seg, nonmoving_block_idx i) +{ + return seg->bitmap[i]; +} + +INLINE_HEADER void nonmovingSetClosureMark(StgPtr p) +{ + nonmovingSetMark(nonmovingGetSegment(p), nonmovingGetBlockIdx(p)); +} + +// TODO: Audit the uses of these +/* Was the given closure marked this major GC cycle? */ +INLINE_HEADER bool nonmovingClosureMarkedThisCycle(StgPtr p) +{ + struct NonmovingSegment *seg = nonmovingGetSegment(p); + nonmoving_block_idx blk_idx = nonmovingGetBlockIdx(p); + return nonmovingGetMark(seg, blk_idx) == nonmovingMarkEpoch; +} + +INLINE_HEADER bool nonmovingClosureMarked(StgPtr p) +{ + struct NonmovingSegment *seg = nonmovingGetSegment(p); + nonmoving_block_idx blk_idx = nonmovingGetBlockIdx(p); + return nonmovingGetMark(seg, blk_idx) != 0; +} + +// Can be called during a major collection to determine whether a particular +// segment is in the set of segments that will be swept this collection cycle. +INLINE_HEADER bool nonmovingSegmentBeingSwept(struct NonmovingSegment *seg) +{ + unsigned int n = nonmovingSegmentBlockCount(seg); + return seg->next_free_snap >= n; +} + +// Can be called during a major collection to determine whether a particular +// closure lives in a segment that will be swept this collection cycle. +// Note that this returns true for both large and normal objects. 
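+// Static objects (which are not heap-allocated) also report true here, since
+// they are conservatively treated as part of the snapshot.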
+INLINE_HEADER bool nonmovingClosureBeingSwept(StgClosure *p) +{ + bdescr *bd = Bdescr((StgPtr) p); + if (HEAP_ALLOCED_GC(p)) { + if (bd->flags & BF_NONMOVING_SWEEPING) { + return true; + } else if (bd->flags & BF_NONMOVING) { + struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p); + return nonmovingSegmentBeingSwept(seg); + } else { + // outside of the nonmoving heap + return false; + } + } else { + // a static object + return true; + } +} + +#if defined(DEBUG) + +void nonmovingPrintSegment(struct NonmovingSegment *seg); +void nonmovingPrintAllocator(struct NonmovingAllocator *alloc); +void locate_object(P_ obj); +void nonmovingPrintSweepList(void); +// Check if the object is in one of non-moving heap mut_lists +void check_in_mut_list(StgClosure *p); +void print_block_list(bdescr *bd); +void print_thread_list(StgTSO* tso); + +#endif + +#include "EndPrivate.h" + +#endif // CMINUSMINUS diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c new file mode 100644 index 0000000000..cf1950471e --- /dev/null +++ b/rts/sm/NonMovingMark.c @@ -0,0 +1,1217 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2018 + * + * Non-moving garbage collector and allocator: Mark phase + * + * ---------------------------------------------------------------------------*/ + +#include "Rts.h" +// We call evacuate, which expects the thread-local gc_thread to be valid; +// This is sometimes declared as a register variable therefore it is necessary +// to include the declaration so that the compiler doesn't clobber the register. +#include "NonMovingMark.h" +#include "NonMoving.h" +#include "BlockAlloc.h" /* for countBlocks */ +#include "HeapAlloc.h" +#include "Task.h" +#include "Trace.h" +#include "HeapUtils.h" +#include "Printer.h" +#include "Schedule.h" +#include "Weak.h" +#include "STM.h" +#include "MarkWeak.h" +#include "sm/Storage.h" + +static void mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin); +static void mark_tso (MarkQueue *queue, StgTSO *tso); +static void mark_stack (MarkQueue *queue, StgStack *stack); +static void mark_PAP_payload (MarkQueue *queue, + StgClosure *fun, + StgClosure **payload, + StgWord size); + +// How many Array# entries to add to the mark queue at once? +#define MARK_ARRAY_CHUNK_LENGTH 128 + +/* Note [Large objects in the non-moving collector] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * The nonmoving collector keeps a separate list of its large objects, apart from + * oldest_gen->large_objects. There are two reasons for this: + * + * 1. oldest_gen is mutated by minor collections, which happen concurrently with + * marking + * 2. the non-moving collector needs a consistent picture + * + * At the beginning of a major collection, nonmovingCollect takes the objects in + * oldest_gen->large_objects (which includes all large objects evacuated by the + * moving collector) and adds them to nonmoving_large_objects. This is the set + * of large objects that will being collected in the current major GC cycle. + * + * As the concurrent mark phase proceeds, the large objects in + * nonmoving_large_objects that are found to be live are moved to + * nonmoving_marked_large_objects. During sweep we discard all objects that remain + * in nonmoving_large_objects and move everything in nonmoving_marked_larged_objects + * back to nonmoving_large_objects. 
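+ *
+ * For example: a large object promoted to oldest_gen before the snapshot is
+ * adopted onto nonmoving_large_objects when the cycle begins. If the mark
+ * phase reaches it, mark_closure moves its block descriptor onto
+ * nonmoving_marked_large_objects and sets BF_MARKED; otherwise sweep frees it
+ * along with everything else still on nonmoving_large_objects.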
+ * + * During minor collections large objects will accumulate on + * oldest_gen->large_objects, where they will be picked up by the nonmoving + * collector and moved to nonmoving_large_objects during the next major GC. + * When this happens the block gets its BF_NONMOVING_SWEEPING flag set to + * indicate that it is part of the snapshot and consequently should be marked by + * the nonmoving mark phase.. + */ + +bdescr *nonmoving_large_objects = NULL; +bdescr *nonmoving_marked_large_objects = NULL; +memcount n_nonmoving_large_blocks = 0; +memcount n_nonmoving_marked_large_blocks = 0; + +/* + * Where we keep our threads during collection since we must have a snapshot of + * the threads that lived in the nonmoving heap at the time that the snapshot + * was taken to safely resurrect. + */ +StgTSO *nonmoving_old_threads = END_TSO_QUEUE; +/* Same for weak pointers */ +StgWeak *nonmoving_old_weak_ptr_list = NULL; +/* Because we can "tidy" thread and weak lists concurrently with a minor GC we + * need to move marked threads and weaks to these lists until we pause for sync. + * Then we move them to oldest_gen lists. */ +StgTSO *nonmoving_threads = END_TSO_QUEUE; +StgWeak *nonmoving_weak_ptr_list = NULL; + +#if defined(DEBUG) +// TODO (osa): Document +StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST; +#endif + +/* Used to provide the current mark queue to the young generation + * collector for scavenging. + */ +MarkQueue *current_mark_queue = NULL; + +/********************************************************* + * Pushing to either the mark queue or remembered set + *********************************************************/ + +STATIC_INLINE void +push (MarkQueue *q, const MarkQueueEnt *ent) +{ + // Are we at the end of the block? + if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) { + // Yes, this block is full. + // allocate a fresh block. 
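+        // allocGroup() must be called with the storage manager lock held,
+        // hence the ACQUIRE/RELEASE pair around the allocation below.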
+ ACQUIRE_SM_LOCK; + bdescr *bd = allocGroup(1); + bd->link = q->blocks; + q->blocks = bd; + q->top = (MarkQueueBlock *) bd->start; + q->top->head = 0; + RELEASE_SM_LOCK; + } + + q->top->entries[q->top->head] = *ent; + q->top->head++; +} + +static inline +void push_closure (MarkQueue *q, + StgClosure *p, + StgClosure **origin) +{ + // TODO: Push this into callers where they already have the Bdescr + if (HEAP_ALLOCED_GC(p) && (Bdescr((StgPtr) p)->gen != oldest_gen)) + return; + +#if defined(DEBUG) + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + // Commenting out: too slow + // if (RtsFlags.DebugFlags.sanity) { + // assert_in_nonmoving_heap((P_)p); + // if (origin) + // assert_in_nonmoving_heap((P_)origin); + // } +#endif + + MarkQueueEnt ent = { + .type = MARK_CLOSURE, + .mark_closure = { + .p = UNTAG_CLOSURE(p), + .origin = origin, + } + }; + push(q, &ent); +} + +static +void push_array (MarkQueue *q, + const StgMutArrPtrs *array, + StgWord start_index) +{ + // TODO: Push this into callers where they already have the Bdescr + if (HEAP_ALLOCED_GC(array) && (Bdescr((StgPtr) array)->gen != oldest_gen)) + return; + + MarkQueueEnt ent = { + .type = MARK_ARRAY, + .mark_array = { + .array = array, + .start_index = start_index + } + }; + push(q, &ent); +} + +static +void push_thunk_srt (MarkQueue *q, const StgInfoTable *info) +{ + const StgThunkInfoTable *thunk_info = itbl_to_thunk_itbl(info); + if (thunk_info->i.srt) { + push_closure(q, (StgClosure*)GET_SRT(thunk_info), NULL); + } +} + +static +void push_fun_srt (MarkQueue *q, const StgInfoTable *info) +{ + const StgFunInfoTable *fun_info = itbl_to_fun_itbl(info); + if (fun_info->i.srt) { + push_closure(q, (StgClosure*)GET_FUN_SRT(fun_info), NULL); + } +} + +/********************************************************* + * Pushing to the mark queue + *********************************************************/ + +void markQueuePush (MarkQueue *q, const MarkQueueEnt *ent) +{ + push(q, ent); +} + +void markQueuePushClosure (MarkQueue *q, + StgClosure *p, + StgClosure **origin) +{ + push_closure(q, p, origin); +} + +/* TODO: Do we really never want to specify the origin here? */ +void markQueueAddRoot (MarkQueue* q, StgClosure** root) +{ + markQueuePushClosure(q, *root, NULL); +} + +/* Push a closure to the mark queue without origin information */ +void markQueuePushClosure_ (MarkQueue *q, StgClosure *p) +{ + markQueuePushClosure(q, p, NULL); +} + +void markQueuePushFunSrt (MarkQueue *q, const StgInfoTable *info) +{ + push_fun_srt(q, info); +} + +void markQueuePushThunkSrt (MarkQueue *q, const StgInfoTable *info) +{ + push_thunk_srt(q, info); +} + +void markQueuePushArray (MarkQueue *q, + const StgMutArrPtrs *array, + StgWord start_index) +{ + push_array(q, array, start_index); +} + +/********************************************************* + * Popping from the mark queue + *********************************************************/ + +// Returns invalid MarkQueueEnt if queue is empty. +static MarkQueueEnt markQueuePop (MarkQueue *q) +{ + MarkQueueBlock *top; + +again: + top = q->top; + + // Are we at the beginning of the block? + if (top->head == 0) { + // Is this the first block of the queue? + if (q->blocks->link == NULL) { + // Yes, therefore queue is empty... + MarkQueueEnt none = { .type = NULL_ENTRY }; + return none; + } else { + // No, unwind to the previous block and try popping again... 
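+            // The head block is exhausted: free it and continue popping from
+            // the next block in the chain, which still holds unprocessed entries.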
+ bdescr *old_block = q->blocks; + q->blocks = old_block->link; + q->top = (MarkQueueBlock*)q->blocks->start; + ACQUIRE_SM_LOCK; + freeGroup(old_block); // TODO: hold on to a block to avoid repeated allocation/deallocation? + RELEASE_SM_LOCK; + goto again; + } + } + + top->head--; + MarkQueueEnt ent = top->entries[top->head]; + return ent; +} + +/********************************************************* + * Creating and destroying MarkQueues + *********************************************************/ + +/* Must hold sm_mutex. */ +static void init_mark_queue_ (MarkQueue *queue) +{ + bdescr *bd = allocGroup(1); + queue->blocks = bd; + queue->top = (MarkQueueBlock *) bd->start; + queue->top->head = 0; +} + +/* Must hold sm_mutex. */ +void initMarkQueue (MarkQueue *queue) +{ + init_mark_queue_(queue); + queue->marked_objects = allocHashTable(); +} + +void freeMarkQueue (MarkQueue *queue) +{ + bdescr* b = queue->blocks; + ACQUIRE_SM_LOCK; + while (b) + { + bdescr* b_ = b->link; + freeGroup(b); + b = b_; + } + RELEASE_SM_LOCK; + freeHashTable(queue->marked_objects, NULL); +} + + +/********************************************************* + * Marking + *********************************************************/ + +/* + * N.B. Mutation of TRecHeaders is completely unprotected by any write + * barrier. Consequently it's quite important that we deeply mark + * any outstanding transactions. + */ +static void mark_trec_header (MarkQueue *queue, StgTRecHeader *trec) +{ + while (trec != NO_TREC) { + StgTRecChunk *chunk = trec->current_chunk; + markQueuePushClosure_(queue, (StgClosure *) trec); + markQueuePushClosure_(queue, (StgClosure *) chunk); + while (chunk != END_STM_CHUNK_LIST) { + for (StgWord i=0; i < chunk->next_entry_idx; i++) { + TRecEntry *ent = &chunk->entries[i]; + markQueuePushClosure_(queue, (StgClosure *) ent->tvar); + markQueuePushClosure_(queue, ent->expected_value); + markQueuePushClosure_(queue, ent->new_value); + } + chunk = chunk->prev_chunk; + } + trec = trec->enclosing_trec; + } +} + +static void mark_tso (MarkQueue *queue, StgTSO *tso) +{ + // TODO: Clear dirty if contains only old gen objects + + if (tso->bound != NULL) { + markQueuePushClosure_(queue, (StgClosure *) tso->bound->tso); + } + + markQueuePushClosure_(queue, (StgClosure *) tso->blocked_exceptions); + markQueuePushClosure_(queue, (StgClosure *) tso->bq); + mark_trec_header(queue, tso->trec); + markQueuePushClosure_(queue, (StgClosure *) tso->stackobj); + markQueuePushClosure_(queue, (StgClosure *) tso->_link); + if ( tso->why_blocked == BlockedOnMVar + || tso->why_blocked == BlockedOnMVarRead + || tso->why_blocked == BlockedOnBlackHole + || tso->why_blocked == BlockedOnMsgThrowTo + || tso->why_blocked == NotBlocked + ) { + markQueuePushClosure_(queue, tso->block_info.closure); + } +} + +static void +do_push_closure (StgClosure **p, void *user) +{ + MarkQueue *queue = (MarkQueue *) user; + // TODO: Origin? need reference to containing closure + markQueuePushClosure_(queue, *p); +} + +static void +mark_large_bitmap (MarkQueue *queue, + StgClosure **p, + StgLargeBitmap *large_bitmap, + StgWord size) +{ + walk_large_bitmap(do_push_closure, p, large_bitmap, size, queue); +} + +static void +mark_small_bitmap (MarkQueue *queue, StgClosure **p, StgWord size, StgWord bitmap) +{ + while (size > 0) { + if ((bitmap & 1) == 0) { + // TODO: Origin? 
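+            // A clear bit in the bitmap means this word is a pointer (set
+            // bits mark non-pointer words), so push it to the mark queue.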
+ markQueuePushClosure(queue, *p, NULL); + } + p++; + bitmap = bitmap >> 1; + size--; + } +} + +static GNUC_ATTR_HOT +void mark_PAP_payload (MarkQueue *queue, + StgClosure *fun, + StgClosure **payload, + StgWord size) +{ + const StgFunInfoTable *fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(fun)); + ASSERT(fun_info->i.type != PAP); + StgPtr p = (StgPtr) payload; + + StgWord bitmap; + switch (fun_info->f.fun_type) { + case ARG_GEN: + bitmap = BITMAP_BITS(fun_info->f.b.bitmap); + goto small_bitmap; + case ARG_GEN_BIG: + mark_large_bitmap(queue, payload, GET_FUN_LARGE_BITMAP(fun_info), size); + break; + case ARG_BCO: + mark_large_bitmap(queue, payload, BCO_BITMAP(fun), size); + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); + small_bitmap: + mark_small_bitmap(queue, (StgClosure **) p, size, bitmap); + break; + } +} + +/* Helper for mark_stack; returns next stack frame. */ +static StgPtr +mark_arg_block (MarkQueue *queue, const StgFunInfoTable *fun_info, StgClosure **args) +{ + StgWord bitmap, size; + + StgPtr p = (StgPtr)args; + switch (fun_info->f.fun_type) { + case ARG_GEN: + bitmap = BITMAP_BITS(fun_info->f.b.bitmap); + size = BITMAP_SIZE(fun_info->f.b.bitmap); + goto small_bitmap; + case ARG_GEN_BIG: + size = GET_FUN_LARGE_BITMAP(fun_info)->size; + mark_large_bitmap(queue, (StgClosure**)p, GET_FUN_LARGE_BITMAP(fun_info), size); + p += size; + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); + size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]); + small_bitmap: + mark_small_bitmap(queue, (StgClosure**)p, size, bitmap); + p += size; + break; + } + return p; +} + +static GNUC_ATTR_HOT void +mark_stack_ (MarkQueue *queue, StgPtr sp, StgPtr spBottom) +{ + ASSERT(sp <= spBottom); + + while (sp < spBottom) { + const StgRetInfoTable *info = get_ret_itbl((StgClosure *)sp); + switch (info->i.type) { + case UPDATE_FRAME: + { + // See Note [upd-black-hole] in rts/Scav.c + StgUpdateFrame *frame = (StgUpdateFrame *) sp; + markQueuePushClosure_(queue, frame->updatee); + sp += sizeofW(StgUpdateFrame); + continue; + } + + // small bitmap (< 32 entries, or 64 on a 64-bit machine) + case CATCH_STM_FRAME: + case CATCH_RETRY_FRAME: + case ATOMICALLY_FRAME: + case UNDERFLOW_FRAME: + case STOP_FRAME: + case CATCH_FRAME: + case RET_SMALL: + { + StgWord bitmap = BITMAP_BITS(info->i.layout.bitmap); + StgWord size = BITMAP_SIZE(info->i.layout.bitmap); + // NOTE: the payload starts immediately after the info-ptr, we + // don't have an StgHeader in the same sense as a heap closure. 
+ sp++; + mark_small_bitmap(queue, (StgClosure **) sp, size, bitmap); + sp += size; + } + follow_srt: + if (info->i.srt) { + markQueuePushClosure_(queue, (StgClosure*)GET_SRT(info)); + } + continue; + + case RET_BCO: { + sp++; + markQueuePushClosure_(queue, *(StgClosure**)sp); + StgBCO *bco = (StgBCO *)*sp; + sp++; + StgWord size = BCO_BITMAP_SIZE(bco); + mark_large_bitmap(queue, (StgClosure **) sp, BCO_BITMAP(bco), size); + sp += size; + continue; + } + + // large bitmap (> 32 entries, or > 64 on a 64-bit machine) + case RET_BIG: + { + StgWord size; + + size = GET_LARGE_BITMAP(&info->i)->size; + sp++; + mark_large_bitmap(queue, (StgClosure **) sp, GET_LARGE_BITMAP(&info->i), size); + sp += size; + // and don't forget to follow the SRT + goto follow_srt; + } + + case RET_FUN: + { + StgRetFun *ret_fun = (StgRetFun *)sp; + const StgFunInfoTable *fun_info; + + markQueuePushClosure_(queue, ret_fun->fun); + fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun)); + sp = mark_arg_block(queue, fun_info, ret_fun->payload); + goto follow_srt; + } + + default: + barf("mark_stack: weird activation record found on stack: %d", (int)(info->i.type)); + } + } +} + +static GNUC_ATTR_HOT void +mark_stack (MarkQueue *queue, StgStack *stack) +{ + // TODO: Clear dirty if contains only old gen objects + + mark_stack_(queue, stack->sp, stack->stack + stack->stack_size); +} + +static GNUC_ATTR_HOT void +mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) +{ + (void)origin; // TODO: should be used for selector/thunk optimisations + + try_again: + p = UNTAG_CLOSURE(p); + +# define PUSH_FIELD(obj, field) \ + markQueuePushClosure(queue, \ + (StgClosure *) (obj)->field, \ + (StgClosure **) &(obj)->field) + + if (!HEAP_ALLOCED_GC(p)) { + const StgInfoTable *info = get_itbl(p); + StgHalfWord type = info->type; + + if (type == CONSTR_0_1 || type == CONSTR_0_2 || type == CONSTR_NOCAF) { + // no need to put these on the static linked list, they don't need + // to be marked. + return; + } + + if (lookupHashTable(queue->marked_objects, (W_)p)) { + // already marked + return; + } + + insertHashTable(queue->marked_objects, (W_)p, (P_)1); + + switch (type) { + + case THUNK_STATIC: + if (info->srt != 0) { + markQueuePushThunkSrt(queue, info); // TODO this function repeats the check above + } + return; + + case FUN_STATIC: + if (info->srt != 0 || info->layout.payload.ptrs != 0) { + markQueuePushFunSrt(queue, info); // TODO this function repeats the check above + + // a FUN_STATIC can also be an SRT, so it may have pointer + // fields. See Note [SRTs] in CmmBuildInfoTables, specifically + // the [FUN] optimisation. + // TODO (osa) I don't understand this comment + for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) { + PUSH_FIELD(p, payload[i]); + } + } + return; + + case IND_STATIC: + PUSH_FIELD((StgInd *) p, indirectee); + return; + + case CONSTR: + case CONSTR_1_0: + case CONSTR_2_0: + case CONSTR_1_1: + for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) { + PUSH_FIELD(p, payload[i]); + } + return; + + case WHITEHOLE: + while (get_itbl(p)->type == WHITEHOLE); + // busy_wait_nop(); // FIXME + goto try_again; + + default: + barf("mark_closure(static): strange closure type %d", (int)(info->type)); + } + } + + bdescr *bd = Bdescr((StgPtr) p); + + if (bd->gen != oldest_gen) { + // Here we have an object living outside of the non-moving heap. 
Since + // we moved everything to the non-moving heap before starting the major + // collection, we know that we don't need to trace it: it was allocated + // after we took our snapshot. + + // This should never happen in the non-concurrent case + barf("Closure outside of non-moving heap: %p", p); + } + + ASSERTM(LOOKS_LIKE_CLOSURE_PTR(p), "invalid closure, info=%p", p->header.info); + + ASSERT(!IS_FORWARDING_PTR(p->header.info)); + + if (bd->flags & BF_NONMOVING) { + + if (bd->flags & BF_LARGE) { + if (! (bd->flags & BF_NONMOVING_SWEEPING)) { + // Not in the snapshot + return; + } + if (bd->flags & BF_MARKED) { + return; + } + + // Mark contents + p = (StgClosure*)bd->start; + } else { + struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p); + nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p); + + /* We don't mark blocks that, + * - were not live at the time that the snapshot was taken, or + * - we have already marked this cycle + */ + uint8_t mark = nonmovingGetMark(seg, block_idx); + /* Don't mark things we've already marked (since we may loop) */ + if (mark == nonmovingMarkEpoch) + return; + + StgClosure *snapshot_loc = + (StgClosure *) nonmovingSegmentGetBlock(seg, seg->next_free_snap); + if (p >= snapshot_loc && mark == 0) { + /* + * In this case we are looking at a block that wasn't allocated + * at the time that the snapshot was taken. We mustn't trace + * things above the allocation pointer that aren't marked since + * they may not be valid objects. + */ + return; + } + } + } + + // A pinned object that is still attached to a capability (because it's not + // filled yet). No need to trace it pinned objects can't contain poiners. + else if (bd->flags & BF_PINNED) { +#if defined(DEBUG) + bool found_it = false; + for (uint32_t i = 0; i < n_capabilities; ++i) { + if (capabilities[i]->pinned_object_block == bd) { + found_it = true; + break; + } + } + ASSERT(found_it); +#endif + return; + } + + else { + barf("Strange closure in nonmoving mark: %p", p); + } + + ///////////////////////////////////////////////////// + // Trace pointers + ///////////////////////////////////////////////////// + + const StgInfoTable *info = get_itbl(p); + switch (info->type) { + + case MVAR_CLEAN: + case MVAR_DIRTY: { + StgMVar *mvar = (StgMVar *) p; + PUSH_FIELD(mvar, head); + PUSH_FIELD(mvar, tail); + PUSH_FIELD(mvar, value); + break; + } + + case TVAR: { + StgTVar *tvar = ((StgTVar *)p); + PUSH_FIELD(tvar, current_value); + PUSH_FIELD(tvar, first_watch_queue_entry); + break; + } + + case FUN_2_0: + markQueuePushFunSrt(queue, info); + PUSH_FIELD(p, payload[1]); + PUSH_FIELD(p, payload[0]); + break; + + case THUNK_2_0: { + StgThunk *thunk = (StgThunk *) p; + markQueuePushThunkSrt(queue, info); + PUSH_FIELD(thunk, payload[1]); + PUSH_FIELD(thunk, payload[0]); + break; + } + + case CONSTR_2_0: + PUSH_FIELD(p, payload[1]); + PUSH_FIELD(p, payload[0]); + break; + + case THUNK_1_0: + markQueuePushThunkSrt(queue, info); + PUSH_FIELD((StgThunk *) p, payload[0]); + break; + + case FUN_1_0: + markQueuePushFunSrt(queue, info); + PUSH_FIELD(p, payload[0]); + break; + + case CONSTR_1_0: + PUSH_FIELD(p, payload[0]); + break; + + case THUNK_0_1: + markQueuePushThunkSrt(queue, info); + break; + + case FUN_0_1: + markQueuePushFunSrt(queue, info); + break; + + case CONSTR_0_1: + case CONSTR_0_2: + break; + + case THUNK_0_2: + markQueuePushThunkSrt(queue, info); + break; + + case FUN_0_2: + markQueuePushFunSrt(queue, info); + break; + + case THUNK_1_1: + markQueuePushThunkSrt(queue, info); + 
PUSH_FIELD((StgThunk *) p, payload[0]); + break; + + case FUN_1_1: + markQueuePushFunSrt(queue, info); + PUSH_FIELD(p, payload[0]); + break; + + case CONSTR_1_1: + PUSH_FIELD(p, payload[0]); + break; + + case FUN: + markQueuePushFunSrt(queue, info); + goto gen_obj; + + case THUNK: { + markQueuePushThunkSrt(queue, info); + for (StgWord i = 0; i < info->layout.payload.ptrs; i++) { + StgClosure **field = &((StgThunk *) p)->payload[i]; + markQueuePushClosure(queue, *field, field); + } + break; + } + + gen_obj: + case CONSTR: + case CONSTR_NOCAF: + case WEAK: + case PRIM: + { + for (StgWord i = 0; i < info->layout.payload.ptrs; i++) { + StgClosure **field = &((StgClosure *) p)->payload[i]; + markQueuePushClosure(queue, *field, field); + } + break; + } + + case BCO: { + StgBCO *bco = (StgBCO *)p; + PUSH_FIELD(bco, instrs); + PUSH_FIELD(bco, literals); + PUSH_FIELD(bco, ptrs); + break; + } + + + case IND: + case BLACKHOLE: + PUSH_FIELD((StgInd *) p, indirectee); + break; + + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + PUSH_FIELD((StgMutVar *)p, var); + break; + + case BLOCKING_QUEUE: { + StgBlockingQueue *bq = (StgBlockingQueue *)p; + PUSH_FIELD(bq, bh); + PUSH_FIELD(bq, owner); + PUSH_FIELD(bq, queue); + PUSH_FIELD(bq, link); + break; + } + + case THUNK_SELECTOR: + PUSH_FIELD((StgSelector *) p, selectee); + // TODO: selector optimization + break; + + case AP_STACK: { + StgAP_STACK *ap = (StgAP_STACK *)p; + PUSH_FIELD(ap, fun); + mark_stack_(queue, (StgPtr) ap->payload, (StgPtr) ap->payload + ap->size); + break; + } + + case PAP: { + StgPAP *pap = (StgPAP *) p; + PUSH_FIELD(pap, fun); + mark_PAP_payload(queue, pap->fun, pap->payload, pap->n_args); + break; + } + + case AP: { + StgAP *ap = (StgAP *) p; + PUSH_FIELD(ap, fun); + mark_PAP_payload(queue, ap->fun, ap->payload, ap->n_args); + break; + } + + case ARR_WORDS: + // nothing to follow + break; + + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN_CLEAN: + case MUT_ARR_PTRS_FROZEN_DIRTY: + // TODO: Check this against Scav.c + markQueuePushArray(queue, (StgMutArrPtrs *) p, 0); + break; + + case SMALL_MUT_ARR_PTRS_CLEAN: + case SMALL_MUT_ARR_PTRS_DIRTY: + case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: + case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: { + StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs *) p; + for (StgWord i = 0; i < arr->ptrs; i++) { + StgClosure **field = &arr->payload[i]; + markQueuePushClosure(queue, *field, field); + } + break; + } + + case TSO: + mark_tso(queue, (StgTSO *) p); + break; + + case STACK: { + // See Note [StgStack dirtiness flags and concurrent marking] + StgStack *stack = (StgStack *) p; + mark_stack(queue, stack); + break; + } + + case MUT_PRIM: { + for (StgHalfWord p_idx = 0; p_idx < info->layout.payload.ptrs; ++p_idx) { + StgClosure **field = &p->payload[p_idx]; + markQueuePushClosure(queue, *field, field); + } + break; + } + + case TREC_CHUNK: { + // TODO: Should we abort here? 
This should have already been marked + // when we dirtied the TSO + StgTRecChunk *tc = ((StgTRecChunk *) p); + PUSH_FIELD(tc, prev_chunk); + TRecEntry *end = &tc->entries[tc->next_entry_idx]; + for (TRecEntry *e = &tc->entries[0]; e < end; e++) { + markQueuePushClosure_(queue, (StgClosure *) e->tvar); + markQueuePushClosure_(queue, (StgClosure *) e->expected_value); + markQueuePushClosure_(queue, (StgClosure *) e->new_value); + } + break; + } + + case WHITEHOLE: + while (get_itbl(p)->type == WHITEHOLE); + // busy_wait_nop(); // FIXME + goto try_again; + + default: + barf("mark_closure: unimplemented/strange closure type %d @ %p", + info->type, p); + } + +# undef PUSH_FIELD + + /* Set the mark bit: it's important that we do this only after we actually push + * the object's pointers since in the case of marking stacks there may be a + * mutator waiting for us to finish so it can start execution. + */ + if (bd->flags & BF_LARGE) { + if (! (bd->flags & BF_MARKED)) { + // Remove the object from nonmoving_large_objects and link it to + // nonmoving_marked_large_objects + dbl_link_remove(bd, &nonmoving_large_objects); + dbl_link_onto(bd, &nonmoving_marked_large_objects); + n_nonmoving_large_blocks -= bd->blocks; + n_nonmoving_marked_large_blocks += bd->blocks; + bd->flags |= BF_MARKED; + } + } else { + // TODO: Kill repetition + struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p); + nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p); + nonmovingSetMark(seg, block_idx); + nonmoving_live_words += nonmovingSegmentBlockSize(seg) / sizeof(W_); + } +} + +/* This is the main mark loop. + * Invariants: + * + * a. nonmovingPrepareMark has been called. + * b. the nursery has been fully evacuated into the non-moving generation. + * c. the mark queue has been seeded with a set of roots. + * + */ +GNUC_ATTR_HOT void nonmovingMark (MarkQueue *queue) +{ + debugTrace(DEBUG_nonmoving_gc, "Starting mark pass"); + unsigned int count = 0; + while (true) { + count++; + MarkQueueEnt ent = markQueuePop(queue); + + switch (ent.type) { + case MARK_CLOSURE: + mark_closure(queue, ent.mark_closure.p, ent.mark_closure.origin); + break; + case MARK_ARRAY: { + const StgMutArrPtrs *arr = ent.mark_array.array; + StgWord start = ent.mark_array.start_index; + StgWord end = start + MARK_ARRAY_CHUNK_LENGTH; + if (end < arr->ptrs) { + markQueuePushArray(queue, ent.mark_array.array, end); + } else { + end = arr->ptrs; + } + for (StgWord i = start; i < end; i++) { + markQueuePushClosure_(queue, arr->payload[i]); + } + break; + } + case NULL_ENTRY: + // Nothing more to do + debugTrace(DEBUG_nonmoving_gc, "Finished mark pass: %d", count); + return; + } + } +} + +// A variant of `isAlive` that works for non-moving heap. Used for: +// +// - Collecting weak pointers; checking key of a weak pointer. +// - Resurrecting threads; checking if a thread is dead. +// - Sweeping object lists: large_objects, mut_list, stable_name_table. +// +// This may only be used after a full mark but before nonmovingSweep as it +// relies on the correctness of the next_free_snap and mark bitmaps. +bool nonmovingIsAlive (StgClosure *p) +{ + // Ignore static closures. See comments in `isAlive`. 
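+    // Static closures are not managed by the nonmoving heap, so
+    // conservatively report them as live.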
+ if (!HEAP_ALLOCED_GC(p)) { + return true; + } + + bdescr *bd = Bdescr((P_)p); + + // All non-static objects in the non-moving heap should be marked as + // BF_NONMOVING + ASSERT(bd->flags & BF_NONMOVING); + + if (bd->flags & BF_LARGE) { + return (bd->flags & BF_NONMOVING_SWEEPING) == 0 + // the large object wasn't in the snapshot and therefore wasn't marked + || (bd->flags & BF_MARKED) != 0; + // The object was marked + } else { + struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p); + nonmoving_block_idx i = nonmovingGetBlockIdx((StgPtr) p); + uint8_t mark = nonmovingGetMark(seg, i); + if (i >= seg->next_free_snap) { + // If the object is allocated after next_free_snap then one of the + // following must be true: + // + // * if its mark is 0 then the block was not allocated last time + // the segment was swept; however, it may have been allocated since + // then and therefore we must conclude that the block is alive. + // + // * if its mark is equal to nonmovingMarkEpoch then we found that + // the object was alive in the snapshot of the current GC (recall + // that this function may only be used after a mark). + // Consequently we must conclude that the object is still alive. + // + // * if its mark is not equal to nonmovingMarkEpoch then we found + // that the object was not reachable in the last snapshot. + // Assuming that the mark is complete we can conclude that the + // object is dead since the snapshot invariant guarantees that + // all objects alive in the snapshot would be marked. + // + return mark == nonmovingMarkEpoch || mark == 0; + } else { + // If the object is below next_free_snap then the snapshot + // invariant guarantees that it is marked if reachable. + return mark == nonmovingMarkEpoch; + } + } +} + +// Check whether a snapshotted object is alive. That is for an object that we +// know to be in the snapshot, is its mark bit set. It is imperative that the +// object is in the snapshot (e.g. was in the nonmoving heap at the time that +// the snapshot was taken) since we assume that its mark bit reflects its +// reachability. +// +// This is used when +// +// - Collecting weak pointers; checking key of a weak pointer. +// - Resurrecting threads; checking if a thread is dead. +// - Sweeping object lists: large_objects, mut_list, stable_name_table. +// +static bool nonmovingIsNowAlive (StgClosure *p) +{ + // Ignore static closures. See comments in `isAlive`. 
+ if (!HEAP_ALLOCED_GC(p)) { + return true; + } + + bdescr *bd = Bdescr((P_)p); + + // All non-static objects in the non-moving heap should be marked as + // BF_NONMOVING + ASSERT(bd->flags & BF_NONMOVING); + + if (bd->flags & BF_LARGE) { + return (bd->flags & BF_NONMOVING_SWEEPING) == 0 + // the large object wasn't in the snapshot and therefore wasn't marked + || (bd->flags & BF_MARKED) != 0; + // The object was marked + } else { + return nonmovingClosureMarkedThisCycle((P_)p); + } +} + +// Non-moving heap variant of `tidyWeakList` +bool nonmovingTidyWeaks (struct MarkQueue_ *queue) +{ + bool did_work = false; + + StgWeak **last_w = &nonmoving_old_weak_ptr_list; + StgWeak *next_w; + for (StgWeak *w = nonmoving_old_weak_ptr_list; w != NULL; w = next_w) { + if (w->header.info == &stg_DEAD_WEAK_info) { + // finalizeWeak# was called on the weak + next_w = w->link; + *last_w = next_w; + continue; + } + + // Otherwise it's a live weak + ASSERT(w->header.info == &stg_WEAK_info); + + if (nonmovingIsNowAlive(w->key)) { + nonmovingMarkLiveWeak(queue, w); + did_work = true; + + // remove this weak ptr from old_weak_ptr list + *last_w = w->link; + next_w = w->link; + + // and put it on the weak ptr list + w->link = nonmoving_weak_ptr_list; + nonmoving_weak_ptr_list = w; + } else { + last_w = &(w->link); + next_w = w->link; + } + } + + return did_work; +} + +void nonmovingMarkDeadWeak (struct MarkQueue_ *queue, StgWeak *w) +{ + if (w->cfinalizers != &stg_NO_FINALIZER_closure) { + markQueuePushClosure_(queue, w->value); + } + markQueuePushClosure_(queue, w->finalizer); +} + +void nonmovingMarkLiveWeak (struct MarkQueue_ *queue, StgWeak *w) +{ + ASSERT(nonmovingClosureMarkedThisCycle((P_)w)); + markQueuePushClosure_(queue, w->value); + markQueuePushClosure_(queue, w->finalizer); + markQueuePushClosure_(queue, w->cfinalizers); +} + +// When we're done with marking, any weak pointers with non-marked keys will be +// considered "dead". We mark values and finalizers of such weaks, and then +// schedule them for finalization in `scheduleFinalizers` (which we run during +// synchronization). +void nonmovingMarkDeadWeaks (struct MarkQueue_ *queue, StgWeak **dead_weaks) +{ + StgWeak *next_w; + for (StgWeak *w = nonmoving_old_weak_ptr_list; w; w = next_w) { + ASSERT(!nonmovingClosureMarkedThisCycle((P_)(w->key))); + nonmovingMarkDeadWeak(queue, w); + next_w = w ->link; + w->link = *dead_weaks; + *dead_weaks = w; + } +} + +// Non-moving heap variant of of `tidyThreadList` +void nonmovingTidyThreads () +{ + StgTSO *next; + StgTSO **prev = &nonmoving_old_threads; + for (StgTSO *t = nonmoving_old_threads; t != END_TSO_QUEUE; t = next) { + + next = t->global_link; + + // N.B. This thread is in old_threads, consequently we *know* it is in + // the snapshot and it is therefore safe to rely on the bitmap to + // determine its reachability. 
+ if (nonmovingIsNowAlive((StgClosure*)t)) { + // alive + *prev = next; + + // move this thread onto threads list + t->global_link = nonmoving_threads; + nonmoving_threads = t; + } else { + // not alive (yet): leave this thread on the old_threads list + prev = &(t->global_link); + } + } +} + +void nonmovingResurrectThreads (struct MarkQueue_ *queue, StgTSO **resurrected_threads) +{ + StgTSO *next; + for (StgTSO *t = nonmoving_old_threads; t != END_TSO_QUEUE; t = next) { + next = t->global_link; + + switch (t->what_next) { + case ThreadKilled: + case ThreadComplete: + continue; + default: + markQueuePushClosure_(queue, (StgClosure*)t); + t->global_link = *resurrected_threads; + *resurrected_threads = t; + } + } +} + +#if defined(DEBUG) + +void printMarkQueueEntry (MarkQueueEnt *ent) +{ + if (ent->type == MARK_CLOSURE) { + debugBelch("Closure: "); + printClosure(ent->mark_closure.p); + } else if (ent->type == MARK_ARRAY) { + debugBelch("Array\n"); + } else { + debugBelch("End of mark\n"); + } +} + +void printMarkQueue (MarkQueue *q) +{ + debugBelch("======== MARK QUEUE ========\n"); + for (bdescr *block = q->blocks; block; block = block->link) { + MarkQueueBlock *queue = (MarkQueueBlock*)block->start; + for (uint32_t i = 0; i < queue->head; ++i) { + printMarkQueueEntry(&queue->entries[i]); + } + } + debugBelch("===== END OF MARK QUEUE ====\n"); +} + +#endif diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h new file mode 100644 index 0000000000..636f41890c --- /dev/null +++ b/rts/sm/NonMovingMark.h @@ -0,0 +1,140 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2018 + * + * Non-moving garbage collector and allocator: Mark phase + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include "Hash.h" +#include "Task.h" +#include "NonMoving.h" + +#include "BeginPrivate.h" + +#include "Hash.h" + +enum EntryType { + NULL_ENTRY = 0, + MARK_CLOSURE, + MARK_ARRAY +}; + +/* Note [Origin references in the nonmoving collector] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * To implement indirection short-cutting and the selector optimisation the + * collector needs to know where it found references, so it can update the + * reference if it later turns out that points to an indirection. For this + * reason, each mark queue entry contains two things: + * + * - a pointer to the object to be marked (p), and + * + * - a pointer to the field where we found the reference (origin) + * + * Note that the origin pointer is an interior pointer: it points not to a + * valid closure (with info table pointer) but rather to a field inside a closure. + * Since such references can't be safely scavenged we establish the invariant + * that the origin pointer may only point to a field of an object living in the + * nonmoving heap, where no scavenging is needed. + * + */ + +typedef struct { + enum EntryType type; + // All pointers should be untagged + union { + struct { + StgClosure *p; // the object to be marked + StgClosure **origin; // field where this reference was found. + // See Note [Origin references in the nonmoving collector] + } mark_closure; + struct { + const StgMutArrPtrs *array; + StgWord start_index; + } mark_array; + }; +} MarkQueueEnt; + +typedef struct { + // index of first *unused* queue entry + uint32_t head; + + MarkQueueEnt entries[]; +} MarkQueueBlock; + +/* The mark queue is not capable of concurrent read or write. + * + * invariants: + * + * a. 
top == blocks->start; + * b. there is always a valid MarkQueueChunk, although it may be empty + * (e.g. top->head == 0). + */ +typedef struct MarkQueue_ { + // A singly link-list of blocks, each containing a MarkQueueChunk. + bdescr *blocks; + + // Cached value of blocks->start. + MarkQueueBlock *top; + + // Marked objects outside of nonmoving heap, namely large and static + // objects. + HashTable *marked_objects; +} MarkQueue; + +// The length of MarkQueueBlock.entries +#define MARK_QUEUE_BLOCK_ENTRIES ((BLOCK_SIZE - sizeof(MarkQueueBlock)) / sizeof(MarkQueueEnt)) + +extern bdescr *nonmoving_large_objects, *nonmoving_marked_large_objects; +extern memcount n_nonmoving_large_blocks, n_nonmoving_marked_large_blocks; + +extern StgTSO *nonmoving_old_threads; +extern StgWeak *nonmoving_old_weak_ptr_list; +extern StgTSO *nonmoving_threads; +extern StgWeak *nonmoving_weak_ptr_list; + +#if defined(DEBUG) +extern StgIndStatic *debug_caf_list_snapshot; +#endif + +extern MarkQueue *current_mark_queue; + +void markQueueAddRoot(MarkQueue* q, StgClosure** root); + +void initMarkQueue(MarkQueue *queue); +void freeMarkQueue(MarkQueue *queue); +void nonmovingMark(struct MarkQueue_ *restrict queue); + +bool nonmovingTidyWeaks(struct MarkQueue_ *queue); +void nonmovingTidyThreads(void); +void nonmovingMarkDeadWeaks(struct MarkQueue_ *queue, StgWeak **dead_weak_ptr_list); +void nonmovingResurrectThreads(struct MarkQueue_ *queue, StgTSO **resurrected_threads); +bool nonmovingIsAlive(StgClosure *p); +void nonmovingMarkDeadWeak(struct MarkQueue_ *queue, StgWeak *w); +void nonmovingMarkLiveWeak(struct MarkQueue_ *queue, StgWeak *w); + +void markQueuePush(MarkQueue *q, const MarkQueueEnt *ent); +void markQueuePushClosure(MarkQueue *q, + StgClosure *p, + StgClosure **origin); +void markQueuePushClosure_(MarkQueue *q, StgClosure *p); +void markQueuePushThunkSrt(MarkQueue *q, const StgInfoTable *info); +void markQueuePushFunSrt(MarkQueue *q, const StgInfoTable *info); +void markQueuePushArray(MarkQueue *q, const StgMutArrPtrs *array, StgWord start_index); + +INLINE_HEADER bool markQueueIsEmpty(MarkQueue *q) +{ + return (q->blocks == NULL) || (q->top->head == 0 && q->blocks->link == NULL); +} + +#if defined(DEBUG) + +void printMarkQueueEntry(MarkQueueEnt *ent); +void printMarkQueue(MarkQueue *q); + +#endif + +#include "EndPrivate.h" diff --git a/rts/sm/NonMovingScav.c b/rts/sm/NonMovingScav.c new file mode 100644 index 0000000000..850750b43b --- /dev/null +++ b/rts/sm/NonMovingScav.c @@ -0,0 +1,366 @@ +#include "Rts.h" +#include "RtsUtils.h" +#include "NonMoving.h" +#include "NonMovingScav.h" +#include "Capability.h" +#include "Scav.h" +#include "Evac.h" +#include "GCThread.h" // for GCUtils.h +#include "GCUtils.h" +#include "Printer.h" +#include "MarkWeak.h" // scavengeLiveWeak + +void +nonmovingScavengeOne (StgClosure *q) +{ + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); + StgPtr p = (StgPtr)q; + const StgInfoTable *info = get_itbl(q); + + switch (info->type) { + + case MVAR_CLEAN: + case MVAR_DIRTY: + { + StgMVar *mvar = ((StgMVar *)p); + evacuate((StgClosure **)&mvar->head); + evacuate((StgClosure **)&mvar->tail); + evacuate((StgClosure **)&mvar->value); + if (gct->failed_to_evac) { + mvar->header.info = &stg_MVAR_DIRTY_info; + } else { + mvar->header.info = &stg_MVAR_CLEAN_info; + } + break; + } + + case TVAR: + { + StgTVar *tvar = ((StgTVar *)p); + evacuate((StgClosure **)&tvar->current_value); + evacuate((StgClosure **)&tvar->first_watch_queue_entry); + if (gct->failed_to_evac) { + tvar->header.info = 
&stg_TVAR_DIRTY_info; + } else { + tvar->header.info = &stg_TVAR_CLEAN_info; + } + break; + } + + case FUN_2_0: + scavenge_fun_srt(info); + evacuate(&((StgClosure *)p)->payload[1]); + evacuate(&((StgClosure *)p)->payload[0]); + break; + + case THUNK_2_0: + scavenge_thunk_srt(info); + evacuate(&((StgThunk *)p)->payload[1]); + evacuate(&((StgThunk *)p)->payload[0]); + break; + + case CONSTR_2_0: + evacuate(&((StgClosure *)p)->payload[1]); + evacuate(&((StgClosure *)p)->payload[0]); + break; + + case THUNK_1_0: + scavenge_thunk_srt(info); + evacuate(&((StgThunk *)p)->payload[0]); + break; + + case FUN_1_0: + scavenge_fun_srt(info); + FALLTHROUGH; + case CONSTR_1_0: + evacuate(&((StgClosure *)p)->payload[0]); + break; + + case THUNK_0_1: + scavenge_thunk_srt(info); + break; + + case FUN_0_1: + scavenge_fun_srt(info); + FALLTHROUGH; + case CONSTR_0_1: + break; + + case THUNK_0_2: + scavenge_thunk_srt(info); + break; + + case FUN_0_2: + scavenge_fun_srt(info); + FALLTHROUGH; + case CONSTR_0_2: + break; + + case THUNK_1_1: + scavenge_thunk_srt(info); + evacuate(&((StgThunk *)p)->payload[0]); + break; + + case FUN_1_1: + scavenge_fun_srt(info); + FALLTHROUGH; + case CONSTR_1_1: + evacuate(&q->payload[0]); + break; + + case FUN: + scavenge_fun_srt(info); + goto gen_obj; + + case THUNK: + { + scavenge_thunk_srt(info); + StgPtr end = (P_)((StgThunk *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgThunk *)p)->payload; p < end; p++) { + evacuate((StgClosure **)p); + } + break; + } + + gen_obj: + case CONSTR: + case CONSTR_NOCAF: + case WEAK: + case PRIM: + { + StgPtr end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgClosure *)p)->payload; p < end; p++) { + evacuate((StgClosure **)p); + } + break; + } + + case BCO: { + StgBCO *bco = (StgBCO *)p; + evacuate((StgClosure **)&bco->instrs); + evacuate((StgClosure **)&bco->literals); + evacuate((StgClosure **)&bco->ptrs); + break; + } + + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + evacuate(&((StgMutVar *)p)->var); + if (gct->failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } + break; + + case BLOCKING_QUEUE: + { + StgBlockingQueue *bq = (StgBlockingQueue *)p; + + evacuate(&bq->bh); + evacuate((StgClosure**)&bq->owner); + evacuate((StgClosure**)&bq->queue); + evacuate((StgClosure**)&bq->link); + + if (gct->failed_to_evac) { + bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info; + } else { + bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info; + } + break; + } + + case THUNK_SELECTOR: + { + StgSelector *s = (StgSelector *)p; + evacuate(&s->selectee); + break; + } + + // A chunk of stack saved in a heap object + case AP_STACK: + { + StgAP_STACK *ap = (StgAP_STACK *)p; + + evacuate(&ap->fun); + scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size); + break; + } + + case PAP: + p = scavenge_PAP((StgPAP *)p); + break; + + case AP: + scavenge_AP((StgAP *)p); + break; + + case ARR_WORDS: + // nothing to follow + break; + + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + { + // We don't eagerly promote objects pointed to by a mutable + // array, but if we find the array only points to objects in + // the same or an older generation, we mark it "clean" and + // avoid traversing it during minor GCs. 
+ scavenge_mut_arr_ptrs((StgMutArrPtrs*)p); + if (gct->failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; + } + gct->failed_to_evac = true; // always put it on the mutable list. + break; + } + + case MUT_ARR_PTRS_FROZEN_CLEAN: + case MUT_ARR_PTRS_FROZEN_DIRTY: + // follow everything + { + scavenge_mut_arr_ptrs((StgMutArrPtrs*)p); + + if (gct->failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info; + } + break; + } + + case SMALL_MUT_ARR_PTRS_CLEAN: + case SMALL_MUT_ARR_PTRS_DIRTY: + // follow everything + { + // We don't eagerly promote objects pointed to by a mutable + // array, but if we find the array only points to objects in + // the same or an older generation, we mark it "clean" and + // avoid traversing it during minor GCs. + StgPtr next = p + small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs*)p); + for (p = (P_)((StgSmallMutArrPtrs *)p)->payload; p < next; p++) { + evacuate((StgClosure **)p); + } + if (gct->failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_CLEAN_info; + } + gct->failed_to_evac = true; // always put it on the mutable list. + break; + } + + case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: + case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: + // follow everything + { + StgPtr next = p + small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs*)p); + for (p = (P_)((StgSmallMutArrPtrs *)p)->payload; p < next; p++) { + evacuate((StgClosure **)p); + } + + if (gct->failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info; + } + break; + } + + case TSO: + { + scavengeTSO((StgTSO *)p); + break; + } + + case STACK: + { + StgStack *stack = (StgStack*)p; + + scavenge_stack(stack->sp, stack->stack + stack->stack_size); + stack->dirty = gct->failed_to_evac; + // TODO (osa): There may be something special about stacks that we're + // missing. All other mut objects are marked by using a different info + // table except stacks. 
+ + break; + } + + case MUT_PRIM: + { + StgPtr end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgClosure *)p)->payload; p < end; p++) { + evacuate((StgClosure **)p); + } + gct->failed_to_evac = true; // mutable + break; + } + + case TREC_CHUNK: + { + StgWord i; + StgTRecChunk *tc = ((StgTRecChunk *) p); + TRecEntry *e = &(tc -> entries[0]); + evacuate((StgClosure **)&tc->prev_chunk); + for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) { + evacuate((StgClosure **)&e->tvar); + evacuate((StgClosure **)&e->expected_value); + evacuate((StgClosure **)&e->new_value); + } + gct->failed_to_evac = true; // mutable + break; + } + + case IND: + case BLACKHOLE: + case IND_STATIC: + evacuate(&((StgInd *)p)->indirectee); + break; + + default: + barf("nonmoving scavenge: unimplemented/strange closure type %d @ %p", + info->type, p); + } + + if (gct->failed_to_evac) { + // Mutable object or points to a younger object, add to the mut_list + gct->failed_to_evac = false; + if (oldest_gen->no > 0) { + recordMutableGen_GC(q, oldest_gen->no); + } + } +} + +/* Scavenge objects evacuated into a nonmoving segment by a minor GC */ +void +scavengeNonmovingSegment (struct NonmovingSegment *seg) +{ + const StgWord blk_size = nonmovingSegmentBlockSize(seg); + gct->evac_gen_no = oldest_gen->no; + gct->failed_to_evac = false; + + // scavenge objects between scan and free_ptr whose bitmap bits are 0 + bdescr *seg_block = Bdescr((P_)seg); + + ASSERT(seg_block->u.scan >= (P_)nonmovingSegmentGetBlock(seg, 0)); + ASSERT(seg_block->u.scan <= (P_)nonmovingSegmentGetBlock(seg, seg->next_free)); + + StgPtr scan_end = (P_)nonmovingSegmentGetBlock(seg, seg->next_free); + if (seg_block->u.scan == scan_end) + return; + + nonmoving_block_idx p_idx = nonmovingGetBlockIdx(seg_block->u.scan); + while (seg_block->u.scan < scan_end) { + StgClosure *p = (StgClosure*)seg_block->u.scan; + + // bit set = was allocated in a previous GC, no need to scavenge + // bit not set = new allocation, so scavenge + if (nonmovingGetMark(seg, p_idx) == 0) { + nonmovingScavengeOne(p); + } + + p_idx++; + seg_block->u.scan = (P_)(((uint8_t*)seg_block->u.scan) + blk_size); + } +} diff --git a/rts/sm/NonMovingScav.h b/rts/sm/NonMovingScav.h new file mode 100644 index 0000000000..021385e1e9 --- /dev/null +++ b/rts/sm/NonMovingScav.h @@ -0,0 +1,10 @@ +#pragma once + +#include "NonMoving.h" + +#include "BeginPrivate.h" + +void nonmovingScavengeOne(StgClosure *p); +void scavengeNonmovingSegment(struct NonmovingSegment *seg); + +#include "EndPrivate.h" diff --git a/rts/sm/NonMovingSweep.c b/rts/sm/NonMovingSweep.c new file mode 100644 index 0000000000..fa3e38cca4 --- /dev/null +++ b/rts/sm/NonMovingSweep.c @@ -0,0 +1,273 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2018 + * + * Non-moving garbage collector and allocator: Sweep phase + * + * ---------------------------------------------------------------------------*/ + +#include "Rts.h" +#include "NonMovingSweep.h" +#include "NonMoving.h" +#include "NonMovingMark.h" // for nonmovingIsAlive +#include "Capability.h" +#include "GCThread.h" // for GCUtils.h +#include "GCUtils.h" +#include "Storage.h" +#include "Trace.h" +#include "StableName.h" + +static struct NonmovingSegment *pop_all_filled_segments(struct NonmovingAllocator *alloc) +{ + while (true) { + struct NonmovingSegment *head = alloc->filled; + if (cas((StgVolatilePtr) &alloc->filled, (StgWord) head, (StgWord) NULL) == (StgWord) head) + return head; + } +} 
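The driver that strings these entry points together lives in NonMoving.c, which is not part of this hunk. As a rough orientation aid only, the sketch below shows how one non-concurrent cycle could use the functions declared in these files; it assumes roots have already been pushed onto the mark queue, and the real ordering in NonMoving.c differs in detail.

/* Illustrative sketch -- not code from this patch. */
static void sketch_nonmoving_cycle(MarkQueue *queue,
                                   StgWeak **dead_weaks,
                                   StgTSO **resurrected_threads)
{
    // Preparation: gather the filled segments that belong to the snapshot
    // so that sweep knows which segments to visit.
    nonmovingPrepareSweep();

    // Drain the mark queue (roots assumed pushed, e.g. via markQueueAddRoot).
    nonmovingMark(queue);

    // Weak pointers: tidy and re-mark until a fixed point, then mark the
    // values/finalizers of the weaks that turned out to be dead.
    while (nonmovingTidyWeaks(queue)) {
        nonmovingMark(queue);
    }
    nonmovingMarkDeadWeaks(queue, dead_weaks);

    // Threads: live ones go back onto the global list, the rest are
    // resurrected so they can be scheduled for finalization.
    nonmovingTidyThreads();
    nonmovingResurrectThreads(queue, resurrected_threads);
    nonmovingMark(queue);   // mark anything pushed by the steps above

    // Sweep: mut_lists, segments, large objects and stable names.
    nonmovingSweepMutLists();
    nonmovingSweep();
    nonmovingSweepLargeObjects();
    nonmovingSweepStableNameTable();
}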
+ +void nonmovingPrepareSweep() +{ + ASSERT(nonmovingHeap.sweep_list == NULL); + + // Move blocks in the allocators' filled lists into sweep_list + for (unsigned int alloc_idx = 0; alloc_idx < NONMOVING_ALLOCA_CNT; alloc_idx++) + { + struct NonmovingAllocator *alloc = nonmovingHeap.allocators[alloc_idx]; + struct NonmovingSegment *filled = pop_all_filled_segments(alloc); + + // Link filled to sweep_list + if (filled) { + struct NonmovingSegment *filled_head = filled; + // Find end of filled list + while (filled->link) { + filled = filled->link; + } + filled->link = nonmovingHeap.sweep_list; + nonmovingHeap.sweep_list = filled_head; + } + } +} + +// On which list should a particular segment be placed? +enum SweepResult { + SEGMENT_FREE, // segment is empty: place on free list + SEGMENT_PARTIAL, // segment is partially filled: place on active list + SEGMENT_FILLED // segment is full: place on filled list +}; + +// Determine which list a marked segment should be placed on and initialize +// next_free indices as appropriate. +GNUC_ATTR_HOT static enum SweepResult +nonmovingSweepSegment(struct NonmovingSegment *seg) +{ + bool found_free = false; + bool found_live = false; + + for (nonmoving_block_idx i = 0; + i < nonmovingSegmentBlockCount(seg); + ++i) + { + if (seg->bitmap[i] == nonmovingMarkEpoch) { + found_live = true; + } else if (!found_free) { + found_free = true; + seg->next_free = i; + seg->next_free_snap = i; + Bdescr((P_)seg)->u.scan = (P_)nonmovingSegmentGetBlock(seg, i); + seg->bitmap[i] = 0; + } else { + seg->bitmap[i] = 0; + } + + if (found_free && found_live) { + // zero the remaining dead object's mark bits + for (; i < nonmovingSegmentBlockCount(seg); ++i) { + if (seg->bitmap[i] != nonmovingMarkEpoch) { + seg->bitmap[i] = 0; + } + } + return SEGMENT_PARTIAL; + } + } + + if (found_live) { + return SEGMENT_FILLED; + } else { + ASSERT(seg->next_free == 0); + ASSERT(seg->next_free_snap == 0); + return SEGMENT_FREE; + } +} + +#if defined(DEBUG) + +void nonmovingGcCafs(struct MarkQueue_ *queue) +{ + uint32_t i = 0; + StgIndStatic *next; + + for (StgIndStatic *caf = debug_caf_list_snapshot; + caf != (StgIndStatic*) END_OF_CAF_LIST; + caf = next) + { + next = (StgIndStatic*)caf->saved_info; + + const StgInfoTable *info = get_itbl((StgClosure*)caf); + ASSERT(info->type == IND_STATIC); + + if (lookupHashTable(queue->marked_objects, (StgWord) caf) == NULL) { + debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", caf); + SET_INFO((StgClosure*)caf, &stg_GCD_CAF_info); // stub it + } else { + // CAF is alive, move it back to the debug_caf_list + ++i; + debugTrace(DEBUG_gccafs, "CAF alive at 0x%p", caf); + ACQUIRE_SM_LOCK; // debug_caf_list is global, locked by sm_mutex + caf->saved_info = (const StgInfoTable*)debug_caf_list; + debug_caf_list = caf; + RELEASE_SM_LOCK; + } + } + + debugTrace(DEBUG_gccafs, "%d CAFs live", i); + debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST; +} + +static void +clear_segment(struct NonmovingSegment* seg) +{ + size_t end = ((size_t)seg) + NONMOVING_SEGMENT_SIZE; + memset(&seg->bitmap, 0, end - (size_t)&seg->bitmap); +} + +static void +clear_segment_free_blocks(struct NonmovingSegment* seg) +{ + unsigned int block_size = nonmovingSegmentBlockSize(seg); + for (unsigned int p_idx = 0; p_idx < nonmovingSegmentBlockCount(seg); ++p_idx) { + // after mark, so bit not set == dead + if (nonmovingGetMark(seg, p_idx) == 0) { + memset(nonmovingSegmentGetBlock(seg, p_idx), 0, block_size); + } + } +} + +#endif + +GNUC_ATTR_HOT void nonmovingSweep(void) +{ + while 
(nonmovingHeap.sweep_list) { + struct NonmovingSegment *seg = nonmovingHeap.sweep_list; + + // Pushing the segment onto one of the free/active/filled lists + // updates its link field, so advance sweep_list first + nonmovingHeap.sweep_list = seg->link; + + enum SweepResult ret = nonmovingSweepSegment(seg); + + switch (ret) { + case SEGMENT_FREE: + IF_DEBUG(sanity, clear_segment(seg)); + nonmovingPushFreeSegment(seg); + break; + case SEGMENT_PARTIAL: + IF_DEBUG(sanity, clear_segment_free_blocks(seg)); + nonmovingPushActiveSegment(seg); + break; + case SEGMENT_FILLED: + nonmovingPushFilledSegment(seg); + break; + default: + barf("nonmovingSweep: weird sweep return: %d\n", ret); + } + } +} + +/* N.B. This happens during the pause so we own all capabilities. */ +void nonmovingSweepMutLists() +{ + for (uint32_t n = 0; n < n_capabilities; n++) { + Capability *cap = capabilities[n]; + bdescr *old_mut_list = cap->mut_lists[oldest_gen->no]; + cap->mut_lists[oldest_gen->no] = allocBlockOnNode_sync(cap->node); + for (bdescr *bd = old_mut_list; bd; bd = bd->link) { + for (StgPtr p = bd->start; p < bd->free; p++) { + StgClosure **q = (StgClosure**)p; + if (nonmovingIsAlive(*q)) { + recordMutableCap(*q, cap, oldest_gen->no); + } + } + } + freeChain_lock(old_mut_list); + } +} + +void nonmovingSweepLargeObjects() +{ + freeChain_lock(nonmoving_large_objects); + nonmoving_large_objects = nonmoving_marked_large_objects; + n_nonmoving_large_blocks = n_nonmoving_marked_large_blocks; + nonmoving_marked_large_objects = NULL; + n_nonmoving_marked_large_blocks = 0; +} + +// Helper for nonmovingSweepStableNameTable. Essentially nonmovingIsAlive, +// but it also works when the object died in the moving heap; see +// nonmovingSweepStableNameTable +static bool is_alive(StgClosure *p) +{ + if (!HEAP_ALLOCED_GC(p)) { + return true; + } + + if (nonmovingClosureBeingSwept(p)) { + return nonmovingIsAlive(p); + } else { + // We don't want to sweep any stable names which weren't in the + // set of segments that we swept. + // See Note [Sweeping stable names in the concurrent collector] + return true; + } +} + +void nonmovingSweepStableNameTable() +{ + // See comments in gcStableTables + + /* Note [Sweeping stable names in the concurrent collector] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * When collecting concurrently we need to take care to avoid freeing + * stable names that we didn't sweep this collection cycle. For instance, + * consider the following situation: + * + * 1. We take a snapshot and start collection + * 2. A mutator allocates a new object, then makes a stable name for it + * 3. The mutator performs a minor GC and promotes the new object to the nonmoving heap + * 4. The GC thread gets to the sweep phase and, when traversing the stable + * name table, finds the new object unmarked. It then assumes that the + * object is dead and removes the stable name from the stable name table. + * + */ + + // FIXME: We can't use nonmovingIsAlive here without first using isAlive: + // a stable name can die during a moving-heap collection and we can't use + // nonmovingIsAlive on those objects. Inefficient.
+ + stableNameLock(); + FOR_EACH_STABLE_NAME( + p, { + if (p->sn_obj != NULL) { + if (!is_alive((StgClosure*)p->sn_obj)) { + p->sn_obj = NULL; // Just to make an assertion happy + freeSnEntry(p); + } else if (p->addr != NULL) { + if (!is_alive((StgClosure*)p->addr)) { + p->addr = NULL; + } + } + } + }); + stableNameUnlock(); +} diff --git a/rts/sm/NonMovingSweep.h b/rts/sm/NonMovingSweep.h new file mode 100644 index 0000000000..de2d52acb5 --- /dev/null +++ b/rts/sm/NonMovingSweep.h @@ -0,0 +1,32 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2018 + * + * Non-moving garbage collector and allocator: Sweep phase + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include "NonMoving.h" +#include "Hash.h" + +GNUC_ATTR_HOT void nonmovingSweep(void); + +// Remove unmarked entries in oldest generation mut_lists +void nonmovingSweepMutLists(void); + +// Remove unmarked entries in oldest generation scavenged_large_objects list +void nonmovingSweepLargeObjects(void); + +// Remove dead entries in the stable name table +void nonmovingSweepStableNameTable(void); + +// Collect the set of segments to be collected during a major GC into +// nonmovingHeap.sweep_list. +void nonmovingPrepareSweep(void); + +#if defined(DEBUG) +// The non-moving equivalent of the moving collector's gcCAFs. +void nonmovingGcCafs(struct MarkQueue_ *queue); +#endif diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index 289ac542b8..3e1748d5b6 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -29,6 +29,8 @@ #include "Arena.h" #include "RetainerProfile.h" #include "CNF.h" +#include "sm/NonMoving.h" +#include "sm/NonMovingMark.h" #include "Profiling.h" // prof_arena /* ----------------------------------------------------------------------------- @@ -40,6 +42,9 @@ static void checkLargeBitmap ( StgPtr payload, StgLargeBitmap*, uint32_t ); static void checkClosureShallow ( const StgClosure * ); static void checkSTACK (StgStack *stack); +static W_ countNonMovingSegments ( struct NonmovingSegment *segs ); +static W_ countNonMovingHeap ( struct NonmovingHeap *heap ); + /* ----------------------------------------------------------------------------- Check stack sanity -------------------------------------------------------------------------- */ @@ -478,6 +483,41 @@ void checkHeapChain (bdescr *bd) } } +/* ----------------------------------------------------------------------------- + * Check nonmoving heap sanity + * + * After a concurrent sweep the nonmoving heap can be checked for validity. 
+ * -------------------------------------------------------------------------- */ + +static void checkNonmovingSegments (struct NonmovingSegment *seg) +{ + while (seg != NULL) { + const nonmoving_block_idx count = nonmovingSegmentBlockCount(seg); + for (nonmoving_block_idx i=0; i < count; i++) { + if (seg->bitmap[i] == nonmovingMarkEpoch) { + StgPtr p = nonmovingSegmentGetBlock(seg, i); + checkClosure((StgClosure *) p); + } else if (i < seg->next_free_snap){ + seg->bitmap[i] = 0; + } + } + seg = seg->link; + } +} + +void checkNonmovingHeap (const struct NonmovingHeap *heap) +{ + for (unsigned int i=0; i < NONMOVING_ALLOCA_CNT; i++) { + const struct NonmovingAllocator *alloc = heap->allocators[i]; + checkNonmovingSegments(alloc->filled); + checkNonmovingSegments(alloc->active); + for (unsigned int cap=0; cap < n_capabilities; cap++) { + checkNonmovingSegments(alloc->current[cap]); + } + } +} + + void checkHeapChunk(StgPtr start, StgPtr end) { @@ -766,16 +806,25 @@ static void checkGeneration (generation *gen, uint32_t n; gen_workspace *ws; - ASSERT(countBlocks(gen->blocks) == gen->n_blocks); + //ASSERT(countBlocks(gen->blocks) == gen->n_blocks); ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); #if defined(THREADED_RTS) - // heap sanity checking doesn't work with SMP, because we can't - // zero the slop (see Updates.h). However, we can sanity-check - // the heap after a major gc, because there is no slop. + // heap sanity checking doesn't work with SMP for two reasons: + // * we can't zero the slop (see Updates.h). However, we can sanity-check + // the heap after a major gc, because there is no slop. + // + // * the nonmoving collector may be mutating its large object lists, unless we + // were in fact called by the nonmoving collector. 
if (!after_major_gc) return; #endif + if (RtsFlags.GcFlags.useNonmoving && gen == oldest_gen) { + ASSERT(countBlocks(nonmoving_large_objects) == n_nonmoving_large_blocks); + ASSERT(countBlocks(nonmoving_marked_large_objects) == n_nonmoving_marked_large_blocks); + ASSERT(countNonMovingSegments(nonmovingHeap.free) == (W_) nonmovingHeap.n_free * NONMOVING_SEGMENT_BLOCKS); + } + checkHeapChain(gen->blocks); for (n = 0; n < n_capabilities; n++) { @@ -824,6 +873,15 @@ markCompactBlocks(bdescr *bd) } } +static void +markNonMovingSegments(struct NonmovingSegment *seg) +{ + while (seg) { + markBlocks(Bdescr((P_)seg)); + seg = seg->link; + } +} + // If memInventory() calculates that we have a memory leak, this // function will try to find the block(s) that are leaking by marking // all the ones that we know about, and search through memory to find @@ -834,7 +892,7 @@ markCompactBlocks(bdescr *bd) static void findMemoryLeak (void) { - uint32_t g, i; + uint32_t g, i, j; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { for (i = 0; i < n_capabilities; i++) { markBlocks(capabilities[i]->mut_lists[g]); @@ -856,6 +914,23 @@ findMemoryLeak (void) markBlocks(capabilities[i]->pinned_object_block); } + if (RtsFlags.GcFlags.useNonmoving) { + markBlocks(nonmoving_large_objects); + markBlocks(nonmoving_marked_large_objects); + for (i = 0; i < NONMOVING_ALLOCA_CNT; i++) { + struct NonmovingAllocator *alloc = nonmovingHeap.allocators[i]; + markNonMovingSegments(alloc->filled); + markNonMovingSegments(alloc->active); + for (j = 0; j < n_capabilities; j++) { + markNonMovingSegments(alloc->current[j]); + } + } + markNonMovingSegments(nonmovingHeap.sweep_list); + markNonMovingSegments(nonmovingHeap.free); + if (current_mark_queue) + markBlocks(current_mark_queue->blocks); + } + #if defined(PROFILING) // TODO: // if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { @@ -914,14 +989,63 @@ void findSlop(bdescr *bd) static W_ genBlocks (generation *gen) { - ASSERT(countBlocks(gen->blocks) == gen->n_blocks); + W_ ret = 0; + if (RtsFlags.GcFlags.useNonmoving && gen == oldest_gen) { + // See Note [Live data accounting in nonmoving collector]. 
+ ASSERT(countNonMovingHeap(&nonmovingHeap) == gen->n_blocks); + ret += countAllocdBlocks(nonmoving_large_objects); + ret += countAllocdBlocks(nonmoving_marked_large_objects); + ret += countNonMovingHeap(&nonmovingHeap); + if (current_mark_queue) + ret += countBlocks(current_mark_queue->blocks); + } else { + ASSERT(countBlocks(gen->blocks) == gen->n_blocks); + ret += gen->n_blocks; + } + ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); ASSERT(countCompactBlocks(gen->compact_objects) == gen->n_compact_blocks); ASSERT(countCompactBlocks(gen->compact_blocks_in_import) == gen->n_compact_blocks_in_import); - return gen->n_blocks + gen->n_old_blocks + + + ret += gen->n_old_blocks + countAllocdBlocks(gen->large_objects) + countAllocdCompactBlocks(gen->compact_objects) + countAllocdCompactBlocks(gen->compact_blocks_in_import); + return ret; +} + +static W_ +countNonMovingSegments(struct NonmovingSegment *segs) +{ + W_ ret = 0; + while (segs) { + ret += countBlocks(Bdescr((P_)segs)); + segs = segs->link; + } + return ret; +} + +static W_ +countNonMovingAllocator(struct NonmovingAllocator *alloc) +{ + W_ ret = countNonMovingSegments(alloc->filled) + + countNonMovingSegments(alloc->active); + for (uint32_t i = 0; i < n_capabilities; ++i) { + ret += countNonMovingSegments(alloc->current[i]); + } + return ret; +} + +static W_ +countNonMovingHeap(struct NonmovingHeap *heap) +{ + W_ ret = 0; + for (int alloc_idx = 0; alloc_idx < NONMOVING_ALLOCA_CNT; alloc_idx++) { + ret += countNonMovingAllocator(heap->allocators[alloc_idx]); + } + ret += countNonMovingSegments(heap->sweep_list); + ret += countNonMovingSegments(heap->free); + return ret; } void @@ -929,8 +1053,8 @@ memInventory (bool show) { uint32_t g, i; W_ gen_blocks[RtsFlags.GcFlags.generations]; - W_ nursery_blocks, retainer_blocks, - arena_blocks, exec_blocks, gc_free_blocks = 0; + W_ nursery_blocks = 0, retainer_blocks = 0, + arena_blocks = 0, exec_blocks = 0, gc_free_blocks = 0; W_ live_blocks = 0, free_blocks = 0; bool leak; @@ -947,20 +1071,19 @@ memInventory (bool show) gen_blocks[g] += genBlocks(&generations[g]); } - nursery_blocks = 0; for (i = 0; i < n_nurseries; i++) { ASSERT(countBlocks(nurseries[i].blocks) == nurseries[i].n_blocks); nursery_blocks += nurseries[i].n_blocks; } for (i = 0; i < n_capabilities; i++) { - gc_free_blocks += countBlocks(gc_threads[i]->free_blocks); + W_ n = countBlocks(gc_threads[i]->free_blocks); + gc_free_blocks += n; if (capabilities[i]->pinned_object_block != NULL) { nursery_blocks += capabilities[i]->pinned_object_block->blocks; } nursery_blocks += countBlocks(capabilities[i]->pinned_object_blocks); } - retainer_blocks = 0; #if defined(PROFILING) if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { retainer_blocks = retainerStackBlocks(); diff --git a/rts/sm/Sanity.h b/rts/sm/Sanity.h index 9227e6fd18..b6f2054383 100644 --- a/rts/sm/Sanity.h +++ b/rts/sm/Sanity.h @@ -31,6 +31,7 @@ void checkStaticObjects ( StgClosure* static_objects ); void checkStackChunk ( StgPtr sp, StgPtr stack_end ); StgOffset checkStackFrame ( StgPtr sp ); StgOffset checkClosure ( const StgClosure* p ); +void checkNonmovingHeap ( const struct NonmovingHeap *heap ); void checkRunQueue (Capability *cap); diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index b50dff38fb..cac9ca1e2d 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -62,6 +62,8 @@ #include "Hash.h" #include "sm/MarkWeak.h" +#include "sm/NonMoving.h" // for nonmoving_set_closure_mark_bit +#include "sm/NonMovingScav.h" static void scavenge_large_bitmap 
(StgPtr p, StgLargeBitmap *large_bitmap, @@ -1654,7 +1656,10 @@ scavenge_mutable_list(bdescr *bd, generation *gen) ; } - if (scavenge_one(p)) { + if (RtsFlags.GcFlags.useNonmoving && major_gc && gen == oldest_gen) { + // We can't use scavenge_one here as we need to scavenge SRTs + nonmovingScavengeOne((StgClosure *)p); + } else if (scavenge_one(p)) { // didn't manage to promote everything, so put the // object back on the list. recordMutableGen_GC((StgClosure *)p,gen_no); @@ -1666,7 +1671,14 @@ scavenge_mutable_list(bdescr *bd, generation *gen) void scavenge_capability_mut_lists (Capability *cap) { - uint32_t g; + // In a major GC only the nonmoving heap's mut lists are roots + if (RtsFlags.GcFlags.useNonmoving && major_gc) { + uint32_t g = oldest_gen->no; + scavenge_mutable_list(cap->saved_mut_lists[g], oldest_gen); + freeChain_sync(cap->saved_mut_lists[g]); + cap->saved_mut_lists[g] = NULL; + return; + } /* Mutable lists from each generation > N * we want to *scavenge* these roots, not evacuate them: they're not @@ -1674,7 +1686,7 @@ scavenge_capability_mut_lists (Capability *cap) * Also do them in reverse generation order, for the usual reason: * namely to reduce the likelihood of spurious old->new pointers. */ - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { + for (uint32_t g = RtsFlags.GcFlags.generations-1; g > N; g--) { scavenge_mutable_list(cap->saved_mut_lists[g], &generations[g]); freeChain_sync(cap->saved_mut_lists[g]); cap->saved_mut_lists[g] = NULL; @@ -2044,6 +2056,16 @@ loop: for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) { ws = &gct->gens[g]; + if (ws->todo_seg != END_NONMOVING_TODO_LIST) { + struct NonmovingSegment *seg = ws->todo_seg; + ASSERT(seg->todo_link); + ws->todo_seg = seg->todo_link; + seg->todo_link = NULL; + scavengeNonmovingSegment(seg); + did_something = true; + break; + } + gct->scan_bd = NULL; // If we have a scan block with some work to do, diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 97c71478ed..9fe68e98b7 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -29,6 +29,7 @@ #include "Trace.h" #include "GC.h" #include "Evac.h" +#include "NonMoving.h" #if defined(ios_HOST_OS) #include "Hash.h" #endif @@ -82,7 +83,7 @@ Mutex sm_mutex; static void allocNurseries (uint32_t from, uint32_t to); static void assignNurseriesToCapabilities (uint32_t from, uint32_t to); -static void +void initGeneration (generation *gen, int g) { gen->no = g; @@ -170,6 +171,18 @@ initStorage (void) } oldest_gen->to = oldest_gen; + // The nonmoving heap uses oldest_gen, so initialize it only after oldest_gen is set up + nonmovingInit(); + +#if defined(THREADED_RTS) + // nonmovingAddCapabilities allocates segments, which requires taking the gc + // sync lock, so initialize it before nonmovingAddCapabilities + initSpinLock(&gc_alloc_block_sync); +#endif + + if (RtsFlags.GcFlags.useNonmoving) + nonmovingAddCapabilities(n_capabilities); + /* The oldest generation has one step. */ if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) { if (RtsFlags.GcFlags.generations == 1) { @@ -195,9 +208,6 @@ initStorage (void) exec_block = NULL; -#if defined(THREADED_RTS) - initSpinLock(&gc_alloc_block_sync); -#endif N = 0; for (n = 0; n < n_numa_nodes; n++) { @@ -1232,8 +1242,8 @@ W_ countOccupied (bdescr *bd) W_ genLiveWords (generation *gen) { - return gen->n_words + gen->n_large_words + - gen->n_compact_blocks * BLOCK_SIZE_W; + return (gen->live_estimate ?
gen->live_estimate : gen->n_words) + + gen->n_large_words + gen->n_compact_blocks * BLOCK_SIZE_W; } W_ genLiveBlocks (generation *gen) @@ -1289,9 +1299,9 @@ calcNeeded (bool force_major, memcount *blocks_needed) for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) { generation *gen = &generations[g]; - W_ blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?) - + gen->n_large_blocks - + gen->n_compact_blocks; + W_ blocks = gen->live_estimate ? (gen->live_estimate / BLOCK_SIZE_W) : gen->n_blocks; + blocks += gen->n_large_blocks + + gen->n_compact_blocks; // we need at least this much space needed += blocks; @@ -1309,7 +1319,7 @@ calcNeeded (bool force_major, memcount *blocks_needed) // mark stack: needed += gen->n_blocks / 100; } - if (gen->compact) { + if (gen->compact || (RtsFlags.GcFlags.useNonmoving && gen == oldest_gen)) { continue; // no additional space needed for compaction } else { needed += gen->n_blocks; diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index aaa44428b3..08bdb37ba3 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -17,6 +17,7 @@ -------------------------------------------------------------------------- */ void initStorage(void); +void initGeneration(generation *gen, int g); void exitStorage(void); void freeStorage(bool free_heap); |
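To make the accounting changes in Storage.c above concrete: genLiveWords now prefers gen->live_estimate (when non-zero) over gen->n_words, and calcNeeded both sizes a generation from that estimate and skips the copy-space reservation when the oldest generation is managed by the nonmoving collector, just as it already does for compacted generations. The following is only a simplified, standalone sketch of that per-generation calculation; BLOCK_SIZE_W's value, struct gen_info and blocks_needed_for_gen are illustrative stand-ins, while the field names mirror the hunks above:

    #include <stdbool.h>
    #include <stdint.h>

    #define BLOCK_SIZE_W 1024   /* illustrative only; the real value is target-dependent */

    /* Hypothetical, trimmed-down view of a generation, just for this sketch. */
    struct gen_info {
        uint64_t n_blocks;
        uint64_t n_large_blocks;
        uint64_t n_compact_blocks;
        uint64_t live_estimate;       /* words of live data; 0 when unknown */
        bool     is_compacted;
        bool     is_nonmoving_oldest; /* oldest gen under the nonmoving collector */
    };

    /* Blocks a major GC should budget for one generation, following the
     * pattern of the calcNeeded hunk: prefer the live-data estimate over
     * n_blocks, and reserve no to-space for generations collected in place. */
    static uint64_t blocks_needed_for_gen(const struct gen_info *gen)
    {
        uint64_t blocks = gen->live_estimate ? (gen->live_estimate / BLOCK_SIZE_W)
                                             : gen->n_blocks;
        blocks += gen->n_large_blocks + gen->n_compact_blocks;

        uint64_t needed = blocks;        /* space already occupied by the generation */
        if (!(gen->is_compacted || gen->is_nonmoving_oldest)) {
            needed += gen->n_blocks;     /* copying collection also needs to-space */
        }
        return needed;
    }

This matches the genLiveWords change, where a non-zero live_estimate stands in for n_words when reporting the live size of the oldest generation.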