Diffstat (limited to 'rts/sm/NonMoving.c')
-rw-r--r--  rts/sm/NonMoving.c  1390
1 file changed, 1390 insertions, 0 deletions
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
new file mode 100644
index 0000000000..50cf784aab
--- /dev/null
+++ b/rts/sm/NonMoving.c
@@ -0,0 +1,1390 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2018
+ *
+ * Non-moving garbage collector and allocator
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "Capability.h"
+#include "Printer.h"
+#include "Storage.h"
+// We call evacuate, which expects the thread-local gc_thread to be valid;
+// this is sometimes declared as a register variable, therefore it is necessary
+// to include the declaration so that the compiler doesn't clobber the register.
+#include "GCThread.h"
+#include "GCTDecl.h"
+#include "Schedule.h"
+
+#include "NonMoving.h"
+#include "NonMovingMark.h"
+#include "NonMovingSweep.h"
+#include "NonMovingCensus.h"
+#include "StablePtr.h" // markStablePtrTable
+#include "Schedule.h" // markScheduler
+#include "Weak.h" // dead_weak_ptr_list
+
+struct NonmovingHeap nonmovingHeap;
+
+uint8_t nonmovingMarkEpoch = 1;
+
+static void nonmovingBumpEpoch(void) {
+ nonmovingMarkEpoch = nonmovingMarkEpoch == 1 ? 2 : 1;
+}
+
+#if defined(THREADED_RTS)
+/*
+ * This mutex ensures that only one non-moving collection is active at a time.
+ */
+Mutex nonmoving_collection_mutex;
+
+OSThreadId mark_thread;
+bool concurrent_coll_running = false;
+Condition concurrent_coll_finished;
+Mutex concurrent_coll_finished_lock;
+#endif
+
+/*
+ * Note [Non-moving garbage collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The sources rts/NonMoving*.c implement GHC's non-moving garbage collector
+ * for the oldest generation. In contrast to the throughput-oriented moving
+ * collector, the non-moving collector is designed to achieve low GC latencies
+ * on large heaps. It accomplishes low latencies by way of a concurrent
+ * mark-and-sweep collection strategy on a specially-designed heap structure.
+ * While the design is described in detail in the design document found in
+ * docs/storage/nonmoving-gc, we briefly summarize the structure here.
+ *
+ *
+ * === Heap Structure ===
+ *
+ * The nonmoving heap (embodied by struct NonmovingHeap) consists of a family
+ * of allocators, each serving a range of allocation sizes. Each allocator
+ * consists of a set of *segments*, each of which contains fixed-size *blocks*
+ * (not to be confused with "blocks" provided by GHC's block allocator; this is
+ * admittedly an unfortunate overlap in terminology). These blocks are the
+ * backing store for the allocator. In addition to blocks, the segment also
+ * contains some header information (see struct NonmovingSegment in
+ * NonMoving.h). This header contains a *bitmap* encoding one byte per block
+ * (used by the collector to record liveness), as well as the index of the next
+ * unallocated block (and a *snapshot* of this field which will be described in
+ * the next section).
+ *
+ * Each allocator maintains three sets of segments:
+ *
+ * - A *current* segment for each capability; this is the segment which that
+ * capability will allocate into.
+ *
+ * - A pool of *active* segments, each of which contains at least one
+ * unallocated block. The allocator will take a segment from this pool when
+ * it fills its *current* segment.
+ *
+ * - A set of *filled* segments, which contain no unallocated blocks and will
+ * be collected during the next major GC cycle.
+ *
+ * Storage for segments is allocated from the block allocator as an aligned
+ * group of NONMOVING_SEGMENT_BLOCKS blocks. This makes the task of locating
+ * the segment header for a block a simple matter of bit-masking (as
+ * implemented by nonmovingGetSegment).
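+ *
+ * As a sketch (the real helper is nonmovingGetSegment in NonMoving.h), the
+ * lookup amounts to
+ *
+ *     seg = (struct NonmovingSegment *)
+ *               ((uintptr_t) p & ~(NONMOVING_SEGMENT_SIZE - 1));
+ *
+ * which works precisely because segment groups are allocated aligned to
+ * NONMOVING_SEGMENT_SIZE (see nonmovingAllocSegment below).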
+ *
+ * In addition, to relieve pressure on the block allocator we keep a small pool
+ * of free segments around (nonmovingHeap.free) which can be pushed/popped
+ * to/from in a lock-free manner.
+ *
+ *
+ * === Allocation ===
+ *
+ * The allocator (as implemented by nonmovingAllocate) starts by identifying
+ * which allocator the request should be made against. It then allocates into
+ * its local current segment and bumps the next_free pointer to point to the
+ * next unallocated block (as indicated by the bitmap). If it finds the current
+ * segment is now full, it moves it to the filled list and looks for a new
+ * segment to make current from a few sources:
+ *
+ * 1. the allocator's active list (see pop_active_segment)
+ * 2. the nonmoving heap's free segment pool (see nonmovingPopFreeSegment)
+ * 3. allocate a new segment from the block allocator (see
+ * nonmovingAllocSegment)
+ *
+ * Note that allocation does *not* involve modifying the bitmap. The bitmap is
+ * only modified by the collector.
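+ *
+ * As a concrete (illustrative) example: a request for 5 words (40 bytes on a
+ * 64-bit machine) is rounded up to the next power of two, 64 bytes, and so is
+ * served by the allocator for log_block_size = 6:
+ *
+ *     log_block_size = log2_ceil(sz * sizeof(StgWord));                // 6
+ *     alloca = nonmovingHeap.allocators[log_block_size - NONMOVING_ALLOCA0];
+ *
+ * (see nonmovingAllocate below).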
+ *
+ *
+ * === Snapshot invariant ===
+ *
+ * To safely collect in a concurrent setting, the collector relies on the
+ * notion of a *snapshot*. The snapshot is a hypothetical frozen state of the
+ * heap topology taken at the beginning of the major collection cycle.
+ * With this definition we require the following property of the mark phase,
+ * which we call the *snapshot invariant*,
+ *
+ * All objects that were reachable at the time the snapshot was taken
+ * must have their mark bits set at the end of the mark phase.
+ *
+ * As the mutator might change the topology of the heap while we are marking,
+ * this property requires some cooperation from the mutator to maintain.
+ * Specifically, we rely on a write barrier as described in Note [Update
+ * remembered set].
+ *
+ * To determine which objects existed when the snapshot was taken, we
+ * record a snapshot of each segment's next_free pointer at the beginning of
+ * collection.
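+ *
+ * Concretely (a sketch; the real check lives in NonMovingMark.c:mark_closure),
+ * a block with index blk_idx in segment seg belongs to the snapshot iff
+ *
+ *     blk_idx < nonmovingSegmentInfo(seg)->next_free_snap
+ *
+ * Blocks at or above next_free_snap were allocated after the snapshot was
+ * taken and are left to the next collection cycle.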
+ *
+ *
+ * === Collection ===
+ *
+ * Collection happens in a few phases, some of which occur during a
+ * stop-the-world period (marked with [STW]) and others which can occur
+ * concurrently with mutation and minor collection (marked with [CONC]):
+ *
+ * 1. [STW] Preparatory GC: Here we do a standard minor collection of the
+ * younger generations (which may evacuate things to the nonmoving heap).
+ * References from younger generations into the nonmoving heap are recorded
+ * in the mark queue (see Note [Aging under the non-moving collector] in
+ * this file).
+ *
+ * 2. [STW] Snapshot update: Here we update the segment snapshot metadata
+ * (see nonmovingPrepareMark) and move the filled segments to
+ * nonmovingHeap.sweep_list, which is the set of segments which we will
+ * sweep this GC cycle.
+ *
+ * 3. [STW] Root collection: Here we walk over a variety of root sources
+ * and add them to the mark queue (see nonmovingCollect).
+ *
+ * 4. [CONC] Concurrent marking: Here we do the majority of marking concurrently
+ * with mutator execution (but with the write barrier enabled; see
+ * Note [Update remembered set]).
+ *
+ * 5. [STW] Final sync: Here we interrupt the mutators, ask them to
+ * flush their final update remembered sets, and mark any new references
+ * we find.
+ *
+ * 6. [CONC] Sweep: Here we walk over the nonmoving segments on sweep_list
+ * and place them back on either the active, current, or filled list,
+ * depending upon how much live data they contain.
+ *
+ *
+ * === Marking ===
+ *
+ * Ignoring large and static objects, marking a closure is fairly
+ * straightforward (implemented in NonMovingMark.c:mark_closure):
+ *
+ * 1. Check whether the closure is in the non-moving generation; if not then
+ * we ignore it.
+ * 2. Find the segment containing the closure's block.
+ * 3. Check whether the closure's block is above seg->next_free_snap; if so
+ * then the block was not allocated when we took the snapshot and therefore
+ * we don't need to mark it.
+ * 4. Check whether the block's bitmap entry is equal to nonmovingMarkEpoch. If
+ * so then we can stop, as we have already marked it.
+ * 5. Push the closure's pointers to the mark queue.
+ * 6. Set the block's bitmap entry to nonmovingMarkEpoch.
+ *
+ * Note that the ordering of (5) and (6) is rather important, as described in
+ * Note [StgStack dirtiness flags and concurrent marking].
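+ *
+ * In rough pseudocode (helper names here approximate those in NonMoving.h and
+ * NonMovingMark.c; consult mark_closure for the real logic):
+ *
+ *     if (!(Bdescr((P_) p)->flags & BF_NONMOVING)) return;             // (1)
+ *     seg = nonmovingGetSegment((StgPtr) p);                           // (2)
+ *     idx = nonmovingGetBlockIdx((StgPtr) p);
+ *     if (idx >= nonmovingSegmentInfo(seg)->next_free_snap) return;    // (3)
+ *     if (nonmovingGetMark(seg, idx) == nonmovingMarkEpoch) return;    // (4)
+ *     push p's pointers to the mark queue;                             // (5)
+ *     set the bitmap entry for idx to nonmovingMarkEpoch;              // (6)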
+ *
+ *
+ * === Other references ===
+ *
+ * Apart from the design document in docs/storage/nonmoving-gc and the Ueno
+ * 2016 paper (TODO citation) from which it drew inspiration, there are a
+ * variety of other relevant Notes scattered throughout the tree:
+ *
+ * - Note [Concurrent non-moving collection] (NonMoving.c) describes
+ * concurrency control of the nonmoving collector
+ *
+ * - Note [Live data accounting in nonmoving collector] (NonMoving.c)
+ * describes how we track the quantity of live data in the nonmoving
+ * generation.
+ *
+ * - Note [Aging under the non-moving collector] (NonMoving.c) describes how
+ * we accommodate aging.
+ *
+ * - Note [Large objects in the non-moving collector] (NonMovingMark.c)
+ * describes how we track large objects.
+ *
+ * - Note [Update remembered set] (NonMovingMark.c) describes the function and
+ * implementation of the update remembered set used to realize the concurrent
+ * write barrier.
+ *
+ * - Note [Concurrent read barrier on deRefWeak#] (NonMovingMark.c) describes
+ * the read barrier on Weak# objects.
+ *
+ * - Note [Unintentional marking in resurrectThreads] (NonMovingMark.c) describes
+ * a tricky interaction between the update remembered set flush and weak
+ * finalization.
+ *
+ * - Note [Origin references in the nonmoving collector] (NonMovingMark.h)
+ * describes how we implement indirection short-cutting and the selector
+ * optimisation.
+ *
+ * - Note [StgStack dirtiness flags and concurrent marking] (TSO.h) describes
+ * the protocol for concurrent marking of stacks.
+ *
+ * - Note [Static objects under the nonmoving collector] (Storage.c) describes
+ * treatment of static objects.
+ *
+ *
+ * Note [Concurrent non-moving collection]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Concurrency-control of non-moving garbage collection is a bit tricky. There
+ * are a few things to keep in mind:
+ *
+ * - Only one non-moving collection may be active at a time. This is enforced by the
+ * concurrent_coll_running flag, which is set when a collection is on-going. If
+ * we attempt to initiate a new collection while this is set we wait on the
+ * concurrent_coll_finished condition variable, which signals when the
+ * active collection finishes.
+ *
+ * - In between the mark and sweep phases the non-moving collector must synchronize
+ * with mutator threads to collect and mark their final update remembered
+ * sets. This is accomplished using
+ * stopAllCapabilitiesWith(SYNC_FLUSH_UPD_REM_SET). Capabilities are held
+ * until the final mark has concluded.
+ *
+ * Note that the possibility of concurrent minor and non-moving collections
+ * requires that we handle static objects a bit specially. See
+ * Note [Static objects under the nonmoving collector] in Storage.c
+ * for details.
+ *
+ *
+ * Note [Aging under the non-moving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The initial design of the non-moving collector mandated that all live data
+ * be evacuated to the non-moving heap prior to a major collection. This
+ * simplified certain bits of implementation and eased reasoning. However, it
+ * was (unsurprisingly) also found to result in significant amounts of
+ * unnecessary copying.
+ *
+ * Consequently, we now allow aging. Aging allows the preparatory GC leading up
+ * to a major collection to evacuate some objects into the young generation.
+ * However, this introduces the following tricky case that might arise after
+ * we have finished the preparatory GC:
+ *
+ * moving heap ┆ non-moving heap
+ * ───────────────┆──────────────────
+ * ┆
+ * B ←────────────── A ←─────────────── root
+ * │ ┆ ↖─────────────── gen1 mut_list
+ * ╰───────────────→ C
+ * ┆
+ *
+ * In this case C is clearly live, but the non-moving collector can only see
+ * this by walking through B, which lives in the moving heap. However, doing so
+ * would require that we synchronize with the mutator/minor GC to ensure that it
+ * isn't in the middle of moving B. What to do?
+ *
+ * The solution we use here is to teach the preparatory moving collector to
+ * "evacuate" objects it encounters in the non-moving heap by adding them to
+ * the mark queue. This is implemented by pushing the object to the update
+ * remembered set of the capability held by the evacuating gc_thread
+ * (implemented by markQueuePushClosureGC).
+ *
+ * Consequently collection of the case above would proceed as follows:
+ *
+ * 1. Initial state:
+ * * A lives in the non-moving heap and is reachable from the root set
+ * * A is on the oldest generation's mut_list, since it contains a pointer
+ * to B, which lives in a younger generation
+ * * B lives in the moving collector's from space
+ * * C lives in the non-moving heap
+ *
+ * 2. Preparatory GC: Scavenging mut_lists:
+ *
+ * The mut_list of the oldest generation is scavenged, resulting in B being
+ * evacuated (aged) into the moving collector's to-space.
+ *
+ * 3. Preparatory GC: Scavenge B
+ *
+ * B (now in to-space) is scavenged, resulting in evacuation of C.
+ * evacuate(C) pushes a reference to C to the mark queue.
+ *
+ * 4. Non-moving GC: C is marked
+ *
+ * The non-moving collector will come to C in the mark queue and mark it.
+ *
+ *
+ * Note [Deadlock detection under the non-moving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * In GHC the garbage collector is responsible for identifying deadlocked
+ * programs. Providing for this responsibility is slightly tricky in the
+ * non-moving collector due to the existence of aging. In particular, the
+ * non-moving collector cannot traverse objects living in a young generation
+ * but reachable from the non-moving generation, as described in Note [Aging
+ * under the non-moving collector].
+ *
+ * However, this can pose trouble for deadlock detection since it means that we
+ * may conservatively mark dead closures as live. Consider this case:
+ *
+ * moving heap ┆ non-moving heap
+ * ───────────────┆──────────────────
+ * ┆
+ * MVAR_QUEUE ←───── TSO ←───────────── gen1 mut_list
+ * ↑ │ ╰────────↗ │
+ * │ │ ┆ │
+ * │ │ ┆ ↓
+ * │ ╰──────────→ MVAR
+ * ╰─────────────────╯
+ * ┆
+ *
+ * In this case we have a TSO blocked on a dead MVar. Because the MVAR_TSO_QUEUE on
+ * which it is blocked lives in the moving heap, the TSO is necessarily on the
+ * oldest generation's mut_list. As in Note [Aging under the non-moving
+ * collector], the MVAR_TSO_QUEUE will be evacuated. If MVAR_TSO_QUEUE is aged
+ * (e.g. evacuated to the young generation) then the MVAR will be added to the
+ * mark queue. Consequently, we will falsely conclude that the MVAR is still
+ * alive and fail to spot the deadlock.
+ *
+ * To avoid this sort of situation we disable aging when we are starting a
+ * major GC specifically for deadlock detection (as done by
+ * scheduleDetectDeadlock). This condition is recorded by the
+ * deadlock_detect_gc global variable declared in GC.h. Setting this has a few
+ * effects on the preparatory GC:
+ *
+ * - Evac.c:alloc_for_copy forces evacuation to the non-moving generation.
+ *
+ * - The evacuation logic usually responsible for pushing objects living in
+ * the non-moving heap to the mark queue is disabled. This is safe because
+ * we know that all live objects will be in the non-moving heap by the end
+ * of the preparatory moving collection.
+ *
+ *
+ * Note [Live data accounting in nonmoving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The nonmoving collector uses an approximate heuristic for reporting live
+ * data quantity. Specifically, during mark we record how much live data we
+ * find in nonmoving_live_words. At the end of mark we declare this amount to
+ * be how much live data we have in the nonmoving heap (by setting
+ * oldest_gen->live_estimate).
+ *
+ * In addition, we update oldest_gen->live_estimate every time we fill a
+ * segment. This, as well, is quite approximate: we assume that all blocks
+ * above next_free_snap are newly allocated. In principle we could refer to the
+ * bitmap to count how many blocks we actually allocated but this too would be
+ * approximate due to concurrent collection and ultimately seems more costly
+ * than the problem demands.
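+ *
+ * That is, when a segment of 2^log_block_size-byte blocks fills up we add
+ * (approximately, in words)
+ *
+ *     (block_count - next_free_snap) * (1 << log_block_size) / sizeof(W_)
+ *
+ * to oldest_gen->live_estimate; see the filled-segment path of
+ * nonmovingAllocate below.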
+ *
+ */
+
+memcount nonmoving_live_words = 0;
+
+#if defined(THREADED_RTS)
+static void* nonmovingConcurrentMark(void *mark_queue);
+#endif
+static void nonmovingClearBitmap(struct NonmovingSegment *seg);
+static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads);
+
+static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t log_block_size)
+{
+ bdescr *bd = Bdescr((P_) seg);
+ seg->link = NULL;
+ seg->todo_link = NULL;
+ seg->next_free = 0;
+ nonmovingClearBitmap(seg);
+ bd->nonmoving_segment.log_block_size = log_block_size;
+ bd->nonmoving_segment.next_free_snap = 0;
+ bd->u.scan = nonmovingSegmentGetBlock(seg, 0);
+}
+
+// Add a segment to the free list.
+void nonmovingPushFreeSegment(struct NonmovingSegment *seg)
+{
+ // See Note [Live data accounting in nonmoving collector].
+ if (nonmovingHeap.n_free > NONMOVING_MAX_FREE) {
+ bdescr *bd = Bdescr((StgPtr) seg);
+ ACQUIRE_SM_LOCK;
+ ASSERT(oldest_gen->n_blocks >= bd->blocks);
+ ASSERT(oldest_gen->n_words >= BLOCK_SIZE_W * bd->blocks);
+ oldest_gen->n_blocks -= bd->blocks;
+ oldest_gen->n_words -= BLOCK_SIZE_W * bd->blocks;
+ freeGroup(bd);
+ RELEASE_SM_LOCK;
+ return;
+ }
+
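+ // Lock-free push: point the new segment at the current head and CAS the
+ // head pointer; retry if another thread changed the list in the meantime.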
+ while (true) {
+ struct NonmovingSegment *old = nonmovingHeap.free;
+ seg->link = old;
+ if (cas((StgVolatilePtr) &nonmovingHeap.free, (StgWord) old, (StgWord) seg) == (StgWord) old)
+ break;
+ }
+ __sync_add_and_fetch(&nonmovingHeap.n_free, 1);
+}
+
+static struct NonmovingSegment *nonmovingPopFreeSegment(void)
+{
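+ // Lock-free pop: read the head, then CAS it to its successor; retry if
+ // another thread pushed or popped concurrently.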
+ while (true) {
+ struct NonmovingSegment *seg = nonmovingHeap.free;
+ if (seg == NULL) {
+ return NULL;
+ }
+ if (cas((StgVolatilePtr) &nonmovingHeap.free,
+ (StgWord) seg,
+ (StgWord) seg->link) == (StgWord) seg) {
+ __sync_sub_and_fetch(&nonmovingHeap.n_free, 1);
+ return seg;
+ }
+ }
+}
+
+unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size)
+{
+ // We compute the overwhelmingly common size cases directly to avoid a very
+ // expensive integer division.
+ switch (log_block_size) {
+ case 3: return nonmovingBlockCount(3);
+ case 4: return nonmovingBlockCount(4);
+ case 5: return nonmovingBlockCount(5);
+ case 6: return nonmovingBlockCount(6);
+ case 7: return nonmovingBlockCount(7);
+ default: return nonmovingBlockCount(log_block_size);
+ }
+}
+
+/*
+ * Request a fresh segment from the free segment list or allocate one on the
+ * given node.
+ *
+ * Caller must hold SM_MUTEX (although we take the gc_alloc_block_sync spinlock
+ * under the assumption that we are in a GC context).
+ */
+static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
+{
+ // First try taking something off of the free list
+ struct NonmovingSegment *ret;
+ ret = nonmovingPopFreeSegment();
+
+ // Nothing in the free list, allocate a new segment...
+ if (ret == NULL) {
+ // Take gc spinlock: another thread may be scavenging a moving
+ // generation and call `todo_block_full`
+ ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
+ // See Note [Live data accounting in nonmoving collector].
+ oldest_gen->n_blocks += bd->blocks;
+ oldest_gen->n_words += BLOCK_SIZE_W * bd->blocks;
+ RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+
+ for (StgWord32 i = 0; i < bd->blocks; ++i) {
+ initBdescr(&bd[i], oldest_gen, oldest_gen);
+ bd[i].flags = BF_NONMOVING;
+ }
+ ret = (struct NonmovingSegment *)bd->start;
+ }
+
+ // Check alignment
+ ASSERT(((uintptr_t)ret % NONMOVING_SEGMENT_SIZE) == 0);
+ return ret;
+}
+
+static inline unsigned long log2_floor(unsigned long x)
+{
+ return sizeof(unsigned long)*8 - 1 - __builtin_clzl(x);
+}
+
+static inline unsigned long log2_ceil(unsigned long x)
+{
+ unsigned long log = log2_floor(x);
+ return (x - (1UL << log)) ? log + 1 : log;
+}
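+
+// For example, log2_floor(24) == 4 while log2_ceil(24) == 5, so a 24-byte
+// request is served from the 32-byte (2^5) size class (see nonmovingAllocate).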
+
+// Advance a segment's next_free pointer. Returns true if the segment is full.
+static bool advance_next_free(struct NonmovingSegment *seg, const unsigned int blk_count)
+{
+ const uint8_t *bitmap = seg->bitmap;
+ ASSERT(blk_count == nonmovingSegmentBlockCount(seg));
+#if defined(NAIVE_ADVANCE_FREE)
+ // reference implementation
+ for (unsigned int i = seg->next_free+1; i < blk_count; i++) {
+ if (!bitmap[i]) {
+ seg->next_free = i;
+ return false;
+ }
+ }
+ seg->next_free = blk_count;
+ return true;
+#else
+ const uint8_t *c = memchr(&bitmap[seg->next_free+1], 0, blk_count - seg->next_free - 1);
+ if (c == NULL) {
+ seg->next_free = blk_count;
+ return true;
+ } else {
+ seg->next_free = c - bitmap;
+ return false;
+ }
+#endif
+}
+
+static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *alloca)
+{
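+ // Lock-free pop from this allocator's active list; the CAS loop mirrors
+ // nonmovingPopFreeSegment above.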
+ while (true) {
+ struct NonmovingSegment *seg = alloca->active;
+ if (seg == NULL) {
+ return NULL;
+ }
+ if (cas((StgVolatilePtr) &alloca->active,
+ (StgWord) seg,
+ (StgWord) seg->link) == (StgWord) seg) {
+ return seg;
+ }
+ }
+}
+
+/* Allocate a block in the nonmoving heap. Caller must hold SM_MUTEX. sz is in words */
+GNUC_ATTR_HOT
+void *nonmovingAllocate(Capability *cap, StgWord sz)
+{
+ unsigned int log_block_size = log2_ceil(sz * sizeof(StgWord));
+ unsigned int block_count = nonmovingBlockCountFromSize(log_block_size);
+
+ // The max we ever allocate is 3276 bytes (anything larger is a large
+ // object and not moved) which is covered by allocator 9.
+ ASSERT(log_block_size < NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT);
+
+ struct NonmovingAllocator *alloca = nonmovingHeap.allocators[log_block_size - NONMOVING_ALLOCA0];
+
+ // Allocate into current segment
+ struct NonmovingSegment *current = alloca->current[cap->no];
+ ASSERT(current); // current is never NULL
+ void *ret = nonmovingSegmentGetBlock_(current, log_block_size, current->next_free);
+ ASSERT(GET_CLOSURE_TAG(ret) == 0); // check alignment
+
+ // Advance the current segment's next_free or allocate a new segment if full
+ bool full = advance_next_free(current, block_count);
+ if (full) {
+ // Current segment is full: update the live data estimate, link it to
+ // the filled list, take an active segment if one exists, otherwise
+ // allocate a new segment.
+
+ // Update live data estimate.
+ // See Note [Live data accounting in nonmoving collector].
+ unsigned int new_blocks = block_count - nonmovingSegmentInfo(current)->next_free_snap;
+ unsigned int block_size = 1 << log_block_size;
+ atomic_inc(&oldest_gen->live_estimate, new_blocks * block_size / sizeof(W_));
+
+ // push the current segment to the filled list
+ nonmovingPushFilledSegment(current);
+
+ // first look for a new segment in the active list
+ struct NonmovingSegment *new_current = pop_active_segment(alloca);
+
+ // there are no active segments, allocate new segment
+ if (new_current == NULL) {
+ new_current = nonmovingAllocSegment(cap->node);
+ nonmovingInitSegment(new_current, log_block_size);
+ }
+
+ // make it current
+ new_current->link = NULL;
+ alloca->current[cap->no] = new_current;
+ }
+
+ return ret;
+}
+
+/* Allocate a nonmovingAllocator */
+static struct NonmovingAllocator *alloc_nonmoving_allocator(uint32_t n_caps)
+{
+ size_t allocator_sz =
+ sizeof(struct NonmovingAllocator) +
+ sizeof(void*) * n_caps; // current segment pointer for each capability
+ struct NonmovingAllocator *alloc =
+ stgMallocBytes(allocator_sz, "nonmovingInit");
+ memset(alloc, 0, allocator_sz);
+ return alloc;
+}
+
+static void free_nonmoving_allocator(struct NonmovingAllocator *alloc)
+{
+ stgFree(alloc);
+}
+
+void nonmovingInit(void)
+{
+ if (! RtsFlags.GcFlags.useNonmoving) return;
+#if defined(THREADED_RTS)
+ initMutex(&nonmoving_collection_mutex);
+ initCondition(&concurrent_coll_finished);
+ initMutex(&concurrent_coll_finished_lock);
+#endif
+ for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
+ nonmovingHeap.allocators[i] = alloc_nonmoving_allocator(n_capabilities);
+ }
+ nonmovingMarkInitUpdRemSet();
+}
+
+// Stop any nonmoving collection in preparation for RTS shutdown.
+void nonmovingStop(void)
+{
+ if (! RtsFlags.GcFlags.useNonmoving) return;
+#if defined(THREADED_RTS)
+ if (mark_thread) {
+ debugTrace(DEBUG_nonmoving_gc,
+ "waiting for nonmoving collector thread to terminate");
+ ACQUIRE_LOCK(&concurrent_coll_finished_lock);
+ waitCondition(&concurrent_coll_finished, &concurrent_coll_finished_lock);
+ }
+#endif
+}
+
+void nonmovingExit(void)
+{
+ if (! RtsFlags.GcFlags.useNonmoving) return;
+
+ // First make sure collector is stopped before we tear things down.
+ nonmovingStop();
+
+#if defined(THREADED_RTS)
+ closeMutex(&concurrent_coll_finished_lock);
+ closeCondition(&concurrent_coll_finished);
+ closeMutex(&nonmoving_collection_mutex);
+#endif
+
+ for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
+ free_nonmoving_allocator(nonmovingHeap.allocators[i]);
+ }
+}
+
+/*
+ * Assumes that no garbage collector or mutator threads are running to safely
+ * resize the nonmoving_allocators.
+ *
+ * Must hold sm_mutex.
+ */
+void nonmovingAddCapabilities(uint32_t new_n_caps)
+{
+ unsigned int old_n_caps = nonmovingHeap.n_caps;
+ struct NonmovingAllocator **allocs = nonmovingHeap.allocators;
+
+ for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
+ struct NonmovingAllocator *old = allocs[i];
+ allocs[i] = alloc_nonmoving_allocator(new_n_caps);
+
+ // Copy the old state
+ allocs[i]->filled = old->filled;
+ allocs[i]->active = old->active;
+ for (unsigned int j = 0; j < old_n_caps; j++) {
+ allocs[i]->current[j] = old->current[j];
+ }
+ stgFree(old);
+
+ // Initialize current segments for the new capabilities
+ for (unsigned int j = old_n_caps; j < new_n_caps; j++) {
+ allocs[i]->current[j] = nonmovingAllocSegment(capabilities[j]->node);
+ nonmovingInitSegment(allocs[i]->current[j], NONMOVING_ALLOCA0 + i);
+ allocs[i]->current[j]->link = NULL;
+ }
+ }
+ nonmovingHeap.n_caps = new_n_caps;
+}
+
+static inline void nonmovingClearBitmap(struct NonmovingSegment *seg)
+{
+ unsigned int n = nonmovingSegmentBlockCount(seg);
+ memset(seg->bitmap, 0, n);
+}
+
+/* Prepare the heap bitmaps and snapshot metadata for a mark */
+static void nonmovingPrepareMark(void)
+{
+ // See Note [Static objects under the nonmoving collector].
+ prev_static_flag = static_flag;
+ static_flag =
+ static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
+
+ // Should have been cleared by the last sweep
+ ASSERT(nonmovingHeap.sweep_list == NULL);
+
+ nonmovingBumpEpoch();
+ for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
+ struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
+
+ // Update current segments' snapshot pointers
+ for (uint32_t cap_n = 0; cap_n < n_capabilities; ++cap_n) {
+ struct NonmovingSegment *seg = alloca->current[cap_n];
+ nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
+ }
+
+ // Update filled segments' snapshot pointers and move to sweep_list
+ uint32_t n_filled = 0;
+ struct NonmovingSegment *const filled = alloca->filled;
+ alloca->filled = NULL;
+ if (filled) {
+ struct NonmovingSegment *seg = filled;
+ while (true) {
+ n_filled++;
+ prefetchForRead(seg->link);
+ // Clear bitmap
+ prefetchForWrite(seg->link->bitmap);
+ nonmovingClearBitmap(seg);
+ // Set snapshot
+ nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
+ if (seg->link)
+ seg = seg->link;
+ else
+ break;
+ }
+ // add filled segments to sweep_list
+ seg->link = nonmovingHeap.sweep_list;
+ nonmovingHeap.sweep_list = filled;
+ }
+
+ // N.B. It's not necessary to update snapshot pointers of active segments;
+ // they were set after they were swept and haven't seen any allocation
+ // since.
+ }
+
+ // Clear large object bits of existing large objects
+ for (bdescr *bd = nonmoving_large_objects; bd; bd = bd->link) {
+ bd->flags &= ~BF_MARKED;
+ }
+
+ // Add newly promoted large objects and clear mark bits
+ bdescr *next;
+ ASSERT(oldest_gen->scavenged_large_objects == NULL);
+ for (bdescr *bd = oldest_gen->large_objects; bd; bd = next) {
+ next = bd->link;
+ bd->flags |= BF_NONMOVING_SWEEPING;
+ bd->flags &= ~BF_MARKED;
+ dbl_link_onto(bd, &nonmoving_large_objects);
+ }
+ n_nonmoving_large_blocks += oldest_gen->n_large_blocks;
+ oldest_gen->large_objects = NULL;
+ oldest_gen->n_large_words = 0;
+ oldest_gen->n_large_blocks = 0;
+ nonmoving_live_words = 0;
+
+ // Clear compact object mark bits
+ for (bdescr *bd = nonmoving_compact_objects; bd; bd = bd->link) {
+ bd->flags &= ~BF_MARKED;
+ }
+
+ // Move new compact objects from younger generations to nonmoving_compact_objects
+ for (bdescr *bd = oldest_gen->compact_objects; bd; bd = next) {
+ next = bd->link;
+ bd->flags |= BF_NONMOVING_SWEEPING;
+ bd->flags &= ~BF_MARKED;
+ dbl_link_onto(bd, &nonmoving_compact_objects);
+ }
+ n_nonmoving_compact_blocks += oldest_gen->n_compact_blocks;
+ oldest_gen->n_compact_blocks = 0;
+ oldest_gen->compact_objects = NULL;
+ // TODO (osa): what about "in import" stuff??
+
+#if defined(DEBUG)
+ debug_caf_list_snapshot = debug_caf_list;
+ debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
+#endif
+}
+
+// Mark weak pointers in the non-moving heap. They'll either end up in
+// dead_weak_ptr_list or stay in weak_ptr_list. Either way they need to be kept
+// during sweep. See `MarkWeak.c:markWeakPtrList` for the moving heap variant
+// of this.
+static void nonmovingMarkWeakPtrList(MarkQueue *mark_queue, StgWeak *dead_weak_ptr_list)
+{
+ for (StgWeak *w = oldest_gen->weak_ptr_list; w; w = w->link) {
+ markQueuePushClosure_(mark_queue, (StgClosure*)w);
+ // Do not mark finalizers and values here, those fields will be marked
+ // in `nonmovingMarkDeadWeaks` (for dead weaks) or
+ // `nonmovingTidyWeaks` (for live weaks)
+ }
+
+ // We need to mark dead_weak_ptr_list too. This is subtle:
+ //
+ // - By the beginning of this GC we evacuated all weaks to the non-moving
+ // heap (in `markWeakPtrList`)
+ //
+ // - During the scavenging of the moving heap we discovered that some of
+ // those weaks are dead and moved them to `dead_weak_ptr_list`. Note that
+ // because of the fact above _all weaks_ are in the non-moving heap at
+ // this point.
+ //
+ // - So, to be able to traverse `dead_weak_ptr_list` and run finalizers we
+ // need to mark it.
+ for (StgWeak *w = dead_weak_ptr_list; w; w = w->link) {
+ markQueuePushClosure_(mark_queue, (StgClosure*)w);
+ nonmovingMarkDeadWeak(mark_queue, w);
+ }
+}
+
+void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
+{
+#if defined(THREADED_RTS)
+ // We can't start a new collection until the old one has finished.
+ // We also don't run during the final GC.
+ if (concurrent_coll_running || sched_state > SCHED_RUNNING) {
+ return;
+ }
+#endif
+
+ trace(TRACE_nonmoving_gc, "Starting nonmoving GC preparation");
+ resizeGenerations();
+
+ nonmovingPrepareMark();
+
+ // N.B. These should have been cleared at the end of the last sweep.
+ ASSERT(nonmoving_marked_large_objects == NULL);
+ ASSERT(n_nonmoving_marked_large_blocks == 0);
+ ASSERT(nonmoving_marked_compact_objects == NULL);
+ ASSERT(n_nonmoving_marked_compact_blocks == 0);
+
+ MarkQueue *mark_queue = stgMallocBytes(sizeof(MarkQueue), "mark queue");
+ initMarkQueue(mark_queue);
+ current_mark_queue = mark_queue;
+
+ // Mark roots
+ trace(TRACE_nonmoving_gc, "Marking roots for nonmoving GC");
+ markCAFs((evac_fn)markQueueAddRoot, mark_queue);
+ for (unsigned int n = 0; n < n_capabilities; ++n) {
+ markCapability((evac_fn)markQueueAddRoot, mark_queue,
+ capabilities[n], true/*don't mark sparks*/);
+ }
+ markScheduler((evac_fn)markQueueAddRoot, mark_queue);
+ nonmovingMarkWeakPtrList(mark_queue, *dead_weaks);
+ markStablePtrTable((evac_fn)markQueueAddRoot, mark_queue);
+
+ // Mark threads resurrected during moving heap scavenging
+ for (StgTSO *tso = *resurrected_threads; tso != END_TSO_QUEUE; tso = tso->global_link) {
+ markQueuePushClosure_(mark_queue, (StgClosure*)tso);
+ }
+ trace(TRACE_nonmoving_gc, "Finished marking roots for nonmoving GC");
+
+ // Roots marked, mark threads and weak pointers
+
+ // At this point all threads are moved to threads list (from old_threads)
+ // and all weaks are moved to weak_ptr_list (from old_weak_ptr_list) by
+ // the previous scavenge step, so we need to move them to "old" lists
+ // again.
+
+ // Fine to overwrite old_threads because any live or resurrected threads are
+ // moved to threads or resurrected_threads lists.
+ ASSERT(oldest_gen->old_threads == END_TSO_QUEUE);
+ ASSERT(nonmoving_old_threads == END_TSO_QUEUE);
+ nonmoving_old_threads = oldest_gen->threads;
+ oldest_gen->threads = END_TSO_QUEUE;
+
+ // Make sure we don't lose any weak ptrs here. Weaks in old_weak_ptr_list
+ // will either be moved to `dead_weaks` (if dead) or `weak_ptr_list` (if
+ // alive).
+ ASSERT(oldest_gen->old_weak_ptr_list == NULL);
+ ASSERT(nonmoving_old_weak_ptr_list == NULL);
+ nonmoving_old_weak_ptr_list = oldest_gen->weak_ptr_list;
+ oldest_gen->weak_ptr_list = NULL;
+ trace(TRACE_nonmoving_gc, "Finished nonmoving GC preparation");
+
+ // We are now safe to start concurrent marking
+
+ // Note that in concurrent mark we can't use dead_weaks and
+ // resurrected_threads from the preparation to add new weaks and threads as
+ // that would cause races between minor collection and mark. So we only pass
+ // those lists to the mark function in the sequential case. In the concurrent
+ // case we allocate fresh lists.
+
+#if defined(THREADED_RTS)
+ // If we're interrupting or shutting down, do not let this capability go and
+ // run a STW collection. Reason: we won't be able to acquire this capability
+ // again for the sync if we let it go, because it'll immediately start doing
+ // a major GC, because that's what we do when exiting the scheduler (see
+ // exitScheduler()).
+ if (sched_state == SCHED_RUNNING) {
+ concurrent_coll_running = true;
+ nonmoving_write_barrier_enabled = true;
+ debugTrace(DEBUG_nonmoving_gc, "Starting concurrent mark thread");
+ createOSThread(&mark_thread, "non-moving mark thread",
+ nonmovingConcurrentMark, mark_queue);
+ } else {
+ nonmovingConcurrentMark(mark_queue);
+ }
+#else
+ // Use the weak and thread lists from the preparation for any new weaks and
+ // threads found to be dead in mark.
+ nonmovingMark_(mark_queue, dead_weaks, resurrected_threads);
+#endif
+}
+
+/* Process the mark queue and tidy threads and weak pointers, repeating until
+ * no more weaks have been resuscitated
+ */
+static void nonmovingMarkThreadsWeaks(MarkQueue *mark_queue)
+{
+ while (true) {
+ // Propagate marks
+ nonmovingMark(mark_queue);
+
+ // Tidy threads and weaks
+ nonmovingTidyThreads();
+
+ if (! nonmovingTidyWeaks(mark_queue))
+ return;
+ }
+}
+
+#if defined(THREADED_RTS)
+static void* nonmovingConcurrentMark(void *data)
+{
+ MarkQueue *mark_queue = (MarkQueue*)data;
+ StgWeak *dead_weaks = NULL;
+ StgTSO *resurrected_threads = (StgTSO*)&stg_END_TSO_QUEUE_closure;
+ nonmovingMark_(mark_queue, &dead_weaks, &resurrected_threads);
+ return NULL;
+}
+
+// TODO: Not sure where to put this function.
+// Append w2 to the end of w1.
+static void appendWeakList( StgWeak **w1, StgWeak *w2 )
+{
+ while (*w1) {
+ w1 = &(*w1)->link;
+ }
+ *w1 = w2;
+}
+#endif
+
+static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads)
+{
+ ACQUIRE_LOCK(&nonmoving_collection_mutex);
+ debugTrace(DEBUG_nonmoving_gc, "Starting mark...");
+
+ // Do concurrent marking; most of the heap will get marked here.
+ nonmovingMarkThreadsWeaks(mark_queue);
+
+#if defined(THREADED_RTS)
+ Task *task = newBoundTask();
+
+ // If at this point we've decided to exit then just return
+ if (sched_state > SCHED_RUNNING) {
+ // Note that we break our invariants here and leave segments in
+ // nonmovingHeap.sweep_list, don't free nonmoving_large_objects etc.
+ // However because we won't be running mark-sweep in the final GC this
+ // is OK.
+
+ // This is an RTS shutdown so we need to move our copy (snapshot) of
+ // weaks (nonmoving_old_weak_ptr_list and nonmoving_weak_ptr_list) to
+ // oldest_gen->weak_ptr_list to be able to run C finalizers in hs_exit_.
+ // Note that there may be more weaks added to oldest_gen->weak_ptr_list
+ // since we started mark, so we need to append our lists to the tail of
+ // oldest_gen->weak_ptr_list.
+ appendWeakList(&nonmoving_old_weak_ptr_list, nonmoving_weak_ptr_list);
+ appendWeakList(&oldest_gen->weak_ptr_list, nonmoving_old_weak_ptr_list);
+ // These lists won't be used again so clearing them is not strictly
+ // necessary, but we do so anyway.
+ nonmoving_old_weak_ptr_list = NULL;
+ nonmoving_weak_ptr_list = NULL;
+
+ goto finish;
+ }
+
+ // We're still running, request a sync
+ nonmovingBeginFlush(task);
+
+ bool all_caps_syncd;
+ do {
+ all_caps_syncd = nonmovingWaitForFlush();
+ nonmovingMarkThreadsWeaks(mark_queue);
+ } while (!all_caps_syncd);
+#endif
+
+ nonmovingResurrectThreads(mark_queue, resurrected_threads);
+
+ // No more resurrecting threads after this point
+
+ // Do last marking of weak pointers
+ while (true) {
+ // Propagate marks
+ nonmovingMark(mark_queue);
+
+ if (!nonmovingTidyWeaks(mark_queue))
+ break;
+ }
+
+ nonmovingMarkDeadWeaks(mark_queue, dead_weaks);
+
+ // Propagate marks
+ nonmovingMark(mark_queue);
+
+ // Now remove all dead objects from the mut_list to ensure that a younger
+ // generation collection doesn't attempt to look at them after we've swept.
+ nonmovingSweepMutLists();
+
+ debugTrace(DEBUG_nonmoving_gc,
+ "Done marking, resurrecting threads before releasing capabilities");
+
+
+ // Schedule finalizers and resurrect threads
+#if defined(THREADED_RTS)
+ // Just pick a random capability. Not sure if this is a good idea -- we use
+ // only one capability for all finalizers.
+ scheduleFinalizers(capabilities[0], *dead_weaks);
+ // Note that this mutates heap and causes running write barriers.
+ // See Note [Unintentional marking in resurrectThreads] in NonMovingMark.c
+ // for how we deal with this.
+ resurrectThreads(*resurrected_threads);
+#endif
+
+#if defined(DEBUG)
+ // Zap CAFs that we will sweep
+ nonmovingGcCafs();
+#endif
+
+ ASSERT(mark_queue->top->head == 0);
+ ASSERT(mark_queue->blocks->link == NULL);
+
+ // Update oldest_gen thread and weak lists
+ // Note that we need to append these lists as a concurrent minor GC may have
+ // added stuff to them while we're doing mark-sweep concurrently
+ {
+ StgTSO **threads = &oldest_gen->threads;
+ while (*threads != END_TSO_QUEUE) {
+ threads = &(*threads)->global_link;
+ }
+ *threads = nonmoving_threads;
+ nonmoving_threads = END_TSO_QUEUE;
+ nonmoving_old_threads = END_TSO_QUEUE;
+ }
+
+ {
+ StgWeak **weaks = &oldest_gen->weak_ptr_list;
+ while (*weaks) {
+ weaks = &(*weaks)->link;
+ }
+ *weaks = nonmoving_weak_ptr_list;
+ nonmoving_weak_ptr_list = NULL;
+ nonmoving_old_weak_ptr_list = NULL;
+ }
+
+ // Everything has been marked; allow the mutators to proceed
+#if defined(THREADED_RTS)
+ nonmoving_write_barrier_enabled = false;
+ nonmovingFinishFlush(task);
+#endif
+
+ current_mark_queue = NULL;
+ freeMarkQueue(mark_queue);
+ stgFree(mark_queue);
+
+ oldest_gen->live_estimate = nonmoving_live_words;
+ oldest_gen->n_old_blocks = 0;
+ resizeGenerations();
+
+ /****************************************************
+ * Sweep
+ ****************************************************/
+
+ traceConcSweepBegin();
+
+ // Because we can't mark large object blocks (no room for mark bit) we
+ // collect them in a map in mark_queue and we pass it here to sweep large
+ // objects
+ nonmovingSweepLargeObjects();
+ nonmovingSweepCompactObjects();
+ nonmovingSweepStableNameTable();
+
+ nonmovingSweep();
+ ASSERT(nonmovingHeap.sweep_list == NULL);
+ debugTrace(DEBUG_nonmoving_gc, "Finished sweeping.");
+ traceConcSweepEnd();
+#if defined(DEBUG)
+ if (RtsFlags.DebugFlags.nonmoving_gc)
+ nonmovingPrintAllocatorCensus();
+#endif
+
+ // TODO: Remainder of things done by GarbageCollect (update stats)
+
+#if defined(THREADED_RTS)
+finish:
+ boundTaskExiting(task);
+
+ // We are done...
+ mark_thread = 0;
+
+ // Signal that the concurrent collection is finished, allowing the next
+ // non-moving collection to proceed
+ concurrent_coll_running = false;
+ signalCondition(&concurrent_coll_finished);
+ RELEASE_LOCK(&nonmoving_collection_mutex);
+#endif
+}
+
+#if defined(DEBUG)
+
+// Use this with caution: this doesn't work correctly during the scavenge phase
+// when we're doing parallel scavenging. Use it in the mark phase or later
+// (where we don't allocate any more).
+void assert_in_nonmoving_heap(StgPtr p)
+{
+ if (!HEAP_ALLOCED_GC(p))
+ return;
+
+ bdescr *bd = Bdescr(p);
+ if (bd->flags & BF_LARGE) {
+ // It should be in a capability (if it's not filled yet) or in the non-moving heap
+ for (uint32_t cap = 0; cap < n_capabilities; ++cap) {
+ if (bd == capabilities[cap]->pinned_object_block) {
+ return;
+ }
+ }
+ ASSERT(bd->flags & BF_NONMOVING);
+ return;
+ }
+
+ // Search snapshot segments
+ for (struct NonmovingSegment *seg = nonmovingHeap.sweep_list; seg; seg = seg->link) {
+ if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ return;
+ }
+ }
+
+ for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
+ struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
+ // Search current segments
+ for (uint32_t cap_idx = 0; cap_idx < n_capabilities; ++cap_idx) {
+ struct NonmovingSegment *seg = alloca->current[cap_idx];
+ if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ return;
+ }
+ }
+
+ // Search active segments
+ int seg_idx = 0;
+ struct NonmovingSegment *seg = alloca->active;
+ while (seg) {
+ if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ return;
+ }
+ seg_idx++;
+ seg = seg->link;
+ }
+
+ // Search filled segments
+ seg_idx = 0;
+ seg = alloca->filled;
+ while (seg) {
+ if (p >= (P_)seg && p < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ return;
+ }
+ seg_idx++;
+ seg = seg->link;
+ }
+ }
+
+ // We don't search free segments as they're unused
+
+ barf("%p is not in nonmoving heap\n", (void*)p);
+}
+
+void nonmovingPrintSegment(struct NonmovingSegment *seg)
+{
+ int num_blocks = nonmovingSegmentBlockCount(seg);
+ uint8_t log_block_size = nonmovingSegmentLogBlockSize(seg);
+
+ debugBelch("Segment with %d blocks of size 2^%d (%d bytes, %u words, scan: %p)\n",
+ num_blocks,
+ log_block_size,
+ 1 << log_block_size,
+ (unsigned int) ROUNDUP_BYTES_TO_WDS(1 << log_block_size),
+ (void*)Bdescr((P_)seg)->u.scan);
+
+ for (nonmoving_block_idx p_idx = 0; p_idx < seg->next_free; ++p_idx) {
+ StgClosure *p = (StgClosure*)nonmovingSegmentGetBlock(seg, p_idx);
+ if (nonmovingGetMark(seg, p_idx) != 0) {
+ debugBelch("%d (%p)* :\t", p_idx, (void*)p);
+ } else {
+ debugBelch("%d (%p) :\t", p_idx, (void*)p);
+ }
+ printClosure(p);
+ }
+
+ debugBelch("End of segment\n\n");
+}
+
+void nonmovingPrintAllocator(struct NonmovingAllocator *alloc)
+{
+ debugBelch("Allocator at %p\n", (void*)alloc);
+ debugBelch("Filled segments:\n");
+ for (struct NonmovingSegment *seg = alloc->filled; seg != NULL; seg = seg->link) {
+ debugBelch("%p ", (void*)seg);
+ }
+ debugBelch("\nActive segments:\n");
+ for (struct NonmovingSegment *seg = alloc->active; seg != NULL; seg = seg->link) {
+ debugBelch("%p ", (void*)seg);
+ }
+ debugBelch("\nCurrent segments:\n");
+ for (uint32_t i = 0; i < n_capabilities; ++i) {
+ debugBelch("%p ", alloc->current[i]);
+ }
+ debugBelch("\n");
+}
+
+void locate_object(P_ obj)
+{
+ // Search allocators
+ for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
+ struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
+ for (uint32_t cap = 0; cap < n_capabilities; ++cap) {
+ struct NonmovingSegment *seg = alloca->current[cap];
+ if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ debugBelch("%p is in current segment of capability %d of allocator %d at %p\n", obj, cap, alloca_idx, (void*)seg);
+ return;
+ }
+ }
+ int seg_idx = 0;
+ struct NonmovingSegment *seg = alloca->active;
+ while (seg) {
+ if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ debugBelch("%p is in active segment %d of allocator %d at %p\n", obj, seg_idx, alloca_idx, (void*)seg);
+ return;
+ }
+ seg_idx++;
+ seg = seg->link;
+ }
+
+ seg_idx = 0;
+ seg = alloca->filled;
+ while (seg) {
+ if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ debugBelch("%p is in filled segment %d of allocator %d at %p\n", obj, seg_idx, alloca_idx, (void*)seg);
+ return;
+ }
+ seg_idx++;
+ seg = seg->link;
+ }
+ }
+
+ struct NonmovingSegment *seg = nonmovingHeap.free;
+ int seg_idx = 0;
+ while (seg) {
+ if (obj >= (P_)seg && obj < (((P_)seg) + NONMOVING_SEGMENT_SIZE_W)) {
+ debugBelch("%p is in free segment %d at %p\n", obj, seg_idx, (void*)seg);
+ return;
+ }
+ seg_idx++;
+ seg = seg->link;
+ }
+
+ // Search nurseries
+ for (uint32_t nursery_idx = 0; nursery_idx < n_nurseries; ++nursery_idx) {
+ for (bdescr* nursery_block = nurseries[nursery_idx].blocks; nursery_block; nursery_block = nursery_block->link) {
+ if (obj >= nursery_block->start && obj <= nursery_block->start + nursery_block->blocks*BLOCK_SIZE_W) {
+ debugBelch("%p is in nursery %d\n", obj, nursery_idx);
+ return;
+ }
+ }
+ }
+
+ // Search generations
+ for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++g) {
+ generation *gen = &generations[g];
+ for (bdescr *blk = gen->blocks; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " blocks\n", obj, g);
+ return;
+ }
+ }
+ for (bdescr *blk = gen->old_blocks; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " old blocks\n", obj, g);
+ return;
+ }
+ }
+ }
+
+ // Search large objects
+ for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++g) {
+ generation *gen = &generations[g];
+ for (bdescr *large_block = gen->large_objects; large_block; large_block = large_block->link) {
+ if ((P_)large_block->start == obj) {
+ debugBelch("%p is in large blocks of generation %d\n", obj, g);
+ return;
+ }
+ }
+ }
+
+ for (bdescr *large_block = nonmoving_large_objects; large_block; large_block = large_block->link) {
+ if ((P_)large_block->start == obj) {
+ debugBelch("%p is in nonmoving_large_objects\n", obj);
+ return;
+ }
+ }
+
+ for (bdescr *large_block = nonmoving_marked_large_objects; large_block; large_block = large_block->link) {
+ if ((P_)large_block->start == obj) {
+ debugBelch("%p is in nonmoving_marked_large_objects\n", obj);
+ return;
+ }
+ }
+
+ // Search workspaces FIXME only works in non-threaded runtime
+#if !defined(THREADED_RTS)
+ for (uint32_t g = 0; g < RtsFlags.GcFlags.generations - 1; ++ g) {
+ gen_workspace *ws = &gct->gens[g];
+ for (bdescr *blk = ws->todo_bd; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " todo bds\n", obj, g);
+ return;
+ }
+ }
+ for (bdescr *blk = ws->scavd_list; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " scavd bds\n", obj, g);
+ return;
+ }
+ }
+ for (bdescr *blk = ws->todo_large_objects; blk; blk = blk->link) {
+ if (obj >= blk->start && obj < blk->free) {
+ debugBelch("%p is in generation %" FMT_Word32 " todo large bds\n", obj, g);
+ return;
+ }
+ }
+ }
+#endif
+}
+
+void nonmovingPrintSweepList(void)
+{
+ debugBelch("==== SWEEP LIST =====\n");
+ int i = 0;
+ for (struct NonmovingSegment *seg = nonmovingHeap.sweep_list; seg; seg = seg->link) {
+ debugBelch("%d: %p\n", i++, (void*)seg);
+ }
+ debugBelch("= END OF SWEEP LIST =\n");
+}
+
+void check_in_mut_list(StgClosure *p)
+{
+ for (uint32_t cap_n = 0; cap_n < n_capabilities; ++cap_n) {
+ for (bdescr *bd = capabilities[cap_n]->mut_lists[oldest_gen->no]; bd; bd = bd->link) {
+ for (StgPtr q = bd->start; q < bd->free; ++q) {
+ if (*((StgPtr**)q) == (StgPtr*)p) {
+ debugBelch("Object is in mut list of cap %d: %p\n", cap_n, capabilities[cap_n]->mut_lists[oldest_gen->no]);
+ return;
+ }
+ }
+ }
+ }
+
+ debugBelch("Object is not in a mut list\n");
+}
+
+void print_block_list(bdescr* bd)
+{
+ while (bd) {
+ debugBelch("%p, ", (void*)bd);
+ bd = bd->link;
+ }
+ debugBelch("\n");
+}
+
+void print_thread_list(StgTSO* tso)
+{
+ while (tso != END_TSO_QUEUE) {
+ printClosure((StgClosure*)tso);
+ tso = tso->global_link;
+ }
+}
+
+#endif