 includes/rts/storage/Block.h |  28
 includes/rts/storage/TSO.h   |   3
 rts/sm/NonMoving.c           | 199
 rts/sm/NonMoving.h           |  40
 rts/sm/NonMovingMark.c       |  16
 rts/sm/NonMovingSweep.c      |   4
 rts/sm/Sanity.c              |   2
 7 files changed, 253 insertions(+), 39 deletions(-)
diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h
index 32cf98958f..4afc3689cb 100644
--- a/includes/rts/storage/Block.h
+++ b/includes/rts/storage/Block.h
@@ -88,17 +88,23 @@ typedef struct bdescr_ {

     StgPtr start;              // [READ ONLY] start addr of memory

-    StgPtr free;               // First free byte of memory.
-                               // allocGroup() sets this to the value of start.
-                               // NB. during use this value should lie
-                               // between start and start + blocks *
-                               // BLOCK_SIZE.  Values outside this
-                               // range are reserved for use by the
-                               // block allocator.  In particular, the
-                               // value (StgPtr)(-1) is used to
-                               // indicate that a block is unallocated.
-                               //
-                               // Unused by the non-moving allocator.
+    union {
+        StgPtr free;           // First free byte of memory.
+                               // allocGroup() sets this to the value of start.
+                               // NB. during use this value should lie
+                               // between start and start + blocks *
+                               // BLOCK_SIZE.  Values outside this
+                               // range are reserved for use by the
+                               // block allocator.  In particular, the
+                               // value (StgPtr)(-1) is used to
+                               // indicate that a block is unallocated.
+                               //
+                               // Unused by the non-moving allocator.
+        struct NonmovingSegmentInfo {
+            StgWord8 log_block_size;
+            StgWord16 next_free_snap;
+        } nonmoving_segment;
+    };

     struct bdescr_ *link;      // used for chaining blocks together
diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h
index 070ecbebaa..d706282796 100644
--- a/includes/rts/storage/TSO.h
+++ b/includes/rts/storage/TSO.h
@@ -231,6 +231,9 @@ typedef struct StgTSO_ {
  * setting the stack's mark bit (either the BF_MARKED bit for large objects
  * or otherwise its bit in its segment's mark bitmap).
  *
+ * To ensure that mutation does not proceed until the stack is fully marked,
+ * the mark phase must not set the mark bit until it has finished tracing.
+ *
  */

#define STACK_DIRTY 1
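For illustration, a minimal sketch of the cache-locality win this union buys (simplified, hypothetical types, not the real RTS definitions): the mark path already has to touch the block descriptor, so it can read the segment's block size and snapshot from there without pulling the NonmovingSegment header itself into cache.

    #include <stdint.h>

    /* Simplified stand-ins for the RTS types; field names mirror the patch. */
    struct nonmoving_segment_info {
        uint8_t  log_block_size;  /* log2 of the segment's block size in bytes */
        uint16_t next_free_snap;  /* snapshot of the segment's next_free index */
    };

    struct bdescr_sketch {
        void *start;
        union {
            void *free;                       /* used only by the moving heap */
            struct nonmoving_segment_info
                nonmoving_segment;            /* used only by the non-moving heap */
        };
    };

    /* Marking already needs the block descriptor (e.g. for its flags), so
     * reading the block size here costs no extra cache line. */
    static inline unsigned block_size_bytes(const struct bdescr_sketch *bd) {
        return 1u << bd->nonmoving_segment.log_block_size;
    }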
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index 6dc1010d43..8174fa754d 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -49,8 +49,183 @@ Mutex concurrent_coll_finished_lock;
 /*
  * Note [Non-moving garbage collector]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The sources rts/NonMoving*.c implement GHC's non-moving garbage collector
+ * for the oldest generation. In contrast to the throughput-oriented moving
+ * collector, the non-moving collector is designed to achieve low GC latencies
+ * on large heaps. It accomplishes low latencies by way of a concurrent
+ * mark-and-sweep collection strategy on a specially-designed heap structure.
+ * While the design is described in detail in the design document found in
+ * docs/storage/nonmoving-gc, we briefly summarize the structure here.
+ *
+ *
+ * === Heap Structure ===
+ *
+ * The nonmoving heap (embodied by struct NonmovingHeap) consists of a family
+ * of allocators, each serving a range of allocation sizes. Each allocator
+ * consists of a set of *segments*, each of which contains fixed-size *blocks*
+ * (not to be confused with "blocks" provided by GHC's block allocator; this is
+ * admittedly an unfortunate overlap in terminology). These blocks are the
+ * backing store for the allocator. In addition to blocks, the segment also
+ * contains some header information (see struct NonmovingSegment in
+ * NonMoving.h). This header contains a *bitmap* encoding one byte per block
+ * (used by the collector to record liveness), as well as the index of the next
+ * unallocated block (and a *snapshot* of this field, which will be described
+ * in the next section).
+ *
+ * Each allocator maintains three sets of segments:
+ *
+ *  - A *current* segment for each capability; this is the segment which that
+ *    capability will allocate into.
+ *
+ *  - A pool of *active* segments, each of which contains at least one
+ *    unallocated block. The allocator will take a segment from this pool when
+ *    it fills its *current* segment.
+ *
+ *  - A set of *filled* segments, which contain no unallocated blocks and will
+ *    be collected during the next major GC cycle.
+ *
+ * Storage for segments is allocated using the block allocator using an aligned
+ * group of NONMOVING_SEGMENT_BLOCKS blocks. This makes the task of locating
+ * the segment header for a closure a simple matter of bit-masking (as
+ * implemented by nonmovingGetSegment).
+ *
+ * In addition, to relieve pressure on the block allocator we keep a small pool
+ * of free segments around (nonmovingHeap.free) which can be pushed to and
+ * popped from in a lock-free manner.
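To make the bit-masking concrete, here is a minimal sketch under an assumed segment size and alignment (the real constants live in NonMoving.h; get_segment is a hypothetical stand-in for nonmovingGetSegment):

    #include <stdint.h>

    /* Assume, for illustration only, 32 KB segments aligned to their size. */
    #define SEGMENT_SIZE_LG2 15
    #define SEGMENT_MASK     (((uintptr_t)1 << SEGMENT_SIZE_LG2) - 1)

    struct nonmoving_segment;  /* opaque here */

    /* Because a segment occupies an aligned group of blocks, any pointer into
     * it can be masked down to the segment header with a single AND. */
    static inline struct nonmoving_segment *get_segment(void *p) {
        return (struct nonmoving_segment *)((uintptr_t)p & ~SEGMENT_MASK);
    }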
+ *
+ *
+ * === Allocation ===
+ *
+ * The allocator (as implemented by nonmovingAllocate) starts by identifying
+ * which allocator the request should be made against. It then allocates into
+ * its local current segment and bumps the next_free pointer to point to the
+ * next unallocated block (as indicated by the bitmap). If it finds the current
+ * segment is now full it moves it to the filled list and looks for a new
+ * segment to make current from a few sources:
+ *
+ *  1. the allocator's active list (see pop_active_segment)
+ *
+ *  2. the nonmoving heap's free segment pool (see nonmovingPopFreeSegment)
+ *
+ *  3. allocate a new segment from the block allocator (see
+ *     nonmovingAllocSegment)
+ *
+ * Note that allocation does *not* involve modifying the bitmap. The bitmap is
+ * only modified by the collector.
+ *
+ *
+ * === Snapshot invariant ===
+ *
+ * To safely collect in a concurrent setting, the collector relies on the
+ * notion of a *snapshot*. The snapshot is a hypothetical frozen state of the
+ * heap topology taken at the beginning of the major collection cycle.
+ * With this definition we require the following property of the mark phase,
+ * which we call the *snapshot invariant*:
+ *
+ *     All objects that were reachable at the time the snapshot was taken
+ *     must have their mark bits set at the end of the mark phase.
+ *
+ * As the mutator might change the topology of the heap while we are marking,
+ * this property requires some cooperation from the mutator to maintain.
+ * Specifically, we rely on a write barrier as described in Note [Update
+ * remembered set].
+ *
+ * To determine which objects existed when the snapshot was taken we
+ * record a snapshot of each segment's next_free pointer at the beginning of
+ * collection.
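A tiny sketch of how the snapshot is realized per segment (assumed, simplified names): the prepare-mark phase copies next_free into next_free_snap, and marking then treats any block at or above the snapshot index as not in the snapshot.

    #include <stdbool.h>
    #include <stdint.h>

    struct segment_sketch {
        uint16_t next_free;       /* bumped by the mutator's allocator */
        uint16_t next_free_snap;  /* frozen at the start of the major GC */
    };

    /* Runs during the stop-the-world snapshot phase (cf. nonmovingPrepareMark). */
    static void take_snapshot(struct segment_sketch *seg) {
        seg->next_free_snap = seg->next_free;
    }

    /* Blocks allocated after the snapshot need no marking: they cannot have
     * been reachable at the time the snapshot was taken. */
    static bool in_snapshot(const struct segment_sketch *seg, unsigned block_idx) {
        return block_idx < seg->next_free_snap;
    }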
+ *
+ *
+ * === Collection ===
+ *
+ * Collection happens in a few phases, some of which occur during a
+ * stop-the-world period (marked with [STW]) and others which can occur
+ * concurrently with mutation and minor collection (marked with [CONC]):
+ *
+ *  1. [STW] Preparatory GC: Here we do a standard minor collection of the
+ *     younger generations (which may evacuate things to the nonmoving heap).
+ *     References from younger generations into the nonmoving heap are
+ *     recorded in the mark queue (see Note [Aging under the non-moving
+ *     collector] in this file).
+ *
+ *  2. [STW] Snapshot update: Here we update the segment snapshot metadata
+ *     (see nonmovingPrepareMark) and move the filled segments to
+ *     nonmovingHeap.sweep_list, which is the set of segments which we will
+ *     sweep this GC cycle.
+ *
+ *  3. [STW] Root collection: Here we walk over a variety of root sources
+ *     and add them to the mark queue (see nonmovingCollect).
+ *
+ *  4. [CONC] Concurrent marking: Here we do the majority of marking
+ *     concurrently with mutator execution (but with the write barrier
+ *     enabled; see Note [Update remembered set]).
+ *
+ *  5. [STW] Final sync: Here we interrupt the mutators, ask them to
+ *     flush their final update remembered sets, and mark any new references
+ *     we find.
+ *
+ *  6. [CONC] Sweep: Here we walk over the nonmoving segments on sweep_list
+ *     and place them back on either the active, current, or filled list,
+ *     depending upon how much live data they contain.
+ *
+ *
+ * === Marking ===
+ *
+ * Ignoring large and static objects, marking a closure is fairly
+ * straightforward (implemented in NonMovingMark.c:mark_closure):
+ *
+ *  1. Check whether the closure is in the non-moving generation; if not then
+ *     we ignore it.
+ *  2. Find the segment containing the closure's block.
+ *  3. Check whether the closure's block is above seg->next_free_snap; if so
+ *     then the block was not allocated when we took the snapshot and therefore
+ *     we don't need to mark it.
+ *  4. Check whether the block's bitmap byte is equal to nonmovingMarkEpoch. If
+ *     so then we can stop as we have already marked it.
+ *  5. Push the closure's pointers to the mark queue.
+ *  6. Set the block's bitmap byte to nonmovingMarkEpoch.
+ *
+ * Note that the ordering of (5) and (6) is rather important, as described in
+ * Note [StgStack dirtiness flags and concurrent marking].
+ *
+ *
+ * === Other references ===
+ *
+ * Apart from the design document in docs/storage/nonmoving-gc and the Ueno
+ * 2016 paper (TODO citation) from which it drew inspiration, there are a
+ * variety of other relevant Notes scattered throughout the tree:
+ *
+ *  - Note [Concurrent non-moving collection] (NonMoving.c) describes
+ *    concurrency control of the nonmoving collector
+ *
+ *  - Note [Live data accounting in nonmoving collector] (NonMoving.c)
+ *    describes how we track the quantity of live data in the nonmoving
+ *    generation.
+ *
+ *  - Note [Aging under the non-moving collector] (NonMoving.c) describes how
+ *    we accommodate aging
+ *
+ *  - Note [Large objects in the non-moving collector] (NonMovingMark.c)
+ *    describes how we track large objects.
+ *
+ *  - Note [Update remembered set] (NonMovingMark.c) describes the function
+ *    and implementation of the update remembered set used to realize the
+ *    concurrent write barrier.
+ *
+ *  - Note [Concurrent read barrier on deRefWeak#] (NonMovingMark.c) describes
+ *    the read barrier on Weak# objects.
+ *
+ *  - Note [Unintentional marking in resurrectThreads] (NonMovingMark.c)
+ *    describes a tricky interaction between the update remembered set flush
+ *    and weak finalization.
+ *
+ *  - Note [Origin references in the nonmoving collector] (NonMovingMark.h)
+ *    describes how we implement indirection short-cutting and the selector
+ *    optimisation.
+ *
+ *  - Note [StgStack dirtiness flags and concurrent marking] (TSO.h) describes
+ *    the protocol for concurrent marking of stacks.
+ *
+ *  - Note [Static objects under the nonmoving collector] (Storage.c) describes
+ *    treatment of static objects.
  *
- * TODO
  *
  * Note [Concurrent non-moving collection]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -202,15 +377,16 @@ static void* nonmovingConcurrentMark(void *mark_queue);
 static void nonmovingClearBitmap(struct NonmovingSegment *seg);
 static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads);

-static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t block_size)
+static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t log_block_size)
 {
+    bdescr *bd = Bdescr((P_) seg);
     seg->link = NULL;
     seg->todo_link = NULL;
     seg->next_free = 0;
-    seg->next_free_snap = 0;
-    seg->block_size = block_size;
     nonmovingClearBitmap(seg);
-    Bdescr((P_)seg)->u.scan = nonmovingSegmentGetBlock(seg, 0);
+    bd->nonmoving_segment.log_block_size = log_block_size;
+    bd->nonmoving_segment.next_free_snap = 0;
+    bd->u.scan = nonmovingSegmentGetBlock(seg, 0);
 }

 // Add a segment to the free list.
@@ -385,7 +561,7 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)

         // Update live data estimate.
         // See Note [Live data accounting in nonmoving collector].
-        unsigned int new_blocks = block_count - current->next_free_snap;
+        unsigned int new_blocks = block_count - nonmovingSegmentInfo(current)->next_free_snap;
         unsigned int block_size = 1 << log_block_size;
         atomic_inc(&oldest_gen->live_estimate, new_blocks * block_size / sizeof(W_));

@@ -529,7 +705,7 @@ static void nonmovingPrepareMark(void)
         // Update current segments' snapshot pointers
         for (uint32_t cap_n = 0; cap_n < n_capabilities; ++cap_n) {
             struct NonmovingSegment *seg = alloca->current[cap_n];
-            seg->next_free_snap = seg->next_free;
+            nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
         }

         // Update filled segments' snapshot pointers and move to sweep_list
@@ -545,7 +721,7 @@ static void nonmovingPrepareMark(void)
                 prefetchForWrite(seg->link->bitmap);
             nonmovingClearBitmap(seg);
             // Set snapshot
-            seg->next_free_snap = seg->next_free;
+            nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
             if (seg->link)
                 seg = seg->link;
             else
@@ -977,12 +1153,13 @@ void assert_in_nonmoving_heap(StgPtr p)
 void nonmovingPrintSegment(struct NonmovingSegment *seg)
 {
     int num_blocks = nonmovingSegmentBlockCount(seg);
+    uint8_t log_block_size = nonmovingSegmentLogBlockSize(seg);

     debugBelch("Segment with %d blocks of size 2^%d (%d bytes, %u words, scan: %p)\n",
                num_blocks,
-               seg->block_size,
-               1 << seg->block_size,
-               (unsigned int) ROUNDUP_BYTES_TO_WDS(1 << seg->block_size),
+               log_block_size,
+               1 << log_block_size,
+               (unsigned int) ROUNDUP_BYTES_TO_WDS(1 << log_block_size),
                (void*)Bdescr((P_)seg)->u.scan);

     for (nonmoving_block_idx p_idx = 0; p_idx < seg->next_free; ++p_idx) {
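The six marking steps summarized in the Note above correspond roughly to a sketch like the following (hypothetical helper names standing in for the real mark_closure machinery):

    #include <stdbool.h>
    #include <stdint.h>

    /* Simplified stand-ins; all names here are assumptions, not RTS definitions. */
    struct segment {
        uint16_t next_free_snap;
        uint8_t  bitmap[1024];
    };

    extern uint8_t mark_epoch;                        /* cf. nonmovingMarkEpoch */
    extern bool in_nonmoving_generation(void *p);
    extern struct segment *get_segment(void *p);      /* bit-masking, as above */
    extern unsigned get_block_idx(struct segment *seg, void *p);
    extern void push_pointers_to_mark_queue(void *p);

    /* Mirrors marking steps 1-6 from the Note; tracing (5) happens before the
     * mark byte is set (6), as the stack-marking protocol requires. */
    void mark_one(void *p)
    {
        if (!in_nonmoving_generation(p)) return;      /* 1: other generations  */
        struct segment *seg = get_segment(p);         /* 2: locate the segment */
        unsigned idx = get_block_idx(seg, p);
        if (idx >= seg->next_free_snap) return;       /* 3: outside snapshot   */
        if (seg->bitmap[idx] == mark_epoch) return;   /* 4: already marked     */
        push_pointers_to_mark_queue(p);               /* 5: trace first...     */
        seg->bitmap[idx] = mark_epoch;                /* 6: ...then mark       */
    }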
diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h
index 4458ce3bef..b3d4e14065 100644
--- a/rts/sm/NonMoving.h
+++ b/rts/sm/NonMoving.h
@@ -38,13 +38,21 @@ struct NonmovingSegment {
     struct NonmovingSegment *link;      // for linking together segments into lists
     struct NonmovingSegment *todo_link; // NULL when not in todo list
     nonmoving_block_idx next_free;      // index of the next unallocated block
-    nonmoving_block_idx next_free_snap; // snapshot of next_free
-    uint8_t block_size;                 // log2 of block size in bytes
     uint8_t bitmap[];                   // liveness bitmap
     // After the liveness bitmap comes the data blocks. Note that we need to
     // ensure that the size of this struct (including the bitmap) is a multiple
     // of the word size since GHC assumes that all object pointers are
     // so-aligned.
+
+    // N.B. There are also bits of information which are stored in the
+    // NonmovingSegmentInfo stored in the segment's block descriptor. Namely:
+    //
+    //  * the block size can be found in nonmovingSegmentInfo(seg)->log_block_size.
+    //  * the next_free snapshot can be found in
+    //    nonmovingSegmentInfo(seg)->next_free_snap.
+    //
+    // This allows us to mark a nonmoving closure without bringing the
+    // NonmovingSegment header into cache.
 };

 // This is how we mark end of todo lists. Not NULL because todo_link == NULL
@@ -123,11 +131,20 @@ void *nonmovingAllocate(Capability *cap, StgWord sz);
 void nonmovingAddCapabilities(uint32_t new_n_caps);
 void nonmovingPushFreeSegment(struct NonmovingSegment *seg);

+
+INLINE_HEADER struct NonmovingSegmentInfo *nonmovingSegmentInfo(struct NonmovingSegment *seg) {
+    return &Bdescr((StgPtr) seg)->nonmoving_segment;
+}
+
+INLINE_HEADER uint8_t nonmovingSegmentLogBlockSize(struct NonmovingSegment *seg) {
+    return nonmovingSegmentInfo(seg)->log_block_size;
+}
+
 // Add a segment to the appropriate active list.
 INLINE_HEADER void nonmovingPushActiveSegment(struct NonmovingSegment *seg)
 {
     struct NonmovingAllocator *alloc =
-        nonmovingHeap.allocators[seg->block_size - NONMOVING_ALLOCA0];
+        nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
     while (true) {
         struct NonmovingSegment *current_active = (struct NonmovingSegment*)VOLATILE_LOAD(&alloc->active);
         seg->link = current_active;
@@ -141,7 +158,7 @@ INLINE_HEADER void nonmovingPushActiveSegment(struct NonmovingSegment *seg)
 INLINE_HEADER void nonmovingPushFilledSegment(struct NonmovingSegment *seg)
 {
     struct NonmovingAllocator *alloc =
-        nonmovingHeap.allocators[seg->block_size - NONMOVING_ALLOCA0];
+        nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
     while (true) {
         struct NonmovingSegment *current_filled = (struct NonmovingSegment*)VOLATILE_LOAD(&alloc->filled);
         seg->link = current_filled;
@@ -162,7 +179,7 @@ void assert_in_nonmoving_heap(StgPtr p);
 // The block size of a given segment in bytes.
 INLINE_HEADER unsigned int nonmovingSegmentBlockSize(struct NonmovingSegment *seg)
 {
-    return 1 << seg->block_size;
+    return 1 << nonmovingSegmentLogBlockSize(seg);
 }

 // How many blocks does a segment with the given block size have?
@@ -180,7 +197,7 @@ unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size);
 // How many blocks does the given segment contain? Also the size of the bitmap.
 INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *seg)
 {
-    return nonmovingBlockCountFromSize(seg->block_size);
+    return nonmovingBlockCountFromSize(nonmovingSegmentLogBlockSize(seg));
 }

 // Get a pointer to the given block index assuming that the block size is as
@@ -188,7 +205,7 @@ INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *seg)
 // available). The log_block_size argument must be equal to seg->block_size.
 INLINE_HEADER void *nonmovingSegmentGetBlock_(struct NonmovingSegment *seg, uint8_t log_block_size, nonmoving_block_idx i)
 {
-    ASSERT(log_block_size == seg->block_size);
+    ASSERT(log_block_size == nonmovingSegmentLogBlockSize(seg));
     // Block size in bytes
     unsigned int blk_size = 1 << log_block_size;
     // Bitmap size in bytes
@@ -204,7 +221,7 @@ INLINE_HEADER void *nonmovingSegmentGetBlock_(struct NonmovingSegment *seg, uint
 // Get a pointer to the given block index.
 INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmoving_block_idx i)
 {
-    return nonmovingSegmentGetBlock_(seg, seg->block_size, i);
+    return nonmovingSegmentGetBlock_(seg, nonmovingSegmentLogBlockSize(seg), i);
 }

 // Get the segment which a closure resides in. Assumes that pointer points into
@@ -227,7 +244,7 @@ INLINE_HEADER nonmoving_block_idx nonmovingGetBlockIdx(StgPtr p)
     struct NonmovingSegment *seg = nonmovingGetSegment(p);
     ptrdiff_t blk0 = (ptrdiff_t)nonmovingSegmentGetBlock(seg, 0);
     ptrdiff_t offset = (ptrdiff_t)p - blk0;
-    return (nonmoving_block_idx) (offset >> seg->block_size);
+    return (nonmoving_block_idx) (offset >> nonmovingSegmentLogBlockSize(seg));
 }

 // TODO: Eliminate this
@@ -268,8 +285,9 @@ INLINE_HEADER bool nonmovingClosureMarked(StgPtr p)
 // segment is in the set of segments that will be swept this collection cycle.
 INLINE_HEADER bool nonmovingSegmentBeingSwept(struct NonmovingSegment *seg)
 {
-    unsigned int n = nonmovingSegmentBlockCount(seg);
-    return seg->next_free_snap >= n;
+    struct NonmovingSegmentInfo *seginfo = nonmovingSegmentInfo(seg);
+    unsigned int n = nonmovingBlockCountFromSize(seginfo->log_block_size);
+    return seginfo->next_free_snap >= n;
 }

 // Can be called during a major collection to determine whether a particular
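As a worked example of the index arithmetic in nonmovingGetBlockIdx above, with assumed numbers: for a segment with log_block_size = 6 (64-byte blocks), a pointer 0x1a40 bytes past block 0 lands in block 0x1a40 >> 6 = 105.

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint8_t   log_block_size = 6;       /* assumed: 64-byte blocks */
        uintptr_t offset         = 0x1a40;  /* byte offset from block 0 */

        /* Same shift as nonmovingGetBlockIdx: divide by the block size. */
        unsigned idx = (unsigned)(offset >> log_block_size);
        assert(idx == 105);

        /* And back again, as in nonmovingSegmentGetBlock_. */
        assert((idx << log_block_size) <= offset);
        return 0;
    }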
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 59db431848..54398a7f3c 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -120,6 +120,9 @@ StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
  *
  *  - In the code generated by the STG code generator for pointer array writes
  *
+ *  - In thunk updates (e.g. updateWithIndirection) to ensure that the free
+ *    variables of the original thunk remain reachable.
+ *
  * There is also a read barrier to handle weak references, as described in
  * Note [Concurrent read barrier on deRefWeak#].
  *
@@ -560,6 +563,13 @@ inline void updateRemembSetPushThunk(Capability *cap, StgThunk *thunk)
     updateRemembSetPushThunkEager(cap, (StgThunkInfoTable *) info, thunk);
 }

+/* Push the free variables of a thunk to the update remembered set.
+ * This is called by the thunk update code (e.g. updateWithIndirection) before
+ * we update the indirectee to ensure that the thunk's free variables remain
+ * visible to the concurrent collector.
+ *
+ * See Note [Update remembered set].
+ */
 void updateRemembSetPushThunkEager(Capability *cap,
                                    const StgThunkInfoTable *info,
                                    StgThunk *thunk)
@@ -819,7 +829,7 @@ static MarkQueueEnt markQueuePop (MarkQueue *q)
             // MarkQueueEnt encoding always places the pointer to the object to be
             // marked first.
             prefetchForRead(&new.mark_closure.p->header.info);
-            prefetchForRead(&nonmovingGetSegment_unchecked((StgPtr) new.mark_closure.p)->block_size);
+            prefetchForRead(Bdescr((StgPtr) new.mark_closure.p));
             q->prefetch_queue[i] = new;
             i = (i + 1) % MARK_PREFETCH_QUEUE_DEPTH;
         }
@@ -1263,7 +1273,7 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
             goto done;

         StgClosure *snapshot_loc =
-          (StgClosure *) nonmovingSegmentGetBlock(seg, seg->next_free_snap);
+          (StgClosure *) nonmovingSegmentGetBlock(seg, nonmovingSegmentInfo(seg)->next_free_snap);
         if (p >= snapshot_loc && mark == 0) {
             /*
              * In this case we are looking at a block that wasn't allocated
@@ -1700,7 +1710,7 @@ bool nonmovingIsAlive (StgClosure *p)
         struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
         nonmoving_block_idx i = nonmovingGetBlockIdx((StgPtr) p);
         uint8_t mark = nonmovingGetMark(seg, i);
-        if (i >= seg->next_free_snap) {
+        if (i >= nonmovingSegmentInfo(seg)->next_free_snap) {
            // If the object is allocated after next_free_snap then one of the
            // following must be true:
            //
diff --git a/rts/sm/NonMovingSweep.c b/rts/sm/NonMovingSweep.c
index 7af5508afc..3ee27ef3b4 100644
--- a/rts/sm/NonMovingSweep.c
+++ b/rts/sm/NonMovingSweep.c
@@ -41,7 +41,7 @@ nonmovingSweepSegment(struct NonmovingSegment *seg)
         } else if (!found_free) {
             found_free = true;
             seg->next_free = i;
-            seg->next_free_snap = i;
+            nonmovingSegmentInfo(seg)->next_free_snap = i;
             Bdescr((P_)seg)->u.scan = (P_)nonmovingSegmentGetBlock(seg, i);
             seg->bitmap[i] = 0;
         } else {
@@ -63,7 +63,7 @@ nonmovingSweepSegment(struct NonmovingSegment *seg)
         return SEGMENT_FILLED;
     } else {
         ASSERT(seg->next_free == 0);
-        ASSERT(seg->next_free_snap == 0);
+        ASSERT(nonmovingSegmentInfo(seg)->next_free_snap == 0);
         return SEGMENT_FREE;
     }
 }
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 0159488856..99671e9c61 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -497,7 +497,7 @@ static void checkNonmovingSegments (struct NonmovingSegment *seg)
         if (seg->bitmap[i] == nonmovingMarkEpoch) {
             StgPtr p = nonmovingSegmentGetBlock(seg, i);
             checkClosure((StgClosure *) p);
-        } else if (i < seg->next_free_snap){
+        } else if (i < nonmovingSegmentInfo(seg)->next_free_snap){
             seg->bitmap[i] = 0;
         }
     }
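Finally, a sketch of the sweep classification that the NonMovingSweep.c hunk above touches (assumed, simplified names; cf. nonmovingSweepSegment, which additionally resets next_free_snap and the scan pointer): a segment with no marks is freed, one with no gaps stays filled, and anything in between becomes active again.

    #include <stdbool.h>
    #include <stdint.h>

    enum sweep_result { SEGMENT_FREE, SEGMENT_ACTIVE, SEGMENT_FILLED };

    /* Simplified segment; see the NonMoving.h hunk above for the real layout. */
    struct segment_sketch {
        uint16_t next_free;
        uint8_t  bitmap[1024];
    };

    extern uint8_t mark_epoch;   /* cf. nonmovingMarkEpoch */

    /* Classify a swept segment by scanning its bitmap, resetting next_free
     * to the first dead block found and clearing stale marks as we go. */
    enum sweep_result sweep_segment(struct segment_sketch *seg, unsigned n_blocks)
    {
        bool found_free = false, found_live = false;
        for (unsigned i = 0; i < n_blocks; i++) {
            if (seg->bitmap[i] == mark_epoch) {
                found_live = true;            /* survived this mark epoch */
            } else if (!found_free) {
                found_free = true;
                seg->next_free = i;           /* first reusable block */
                seg->bitmap[i] = 0;
            } else {
                seg->bitmap[i] = 0;           /* clear mark of a dead block */
            }
        }
        if (found_live && found_free) return SEGMENT_ACTIVE;
        if (found_live)               return SEGMENT_FILLED;
        return SEGMENT_FREE;
    }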