-rw-r--r--  includes/rts/Flags.h        |   4
-rw-r--r--  includes/rts/storage/TSO.h  |   3
-rw-r--r--  rts/RtsFlags.c              |   5
-rw-r--r--  rts/rts.cabal.in            |   1
-rw-r--r--  rts/sm/NonMoving.c          | 177
-rw-r--r--  rts/sm/NonMoving.h          |   5
-rw-r--r--  rts/sm/NonMovingMark.c      |  98
-rw-r--r--  rts/sm/NonMovingShortcut.c  | 326
-rw-r--r--  rts/sm/NonMovingShortcut.h  |  17
9 files changed, 612 insertions, 24 deletions
diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h
index 9a039fd95c..f27ce23b0b 100644
--- a/includes/rts/Flags.h
+++ b/includes/rts/Flags.h
@@ -52,7 +52,9 @@ typedef struct _GC_FLAGS {
double oldGenFactor;
double pcFreeHeap;
- bool useNonmoving;
+ bool useNonmoving; // default = false
+ bool nonmovingSelectorOpt; // Do selector optimization in the
+ // non-moving heap, default = false
uint32_t generations;
bool squeezeUpdFrames;
diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h
index 070ecbebaa..d706282796 100644
--- a/includes/rts/storage/TSO.h
+++ b/includes/rts/storage/TSO.h
@@ -231,6 +231,9 @@ typedef struct StgTSO_ {
* setting the stack's mark bit (either the BF_MARKED bit for large objects
* or otherwise its bit in its segment's mark bitmap).
*
+ * To ensure that mutation does not proceed until the stack is fully marked,
+ * the mark phase must not set the mark bit until it has finished tracing.
+ *
*/
#define STACK_DIRTY 1
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index c606d86418..0e28b980ac 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -157,6 +157,7 @@ void initRtsFlagsDefaults(void)
RtsFlags.GcFlags.pcFreeHeap = 3; /* 3% */
RtsFlags.GcFlags.oldGenFactor = 2;
RtsFlags.GcFlags.useNonmoving = false;
+ RtsFlags.GcFlags.nonmovingSelectorOpt = false;
RtsFlags.GcFlags.generations = 2;
RtsFlags.GcFlags.squeezeUpdFrames = true;
RtsFlags.GcFlags.compact = false;
@@ -1542,6 +1543,10 @@ error = true;
OPTION_SAFE;
RtsFlags.GcFlags.useNonmoving = true;
unchecked_arg_start++;
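+ // An optional 's' suffix on the flag (e.g. "-xns" rather than "-xn")
+ // additionally enables the selector optimisation during concurrent mark.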
+ if (rts_argv[arg][3] == 's') {
+ RtsFlags.GcFlags.nonmovingSelectorOpt = true;
+ unchecked_arg_start++;
+ }
break;
case 'c': /* Debugging tool: show current cost centre on
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index a53c166849..1968f2f693 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -470,6 +470,7 @@ library
sm/NonMovingCensus.c
sm/NonMovingMark.c
sm/NonMovingScav.c
+ sm/NonMovingShortcut.c
sm/NonMovingSweep.c
sm/Sanity.c
sm/Scav.c
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index 95a6fb1273..8174fa754d 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -49,8 +49,183 @@ Mutex concurrent_coll_finished_lock;
/*
* Note [Non-moving garbage collector]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * The sources rts/NonMoving*.c implement GHC's non-moving garbage collector
+ * for the oldest generation. In contrast to the throughput-oriented moving
+ * collector, the non-moving collector is designed to achieve low GC latencies
+ * on large heaps. It achieves this by way of a concurrent mark-and-sweep
+ * collection strategy on a specially-designed heap structure.
+ * While the design is described in detail in the design document found in
+ * docs/storage/nonmoving-gc, we briefly summarize the structure here.
+ *
+ *
+ * === Heap Structure ===
+ *
+ * The nonmoving heap (embodied by struct NonmovingHeap) consists of a family
+ * of allocators, each serving a range of allocation sizes. Each allocator
+ * consists of a set of *segments*, each of which contains fixed-size *blocks*
+ * (not to be confused with "blocks" provided by GHC's block allocator; this is
+ * admittedly an unfortunate overlap in terminology). These blocks are the
+ * backing store for the allocator. In addition to blocks, the segment also
+ * contains some header information (see struct NonmovingSegment in
+ * NonMoving.h). This header contains a *bitmap* encoding one byte per block
+ * (used by the collector to record liveness), as well as the index of the next
+ * unallocated block (and a *snapshot* of this field which will be described in
+ * the next section).
+ *
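+ * A rough sketch of the segment header (the authoritative definition lives in
+ * NonMoving.h; the exact fields and layout shown here are illustrative only):
+ *
+ *     struct NonmovingSegment {
+ *         struct NonmovingSegment *link;      // segment list link
+ *         struct NonmovingSegment *todo_link; // mark queue todo list link
+ *         nonmoving_block_idx next_free;      // index of next unallocated block
+ *         uint8_t bitmap[];                   // liveness byte per block
+ *     };
+ *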
+ * Each allocator maintains three sets of segments:
+ *
+ * - A *current* segment for each capability; this is the segment which that
+ * capability will allocate into.
+ *
+ * - A pool of *active* segments, each of which contains at least one
+ * unallocated block. The allocator will take a segment from this pool when
+ * it fills its *current* segment.
+ *
+ * - A set of *filled* segments, which contain no unallocated blocks and will
+ * be collected during the next major GC cycle.
+ *
+ * Storage for segments is allocated from the block allocator in aligned
+ * groups of NONMOVING_SEGMENT_BLOCKS blocks. This makes locating the segment
+ * header for a block a simple matter of bit-masking (as implemented by
+ * nonmovingGetSegment).
+ *
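+ * For example, a sketch of the masking trick (assuming the segment size,
+ * NONMOVING_SEGMENT_BLOCKS * BLOCK_SIZE, is a power of two and segments are
+ * aligned to it; see NonMoving.h for the real definition):
+ *
+ *     struct NonmovingSegment *nonmovingGetSegment(StgPtr p)
+ *     {
+ *         const uintptr_t sz = NONMOVING_SEGMENT_BLOCKS * BLOCK_SIZE;
+ *         return (struct NonmovingSegment *) ((uintptr_t) p & ~(sz - 1));
+ *     }
+ *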
+ * In addition, to relieve pressure on the block allocator we keep a small pool
+ * of free blocks around (nonmovingHeap.free) which can be pushed/popped
+ * to/from in a lock-free manner.
+ *
+ *
+ * === Allocation ===
+ *
+ * The allocator (as implemented by nonmovingAllocate) starts by identifying
+ * which allocator the request should be made against. It then allocates into
+ * its local current segment and bumps the next_free pointer to point to the
+ * next unallocated block (as indicated by the bitmap). If it finds the current
+ * segment is now full, it moves it to the filled list and looks for a new
+ * segment to make current from a few sources:
+ *
+ * 1. the allocator's active list (see pop_active_segment)
+ * 2. the nonmoving heap's free block pool (see nonmovingPopFreeSegment)
+ * 3. allocate a new segment from the block allocator (see
+ * nonmovingAllocSegment)
+ *
+ * Note that allocation does *not* involve modifying the bitmap. The bitmap is
+ * only modified by the collector.
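+ *
+ * As a sketch of this flow (eliding locking and bookkeeping; the real code is
+ * nonmovingAllocate in NonMoving.c):
+ *
+ *     // allocate from the capability's current segment
+ *     void *ret = nonmovingSegmentGetBlock(current, current->next_free);
+ *     advance next_free past allocated blocks (per the bitmap);
+ *     if (current is now full) {
+ *         push current onto the filled list;
+ *         current = pop_active_segment()          // (1)
+ *               or nonmovingPopFreeSegment()      // (2)
+ *               or nonmovingAllocSegment(node);   // (3)
+ *     }
+ *     return ret;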
+ *
+ *
+ * === Snapshot invariant ===
+ *
+ * To safely collect in a concurrent setting, the collector relies on the
+ * notion of a *snapshot*. The snapshot is a hypothetical frozen state of the
+ * heap topology taken at the beginning of the major collection cycle.
+ * With this definition we require the following property of the mark phase,
+ * which we call the *snapshot invariant*,
+ *
+ * All objects that were reachable at the time the snapshot was collected
+ * must have their mark bits set at the end of the mark phase.
+ *
+ * As the mutator might change the topology of the heap while we are marking,
+ * maintaining this property requires some cooperation from the mutator.
+ * Specifically, we rely on a write barrier as described in Note [Update
+ * remembered set].
+ *
+ * To determine which objects existed when the snapshot was taken we record a
+ * snapshot of each segment's next_free pointer at the beginning of
+ * collection.
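+ *
+ * A closure is then part of the snapshot precisely when it lies below this
+ * snapshotted allocation pointer, roughly (cf. the check in
+ * NonMovingMark.c:mark_closure):
+ *
+ *     StgClosure *snapshot_loc = (StgClosure *)
+ *         nonmovingSegmentGetBlock(seg, nonmovingSegmentInfo(seg)->next_free_snap);
+ *     bool in_snapshot = p < snapshot_loc;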
+ *
+ *
+ * === Collection ===
+ *
+ * Collection happens in a few phases, some of which occur during a
+ * stop-the-world period (marked with [STW]) and others which can occur
+ * concurrently with mutation and minor collection (marked with [CONC]):
+ *
+ * 1. [STW] Preparatory GC: Here we do a standard minor collection of the
+ * younger generations (which may evacuate things to the nonmoving heap).
+ * References from younger generations into the nonmoving heap are recorded
+ * in the mark queue (see Note [Aging under the non-moving collector] in
+ * this file).
+ *
+ * 2. [STW] Snapshot update: Here we update the segment snapshot metadata
+ * (see nonmovingPrepareMark) and move the filled segments to
+ * nonmovingHeap.sweep_list, which is the set of segments which we will
+ * sweep this GC cycle.
+ *
+ * 3. [STW] Root collection: Here we walk over a variety of root sources
+ * and add them to the mark queue (see nonmovingCollect).
+ *
+ * 4. [CONC] Concurrent marking: Here we do the majority of marking concurrently
+ * with mutator execution (but with the write barrier enabled; see
+ * Note [Update remembered set]).
+ *
+ * 5. [STW] Final sync: Here we interrupt the mutators, ask them to
+ * flush their final update remembered sets, and mark any new references
+ * we find.
+ *
+ * 6. [CONC] Sweep: Here we walk over the nonmoving segments on sweep_list
+ * and place them back on either the active, current, or filled list,
+ * depending upon how much live data they contain.
+ *
+ *
+ * === Marking ===
+ *
+ * Ignoring large and static objects, marking a closure is fairly
+ * straightforward (implemented in NonMovingMark.c:mark_closure):
+ *
+ * 1. Check whether the closure is in the non-moving generation; if not then
+ * we ignore it.
+ * 2. Find the segment containing the closure's block.
+ * 3. Check whether the closure's block is above seg->next_free_snap; if so
+ * then the block was not allocated when we took the snapshot and therefore
+ * we don't need to mark it.
+ * 4. Check whether the block's bitmap entry is equal to nonmovingMarkEpoch. If
+ * so then we can stop as we have already marked it.
+ * 5. Push the closure's pointers to the mark queue.
+ * 6. Set the block's bitmap entry to nonmovingMarkEpoch.
+ *
+ * Note that the ordering of (5) and (6) is rather important, as described in
+ * Note [StgStack dirtiness flags and concurrent marking].
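+ *
+ * A condensed sketch of the segment-marking path (see mark_closure for the
+ * full story, including large and static objects):
+ *
+ *     struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
+ *     nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
+ *     if (nonmovingGetMark(seg, block_idx) == nonmovingMarkEpoch)
+ *         return;                          // already marked this cycle
+ *     ... push p's pointer fields to the mark queue ...
+ *     nonmovingSetMark(seg, block_idx);    // set the mark bit last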
+ *
+ *
+ * === Other references ===
+ *
+ * Apart from the design document in docs/storage/nonmoving-gc and the Ueno
+ * 2016 paper (TODO citation) from which it drew inspiration, there are a
+ * variety of other relevant Notes scattered throughout the tree:
+ *
+ * - Note [Concurrent non-moving collection] (NonMoving.c) describes
+ * concurrency control of the nonmoving collector
+ *
+ * - Note [Live data accounting in nonmoving collector] (NonMoving.c)
+ * describes how we track the quantity of live data in the nonmoving
+ * generation.
+ *
+ * - Note [Aging under the non-moving collector] (NonMoving.c) describes how
+ * we accommodate aging
+ *
+ * - Note [Large objects in the non-moving collector] (NonMovingMark.c)
+ * describes how we track large objects.
+ *
+ * - Note [Update remembered set] (NonMovingMark.c) describes the function and
+ * implementation of the update remembered set used to realize the concurrent
+ * write barrier.
+ *
+ * - Note [Concurrent read barrier on deRefWeak#] (NonMovingMark.c) describes
+ * the read barrier on Weak# objects.
+ *
+ * - Note [Unintentional marking in resurrectThreads] (NonMovingMark.c) describes
+ * a tricky interaction between the update remembered set flush and weak
+ * finalization.
+ *
+ * - Note [Origin references in the nonmoving collector] (NonMovingMark.h)
+ * describes how we implement indirection short-cutting and the selector
+ * optimisation.
+ *
+ * - Note [StgStack dirtiness flags and concurrent marking] (TSO.h) describes
+ * the protocol for concurrent marking of stacks.
+ *
+ * - Note [Static objects under the nonmoving collector] (Storage.c) describes
+ * treatment of static objects.
*
- * TODO
*
* Note [Concurrent non-moving collection]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h
index 2a553f0a0d..b3d4e14065 100644
--- a/rts/sm/NonMoving.h
+++ b/rts/sm/NonMoving.h
@@ -312,6 +312,11 @@ INLINE_HEADER bool nonmovingClosureBeingSwept(StgClosure *p)
}
}
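+// Returns true when the closure is either a static object or lives in the
+// nonmoving heap.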
+INLINE_HEADER bool isNonmovingClosure(StgClosure *p)
+{
+ return !HEAP_ALLOCED_GC(p) || Bdescr((P_)p)->flags & BF_NONMOVING;
+}
+
#if defined(DEBUG)
void nonmovingPrintSegment(struct NonmovingSegment *seg);
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 0c91befcd6..54398a7f3c 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -11,6 +11,7 @@
// This is sometimes declared as a register variable therefore it is necessary
// to include the declaration so that the compiler doesn't clobber the register.
#include "NonMovingMark.h"
+#include "NonMovingShortcut.h"
#include "NonMoving.h"
#include "BlockAlloc.h" /* for countBlocks */
#include "HeapAlloc.h"
@@ -24,7 +25,7 @@
#include "MarkWeak.h"
#include "sm/Storage.h"
-static void mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin);
+static void mark_closure (MarkQueue *queue, const StgClosure *p, StgClosure **origin);
static void mark_tso (MarkQueue *queue, StgTSO *tso);
static void mark_stack (MarkQueue *queue, StgStack *stack);
static void mark_PAP_payload (MarkQueue *queue,
@@ -119,6 +120,9 @@ StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
*
* - In the code generated by the STG code generator for pointer array writes
*
+ * - In thunk updates (e.g. updateWithIndirection) to ensure that the free
+ * variables of the original thunk remain reachable.
+ *
* There is also a read barrier to handle weak references, as described in
* Note [Concurrent read barrier on deRefWeak#].
*
@@ -479,7 +483,7 @@ void push_closure (MarkQueue *q,
MarkQueueEnt ent = {
.mark_closure = {
- .p = UNTAG_CLOSURE(p),
+ .p = p,
.origin = origin,
}
};
@@ -559,11 +563,19 @@ inline void updateRemembSetPushThunk(Capability *cap, StgThunk *thunk)
updateRemembSetPushThunkEager(cap, (StgThunkInfoTable *) info, thunk);
}
+/* Push the free variables of a thunk to the update remembered set.
+ * This is called by the thunk update code (e.g. updateWithIndirection) before
+ * we update the indirectee to ensure that the thunk's free variables remain
+ * visible to the concurrent collector.
+ *
+ * See Note [Update remembered set].
+ */
void updateRemembSetPushThunkEager(Capability *cap,
const StgThunkInfoTable *info,
StgThunk *thunk)
{
/* N.B. info->i.type mustn't be WHITEHOLE */
+ MarkQueue *queue = &cap->upd_rem_set.queue;
switch (info->i.type) {
case THUNK:
case THUNK_1_0:
@@ -572,7 +584,6 @@ void updateRemembSetPushThunkEager(Capability *cap,
case THUNK_1_1:
case THUNK_0_2:
{
- MarkQueue *queue = &cap->upd_rem_set.queue;
push_thunk_srt(queue, &info->i);
for (StgWord i = 0; i < info->i.layout.payload.ptrs; i++) {
@@ -586,7 +597,6 @@ void updateRemembSetPushThunkEager(Capability *cap,
}
case AP:
{
- MarkQueue *queue = &cap->upd_rem_set.queue;
StgAP *ap = (StgAP *) thunk;
if (check_in_nonmoving_heap(ap->fun)) {
push_closure(queue, ap->fun, NULL);
@@ -598,6 +608,16 @@ void updateRemembSetPushThunkEager(Capability *cap,
case BLACKHOLE:
// TODO: This is right, right?
break;
+ // The selector optimization performed by the nonmoving mark may have
+ // overwritten a thunk which we are updating with an indirection.
+ case IND:
+ {
+ StgInd *ind = (StgInd *) thunk;
+ if (check_in_nonmoving_heap(ind->indirectee)) {
+ push_closure(queue, ind->indirectee, NULL);
+ }
+ break;
+ }
default:
barf("updateRemembSetPushThunk: invalid thunk pushed: p=%p, type=%d",
thunk, info->i.type);
@@ -1130,13 +1150,15 @@ bump_static_flag(StgClosure **link_field, StgClosure *q STG_UNUSED)
}
static GNUC_ATTR_HOT void
-mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
+mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
{
- (void)origin; // TODO: should be used for selector/thunk optimisations
+ StgClosure *p = (StgClosure*)p0;
try_again:
;
bdescr *bd = NULL;
+ StgClosure *p_next = NULL;
+ StgWord tag = GET_CLOSURE_TAG(p);
p = UNTAG_CLOSURE(p);
# define PUSH_FIELD(obj, field) \
@@ -1162,7 +1184,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
markQueuePushThunkSrt(queue, info); // TODO this function repeats the check above
}
}
- return;
+ goto done;
case FUN_STATIC:
if (info->srt != 0 || info->layout.payload.ptrs != 0) {
@@ -1178,13 +1200,13 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
}
}
}
- return;
+ goto done;
case IND_STATIC:
if (bump_static_flag(IND_STATIC_LINK((StgClosure *)p), p)) {
PUSH_FIELD((StgInd *) p, indirectee);
}
- return;
+ goto done;
case CONSTR:
case CONSTR_1_0:
@@ -1195,7 +1217,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
PUSH_FIELD(p, payload[i]);
}
}
- return;
+ goto done;
case WHITEHOLE:
while (get_volatile_itbl(p)->type == WHITEHOLE);
@@ -1217,7 +1239,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
//
// * a mutable object might have been updated
// * we might have aged an object
- return;
+ goto done;
}
ASSERTM(LOOKS_LIKE_CLOSURE_PTR(p), "invalid closure, info=%p", p->header.info);
@@ -1229,10 +1251,10 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
if (bd->flags & BF_LARGE) {
if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
// Not in the snapshot
- return;
+ goto done;
}
if (bd->flags & BF_MARKED) {
- return;
+ goto done;
}
// Mark contents
@@ -1248,7 +1270,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
uint8_t mark = nonmovingGetMark(seg, block_idx);
/* Don't mark things we've already marked (since we may loop) */
if (mark == nonmovingMarkEpoch)
- return;
+ goto done;
StgClosure *snapshot_loc =
(StgClosure *) nonmovingSegmentGetBlock(seg, nonmovingSegmentInfo(seg)->next_free_snap);
@@ -1259,7 +1281,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
* things above the allocation pointer that aren't marked since
* they may not be valid objects.
*/
- return;
+ goto done;
}
}
}
@@ -1277,7 +1299,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
}
ASSERT(found_it);
#endif
- return;
+ return; // we don't update origin here! TODO(osa): explain this
}
else {
@@ -1409,10 +1431,24 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
}
- case IND:
- case BLACKHOLE:
+ case IND: {
PUSH_FIELD((StgInd *) p, indirectee);
+ if (origin != NULL) {
+ p_next = ((StgInd*)p)->indirectee;
+ }
break;
+ }
+
+ case BLACKHOLE: {
+ PUSH_FIELD((StgInd *) p, indirectee);
+ StgClosure *indirectee = ((StgInd*)p)->indirectee;
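+ // Only shortcut the blackhole if the indirectee is tagged (and
+ // therefore a value); an untagged indirectee may be a TSO or
+ // BLOCKING_QUEUE (cf. the BLACKHOLE case in NonMovingShortcut.c).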
+ if (GET_CLOSURE_TAG(indirectee) == 0 || origin == NULL) {
+ // do nothing
+ } else {
+ p_next = indirectee;
+ }
+ break;
+ }
case MUT_VAR_CLEAN:
case MUT_VAR_DIRTY:
@@ -1429,8 +1465,11 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
}
case THUNK_SELECTOR:
- PUSH_FIELD((StgSelector *) p, selectee);
- // TODO: selector optimization
+ if (RtsFlags.GcFlags.nonmovingSelectorOpt) {
+ nonmoving_eval_thunk_selector(queue, (StgSelector*)p, origin);
+ } else {
+ PUSH_FIELD((StgSelector *) p, selectee);
+ }
break;
case AP_STACK: {
@@ -1499,7 +1538,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
// the stack will be fully marked before we sweep due to the final
// post-mark synchronization. Most importantly, we do not set its
// mark bit, the mutator is responsible for this.
- return;
+ goto done;
}
break;
}
@@ -1558,13 +1597,28 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
bd->flags |= BF_MARKED;
}
RELEASE_LOCK(&nonmoving_large_objects_mutex);
- } else {
+ } else if (bd->flags & BF_NONMOVING) {
// TODO: Kill repetition
struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
nonmovingSetMark(seg, block_idx);
nonmoving_live_words += nonmovingSegmentBlockSize(seg) / sizeof(W_);
}
+
+ // If we found an indirection to shortcut, keep going.
+ if (p_next) {
+ p = p_next;
+ goto try_again;
+ }
+
+done:
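+ // If we have shortcut to a closure in the nonmoving heap (or a static
+ // object), try to update the origin reference to point directly at it,
+ // preserving the original pointer tag. The CAS ensures that we do not
+ // clobber a concurrent update of the origin by the mutator.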
+ if (origin != NULL && (!HEAP_ALLOCED(p) || bd->flags & BF_NONMOVING)) {
+ if (UNTAG_CLOSURE((StgClosure*)p0) != p && *origin == p0) {
+ if (cas((StgVolatilePtr)origin, (StgWord)p0, (StgWord)TAG_CLOSURE(tag, p)) == (StgWord)p0) {
+ // debugBelch("Thunk optimization successful\n");
+ }
+ }
+ }
}
/* This is the main mark loop.
diff --git a/rts/sm/NonMovingShortcut.c b/rts/sm/NonMovingShortcut.c
new file mode 100644
index 0000000000..83c4857677
--- /dev/null
+++ b/rts/sm/NonMovingShortcut.c
@@ -0,0 +1,326 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2019
+ *
+ * Non-moving garbage collector and allocator:
+ * Indirection short-cutting and the selector optimisation
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "GC.h"
+#include "SMPClosureOps.h"
+#include "NonMovingMark.h"
+#include "NonMovingShortcut.h"
+#include "Printer.h"
+
+#define MAX_THUNK_SELECTOR_DEPTH 16
+
+//#define SELECTOR_OPT_DEBUG
+
+#if defined(SELECTOR_OPT_DEBUG)
+static void
+print_selector_chain(StgClosure *p)
+{
+ debugBelch("Selector chain: %p", (void*)p);
+ StgClosure *next = p->payload[0];
+ while (next != NULL) {
+ debugBelch(", %p", next);
+ next = next->payload[0];
+ }
+ debugBelch("\n");
+}
+#endif
+
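+// Walk the chain of locked selector thunks, overwriting each one with an
+// indirection to `val` and unlocking it, then try to update the `origin`
+// reference (if any) to point directly to `val`.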
+static void
+update_selector_chain(
+ StgClosure *chain,
+ StgClosure **origin,
+ StgSelector * const p0,
+ StgClosure * const val
+) {
+ ASSERT(val != NULL);
+
+ // Make sure we don't introduce non-moving-to-moving pointers here.
+ ASSERT(isNonmovingClosure(val));
+
+ // This case we can't handle because we don't know the info pointer of the
+ // closure before we locked it.
+ ASSERT(chain != val);
+
+#if defined(SELECTOR_OPT_DEBUG)
+ if (chain != NULL) {
+ print_selector_chain(chain);
+ debugBelch("Value: ");
+ printClosure(val);
+ }
+#endif
+
+ while (chain) {
+ // debugBelch("chain: %p\n", (void*)chain);
+
+ StgClosure *next = chain->payload[0];
+
+ // We only update closures in the non-moving heap
+ ASSERT(isNonmovingClosure(chain));
+
+ ((StgInd*)chain)->indirectee = val;
+ unlockClosure((StgClosure*)chain, &stg_IND_info);
+
+ chain = next;
+ }
+
+ if (origin != NULL && (StgClosure*)p0 != val) {
+ cas((StgVolatilePtr)origin, (StgWord)p0, (StgWord)val);
+ }
+}
+
+// Returns the value of the selector thunk. The value is a non-moving closure.
+// If it's not possible to evaluate the selector thunk, the return value will
+// be the selector itself.
+static StgClosure*
+nonmoving_eval_thunk_selector_(
+ MarkQueue *queue,
+ StgSelector * const p0,
+ StgClosure ** const origin,
+ int depth
+) {
+ // This function should only be called on non-moving objects.
+ ASSERT(HEAP_ALLOCED_GC((P_)p0) && isNonmovingClosure((StgClosure*)p0));
+
+ markQueuePushClosure(queue, (StgClosure*)p0, NULL);
+
+ // INVARIANT: A non-moving object. Locked (below).
+ StgClosure *p = (StgClosure*)p0;
+
+ // Chain of non-moving selectors to update. These will be INDs to `p` when
+ // we reach a value. INVARIANT: All objects in the chain are locked, and
+ // in the non-moving heap.
+ StgClosure *chain = NULL;
+
+ // Variables to update: p.
+selector_changed:
+ ;
+
+ // debugBelch("Selector changed: %p\n", (void*)p);
+
+ // Lock the selector to avoid concurrent modification in mutators
+ const StgInfoTable *selector_info_ptr = lockClosure((StgClosure*)p);
+ StgInfoTable *selector_info_tbl = INFO_PTR_TO_STRUCT(selector_info_ptr);
+
+ if (selector_info_tbl->type != THUNK_SELECTOR) {
+ // Selector updated in the meantime, or we reached a value. Update
+ // the chain.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ // The closure is locked and it's a selector thunk. If the selectee is a
+ // CONSTR we do the selection here; the selected value will be the value
+ // of this selector thunk.
+ //
+ // Two cases:
+ //
+ // - If the selected value is also a selector thunk, then we loop and
+ // evaluate it. The final value will be the value of both the current
+ // selector and the selected value (which is also a selector thunk).
+ //
+ // - If the selectee is a selector thunk, we recursively evaluate it (up to
+ // a certain depth, specified with MAX_THUNK_SELECTOR_DEPTH), then do the
+ // selection on its value.
+
+ //
+ // Do the selection
+ //
+
+ uint32_t field = selector_info_tbl->layout.selector_offset;
+ StgClosure *selectee = UNTAG_CLOSURE(((StgSelector*)p)->selectee);
+
+ // Variables to update: selectee
+selectee_changed:
+ // debugBelch("Selectee changed: %p\n", (void*)selectee);
+
+ if (!isNonmovingClosure(selectee)) {
+ // The selectee is a moving object, and it may be moved by a concurrent
+ // minor GC while we read the info table and fields, so don't try to
+ // read the fields, just update the chain.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ // Selectee is a non-moving object, mark it.
+ markQueuePushClosure(queue, selectee, NULL);
+
+ const StgInfoTable *selectee_info_tbl = get_itbl(selectee);
+ switch (selectee_info_tbl->type) {
+ case WHITEHOLE: {
+ // Probably a loop. Abort.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_NOCAF: {
+ // Selectee is a constructor in the non-moving heap.
+ // Select the field.
+
+ // Check that the field offset is in range.
+ ASSERT(field < (StgWord32)(selectee_info_tbl->layout.payload.ptrs +
+ selectee_info_tbl->layout.payload.nptrs));
+
+ StgClosure *val = UNTAG_CLOSURE(selectee->payload[field]);
+
+ // `val` is the value of this selector thunk. We need to check a
+ // few cases:
+ //
+ // - If `val` is in the moving heap, we stop here and update the
+ // chain. All updated objects should be added to the mut_list.
+ // (TODO (osa): What happens if the value is evacuated as we do
+ // this?)
+ //
+ // - If `val` is in the non-moving heap, we check if it's also a
+ // selector. If it is we add it to the chain and loop.
+
+ // Follow indirections. Variables to update: `val`.
+ val_changed:
+ if (!isNonmovingClosure(val)) {
+ // The selected value is a moving object, so we won't be
+ // updating the chain to this object.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ switch (get_itbl(val)->type) {
+ case IND:
+ case IND_STATIC:
+ ;
+ // Follow the indirection
+ StgClosure *indirectee = UNTAG_CLOSURE(((StgInd*)val)->indirectee);
+ if (isNonmovingClosure(indirectee)) {
+ val = UNTAG_CLOSURE(((StgInd*)val)->indirectee);
+ goto val_changed;
+ } else {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+ case THUNK_SELECTOR:
+ // Value of the selector thunk is again a selector thunk in the
+ // non-moving heap. Add the current selector to the chain and
+ // loop.
+ p->payload[0] = chain;
+ chain = p;
+ p = val;
+ goto selector_changed;
+ default:
+ // Found a value, add the current selector to the chain and
+ // update it.
+ p->payload[0] = chain;
+ chain = p;
+ update_selector_chain(chain, origin, p0, val);
+ return val;
+ }
+ }
+
+ case IND:
+ case IND_STATIC: {
+ StgClosure *indirectee = UNTAG_CLOSURE(((StgInd *)selectee)->indirectee);
+ if (isNonmovingClosure(indirectee)) {
+ selectee = indirectee;
+ goto selectee_changed;
+ } else {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+ }
+
+ case BLACKHOLE: {
+ StgClosure *indirectee = ((StgInd*)selectee)->indirectee;
+
+ if (!isNonmovingClosure(UNTAG_CLOSURE(indirectee))) {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ // Establish whether this BH has been updated, and is now an
+ // indirection, as in evacuate().
+ if (GET_CLOSURE_TAG(indirectee) == 0) {
+ const StgInfoTable *i = indirectee->header.info;
+ if (i == &stg_TSO_info
+ || i == &stg_WHITEHOLE_info
+ || i == &stg_BLOCKING_QUEUE_CLEAN_info
+ || i == &stg_BLOCKING_QUEUE_DIRTY_info) {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ }
+ ASSERT(i != &stg_IND_info); // TODO not sure about this part
+ }
+
+ // It's an indirection, follow it.
+ selectee = UNTAG_CLOSURE(indirectee);
+ goto selectee_changed;
+ }
+
+ case AP:
+ case AP_STACK:
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_STATIC: {
+ // Not evaluated yet
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ }
+
+ case THUNK_SELECTOR: {
+ // Selectee is a selector thunk. Evaluate it if we haven't reached
+ // the recursion limit yet.
+ if (depth < MAX_THUNK_SELECTOR_DEPTH) {
+ StgClosure *new_selectee =
+ UNTAG_CLOSURE(nonmoving_eval_thunk_selector_(
+ queue, (StgSelector*)selectee, NULL, depth+1));
+ ASSERT(isNonmovingClosure(new_selectee));
+ if (selectee == new_selectee) {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ } else {
+ selectee = new_selectee;
+ goto selectee_changed;
+ }
+ } else {
+ // Recursion limit reached
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ }
+ }
+
+ default: {
+ barf("nonmoving_eval_thunk_selector: strange selectee %d",
+ (int)(selectee_info_tbl->type));
+ }
+ }
+}
+
+void
+nonmoving_eval_thunk_selector(MarkQueue *queue, StgSelector *p, StgClosure **origin)
+{
+ nonmoving_eval_thunk_selector_(queue, p, origin, 0);
+}
diff --git a/rts/sm/NonMovingShortcut.h b/rts/sm/NonMovingShortcut.h
new file mode 100644
index 0000000000..72297884aa
--- /dev/null
+++ b/rts/sm/NonMovingShortcut.h
@@ -0,0 +1,17 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2019
+ *
+ * Non-moving garbage collector and allocator:
+ * Indirection short-cutting and the selector optimisation
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "BeginPrivate.h"
+
+void
+nonmoving_eval_thunk_selector(MarkQueue *queue, StgSelector *p, StgClosure **origin);
+
+#include "EndPrivate.h"