-rw-r--r--  includes/rts/Flags.h          |   4
-rw-r--r--  includes/rts/storage/Block.h  |  28
-rw-r--r--  rts/RtsFlags.c                |   5
-rw-r--r--  rts/rts.cabal.in              |   1
-rw-r--r--  rts/sm/NonMoving.c            |  22
-rw-r--r--  rts/sm/NonMoving.h            |  45
-rw-r--r--  rts/sm/NonMovingMark.c        |  94
-rw-r--r--  rts/sm/NonMovingShortcut.c    | 326
-rw-r--r--  rts/sm/NonMovingShortcut.h    |  17
-rw-r--r--  rts/sm/NonMovingSweep.c       |   4
-rw-r--r--  rts/sm/Sanity.c               |   2
11 files changed, 487 insertions(+), 61 deletions(-)
diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h
index 9a039fd95c..f27ce23b0b 100644
--- a/includes/rts/Flags.h
+++ b/includes/rts/Flags.h
@@ -52,7 +52,9 @@ typedef struct _GC_FLAGS {
double oldGenFactor;
double pcFreeHeap;
- bool useNonmoving;
+ bool useNonmoving; // default = false
+ bool nonmovingSelectorOpt; // Do selector optimization in the
+ // non-moving heap, default = false
uint32_t generations;
bool squeezeUpdFrames;
diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h
index 32cf98958f..4afc3689cb 100644
--- a/includes/rts/storage/Block.h
+++ b/includes/rts/storage/Block.h
@@ -88,17 +88,23 @@ typedef struct bdescr_ {
StgPtr start; // [READ ONLY] start addr of memory
- StgPtr free; // First free byte of memory.
- // allocGroup() sets this to the value of start.
- // NB. during use this value should lie
- // between start and start + blocks *
- // BLOCK_SIZE. Values outside this
- // range are reserved for use by the
- // block allocator. In particular, the
- // value (StgPtr)(-1) is used to
- // indicate that a block is unallocated.
- //
- // Unused by the non-moving allocator.
+ union {
+ StgPtr free; // First free byte of memory.
+ // allocGroup() sets this to the value of start.
+ // NB. during use this value should lie
+ // between start and start + blocks *
+ // BLOCK_SIZE. Values outside this
+ // range are reserved for use by the
+ // block allocator. In particular, the
+ // value (StgPtr)(-1) is used to
+ // indicate that a block is unallocated.
+ //
+ // Unused by the non-moving allocator.
+ struct NonmovingSegmentInfo {
+ StgWord8 log_block_size;
+ StgWord16 next_free_snap;
+ } nonmoving_segment;
+ };
struct bdescr_ *link; // used for chaining blocks together
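For illustration, a minimal standalone sketch of the union introduced above: the non-moving allocator never uses `free`, so its slot in the block descriptor can carry per-segment metadata, and reading the block size then touches only the descriptor rather than the segment header. The types below are simplified stand-ins, not the real bdescr_.

    /* Simplified model of the bdescr union above; stand-in types only. */
    #include <stdint.h>
    #include <stdio.h>

    typedef uintptr_t StgWord;

    struct SegmentInfoModel {
        uint8_t  log_block_size;  /* log2 of the segment's block size  */
        uint16_t next_free_snap;  /* snapshot of next_free at GC start */
    };

    struct BdescrModel {
        StgWord *start;                  /* start of block memory          */
        union {
            StgWord *free;               /* used by moving allocators only */
            struct SegmentInfoModel seg; /* reused by the non-moving heap  */
        };
    };

    int main(void) {
        struct BdescrModel bd = { .start = NULL };
        bd.seg.log_block_size = 5;       /* 2^5 = 32-byte blocks */
        bd.seg.next_free_snap = 0;
        printf("block size = %u bytes\n", 1u << bd.seg.log_block_size);
        return 0;
    }
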
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index c606d86418..0e28b980ac 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -157,6 +157,7 @@ void initRtsFlagsDefaults(void)
RtsFlags.GcFlags.pcFreeHeap = 3; /* 3% */
RtsFlags.GcFlags.oldGenFactor = 2;
RtsFlags.GcFlags.useNonmoving = false;
+ RtsFlags.GcFlags.nonmovingSelectorOpt = false;
RtsFlags.GcFlags.generations = 2;
RtsFlags.GcFlags.squeezeUpdFrames = true;
RtsFlags.GcFlags.compact = false;
@@ -1542,6 +1543,10 @@ error = true;
OPTION_SAFE;
RtsFlags.GcFlags.useNonmoving = true;
unchecked_arg_start++;
+ if (rts_argv[arg][3] == 's') {
+ RtsFlags.GcFlags.nonmovingSelectorOpt = true;
+ unchecked_arg_start++;
+ }
break;
case 'c': /* Debugging tool: show current cost centre on
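Judging from the parsing above (`unchecked_arg_start` advances past the non-moving flag character, then checks `rts_argv[arg][3] == 's'`), the optimisation appears to be requested by suffixing `s` to the existing non-moving-GC flag, i.e. something like `+RTS -xn` for the non-moving collector alone and `+RTS -xns` to also enable the selector optimisation. The exact flag spelling is an assumption based on this hunk, not confirmed by it.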
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index a53c166849..1968f2f693 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -470,6 +470,7 @@ library
sm/NonMovingCensus.c
sm/NonMovingMark.c
sm/NonMovingScav.c
+ sm/NonMovingShortcut.c
sm/NonMovingSweep.c
sm/Sanity.c
sm/Scav.c
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index b47dca1595..8174fa754d 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -377,15 +377,16 @@ static void* nonmovingConcurrentMark(void *mark_queue);
static void nonmovingClearBitmap(struct NonmovingSegment *seg);
static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO **resurrected_threads);
-static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t block_size)
+static void nonmovingInitSegment(struct NonmovingSegment *seg, uint8_t log_block_size)
{
+ bdescr *bd = Bdescr((P_) seg);
seg->link = NULL;
seg->todo_link = NULL;
seg->next_free = 0;
- seg->next_free_snap = 0;
- seg->block_size = block_size;
nonmovingClearBitmap(seg);
- Bdescr((P_)seg)->u.scan = nonmovingSegmentGetBlock(seg, 0);
+ bd->nonmoving_segment.log_block_size = log_block_size;
+ bd->nonmoving_segment.next_free_snap = 0;
+ bd->u.scan = nonmovingSegmentGetBlock(seg, 0);
}
// Add a segment to the free list.
@@ -560,7 +561,7 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
// Update live data estimate.
// See Note [Live data accounting in nonmoving collector].
- unsigned int new_blocks = block_count - current->next_free_snap;
+ unsigned int new_blocks = block_count - nonmovingSegmentInfo(current)->next_free_snap;
unsigned int block_size = 1 << log_block_size;
atomic_inc(&oldest_gen->live_estimate, new_blocks * block_size / sizeof(W_));
@@ -704,7 +705,7 @@ static void nonmovingPrepareMark(void)
// Update current segments' snapshot pointers
for (uint32_t cap_n = 0; cap_n < n_capabilities; ++cap_n) {
struct NonmovingSegment *seg = alloca->current[cap_n];
- seg->next_free_snap = seg->next_free;
+ nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
}
// Update filled segments' snapshot pointers and move to sweep_list
@@ -720,7 +721,7 @@ static void nonmovingPrepareMark(void)
prefetchForWrite(seg->link->bitmap);
nonmovingClearBitmap(seg);
// Set snapshot
- seg->next_free_snap = seg->next_free;
+ nonmovingSegmentInfo(seg)->next_free_snap = seg->next_free;
if (seg->link)
seg = seg->link;
else
@@ -1152,12 +1153,13 @@ void assert_in_nonmoving_heap(StgPtr p)
void nonmovingPrintSegment(struct NonmovingSegment *seg)
{
int num_blocks = nonmovingSegmentBlockCount(seg);
+ uint8_t log_block_size = nonmovingSegmentLogBlockSize(seg);
debugBelch("Segment with %d blocks of size 2^%d (%d bytes, %u words, scan: %p)\n",
num_blocks,
- seg->block_size,
- 1 << seg->block_size,
- (unsigned int) ROUNDUP_BYTES_TO_WDS(1 << seg->block_size),
+ log_block_size,
+ 1 << log_block_size,
+ (unsigned int) ROUNDUP_BYTES_TO_WDS(1 << log_block_size),
(void*)Bdescr((P_)seg)->u.scan);
for (nonmoving_block_idx p_idx = 0; p_idx < seg->next_free; ++p_idx) {
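As a small standalone illustration of the live-data accounting step above (`new_blocks * block_size / sizeof(W_)`): the delta between `next_free` and its snapshot, converted from blocks to words. Names are local stand-ins, assuming a 64-bit word.

    #include <stdint.h>
    #include <stdio.h>

    typedef uintptr_t W_;

    /* Words of newly live data in a segment since the last snapshot,
     * mirroring: new_blocks * block_size / sizeof(W_). */
    static unsigned int live_words_delta(unsigned int next_free,
                                         unsigned int next_free_snap,
                                         uint8_t log_block_size) {
        unsigned int new_blocks = next_free - next_free_snap;
        unsigned int block_size = 1u << log_block_size;
        return new_blocks * block_size / sizeof(W_);
    }

    int main(void) {
        /* 10 new 32-byte blocks -> 40 words on a 64-bit target */
        printf("%u\n", live_words_delta(42, 32, 5));
        return 0;
    }
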
diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h
index 17adfd6666..b3d4e14065 100644
--- a/rts/sm/NonMoving.h
+++ b/rts/sm/NonMoving.h
@@ -38,13 +38,21 @@ struct NonmovingSegment {
struct NonmovingSegment *link; // for linking together segments into lists
struct NonmovingSegment *todo_link; // NULL when not in todo list
nonmoving_block_idx next_free; // index of the next unallocated block
- nonmoving_block_idx next_free_snap; // snapshot of next_free
- uint8_t block_size; // log2 of block size in bytes
uint8_t bitmap[]; // liveness bitmap
// After the liveness bitmap comes the data blocks. Note that we need to
// ensure that the size of this struct (including the bitmap) is a multiple
// of the word size since GHC assumes that all object pointers are
// so-aligned.
+
+ // N.B. There are also bits of information which are stored in the
+ // NonmovingSegmentInfo in the segment's block descriptor. Namely:
+ //
+ // * the block size can be found in nonmovingSegmentInfo(seg)->log_block_size.
+ // * the next_free snapshot can be found in
+ // nonmovingSegmentInfo(seg)->next_free_snap.
+ //
+ // This allows us to mark a nonmoving closure without bringing the
+ // NonmovingSegment header into cache.
};
// This is how we mark end of todo lists. Not NULL because todo_link == NULL
@@ -123,11 +131,20 @@ void *nonmovingAllocate(Capability *cap, StgWord sz);
void nonmovingAddCapabilities(uint32_t new_n_caps);
void nonmovingPushFreeSegment(struct NonmovingSegment *seg);
+
+INLINE_HEADER struct NonmovingSegmentInfo *nonmovingSegmentInfo(struct NonmovingSegment *seg) {
+ return &Bdescr((StgPtr) seg)->nonmoving_segment;
+}
+
+INLINE_HEADER uint8_t nonmovingSegmentLogBlockSize(struct NonmovingSegment *seg) {
+ return nonmovingSegmentInfo(seg)->log_block_size;
+}
+
// Add a segment to the appropriate active list.
INLINE_HEADER void nonmovingPushActiveSegment(struct NonmovingSegment *seg)
{
struct NonmovingAllocator *alloc =
- nonmovingHeap.allocators[seg->block_size - NONMOVING_ALLOCA0];
+ nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
while (true) {
struct NonmovingSegment *current_active = (struct NonmovingSegment*)VOLATILE_LOAD(&alloc->active);
seg->link = current_active;
@@ -141,7 +158,7 @@ INLINE_HEADER void nonmovingPushActiveSegment(struct NonmovingSegment *seg)
INLINE_HEADER void nonmovingPushFilledSegment(struct NonmovingSegment *seg)
{
struct NonmovingAllocator *alloc =
- nonmovingHeap.allocators[seg->block_size - NONMOVING_ALLOCA0];
+ nonmovingHeap.allocators[nonmovingSegmentLogBlockSize(seg) - NONMOVING_ALLOCA0];
while (true) {
struct NonmovingSegment *current_filled = (struct NonmovingSegment*)VOLATILE_LOAD(&alloc->filled);
seg->link = current_filled;
@@ -162,7 +179,7 @@ void assert_in_nonmoving_heap(StgPtr p);
// The block size of a given segment in bytes.
INLINE_HEADER unsigned int nonmovingSegmentBlockSize(struct NonmovingSegment *seg)
{
- return 1 << seg->block_size;
+ return 1 << nonmovingSegmentLogBlockSize(seg);
}
// How many blocks does a segment with the given block size have?
@@ -180,7 +197,7 @@ unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size);
// How many blocks does the given segment contain? Also the size of the bitmap.
INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *seg)
{
- return nonmovingBlockCountFromSize(seg->block_size);
+ return nonmovingBlockCountFromSize(nonmovingSegmentLogBlockSize(seg));
}
// Get a pointer to the given block index assuming that the block size is as
@@ -188,7 +205,7 @@ INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *s
// available). The log_block_size argument must be equal to nonmovingSegmentLogBlockSize(seg).
INLINE_HEADER void *nonmovingSegmentGetBlock_(struct NonmovingSegment *seg, uint8_t log_block_size, nonmoving_block_idx i)
{
- ASSERT(log_block_size == seg->block_size);
+ ASSERT(log_block_size == nonmovingSegmentLogBlockSize(seg));
// Block size in bytes
unsigned int blk_size = 1 << log_block_size;
// Bitmap size in bytes
@@ -204,7 +221,7 @@ INLINE_HEADER void *nonmovingSegmentGetBlock_(struct NonmovingSegment *seg, uint
// Get a pointer to the given block index.
INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmoving_block_idx i)
{
- return nonmovingSegmentGetBlock_(seg, seg->block_size, i);
+ return nonmovingSegmentGetBlock_(seg, nonmovingSegmentLogBlockSize(seg), i);
}
// Get the segment which a closure resides in. Assumes that pointer points into
@@ -227,7 +244,7 @@ INLINE_HEADER nonmoving_block_idx nonmovingGetBlockIdx(StgPtr p)
struct NonmovingSegment *seg = nonmovingGetSegment(p);
ptrdiff_t blk0 = (ptrdiff_t)nonmovingSegmentGetBlock(seg, 0);
ptrdiff_t offset = (ptrdiff_t)p - blk0;
- return (nonmoving_block_idx) (offset >> seg->block_size);
+ return (nonmoving_block_idx) (offset >> nonmovingSegmentLogBlockSize(seg));
}
// TODO: Eliminate this
@@ -268,8 +285,9 @@ INLINE_HEADER bool nonmovingClosureMarked(StgPtr p)
// segment is in the set of segments that will be swept this collection cycle.
INLINE_HEADER bool nonmovingSegmentBeingSwept(struct NonmovingSegment *seg)
{
- unsigned int n = nonmovingSegmentBlockCount(seg);
- return seg->next_free_snap >= n;
+ struct NonmovingSegmentInfo *seginfo = nonmovingSegmentInfo(seg);
+ unsigned int n = nonmovingBlockCountFromSize(seginfo->log_block_size);
+ return seginfo->next_free_snap >= n;
}
// Can be called during a major collection to determine whether a particular
@@ -294,6 +312,11 @@ INLINE_HEADER bool nonmovingClosureBeingSwept(StgClosure *p)
}
}
+INLINE_HEADER bool isNonmovingClosure(StgClosure *p)
+{
+ return !HEAP_ALLOCED_GC(p) || Bdescr((P_)p)->flags & BF_NONMOVING;
+}
+
#if defined(DEBUG)
void nonmovingPrintSegment(struct NonmovingSegment *seg);
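The accessors above all reduce to power-of-two arithmetic on `log_block_size`: with blocks of size 2^k, the index of a pointer within a segment is a right shift (`offset >> log_block_size`) and the block address is a shift back. A self-contained sketch, with a made-up segment layout:

    #include <stdint.h>
    #include <stdio.h>

    /* With blocks of size 2^k, offset <-> index conversions are shifts. */
    static unsigned block_idx(uintptr_t blk0, uintptr_t p, uint8_t k) {
        return (unsigned)((p - blk0) >> k);
    }

    static uintptr_t block_addr(uintptr_t blk0, unsigned i, uint8_t k) {
        return blk0 + ((uintptr_t)i << k);
    }

    int main(void) {
        uint8_t   k    = 5;                /* 32-byte blocks                */
        uintptr_t blk0 = 0x100000;         /* hypothetical block 0 address  */
        uintptr_t p    = blk0 + 3*32 + 8;  /* interior pointer into block 3 */
        printf("idx=%u addr=%#lx\n",
               block_idx(blk0, p, k),
               (unsigned long)block_addr(blk0, 3, k));
        return 0;
    }
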
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 8dca11a417..54398a7f3c 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -11,6 +11,7 @@
// This is sometimes declared as a register variable therefore it is necessary
// to include the declaration so that the compiler doesn't clobber the register.
#include "NonMovingMark.h"
+#include "NonMovingShortcut.h"
#include "NonMoving.h"
#include "BlockAlloc.h" /* for countBlocks */
#include "HeapAlloc.h"
@@ -24,7 +25,7 @@
#include "MarkWeak.h"
#include "sm/Storage.h"
-static void mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin);
+static void mark_closure (MarkQueue *queue, const StgClosure *p, StgClosure **origin);
static void mark_tso (MarkQueue *queue, StgTSO *tso);
static void mark_stack (MarkQueue *queue, StgStack *stack);
static void mark_PAP_payload (MarkQueue *queue,
@@ -482,7 +483,7 @@ void push_closure (MarkQueue *q,
MarkQueueEnt ent = {
.mark_closure = {
- .p = UNTAG_CLOSURE(p),
+ .p = p,
.origin = origin,
}
};
@@ -574,6 +575,7 @@ void updateRemembSetPushThunkEager(Capability *cap,
StgThunk *thunk)
{
/* N.B. info->i.type mustn't be WHITEHOLE */
+ MarkQueue *queue = &cap->upd_rem_set.queue;
switch (info->i.type) {
case THUNK:
case THUNK_1_0:
@@ -582,7 +584,6 @@ void updateRemembSetPushThunkEager(Capability *cap,
case THUNK_1_1:
case THUNK_0_2:
{
- MarkQueue *queue = &cap->upd_rem_set.queue;
push_thunk_srt(queue, &info->i);
for (StgWord i = 0; i < info->i.layout.payload.ptrs; i++) {
@@ -596,7 +597,6 @@ void updateRemembSetPushThunkEager(Capability *cap,
}
case AP:
{
- MarkQueue *queue = &cap->upd_rem_set.queue;
StgAP *ap = (StgAP *) thunk;
if (check_in_nonmoving_heap(ap->fun)) {
push_closure(queue, ap->fun, NULL);
@@ -608,6 +608,16 @@ void updateRemembSetPushThunkEager(Capability *cap,
case BLACKHOLE:
// TODO: This is right, right?
break;
+ // The selector optimization performed by the nonmoving mark may have
+ // already overwritten the thunk we are updating with an indirection.
+ case IND:
+ {
+ StgInd *ind = (StgInd *) thunk;
+ if (check_in_nonmoving_heap(ind->indirectee)) {
+ push_closure(queue, ind->indirectee, NULL);
+ }
+ break;
+ }
default:
barf("updateRemembSetPushThunk: invalid thunk pushed: p=%p, type=%d",
thunk, info->i.type);
@@ -819,7 +829,7 @@ static MarkQueueEnt markQueuePop (MarkQueue *q)
// MarkQueueEnt encoding always places the pointer to the object to be
// marked first.
prefetchForRead(&new.mark_closure.p->header.info);
- prefetchForRead(&nonmovingGetSegment_unchecked((StgPtr) new.mark_closure.p)->block_size);
+ prefetchForRead(Bdescr((StgPtr) new.mark_closure.p));
q->prefetch_queue[i] = new;
i = (i + 1) % MARK_PREFETCH_QUEUE_DEPTH;
}
@@ -1140,13 +1150,15 @@ bump_static_flag(StgClosure **link_field, StgClosure *q STG_UNUSED)
}
static GNUC_ATTR_HOT void
-mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
+mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
{
- (void)origin; // TODO: should be used for selector/thunk optimisations
+ StgClosure *p = (StgClosure*)p0;
try_again:
;
bdescr *bd = NULL;
+ StgClosure *p_next = NULL;
+ StgWord tag = GET_CLOSURE_TAG(p);
p = UNTAG_CLOSURE(p);
# define PUSH_FIELD(obj, field) \
@@ -1172,7 +1184,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
markQueuePushThunkSrt(queue, info); // TODO this function repeats the check above
}
}
- return;
+ goto done;
case FUN_STATIC:
if (info->srt != 0 || info->layout.payload.ptrs != 0) {
@@ -1188,13 +1200,13 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
}
}
}
- return;
+ goto done;
case IND_STATIC:
if (bump_static_flag(IND_STATIC_LINK((StgClosure *)p), p)) {
PUSH_FIELD((StgInd *) p, indirectee);
}
- return;
+ goto done;
case CONSTR:
case CONSTR_1_0:
@@ -1205,7 +1217,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
PUSH_FIELD(p, payload[i]);
}
}
- return;
+ goto done;
case WHITEHOLE:
while (get_volatile_itbl(p)->type == WHITEHOLE);
@@ -1227,7 +1239,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
//
// * a mutable object might have been updated
// * we might have aged an object
- return;
+ goto done;
}
ASSERTM(LOOKS_LIKE_CLOSURE_PTR(p), "invalid closure, info=%p", p->header.info);
@@ -1239,10 +1251,10 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
if (bd->flags & BF_LARGE) {
if (! (bd->flags & BF_NONMOVING_SWEEPING)) {
// Not in the snapshot
- return;
+ goto done;
}
if (bd->flags & BF_MARKED) {
- return;
+ goto done;
}
// Mark contents
@@ -1258,10 +1270,10 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
uint8_t mark = nonmovingGetMark(seg, block_idx);
/* Don't mark things we've already marked (since we may loop) */
if (mark == nonmovingMarkEpoch)
- return;
+ goto done;
StgClosure *snapshot_loc =
- (StgClosure *) nonmovingSegmentGetBlock(seg, seg->next_free_snap);
+ (StgClosure *) nonmovingSegmentGetBlock(seg, nonmovingSegmentInfo(seg)->next_free_snap);
if (p >= snapshot_loc && mark == 0) {
/*
* In this case we are looking at a block that wasn't allocated
@@ -1269,7 +1281,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
* things above the allocation pointer that aren't marked since
* they may not be valid objects.
*/
- return;
+ goto done;
}
}
}
@@ -1287,7 +1299,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
}
ASSERT(found_it);
#endif
- return;
+ return; // we don't update origin here! TODO(osa): explain this
}
else {
@@ -1419,10 +1431,24 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
}
- case IND:
- case BLACKHOLE:
+ case IND: {
PUSH_FIELD((StgInd *) p, indirectee);
+ if (origin != NULL) {
+ p_next = ((StgInd*)p)->indirectee;
+ }
+ break;
+ }
+
+ case BLACKHOLE: {
+ PUSH_FIELD((StgInd *) p, indirectee);
+ StgClosure *indirectee = ((StgInd*)p)->indirectee;
+ if (GET_CLOSURE_TAG(indirectee) == 0 || origin == NULL) {
+ // do nothing
+ } else {
+ p_next = indirectee;
+ }
break;
+ }
case MUT_VAR_CLEAN:
case MUT_VAR_DIRTY:
@@ -1439,8 +1465,11 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
}
case THUNK_SELECTOR:
- PUSH_FIELD((StgSelector *) p, selectee);
- // TODO: selector optimization
+ if (RtsFlags.GcFlags.nonmovingSelectorOpt) {
+ nonmoving_eval_thunk_selector(queue, (StgSelector*)p, origin);
+ } else {
+ PUSH_FIELD((StgSelector *) p, selectee);
+ }
break;
case AP_STACK: {
@@ -1509,7 +1538,7 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
// the stack will be fully marked before we sweep due to the final
// post-mark synchronization. Most importantly, we do not set its
// mark bit, the mutator is responsible for this.
- return;
+ goto done;
}
break;
}
@@ -1568,13 +1597,28 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
bd->flags |= BF_MARKED;
}
RELEASE_LOCK(&nonmoving_large_objects_mutex);
- } else {
+ } else if (bd->flags & BF_NONMOVING) {
// TODO: Kill repetition
struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
nonmoving_block_idx block_idx = nonmovingGetBlockIdx((StgPtr) p);
nonmovingSetMark(seg, block_idx);
nonmoving_live_words += nonmovingSegmentBlockSize(seg) / sizeof(W_);
}
+
+ // If we found an indirection to shortcut, keep going.
+ if (p_next) {
+ p = p_next;
+ goto try_again;
+ }
+
+done:
+ if (origin != NULL && (!HEAP_ALLOCED(p) || bd->flags & BF_NONMOVING)) {
+ if (UNTAG_CLOSURE((StgClosure*)p0) != p && *origin == p0) {
+ if (cas((StgVolatilePtr)origin, (StgWord)p0, (StgWord)TAG_CLOSURE(tag, p)) == (StgWord)p0) {
+ // debugBelch("Thunk optimization successful\n");
+ }
+ }
+ }
}
/* This is the main mark loop.
@@ -1666,7 +1710,7 @@ bool nonmovingIsAlive (StgClosure *p)
struct NonmovingSegment *seg = nonmovingGetSegment((StgPtr) p);
nonmoving_block_idx i = nonmovingGetBlockIdx((StgPtr) p);
uint8_t mark = nonmovingGetMark(seg, i);
- if (i >= seg->next_free_snap) {
+ if (i >= nonmovingSegmentInfo(seg)->next_free_snap) {
// If the object is allocated after next_free_snap then one of the
// following must be true:
//
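A minimal single-threaded model of the short-cutting added to mark_closure above: follow a chain of indirections from `p0` to a final closure `p`, then atomically redirect the referencing field to `p`, but only if it still holds `p0` (a mutator may have raced us). C11 atomics stand in for the RTS's cas(), and pointer tagging is elided for brevity.

    #include <stdatomic.h>
    #include <stdio.h>

    typedef struct Closure Closure;
    struct Closure { Closure *indirectee; /* NULL means "a value" */ };

    /* Follow indirections from p0; then try to redirect *origin to the
     * final closure, mirroring the p_next/goto try_again loop and the
     * cas() at the done: label. */
    static Closure *shortcut(_Atomic(Closure *) *origin, Closure *p0) {
        Closure *p = p0;
        while (p->indirectee)
            p = p->indirectee;
        Closure *expected = p0;
        if (origin && p != p0)
            atomic_compare_exchange_strong(origin, &expected, p);
        return p;
    }

    int main(void) {
        Closure v = { NULL }, i2 = { &v }, i1 = { &i2 };
        _Atomic(Closure *) field = &i1;   /* some field referencing i1 */
        shortcut(&field, &i1);
        printf("%s\n", atomic_load(&field) == &v ? "short-cut to value"
                                                 : "unchanged");
        return 0;
    }
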
diff --git a/rts/sm/NonMovingShortcut.c b/rts/sm/NonMovingShortcut.c
new file mode 100644
index 0000000000..83c4857677
--- /dev/null
+++ b/rts/sm/NonMovingShortcut.c
@@ -0,0 +1,326 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2019
+ *
+ * Non-moving garbage collector and allocator:
+ * Indirection short-cutting and the selector optimisation
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "GC.h"
+#include "SMPClosureOps.h"
+#include "NonMovingMark.h"
+#include "NonMovingShortcut.h"
+#include "Printer.h"
+
+#define MAX_THUNK_SELECTOR_DEPTH 16
+
+//#define SELECTOR_OPT_DEBUG
+
+#if defined(SELECTOR_OPT_DEBUG)
+static void
+print_selector_chain(StgClosure *p)
+{
+ debugBelch("Selector chain: %p", (void*)p);
+ StgClosure *next = p->payload[0];
+ while (next != NULL) {
+ debugBelch(", %p", next);
+ next = next->payload[0];
+ }
+ debugBelch("\n");
+}
+#endif
+
+static void
+update_selector_chain(
+ StgClosure *chain,
+ StgClosure **origin,
+ StgSelector * const p0,
+ StgClosure * const val
+) {
+ ASSERT(val != NULL);
+
+ // Make sure we don't introduce non-moving-to-moving pointers here.
+ ASSERT(isNonmovingClosure(val));
+
+ // We can't handle this case because we don't know the closure's info
+ // pointer before we locked it.
+ ASSERT(chain != val);
+
+#if defined(SELECTOR_OPT_DEBUG)
+ if (chain != NULL) {
+ print_selector_chain(chain);
+ debugBelch("Value: ");
+ printClosure(val);
+ }
+#endif
+
+ while (chain) {
+ // debugBelch("chain: %p\n", (void*)chain);
+
+ StgClosure *next = chain->payload[0];
+
+ // We only update closures in the non-moving heap
+ ASSERT(isNonmovingClosure(chain));
+
+ ((StgInd*)chain)->indirectee = val;
+ unlockClosure((StgClosure*)chain, &stg_IND_info);
+
+ chain = next;
+ }
+
+ if (origin != NULL && (StgClosure*)p0 != val) {
+ cas((StgVolatilePtr)origin, (StgWord)p0, (StgWord)val);
+ }
+}
+
+// Returns the value of the selector thunk. The value is a non-moving closure.
+// If it's not possible to evaluate the selector thunk, the return value will
+// be the selector itself.
+static StgClosure*
+nonmoving_eval_thunk_selector_(
+ MarkQueue *queue,
+ StgSelector * const p0,
+ StgClosure ** const origin,
+ int depth
+) {
+ // This function should only be called on non-moving objects.
+ ASSERT(HEAP_ALLOCED_GC((P_)p0) && isNonmovingClosure((StgClosure*)p0));
+
+ markQueuePushClosure(queue, (StgClosure*)p0, NULL);
+
+ // INVARIANT: A non-moving object. Locked (below).
+ StgClosure *p = (StgClosure*)p0;
+
+ // Chain of non-moving selectors to update. These will be INDs to `p` when
+ // we reach a value. INVARIANT: All objects in the chain are locked, and
+ // in the non-moving heap.
+ StgClosure *chain = NULL;
+
+ // Variables to update: p.
+selector_changed:
+ ;
+
+ // debugBelch("Selector changed: %p\n", (void*)p);
+
+ // Lock the selector to avoid concurrent modification in mutators
+ const StgInfoTable *selector_info_ptr = lockClosure((StgClosure*)p);
+ StgInfoTable *selector_info_tbl = INFO_PTR_TO_STRUCT(selector_info_ptr);
+
+ if (selector_info_tbl->type != THUNK_SELECTOR) {
+ // Selector updated in the meantime, or we reached a value. Update
+ // the chain.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ // The closure is locked and it's a selector thunk. If the selectee is a
+ // CONSTR we do the selection here and the selected value will be the
+ // value of this selector thunk.
+ //
+ // Two cases:
+ //
+ // - If the selected value is also a selector thunk, then we loop and
+ // evaluate it. The final value will be the value of both the current
+ // selector and the selected value (which is also a selector thunk).
+ //
+ // - If the selectee is a selector thunk, we recursively evaluate it (up to
+ // a certain depth, specified with MAX_THUNK_SELECTOR_DEPTH), then do the
+ // selection on the value of it.
+
+ //
+ // Do the selection
+ //
+
+ uint32_t field = selector_info_tbl->layout.selector_offset;
+ StgClosure *selectee = UNTAG_CLOSURE(((StgSelector*)p)->selectee);
+
+ // Variables to update: selectee
+selectee_changed:
+ // debugBelch("Selectee changed: %p\n", (void*)selectee);
+
+ if (!isNonmovingClosure(selectee)) {
+ // The selectee is a moving object, and it may be moved by a concurrent
+ // minor GC while we read the info table and fields, so don't try to
+ // read the fields, just update the chain.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ // Selectee is a non-moving object, mark it.
+ markQueuePushClosure(queue, selectee, NULL);
+
+ const StgInfoTable *selectee_info_tbl = get_itbl(selectee);
+ switch (selectee_info_tbl->type) {
+ case WHITEHOLE: {
+ // Probably a loop. Abort.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_NOCAF: {
+ // Selectee is a constructor in the non-moving heap.
+ // Select the field.
+
+ // Check that the size is in range.
+ ASSERT(field < (StgWord32)(selectee_info_tbl->layout.payload.ptrs +
+ selectee_info_tbl->layout.payload.nptrs));
+
+ StgClosure *val = UNTAG_CLOSURE(selectee->payload[field]);
+
+ // `val` is the value of this selector thunk. We need to check a
+ // few cases:
+ //
+ // - If `val` is in the moving heap, we stop here and update the
+ // chain. All updated objects should be added to the mut_list.
+ // (TODO (osa): What happens if the value is evacuated as we do
+ // this?)
+ //
+ // - If `val` is in the non-moving heap, we check if it's also a
+ // selector. If it is we add it to the chain and loop.
+
+ // Follow indirections. Variables to update: `val`.
+ val_changed:
+ if (!isNonmovingClosure(val)) {
+ // The selected value is a moving object, so we won't be
+ // updating the chain to this object.
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ switch (get_itbl(val)->type) {
+ case IND:
+ case IND_STATIC:
+ ;
+ // Follow the indirection
+ StgClosure *indirectee = UNTAG_CLOSURE(((StgInd*)val)->indirectee);
+ if (isNonmovingClosure(indirectee)) {
+ val = UNTAG_CLOSURE(((StgInd*)val)->indirectee);
+ goto val_changed;
+ } else {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+ case THUNK_SELECTOR:
+ // Value of the selector thunk is again a selector thunk in the
+ // non-moving heap. Add the current selector to the chain and
+ // loop.
+ p->payload[0] = chain;
+ chain = p;
+ p = val;
+ goto selector_changed;
+ default:
+ // Found a value, add the current selector to the chain and
+ // update it.
+ p->payload[0] = chain;
+ chain = p;
+ update_selector_chain(chain, origin, p0, val);
+ return val;
+ }
+ }
+
+ case IND:
+ case IND_STATIC: {
+ StgClosure *indirectee = UNTAG_CLOSURE(((StgInd *)selectee)->indirectee);
+ if (isNonmovingClosure(indirectee)) {
+ selectee = indirectee;
+ goto selectee_changed;
+ } else {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+ }
+
+ case BLACKHOLE: {
+ StgClosure *indirectee = ((StgInd*)selectee)->indirectee;
+
+ if (!isNonmovingClosure(UNTAG_CLOSURE(indirectee))) {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return p;
+ }
+
+ // Establish whether this BH has been updated, and is now an
+ // indirection, as in evacuate().
+ if (GET_CLOSURE_TAG(indirectee) == 0) {
+ const StgInfoTable *i = indirectee->header.info;
+ if (i == &stg_TSO_info
+ || i == &stg_WHITEHOLE_info
+ || i == &stg_BLOCKING_QUEUE_CLEAN_info
+ || i == &stg_BLOCKING_QUEUE_DIRTY_info) {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ }
+ ASSERT(i != &stg_IND_info); // TODO not sure about this part
+ }
+
+ // It's an indirection, follow it.
+ selectee = UNTAG_CLOSURE(indirectee);
+ goto selectee_changed;
+ }
+
+ case AP:
+ case AP_STACK:
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_STATIC: {
+ // Not evaluated yet
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ }
+
+ case THUNK_SELECTOR: {
+ // Selectee is a selector thunk. Evaluate it if we haven't reached
+ // the recursion limit yet.
+ if (depth < MAX_THUNK_SELECTOR_DEPTH) {
+ StgClosure *new_selectee =
+ UNTAG_CLOSURE(nonmoving_eval_thunk_selector_(
+ queue, (StgSelector*)selectee, NULL, depth+1));
+ ASSERT(isNonmovingClosure(new_selectee));
+ if (selectee == new_selectee) {
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ } else {
+ selectee = new_selectee;
+ goto selectee_changed;
+ }
+ } else {
+ // Recursion limit reached
+ unlockClosure(p, selector_info_ptr);
+ update_selector_chain(chain, origin, p0, p);
+ return (StgClosure*)p;
+ }
+ }
+
+ default: {
+ barf("nonmoving_eval_thunk_selector: strange selectee %d",
+ (int)(selectee_info_tbl->type));
+ }
+ }
+}
+
+void
+nonmoving_eval_thunk_selector(MarkQueue *queue, StgSelector *p, StgClosure **origin)
+{
+ nonmoving_eval_thunk_selector_(queue, p, origin, 0);
+}
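To make the control flow above concrete, here is a compact single-threaded model of the selector optimisation with no locking, no mark queue and no moving heap: evaluate a selector whose selectee is (or evaluates to) a constructor, recursing through selector-of-selector chains up to a depth limit and rewriting each resolved selector into an indirection. All types are toy stand-ins.

    #include <stdio.h>

    enum kind { CONSTR, SELECTOR, IND };

    typedef struct Obj Obj;
    struct Obj {
        enum kind k;
        int  field;        /* SELECTOR: index of the field to select     */
        Obj *payload[2];   /* CONSTR: fields; SELECTOR/IND: [0] = target */
    };

    /* Evaluate selector s; on success rewrite it into an IND to the value. */
    static Obj *eval_selector(Obj *s, int depth) {
        if (s->k != SELECTOR || depth >= 16) /* cf. MAX_THUNK_SELECTOR_DEPTH */
            return s;
        Obj *sel = s->payload[0];
        while (sel->k == IND)                /* follow indirections          */
            sel = sel->payload[0];
        if (sel->k == SELECTOR)              /* selectee is itself a selector */
            sel = eval_selector(sel, depth + 1);
        if (sel->k != CONSTR)                /* not evaluated yet: give up    */
            return s;
        Obj *val = sel->payload[s->field];   /* do the selection              */
        while (val->k == IND)                /* follow indirections in val    */
            val = val->payload[0];
        s->k = IND;                          /* short-cut future entries      */
        s->payload[0] = val;
        return val;
    }

    int main(void) {
        Obj a = { CONSTR, 0, { NULL, NULL } };
        Obj b = { CONSTR, 0, { NULL, NULL } };
        Obj pair = { CONSTR, 0, { &a, &b } };            /* (a, b)     */
        Obj snd_pair = { SELECTOR, 1, { &pair, NULL } }; /* snd (a, b) */
        printf("%s\n", eval_selector(&snd_pair, 0) == &b ? "selected b" : "?");
        printf("%s\n", snd_pair.k == IND ? "selector is now an IND" : "?");
        return 0;
    }
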
diff --git a/rts/sm/NonMovingShortcut.h b/rts/sm/NonMovingShortcut.h
new file mode 100644
index 0000000000..72297884aa
--- /dev/null
+++ b/rts/sm/NonMovingShortcut.h
@@ -0,0 +1,17 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2019
+ *
+ * Non-moving garbage collector and allocator:
+ * Indirection short-cutting and the selector optimisation
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "BeginPrivate.h"
+
+void
+nonmoving_eval_thunk_selector(MarkQueue *queue, StgSelector *p, StgClosure **origin);
+
+#include "EndPrivate.h"
diff --git a/rts/sm/NonMovingSweep.c b/rts/sm/NonMovingSweep.c
index 7af5508afc..3ee27ef3b4 100644
--- a/rts/sm/NonMovingSweep.c
+++ b/rts/sm/NonMovingSweep.c
@@ -41,7 +41,7 @@ nonmovingSweepSegment(struct NonmovingSegment *seg)
} else if (!found_free) {
found_free = true;
seg->next_free = i;
- seg->next_free_snap = i;
+ nonmovingSegmentInfo(seg)->next_free_snap = i;
Bdescr((P_)seg)->u.scan = (P_)nonmovingSegmentGetBlock(seg, i);
seg->bitmap[i] = 0;
} else {
@@ -63,7 +63,7 @@ nonmovingSweepSegment(struct NonmovingSegment *seg)
return SEGMENT_FILLED;
} else {
ASSERT(seg->next_free == 0);
- ASSERT(seg->next_free_snap == 0);
+ ASSERT(nonmovingSegmentInfo(seg)->next_free_snap == 0);
return SEGMENT_FREE;
}
}
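A toy version of the sweep logic touched above: walk a segment's mark bitmap, record the first unmarked block as the new `next_free` (and snapshot), and classify the segment. Single-threaded and simplified; the real nonmovingSweepSegment also resets bitmap entries and the scan pointer.

    #include <stdint.h>
    #include <stdio.h>

    enum seg_state { SEGMENT_FREE, SEGMENT_PARTIAL, SEGMENT_FILLED };

    /* Classify a segment from its mark bitmap; *next_free receives the
     * first unmarked block index (== nblocks when the segment is full). */
    static enum seg_state sweep(const uint8_t *bitmap, unsigned nblocks,
                                unsigned *next_free) {
        unsigned live = 0, first_free = nblocks;
        for (unsigned i = 0; i < nblocks; i++) {
            if (bitmap[i])
                live++;
            else if (first_free == nblocks)
                first_free = i;
        }
        *next_free = first_free;
        if (live == nblocks) return SEGMENT_FILLED;
        if (live == 0)       return SEGMENT_FREE;   /* next_free == 0 here */
        return SEGMENT_PARTIAL;
    }

    int main(void) {
        uint8_t bm[4] = { 1, 0, 1, 0 };
        unsigned nf;
        printf("state=%d next_free=%u\n", (int)sweep(bm, 4, &nf), nf);
        return 0;
    }
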
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 0159488856..99671e9c61 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -497,7 +497,7 @@ static void checkNonmovingSegments (struct NonmovingSegment *seg)
if (seg->bitmap[i] == nonmovingMarkEpoch) {
StgPtr p = nonmovingSegmentGetBlock(seg, i);
checkClosure((StgClosure *) p);
- } else if (i < seg->next_free_snap){
+ } else if (i < nonmovingSegmentInfo(seg)->next_free_snap){
seg->bitmap[i] = 0;
}
}