From 13dd78ddb158f98b35ad2eda50d0a7af63920ece Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Thu, 18 Apr 2019 14:08:32 -0400 Subject: Nonmoving: Allow aging and refactor static objects logic This commit does two things: * Allow aging of objects during the preparatory minor GC * Refactor handling of static objects to avoid the use of a hashtable --- rts/sm/Evac.c | 50 ++++++++++++++++----- rts/sm/GC.c | 10 ++++- rts/sm/GCAux.c | 4 +- rts/sm/NonMoving.c | 40 ++++++++++++----- rts/sm/NonMovingMark.c | 115 ++++++++++++++++++++++++++++++++++-------------- rts/sm/NonMovingMark.h | 6 +-- rts/sm/NonMovingScav.c | 45 +++++++++++++------ rts/sm/NonMovingSweep.c | 5 ++- rts/sm/NonMovingSweep.h | 2 +- rts/sm/Storage.c | 70 +++++++++++++++++++++++++++-- 10 files changed, 266 insertions(+), 81 deletions(-) diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index c2aaaacffa..b4e35849ba 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -69,12 +69,6 @@ alloc_for_copy (uint32_t size, uint32_t gen_no) { ASSERT(gen_no < RtsFlags.GcFlags.generations); - if (RtsFlags.GcFlags.useNonmoving && major_gc) { - // unconditionally promote to non-moving heap in major gc - gct->copied += size; - return nonmovingAllocate(gct->cap, size); - } - StgPtr to; gen_workspace *ws; @@ -93,7 +87,20 @@ alloc_for_copy (uint32_t size, uint32_t gen_no) if (RtsFlags.GcFlags.useNonmoving && gen_no == oldest_gen->no) { gct->copied += size; - return nonmovingAllocate(gct->cap, size); + to = nonmovingAllocate(gct->cap, size); + + // Add segment to the todo list unless it's already there + // current->todo_link == NULL means not in todo list + struct NonmovingSegment *seg = nonmovingGetSegment(to); + if (!seg->todo_link) { + gen_workspace *ws = &gct->gens[oldest_gen->no]; + seg->todo_link = ws->todo_seg; + ws->todo_seg = seg; + } + + if (major_gc) + markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure *) to); + return to; } ws = &gct->gens[gen_no]; // zero memory references here @@ -312,9 +319,7 @@ evacuate_large(StgPtr p) */ new_gen_no = bd->dest_no; - if (RtsFlags.GcFlags.useNonmoving && major_gc) { - new_gen_no = oldest_gen->no; - } else if (new_gen_no < gct->evac_gen_no) { + if (new_gen_no < gct->evac_gen_no) { if (gct->eager_promotion) { new_gen_no = gct->evac_gen_no; } else { @@ -363,6 +368,13 @@ evacuate_large(StgPtr p) STATIC_INLINE void evacuate_static_object (StgClosure **link_field, StgClosure *q) { + if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving)) { + // See Note [Static objects under the nonmoving collector] in Storage.c. + if (major_gc) + markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q); + return; + } + StgWord link = (StgWord)*link_field; // See Note [STATIC_LINK fields] for how the link field bits work @@ -603,6 +615,8 @@ loop: // NOTE: large objects in nonmoving heap are also marked with // BF_NONMOVING. Those are moved to scavenged_large_objects list in // mark phase. + if (major_gc) + markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q); return; } @@ -629,6 +643,13 @@ loop: // they are not) if (bd->flags & BF_COMPACT) { evacuate_compact((P_)q); + + // We may have evacuated the block to the nonmoving generation. If so + // we need to make sure it is added to the mark queue since the only + // reference to it may be from the moving heap. + if (major_gc && bd->flags & BF_NONMOVING) { + markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q); + } return; } @@ -636,6 +657,13 @@ loop: */ if (bd->flags & BF_LARGE) { evacuate_large((P_)q); + + // We may have evacuated the block to the nonmoving generation. 
+        // If so we need to make sure it is added to the mark queue since
+        // the only reference to it may be from the moving heap.
+        if (major_gc && bd->flags & BF_NONMOVING) {
+            markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
+        }
         return;
     }
 
@@ -937,6 +965,8 @@ evacuate_BLACKHOLE(StgClosure **p)
     ASSERT((bd->flags & BF_COMPACT) == 0);
 
     if (bd->flags & BF_NONMOVING) {
+        if (major_gc)
+            markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
         return;
     }
 
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 37f9811aaf..1df6ace800 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -263,7 +263,10 @@ GarbageCollect (uint32_t collect_gen,
   N = collect_gen;
   major_gc = (N == RtsFlags.GcFlags.generations-1);
 
-  if (major_gc) {
+  /* N.B. The nonmoving collector works a bit differently. See
+   * Note [Static objects under the nonmoving collector].
+   */
+  if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
       prev_static_flag = static_flag;
       static_flag =
           static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
@@ -736,6 +739,11 @@ GarbageCollect (uint32_t collect_gen,
       // so we need to mark those too.
       // Note that in the sequential case these lists will be appended with
       // more weaks and threads found to be dead in mark.
+#if !defined(THREADED_RTS)
+      // In the non-threaded runtime this is the only time we push to the
+      // upd_rem_set.
+      nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
+#endif
       nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
       ACQUIRE_SM_LOCK;
   }
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index 6076f61d6f..11080c1f22 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -148,14 +148,14 @@ markCAFs (evac_fn evac, void *user)
     StgIndStatic *c;
 
     for (c = dyn_caf_list;
-         c != (StgIndStatic*)END_OF_CAF_LIST;
+         ((StgWord) c | STATIC_FLAG_LIST) != (StgWord)END_OF_CAF_LIST;
          c = (StgIndStatic *)c->static_link)
     {
         c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
         evac(user, &c->indirectee);
     }
 
     for (c = revertible_caf_list;
-         c != (StgIndStatic*)END_OF_CAF_LIST;
+         ((StgWord) c | STATIC_FLAG_LIST) != (StgWord)END_OF_CAF_LIST;
          c = (StgIndStatic *)c->static_link)
     {
         c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index 24464a43f9..76f54c9302 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -68,6 +68,27 @@ Mutex concurrent_coll_finished_lock;
  * stopAllCapabilitiesWith(SYNC_FLUSH_UPD_REM_SET). Capabilities are held
  * until the final mark has concluded.
  *
+ * Note that the possibility of concurrent minor and non-moving collections
+ * requires that we handle static objects a bit specially. See
+ * Note [Static objects under the nonmoving collector] in Storage.c
+ * for details.
+ *
+ *
+ * Note [Aging under the non-moving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The initial design of the non-moving collector mandated that all live data
+ * be evacuated to the non-moving heap prior to a major collection. This
+ * simplified certain bits of implementation and eased reasoning. However, it
+ * was (unsurprisingly) also found to result in significant amounts of
+ * unnecessary copying.
+ *
+ * Consequently, we now allow aging. We do this by teaching the moving
+ * collector to "evacuate" objects it encounters in the non-moving heap by
+ * adding them to the mark queue.
+ * Specifically, since each gc_thread holds a capability, we push to the
+ * capability's update remembered set (implemented by markQueuePushClosureGC).
+ *
  *
  * Note [Live data accounting in nonmoving collector]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -150,6 +171,8 @@ static struct NonmovingSegment *nonmovingPopFreeSegment(void)
  * Request a fresh segment from the free segment list or allocate one on the
  * given node.
  *
+ * Caller must hold SM_MUTEX (although we take the gc_alloc_block_sync spinlock
+ * under the assumption that we are in a GC context).
  */
 static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
 {
@@ -221,7 +244,7 @@ static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *al
     }
 }
 
-/* sz is in words */
+/* Allocate a block in the nonmoving heap. Caller must hold SM_MUTEX. sz is in words. */
 GNUC_ATTR_HOT
 void *nonmovingAllocate(Capability *cap, StgWord sz)
 {
@@ -239,14 +262,6 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
     void *ret = nonmovingSegmentGetBlock(current, current->next_free);
     ASSERT(GET_CLOSURE_TAG(ret) == 0); // check alignment
 
-    // Add segment to the todo list unless it's already there
-    // current->todo_link == NULL means not in todo list
-    if (!current->todo_link) {
-        gen_workspace *ws = &gct->gens[oldest_gen->no];
-        current->todo_link = ws->todo_seg;
-        ws->todo_seg = current;
-    }
-
     // Advance the current segment's next_free or allocate a new segment if full
     bool full = advance_next_free(current);
     if (full) {
@@ -405,6 +420,11 @@ static void nonmovingClearAllBitmaps(void)
 /* Prepare the heap bitmaps and snapshot metadata for a mark */
 static void nonmovingPrepareMark(void)
 {
+    // See Note [Static objects under the nonmoving collector].
+    prev_static_flag = static_flag;
+    static_flag =
+        static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
+
     nonmovingClearAllBitmaps();
     nonmovingBumpEpoch();
     for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
@@ -688,7 +708,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
 
 #if defined(DEBUG)
     // Zap CAFs that we will sweep
-    nonmovingGcCafs(mark_queue);
+    nonmovingGcCafs();
 #endif
 
     ASSERT(mark_queue->top->head == 0);
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 72a3ca69cb..b1c585f72a 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -205,7 +205,7 @@ static void init_mark_queue_(MarkQueue *queue);
  * Really the argument type should be UpdRemSet* but this would be rather
  * inconvenient without polymorphism.
  */
-static void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
+void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
 {
     if (markQueueIsEmpty(rset)) return;
 
@@ -374,6 +374,38 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
     q->top->head++;
 }
 
+/* A variant of push to be used by the minor GC when it encounters a reference
+ * to an object in the non-moving heap. In contrast to the other push
+ * operations this uses the gc_alloc_block_sync spinlock instead of the
+ * SM_LOCK to allocate new blocks in the event that the mark queue is full.
+ */
+void
+markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
+{
+    // Are we at the end of the block?
+    if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
+        // Yes, this block is full.
+        // Allocate a fresh block.
+ ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); + bdescr *bd = allocGroup(1); + bd->link = q->blocks; + q->blocks = bd; + q->top = (MarkQueueBlock *) bd->start; + q->top->head = 0; + RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + } + + MarkQueueEnt ent = { + .type = MARK_CLOSURE, + .mark_closure = { + .p = UNTAG_CLOSURE(p), + .origin = NULL, + } + }; + q->top->entries[q->top->head] = ent; + q->top->head++; +} + static inline void push_closure (MarkQueue *q, StgClosure *p, @@ -715,7 +747,6 @@ static void init_mark_queue_ (MarkQueue *queue) void initMarkQueue (MarkQueue *queue) { init_mark_queue_(queue); - queue->marked_objects = allocHashTable(); queue->is_upd_rem_set = false; } @@ -723,8 +754,6 @@ void initMarkQueue (MarkQueue *queue) void init_upd_rem_set (UpdRemSet *rset) { init_mark_queue_(&rset->queue); - // Update remembered sets don't have to worry about static objects - rset->queue.marked_objects = NULL; rset->queue.is_upd_rem_set = true; } @@ -739,7 +768,6 @@ void reset_upd_rem_set (UpdRemSet *rset) void freeMarkQueue (MarkQueue *queue) { freeChain_lock(queue->blocks); - freeHashTable(queue->marked_objects, NULL); } #if defined(THREADED_RTS) && defined(DEBUG) @@ -986,12 +1014,32 @@ mark_stack (MarkQueue *queue, StgStack *stack) mark_stack_(queue, stack->sp, stack->stack + stack->stack_size); } +/* See Note [Static objects under the nonmoving collector]. + * + * Returns true if the object needs to be marked. + */ +static bool +bump_static_flag(StgClosure **link_field, StgClosure *q STG_UNUSED) +{ + while (1) { + StgWord link = (StgWord) *link_field; + StgWord new = (link & ~STATIC_BITS) | static_flag; + if ((link & STATIC_BITS) == static_flag) + return false; + else if (cas((StgVolatilePtr) link_field, link, new) == link) { + return true; + } + } +} + static GNUC_ATTR_HOT void mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) { (void)origin; // TODO: should be used for selector/thunk optimisations try_again: + ; + bdescr *bd = NULL; p = UNTAG_CLOSURE(p); # define PUSH_FIELD(obj, field) \ @@ -1009,45 +1057,46 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) return; } - if (lookupHashTable(queue->marked_objects, (W_)p)) { - // already marked - return; - } - - insertHashTable(queue->marked_objects, (W_)p, (P_)1); - switch (type) { case THUNK_STATIC: if (info->srt != 0) { - markQueuePushThunkSrt(queue, info); // TODO this function repeats the check above + if (bump_static_flag(THUNK_STATIC_LINK((StgClosure *)p), p)) { + markQueuePushThunkSrt(queue, info); // TODO this function repeats the check above + } } return; case FUN_STATIC: if (info->srt != 0 || info->layout.payload.ptrs != 0) { - markQueuePushFunSrt(queue, info); // TODO this function repeats the check above - - // a FUN_STATIC can also be an SRT, so it may have pointer - // fields. See Note [SRTs] in CmmBuildInfoTables, specifically - // the [FUN] optimisation. - // TODO (osa) I don't understand this comment - for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) { - PUSH_FIELD(p, payload[i]); + if (bump_static_flag(STATIC_LINK(info, (StgClosure *)p), p)) { + markQueuePushFunSrt(queue, info); // TODO this function repeats the check above + + // a FUN_STATIC can also be an SRT, so it may have pointer + // fields. See Note [SRTs] in CmmBuildInfoTables, specifically + // the [FUN] optimisation. 
+ // TODO (osa) I don't understand this comment + for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) { + PUSH_FIELD(p, payload[i]); + } } } return; case IND_STATIC: - PUSH_FIELD((StgInd *) p, indirectee); + if (bump_static_flag(IND_STATIC_LINK((StgClosure *)p), p)) { + PUSH_FIELD((StgInd *) p, indirectee); + } return; case CONSTR: case CONSTR_1_0: case CONSTR_2_0: case CONSTR_1_1: - for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) { - PUSH_FIELD(p, payload[i]); + if (bump_static_flag(STATIC_LINK(info, (StgClosure *)p), p)) { + for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) { + PUSH_FIELD(p, payload[i]); + } } return; @@ -1061,19 +1110,17 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin) } } - bdescr *bd = Bdescr((StgPtr) p); + bd = Bdescr((StgPtr) p); if (bd->gen != oldest_gen) { - // Here we have an object living outside of the non-moving heap. Since - // we moved everything to the non-moving heap before starting the major - // collection, we know that we don't need to trace it: it was allocated - // after we took our snapshot. -#if !defined(THREADED_RTS) - // This should never happen in the non-concurrent case - barf("Closure outside of non-moving heap: %p", p); -#else + // Here we have an object living outside of the non-moving heap. While + // we likely evacuated nearly everything to the nonmoving heap during + // preparation there are nevertheless a few ways in which we might trace + // a reference into younger generations: + // + // * a mutable object might have been updated + // * we might have aged an object return; -#endif } ASSERTM(LOOKS_LIKE_CLOSURE_PTR(p), "invalid closure, info=%p", p->header.info); diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h index 84b6642d6c..7e9bfed1f9 100644 --- a/rts/sm/NonMovingMark.h +++ b/rts/sm/NonMovingMark.h @@ -82,10 +82,6 @@ typedef struct MarkQueue_ { // Is this a mark queue or a capability-local update remembered set? bool is_upd_rem_set; - - // Marked objects outside of nonmoving heap, namely large and static - // objects. 
- HashTable *marked_objects; } MarkQueue; /* While it shares its representation with MarkQueue, UpdRemSet differs in @@ -145,8 +141,10 @@ void nonmovingResurrectThreads(struct MarkQueue_ *queue, StgTSO **resurrected_th bool nonmovingIsAlive(StgClosure *p); void nonmovingMarkDeadWeak(struct MarkQueue_ *queue, StgWeak *w); void nonmovingMarkLiveWeak(struct MarkQueue_ *queue, StgWeak *w); +void nonmovingAddUpdRemSetBlocks(struct MarkQueue_ *rset); void markQueuePush(MarkQueue *q, const MarkQueueEnt *ent); +void markQueuePushClosureGC(MarkQueue *q, StgClosure *p); void markQueuePushClosure(MarkQueue *q, StgClosure *p, StgClosure **origin); diff --git a/rts/sm/NonMovingScav.c b/rts/sm/NonMovingScav.c index 850750b43b..0efbde688c 100644 --- a/rts/sm/NonMovingScav.c +++ b/rts/sm/NonMovingScav.c @@ -16,6 +16,7 @@ nonmovingScavengeOne (StgClosure *q) ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); StgPtr p = (StgPtr)q; const StgInfoTable *info = get_itbl(q); + const bool saved_eager_promotion = gct->eager_promotion; switch (info->type) { @@ -23,9 +24,11 @@ nonmovingScavengeOne (StgClosure *q) case MVAR_DIRTY: { StgMVar *mvar = ((StgMVar *)p); + gct->eager_promotion = false; evacuate((StgClosure **)&mvar->head); evacuate((StgClosure **)&mvar->tail); evacuate((StgClosure **)&mvar->value); + gct->eager_promotion = saved_eager_promotion; if (gct->failed_to_evac) { mvar->header.info = &stg_MVAR_DIRTY_info; } else { @@ -37,8 +40,10 @@ nonmovingScavengeOne (StgClosure *q) case TVAR: { StgTVar *tvar = ((StgTVar *)p); + gct->eager_promotion = false; evacuate((StgClosure **)&tvar->current_value); evacuate((StgClosure **)&tvar->first_watch_queue_entry); + gct->eager_promotion = saved_eager_promotion; if (gct->failed_to_evac) { tvar->header.info = &stg_TVAR_DIRTY_info; } else { @@ -122,10 +127,21 @@ nonmovingScavengeOne (StgClosure *q) break; } + case WEAK: + { + // We must evacuate the key since it may refer to an object in the + // moving heap which may be long gone by the time we call + // nonmovingTidyWeaks. + StgWeak *weak = (StgWeak *) p; + gct->eager_promotion = true; + evacuate(&weak->key); + gct->eager_promotion = saved_eager_promotion; + goto gen_obj; + } + gen_obj: case CONSTR: case CONSTR_NOCAF: - case WEAK: case PRIM: { StgPtr end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs; @@ -145,7 +161,9 @@ nonmovingScavengeOne (StgClosure *q) case MUT_VAR_CLEAN: case MUT_VAR_DIRTY: + gct->eager_promotion = false; evacuate(&((StgMutVar *)p)->var); + gct->eager_promotion = saved_eager_promotion; if (gct->failed_to_evac) { ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; } else { @@ -157,10 +175,12 @@ nonmovingScavengeOne (StgClosure *q) { StgBlockingQueue *bq = (StgBlockingQueue *)p; + gct->eager_promotion = false; evacuate(&bq->bh); evacuate((StgClosure**)&bq->owner); evacuate((StgClosure**)&bq->queue); evacuate((StgClosure**)&bq->link); + gct->eager_promotion = saved_eager_promotion; if (gct->failed_to_evac) { bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info; @@ -202,11 +222,9 @@ nonmovingScavengeOne (StgClosure *q) case MUT_ARR_PTRS_CLEAN: case MUT_ARR_PTRS_DIRTY: { - // We don't eagerly promote objects pointed to by a mutable - // array, but if we find the array only points to objects in - // the same or an older generation, we mark it "clean" and - // avoid traversing it during minor GCs. 
+ gct->eager_promotion = false; scavenge_mut_arr_ptrs((StgMutArrPtrs*)p); + gct->eager_promotion = saved_eager_promotion; if (gct->failed_to_evac) { ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; } else { @@ -234,14 +252,13 @@ nonmovingScavengeOne (StgClosure *q) case SMALL_MUT_ARR_PTRS_DIRTY: // follow everything { - // We don't eagerly promote objects pointed to by a mutable - // array, but if we find the array only points to objects in - // the same or an older generation, we mark it "clean" and - // avoid traversing it during minor GCs. StgPtr next = p + small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs*)p); + gct->eager_promotion = false; for (p = (P_)((StgSmallMutArrPtrs *)p)->payload; p < next; p++) { evacuate((StgClosure **)p); } + gct->eager_promotion = saved_eager_promotion; + if (gct->failed_to_evac) { ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info; } else { @@ -278,21 +295,21 @@ nonmovingScavengeOne (StgClosure *q) { StgStack *stack = (StgStack*)p; + gct->eager_promotion = false; scavenge_stack(stack->sp, stack->stack + stack->stack_size); + gct->eager_promotion = saved_eager_promotion; stack->dirty = gct->failed_to_evac; - // TODO (osa): There may be something special about stacks that we're - // missing. All other mut objects are marked by using a different info - // table except stacks. - break; } case MUT_PRIM: { StgPtr end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs; + gct->eager_promotion = false; for (p = (P_)((StgClosure *)p)->payload; p < end; p++) { evacuate((StgClosure **)p); } + gct->eager_promotion = saved_eager_promotion; gct->failed_to_evac = true; // mutable break; } @@ -302,12 +319,14 @@ nonmovingScavengeOne (StgClosure *q) StgWord i; StgTRecChunk *tc = ((StgTRecChunk *) p); TRecEntry *e = &(tc -> entries[0]); + gct->eager_promotion = false; evacuate((StgClosure **)&tc->prev_chunk); for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) { evacuate((StgClosure **)&e->tvar); evacuate((StgClosure **)&e->expected_value); evacuate((StgClosure **)&e->new_value); } + gct->eager_promotion = saved_eager_promotion; gct->failed_to_evac = true; // mutable break; } diff --git a/rts/sm/NonMovingSweep.c b/rts/sm/NonMovingSweep.c index fa3e38cca4..b67bba0ce5 100644 --- a/rts/sm/NonMovingSweep.c +++ b/rts/sm/NonMovingSweep.c @@ -102,7 +102,7 @@ nonmovingSweepSegment(struct NonmovingSegment *seg) #if defined(DEBUG) -void nonmovingGcCafs(struct MarkQueue_ *queue) +void nonmovingGcCafs() { uint32_t i = 0; StgIndStatic *next; @@ -116,7 +116,8 @@ void nonmovingGcCafs(struct MarkQueue_ *queue) const StgInfoTable *info = get_itbl((StgClosure*)caf); ASSERT(info->type == IND_STATIC); - if (lookupHashTable(queue->marked_objects, (StgWord) caf) == NULL) { + StgWord flag = ((StgWord) caf->static_link) & STATIC_BITS; + if (flag != 0 && flag != static_flag) { debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", caf); SET_INFO((StgClosure*)caf, &stg_GCD_CAF_info); // stub it } else { diff --git a/rts/sm/NonMovingSweep.h b/rts/sm/NonMovingSweep.h index de2d52acb5..f21936004f 100644 --- a/rts/sm/NonMovingSweep.h +++ b/rts/sm/NonMovingSweep.h @@ -28,5 +28,5 @@ void nonmovingPrepareSweep(void); #if defined(DEBUG) // The non-moving equivalent of the moving collector's gcCAFs. 
-void nonmovingGcCafs(struct MarkQueue_ *queue);
+void nonmovingGcCafs(void);
 #endif
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 44163a353f..f04b3c5929 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -321,7 +321,8 @@ freeStorage (bool free_heap)
 }
 
 /* -----------------------------------------------------------------------------
-   Note [CAF management].
+   Note [CAF management]
+   ~~~~~~~~~~~~~~~~~~~~~
 
    The entry code for every CAF does the following:
 
@@ -356,6 +357,7 @@ freeStorage (bool free_heap)
 
    ------------------
    Note [atomic CAF entry]
+   ~~~~~~~~~~~~~~~~~~~~~~~
 
    With THREADED_RTS, newCAF() is required to be atomic (see
   #5558). This is because if two threads happened to enter the same
@@ -369,6 +371,7 @@ freeStorage (bool free_heap)
 
    ------------------
    Note [GHCi CAFs]
+   ~~~~~~~~~~~~~~~~
 
    For GHCI, we have additional requirements when dealing with CAFs:
 
@@ -388,6 +391,51 @@ freeStorage (bool free_heap)
 
       -- SDM 29/1/01
 
+   ------------------
+   Note [Static objects under the nonmoving collector]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+   Static object management is a bit tricky under the nonmoving collector as we
+   need to maintain a bit more state than in the moving collector. In
+   particular, the moving collector uses the low bits of the STATIC_LINK field
+   to determine whether the object has been moved to the scavenger's work list
+   (see Note [STATIC_LINK fields] in Storage.h).
+
+   However, the nonmoving collector also needs a place to keep its mark bit.
+   This is problematic as we therefore need at least three bits of state
+   but can assume only two bits are available in STATIC_LINK (due to 32-bit
+   systems).
+
+   To accommodate this we move handling of static objects entirely to the
+   oldest generation when the nonmoving collector is in use. To do this safely
+   and efficiently we allocate the blackhole created by lockCAF() directly in
+   the non-moving heap. This means that the moving collector can completely
+   ignore static objects in minor collections since they are guaranteed not to
+   have any references into the moving heap. Of course, the blackhole itself
+   will likely contain a reference into the moving heap but this is
+   significantly easier to handle, being a heap-allocated object (see Note
+   [Aging under the non-moving collector] in NonMoving.c for details).
+
+   During the moving phase of a major collection we treat static objects
+   as we do any other reference into the non-moving heap by pushing them
+   to the non-moving mark queue (see Note [Aging under the non-moving
+   collector]).
+
+   This allows the non-moving collector to have full control over the flags
+   in STATIC_LINK, which it uses as described in Note [STATIC_LINK fields].
+   This is implemented by NonMovingMark.c:bump_static_flag.
+
+   In short, the plan is:
+
+   - lockCAF allocates its blackhole in the nonmoving heap. This is important
+     to ensure that we do not need to place the static object on the mut_list,
+     as otherwise we would need some way to ensure that it is evacuated only
+     once during a moving collection.
+
+   - evacuate_static_object merely pushes objects to the mark queue.
+
+   - the nonmoving collector uses the flags in STATIC_LINK as its mark bit.
+ -------------------------------------------------------------------------- */ STATIC_INLINE StgInd * @@ -441,7 +489,16 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf) caf->saved_info = orig_info; // Allocate the blackhole indirection closure - bh = (StgInd *)allocate(cap, sizeofW(*bh)); + if (RtsFlags.GcFlags.useNonmoving) { + // See Note [Static objects under the nonmoving collector]. + ACQUIRE_SM_LOCK; + bh = (StgInd *)nonmovingAllocate(cap, sizeofW(*bh)); + RELEASE_SM_LOCK; + recordMutableCap((StgClosure*)bh, + regTableToCapability(reg), oldest_gen->no); + } else { + bh = (StgInd *)allocate(cap, sizeofW(*bh)); + } bh->indirectee = (StgClosure *)cap->r.rCurrentTSO; SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs); // Ensure that above writes are visible before we introduce reference as CAF indirectee. @@ -483,7 +540,9 @@ newCAF(StgRegTable *reg, StgIndStatic *caf) else { // Put this CAF on the mutable list for the old generation. - if (oldest_gen->no != 0) { + // N.B. the nonmoving collector works a bit differently: see + // Note [Static objects under the nonmoving collector]. + if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) { recordMutableCap((StgClosure*)caf, regTableToCapability(reg), oldest_gen->no); } @@ -560,7 +619,9 @@ StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf) if (!bh) return NULL; // Put this CAF on the mutable list for the old generation. - if (oldest_gen->no != 0) { + // N.B. the nonmoving collector works a bit differently: + // see Note [Static objects under the nonmoving collector]. + if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) { recordMutableCap((StgClosure*)caf, regTableToCapability(reg), oldest_gen->no); } @@ -1520,6 +1581,7 @@ void flushExec (W_ len, AdjustorExecutable exec_addr) __clear_cache((void*)begin, (void*)end); # endif #elif defined(__GNUC__) + /* For all other platforms, fall back to a libgcc builtin. */ unsigned char* begin = (unsigned char*)exec_addr; unsigned char* end = begin + len; # if GCC_HAS_BUILTIN_CLEAR_CACHE -- cgit v1.2.1
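The following standalone sketch models the STATIC_LINK flag protocol that
bump_static_flag and the patched nonmovingGcCafs rely on. It is an
illustration, not part of the patch: the file name (flag_model.c) and types
are simplified stand-ins for the RTS's, and this single-threaded version
omits the CAS loop the real bump_static_flag needs because mutator and mark
threads can race on the link field.

/* flag_model.c: a minimal model of the two-bit STATIC_LINK flag protocol
 * described in Note [Static objects under the nonmoving collector]. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define STATIC_FLAG_A 1
#define STATIC_FLAG_B 2
#define STATIC_BITS   3  /* the two low bits of a static link word */

static uintptr_t static_flag = STATIC_FLAG_A;

/* Flip the flag at the start of each major collection, as the patched
 * nonmovingPrepareMark does. */
static void prepare_mark(void)
{
    static_flag =
        static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
}

/* Set the object's flag bits to the current cycle's flag. Returns true only
 * on the first visit in a cycle, so the caller pushes each static object's
 * fields at most once (cf. bump_static_flag). */
static bool bump_static_flag(uintptr_t *link_field)
{
    uintptr_t link = *link_field;
    if ((link & STATIC_BITS) == static_flag)
        return false;  /* already marked in this cycle */
    *link_field = (link & ~(uintptr_t)STATIC_BITS) | static_flag;
    return true;
}

/* A stale, non-zero flag means the object was marked in some earlier cycle
 * but not this one, i.e. it is dead (cf. the patched nonmovingGcCafs). */
static bool swept_as_dead(uintptr_t link)
{
    uintptr_t flag = link & STATIC_BITS;
    return flag != 0 && flag != static_flag;
}

int main(void)
{
    uintptr_t caf_link = 0;                       /* never marked      */

    prepare_mark();                               /* cycle 1           */
    printf("%d\n", bump_static_flag(&caf_link));  /* 1: first visit    */
    printf("%d\n", bump_static_flag(&caf_link));  /* 0: already marked */
    printf("%d\n", swept_as_dead(caf_link));      /* 0: live this cycle */

    prepare_mark();                               /* cycle 2, no re-mark */
    printf("%d\n", swept_as_dead(caf_link));      /* 1: flag now stale  */
    return 0;
}

Two flag values suffice because a surviving object is re-marked every cycle:
at sweep time a flag can only be current (live), one cycle stale (dead), or
zero (never marked), which is exactly the three-way distinction the note
says must fit in two bits.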