From 9989cc67549b003db63c90f11b29d89f896323cb Mon Sep 17 00:00:00 2001
From: Ben Gamari
Date: Thu, 23 Apr 2020 12:35:40 -0400
Subject: rts: Eliminate GC spinlock

---
 includes/rts/storage/MBlock.h |  5 -----
 rts/Stats.c                   |  9 ---------
 rts/sm/GCUtils.c              | 28 ++++++----------------------
 rts/sm/HeapAlloc.h            |  4 ++--
 rts/sm/NonMoving.c            |  5 +----
 rts/sm/NonMovingMark.c        |  5 +++--
 rts/sm/Storage.c              |  6 ------
 7 files changed, 12 insertions(+), 50 deletions(-)

diff --git a/includes/rts/storage/MBlock.h b/includes/rts/storage/MBlock.h
index 3acefda9a0..38789e7863 100644
--- a/includes/rts/storage/MBlock.h
+++ b/includes/rts/storage/MBlock.h
@@ -25,8 +25,3 @@ extern void freeAllMBlocks(void);
 
 extern void *getFirstMBlock(void **state);
 extern void *getNextMBlock(void **state, void *mblock);
-
-#if defined(THREADED_RTS)
-// needed for HEAP_ALLOCED below
-extern SpinLock gc_alloc_block_sync;
-#endif
diff --git a/rts/Stats.c b/rts/Stats.c
index 2fbd8b2655..8d4bd41b31 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -937,11 +937,6 @@ static void report_summary(const RTSSummaryStats* sum)
                 , col_width[1], "SpinLock"
                 , col_width[2], "Spins"
                 , col_width[3], "Yields");
-    statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
-                , col_width[0], ""
-                , col_width[1], "gc_alloc_block_sync"
-                , col_width[2], gc_alloc_block_sync.spin
-                , col_width[3], gc_alloc_block_sync.yield);
     statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
                 , col_width[0], ""
                 , col_width[1], "gc_spin"
@@ -1124,10 +1119,6 @@ static void report_machine_readable (const RTSSummaryStats * sum)
 
     // next, internal counters
 #if defined(PROF_SPIN)
-    MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
-    MR_STAT("gc_alloc_block_sync_yield", FMT_Word64,
-            gc_alloc_block_sync.yield);
-    MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
     MR_STAT("gc_spin_spin", FMT_Word64, stats.gc_spin_spin);
     MR_STAT("gc_spin_yield", FMT_Word64, stats.gc_spin_yield);
     MR_STAT("mut_spin_spin", FMT_Word64, stats.mut_spin_spin);
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 02c26ddf5e..cc64ede466 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -26,27 +26,15 @@
 #include "WSDeque.h"
 #endif
 
-#if defined(THREADED_RTS)
-SpinLock gc_alloc_block_sync;
-#endif
-
 bdescr* allocGroup_sync(uint32_t n)
 {
-    bdescr *bd;
     uint32_t node = capNoToNumaNode(gct->thread_index);
-    ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
-    bd = allocGroupOnNode(node,n);
-    RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
-    return bd;
+    return allocGroupOnNode_lock(node,n);
 }
 
 bdescr* allocGroupOnNode_sync(uint32_t node, uint32_t n)
 {
-    bdescr *bd;
-    ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
-    bd = allocGroupOnNode(node,n);
-    RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
-    return bd;
+    return allocGroupOnNode_lock(node,n);
 }
 
 static uint32_t
@@ -55,7 +43,7 @@ allocBlocks_sync(uint32_t n, bdescr **hd)
     bdescr *bd;
     uint32_t i;
     uint32_t node = capNoToNumaNode(gct->thread_index);
-    ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+    ACQUIRE_SM_LOCK;
     bd = allocLargeChunkOnNode(node,1,n);
     // NB. allocLargeChunk, rather than allocGroup(n), to allocate in a
     // fragmentation-friendly way.
@@ -68,7 +56,7 @@ allocBlocks_sync(uint32_t n, bdescr **hd)
     bd[n-1].link = NULL;
     // We have to hold the lock until we've finished fiddling with the metadata,
     // otherwise the block allocator can get confused.
-    RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+    RELEASE_SM_LOCK;
     *hd = bd;
     return n;
 }
@@ -76,17 +64,13 @@ allocBlocks_sync(uint32_t n, bdescr **hd)
 void
 freeChain_sync(bdescr *bd)
 {
-    ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
-    freeChain(bd);
-    RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+    freeChain_lock(bd);
 }
 
 void
 freeGroup_sync(bdescr *bd)
 {
-    ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
-    freeGroup(bd);
-    RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+    freeGroup_lock(bd);
 }
 
 /* -----------------------------------------------------------------------------
diff --git a/rts/sm/HeapAlloc.h b/rts/sm/HeapAlloc.h
index 58aae1119d..e05bd41f87 100644
--- a/rts/sm/HeapAlloc.h
+++ b/rts/sm/HeapAlloc.h
@@ -210,9 +210,9 @@ StgBool HEAP_ALLOCED_GC(const void *p)
     } else {
         // putting the rest out of line turned out to be a slight
         // performance improvement:
-        ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+        ACQUIRE_SM_LOCK;
         b = HEAP_ALLOCED_miss(mblock,p);
-        RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+        RELEASE_SM_LOCK;
         return b;
     }
 }
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index bdf22dc37b..6e9d4e5375 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -467,8 +467,7 @@ unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size)
  * Request a fresh segment from the free segment list or allocate one of the
  * given node.
  *
- * Caller must hold SM_MUTEX (although we take the gc_alloc_block_sync spinlock
- * under the assumption that we are in a GC context).
+ * Caller must hold SM_MUTEX.
  */
 static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
 {
@@ -480,12 +479,10 @@ static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
     if (ret == NULL) {
         // Take gc spinlock: another thread may be scavenging a moving
         // generation and call `todo_block_full`
-        ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
         bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
         // See Note [Live data accounting in nonmoving collector].
         oldest_gen->n_blocks += bd->blocks;
         oldest_gen->n_words += BLOCK_SIZE_W * bd->blocks;
-        RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
 
         for (StgWord32 i = 0; i < bd->blocks; ++i) {
             initBdescr(&bd[i], oldest_gen, oldest_gen);
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 3113e7f013..475762beeb 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -442,6 +442,7 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
     q->top->head++;
 }
 
+// TODO: Eliminate this
 /* A variant of push to be used by the minor GC when it encounters a reference
  * to an object in the non-moving heap. In contrast to the other push
  * operations this uses the gc_alloc_block_sync spinlock instead of the
@@ -459,13 +460,13 @@ markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
     if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
         // Yes, this block is full.
         // allocate a fresh block.
-        ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+        ACQUIRE_SM_LOCK;
         bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
         bd->link = q->blocks;
         q->blocks = bd;
         q->top = (MarkQueueBlock *) bd->start;
         q->top->head = 0;
-        RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+        RELEASE_SM_LOCK;
     }
 
     MarkQueueEnt ent = {
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 49c367fa8d..1e8cfadbe0 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -174,12 +174,6 @@ initStorage (void)
     // Nonmoving heap uses oldest_gen so initialize it after initializing oldest_gen
     nonmovingInit();
 
-#if defined(THREADED_RTS)
-    // nonmovingAddCapabilities allocates segments, which requires taking the gc
-    // sync lock, so initialize it before nonmovingAddCapabilities
-    initSpinLock(&gc_alloc_block_sync);
-#endif
-
     if (RtsFlags.GcFlags.useNonmoving)
         nonmovingAddCapabilities(n_capabilities);
 
--
cgit v1.2.1
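
Note on the locking change (commentary, not part of the patch): the patch retires the
dedicated gc_alloc_block_sync spinlock and routes GC-time block allocation and freeing
through the storage manager's mutex instead, via the pre-existing *_lock wrappers
(allocGroupOnNode_lock, freeChain_lock, freeGroup_lock) and explicit
ACQUIRE_SM_LOCK/RELEASE_SM_LOCK pairs. Below is a minimal standalone sketch of the
before/after pattern, modelled with pthreads; every name in it (alloc_spin, sm_mutex,
alloc_block_spin, alloc_block_lock) is hypothetical, with malloc standing in for the
RTS block allocator.

    /* Standalone sketch, not RTS code: the before/after locking pattern. */
    #include <pthread.h>
    #include <stdlib.h>

    static pthread_spinlock_t alloc_spin;    /* old scheme: dedicated GC spinlock */
    static pthread_mutex_t sm_mutex = PTHREAD_MUTEX_INITIALIZER; /* new scheme */

    /* Before: GC threads spin on a lock private to the block allocator. */
    static void *alloc_block_spin(size_t n)
    {
        pthread_spin_lock(&alloc_spin);
        void *p = malloc(n);              /* stands in for allocGroupOnNode() */
        pthread_spin_unlock(&alloc_spin);
        return p;
    }

    /* After: allocation funnels through the single storage-manager mutex,
     * the analogue of allocGroupOnNode_lock() in the patch. */
    static void *alloc_block_lock(size_t n)
    {
        pthread_mutex_lock(&sm_mutex);
        void *p = malloc(n);
        pthread_mutex_unlock(&sm_mutex);
        return p;
    }

    int main(void)                        /* exercise both paths once */
    {
        pthread_spin_init(&alloc_spin, PTHREAD_PROCESS_PRIVATE);
        free(alloc_block_spin(64));
        free(alloc_block_lock(64));
        pthread_spin_destroy(&alloc_spin);
        return 0;
    }

One consequence visible in the diff itself: with the separate lock gone there is one
fewer lock to initialize in initStorage and one fewer PROF_SPIN counter to report in
Stats.c, and contended callers now block on the SM mutex rather than spin.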