author    Ben Gamari <ben@smart-cactus.org>    2020-04-23 12:35:40 -0400
committer Ben Gamari <ben@smart-cactus.org>    2020-04-23 22:48:43 -0400
commit    9989cc67549b003db63c90f11b29d89f896323cb (patch)
tree      42cfbd330658d77f2291c4e3297a4255909b9dbd
parent    747093b7c23a1cf92b564eb3d9efe2adc15330df (diff)
download  haskell-wip/no-gc-spinlock.tar.gz

rts: Eliminate GC spinlock (wip/no-gc-spinlock)
-rw-r--r--  includes/rts/storage/MBlock.h |  5
-rw-r--r--  rts/Stats.c                   |  9
-rw-r--r--  rts/sm/GCUtils.c              | 28
-rw-r--r--  rts/sm/HeapAlloc.h            |  4
-rw-r--r--  rts/sm/NonMoving.c            |  5
-rw-r--r--  rts/sm/NonMovingMark.c        |  5
-rw-r--r--  rts/sm/Storage.c              |  6
7 files changed, 12 insertions(+), 50 deletions(-)
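
The change itself is mechanical: every use of the dedicated gc_alloc_block_sync spinlock is replaced by the storage manager's pre-existing mutex. A minimal sketch of the before/after locking discipline, using the RTS macro names from this diff but with illustrative surroundings only:

    /* Before: a busy-waiting spinlock dedicated to GC block allocation. */
    ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
    bd = allocGroupOnNode(node, n);
    RELEASE_SPIN_LOCK(&gc_alloc_block_sync);

    /* After: the storage manager mutex (an OS mutex that blocks rather
     * than spins) protects the same critical sections, so GC-time
     * allocation contends on the same lock as the rest of the storage
     * manager. */
    ACQUIRE_SM_LOCK;
    bd = allocGroupOnNode(node, n);
    RELEASE_SM_LOCK;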
diff --git a/includes/rts/storage/MBlock.h b/includes/rts/storage/MBlock.h
index 3acefda9a0..38789e7863 100644
--- a/includes/rts/storage/MBlock.h
+++ b/includes/rts/storage/MBlock.h
@@ -25,8 +25,3 @@ extern void freeAllMBlocks(void);
extern void *getFirstMBlock(void **state);
extern void *getNextMBlock(void **state, void *mblock);
-
-#if defined(THREADED_RTS)
-// needed for HEAP_ALLOCED below
-extern SpinLock gc_alloc_block_sync;
-#endif
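
The deleted extern existed only so that the HEAP_ALLOCED slow path in rts/sm/HeapAlloc.h (see below) could take the lock from a header. For reference, a sketch of the SpinLock shape that the deleted Stats.c counters below read from; the .spin/.yield fields match their uses in this diff, the rest is an assumption about the real rts/SpinLock.h:

    typedef struct SpinLock_ {
        StgWord   lock;    /* 1 = free, 0 = held */
    #if defined(PROF_SPIN)
        StgWord64 spin;    /* bumped each time we spin on a held lock */
        StgWord64 yield;   /* bumped each time we give up and yield */
    #endif
    } SpinLock;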
diff --git a/rts/Stats.c b/rts/Stats.c
index 2fbd8b2655..8d4bd41b31 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -939,11 +939,6 @@ static void report_summary(const RTSSummaryStats* sum)
, col_width[3], "Yields");
statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
, col_width[0], ""
- , col_width[1], "gc_alloc_block_sync"
- , col_width[2], gc_alloc_block_sync.spin
- , col_width[3], gc_alloc_block_sync.yield);
- statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
- , col_width[0], ""
, col_width[1], "gc_spin"
, col_width[2], stats.gc_spin_spin
, col_width[3], stats.gc_spin_yield);
@@ -1124,10 +1119,6 @@ static void report_machine_readable (const RTSSummaryStats * sum)
// next, internal counters
#if defined(PROF_SPIN)
- MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
- MR_STAT("gc_alloc_block_sync_yield", FMT_Word64,
- gc_alloc_block_sync.yield);
- MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
MR_STAT("gc_spin_spin", FMT_Word64, stats.gc_spin_spin);
MR_STAT("gc_spin_yield", FMT_Word64, stats.gc_spin_yield);
MR_STAT("mut_spin_spin", FMT_Word64, stats.mut_spin_spin);
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 02c26ddf5e..cc64ede466 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -26,27 +26,15 @@
#include "WSDeque.h"
#endif
-#if defined(THREADED_RTS)
-SpinLock gc_alloc_block_sync;
-#endif
-
bdescr* allocGroup_sync(uint32_t n)
{
- bdescr *bd;
uint32_t node = capNoToNumaNode(gct->thread_index);
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- bd = allocGroupOnNode(node,n);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
- return bd;
+ return allocGroupOnNode_lock(node,n);
}
bdescr* allocGroupOnNode_sync(uint32_t node, uint32_t n)
{
- bdescr *bd;
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- bd = allocGroupOnNode(node,n);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
- return bd;
+ return allocGroupOnNode_lock(node,n);
}
static uint32_t
@@ -55,7 +43,7 @@ allocBlocks_sync(uint32_t n, bdescr **hd)
bdescr *bd;
uint32_t i;
uint32_t node = capNoToNumaNode(gct->thread_index);
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK;
bd = allocLargeChunkOnNode(node,1,n);
// NB. allocLargeChunk, rather than allocGroup(n), to allocate in a
// fragmentation-friendly way.
@@ -68,7 +56,7 @@ allocBlocks_sync(uint32_t n, bdescr **hd)
bd[n-1].link = NULL;
// We have to hold the lock until we've finished fiddling with the metadata,
// otherwise the block allocator can get confused.
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
*hd = bd;
return n;
}
@@ -76,17 +64,13 @@ allocBlocks_sync(uint32_t n, bdescr **hd)
void
freeChain_sync(bdescr *bd)
{
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- freeChain(bd);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ freeChain_lock(bd);
}
void
freeGroup_sync(bdescr *bd)
{
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- freeGroup(bd);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ freeGroup_lock(bd);
}
/* -----------------------------------------------------------------------------
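
The _lock allocator variants that the new code calls are the storage manager's mutex-taking wrappers around the unlocked block allocator, mirroring the removed spinlock code above. Their presumed shape, sketched here rather than quoted from the RTS sources:

    bdescr *allocGroupOnNode_lock(uint32_t node, W_ n)
    {
        bdescr *bd;
        ACQUIRE_SM_LOCK;
        bd = allocGroupOnNode(node, n);
        RELEASE_SM_LOCK;
        return bd;
    }

    void freeChain_lock(bdescr *bd)
    {
        ACQUIRE_SM_LOCK;
        freeChain(bd);
        RELEASE_SM_LOCK;
    }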
diff --git a/rts/sm/HeapAlloc.h b/rts/sm/HeapAlloc.h
index 58aae1119d..e05bd41f87 100644
--- a/rts/sm/HeapAlloc.h
+++ b/rts/sm/HeapAlloc.h
@@ -210,9 +210,9 @@ StgBool HEAP_ALLOCED_GC(const void *p)
} else {
// putting the rest out of line turned out to be a slight
// performance improvement:
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK;
b = HEAP_ALLOCED_miss(mblock,p);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
return b;
}
}
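
HEAP_ALLOCED_GC keeps its lock-free fast path; only the out-of-line miss path (HEAP_ALLOCED_miss) now runs under the storage manager lock instead of the spinlock. A hypothetical call site, to show how GC code uses the predicate (illustrative, not quoted from the RTS):

    /* e.g. during evacuation: decide whether p points into the dynamic
     * heap at all (static closures live outside block-allocator mblocks). */
    if (!HEAP_ALLOCED_GC(p)) {
        // static object: no copying required
        return;
    }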
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index bdf22dc37b..6e9d4e5375 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -467,8 +467,7 @@ unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size)
* Request a fresh segment from the free segment list or allocate one of the
* given node.
*
- * Caller must hold SM_MUTEX (although we take the gc_alloc_block_sync spinlock
- * under the assumption that we are in a GC context).
+ * Caller must hold SM_MUTEX.
*/
static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
{
@@ -480,12 +479,10 @@ static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
if (ret == NULL) {
// Take gc spinlock: another thread may be scavenging a moving
// generation and call `todo_block_full`
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
// See Note [Live data accounting in nonmoving collector].
oldest_gen->n_blocks += bd->blocks;
oldest_gen->n_words += BLOCK_SIZE_W * bd->blocks;
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
for (StgWord32 i = 0; i < bd->blocks; ++i) {
initBdescr(&bd[i], oldest_gen, oldest_gen);
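
nonmovingAllocSegment previously took gc_alloc_block_sync around the allocation and the live-data accounting; it now simply relies on the caller's SM_MUTEX, per the updated comment. The caller obligation, sketched (illustrative):

    /* Any path that can reach nonmovingAllocSegment must already hold
     * the storage manager mutex: */
    ACQUIRE_SM_LOCK;
    struct NonmovingSegment *seg = nonmovingAllocSegment(node);
    RELEASE_SM_LOCK;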
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 3113e7f013..475762beeb 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -442,6 +442,7 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
q->top->head++;
}
+// TODO: Eliminate this
/* A variant of push to be used by the minor GC when it encounters a reference
* to an object in the non-moving heap. In contrast to the other push
* operations this uses the gc_alloc_block_sync spinlock instead of the
@@ -459,13 +460,13 @@ markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
// Yes, this block is full.
// allocate a fresh block.
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK;
bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
bd->link = q->blocks;
q->blocks = bd;
q->top = (MarkQueueBlock *) bd->start;
q->top->head = 0;
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
}
MarkQueueEnt ent = {
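
markQueuePushClosureGC grows the queue one block at a time; only the block-full slow path needs the lock, and it now takes SM_LOCK instead of the spinlock. The mark-queue block layout this code manipulates, as inferable from q->top->head and the (MarkQueueBlock *) bd->start cast (a sketch, not the exact RTS definition):

    typedef struct {
        StgWord      head;       /* index of the first free entry */
        MarkQueueEnt entries[];  /* up to MARK_QUEUE_BLOCK_ENTRIES */
    } MarkQueueBlock;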
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 49c367fa8d..1e8cfadbe0 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -174,12 +174,6 @@ initStorage (void)
// Nonmoving heap uses oldest_gen so initialize it after initializing oldest_gen
nonmovingInit();
-#if defined(THREADED_RTS)
- // nonmovingAddCapabilities allocates segments, which requires taking the gc
- // sync lock, so initialize it before nonmovingAddCapabilities
- initSpinLock(&gc_alloc_block_sync);
-#endif
-
if (RtsFlags.GcFlags.useNonmoving)
nonmovingAddCapabilities(n_capabilities);
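
With no spinlock left to set up, initStorage's ordering constraint reduces to the one stated in the surviving comment. The resulting sequence, condensed and illustrative (assuming sm_mutex is initialized earlier in initStorage, since it now guards segment allocation):

    void initStorage(void)
    {
        /* ... sm_mutex, generations, oldest_gen set up above ... */
        nonmovingInit();                            /* needs oldest_gen */
        if (RtsFlags.GcFlags.useNonmoving)
            nonmovingAddCapabilities(n_capabilities); /* allocates segments
                                                       * under sm_mutex */
    }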