author     Ben Gamari <ben@smart-cactus.org>   2022-11-19 16:57:53 +0000
committer  Ben Gamari <ben@smart-cactus.org>   2023-01-26 18:39:37 -0500
commit     893f7a947ad5f6e7a8be1693a04849a537e4d0a8
tree       e5eed78574caaf50bc938cda4803a6d0debd4c80
parent     5640cb1d84d3cce4ce0a9e90d29b2b20d2b38c2f
download   haskell-wip/no-sm-spinlock.tar.gz

rts: Drop SM spinlock   (branch: wip/no-sm-spinlock)
-rw-r--r--  rts/Capability.h                  |   4
-rw-r--r--  rts/CheckUnload.c                 |   1
-rw-r--r--  rts/Sparks.h                      |   1
-rw-r--r--  rts/Stats.c                       |  12
-rw-r--r--  rts/include/rts/storage/MBlock.h  |   4
-rw-r--r--  rts/sm/BlockAlloc.c               |   6
-rw-r--r--  rts/sm/GC.c                       |  30
-rw-r--r--  rts/sm/GC.h                       |   2
-rw-r--r--  rts/sm/GCUtils.c                  |  51
-rw-r--r--  rts/sm/GCUtils.h                  |  18
-rw-r--r--  rts/sm/HeapAlloc.h                |   6
-rw-r--r--  rts/sm/MarkStack.h                |   2
-rw-r--r--  rts/sm/NonMoving.c                |  12
-rw-r--r--  rts/sm/NonMovingMark.c            |  13
-rw-r--r--  rts/sm/NonMovingMark.h            |   3
-rw-r--r--  rts/sm/NonMovingShortcut.c        |   1
-rw-r--r--  rts/sm/Sanity.h                   |   2
-rw-r--r--  rts/sm/Scav.c                     |   4
-rw-r--r--  rts/sm/Storage.c                  |  22

19 files changed, 53 insertions(+), 141 deletions(-)
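
In brief: the patch removes the dedicated gc_alloc_block_sync spinlock that serialized block
allocation and freeing during GC. GC-side callers now go through the ordinary storage-manager
mutex instead, either via the pre-existing *_lock wrappers (allocGroup_lock, allocBlock_lock,
freeChain_lock, freeGroup_lock) or by taking ACQUIRE_SM_LOCK/RELEASE_SM_LOCK explicitly, and
GarbageCollect no longer holds the SM lock for the whole collection. A minimal sketch of the
substitution follows; the "after" wrapper shape is an assumption for illustration only (the real
_lock helpers already exist in the RTS and are not part of this patch):

    /* Before this patch (removed from GCUtils.c): GC threads allocated
     * block groups under the dedicated gc_alloc_block_sync spinlock. */
    bdescr* allocGroup_sync(uint32_t n)
    {
        bdescr *bd;
        uint32_t node = capNoToNumaNode(gct->thread_index);
        ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
        bd = allocGroupOnNode(node, n);
        RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
        return bd;
    }

    /* After this patch: call sites use the SM-mutex wrappers instead.
     * Assumed shape, shown only to illustrate the substitution: */
    bdescr* allocGroup_lock(W_ n)
    {
        bdescr *bd;
        ACQUIRE_SM_LOCK;    /* sm_mutex: an OS mutex rather than a spinlock */
        bd = allocGroup(n);
        RELEASE_SM_LOCK;
        return bd;
    }

A consequence visible below is that freeGroup() can now assert ASSERT_SM_LOCK(), and the slow
path of HEAP_ALLOCED_GC takes the SM lock, which the patch itself flags as possibly too expensive.
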
diff --git a/rts/Capability.h b/rts/Capability.h
index 2910c3faf0..597b25b37a 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -20,10 +20,10 @@
#pragma once
-#include "sm/GC.h" // for evac_fn
#include "Task.h"
#include "Sparks.h"
-#include "sm/NonMovingMark.h" // for MarkQueue
+#include "sm/GC.h" // for evac_fn
+#include "sm/NonMovingMark.h" // for UpdRemSet
#include "IOManager.h" // for CapIOManager
#include "BeginPrivate.h"
diff --git a/rts/CheckUnload.c b/rts/CheckUnload.c
index 3761fd2bf5..7a745e2a33 100644
--- a/rts/CheckUnload.c
+++ b/rts/CheckUnload.c
@@ -15,6 +15,7 @@
#include "Hash.h"
#include "LinkerInternals.h"
#include "CheckUnload.h"
+#include "sm/HeapAlloc.h"
#include "sm/Storage.h"
#include "sm/GCThread.h"
#include "sm/HeapUtils.h"
diff --git a/rts/Sparks.h b/rts/Sparks.h
index 58817b80af..13f2d3f6af 100644
--- a/rts/Sparks.h
+++ b/rts/Sparks.h
@@ -8,6 +8,7 @@
#pragma once
+#include "sm/GC.h" // for evac_fn
#include "WSDeque.h"
#include "BeginPrivate.h"
diff --git a/rts/Stats.c b/rts/Stats.c
index a2701bd0c8..abfd522338 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -963,11 +963,6 @@ static void report_summary(const RTSSummaryStats* sum)
, col_width[1], "SpinLock"
, col_width[2], "Spins"
, col_width[3], "Yields");
- statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
- , col_width[0], ""
- , col_width[1], "gc_alloc_block_sync"
- , col_width[2], gc_alloc_block_sync.spin
- , col_width[3], gc_alloc_block_sync.yield);
statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
, col_width[0], ""
, col_width[1], "whitehole_gc"
@@ -1142,10 +1137,6 @@ static void report_machine_readable (const RTSSummaryStats * sum)
// next, internal counters
#if defined(PROF_SPIN)
- MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
- MR_STAT("gc_alloc_block_sync_yield", FMT_Word64,
- gc_alloc_block_sync.yield);
- MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin);
MR_STAT("waitForGcThreads_spin", FMT_Word64, waitForGcThreads_spin);
MR_STAT("waitForGcThreads_yield", FMT_Word64,
waitForGcThreads_yield);
@@ -1572,9 +1563,6 @@ SpinLock:
Not all of these are actual SpinLocks, see the details below.
Actual SpinLocks:
-* gc_alloc_block:
- This SpinLock protects the block allocator and free list manager. See
- BlockAlloc.c.
* gen[g].sync:
These SpinLocks, one per generation, protect the generations[g] data
structure during garbage collection.
diff --git a/rts/include/rts/storage/MBlock.h b/rts/include/rts/storage/MBlock.h
index 3acefda9a0..29f2e9a2ac 100644
--- a/rts/include/rts/storage/MBlock.h
+++ b/rts/include/rts/storage/MBlock.h
@@ -26,7 +26,3 @@ extern void freeAllMBlocks(void);
extern void *getFirstMBlock(void **state);
extern void *getNextMBlock(void **state, void *mblock);
-#if defined(THREADED_RTS)
-// needed for HEAP_ALLOCED below
-extern SpinLock gc_alloc_block_sync;
-#endif
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 257dc253ed..acae755525 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -428,6 +428,7 @@ alloc_mega_group (uint32_t node, StgWord mblocks)
bdescr *best, *bd;
StgWord n;
+ ASSERT_SM_LOCK();
n = MBLOCK_GROUP_BLOCKS(mblocks);
if(defer_mblock_frees)
@@ -486,6 +487,7 @@ allocGroupOnNode (uint32_t node, W_ n)
bdescr *bd, *rem;
StgWord ln;
+ ASSERT_SM_LOCK();
if (n == 0) barf("allocGroup: requested zero blocks");
if (n >= BLOCKS_PER_MBLOCK)
@@ -699,6 +701,7 @@ bdescr* allocLargeChunkOnNode (uint32_t node, W_ min, W_ max)
{
bdescr *bd;
StgWord ln, lnmax;
+ ASSERT_SM_LOCK();
if (min >= BLOCKS_PER_MBLOCK) {
return allocGroupOnNode(node,max);
@@ -923,8 +926,7 @@ freeGroup(bdescr *p)
StgWord ln;
uint32_t node;
- // not true in multithreaded GC:
- // ASSERT_SM_LOCK();
+ ASSERT_SM_LOCK();
ASSERT(RELAXED_LOAD(&p->free) != (P_)-1);
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 2438ad2816..7ff5e55227 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -313,8 +313,6 @@ GarbageCollect (uint32_t collect_gen,
CostCentreStack *save_CCS[getNumCapabilities()];
#endif
- ACQUIRE_SM_LOCK;
-
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
// block signals
@@ -594,9 +592,7 @@ GarbageCollect (uint32_t collect_gen,
// the current garbage collection, so we invoke LdvCensusForDead().
if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
|| RtsFlags.ProfFlags.bioSelector != NULL) {
- RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
LdvCensusForDead(N);
- ACQUIRE_SM_LOCK;
}
#endif
@@ -765,7 +761,7 @@ GarbageCollect (uint32_t collect_gen,
}
else // not compacted
{
- freeChain(gen->old_blocks);
+ freeChain_lock(gen->old_blocks);
}
gen->old_blocks = NULL;
@@ -776,7 +772,7 @@ GarbageCollect (uint32_t collect_gen,
* collection from large_objects. Any objects left on the
* large_objects list are therefore dead, so we free them here.
*/
- freeChain(gen->large_objects);
+ freeChain_lock(gen->large_objects);
gen->large_objects = gen->scavenged_large_objects;
gen->n_large_blocks = gen->n_scavenged_large_blocks;
gen->n_large_words = countOccupied(gen->large_objects);
@@ -895,7 +891,7 @@ GarbageCollect (uint32_t collect_gen,
if (mark_stack_top_bd != NULL) {
debugTrace(DEBUG_gc, "mark stack: %d blocks",
countBlocks(mark_stack_top_bd));
- freeChain(mark_stack_top_bd);
+ freeChain_lock(mark_stack_top_bd);
}
// Free any bitmaps.
@@ -947,9 +943,7 @@ GarbageCollect (uint32_t collect_gen,
// Start any pending finalizers. Must be after
// updateStableTables() and stableUnlock() (see #4221).
- RELEASE_SM_LOCK;
scheduleFinalizers(cap, dead_weak_ptr_list);
- ACQUIRE_SM_LOCK;
// check sanity after GC
// before resurrectThreads(), because that might overwrite some
@@ -964,9 +958,7 @@ GarbageCollect (uint32_t collect_gen,
// behind.
if (do_heap_census) {
debugTrace(DEBUG_sched, "performing heap census");
- RELEASE_SM_LOCK;
heapCensus(mut_time);
- ACQUIRE_SM_LOCK;
}
#if defined(TICKY_TICKY)
@@ -980,14 +972,14 @@ GarbageCollect (uint32_t collect_gen,
#endif
// send exceptions to any threads which were about to die
- RELEASE_SM_LOCK;
resurrectThreads(resurrected_threads);
- ACQUIRE_SM_LOCK;
// Finally free the deferred mblocks by sorting the deferred free list and
// merging it into the actual sorted free list. This needs to happen here so
// that the `returnMemoryToOS` call down below can successfully free memory.
+ ACQUIRE_SM_LOCK;
commitMBlockFreeing();
+ RELEASE_SM_LOCK;
if (major_gc) {
W_ need_prealloc, need_live, need, got;
@@ -1100,8 +1092,6 @@ GarbageCollect (uint32_t collect_gen,
}
#endif
- RELEASE_SM_LOCK;
-
SET_GCT(saved_gct);
}
@@ -1151,7 +1141,7 @@ new_gc_thread (uint32_t n, gc_thread *t)
// but can't, because it uses gct which isn't set up at this point.
// Hence, allocate a block for todo_bd manually:
{
- bdescr *bd = allocBlockOnNode(capNoToNumaNode(n));
+ bdescr *bd = allocBlockOnNode_lock(capNoToNumaNode(n));
// no lock, locks aren't initialised yet
initBdescr(bd, ws->gen, ws->gen->to);
bd->flags = BF_EVACUATED;
@@ -1606,7 +1596,7 @@ static void
stash_mut_list (Capability *cap, uint32_t gen_no)
{
cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
- RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_sync(cap->node));
+ RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_lock(cap->node));
}
/* ----------------------------------------------------------------------------
@@ -1633,9 +1623,9 @@ prepare_collected_gen (generation *gen)
// a check for NULL in recordMutable().
for (i = 0; i < getNumCapabilities(); i++) {
bdescr *old = RELAXED_LOAD(&getCapability(i)->mut_lists[g]);
- freeChain(old);
+ freeChain_lock(old);
- bdescr *new = allocBlockOnNode(capNoToNumaNode(i));
+ bdescr *new = allocBlockOnNode_lock(capNoToNumaNode(i));
RELAXED_STORE(&getCapability(i)->mut_lists[g], new);
}
}
@@ -1718,7 +1708,7 @@ prepare_collected_gen (generation *gen)
bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_);
if (bitmap_size > 0) {
- bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
+ bitmap_bdescr = allocGroup_lock((StgWord)BLOCK_ROUND_UP(bitmap_size)
/ BLOCK_SIZE);
gen->bitmap = bitmap_bdescr;
bitmap = bitmap_bdescr->start;
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index 25de588534..f6b091fc02 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -13,8 +13,6 @@
#pragma once
-#include "HeapAlloc.h"
-
#include "BeginPrivate.h"
void GarbageCollect (uint32_t collect_gen,
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 9d57bf7d9e..a33c60b2a5 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -26,38 +26,15 @@
#include "WSDeque.h"
#endif
-#if defined(THREADED_RTS)
-SpinLock gc_alloc_block_sync;
-#endif
-
static void push_todo_block(bdescr *bd, gen_workspace *ws);
-bdescr* allocGroup_sync(uint32_t n)
-{
- bdescr *bd;
- uint32_t node = capNoToNumaNode(gct->thread_index);
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- bd = allocGroupOnNode(node,n);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
- return bd;
-}
-
-bdescr* allocGroupOnNode_sync(uint32_t node, uint32_t n)
-{
- bdescr *bd;
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- bd = allocGroupOnNode(node,n);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
- return bd;
-}
-
static uint32_t
-allocBlocks_sync(uint32_t n, bdescr **hd)
+allocBlocks_lock(uint32_t n, bdescr **hd)
{
bdescr *bd;
uint32_t i;
uint32_t node = capNoToNumaNode(gct->thread_index);
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK;
bd = allocLargeChunkOnNode(node,1,n);
// NB. allocLargeChunk, rather than allocGroup(n), to allocate in a
// fragmentation-friendly way.
@@ -70,27 +47,11 @@ allocBlocks_sync(uint32_t n, bdescr **hd)
bd[n-1].link = NULL;
// We have to hold the lock until we've finished fiddling with the metadata,
// otherwise the block allocator can get confused.
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
*hd = bd;
return n;
}
-void
-freeChain_sync(bdescr *bd)
-{
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- freeChain(bd);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
-}
-
-void
-freeGroup_sync(bdescr *bd)
-{
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- freeGroup(bd);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
-}
-
/* -----------------------------------------------------------------------------
Workspace utilities
-------------------------------------------------------------------------- */
@@ -303,7 +264,7 @@ todo_block_full (uint32_t size, gen_workspace *ws)
// object. However, if the object we're copying is
// larger than a block, then we might have an empty
// block here.
- freeGroup_sync(bd);
+ freeGroup_lock(bd);
} else {
push_scanned_block(bd, ws);
}
@@ -343,14 +304,14 @@ alloc_todo_block (gen_workspace *ws, uint32_t size)
else
{
if (size > BLOCK_SIZE_W) {
- bd = allocGroup_sync((W_)BLOCK_ROUND_UP(size*sizeof(W_))
+ bd = allocGroup_lock((W_)BLOCK_ROUND_UP(size*sizeof(W_))
/ BLOCK_SIZE);
} else {
if (gct->free_blocks) {
bd = gct->free_blocks;
gct->free_blocks = bd->link;
} else {
- allocBlocks_sync(16, &bd);
+ allocBlocks_lock(16, &bd);
gct->free_blocks = bd->link;
}
}
diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h
index dec81e1755..c1ed0878eb 100644
--- a/rts/sm/GCUtils.h
+++ b/rts/sm/GCUtils.h
@@ -17,22 +17,6 @@
#include "BeginPrivate.h"
-bdescr* allocGroup_sync(uint32_t n);
-bdescr* allocGroupOnNode_sync(uint32_t node, uint32_t n);
-
-INLINE_HEADER bdescr *allocBlock_sync(void)
-{
- return allocGroup_sync(1);
-}
-
-INLINE_HEADER bdescr *allocBlockOnNode_sync(uint32_t node)
-{
- return allocGroupOnNode_sync(node,1);
-}
-
-void freeChain_sync(bdescr *bd);
-void freeGroup_sync(bdescr *bd);
-
void push_scanned_block (bdescr *bd, gen_workspace *ws);
StgPtr todo_block_full (uint32_t size, gen_workspace *ws);
StgPtr alloc_todo_block (gen_workspace *ws, uint32_t size);
@@ -62,7 +46,7 @@ recordMutableGen_GC (StgClosure *p, uint32_t gen_no)
bd = gct->mut_lists[gen_no];
if (bd->free >= bd->start + BLOCK_SIZE_W) {
bdescr *new_bd;
- new_bd = allocBlock_sync();
+ new_bd = allocBlock_lock();
new_bd->link = bd;
bd = new_bd;
gct->mut_lists[gen_no] = bd;
diff --git a/rts/sm/HeapAlloc.h b/rts/sm/HeapAlloc.h
index 58aae1119d..a24c1def2f 100644
--- a/rts/sm/HeapAlloc.h
+++ b/rts/sm/HeapAlloc.h
@@ -10,6 +10,8 @@
#include "BeginPrivate.h"
+#include "Storage.h"
+
/* -----------------------------------------------------------------------------
The HEAP_ALLOCED() test.
@@ -210,9 +212,9 @@ StgBool HEAP_ALLOCED_GC(const void *p)
} else {
// putting the rest out of line turned out to be a slight
// performance improvement:
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK; // TODO: this may be too expensive
b = HEAP_ALLOCED_miss(mblock,p);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
return b;
}
}
diff --git a/rts/sm/MarkStack.h b/rts/sm/MarkStack.h
index 8ea47a1865..121086352d 100644
--- a/rts/sm/MarkStack.h
+++ b/rts/sm/MarkStack.h
@@ -32,7 +32,7 @@ push_mark_stack(StgPtr p)
}
else
{
- bd = allocBlock_sync();
+ bd = allocBlock_lock();
bd->link = mark_stack_bd;
bd->u.back = NULL;
mark_stack_bd->u.back = bd; // double-link the new block on
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index 3731aebb95..fcdc94d2dd 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -589,14 +589,10 @@ static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
// Nothing in the free list, allocate a new segment...
if (ret == NULL) {
- // Take gc spinlock: another thread may be scavenging a moving
- // generation and call `todo_block_full`
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS);
// See Note [Live data accounting in nonmoving collector].
oldest_gen->n_blocks += bd->blocks;
oldest_gen->n_words += BLOCK_SIZE_W * bd->blocks;
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
for (StgWord32 i = 0; i < bd->blocks; ++i) {
initBdescr(&bd[i], oldest_gen, oldest_gen);
@@ -659,7 +655,7 @@ static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *al
}
}
-/* Allocate a block in the nonmoving heap. Caller must hold SM_MUTEX. sz is in words */
+/* Allocate a block in the nonmoving heap. sz is in words */
GNUC_ATTR_HOT
void *nonmovingAllocate(Capability *cap, StgWord sz)
{
@@ -699,7 +695,9 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
// there are no active segments, allocate new segment
if (new_current == NULL) {
+ ACQUIRE_SM_LOCK;
new_current = nonmovingAllocSegment(cap->node);
+ RELEASE_SM_LOCK;
nonmovingInitSegment(new_current, log_block_size);
}
@@ -782,14 +780,13 @@ void nonmovingExit(void)
/*
* Assumes that no garbage collector or mutator threads are running to safely
* resize the nonmoving_allocators.
- *
- * Must hold sm_mutex.
*/
void nonmovingAddCapabilities(uint32_t new_n_caps)
{
unsigned int old_n_caps = nonmovingHeap.n_caps;
struct NonmovingAllocator **allocs = nonmovingHeap.allocators;
+ ACQUIRE_SM_LOCK;
for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) {
struct NonmovingAllocator *old = allocs[i];
allocs[i] = alloc_nonmoving_allocator(new_n_caps);
@@ -811,6 +808,7 @@ void nonmovingAddCapabilities(uint32_t new_n_caps)
}
}
nonmovingHeap.n_caps = new_n_caps;
+ RELEASE_SM_LOCK;
}
void nonmovingClearBitmap(struct NonmovingSegment *seg)
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index a6ce3678bc..bb1c5e506b 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -291,10 +291,8 @@ static void nonmovingAddUpdRemSetBlocks_lock(MarkQueue *rset)
nonmovingAddUpdRemSetBlocks_(rset);
// Reset the state of the remembered set.
- ACQUIRE_SM_LOCK;
init_mark_queue_(rset);
rset->is_upd_rem_set = true;
- RELEASE_SM_LOCK;
}
/*
@@ -468,9 +466,7 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
}
/* A variant of push to be used by the minor GC when it encounters a reference
- * to an object in the non-moving heap. In contrast to the other push
- * operations this uses the gc_alloc_block_sync spinlock instead of the
- * SM_LOCK to allocate new blocks in the event that the mark queue is full.
+ * to an object in the non-moving heap.
*/
void
markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
@@ -491,13 +487,13 @@ markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
// Yes, this block is full.
// allocate a fresh block.
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ ACQUIRE_SM_LOCK;
bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
bd->link = q->blocks;
q->blocks = bd;
q->top = (MarkQueueBlock *) bd->start;
q->top->head = 0;
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ RELEASE_SM_LOCK;
}
MarkQueueEnt ent = {
@@ -917,6 +913,7 @@ static MarkQueueEnt markQueuePop (MarkQueue *q)
static void init_mark_queue_ (MarkQueue *queue)
{
bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
+ ASSERT(queue->blocks == NULL);
queue->blocks = bd;
queue->top = (MarkQueueBlock *) bd->start;
queue->top->head = 0;
@@ -926,14 +923,12 @@ static void init_mark_queue_ (MarkQueue *queue)
#endif
}
-/* Must hold sm_mutex. */
void initMarkQueue (MarkQueue *queue)
{
init_mark_queue_(queue);
queue->is_upd_rem_set = false;
}
-/* Must hold sm_mutex. */
void nonmovingInitUpdRemSet (UpdRemSet *rset)
{
init_mark_queue_(&rset->queue);
diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h
index 4b5c61cb38..de8e962ff8 100644
--- a/rts/sm/NonMovingMark.h
+++ b/rts/sm/NonMovingMark.h
@@ -9,10 +9,11 @@
#pragma once
#include "Task.h"
-#include "NonMoving.h"
#include "BeginPrivate.h"
+struct NonMovingHeap;
+
enum EntryType {
NULL_ENTRY = 0,
MARK_CLOSURE = 1,
diff --git a/rts/sm/NonMovingShortcut.c b/rts/sm/NonMovingShortcut.c
index ee97ba1b70..f7b6849e77 100644
--- a/rts/sm/NonMovingShortcut.c
+++ b/rts/sm/NonMovingShortcut.c
@@ -10,6 +10,7 @@
#include "Rts.h"
#include "GC.h"
#include "SMPClosureOps.h"
+#include "NonMoving.h"
#include "NonMovingMark.h"
#include "NonMovingShortcut.h"
#include "Printer.h"
diff --git a/rts/sm/Sanity.h b/rts/sm/Sanity.h
index b6f2054383..7dc761cfd3 100644
--- a/rts/sm/Sanity.h
+++ b/rts/sm/Sanity.h
@@ -19,6 +19,8 @@
# define MAX_SLOTS 100000
# endif
+struct NonmovingHeap;
+
/* debugging routines */
void checkSanity ( bool after_gc, bool major_gc );
void checkNurserySanity ( nursery *nursery );
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index be30e75b8f..cc5b1e669b 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -1730,7 +1730,7 @@ scavenge_capability_mut_lists (Capability *cap)
if (RtsFlags.GcFlags.useNonmoving && major_gc) {
uint32_t g = oldest_gen->no;
scavenge_mutable_list(cap->saved_mut_lists[g], oldest_gen);
- freeChain_sync(cap->saved_mut_lists[g]);
+ freeChain_lock(cap->saved_mut_lists[g]);
cap->saved_mut_lists[g] = NULL;
return;
}
@@ -1743,7 +1743,7 @@ scavenge_capability_mut_lists (Capability *cap)
*/
for (uint32_t g = RtsFlags.GcFlags.generations-1; g > N; g--) {
scavenge_mutable_list(cap->saved_mut_lists[g], &generations[g]);
- freeChain_sync(cap->saved_mut_lists[g]);
+ freeChain_lock(cap->saved_mut_lists[g]);
cap->saved_mut_lists[g] = NULL;
}
}
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 40d8a45806..cc28944efe 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -193,14 +193,13 @@ initStorage (void)
initMutex(&sm_mutex);
#endif
- ACQUIRE_SM_LOCK;
-
/* allocate generation info array */
generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
* sizeof(struct generation_),
"initStorage: gens");
/* Initialise all generations */
+ ACQUIRE_SM_LOCK;
for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
initGeneration(&generations[g], g);
}
@@ -214,16 +213,11 @@ initStorage (void)
generations[g].to = &generations[g+1];
}
oldest_gen->to = oldest_gen;
+ RELEASE_SM_LOCK;
// Nonmoving heap uses oldest_gen so initialize it after initializing oldest_gen
nonmovingInit();
-#if defined(THREADED_RTS)
- // nonmovingAddCapabilities allocates segments, which requires taking the gc
- // sync lock, so initialize it before nonmovingAddCapabilities
- initSpinLock(&gc_alloc_block_sync);
-#endif
-
if (RtsFlags.GcFlags.useNonmoving)
nonmovingAddCapabilities(getNumCapabilities());
@@ -261,8 +255,6 @@ initStorage (void)
IF_DEBUG(gc, statDescribeGens());
- RELEASE_SM_LOCK;
-
traceInitEvent(traceHeapInfo);
}
@@ -314,12 +306,14 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
assignNurseriesToCapabilities(from,to);
// allocate a block for each mut list
+ ACQUIRE_SM_LOCK;
for (n = from; n < to; n++) {
for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
getCapability(n)->mut_lists[g] =
allocBlockOnNode(capNoToNumaNode(n));
}
}
+ RELEASE_SM_LOCK;
// Initialize NonmovingAllocators and UpdRemSets
if (RtsFlags.GcFlags.useNonmoving) {
@@ -564,9 +558,7 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
// Allocate the blackhole indirection closure
if (RtsFlags.GcFlags.useNonmoving) {
// See Note [Static objects under the nonmoving collector].
- ACQUIRE_SM_LOCK;
bh = (StgInd *)nonmovingAllocate(cap, sizeofW(*bh));
- RELEASE_SM_LOCK;
recordMutableCap((StgClosure*)bh,
regTableToCapability(reg), oldest_gen->no);
} else {
@@ -724,6 +716,7 @@ allocNursery (uint32_t node, bdescr *tail, W_ blocks)
// automatic prefetching works across nursery blocks. This is a
// tiny optimisation (~0.5%), but it's free.
+ ACQUIRE_SM_LOCK;
while (blocks > 0) {
n = stg_min(BLOCKS_PER_MBLOCK, blocks);
// allocLargeChunk will prefer large chunks, but will pick up
@@ -759,6 +752,7 @@ allocNursery (uint32_t node, bdescr *tail, W_ blocks)
tail = &bd[0];
}
+ RELEASE_SM_LOCK;
return &bd[0];
}
@@ -878,7 +872,7 @@ resizeNurseriesEach (W_ blocks)
next_bd = bd->link;
next_bd->u.back = NULL;
nursery_blocks -= bd->blocks; // might be a large block
- freeGroup(bd);
+ freeGroup_lock(bd);
bd = next_bd;
}
nursery->blocks = bd;
@@ -1299,9 +1293,7 @@ allocatePinned (Capability *cap, W_ n /*words*/, W_ alignment /*bytes*/, W_ alig
if (bd == NULL) {
// The pinned block list is empty: allocate a fresh block (we can't fail
// here).
- ACQUIRE_SM_LOCK;
bd = allocNursery(cap->node, NULL, PINNED_EMPTY_SIZE);
- RELEASE_SM_LOCK;
}
// Bump up the nursery pointer to avoid the pathological situation