From 893f7a947ad5f6e7a8be1693a04849a537e4d0a8 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Sat, 19 Nov 2022 16:57:53 +0000 Subject: rts: Drop SM spinlock --- rts/Capability.h | 4 ++-- rts/CheckUnload.c | 1 + rts/Sparks.h | 1 + rts/Stats.c | 12 ---------- rts/include/rts/storage/MBlock.h | 4 ---- rts/sm/BlockAlloc.c | 6 +++-- rts/sm/GC.c | 30 ++++++++--------------- rts/sm/GC.h | 2 -- rts/sm/GCUtils.c | 51 +++++----------------------------------- rts/sm/GCUtils.h | 18 +------------- rts/sm/HeapAlloc.h | 6 +++-- rts/sm/MarkStack.h | 2 +- rts/sm/NonMoving.c | 12 ++++------ rts/sm/NonMovingMark.c | 13 ++++------ rts/sm/NonMovingMark.h | 3 ++- rts/sm/NonMovingShortcut.c | 1 + rts/sm/Sanity.h | 2 ++ rts/sm/Scav.c | 4 ++-- rts/sm/Storage.c | 22 ++++++----------- 19 files changed, 53 insertions(+), 141 deletions(-) diff --git a/rts/Capability.h b/rts/Capability.h index 2910c3faf0..597b25b37a 100644 --- a/rts/Capability.h +++ b/rts/Capability.h @@ -20,10 +20,10 @@ #pragma once -#include "sm/GC.h" // for evac_fn #include "Task.h" #include "Sparks.h" -#include "sm/NonMovingMark.h" // for MarkQueue +#include "sm/GC.h" // for evac_fn +#include "sm/NonMovingMark.h" // for UpdRemSet #include "IOManager.h" // for CapIOManager #include "BeginPrivate.h" diff --git a/rts/CheckUnload.c b/rts/CheckUnload.c index 3761fd2bf5..7a745e2a33 100644 --- a/rts/CheckUnload.c +++ b/rts/CheckUnload.c @@ -15,6 +15,7 @@ #include "Hash.h" #include "LinkerInternals.h" #include "CheckUnload.h" +#include "sm/HeapAlloc.h" #include "sm/Storage.h" #include "sm/GCThread.h" #include "sm/HeapUtils.h" diff --git a/rts/Sparks.h b/rts/Sparks.h index 58817b80af..13f2d3f6af 100644 --- a/rts/Sparks.h +++ b/rts/Sparks.h @@ -8,6 +8,7 @@ #pragma once +#include "sm/GC.h" // for evac_fn #include "WSDeque.h" #include "BeginPrivate.h" diff --git a/rts/Stats.c b/rts/Stats.c index a2701bd0c8..abfd522338 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -963,11 +963,6 @@ static void report_summary(const RTSSummaryStats* sum) , col_width[1], "SpinLock" , col_width[2], "Spins" , col_width[3], "Yields"); - statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" - , col_width[0], "" - , col_width[1], "gc_alloc_block_sync" - , col_width[2], gc_alloc_block_sync.spin - , col_width[3], gc_alloc_block_sync.yield); statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n" , col_width[0], "" , col_width[1], "whitehole_gc" @@ -1142,10 +1137,6 @@ static void report_machine_readable (const RTSSummaryStats * sum) // next, internal counters #if defined(PROF_SPIN) - MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin); - MR_STAT("gc_alloc_block_sync_yield", FMT_Word64, - gc_alloc_block_sync.yield); - MR_STAT("gc_alloc_block_sync_spin", FMT_Word64, gc_alloc_block_sync.spin); MR_STAT("waitForGcThreads_spin", FMT_Word64, waitForGcThreads_spin); MR_STAT("waitForGcThreads_yield", FMT_Word64, waitForGcThreads_yield); @@ -1572,9 +1563,6 @@ SpinLock: Not all of these are actual SpinLocks, see the details below. Actual SpinLocks: -* gc_alloc_block: - This SpinLock protects the block allocator and free list manager. See - BlockAlloc.c. * gen[g].sync: These SpinLocks, one per generation, protect the generations[g] data structure during garbage collection. diff --git a/rts/include/rts/storage/MBlock.h b/rts/include/rts/storage/MBlock.h index 3acefda9a0..29f2e9a2ac 100644 --- a/rts/include/rts/storage/MBlock.h +++ b/rts/include/rts/storage/MBlock.h @@ -26,7 +26,3 @@ extern void freeAllMBlocks(void); extern void *getFirstMBlock(void **state); extern void *getNextMBlock(void **state, void *mblock); -#if defined(THREADED_RTS) -// needed for HEAP_ALLOCED below -extern SpinLock gc_alloc_block_sync; -#endif diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c index 257dc253ed..acae755525 100644 --- a/rts/sm/BlockAlloc.c +++ b/rts/sm/BlockAlloc.c @@ -428,6 +428,7 @@ alloc_mega_group (uint32_t node, StgWord mblocks) bdescr *best, *bd; StgWord n; + ASSERT_SM_LOCK(); n = MBLOCK_GROUP_BLOCKS(mblocks); if(defer_mblock_frees) @@ -486,6 +487,7 @@ allocGroupOnNode (uint32_t node, W_ n) bdescr *bd, *rem; StgWord ln; + ASSERT_SM_LOCK(); if (n == 0) barf("allocGroup: requested zero blocks"); if (n >= BLOCKS_PER_MBLOCK) @@ -699,6 +701,7 @@ bdescr* allocLargeChunkOnNode (uint32_t node, W_ min, W_ max) { bdescr *bd; StgWord ln, lnmax; + ASSERT_SM_LOCK(); if (min >= BLOCKS_PER_MBLOCK) { return allocGroupOnNode(node,max); @@ -923,8 +926,7 @@ freeGroup(bdescr *p) StgWord ln; uint32_t node; - // not true in multithreaded GC: - // ASSERT_SM_LOCK(); + ASSERT_SM_LOCK(); ASSERT(RELAXED_LOAD(&p->free) != (P_)-1); diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 2438ad2816..7ff5e55227 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -313,8 +313,6 @@ GarbageCollect (uint32_t collect_gen, CostCentreStack *save_CCS[getNumCapabilities()]; #endif - ACQUIRE_SM_LOCK; - #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { // block signals @@ -594,9 +592,7 @@ GarbageCollect (uint32_t collect_gen, // the current garbage collection, so we invoke LdvCensusForDead(). if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV || RtsFlags.ProfFlags.bioSelector != NULL) { - RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock LdvCensusForDead(N); - ACQUIRE_SM_LOCK; } #endif @@ -765,7 +761,7 @@ GarbageCollect (uint32_t collect_gen, } else // not compacted { - freeChain(gen->old_blocks); + freeChain_lock(gen->old_blocks); } gen->old_blocks = NULL; @@ -776,7 +772,7 @@ GarbageCollect (uint32_t collect_gen, * collection from large_objects. Any objects left on the * large_objects list are therefore dead, so we free them here. */ - freeChain(gen->large_objects); + freeChain_lock(gen->large_objects); gen->large_objects = gen->scavenged_large_objects; gen->n_large_blocks = gen->n_scavenged_large_blocks; gen->n_large_words = countOccupied(gen->large_objects); @@ -895,7 +891,7 @@ GarbageCollect (uint32_t collect_gen, if (mark_stack_top_bd != NULL) { debugTrace(DEBUG_gc, "mark stack: %d blocks", countBlocks(mark_stack_top_bd)); - freeChain(mark_stack_top_bd); + freeChain_lock(mark_stack_top_bd); } // Free any bitmaps. @@ -947,9 +943,7 @@ GarbageCollect (uint32_t collect_gen, // Start any pending finalizers. Must be after // updateStableTables() and stableUnlock() (see #4221). - RELEASE_SM_LOCK; scheduleFinalizers(cap, dead_weak_ptr_list); - ACQUIRE_SM_LOCK; // check sanity after GC // before resurrectThreads(), because that might overwrite some @@ -964,9 +958,7 @@ GarbageCollect (uint32_t collect_gen, // behind. if (do_heap_census) { debugTrace(DEBUG_sched, "performing heap census"); - RELEASE_SM_LOCK; heapCensus(mut_time); - ACQUIRE_SM_LOCK; } #if defined(TICKY_TICKY) @@ -980,14 +972,14 @@ GarbageCollect (uint32_t collect_gen, #endif // send exceptions to any threads which were about to die - RELEASE_SM_LOCK; resurrectThreads(resurrected_threads); - ACQUIRE_SM_LOCK; // Finally free the deferred mblocks by sorting the deferred free list and // merging it into the actual sorted free list. This needs to happen here so // that the `returnMemoryToOS` call down below can successfully free memory. + ACQUIRE_SM_LOCK; commitMBlockFreeing(); + RELEASE_SM_LOCK; if (major_gc) { W_ need_prealloc, need_live, need, got; @@ -1100,8 +1092,6 @@ GarbageCollect (uint32_t collect_gen, } #endif - RELEASE_SM_LOCK; - SET_GCT(saved_gct); } @@ -1151,7 +1141,7 @@ new_gc_thread (uint32_t n, gc_thread *t) // but can't, because it uses gct which isn't set up at this point. // Hence, allocate a block for todo_bd manually: { - bdescr *bd = allocBlockOnNode(capNoToNumaNode(n)); + bdescr *bd = allocBlockOnNode_lock(capNoToNumaNode(n)); // no lock, locks aren't initialised yet initBdescr(bd, ws->gen, ws->gen->to); bd->flags = BF_EVACUATED; @@ -1606,7 +1596,7 @@ static void stash_mut_list (Capability *cap, uint32_t gen_no) { cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no]; - RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_sync(cap->node)); + RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_lock(cap->node)); } /* ---------------------------------------------------------------------------- @@ -1633,9 +1623,9 @@ prepare_collected_gen (generation *gen) // a check for NULL in recordMutable(). for (i = 0; i < getNumCapabilities(); i++) { bdescr *old = RELAXED_LOAD(&getCapability(i)->mut_lists[g]); - freeChain(old); + freeChain_lock(old); - bdescr *new = allocBlockOnNode(capNoToNumaNode(i)); + bdescr *new = allocBlockOnNode_lock(capNoToNumaNode(i)); RELAXED_STORE(&getCapability(i)->mut_lists[g], new); } } @@ -1718,7 +1708,7 @@ prepare_collected_gen (generation *gen) bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_); if (bitmap_size > 0) { - bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size) + bitmap_bdescr = allocGroup_lock((StgWord)BLOCK_ROUND_UP(bitmap_size) / BLOCK_SIZE); gen->bitmap = bitmap_bdescr; bitmap = bitmap_bdescr->start; diff --git a/rts/sm/GC.h b/rts/sm/GC.h index 25de588534..f6b091fc02 100644 --- a/rts/sm/GC.h +++ b/rts/sm/GC.h @@ -13,8 +13,6 @@ #pragma once -#include "HeapAlloc.h" - #include "BeginPrivate.h" void GarbageCollect (uint32_t collect_gen, diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c index 9d57bf7d9e..a33c60b2a5 100644 --- a/rts/sm/GCUtils.c +++ b/rts/sm/GCUtils.c @@ -26,38 +26,15 @@ #include "WSDeque.h" #endif -#if defined(THREADED_RTS) -SpinLock gc_alloc_block_sync; -#endif - static void push_todo_block(bdescr *bd, gen_workspace *ws); -bdescr* allocGroup_sync(uint32_t n) -{ - bdescr *bd; - uint32_t node = capNoToNumaNode(gct->thread_index); - ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); - bd = allocGroupOnNode(node,n); - RELEASE_SPIN_LOCK(&gc_alloc_block_sync); - return bd; -} - -bdescr* allocGroupOnNode_sync(uint32_t node, uint32_t n) -{ - bdescr *bd; - ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); - bd = allocGroupOnNode(node,n); - RELEASE_SPIN_LOCK(&gc_alloc_block_sync); - return bd; -} - static uint32_t -allocBlocks_sync(uint32_t n, bdescr **hd) +allocBlocks_lock(uint32_t n, bdescr **hd) { bdescr *bd; uint32_t i; uint32_t node = capNoToNumaNode(gct->thread_index); - ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); + ACQUIRE_SM_LOCK; bd = allocLargeChunkOnNode(node,1,n); // NB. allocLargeChunk, rather than allocGroup(n), to allocate in a // fragmentation-friendly way. @@ -70,27 +47,11 @@ allocBlocks_sync(uint32_t n, bdescr **hd) bd[n-1].link = NULL; // We have to hold the lock until we've finished fiddling with the metadata, // otherwise the block allocator can get confused. - RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + RELEASE_SM_LOCK; *hd = bd; return n; } -void -freeChain_sync(bdescr *bd) -{ - ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); - freeChain(bd); - RELEASE_SPIN_LOCK(&gc_alloc_block_sync); -} - -void -freeGroup_sync(bdescr *bd) -{ - ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); - freeGroup(bd); - RELEASE_SPIN_LOCK(&gc_alloc_block_sync); -} - /* ----------------------------------------------------------------------------- Workspace utilities -------------------------------------------------------------------------- */ @@ -303,7 +264,7 @@ todo_block_full (uint32_t size, gen_workspace *ws) // object. However, if the object we're copying is // larger than a block, then we might have an empty // block here. - freeGroup_sync(bd); + freeGroup_lock(bd); } else { push_scanned_block(bd, ws); } @@ -343,14 +304,14 @@ alloc_todo_block (gen_workspace *ws, uint32_t size) else { if (size > BLOCK_SIZE_W) { - bd = allocGroup_sync((W_)BLOCK_ROUND_UP(size*sizeof(W_)) + bd = allocGroup_lock((W_)BLOCK_ROUND_UP(size*sizeof(W_)) / BLOCK_SIZE); } else { if (gct->free_blocks) { bd = gct->free_blocks; gct->free_blocks = bd->link; } else { - allocBlocks_sync(16, &bd); + allocBlocks_lock(16, &bd); gct->free_blocks = bd->link; } } diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h index dec81e1755..c1ed0878eb 100644 --- a/rts/sm/GCUtils.h +++ b/rts/sm/GCUtils.h @@ -17,22 +17,6 @@ #include "BeginPrivate.h" -bdescr* allocGroup_sync(uint32_t n); -bdescr* allocGroupOnNode_sync(uint32_t node, uint32_t n); - -INLINE_HEADER bdescr *allocBlock_sync(void) -{ - return allocGroup_sync(1); -} - -INLINE_HEADER bdescr *allocBlockOnNode_sync(uint32_t node) -{ - return allocGroupOnNode_sync(node,1); -} - -void freeChain_sync(bdescr *bd); -void freeGroup_sync(bdescr *bd); - void push_scanned_block (bdescr *bd, gen_workspace *ws); StgPtr todo_block_full (uint32_t size, gen_workspace *ws); StgPtr alloc_todo_block (gen_workspace *ws, uint32_t size); @@ -62,7 +46,7 @@ recordMutableGen_GC (StgClosure *p, uint32_t gen_no) bd = gct->mut_lists[gen_no]; if (bd->free >= bd->start + BLOCK_SIZE_W) { bdescr *new_bd; - new_bd = allocBlock_sync(); + new_bd = allocBlock_lock(); new_bd->link = bd; bd = new_bd; gct->mut_lists[gen_no] = bd; diff --git a/rts/sm/HeapAlloc.h b/rts/sm/HeapAlloc.h index 58aae1119d..a24c1def2f 100644 --- a/rts/sm/HeapAlloc.h +++ b/rts/sm/HeapAlloc.h @@ -10,6 +10,8 @@ #include "BeginPrivate.h" +#include "Storage.h" + /* ----------------------------------------------------------------------------- The HEAP_ALLOCED() test. @@ -210,9 +212,9 @@ StgBool HEAP_ALLOCED_GC(const void *p) } else { // putting the rest out of line turned out to be a slight // performance improvement: - ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); + ACQUIRE_SM_LOCK; // TODO: this may be too expensive b = HEAP_ALLOCED_miss(mblock,p); - RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + RELEASE_SM_LOCK; return b; } } diff --git a/rts/sm/MarkStack.h b/rts/sm/MarkStack.h index 8ea47a1865..121086352d 100644 --- a/rts/sm/MarkStack.h +++ b/rts/sm/MarkStack.h @@ -32,7 +32,7 @@ push_mark_stack(StgPtr p) } else { - bd = allocBlock_sync(); + bd = allocBlock_lock(); bd->link = mark_stack_bd; bd->u.back = NULL; mark_stack_bd->u.back = bd; // double-link the new block on diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c index 3731aebb95..fcdc94d2dd 100644 --- a/rts/sm/NonMoving.c +++ b/rts/sm/NonMoving.c @@ -589,14 +589,10 @@ static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node) // Nothing in the free list, allocate a new segment... if (ret == NULL) { - // Take gc spinlock: another thread may be scavenging a moving - // generation and call `todo_block_full` - ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); bdescr *bd = allocAlignedGroupOnNode(node, NONMOVING_SEGMENT_BLOCKS); // See Note [Live data accounting in nonmoving collector]. oldest_gen->n_blocks += bd->blocks; oldest_gen->n_words += BLOCK_SIZE_W * bd->blocks; - RELEASE_SPIN_LOCK(&gc_alloc_block_sync); for (StgWord32 i = 0; i < bd->blocks; ++i) { initBdescr(&bd[i], oldest_gen, oldest_gen); @@ -659,7 +655,7 @@ static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *al } } -/* Allocate a block in the nonmoving heap. Caller must hold SM_MUTEX. sz is in words */ +/* Allocate a block in the nonmoving heap. sz is in words */ GNUC_ATTR_HOT void *nonmovingAllocate(Capability *cap, StgWord sz) { @@ -699,7 +695,9 @@ void *nonmovingAllocate(Capability *cap, StgWord sz) // there are no active segments, allocate new segment if (new_current == NULL) { + ACQUIRE_SM_LOCK; new_current = nonmovingAllocSegment(cap->node); + RELEASE_SM_LOCK; nonmovingInitSegment(new_current, log_block_size); } @@ -782,14 +780,13 @@ void nonmovingExit(void) /* * Assumes that no garbage collector or mutator threads are running to safely * resize the nonmoving_allocators. - * - * Must hold sm_mutex. */ void nonmovingAddCapabilities(uint32_t new_n_caps) { unsigned int old_n_caps = nonmovingHeap.n_caps; struct NonmovingAllocator **allocs = nonmovingHeap.allocators; + ACQUIRE_SM_LOCK; for (unsigned int i = 0; i < NONMOVING_ALLOCA_CNT; i++) { struct NonmovingAllocator *old = allocs[i]; allocs[i] = alloc_nonmoving_allocator(new_n_caps); @@ -811,6 +808,7 @@ void nonmovingAddCapabilities(uint32_t new_n_caps) } } nonmovingHeap.n_caps = new_n_caps; + RELEASE_SM_LOCK; } void nonmovingClearBitmap(struct NonmovingSegment *seg) diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c index a6ce3678bc..bb1c5e506b 100644 --- a/rts/sm/NonMovingMark.c +++ b/rts/sm/NonMovingMark.c @@ -291,10 +291,8 @@ static void nonmovingAddUpdRemSetBlocks_lock(MarkQueue *rset) nonmovingAddUpdRemSetBlocks_(rset); // Reset the state of the remembered set. - ACQUIRE_SM_LOCK; init_mark_queue_(rset); rset->is_upd_rem_set = true; - RELEASE_SM_LOCK; } /* @@ -468,9 +466,7 @@ push (MarkQueue *q, const MarkQueueEnt *ent) } /* A variant of push to be used by the minor GC when it encounters a reference - * to an object in the non-moving heap. In contrast to the other push - * operations this uses the gc_alloc_block_sync spinlock instead of the - * SM_LOCK to allocate new blocks in the event that the mark queue is full. + * to an object in the non-moving heap. */ void markQueuePushClosureGC (MarkQueue *q, StgClosure *p) @@ -491,13 +487,13 @@ markQueuePushClosureGC (MarkQueue *q, StgClosure *p) if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) { // Yes, this block is full. // allocate a fresh block. - ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); + ACQUIRE_SM_LOCK; bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS); bd->link = q->blocks; q->blocks = bd; q->top = (MarkQueueBlock *) bd->start; q->top->head = 0; - RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + RELEASE_SM_LOCK; } MarkQueueEnt ent = { @@ -917,6 +913,7 @@ static MarkQueueEnt markQueuePop (MarkQueue *q) static void init_mark_queue_ (MarkQueue *queue) { bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS); + ASSERT(queue->blocks == NULL); queue->blocks = bd; queue->top = (MarkQueueBlock *) bd->start; queue->top->head = 0; @@ -926,14 +923,12 @@ static void init_mark_queue_ (MarkQueue *queue) #endif } -/* Must hold sm_mutex. */ void initMarkQueue (MarkQueue *queue) { init_mark_queue_(queue); queue->is_upd_rem_set = false; } -/* Must hold sm_mutex. */ void nonmovingInitUpdRemSet (UpdRemSet *rset) { init_mark_queue_(&rset->queue); diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h index 4b5c61cb38..de8e962ff8 100644 --- a/rts/sm/NonMovingMark.h +++ b/rts/sm/NonMovingMark.h @@ -9,10 +9,11 @@ #pragma once #include "Task.h" -#include "NonMoving.h" #include "BeginPrivate.h" +struct NonMovingHeap; + enum EntryType { NULL_ENTRY = 0, MARK_CLOSURE = 1, diff --git a/rts/sm/NonMovingShortcut.c b/rts/sm/NonMovingShortcut.c index ee97ba1b70..f7b6849e77 100644 --- a/rts/sm/NonMovingShortcut.c +++ b/rts/sm/NonMovingShortcut.c @@ -10,6 +10,7 @@ #include "Rts.h" #include "GC.h" #include "SMPClosureOps.h" +#include "NonMoving.h" #include "NonMovingMark.h" #include "NonMovingShortcut.h" #include "Printer.h" diff --git a/rts/sm/Sanity.h b/rts/sm/Sanity.h index b6f2054383..7dc761cfd3 100644 --- a/rts/sm/Sanity.h +++ b/rts/sm/Sanity.h @@ -19,6 +19,8 @@ # define MAX_SLOTS 100000 # endif +struct NonmovingHeap; + /* debugging routines */ void checkSanity ( bool after_gc, bool major_gc ); void checkNurserySanity ( nursery *nursery ); diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index be30e75b8f..cc5b1e669b 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -1730,7 +1730,7 @@ scavenge_capability_mut_lists (Capability *cap) if (RtsFlags.GcFlags.useNonmoving && major_gc) { uint32_t g = oldest_gen->no; scavenge_mutable_list(cap->saved_mut_lists[g], oldest_gen); - freeChain_sync(cap->saved_mut_lists[g]); + freeChain_lock(cap->saved_mut_lists[g]); cap->saved_mut_lists[g] = NULL; return; } @@ -1743,7 +1743,7 @@ scavenge_capability_mut_lists (Capability *cap) */ for (uint32_t g = RtsFlags.GcFlags.generations-1; g > N; g--) { scavenge_mutable_list(cap->saved_mut_lists[g], &generations[g]); - freeChain_sync(cap->saved_mut_lists[g]); + freeChain_lock(cap->saved_mut_lists[g]); cap->saved_mut_lists[g] = NULL; } } diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 40d8a45806..cc28944efe 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -193,14 +193,13 @@ initStorage (void) initMutex(&sm_mutex); #endif - ACQUIRE_SM_LOCK; - /* allocate generation info array */ generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations * sizeof(struct generation_), "initStorage: gens"); /* Initialise all generations */ + ACQUIRE_SM_LOCK; for(g = 0; g < RtsFlags.GcFlags.generations; g++) { initGeneration(&generations[g], g); } @@ -214,16 +213,11 @@ initStorage (void) generations[g].to = &generations[g+1]; } oldest_gen->to = oldest_gen; + RELEASE_SM_LOCK; // Nonmoving heap uses oldest_gen so initialize it after initializing oldest_gen nonmovingInit(); -#if defined(THREADED_RTS) - // nonmovingAddCapabilities allocates segments, which requires taking the gc - // sync lock, so initialize it before nonmovingAddCapabilities - initSpinLock(&gc_alloc_block_sync); -#endif - if (RtsFlags.GcFlags.useNonmoving) nonmovingAddCapabilities(getNumCapabilities()); @@ -261,8 +255,6 @@ initStorage (void) IF_DEBUG(gc, statDescribeGens()); - RELEASE_SM_LOCK; - traceInitEvent(traceHeapInfo); } @@ -314,12 +306,14 @@ void storageAddCapabilities (uint32_t from, uint32_t to) assignNurseriesToCapabilities(from,to); // allocate a block for each mut list + ACQUIRE_SM_LOCK; for (n = from; n < to; n++) { for (g = 1; g < RtsFlags.GcFlags.generations; g++) { getCapability(n)->mut_lists[g] = allocBlockOnNode(capNoToNumaNode(n)); } } + RELEASE_SM_LOCK; // Initialize NonmovingAllocators and UpdRemSets if (RtsFlags.GcFlags.useNonmoving) { @@ -564,9 +558,7 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf) // Allocate the blackhole indirection closure if (RtsFlags.GcFlags.useNonmoving) { // See Note [Static objects under the nonmoving collector]. - ACQUIRE_SM_LOCK; bh = (StgInd *)nonmovingAllocate(cap, sizeofW(*bh)); - RELEASE_SM_LOCK; recordMutableCap((StgClosure*)bh, regTableToCapability(reg), oldest_gen->no); } else { @@ -724,6 +716,7 @@ allocNursery (uint32_t node, bdescr *tail, W_ blocks) // automatic prefetching works across nursery blocks. This is a // tiny optimisation (~0.5%), but it's free. + ACQUIRE_SM_LOCK; while (blocks > 0) { n = stg_min(BLOCKS_PER_MBLOCK, blocks); // allocLargeChunk will prefer large chunks, but will pick up @@ -759,6 +752,7 @@ allocNursery (uint32_t node, bdescr *tail, W_ blocks) tail = &bd[0]; } + RELEASE_SM_LOCK; return &bd[0]; } @@ -878,7 +872,7 @@ resizeNurseriesEach (W_ blocks) next_bd = bd->link; next_bd->u.back = NULL; nursery_blocks -= bd->blocks; // might be a large block - freeGroup(bd); + freeGroup_lock(bd); bd = next_bd; } nursery->blocks = bd; @@ -1299,9 +1293,7 @@ allocatePinned (Capability *cap, W_ n /*words*/, W_ alignment /*bytes*/, W_ alig if (bd == NULL) { // The pinned block list is empty: allocate a fresh block (we can't fail // here). - ACQUIRE_SM_LOCK; bd = allocNursery(cap->node, NULL, PINNED_EMPTY_SIZE); - RELEASE_SM_LOCK; } // Bump up the nursery pointer to avoid the pathological situation -- cgit v1.2.1