author      Simon Marlow <marlowsd@gmail.com>    2009-01-12 12:10:24 +0000
committer   Simon Marlow <marlowsd@gmail.com>    2009-01-12 12:10:24 +0000
commit      6a405b1efd138a4af4ed93ce4ff173a4c5704512 (patch)
tree        d11e6ba4cb32b3c447065b0e928e245d6639058d /includes
parent      192c7d555448b8a78d57a5c01c0c20f642f2d0f3 (diff)
download    haskell-6a405b1efd138a4af4ed93ce4ff173a4c5704512.tar.gz
Keep the remembered sets local to each thread during parallel GC
This turns out to be quite vital for parallel programs:
- The way we discover which threads to traverse is by finding
dirty threads via the remembered sets (aka mutable lists).
- A dirty thread will be on the remembered set of the capability
that was running it, and we really want to traverse that thread's
stack using the GC thread for the capability, because it is in
that CPU's cache. If we get this wrong, we get penalised badly by
the memory system.
Previously we had per-capability mutable lists but they were
aggregated before GC and traversed by just one of the GC threads.
This resulted in very poor performance particularly for parallel
programs with deep stacks.
Now we keep per-capability remembered sets throughout GC, which also
removes a lock (recordMutableGen_sync).
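
To make the locality argument concrete, here is a minimal sketch in plain C of the idea described above. This is not GHC code: Capability, MutListEntry, recordMutable, and scavenge_mut_list below are simplified stand-ins for the RTS structures. Each capability appends dirty objects to its own remembered set, and during parallel GC the thread responsible for that capability walks only its own list, so no shared lock is needed and the entries it visits are likely still in that CPU's cache.

```c
/* Toy model of per-capability remembered sets: the mutator records dirty
 * closures on the list owned by its capability, and each GC thread
 * scavenges only its own capability's list. */
#include <stdio.h>
#include <stdlib.h>

#define N_CAPS 4

typedef struct MutListEntry {
    void                *closure;      /* dirty object recorded by the mutator */
    struct MutListEntry *next;
} MutListEntry;

typedef struct {
    int           cap_no;
    MutListEntry *mut_list;            /* this capability's remembered set */
} Capability;

static Capability capabilities[N_CAPS];

/* Mutator side: record a dirty closure on the *local* capability's list.
 * No lock is needed because the capability is owned by a single thread. */
static void recordMutable(Capability *cap, void *closure)
{
    MutListEntry *e = malloc(sizeof(*e));
    e->closure = closure;
    e->next = cap->mut_list;
    cap->mut_list = e;
}

/* GC side: the thread assigned to this capability walks only this list. */
static void scavenge_mut_list(Capability *cap)
{
    for (MutListEntry *e = cap->mut_list; e != NULL; e = e->next) {
        printf("GC thread %d scavenges %p\n", cap->cap_no, e->closure);
    }
}

int main(void)
{
    int dummy[8];
    for (int i = 0; i < N_CAPS; i++) capabilities[i].cap_no = i;

    /* Each "mutator" dirties a couple of objects on its own capability. */
    for (int i = 0; i < N_CAPS; i++) {
        recordMutable(&capabilities[i], &dummy[2*i]);
        recordMutable(&capabilities[i], &dummy[2*i + 1]);
    }

    /* In the real RTS these loops run concurrently, one GC thread per
     * capability; here they run sequentially for simplicity. */
    for (int i = 0; i < N_CAPS; i++) scavenge_mut_list(&capabilities[i]);
    return 0;
}
```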
Diffstat (limited to 'includes')
-rw-r--r--    includes/Storage.h    42
1 file changed, 10 insertions, 32 deletions
```diff
diff --git a/includes/Storage.h b/includes/Storage.h
index 0a7aae6750..f43eb79b76 100644
--- a/includes/Storage.h
+++ b/includes/Storage.h
@@ -245,7 +245,6 @@ extern void GarbageCollect(rtsBool force_major_gc, nat gc_type, Capability *cap)
 #if defined(THREADED_RTS)
 extern Mutex sm_mutex;
 extern Mutex atomic_modify_mutvar_mutex;
-extern SpinLock recordMutableGen_sync;
 #endif
 
 #if defined(THREADED_RTS)
@@ -258,63 +257,40 @@ extern SpinLock recordMutableGen_sync;
 #define ASSERT_SM_LOCK()
 #endif
 
+#if !IN_STG_CODE
+
 INLINE_HEADER void
-recordMutableGen(StgClosure *p, generation *gen)
+recordMutableGen(StgClosure *p, nat gen_no)
 {
     bdescr *bd;
 
-    bd = gen->mut_list;
+    bd = generations[gen_no].mut_list;
     if (bd->free >= bd->start + BLOCK_SIZE_W) {
         bdescr *new_bd;
         new_bd = allocBlock();
         new_bd->link = bd;
         bd = new_bd;
-        gen->mut_list = bd;
+        generations[gen_no].mut_list = bd;
     }
     *bd->free++ = (StgWord)p;
 }
 
 INLINE_HEADER void
-recordMutableGenLock(StgClosure *p, generation *gen)
+recordMutableGenLock(StgClosure *p, nat gen_no)
 {
     ACQUIRE_SM_LOCK;
-    recordMutableGen(p,gen);
+    recordMutableGen(p,gen_no);
     RELEASE_SM_LOCK;
 }
 
-extern bdescr *allocBlock_sync(void);
-
-// Version of recordMutableGen() for use in parallel GC. The same as
-// recordMutableGen(), except that we surround it with a spinlock and
-// call the spinlock version of allocBlock().
-INLINE_HEADER void
-recordMutableGen_GC(StgClosure *p, generation *gen)
-{
-    bdescr *bd;
-
-    ACQUIRE_SPIN_LOCK(&recordMutableGen_sync);
-
-    bd = gen->mut_list;
-    if (bd->free >= bd->start + BLOCK_SIZE_W) {
-        bdescr *new_bd;
-        new_bd = allocBlock_sync();
-        new_bd->link = bd;
-        bd = new_bd;
-        gen->mut_list = bd;
-    }
-    *bd->free++ = (StgWord)p;
-
-    RELEASE_SPIN_LOCK(&recordMutableGen_sync);
-}
-
 INLINE_HEADER void
 recordMutable(StgClosure *p)
 {
     bdescr *bd;
 
     ASSERT(closure_MUTABLE(p));
     bd = Bdescr((P_)p);
-    if (bd->gen_no > 0) recordMutableGen(p, &RTS_DEREF(generations)[bd->gen_no]);
+    if (bd->gen_no > 0) recordMutableGen(p, bd->gen_no);
 }
 
 INLINE_HEADER void
@@ -325,6 +301,8 @@ recordMutableLock(StgClosure *p)
     RELEASE_SM_LOCK;
 }
 
+#endif // !IN_STG_CODE
+
 /* -----------------------------------------------------------------------------
    The CAF table - used to let us revert CAFs in GHCi
    -------------------------------------------------------------------------- */
```
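
The patched recordMutableGen takes a generation number and indexes the global generations[] array directly instead of taking a generation pointer. Below is a self-contained toy model of that block-append logic, under loose assumptions: bdescr, allocBlock, BLOCK_SIZE_W, and generations here are simplified stand-ins, not the RTS definitions, and the tiny block size exists only to exercise the overflow path.

```c
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

typedef uintptr_t StgWord;
typedef unsigned int nat;

#define BLOCK_SIZE_W 4   /* tiny block so the "block full" branch is hit */

/* Simplified block descriptor: a fixed-size array of words plus a link
 * to the previous block in the generation's mutable list. */
typedef struct bdescr_ {
    StgWord        *start;
    StgWord        *free;
    struct bdescr_ *link;
    StgWord         words[BLOCK_SIZE_W];
} bdescr;

typedef struct {
    bdescr *mut_list;
} generation;

#define N_GENS 2
static generation generations[N_GENS];

static bdescr *allocBlock(void)
{
    bdescr *bd = calloc(1, sizeof(*bd));
    bd->start = bd->words;
    bd->free  = bd->words;
    return bd;
}

/* Same shape as the patched recordMutableGen: append the closure to the
 * given generation's mutable list, chaining a fresh block onto the front
 * of the list when the current one is full. */
static void recordMutableGen(void *p, nat gen_no)
{
    bdescr *bd = generations[gen_no].mut_list;
    if (bd->free >= bd->start + BLOCK_SIZE_W) {
        bdescr *new_bd = allocBlock();
        new_bd->link = bd;
        bd = new_bd;
        generations[gen_no].mut_list = bd;
    }
    *bd->free++ = (StgWord)p;
}

int main(void)
{
    int objs[10];
    for (nat g = 0; g < N_GENS; g++) generations[g].mut_list = allocBlock();

    for (int i = 0; i < 10; i++) recordMutableGen(&objs[i], 1);

    /* Walk generation 1's mutable list, newest block first. */
    for (bdescr *bd = generations[1].mut_list; bd != NULL; bd = bd->link) {
        for (StgWord *w = bd->start; w < bd->free; w++) {
            printf("recorded %p\n", (void *)*w);
        }
    }
    return 0;
}
```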