summaryrefslogtreecommitdiff
path: root/rts
diff options
context:
space:
mode:
author Simon Marlow <marlowsd@gmail.com> 2012-07-10 09:57:53 +0100
committer Simon Marlow <marlowsd@gmail.com> 2012-07-10 10:08:48 +0100
commit 713cf473de8a2ad7d0b8195d78860c25fec41839 (patch)
tree ee813ecdc47842026b1090908e9466ce11519df4 /rts
parent 2f3a41d92bf7bba45a1f37f4dfeaed84ac4ac52a (diff)
download haskell-713cf473de8a2ad7d0b8195d78860c25fec41839.tar.gz
Parallelise clearNurseries() in the parallel GC
The clearNurseries() operation resets the free pointer in each nursery block to the start of the block, emptying the nursery. In the parallel GC this was done on the main GC thread, but that's bad because it accesses the bdescr of every nursery block, and moves all those cache lines onto the CPU of the main GC thread. With large nurseries, this can be especially bad. So instead we want to clear each nursery in its local GC thread.

Thanks to Andreas Voellmy <andreas.voellmy@gmail.com> for identifying the issue.

After this change and the previous patch to make the last GC a major one, I see these results for nofib/parallel on 8 cores:

    blackscholes      +0.0%    +0.0%    -3.7%    -3.3%    +0.3%
    coins             +0.0%    +0.0%    -5.1%    -5.0%    +0.4%
    gray              +0.0%    +0.0%    -4.5%    -2.1%    +0.8%
    mandel            +0.0%    -0.0%    -7.6%    -5.1%    -2.3%
    matmult           +0.0%    +5.5%    -2.8%    -1.9%    -5.8%
    minimax           +0.0%    +0.0%   -10.6%   -10.5%    +0.0%
    nbody             +0.0%    -4.4%    +0.0%     0.07    +0.0%
    parfib            +0.0%    +1.0%    +0.5%    +0.9%    +0.0%
    partree           +0.0%    +0.0%    -2.4%    -2.5%    +1.7%
    prsa              +0.0%    -0.2%    +1.8%    +4.2%    +0.0%
    queens            +0.0%    -0.0%    -1.8%    -1.4%    -4.8%
    ray               +0.0%    -0.6%   -18.5%   -17.8%    +0.0%
    sumeuler          +0.0%    -0.0%    -3.7%    -3.7%    +0.0%
    transclos         +0.0%    -0.0%   -25.7%   -26.6%    +0.0%
    --------------------------------------------------------------------------------
    Min               +0.0%    -4.4%   -25.7%   -26.6%    -5.8%
    Max               +0.0%    +5.5%    +1.8%    +4.2%    +1.7%
    Geometric Mean    +0.0%    +0.1%    -6.3%    -6.1%    -0.7%
Diffstat (limited to 'rts')
-rw-r--r-- rts/sm/GC.c 14
-rw-r--r-- rts/sm/GCThread.h 1
-rw-r--r-- rts/sm/Storage.c 23
-rw-r--r-- rts/sm/Storage.h 2
4 files changed, 25 insertions, 15 deletions
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index a6b8c4af64..ab0ba640c1 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -629,7 +629,16 @@ GarbageCollect (rtsBool force_major_gc,
}
// Reset the nursery: make the blocks empty
- allocated += clearNurseries();
+ if (n_gc_threads == 1) {
+ for (n = 0; n < n_capabilities; n++) {
+ allocated += clearNursery(&capabilities[n]);
+ }
+ } else {
+ gct->allocated = clearNursery(cap);
+ for (n = 0; n < n_capabilities; n++) {
+ allocated += gc_threads[n]->allocated;
+ }
+ }
resize_nursery();
@@ -1094,6 +1103,8 @@ gcWorkerThread (Capability *cap)
scavenge_until_all_done();
+ gct->allocated = clearNursery(cap);
+
#ifdef THREADED_RTS
// Now that the whole heap is marked, we discard any sparks that
// were found to be unreachable. The main GC thread is currently
@@ -1477,6 +1488,7 @@ init_gc_thread (gc_thread *t)
t->failed_to_evac = rtsFalse;
t->eager_promotion = rtsTrue;
t->thunk_selector_depth = 0;
+ t->allocated = 0;
t->copied = 0;
t->scanned = 0;
t->any_work = 0;
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
index 60f721285d..1b811e43fc 100644
--- a/rts/sm/GCThread.h
+++ b/rts/sm/GCThread.h
@@ -176,6 +176,7 @@ typedef struct gc_thread_ {
// -------------------
// stats
+ lnat allocated; // result of clearNursery()
lnat copied;
lnat scanned;
lnat any_work;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 17798a25b8..18d317d446 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -496,22 +496,19 @@ allocNurseries (nat from, nat to)
assignNurseriesToCapabilities(from, to);
}
-lnat // words allocated
-clearNurseries (void)
+lnat
+clearNursery (Capability *cap)
{
- lnat allocated = 0;
- nat i;
bdescr *bd;
+ lnat allocated = 0;
- for (i = 0; i < n_capabilities; i++) {
- for (bd = nurseries[i].blocks; bd; bd = bd->link) {
- allocated += (lnat)(bd->free - bd->start);
- capabilities[i].total_allocated += (lnat)(bd->free - bd->start);
- bd->free = bd->start;
- ASSERT(bd->gen_no == 0);
- ASSERT(bd->gen == g0);
- IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
- }
+ for (bd = nurseries[cap->no].blocks; bd; bd = bd->link) {
+ allocated += (lnat)(bd->free - bd->start);
+ cap->total_allocated += (lnat)(bd->free - bd->start);
+ bd->free = bd->start;
+ ASSERT(bd->gen_no == 0);
+ ASSERT(bd->gen == g0);
+ IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
}
return allocated;
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index 44f39ee29b..9dffc18f2d 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -81,7 +81,7 @@ void dirty_MVAR(StgRegTable *reg, StgClosure *p);
extern nursery *nurseries;
void resetNurseries ( void );
-lnat clearNurseries ( void );
+lnat clearNursery ( Capability *cap );
void resizeNurseries ( nat blocks );
void resizeNurseriesFixed ( nat blocks );
lnat countNurseryBlocks ( void );