author     Simon Marlow <marlowsd@gmail.com>   2012-02-13 11:17:50 +0000
committer  Simon Marlow <marlowsd@gmail.com>   2012-02-13 12:26:10 +0000
commit     67f4ab7e6b7705a9d617c6109a8c5434ede13cae (patch)
tree       8f1ed63f526c3a88a4f234c9a3d5b5ac2a9eb0c6 /rts/sm
parent     86ebfef9a5acc60b7a2ce3c8f025e6e707f17f87 (diff)
Allocate pinned object blocks from the nursery, not the global
allocator.

Prompted by a benchmark posted to parallel-haskell@haskell.org by
Andreas Voellmy <andreas.voellmy@gmail.com>.  This program exhibits
contention for the block allocator when run with -N2 and greater
without the fix:

{-# LANGUAGE MagicHash, UnboxedTuples, BangPatterns #-}
module Main where

import Control.Monad
import Control.Concurrent
import System.Environment
import GHC.IO
import GHC.Exts
import GHC.Conc

main = do
  [m] <- fmap (fmap read) getArgs
  n <- getNumCapabilities
  ms <- replicateM n newEmptyMVar
  sequence [ forkIO $ busyWorkerB (m `quot` n) >> putMVar mv () | mv <- ms ]
  mapM takeMVar ms

busyWorkerB :: Int -> IO ()
busyWorkerB n_loops = go 0
  where
    go !n | n >= n_loops = return ()
          | otherwise = do
              p <- (IO $ \s ->
                      case newPinnedByteArray# 1024# s of
                        { (# s', mbarr# #) -> (# s', () #) })
              go (n+1)
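For reference, the same allocatePinned() path can be exercised from ordinary
user code without MagicHash, via pinned ForeignPtr allocation. A minimal
sketch, assuming GHC.ForeignPtr.mallocPlainForeignPtrBytes (which allocates
its buffer pinned, in the same way as newPinnedByteArray# above):

import Control.Concurrent
import Control.Monad
import Data.Word (Word8)
import Foreign.ForeignPtr (ForeignPtr)
import GHC.Conc (getNumCapabilities)
import GHC.ForeignPtr (mallocPlainForeignPtrBytes)

-- One worker per capability; each allocates many small pinned buffers,
-- so every capability hits allocatePinned() concurrently.
pinnedWorkers :: Int -> IO ()
pinnedWorkers loops = do
  n  <- getNumCapabilities
  ms <- replicateM n newEmptyMVar
  forM_ ms $ \mv -> forkIO $ do
    replicateM_ (loops `quot` n)
      (mallocPlainForeignPtrBytes 1024 :: IO (ForeignPtr Word8))
    putMVar mv ()
  mapM_ takeMVar ms

Built with ghc -O2 -threaded -rtsopts and run with +RTS -N2 (or more), a loop
like this should show the same block-allocator contention before the patch;
with the fix, each capability takes pinned blocks from its own nursery instead.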
Diffstat (limited to 'rts/sm')
-rw-r--r--   rts/sm/GC.c       42
-rw-r--r--   rts/sm/Sanity.c    3
-rw-r--r--   rts/sm/Storage.c  59
3 files changed, 91 insertions(+), 13 deletions(-)
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index aeadf6f42f..86231948c1 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -150,6 +150,7 @@ static StgWord dec_running (void);
static void wakeup_gc_threads (nat me);
static void shutdown_gc_threads (nat me);
static void collect_gct_blocks (void);
+static lnat collect_pinned_object_blocks (void);
#if 0 && defined(DEBUG)
static void gcCAFs (void);
@@ -285,6 +286,10 @@ GarbageCollect (rtsBool force_major_gc,
// check sanity *before* GC
IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
+ // gather blocks allocated using allocatePinned() from each capability
+ // and put them on the g0->large_object list.
+ collect_pinned_object_blocks();
+
// Initialise all the generations/steps that we're collecting.
for (g = 0; g <= N; g++) {
prepare_collected_gen(&generations[g]);
@@ -1422,6 +1427,43 @@ collect_gct_blocks (void)
}
/* -----------------------------------------------------------------------------
+ During mutation, any blocks that are filled by allocatePinned() are
+ stashed on the local pinned_object_blocks list, to avoid needing to
+ take a global lock. Here we collect those blocks from the
+ cap->pinned_object_blocks lists and put them on the
+ main g0->large_object list.
+
+ Returns: the number of words allocated this way, for stats
+ purposes.
+ -------------------------------------------------------------------------- */
+
+static lnat
+collect_pinned_object_blocks (void)
+{
+ nat n;
+ bdescr *bd, *prev;
+ lnat allocated = 0;
+
+ for (n = 0; n < n_capabilities; n++) {
+ prev = NULL;
+ for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
+ allocated += bd->free - bd->start;
+ prev = bd;
+ }
+ if (prev != NULL) {
+ prev->link = g0->large_objects;
+ if (g0->large_objects != NULL) {
+ g0->large_objects->u.back = prev;
+ }
+ g0->large_objects = capabilities[n].pinned_object_blocks;
+ capabilities[n].pinned_object_blocks = 0;
+ }
+ }
+
+ return allocated;
+}
+
+/* -----------------------------------------------------------------------------
Initialise a gc_thread before GC
-------------------------------------------------------------------------- */
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index b6c5926ab8..78ecc96e0a 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -869,7 +869,7 @@ memInventory (rtsBool show)
gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].part_list);
gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].scavd_list);
gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].todo_bd);
- }
+ }
gen_blocks[g] += genBlocks(&generations[g]);
}
@@ -880,6 +880,7 @@ memInventory (rtsBool show)
if (capabilities[i].pinned_object_block != NULL) {
nursery_blocks += capabilities[i].pinned_object_block->blocks;
}
+ nursery_blocks += countBlocks(capabilities[i].pinned_object_blocks);
}
retainer_blocks = 0;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 012ba514db..0ff37d2582 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -744,8 +744,54 @@ allocatePinned (Capability *cap, lnat n)
bd = cap->pinned_object_block;
// If we don't have a block of pinned objects yet, or the current
- // one isn't large enough to hold the new object, allocate a new one.
+ // one isn't large enough to hold the new object, get a new one.
if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
+
+ // stash the old block on cap->pinned_object_blocks. On the
+ // next GC cycle these objects will be moved to
+ // g0->large_objects.
+ if (bd != NULL) {
+ dbl_link_onto(bd, &cap->pinned_object_blocks);
+ }
+
+ // We need to find another block. We could just allocate one,
+ // but that means taking a global lock and we really want to
+ // avoid that (benchmarks that allocate a lot of pinned
+ // objects scale really badly if we do this).
+ //
+ // So first, we try taking the next block from the nursery, in
+ // the same way as allocate(), but note that we can only take
+ // an *empty* block, because we're about to mark it as
+ // BF_PINNED | BF_LARGE.
+ bd = cap->r.rCurrentNursery->link;
+ if (bd == NULL || bd->free != bd->start) { // must be empty!
+ // The nursery is empty, or the next block is non-empty:
+ // allocate a fresh block (we can't fail here).
+
+ // XXX in the case when the next nursery block is
+ // non-empty we aren't exerting any pressure to GC soon,
+ // so if this case ever happens then we could in theory
+ // keep allocating for ever without calling the GC. We
+ // can't bump g0->n_new_large_words because that will be
+ // counted towards allocation, and we're already counting
+ // our pinned objects as allocation in
+ // collect_pinned_object_blocks in the GC.
+ ACQUIRE_SM_LOCK;
+ bd = allocBlock();
+ RELEASE_SM_LOCK;
+ initBdescr(bd, g0, g0);
+ } else {
+ // we have a block in the nursery: steal it
+ cap->r.rCurrentNursery->link = bd->link;
+ if (bd->link != NULL) {
+ bd->link->u.back = cap->r.rCurrentNursery;
+ }
+ cap->r.rNursery->n_blocks--;
+ }
+
+ cap->pinned_object_block = bd;
+ bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
+
// The pinned_object_block remains attached to the capability
// until it is full, even if a GC occurs. We want this
// behaviour because otherwise the unallocated portion of the
@@ -759,17 +805,6 @@ allocatePinned (Capability *cap, lnat n)
// the next GC the BF_EVACUATED flag will be cleared, and the
// block will be promoted as usual (if anything in it is
// live).
- ACQUIRE_SM_LOCK;
- if (bd != NULL) {
- dbl_link_onto(bd, &g0->large_objects);
- g0->n_large_blocks++;
- g0->n_new_large_words += bd->free - bd->start;
- }
- cap->pinned_object_block = bd = allocBlock();
- RELEASE_SM_LOCK;
- initBdescr(bd, g0, g0);
- bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
- bd->free = bd->start;
}
p = bd->free;
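As the comments above note, the words in these per-capability pinned blocks
are still counted towards allocation when collect_pinned_object_blocks() runs
at the next GC, so pinned-heavy programs keep sensible allocation statistics.
A rough way to observe that from Haskell, assuming the later
GHC.Stats.getRTSStats API and its allocated_bytes field (not part of this
patch, shown only for illustration) and a program run with +RTS -T:

import Control.Monad (replicateM_)
import Data.Word (Word8)
import Foreign.ForeignPtr (ForeignPtr)
import GHC.ForeignPtr (mallocPlainForeignPtrBytes)
import GHC.Stats (getRTSStats, RTSStats(..))
import System.Mem (performGC)

-- Allocate a burst of pinned buffers and check that they still show up
-- in the allocation figures once the GC has drained the
-- pinned_object_blocks lists.
main :: IO ()
main = do
  before <- allocated_bytes <$> getRTSStats
  replicateM_ 10000
    (mallocPlainForeignPtrBytes 4096 :: IO (ForeignPtr Word8))
  performGC   -- pinned blocks are gathered and counted at GC time
  after <- allocated_bytes <$> getRTSStats
  putStrLn ("pinned allocation counted: " ++ show (after - before) ++ " bytes")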