author     Simon Marlow <marlowsd@gmail.com>    2012-02-13 11:17:50 +0000
committer  Simon Marlow <marlowsd@gmail.com>    2012-02-13 12:26:10 +0000
commit     67f4ab7e6b7705a9d617c6109a8c5434ede13cae
tree       8f1ed63f526c3a88a4f234c9a3d5b5ac2a9eb0c6 /rts/sm
parent     86ebfef9a5acc60b7a2ce3c8f025e6e707f17f87
download   haskell-67f4ab7e6b7705a9d617c6109a8c5434ede13cae.tar.gz
Allocate pinned object blocks from the nursery, not the global
allocator.
Prompted by a benchmark posted to parallel-haskell@haskell.org by
Andreas Voellmy <andreas.voellmy@gmail.com>. This program exhibits
contention for the block allocator when run with -N2 or greater
without the fix:
{-# LANGUAGE MagicHash, UnboxedTuples, BangPatterns #-}
module Main where

import Control.Monad
import Control.Concurrent
import System.Environment
import GHC.IO
import GHC.Exts
import GHC.Conc

main = do
   [m] <- fmap (fmap read) getArgs
   n <- getNumCapabilities
   ms <- replicateM n newEmptyMVar
   sequence [ forkIO $ busyWorkerB (m `quot` n) >> putMVar mv () | mv <- ms ]
   mapM takeMVar ms

busyWorkerB :: Int -> IO ()
busyWorkerB n_loops = go 0
  where
    go !n | n >= n_loops = return ()
          | otherwise    =
            do p <- (IO $ \s ->
                      case newPinnedByteArray# 1024# s of
                        { (# s', mbarr# #) ->
                          (# s', () #)
                        }
                    )
               go (n+1)
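(One plausible way to reproduce the benchmark; the build line and numbers are
illustrative, not part of the commit: compile with "ghc -O2 -threaded -rtsopts
Main.hs" and run with e.g. "./Main 10000000 +RTS -N4 -s". Comparing the
"+RTS -s" summaries at -N1, -N2 and -N4, with and without the patch, should
make the block-allocator contention visible as elapsed time that fails to
improve as -N grows.)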
Diffstat (limited to 'rts/sm')
-rw-r--r--  rts/sm/GC.c      | 42
-rw-r--r--  rts/sm/Sanity.c  |  3
-rw-r--r--  rts/sm/Storage.c | 59
3 files changed, 91 insertions, 13 deletions
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index aeadf6f42f..86231948c1 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -150,6 +150,7 @@ static StgWord dec_running      (void);
 static void    wakeup_gc_threads  (nat me);
 static void    shutdown_gc_threads (nat me);
 static void    collect_gct_blocks (void);
+static lnat    collect_pinned_object_blocks (void);
 
 #if 0 && defined(DEBUG)
 static void gcCAFs                  (void);
@@ -285,6 +286,10 @@ GarbageCollect (rtsBool force_major_gc,
   // check sanity *before* GC
   IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
 
+  // gather blocks allocated using allocatePinned() from each capability
+  // and put them on the g0->large_object list.
+  collect_pinned_object_blocks();
+
   // Initialise all the generations/steps that we're collecting.
   for (g = 0; g <= N; g++) {
       prepare_collected_gen(&generations[g]);
@@ -1422,6 +1427,43 @@ collect_gct_blocks (void)
 }
 
 /* -----------------------------------------------------------------------------
+   During mutation, any blocks that are filled by allocatePinned() are
+   stashed on the local pinned_object_blocks list, to avoid needing to
+   take a global lock.  Here we collect those blocks from the
+   cap->pinned_object_blocks lists and put them on the
+   main g0->large_object list.
+
+   Returns: the number of words allocated this way, for stats
+   purposes.
+   -------------------------------------------------------------------------- */
+
+static lnat
+collect_pinned_object_blocks (void)
+{
+    nat n;
+    bdescr *bd, *prev;
+    lnat allocated = 0;
+
+    for (n = 0; n < n_capabilities; n++) {
+        prev = NULL;
+        for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
+            allocated += bd->free - bd->start;
+            prev = bd;
+        }
+        if (prev != NULL) {
+            prev->link = g0->large_objects;
+            if (g0->large_objects != NULL) {
+                g0->large_objects->u.back = prev;
+            }
+            g0->large_objects = capabilities[n].pinned_object_blocks;
+            capabilities[n].pinned_object_blocks = 0;
+        }
+    }
+
+    return allocated;
+}
+
+/* -----------------------------------------------------------------------------
    Initialise a gc_thread before GC
    -------------------------------------------------------------------------- */
 
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index b6c5926ab8..78ecc96e0a 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -869,7 +869,7 @@ memInventory (rtsBool show)
           gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].part_list);
           gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].scavd_list);
           gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].todo_bd);
-      }
+      }
       gen_blocks[g] += genBlocks(&generations[g]);
   }
 
@@ -880,6 +880,7 @@ memInventory (rtsBool show)
       if (capabilities[i].pinned_object_block != NULL) {
           nursery_blocks += capabilities[i].pinned_object_block->blocks;
       }
+      nursery_blocks += countBlocks(capabilities[i].pinned_object_blocks);
   }
 
   retainer_blocks = 0;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 012ba514db..0ff37d2582 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -744,8 +744,54 @@ allocatePinned (Capability *cap, lnat n)
     bd = cap->pinned_object_block;
 
     // If we don't have a block of pinned objects yet, or the current
-    // one isn't large enough to hold the new object, allocate a new one.
+    // one isn't large enough to hold the new object, get a new one.
     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
+
+        // stash the old block on cap->pinned_object_blocks.  On the
+        // next GC cycle these objects will be moved to
+        // g0->large_objects.
+        if (bd != NULL) {
+            dbl_link_onto(bd, &cap->pinned_object_blocks);
+        }
+
+        // We need to find another block.  We could just allocate one,
+        // but that means taking a global lock and we really want to
+        // avoid that (benchmarks that allocate a lot of pinned
+        // objects scale really badly if we do this).
+        //
+        // So first, we try taking the next block from the nursery, in
+        // the same way as allocate(), but note that we can only take
+        // an *empty* block, because we're about to mark it as
+        // BF_PINNED | BF_LARGE.
+        bd = cap->r.rCurrentNursery->link;
+        if (bd == NULL || bd->free != bd->start) { // must be empty!
+            // The nursery is empty, or the next block is non-empty:
+            // allocate a fresh block (we can't fail here).
+
+            // XXX in the case when the next nursery block is
+            // non-empty we aren't exerting any pressure to GC soon,
+            // so if this case ever happens then we could in theory
+            // keep allocating for ever without calling the GC.  We
+            // can't bump g0->n_new_large_words because that will be
+            // counted towards allocation, and we're already counting
+            // our pinned objects as allocation in
+            // collect_pinned_object_blocks in the GC.
+            ACQUIRE_SM_LOCK;
+            bd = allocBlock();
+            RELEASE_SM_LOCK;
+            initBdescr(bd, g0, g0);
+        } else {
+            // we have a block in the nursery: steal it
+            cap->r.rCurrentNursery->link = bd->link;
+            if (bd->link != NULL) {
+                bd->link->u.back = cap->r.rCurrentNursery;
+            }
+            cap->r.rNursery->n_blocks--;
+        }
+
+        cap->pinned_object_block = bd;
+        bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
+
         // The pinned_object_block remains attached to the capability
         // until it is full, even if a GC occurs.  We want this
         // behaviour because otherwise the unallocated portion of the
@@ -759,17 +805,6 @@ allocatePinned (Capability *cap, lnat n)
         // the next GC the BF_EVACUATED flag will be cleared, and the
         // block will be promoted as usual (if anything in it is
        // live).
-        ACQUIRE_SM_LOCK;
-        if (bd != NULL) {
-            dbl_link_onto(bd, &g0->large_objects);
-            g0->n_large_blocks++;
-            g0->n_new_large_words += bd->free - bd->start;
-        }
-        cap->pinned_object_block = bd = allocBlock();
-        RELEASE_SM_LOCK;
-        initBdescr(bd, g0, g0);
-        bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
-        bd->free = bd->start;
     }
 
     p = bd->free;
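A standalone, simplified sketch of the scheme, for readers who want to run the
list manipulation in isolation. This is not RTS code: bdescr here is a
three-field stand-in, lock()/unlock()/alloc_block() stand in for
ACQUIRE_SM_LOCK/RELEASE_SM_LOCK/allocBlock(), and main() is invented for
illustration; the real logic is in rts/sm/Storage.c and rts/sm/GC.c above.

#include <stdio.h>
#include <stdlib.h>

/* Minimal stand-in for the RTS block descriptor: a doubly-linked node
   that remembers how many words are in use. */
typedef struct bdescr_ {
    struct bdescr_ *link;   /* forward link  (RTS: bd->link)   */
    struct bdescr_ *back;   /* backward link (RTS: bd->u.back) */
    long            words;  /* stand-in for bd->free - bd->start */
} bdescr;

#define N_CAPS 4

static bdescr *pinned_object_blocks[N_CAPS]; /* capability-local, no lock */
static bdescr *nursery[N_CAPS];              /* capability-local free blocks */
static bdescr *large_objects;                /* global, touched only at GC */

static void lock(void)   { /* global allocator lock elided */ }
static void unlock(void) { }
static bdescr *alloc_block(void) { return calloc(1, sizeof(bdescr)); }

/* Mutator side, fast path first: steal the next *empty* nursery block
   (it is about to be retagged as a pinned large block, so a partly
   used one will not do); otherwise fall back to the locked global
   allocator. */
static bdescr *get_pinned_block(int cap)
{
    bdescr *bd = nursery[cap];
    if (bd == NULL || bd->words != 0) {
        lock();                   /* slow path: global, contended */
        bd = alloc_block();
        unlock();
    } else {
        nursery[cap] = bd->link;  /* fast path: unlink, no lock */
    }
    return bd;
}

/* Mutator side: a filled pinned block is pushed onto the local list,
   again with no global lock. */
static void stash_pinned_block(int cap, bdescr *bd)
{
    bd->link = pinned_object_blocks[cap];
    bd->back = NULL;
    if (bd->link != NULL) bd->link->back = bd;
    pinned_object_blocks[cap] = bd;
}

/* GC side: splice each capability's list onto the front of the global
   large-object list, once per GC, and report the words allocated. */
static long collect_pinned_object_blocks(void)
{
    long allocated = 0;
    for (int n = 0; n < N_CAPS; n++) {
        bdescr *prev = NULL;
        for (bdescr *bd = pinned_object_blocks[n]; bd != NULL; bd = bd->link) {
            allocated += bd->words;  /* counted here, not at allocation */
            prev = bd;               /* remember the tail of the list */
        }
        if (prev != NULL) {
            prev->link = large_objects;
            if (large_objects != NULL) large_objects->back = prev;
            large_objects = pinned_object_blocks[n];
            pinned_object_blocks[n] = NULL;
        }
    }
    return allocated;
}

int main(void)
{
    /* Give each capability a small nursery of empty blocks. */
    for (int cap = 0; cap < N_CAPS; cap++)
        for (int i = 0; i < 4; i++) {
            bdescr *bd = alloc_block();
            bd->link = nursery[cap];
            nursery[cap] = bd;
        }

    /* Each capability fills and stashes a few pinned blocks. */
    for (int cap = 0; cap < N_CAPS; cap++)
        for (int i = 0; i < 3; i++) {
            bdescr *bd = get_pinned_block(cap);
            bd->words = 1024;            /* pretend we filled it */
            stash_pinned_block(cap, bd);
        }

    /* At GC time the local lists are merged in one pass. */
    printf("collected %ld words of pinned blocks\n",
           collect_pinned_object_blocks());
    return 0;
}

The shape matters more than the details: the mutator only ever touches
capability-local lists, so the hot allocatePinned() path takes no lock, and
the single GC-time traversal is also where the allocated words are counted,
which is why, per the XXX comment in the patch, it must not also bump
g0->n_new_large_words.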