diff options
author | Simon Marlow <marlowsd@gmail.com> | 2016-04-09 20:49:52 +0100 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2016-04-12 03:14:06 -0700 |
commit | f4446c5b963af8f3cc1693e2feab91dbe43d5237 (patch) | |
tree | 861d9bb9fea7086459bb734498348226e5d06019 | |
parent | 5c4cd0e44657d52f7ca5fee63f8765d17f1fbe85 (diff) | |
download | haskell-f4446c5b963af8f3cc1693e2feab91dbe43d5237.tar.gz |
Allocate blocks in the GC in batches
Avoids contention for the block allocator lock in the GC; this can be
seen in the gc_alloc_block_sync counter emitted by +RTS -s.
I experimented with this a while ago, and there was already
commented-out code for it in GCUtils.c, but I've now improved it so that
it doesn't result in significantly worse memory usage.
* The old method of putting spare blocks on ws->part_list was wasteful;
the spare blocks are now shared between all generations and retained
between GCs.
* Repeated allocGroup() results in fragmentation, so I switched to using
allocLargeChunk() instead, which is fragmentation-friendly; we already
use it for the same reason in nursery allocation.
-rw-r--r-- | includes/rts/storage/Block.h | 3 | ||||
-rw-r--r-- | rts/sm/GC.c | 2 | ||||
-rw-r--r-- | rts/sm/GCUtils.c | 49 | ||||
-rw-r--r-- | rts/sm/Sanity.c | 8 |
4 files changed, 31 insertions, 31 deletions
diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h index 755c8177cf..024f78c6e3 100644 --- a/includes/rts/storage/Block.h +++ b/includes/rts/storage/Block.h @@ -89,7 +89,8 @@ typedef struct bdescr_ { StgPtr start; // [READ ONLY] start addr of memory - StgPtr free; // first free byte of memory. + StgPtr free; // First free byte of memory. + // allocGroup() sets this to the value of start. // NB. during use this value should lie // between start and start + blocks * // BLOCK_SIZE. Values outside this diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 02bb3bbe6b..df73ab8314 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -922,7 +922,7 @@ any_work (void) return rtsTrue; } - // Check for global work in any step. We don't need to check for + // Check for global work in any gen. We don't need to check for // local work, because we have already exited scavenge_loop(), // which means there is no local work for this thread. for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) { diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c index 364a10a759..9ecb674bb3 100644 --- a/rts/sm/GCUtils.c +++ b/rts/sm/GCUtils.c @@ -51,29 +51,28 @@ allocGroup_sync(nat n) } -#if 0 -static void -allocBlocks_sync(nat n, bdescr **hd, bdescr **tl, - nat gen_no, step *stp, - StgWord32 flags) +static nat +allocBlocks_sync(nat n, bdescr **hd) { bdescr *bd; nat i; ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); - bd = allocGroup(n); + bd = allocLargeChunk(1,n); + // NB. allocLargeChunk, rather than allocGroup(n), to allocate in a + // fragmentation-friendly way. + n = bd->blocks; for (i = 0; i < n; i++) { bd[i].blocks = 1; - bd[i].gen_no = gen_no; - bd[i].step = stp; - bd[i].flags = flags; bd[i].link = &bd[i+1]; - bd[i].u.scan = bd[i].free = bd[i].start; + bd[i].free = bd[i].start; } - *hd = bd; - *tl = &bd[n-1]; + bd[n-1].link = NULL; + // We have to hold the lock until we've finished fiddling with the metadata, + // otherwise the block allocator can get confused. 
RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + *hd = bd; + return n; } -#endif void freeChain_sync(bdescr *bd) @@ -312,26 +311,22 @@ alloc_todo_block (gen_workspace *ws, nat size) } else { - // blocks in to-space get the BF_EVACUATED flag. - -// allocBlocks_sync(16, &hd, &tl, -// ws->step->gen_no, ws->step, BF_EVACUATED); -// -// tl->link = ws->part_list; -// ws->part_list = hd->link; -// ws->n_part_blocks += 15; -// -// bd = hd; - if (size > BLOCK_SIZE_W) { bd = allocGroup_sync((W_)BLOCK_ROUND_UP(size*sizeof(W_)) / BLOCK_SIZE); } else { - bd = allocBlock_sync(); + if (gct->free_blocks) { + bd = gct->free_blocks; + gct->free_blocks = bd->link; + } else { + allocBlocks_sync(16, &bd); + gct->free_blocks = bd->link; + } } - initBdescr(bd, ws->gen, ws->gen->to); + // blocks in to-space get the BF_EVACUATED flag. bd->flags = BF_EVACUATED; - bd->u.scan = bd->free = bd->start; + bd->u.scan = bd->start; + initBdescr(bd, ws->gen, ws->gen->to); } bd->link = NULL; diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index 7ce1183e48..1f4c4923c4 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -770,6 +770,7 @@ findMemoryLeak (void) } for (i = 0; i < n_capabilities; i++) { + markBlocks(gc_threads[i]->free_blocks); markBlocks(capabilities[i]->pinned_object_block); } @@ -841,7 +842,7 @@ memInventory (rtsBool show) nat g, i; W_ gen_blocks[RtsFlags.GcFlags.generations]; W_ nursery_blocks, retainer_blocks, - arena_blocks, exec_blocks; + arena_blocks, exec_blocks, gc_free_blocks = 0; W_ live_blocks = 0, free_blocks = 0; rtsBool leak; @@ -864,6 +865,7 @@ memInventory (rtsBool show) nursery_blocks += nurseries[i].n_blocks; } for (i = 0; i < n_capabilities; i++) { + gc_free_blocks += countBlocks(gc_threads[i]->free_blocks); if (capabilities[i]->pinned_object_block != NULL) { nursery_blocks += capabilities[i]->pinned_object_block->blocks; } @@ -891,7 +893,7 @@ memInventory (rtsBool show) live_blocks += gen_blocks[g]; } live_blocks += nursery_blocks + - + retainer_blocks + arena_blocks + 
exec_blocks; + + retainer_blocks + arena_blocks + exec_blocks + gc_free_blocks; #define MB(n) (((double)(n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_))) @@ -916,6 +918,8 @@ memInventory (rtsBool show) arena_blocks, MB(arena_blocks)); debugBelch(" exec : %5" FMT_Word " blocks (%6.1lf MB)\n", exec_blocks, MB(exec_blocks)); + debugBelch(" GC free pool : %5" FMT_Word " blocks (%6.1lf MB)\n", + gc_free_blocks, MB(gc_free_blocks)); debugBelch(" free : %5" FMT_Word " blocks (%6.1lf MB)\n", free_blocks, MB(free_blocks)); debugBelch(" total : %5" FMT_Word " blocks (%6.1lf MB)\n", |