summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2016-04-09 20:49:52 +0100
committerSimon Marlow <marlowsd@gmail.com>2016-04-12 03:14:06 -0700
commitf4446c5b963af8f3cc1693e2feab91dbe43d5237 (patch)
tree861d9bb9fea7086459bb734498348226e5d06019
parent5c4cd0e44657d52f7ca5fee63f8765d17f1fbe85 (diff)
downloadhaskell-f4446c5b963af8f3cc1693e2feab91dbe43d5237.tar.gz
Allocate blocks in the GC in batches
Avoids contention for the block allocator lock in the GC; this can be seen in the gc_alloc_block_sync counter emitted by +RTS -s. I experimented with this a while ago, and there was already commented-out code for it in GCUtils.c, but I've now improved it so that it doesn't result in significantly worse memory usage. * The old method of putting spare blocks on ws->part_list was wasteful, the spare blocks are now shared between all generations and retained between GCs. * repeated allocGroup() results in fragmentation, so I switched to using allocLargeChunk() instead which is fragmentation-friendly; we already use it for the same reason in nursery allocation.
-rw-r--r--includes/rts/storage/Block.h3
-rw-r--r--rts/sm/GC.c2
-rw-r--r--rts/sm/GCUtils.c49
-rw-r--r--rts/sm/Sanity.c8
4 files changed, 31 insertions, 31 deletions
diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h
index 755c8177cf..024f78c6e3 100644
--- a/includes/rts/storage/Block.h
+++ b/includes/rts/storage/Block.h
@@ -89,7 +89,8 @@ typedef struct bdescr_ {
StgPtr start; // [READ ONLY] start addr of memory
- StgPtr free; // first free byte of memory.
+ StgPtr free; // First free byte of memory.
+ // allocGroup() sets this to the value of start.
// NB. during use this value should lie
// between start and start + blocks *
// BLOCK_SIZE. Values outside this
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 02bb3bbe6b..df73ab8314 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -922,7 +922,7 @@ any_work (void)
return rtsTrue;
}
- // Check for global work in any step. We don't need to check for
+ // Check for global work in any gen. We don't need to check for
// local work, because we have already exited scavenge_loop(),
// which means there is no local work for this thread.
for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 364a10a759..9ecb674bb3 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -51,29 +51,28 @@ allocGroup_sync(nat n)
}
-#if 0
-static void
-allocBlocks_sync(nat n, bdescr **hd, bdescr **tl,
- nat gen_no, step *stp,
- StgWord32 flags)
+static nat
+allocBlocks_sync(nat n, bdescr **hd)
{
bdescr *bd;
nat i;
ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- bd = allocGroup(n);
+ bd = allocLargeChunk(1,n);
+ // NB. allocLargeChunk, rather than allocGroup(n), to allocate in a
+ // fragmentation-friendly way.
+ n = bd->blocks;
for (i = 0; i < n; i++) {
bd[i].blocks = 1;
- bd[i].gen_no = gen_no;
- bd[i].step = stp;
- bd[i].flags = flags;
bd[i].link = &bd[i+1];
- bd[i].u.scan = bd[i].free = bd[i].start;
+ bd[i].free = bd[i].start;
}
- *hd = bd;
- *tl = &bd[n-1];
+ bd[n-1].link = NULL;
+ // We have to hold the lock until we've finished fiddling with the metadata,
+ // otherwise the block allocator can get confused.
RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ *hd = bd;
+ return n;
}
-#endif
void
freeChain_sync(bdescr *bd)
@@ -312,26 +311,22 @@ alloc_todo_block (gen_workspace *ws, nat size)
}
else
{
- // blocks in to-space get the BF_EVACUATED flag.
-
-// allocBlocks_sync(16, &hd, &tl,
-// ws->step->gen_no, ws->step, BF_EVACUATED);
-//
-// tl->link = ws->part_list;
-// ws->part_list = hd->link;
-// ws->n_part_blocks += 15;
-//
-// bd = hd;
-
if (size > BLOCK_SIZE_W) {
bd = allocGroup_sync((W_)BLOCK_ROUND_UP(size*sizeof(W_))
/ BLOCK_SIZE);
} else {
- bd = allocBlock_sync();
+ if (gct->free_blocks) {
+ bd = gct->free_blocks;
+ gct->free_blocks = bd->link;
+ } else {
+ allocBlocks_sync(16, &bd);
+ gct->free_blocks = bd->link;
+ }
}
- initBdescr(bd, ws->gen, ws->gen->to);
+ // blocks in to-space get the BF_EVACUATED flag.
bd->flags = BF_EVACUATED;
- bd->u.scan = bd->free = bd->start;
+ bd->u.scan = bd->start;
+ initBdescr(bd, ws->gen, ws->gen->to);
}
bd->link = NULL;
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 7ce1183e48..1f4c4923c4 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -770,6 +770,7 @@ findMemoryLeak (void)
}
for (i = 0; i < n_capabilities; i++) {
+ markBlocks(gc_threads[i]->free_blocks);
markBlocks(capabilities[i]->pinned_object_block);
}
@@ -841,7 +842,7 @@ memInventory (rtsBool show)
nat g, i;
W_ gen_blocks[RtsFlags.GcFlags.generations];
W_ nursery_blocks, retainer_blocks,
- arena_blocks, exec_blocks;
+ arena_blocks, exec_blocks, gc_free_blocks = 0;
W_ live_blocks = 0, free_blocks = 0;
rtsBool leak;
@@ -864,6 +865,7 @@ memInventory (rtsBool show)
nursery_blocks += nurseries[i].n_blocks;
}
for (i = 0; i < n_capabilities; i++) {
+ gc_free_blocks += countBlocks(gc_threads[i]->free_blocks);
if (capabilities[i]->pinned_object_block != NULL) {
nursery_blocks += capabilities[i]->pinned_object_block->blocks;
}
@@ -891,7 +893,7 @@ memInventory (rtsBool show)
live_blocks += gen_blocks[g];
}
live_blocks += nursery_blocks +
- + retainer_blocks + arena_blocks + exec_blocks;
+ + retainer_blocks + arena_blocks + exec_blocks + gc_free_blocks;
#define MB(n) (((double)(n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
@@ -916,6 +918,8 @@ memInventory (rtsBool show)
arena_blocks, MB(arena_blocks));
debugBelch(" exec : %5" FMT_Word " blocks (%6.1lf MB)\n",
exec_blocks, MB(exec_blocks));
+ debugBelch(" GC free pool : %5" FMT_Word " blocks (%6.1lf MB)\n",
+ gc_free_blocks, MB(gc_free_blocks));
debugBelch(" free : %5" FMT_Word " blocks (%6.1lf MB)\n",
free_blocks, MB(free_blocks));
debugBelch(" total : %5" FMT_Word " blocks (%6.1lf MB)\n",