summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2012-09-21 13:11:22 +0100
committerSimon Marlow <marlowsd@gmail.com>2012-09-21 13:46:47 +0100
commit1f5d83648dfda39d999eb8a9e8192339b3eea540 (patch)
tree257b9a2b5f4d38706bce47dc017128ebea35b431
parenta17da16f08ae24e24ebe671d0731c43b240008f5 (diff)
downloadhaskell-1f5d83648dfda39d999eb8a9e8192339b3eea540.tar.gz
Allow allocNursery() to allocate single blocks (#7257)
Forcing large allocations here can creates serious fragmentation in some cases, and since the large allocations are only a small optimisation we should allow the nursery to hoover up small blocks before allocating large chunks.
-rw-r--r--rts/sm/BlockAlloc.c8
-rw-r--r--rts/sm/Storage.c16
2 files changed, 13 insertions, 11 deletions
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index f0f6fb551c..9f04c68a70 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -393,12 +393,18 @@ finish:
// Allocate a chunk of blocks that is at least min and at most max
// blocks in size. This API is used by the nursery allocator that
// wants contiguous memory preferably, but doesn't require it. When
-// memory is fragmented we might have lots of large chunks that are
+// memory is fragmented we might have lots of chunks that are
// less than a full megablock, so allowing the nursery allocator to
// use these reduces fragmentation considerably. e.g. on a GHC build
// with +RTS -H, I saw fragmentation go from 17MB down to 3MB on a
// single compile.
//
+// Further to this: in #7257 there is a program that creates serious
+// fragmentation such that the heap is full of tiny <4 block chains.
+// The nursery allocator therefore has to use single blocks to avoid
+// fragmentation, but we make sure that we allocate large blocks
+// preferably if there are any.
+//
bdescr *
allocLargeChunk (W_ min, W_ max)
{
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 541da5df1c..755c65427c 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -437,16 +437,12 @@ allocNursery (bdescr *tail, W_ blocks)
// tiny optimisation (~0.5%), but it's free.
while (blocks > 0) {
- if (blocks >= BLOCKS_PER_MBLOCK / 4) {
- n = stg_min(BLOCKS_PER_MBLOCK, blocks);
- bd = allocLargeChunk(16, n); // see comment with allocLargeChunk()
- // NB. we want a nice power of 2 for the minimum here
- n = bd->blocks;
- } else {
- bd = allocGroup(blocks);
- n = blocks;
- }
-
+ n = stg_min(BLOCKS_PER_MBLOCK, blocks);
+ // allocLargeChunk will prefer large chunks, but will pick up
+ // small chunks if there are any available. We must allow
+ // single blocks here to avoid fragmentation (#7257)
+ bd = allocLargeChunk(1, n);
+ n = bd->blocks;
blocks -= n;
for (i = 0; i < n; i++) {