summaryrefslogtreecommitdiff
path: root/rts/sm
diff options
context:
space:
mode:
authorMatthew Pickering <matthewtpickering@gmail.com>2021-03-04 10:16:07 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-03-08 18:24:42 -0500
commit47d6acd3be1fadc0c59b7b4d4e105242c0ae0b90 (patch)
tree8696b191951e5bd9fbb5441f4eb0ba8e5ab23084 /rts/sm
parentdaa6363f49df0dceb2c460da500461e564aa9ea2 (diff)
downloadhaskell-47d6acd3be1fadc0c59b7b4d4e105242c0ae0b90.tar.gz
rts: Use a separate free block list for allocatePinned
The way in which allocatePinned took blocks out of the nursery was leading to horrible fragmentation in some workloads. The strategy now is that a separate free block list is reserved for each capability and blocks are taken from there. When it's empty the global SM lock is taken and a fresh block of size PINNED_EMPTY_SIZE is allocated. Fixes #19481
Diffstat (limited to 'rts/sm')
-rw-r--r--rts/sm/Sanity.c7
-rw-r--r--rts/sm/Storage.c162
2 files changed, 154 insertions, 15 deletions
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 193a1a884c..b39559a653 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -1185,7 +1185,7 @@ memInventory (bool show)
{
uint32_t g, i;
W_ gen_blocks[RtsFlags.GcFlags.generations];
- W_ nursery_blocks = 0, retainer_blocks = 0,
+ W_ nursery_blocks = 0, free_pinned_blocks = 0, retainer_blocks = 0,
arena_blocks = 0, exec_blocks = 0, gc_free_blocks = 0,
upd_rem_set_blocks = 0;
W_ live_blocks = 0, free_blocks = 0;
@@ -1223,6 +1223,7 @@ memInventory (bool show)
nursery_blocks += capabilities[i]->pinned_object_block->blocks;
}
nursery_blocks += countBlocks(capabilities[i]->pinned_object_blocks);
+ free_pinned_blocks += countBlocks(capabilities[i]->pinned_object_empty);
}
#if defined(PROFILING)
@@ -1252,7 +1253,7 @@ memInventory (bool show)
}
live_blocks += nursery_blocks +
+ retainer_blocks + arena_blocks + exec_blocks + gc_free_blocks
- + upd_rem_set_blocks;
+ + upd_rem_set_blocks + free_pinned_blocks;
#define MB(n) (((double)(n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
@@ -1271,6 +1272,8 @@ memInventory (bool show)
}
debugBelch(" nursery : %5" FMT_Word " blocks (%6.1lf MB)\n",
nursery_blocks, MB(nursery_blocks));
+ debugBelch(" empty pinned : %5" FMT_Word " blocks (%6.1lf MB)\n",
+ nursery_blocks, MB(free_pinned_blocks));
debugBelch(" retainer : %5" FMT_Word " blocks (%6.1lf MB)\n",
retainer_blocks, MB(retainer_blocks));
debugBelch(" arena blocks : %5" FMT_Word " blocks (%6.1lf MB)\n",
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 2bab2d6432..82e959e8d2 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -67,6 +67,16 @@ generation *oldest_gen = NULL; /* oldest generation, for convenience */
nursery *nurseries = NULL;
uint32_t n_nurseries;
+/* Pinned Nursery Size, the number of blocks that we reserve for
+ * pinned data. The number chosen here decides whether pinned objects
+ * are allocated from the free_list (if n < BLOCKS_PER_MBLOCK) or whether
+ * a fresh mblock is allocated each time.
+ * See Note [Sources of Block Level Fragmentation]
+ * */
+
+#define PINNED_EMPTY_SIZE BLOCKS_PER_MBLOCK
+
+
/*
* When we are using nursery chunks, we need a separate next_nursery
* pointer for each NUMA node.
@@ -353,6 +363,7 @@ void listAllBlocks (ListBlocksCb cb, void *user)
cb(user, capabilities[i]->pinned_object_block);
}
cb(user, capabilities[i]->pinned_object_blocks);
+ cb(user, capabilities[i]->pinned_object_empty);
}
}
@@ -1257,25 +1268,47 @@ allocatePinned (Capability *cap, W_ n /*words*/, W_ alignment /*bytes*/, W_ alig
// avoid that (benchmarks that allocate a lot of pinned
// objects scale really badly if we do this).
//
- // So first, we try taking the next block from the nursery, in
- // the same way as allocate().
- bd = cap->r.rCurrentNursery->link;
+ // See Note [Sources of Block Level Fragmentation]
+ // for a more complete history of this section.
+ bd = cap->pinned_object_empty;
if (bd == NULL) {
- // The nursery is empty: allocate a fresh block (we can't fail
+ // The pinned block list is empty: allocate a fresh block (we can't fail
// here).
ACQUIRE_SM_LOCK;
- bd = allocBlockOnNode(cap->node);
+ bd = allocNursery(cap->node, NULL, PINNED_EMPTY_SIZE);
RELEASE_SM_LOCK;
- initBdescr(bd, g0, g0);
- } else {
- newNurseryBlock(bd);
- // we have a block in the nursery: steal it
- cap->r.rCurrentNursery->link = bd->link;
- if (bd->link != NULL) {
- bd->link->u.back = cap->r.rCurrentNursery;
+ }
+
+ // Bump up the nursery pointer to avoid the pathological situation
+ // where a program is *only* allocating pinned objects.
+ // T4018 fails without this safety.
+ // This has the effect of counting a full pinned block in the same way
+ // as a full nursery block, so GCs will be triggered at the same interval
+ // if you are only allocating pinned data compared to normal allocations
+ // via allocate().
+ bdescr * nbd;
+ nbd = cap->r.rCurrentNursery->link;
+ if (nbd != NULL){
+ newNurseryBlock(nbd);
+ cap->r.rCurrentNursery->link = nbd->link;
+ if (nbd->link != NULL) {
+ nbd->link->u.back = cap->r.rCurrentNursery;
}
- cap->r.rNursery->n_blocks -= bd->blocks;
+ dbl_link_onto(nbd, &cap->r.rNursery->blocks);
+ // Important for accounting purposes
+ if (cap->r.rCurrentAlloc){
+ finishedNurseryBlock(cap, cap->r.rCurrentAlloc);
+ }
+ cap->r.rCurrentAlloc = nbd;
+ }
+
+
+ cap->pinned_object_empty = bd->link;
+ newNurseryBlock(bd);
+ if (bd->link != NULL) {
+ bd->link->u.back = cap->pinned_object_empty;
}
+ initBdescr(bd, g0, g0);
cap->pinned_object_block = bd;
bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
@@ -1912,3 +1945,106 @@ _bdescr (StgPtr p)
}
#endif
+
+/*
+Note [Sources of Block Level Fragmentation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Block level fragmentation is when there is unused space in megablocks.
+The amount of fragmentation can be calculated as the difference between the
+total size of allocated blocks and the total size of allocated megablocks.
+
+The act of the copying collection naturally reduces fragmentation by moving
+data between megablocks. Over time, the effect is that most megablocks end up quite full because
+data will be copied out of fragmented megablocks. The new block is chosen from
+the free list where the aim is to choose a gap of approximately the right size for
+the copied block so the data will end up in a probably less fragmented block.
+There are two situations where we end up with block fragmentation.
+
+1. Fragmentation from pinned data
+2. Fragmentation from nursery allocated blocks
+
+# Pinned Data Fragmentation
+
+There are two sources of
+pinned data, large objects and pinned bytearrays. After one of these object
+types is allocated, it is never moved by the collector
+and therefore if all the other blocks are collected around it then you can end
+up with a megablock with one pinned block and no other blocks. No special
+effort is taken in the compiler
+to ensure that this kind of fragmentation doesn't happen in the first place and
+once the heap is fragmented in this way, there's nothing you can do about it
+beyond hoping that the pinned data is eventually freed.
+
+# Nursery Fragmentation
+
+The other reason that a block may not ever be moved or emptied is if it forms
+part of the nursery. When the nursery is first allocated then it is made up of
+megablock sized chunks, so if the nursery is 4 megabytes then it will consist of
+blocks from about 4 megablocks.
+
+Over time, the nursery is resized (by resizeNurseries) under various conditions.
+It gets bigger when
+we are allocating more and then smaller when we are allocating less.
+When the nursery is resized
+blocks are added or removed to it at potentially smaller sizes than a complete
+megablock. For example, if the nursery size needs to increase by 1, then
+the free list is consulted for a block of size 1 (from a random block)
+and that's added to the nursery.
+
+Over time the make-up of the nursery changes from 4
+contiguous megablocks to a hodge-podge of blocks from different megablocks. In
+some programs (see #19481), the fragmentation is so bad that a program with
+only 4 MB of live data can retain over 500 megablocks because each of these
+megablocks contributed a small number of blocks to the nursery.
+
+In particular, and confusingly, this second form of fragmentation was caused
+by the act of allocating pinned objects. `allocPinned` was the primary
+reason that the nursery size decreases by small amounts. When `allocPinned`
+needed a block then it took a block permanently out of
+the nursery which shrunk the size of the nursery by 1 block. Then next time the size
+of the nursery was checked, the `alloc_nurseries` found that the existing
+nursery was smaller than the desired size and a new blocked needed
+to be added. This allocation was serviced from an arbitrary megablock
+which had some free space. The effect over time as more allocation happened
+was the nursery became made up of blocks from many different megablocks.
+
+Instead now we maintain a separate small list of blocks in `pinned_object_empty`
+which fresh blocks are taken from when we need a new one for a pinned block rather
+than threatening the continuity of the nursery. The size of this list is controlled
+by the PINNED_EMPTY_SIZE macro.
+
+In theory, this kind of fragmentation due to the nursery could still happen
+but in practice removing the primary cause (allocatePinned) was sufficient to
+greatly improve the situation. Another way to "fix" fragmentation of the nursery
+would be to periodically reallocate it when it was fragmented across many megablocks.
+
+Ticket: #19481
+
+# When can fragmentation be observed?
+
+Fragmentation is observed when the live data in a program is low compared to
+the overall resident size of the heap. The block allocator can reuse unused
+space within a megablock and therefore as residency
+increases again, the fragmented blocks will get filled up. Having a block-level
+fragmented heap means your program will never go below a certain memory
+threshold but it doesn't "use" more memory during periods of high residency.
+To clarify, say you observe 100 MB of fragmentation when your live data is
+4 MB, if your live data rise to 200MB then you probably will not still observe 100 MB
+of fragmentation as the block allocate will use the space in fragmented megablocks.
+
+# How to observe fragmentation
+
+Your heap is probably fragmented when
+
+* Live bytes is low
+* Memory in use (number of megablocks) is comparatively high
+* The size of the free list dominates residency (this can be observed using the
+ debug RTS and the memory inventory produced by -Dg).
+
+# Compacting Collector
+
+The compacting collector does nothing to improve megablock
+level fragmentation. The role of the compacting GC is to remove object level
+fragmentation and to use less memory when collecting. - see #19248
+*/