From cc2ea98ac4a15e40a15e89de9e47f33e191ba393 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Thu, 14 Apr 2011 08:59:39 +0100 Subject: Avoid accumulating slop in the pinned_object_block. The pinned_object_block is where we allocate small pinned ByteArray# objects. At a GC the pinned_object_block was being treated like other large objects and promoted to the next step/generation, even if it was only partly full. Under some ByteString-heavy workloads this would accumulate on average 2k of slop per GC, and this memory is never released until the ByteArray# objects in the block are freed. So now, we keep allocating into the pinned_object_block until it is completely full, at which point it is handed over to the GC as before. The pinned_object_block might therefore contain objects with a large range of ages, but I don't think this is any worse than the situation before. We still have the fragmentation issue in general, but the new scheme can improve the memory overhead for some workloads dramatically. --- rts/sm/GC.c | 5 ----- rts/sm/Sanity.c | 4 ++++ rts/sm/Storage.c | 25 ++++++++++++++++++++----- 3 files changed, 24 insertions(+), 10 deletions(-) (limited to 'rts/sm') diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 05bc8f22fb..30361401cc 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -597,11 +597,6 @@ GarbageCollect (rtsBool force_major_gc, // update the max size of older generations after a major GC resize_generations(); - // Start a new pinned_object_block - for (n = 0; n < n_capabilities; n++) { - capabilities[n].pinned_object_block = NULL; - } - // Free the mark stack. 
if (mark_stack_top_bd != NULL) { debugTrace(DEBUG_gc, "mark stack: %d blocks", diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index 8ebb9a2513..0ec552c047 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -789,6 +789,7 @@ findMemoryLeak (void) for (i = 0; i < n_capabilities; i++) { markBlocks(nurseries[i].blocks); + markBlocks(capabilities[i].pinned_object_block); } #ifdef PROFILING @@ -880,6 +881,9 @@ memInventory (rtsBool show) for (i = 0; i < n_capabilities; i++) { ASSERT(countBlocks(nurseries[i].blocks) == nurseries[i].n_blocks); nursery_blocks += nurseries[i].n_blocks; + if (capabilities[i].pinned_object_block != NULL) { + nursery_blocks += capabilities[i].pinned_object_block->blocks; + } } retainer_blocks = 0; diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index ae3433a9b2..f8a9e559bf 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -657,17 +657,32 @@ allocatePinned (Capability *cap, lnat n) // If we don't have a block of pinned objects yet, or the current // one isn't large enough to hold the new object, allocate a new one. if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) { + // The pinned_object_block remains attached to the capability + // until it is full, even if a GC occurs. We want this + // behaviour because otherwise the unallocated portion of the + // block would be forever slop, and under certain workloads + // (allocating a few ByteStrings per GC) we accumulate a lot + // of slop. + // + // So, the pinned_object_block is initially marked + // BF_EVACUATED so the GC won't touch it. When it is full, + // we place it on the large_objects list, and at the start of + // the next GC the BF_EVACUATED flag will be cleared, and the + // block will be promoted as usual (if anything in it is + // live). 
ACQUIRE_SM_LOCK; - cap->pinned_object_block = bd = allocBlock(); - dbl_link_onto(bd, &g0->large_objects); - g0->n_large_blocks++; + if (bd != NULL) { + dbl_link_onto(bd, &g0->large_objects); + g0->n_large_blocks++; + g0->n_new_large_words += bd->free - bd->start; + } + cap->pinned_object_block = bd = allocBlock(); RELEASE_SM_LOCK; initBdescr(bd, g0, g0); - bd->flags = BF_PINNED | BF_LARGE; + bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED; bd->free = bd->start; } - g0->n_new_large_words += n; p = bd->free; bd->free += n; return p; -- cgit v1.2.1