diff options
author | Matthew Pickering <matthewtpickering@gmail.com> | 2023-05-04 12:14:59 +0100 |
---|---|---|
committer | Matthew Pickering <matthewtpickering@gmail.com> | 2023-05-04 12:21:46 +0100 |
commit | 456fa18048a1f03598d8dc66d03cb4bf322060bf (patch) | |
tree | 177a55f9c762119d968bb395236174161ec43d05 | |
parent | 00a8a5ff9abf5bb1a0c2a9225c7bca5ec3bdf306 (diff) | |
download | haskell-wip/t23221.tar.gz |
rts: Refine memory retention behaviour to account for pinned/compacted objectswip/t23221
When using the copying collector there is still a lot of data which
isn't copied (such as pinned, compacted, large objects etc). The logic
to decide how much memory to retain didn't take into account that these
wouldn't be copied. Therefore we pessimistically retained 2* the amount
of memory for these blocks even though they wouldn't be copied by the
collector.
The solution is to split up the heap into two parts, the parts which
will be copied and the parts which won't be copied. Then the appropiate
factor is applied to each part individually (2 * for copying and 1.2 *
for not copying).
The T23221 test demonstrates this improvement with a program which first
allocates many unpinned ByteArray# followed by many pinned ByteArray#
and observes the difference in the ultimate memory baseline between the
two.
There are some charts on #23221.
Fixes #23221
-rw-r--r-- | rts/sm/GC.c | 51 | ||||
-rw-r--r-- | rts/sm/Storage.c | 18 | ||||
-rw-r--r-- | rts/sm/Storage.h | 2 | ||||
-rw-r--r-- | testsuite/tests/rts/T23221.hs | 70 | ||||
-rw-r--r-- | testsuite/tests/rts/all.T | 2 |
5 files changed, 131 insertions, 12 deletions
diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 695131e738..798a86c0ba 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -994,14 +994,40 @@ GarbageCollect (struct GcConfig config, commitMBlockFreeing(); if (major_gc) { - W_ need_prealloc, need_live, need, got; + W_ need_prealloc, need_copied_live, need_uncopied_live, need, got, extra_needed; uint32_t i; - need_live = 0; + need_copied_live = 0; + need_uncopied_live = 0; for (i = 0; i < RtsFlags.GcFlags.generations; i++) { - need_live += genLiveBlocks(&generations[i]); + need_copied_live += genLiveCopiedBlocks(&generations[i]); + need_uncopied_live += genLiveUncopiedBlocks(&generations[i]); } - need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live); + + debugTrace(DEBUG_gc, "(before) copied_live: %d; uncopied_live: %d", need_copied_live, need_uncopied_live ); + + + // minOldGenSize states that the size of the oldest generation must be at least + // as big as a certain value, so make sure to save enough memory for that. + extra_needed = 0; + if (RtsFlags.GcFlags.minOldGenSize >= need_copied_live + need_uncopied_live){ + extra_needed = RtsFlags.GcFlags.minOldGenSize - (need_copied_live + need_uncopied_live); + } + debugTrace(DEBUG_gc, "(minOldGen: %d; extra_needed: %d", RtsFlags.GcFlags.minOldGenSize, extra_needed); + + // If oldest gen is uncopying in some manner (compact or non-moving) then + // add the extra requested by minOldGenSize to uncopying portion of memory. + // Otherwise, the last generation is copying so add it to copying portion. + if (oldest_gen -> compact || RtsFlags.GcFlags.useNonmoving) { + need_uncopied_live += extra_needed; + } + else { + need_copied_live += extra_needed; + } + + ASSERT(need_uncopied_live + need_copied_live >= RtsFlags.GcFlags.minOldGenSize ); + + debugTrace(DEBUG_gc, "(after) copyied_live: %d; uncopied_live: %d", need_copied_live, need_uncopied_live ); need_prealloc = 0; for (i = 0; i < n_nurseries; i++) { @@ -1027,14 +1053,19 @@ GarbageCollect (struct GcConfig config, debugTrace(DEBUG_gc, "factors: %f %d %f", RtsFlags.GcFlags.oldGenFactor, consec_idle_gcs, scaled_factor ); - // Unavoidable need depends on GC strategy + // Unavoidable need for copying memory depends on GC strategy // * Copying need 2 * live // * Compacting need 1.x * live (we choose 1.2) - // * Nonmoving needs ~ 1.x * live - double unavoidable_need_factor = (oldest_gen->compact || RtsFlags.GcFlags.useNonmoving) - ? 1.2 : 2; - W_ scaled_needed = (scaled_factor + unavoidable_need_factor) * need_live; - debugTrace(DEBUG_gc, "factors_2: %f %d", unavoidable_need_factor, scaled_needed); + double unavoidable_copied_need_factor = (oldest_gen->compact) + ? 1.2 : 2; + + // Unmoving blocks (compacted, pinned, nonmoving GC blocks) are not going + // to be copied so don't need to save 2* the memory for them. + double unavoidable_uncopied_need_factor = 1.2; + + W_ scaled_needed = ((scaled_factor + unavoidable_copied_need_factor) * need_copied_live) + + ((scaled_factor + unavoidable_uncopied_need_factor) * need_uncopied_live); + debugTrace(DEBUG_gc, "factors_2: %f %d", ((scaled_factor + unavoidable_copied_need_factor) * need_copied_live), ((scaled_factor + unavoidable_uncopied_need_factor) * need_uncopied_live)); need = need_prealloc + scaled_needed; /* Also, if user set heap size, do not drop below it. diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 6d6500df9a..3adebb160f 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -1608,20 +1608,34 @@ W_ countOccupied (bdescr *bd) return words; } +// Returns the total number of live blocks W_ genLiveWords (generation *gen) { return (gen->live_estimate ? gen->live_estimate : gen->n_words) + gen->n_large_words + gen->n_compact_blocks * BLOCK_SIZE_W; } -W_ genLiveBlocks (generation *gen) +// The number of live blocks which will be copied by the copying collector. +W_ genLiveCopiedBlocks (generation *gen) +{ + return gen->n_blocks; +} + +// The number of live blocks which will not be copied by the copying collector +// This includes non-moving collector segments, compact blocks and large/pinned blocks. +W_ genLiveUncopiedBlocks (generation *gen) { W_ nonmoving_blocks = 0; // The nonmoving heap contains some blocks that live outside the regular generation structure. if (gen == oldest_gen && RtsFlags.GcFlags.useNonmoving){ nonmoving_blocks = n_nonmoving_large_blocks + n_nonmoving_marked_large_blocks + n_nonmoving_compact_blocks + n_nonmoving_marked_compact_blocks; } - return gen->n_blocks + gen->n_large_blocks + gen->n_compact_blocks + nonmoving_blocks; + return gen->n_large_blocks + gen->n_compact_blocks + nonmoving_blocks; +} + +W_ genLiveBlocks (generation *gen) +{ + return genLiveCopiedBlocks(gen) + genLiveUncopiedBlocks(gen); } W_ gcThreadLiveWords (uint32_t i, uint32_t g) diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index 42c56fe164..6c23556187 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -121,6 +121,8 @@ StgWord gcThreadLiveBlocks (uint32_t i, uint32_t g); StgWord genLiveWords (generation *gen); StgWord genLiveBlocks (generation *gen); +StgWord genLiveCopiedBlocks (generation *gen); +StgWord genLiveUncopiedBlocks (generation *gen); StgWord calcTotalLargeObjectsW (void); StgWord calcTotalCompactW (void); diff --git a/testsuite/tests/rts/T23221.hs b/testsuite/tests/rts/T23221.hs new file mode 100644 index 0000000000..574c26c97e --- /dev/null +++ b/testsuite/tests/rts/T23221.hs @@ -0,0 +1,70 @@ +{-# LANGUAGE MagicHash, UnboxedTuples, NumericUnderscores #-} + +module Main where + +import GHC.Exts +import GHC.IO +import System.Mem +import System.Environment +import Debug.Trace +import Control.Monad +import GHC.Stats +import Data.Word + +-- This test is for checking the memory return behaviour of blocks which will be +-- copied and blocks which are not copied (#23221) +main :: IO () +main = do + [sn] <- getArgs + let n = read sn + -- By checking that lower bound of unpinned is the upper bound of pinned then we + -- check that unpinned has lower memory baseline than pinned. + loop newByteArray 2 3 n + loop newPinnedByteArray 1 2 n + + +-- The upper_bound is the upper bound on how much total memory should be live at the end +-- of the test as a factor of the expected live bytes. +loop f lower_bound upper_bound n = do + ba <- mapM (\_ -> f 128) [0..n] + traceMarkerIO "Allocated_all" + performGC + let !ba' = take (n `div` 4) ba + evaluate (length ba') + traceMarkerIO "GC_4" + performGC + evaluate (length (reverse ba')) + replicateM_ 20 performGC + total_mem <- checkStats lower_bound upper_bound (n `div` 4) + evaluate (length (reverse ba')) + return total_mem + +checkStats :: Double -> Double -> Int -> IO () +checkStats lower_bound upper_bound n = do + stats <- getRTSStats + let expected_live_memory = fromIntegral n -- How many objects + * (3 -- One list cons + + 2 -- One BA constructor + + 18) -- ByteArray# object (size 16 + 2 overhead) + -- size of each object + * 8 -- word size + let bytes_used = gcdetails_mem_in_use_bytes (gc stats) + mblocks = bytes_used `div` (2 ^ 20) + when (truncate (expected_live_memory * upper_bound) < bytes_used) $ + error ("Upper Memory bound failed: " ++ show (truncate expected_live_memory, upper_bound, bytes_used)) + when (truncate (expected_live_memory * lower_bound) >= bytes_used) $ + error ("Lower Memory bound failed: " ++ show (truncate expected_live_memory, lower_bound, bytes_used)) + +data BA = BA ByteArray# + +newByteArray :: Int -> IO BA +newByteArray (I# sz#) = IO $ \s -> case newByteArray# sz# s of + (# s', k #) -> case unsafeFreezeByteArray# k s' of + (# s'', ba# #) -> (# s'', BA ba# #) + +newPinnedByteArray :: Int -> IO BA +newPinnedByteArray (I# sz#) = IO $ \s -> case newPinnedByteArray# sz# s of + (# s', k #) -> case unsafeFreezeByteArray# k s' of + (# s'', ba# #) -> (# s'', BA ba# #) + + diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T index 371a315786..29120a69be 100644 --- a/testsuite/tests/rts/all.T +++ b/testsuite/tests/rts/all.T @@ -590,4 +590,6 @@ test('T22795c', [only_ways(['normal']), js_skip], compile_and_run, ['-threaded - test('T17574', [js_skip], compile_and_run, ['-with-rtsopts -T']) +test('T23221', [js_skip, high_memory_usage, extra_run_opts('1500000'), unless(wordsize(64), skip)], compile_and_run, ['-O -with-rtsopts -T']) + test('T23142', [unless(debug_rts(), skip), req_interp], makefile_test, ['T23142']) |