diff options
-rw-r--r-- | rts/sm/GC.c | 51 | ||||
-rw-r--r-- | rts/sm/Storage.c | 18 | ||||
-rw-r--r-- | rts/sm/Storage.h | 2 | ||||
-rw-r--r-- | testsuite/tests/rts/T23221.hs | 70 | ||||
-rw-r--r-- | testsuite/tests/rts/all.T | 2 |
5 files changed, 131 insertions, 12 deletions
diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 695131e738..798a86c0ba 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -994,14 +994,40 @@ GarbageCollect (struct GcConfig config, commitMBlockFreeing(); if (major_gc) { - W_ need_prealloc, need_live, need, got; + W_ need_prealloc, need_copied_live, need_uncopied_live, need, got, extra_needed; uint32_t i; - need_live = 0; + need_copied_live = 0; + need_uncopied_live = 0; for (i = 0; i < RtsFlags.GcFlags.generations; i++) { - need_live += genLiveBlocks(&generations[i]); + need_copied_live += genLiveCopiedBlocks(&generations[i]); + need_uncopied_live += genLiveUncopiedBlocks(&generations[i]); } - need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live); + + debugTrace(DEBUG_gc, "(before) copied_live: %d; uncopied_live: %d", need_copied_live, need_uncopied_live ); + + + // minOldGenSize states that the size of the oldest generation must be at least + // as big as a certain value, so make sure to save enough memory for that. + extra_needed = 0; + if (RtsFlags.GcFlags.minOldGenSize >= need_copied_live + need_uncopied_live){ + extra_needed = RtsFlags.GcFlags.minOldGenSize - (need_copied_live + need_uncopied_live); + } + debugTrace(DEBUG_gc, "(minOldGen: %d; extra_needed: %d", RtsFlags.GcFlags.minOldGenSize, extra_needed); + + // If oldest gen is uncopying in some manner (compact or non-moving) then + // add the extra requested by minOldGenSize to uncopying portion of memory. + // Otherwise, the last generation is copying so add it to copying portion. + if (oldest_gen -> compact || RtsFlags.GcFlags.useNonmoving) { + need_uncopied_live += extra_needed; + } + else { + need_copied_live += extra_needed; + } + + ASSERT(need_uncopied_live + need_copied_live >= RtsFlags.GcFlags.minOldGenSize ); + + debugTrace(DEBUG_gc, "(after) copyied_live: %d; uncopied_live: %d", need_copied_live, need_uncopied_live ); need_prealloc = 0; for (i = 0; i < n_nurseries; i++) { @@ -1027,14 +1053,19 @@ GarbageCollect (struct GcConfig config, debugTrace(DEBUG_gc, "factors: %f %d %f", RtsFlags.GcFlags.oldGenFactor, consec_idle_gcs, scaled_factor ); - // Unavoidable need depends on GC strategy + // Unavoidable need for copying memory depends on GC strategy // * Copying need 2 * live // * Compacting need 1.x * live (we choose 1.2) - // * Nonmoving needs ~ 1.x * live - double unavoidable_need_factor = (oldest_gen->compact || RtsFlags.GcFlags.useNonmoving) - ? 1.2 : 2; - W_ scaled_needed = (scaled_factor + unavoidable_need_factor) * need_live; - debugTrace(DEBUG_gc, "factors_2: %f %d", unavoidable_need_factor, scaled_needed); + double unavoidable_copied_need_factor = (oldest_gen->compact) + ? 1.2 : 2; + + // Unmoving blocks (compacted, pinned, nonmoving GC blocks) are not going + // to be copied so don't need to save 2* the memory for them. + double unavoidable_uncopied_need_factor = 1.2; + + W_ scaled_needed = ((scaled_factor + unavoidable_copied_need_factor) * need_copied_live) + + ((scaled_factor + unavoidable_uncopied_need_factor) * need_uncopied_live); + debugTrace(DEBUG_gc, "factors_2: %f %d", ((scaled_factor + unavoidable_copied_need_factor) * need_copied_live), ((scaled_factor + unavoidable_uncopied_need_factor) * need_uncopied_live)); need = need_prealloc + scaled_needed; /* Also, if user set heap size, do not drop below it. diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index ec2f446a3f..b7350d7481 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -1608,20 +1608,34 @@ W_ countOccupied (bdescr *bd) return words; } +// Returns the total number of live blocks W_ genLiveWords (generation *gen) { return (gen->live_estimate ? gen->live_estimate : gen->n_words) + gen->n_large_words + gen->n_compact_blocks * BLOCK_SIZE_W; } -W_ genLiveBlocks (generation *gen) +// The number of live blocks which will be copied by the copying collector. +W_ genLiveCopiedBlocks (generation *gen) +{ + return gen->n_blocks; +} + +// The number of live blocks which will not be copied by the copying collector +// This includes non-moving collector segments, compact blocks and large/pinned blocks. +W_ genLiveUncopiedBlocks (generation *gen) { W_ nonmoving_blocks = 0; // The nonmoving heap contains some blocks that live outside the regular generation structure. if (gen == oldest_gen && RtsFlags.GcFlags.useNonmoving){ nonmoving_blocks = n_nonmoving_large_blocks + n_nonmoving_marked_large_blocks + n_nonmoving_compact_blocks + n_nonmoving_marked_compact_blocks; } - return gen->n_blocks + gen->n_large_blocks + gen->n_compact_blocks + nonmoving_blocks; + return gen->n_large_blocks + gen->n_compact_blocks + nonmoving_blocks; +} + +W_ genLiveBlocks (generation *gen) +{ + return genLiveCopiedBlocks(gen) + genLiveUncopiedBlocks(gen); } W_ gcThreadLiveWords (uint32_t i, uint32_t g) diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index a0d9af8de6..c6046de2c0 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -121,6 +121,8 @@ StgWord gcThreadLiveBlocks (uint32_t i, uint32_t g); StgWord genLiveWords (generation *gen); StgWord genLiveBlocks (generation *gen); +StgWord genLiveCopiedBlocks (generation *gen); +StgWord genLiveUncopiedBlocks (generation *gen); StgWord calcTotalLargeObjectsW (void); StgWord calcTotalCompactW (void); diff --git a/testsuite/tests/rts/T23221.hs b/testsuite/tests/rts/T23221.hs new file mode 100644 index 0000000000..574c26c97e --- /dev/null +++ b/testsuite/tests/rts/T23221.hs @@ -0,0 +1,70 @@ +{-# LANGUAGE MagicHash, UnboxedTuples, NumericUnderscores #-} + +module Main where + +import GHC.Exts +import GHC.IO +import System.Mem +import System.Environment +import Debug.Trace +import Control.Monad +import GHC.Stats +import Data.Word + +-- This test is for checking the memory return behaviour of blocks which will be +-- copied and blocks which are not copied (#23221) +main :: IO () +main = do + [sn] <- getArgs + let n = read sn + -- By checking that lower bound of unpinned is the upper bound of pinned then we + -- check that unpinned has lower memory baseline than pinned. + loop newByteArray 2 3 n + loop newPinnedByteArray 1 2 n + + +-- The upper_bound is the upper bound on how much total memory should be live at the end +-- of the test as a factor of the expected live bytes. +loop f lower_bound upper_bound n = do + ba <- mapM (\_ -> f 128) [0..n] + traceMarkerIO "Allocated_all" + performGC + let !ba' = take (n `div` 4) ba + evaluate (length ba') + traceMarkerIO "GC_4" + performGC + evaluate (length (reverse ba')) + replicateM_ 20 performGC + total_mem <- checkStats lower_bound upper_bound (n `div` 4) + evaluate (length (reverse ba')) + return total_mem + +checkStats :: Double -> Double -> Int -> IO () +checkStats lower_bound upper_bound n = do + stats <- getRTSStats + let expected_live_memory = fromIntegral n -- How many objects + * (3 -- One list cons + + 2 -- One BA constructor + + 18) -- ByteArray# object (size 16 + 2 overhead) + -- size of each object + * 8 -- word size + let bytes_used = gcdetails_mem_in_use_bytes (gc stats) + mblocks = bytes_used `div` (2 ^ 20) + when (truncate (expected_live_memory * upper_bound) < bytes_used) $ + error ("Upper Memory bound failed: " ++ show (truncate expected_live_memory, upper_bound, bytes_used)) + when (truncate (expected_live_memory * lower_bound) >= bytes_used) $ + error ("Lower Memory bound failed: " ++ show (truncate expected_live_memory, lower_bound, bytes_used)) + +data BA = BA ByteArray# + +newByteArray :: Int -> IO BA +newByteArray (I# sz#) = IO $ \s -> case newByteArray# sz# s of + (# s', k #) -> case unsafeFreezeByteArray# k s' of + (# s'', ba# #) -> (# s'', BA ba# #) + +newPinnedByteArray :: Int -> IO BA +newPinnedByteArray (I# sz#) = IO $ \s -> case newPinnedByteArray# sz# s of + (# s', k #) -> case unsafeFreezeByteArray# k s' of + (# s'', ba# #) -> (# s'', BA ba# #) + + diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T index 371a315786..29120a69be 100644 --- a/testsuite/tests/rts/all.T +++ b/testsuite/tests/rts/all.T @@ -590,4 +590,6 @@ test('T22795c', [only_ways(['normal']), js_skip], compile_and_run, ['-threaded - test('T17574', [js_skip], compile_and_run, ['-with-rtsopts -T']) +test('T23221', [js_skip, high_memory_usage, extra_run_opts('1500000'), unless(wordsize(64), skip)], compile_and_run, ['-O -with-rtsopts -T']) + test('T23142', [unless(debug_rts(), skip), req_interp], makefile_test, ['T23142']) |