diff options
author | Andrey Sverdlichenko <blaze@ruddy.ru> | 2018-01-31 21:33:58 -0500 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2018-01-31 23:28:06 -0500 |
commit | 0171e09e4d073d8466953ebbf01292e55829fb20 (patch) | |
tree | 4a939f8132e7568a497b96adddf23d4d74f62349 /rts/sm | |
parent | 5f922fbbef56dd4f0133ffe07ab8f0ebcb58fbaf (diff) | |
download | haskell-0171e09e4d073d8466953ebbf01292e55829fb20.tar.gz |
Make RTS keep less memory (fixes #14702)
Currently the runtime holds on to 4*used_memory. This includes, in
particular, the nursery, which can be quite large on multiprocessor
machines: 16 CPUs x 64MB each is 1GB. Multiplying it by 4 means that, whatever
the actual memory usage is, the runtime will never release memory below 4GB, and
this is quite excessive for processes which only need a lot of memory
briefly (think building data structures from large files).
This diff makes the multiplier apply only to GC-managed memory, leaving
all "static" allocations alone.
Test Plan: make test TEST="T14702"
Reviewers: bgamari, erikd, simonmar
Reviewed By: simonmar
Subscribers: rwbarton, thomie, carter
GHC Trac Issues: #14702
Differential Revision: https://phabricator.haskell.org/D4338
Diffstat (limited to 'rts/sm')
-rw-r--r-- | rts/sm/GC.c | 52 |
1 files changed, 42 insertions, 10 deletions
diff --git a/rts/sm/GC.c b/rts/sm/GC.c index c5ab7a8161..197b46657b 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -28,6 +28,7 @@ #include "Sparks.h" #include "Sweep.h" +#include "Arena.h" #include "Storage.h" #include "RtsUtils.h" #include "Apply.h" @@ -50,6 +51,10 @@ #include "CNF.h" #include "RtsFlags.h" +#if defined(PROFILING) +#include "RetainerProfile.h" +#endif + #include <string.h> // for memset() #include <unistd.h> @@ -756,24 +761,51 @@ GarbageCollect (uint32_t collect_gen, ACQUIRE_SM_LOCK; if (major_gc) { - W_ need, got; - need = BLOCKS_TO_MBLOCKS(n_alloc_blocks); - got = mblocks_allocated; + W_ need_prealloc, need_live, need, got; + uint32_t i; + + need_live = 0; + for (i = 0; i < RtsFlags.GcFlags.generations; i++) { + need_live += genLiveBlocks(&generations[i]); + } + need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live); + + need_prealloc = 0; + for (i = 0; i < n_nurseries; i++) { + need_prealloc += nurseries[i].n_blocks; + } + need_prealloc += RtsFlags.GcFlags.largeAllocLim; + need_prealloc += countAllocdBlocks(exec_block); + need_prealloc += arenaBlocks(); +#if defined(PROFILING) + if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { + need_prealloc = retainerStackBlocks(); + } +#endif + /* If the amount of data remains constant, next major GC we'll - require (F+1)*need. We leave (F+2)*need in order to reduce - repeated deallocation and reallocation. */ - need = (RtsFlags.GcFlags.oldGenFactor + 2) * need; + * require (F+1)*live + prealloc. We leave (F+2)*live + prealloc + * in order to reduce repeated deallocation and reallocation. #14702 + */ + need = need_prealloc + (RtsFlags.GcFlags.oldGenFactor + 2) * need_live; + + /* Also, if user set heap size, do not drop below it. + */ + need = stg_max(RtsFlags.GcFlags.heapSizeSuggestion, need); + /* But with a large nursery, the above estimate might exceed * maxHeapSize. A large resident set size might make the OS * kill this process, or swap unnecessarily. 
Therefore we * ensure that our estimate does not exceed maxHeapSize. */ if (RtsFlags.GcFlags.maxHeapSize != 0) { - W_ max = BLOCKS_TO_MBLOCKS(RtsFlags.GcFlags.maxHeapSize); - if (need > max) { - need = max; - } + need = stg_min(RtsFlags.GcFlags.maxHeapSize, need); } + + need = BLOCKS_TO_MBLOCKS(need); + + got = mblocks_allocated; + if (got > need) { returnMemoryToOS(got - need); } |