summaryrefslogtreecommitdiff
path: root/rts/sm
diff options
context:
space:
mode:
author: Andrey Sverdlichenko <blaze@ruddy.ru> 2018-01-31 21:33:58 -0500
committer: Ben Gamari <ben@smart-cactus.org> 2018-01-31 23:28:06 -0500
commit: 0171e09e4d073d8466953ebbf01292e55829fb20 (patch)
tree: 4a939f8132e7568a497b96adddf23d4d74f62349 /rts/sm
parent: 5f922fbbef56dd4f0133ffe07ab8f0ebcb58fbaf (diff)
download: haskell-0171e09e4d073d8466953ebbf01292e55829fb20.tar.gz
Make RTS keep less memory (fixes #14702)
Currently the runtime holds on to 4*used_memory. This includes, in particular, the nursery, which can be quite large on multiprocessor machines: 16 CPUs x 64MB each is 1GB. Multiplying that by 4 means that, whatever the actual memory usage is, the runtime will never release memory below 4GB, which is quite excessive for processes that need a lot of memory only briefly (think building data structures from large files). This diff makes the multiplier apply only to GC-managed memory, leaving all "static" allocations alone. Test Plan: make test TEST="T14702" Reviewers: bgamari, erikd, simonmar Reviewed By: simonmar Subscribers: rwbarton, thomie, carter GHC Trac Issues: #14702 Differential Revision: https://phabricator.haskell.org/D4338
Diffstat (limited to 'rts/sm')
-rw-r--r-- rts/sm/GC.c 52
1 files changed, 42 insertions, 10 deletions
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index c5ab7a8161..197b46657b 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -28,6 +28,7 @@
#include "Sparks.h"
#include "Sweep.h"
+#include "Arena.h"
#include "Storage.h"
#include "RtsUtils.h"
#include "Apply.h"
@@ -50,6 +51,10 @@
#include "CNF.h"
#include "RtsFlags.h"
+#if defined(PROFILING)
+#include "RetainerProfile.h"
+#endif
+
#include <string.h> // for memset()
#include <unistd.h>
@@ -756,24 +761,51 @@ GarbageCollect (uint32_t collect_gen,
ACQUIRE_SM_LOCK;
if (major_gc) {
- W_ need, got;
- need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
- got = mblocks_allocated;
+ W_ need_prealloc, need_live, need, got;
+ uint32_t i;
+
+ need_live = 0;
+ for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
+ need_live += genLiveBlocks(&generations[i]);
+ }
+ need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live);
+
+ need_prealloc = 0;
+ for (i = 0; i < n_nurseries; i++) {
+ need_prealloc += nurseries[i].n_blocks;
+ }
+ need_prealloc += RtsFlags.GcFlags.largeAllocLim;
+ need_prealloc += countAllocdBlocks(exec_block);
+ need_prealloc += arenaBlocks();
+#if defined(PROFILING)
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
+ need_prealloc = retainerStackBlocks();
+ }
+#endif
+
/* If the amount of data remains constant, next major GC we'll
- require (F+1)*need. We leave (F+2)*need in order to reduce
- repeated deallocation and reallocation. */
- need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
+ * require (F+1)*live + prealloc. We leave (F+2)*live + prealloc
+ * in order to reduce repeated deallocation and reallocation. #14702
+ */
+ need = need_prealloc + (RtsFlags.GcFlags.oldGenFactor + 2) * need_live;
+
+ /* Also, if user set heap size, do not drop below it.
+ */
+ need = stg_max(RtsFlags.GcFlags.heapSizeSuggestion, need);
+
/* But with a large nursery, the above estimate might exceed
* maxHeapSize. A large resident set size might make the OS
* kill this process, or swap unnecessarily. Therefore we
* ensure that our estimate does not exceed maxHeapSize.
*/
if (RtsFlags.GcFlags.maxHeapSize != 0) {
- W_ max = BLOCKS_TO_MBLOCKS(RtsFlags.GcFlags.maxHeapSize);
- if (need > max) {
- need = max;
- }
+ need = stg_min(RtsFlags.GcFlags.maxHeapSize, need);
}
+
+ need = BLOCKS_TO_MBLOCKS(need);
+
+ got = mblocks_allocated;
+
if (got > need) {
returnMemoryToOS(got - need);
}