summaryrefslogtreecommitdiff
path: root/rts/RtsFlags.c
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2016-10-09 18:21:35 -0400
committerBen Gamari <ben@smart-cactus.org>2016-10-09 18:21:36 -0400
commit85e81a850a3e79d965e18f267a0e0b1c4bc69fae (patch)
treee3659426074078e2fce80b87253e6976373de0a5 /rts/RtsFlags.c
parent6c47f2efa3f8f4639f375d34f54c01a60c9a1a82 (diff)
downloadhaskell-85e81a850a3e79d965e18f267a0e0b1c4bc69fae.tar.gz
Turn on -n4m with -A16m or greater
Nursery chunks help reduce the cost of GC when capabilities are unevenly loaded, by ensuring that we use more of the available nursery. The rationale for enabling this at -A16m is that any negative effects due to loss of cache locality are less likely to be an issue at -A16m and above. It's a conservative guess. If we had a lot of benchmark data we could probably do better. Results for nofib/parallel at -N4 -A32m with and without -n4m: ``` ------------------------------------------------------------------------ Program Size Allocs Runtime Elapsed TotalMem ------------------------------------------------------------------------ blackscholes 0.0% -9.5% -9.0% -15.0% -2.2% coins 0.0% -4.7% -3.6% -0.6% -13.6% mandel 0.0% -0.3% +7.7% +13.1% +0.1% matmult 0.0% +1.5% +10.0% +7.7% +0.1% nbody 0.0% -4.1% -2.9% 0.085 0.0% parfib 0.0% -1.4% +1.0% +1.5% +0.2% partree 0.0% -0.3% +0.8% +2.9% -0.8% prsa 0.0% -0.5% -2.1% -7.6% 0.0% queens 0.0% -3.2% -1.4% +2.2% +1.3% ray 0.0% -5.6% -14.5% -7.6% +0.8% sumeuler 0.0% -0.4% +2.4% +1.1% 0.0% ------------------------------------------------------------------------ Min 0.0% -9.5% -14.5% -15.0% -13.6% Max 0.0% +1.5% +10.0% +13.1% +1.3% Geometric Mean +0.0% -2.6% -1.3% -0.5% -1.4% ``` Not conclusive, but slightly better. This matters a lot more when you have more cores. Test Plan: validate, nofib/paralel Reviewers: niteria, ezyang, nh2, trofi, austin, erikd, bgamari Reviewed By: bgamari Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D2581 GHC Trac Issues: #9221
Diffstat (limited to 'rts/RtsFlags.c')
-rw-r--r--rts/RtsFlags.c18
1 files changed, 18 insertions, 0 deletions
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 4bd544ee29..d86b154342 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -1454,6 +1454,24 @@ static void normaliseRtsOpts (void)
errorUsage();
}
+ if (RtsFlags.GcFlags.maxHeapSize != 0 &&
+ RtsFlags.GcFlags.heapSizeSuggestion >
+ RtsFlags.GcFlags.maxHeapSize) {
+ RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
+ }
+
+ if (RtsFlags.GcFlags.maxHeapSize != 0 &&
+ RtsFlags.GcFlags.minAllocAreaSize >
+ RtsFlags.GcFlags.maxHeapSize) {
+ errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
+ RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
+ }
+
+ // If we have -A16m or larger, use -n4m.
+ if (RtsFlags.GcFlags.minAllocAreaSize >= (16*1024*1024) / BLOCK_SIZE) {
+ RtsFlags.GcFlags.nurseryChunkSize = (4*1024*1024) / BLOCK_SIZE;
+ }
+
if (RtsFlags.ParFlags.parGcLoadBalancingGen == ~0u) {
StgWord alloc_area_bytes
= RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE;