summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Simon Marlow <marlowsd@gmail.com> 2016-10-09 18:21:35 -0400
committer Ben Gamari <ben@smart-cactus.org> 2016-10-09 18:21:36 -0400
commit 85e81a850a3e79d965e18f267a0e0b1c4bc69fae (patch)
tree e3659426074078e2fce80b87253e6976373de0a5
parent 6c47f2efa3f8f4639f375d34f54c01a60c9a1a82 (diff)
download haskell-85e81a850a3e79d965e18f267a0e0b1c4bc69fae.tar.gz
Turn on -n4m with -A16m or greater
Nursery chunks help reduce the cost of GC when capabilities are unevenly loaded, by ensuring that we use more of the available nursery. The rationale for enabling this at -A16m is that any negative effects due to loss of cache locality are less likely to be an issue at -A16m and above. It's a conservative guess. If we had a lot of benchmark data we could probably do better.

Results for nofib/parallel at -N4 -A32m with and without -n4m:

```
------------------------------------------------------------------------
        Program           Size    Allocs   Runtime   Elapsed  TotalMem
------------------------------------------------------------------------
   blackscholes           0.0%     -9.5%     -9.0%    -15.0%     -2.2%
          coins           0.0%     -4.7%     -3.6%     -0.6%    -13.6%
         mandel           0.0%     -0.3%     +7.7%    +13.1%     +0.1%
        matmult           0.0%     +1.5%    +10.0%     +7.7%     +0.1%
          nbody           0.0%     -4.1%     -2.9%     0.085      0.0%
         parfib           0.0%     -1.4%     +1.0%     +1.5%     +0.2%
        partree           0.0%     -0.3%     +0.8%     +2.9%     -0.8%
           prsa           0.0%     -0.5%     -2.1%     -7.6%      0.0%
         queens           0.0%     -3.2%     -1.4%     +2.2%     +1.3%
            ray           0.0%     -5.6%    -14.5%     -7.6%     +0.8%
       sumeuler           0.0%     -0.4%     +2.4%     +1.1%      0.0%
------------------------------------------------------------------------
            Min           0.0%     -9.5%    -14.5%    -15.0%    -13.6%
            Max           0.0%     +1.5%    +10.0%    +13.1%     +1.3%
 Geometric Mean          +0.0%     -2.6%     -1.3%     -0.5%     -1.4%
```

Not conclusive, but slightly better. This matters a lot more when you have more cores.

Test Plan: validate, nofib/parallel

Reviewers: niteria, ezyang, nh2, trofi, austin, erikd, bgamari

Reviewed By: bgamari

Subscribers: thomie

Differential Revision: https://phabricator.haskell.org/D2581

GHC Trac Issues: #9221
-rw-r--r-- docs/users_guide/runtime_control.rst 2
-rw-r--r-- rts/RtsFlags.c 18
-rw-r--r-- rts/sm/Storage.c 13
3 files changed, 19 insertions, 14 deletions
diff --git a/docs/users_guide/runtime_control.rst b/docs/users_guide/runtime_control.rst
index 0ffb1d8206..54c7508ba8 100644
--- a/docs/users_guide/runtime_control.rst
+++ b/docs/users_guide/runtime_control.rst
@@ -327,7 +327,7 @@ performance.
.. rts-flag:: -n ⟨size⟩
- :default: 0
+ :default: 4m with ``-A16m`` or larger, otherwise 0.
.. index::
single: allocation area, chunk size
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 4bd544ee29..d86b154342 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -1454,6 +1454,24 @@ static void normaliseRtsOpts (void)
errorUsage();
}
+ if (RtsFlags.GcFlags.maxHeapSize != 0 &&
+ RtsFlags.GcFlags.heapSizeSuggestion >
+ RtsFlags.GcFlags.maxHeapSize) {
+ RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
+ }
+
+ if (RtsFlags.GcFlags.maxHeapSize != 0 &&
+ RtsFlags.GcFlags.minAllocAreaSize >
+ RtsFlags.GcFlags.maxHeapSize) {
+ errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
+ RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
+ }
+
+ // If we have -A16m or larger, use -n4m.
+ if (RtsFlags.GcFlags.minAllocAreaSize >= (16*1024*1024) / BLOCK_SIZE) {
+ RtsFlags.GcFlags.nurseryChunkSize = (4*1024*1024) / BLOCK_SIZE;
+ }
+
if (RtsFlags.ParFlags.parGcLoadBalancingGen == ~0u) {
StgWord alloc_area_bytes
= RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 4d0c8d5260..357e0180d2 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -140,19 +140,6 @@ initStorage (void)
ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
- if (RtsFlags.GcFlags.maxHeapSize != 0 &&
- RtsFlags.GcFlags.heapSizeSuggestion >
- RtsFlags.GcFlags.maxHeapSize) {
- RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
- }
-
- if (RtsFlags.GcFlags.maxHeapSize != 0 &&
- RtsFlags.GcFlags.minAllocAreaSize >
- RtsFlags.GcFlags.maxHeapSize) {
- errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
- RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
- }
-
initBlockAllocator();
#if defined(THREADED_RTS)