summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andreas Klebinger <klebinger.andreas@gmx.at>, 2020-07-11 06:37:12 -0400
committer: Marge Bot <ben+marge-bot@smart-cactus.org>, 2020-12-22 22:10:06 -0500
commit: 553c59ca5581bcba79ea7d9672cffd438905d846 (patch)
tree: 5484c37ffdef7266ae91166a642d83461965493e
parent: 4c3fae472c0223dbbf8062cd7ab1e24b3e9c01c6 (diff)
download: haskell-553c59ca5581bcba79ea7d9672cffd438905d846.tar.gz
Increase -A default to 4MB.
This gives a small increase in performance under most circumstances. For single-threaded GC the improvement is on the order of 1-2%. For multi-threaded GC the results are quite noisy but seem to fall into the same ballpark. Fixes #16499.
-rw-r--r-- docs/users_guide/runtime_control.rst 15
-rw-r--r-- rts/RtsFlags.c 5
-rw-r--r-- testsuite/tests/profiling/should_run/all.T 2
-rw-r--r-- testsuite/tests/rts/T9579/Makefile 16
4 files changed, 23 insertions, 15 deletions
diff --git a/docs/users_guide/runtime_control.rst b/docs/users_guide/runtime_control.rst
index 5d3b24adac..49dc6a5441 100644
--- a/docs/users_guide/runtime_control.rst
+++ b/docs/users_guide/runtime_control.rst
@@ -420,7 +420,7 @@ performance.
.. rts-flag:: -A ⟨size⟩
- :default: 1MB
+ :default: 4MB
.. index::
single: allocation area, size
@@ -429,15 +429,22 @@ performance.
collector. The allocation area (actually generation 0 step 0) is
fixed and is never resized (unless you use :rts-flag:`-H [⟨size⟩]`, below).
- Increasing the allocation area size may or may not give better
- performance (a bigger allocation area means worse cache behaviour
- but fewer garbage collections and less promotion).
+ Optimal settings depend on the actual machine, program, and other RTS options.
+ Increasing the allocation area size means worse cache behaviour
+ but fewer garbage collections and less promotion.
+
+ In general, settings >= 4MB can reduce performance in some cases, in particular for
+ single-threaded operation. However, in a parallel setting, increasing the allocation area
+ to ``16MB``, or even ``64MB``, can increase GC throughput significantly.
With only 1 generation (e.g. ``-G1``, see :rts-flag:`-G ⟨generations⟩`) the
``-A`` option specifies the minimum allocation area, since the actual size
of the allocation area will be resized according to the amount of data in
the heap (see :rts-flag:`-F ⟨factor⟩`, below).
+ When heap profiling, using a smaller allocation area can increase accuracy, as more frequent
+ major garbage collections also result in more frequent heap snapshots.
+
.. rts-flag:: -AL ⟨size⟩
:default: :rts-flag:`-A <-A ⟨size⟩>` value
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index b23b19752b..044e7742c4 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -153,10 +153,11 @@ void initRtsFlagsDefaults(void)
RtsFlags.GcFlags.stkChunkSize = (32 * 1024) / sizeof(W_);
RtsFlags.GcFlags.stkChunkBufferSize = (1 * 1024) / sizeof(W_);
- RtsFlags.GcFlags.minAllocAreaSize = (1024 * 1024) / BLOCK_SIZE;
+ /* -A default. See #16499 for a discussion about the tradeoffs */
+ RtsFlags.GcFlags.minAllocAreaSize = (4 * 1024 * 1024) / BLOCK_SIZE;
RtsFlags.GcFlags.largeAllocLim = 0; /* defaults to minAllocAreasize */
RtsFlags.GcFlags.nurseryChunkSize = 0;
- RtsFlags.GcFlags.minOldGenSize = (1024 * 1024) / BLOCK_SIZE;
+ RtsFlags.GcFlags.minOldGenSize = (1024 * 1024) / BLOCK_SIZE; /* -O default */
RtsFlags.GcFlags.maxHeapSize = 0; /* off by default */
RtsFlags.GcFlags.heapLimitGrace = (1024 * 1024);
RtsFlags.GcFlags.heapSizeSuggestion = 0; /* none */
diff --git a/testsuite/tests/profiling/should_run/all.T b/testsuite/tests/profiling/should_run/all.T
index dec77add28..ad10baac13 100644
--- a/testsuite/tests/profiling/should_run/all.T
+++ b/testsuite/tests/profiling/should_run/all.T
@@ -28,7 +28,7 @@ test('heapprof001',
compile_and_run, [''])
test('T2592',
- [only_ways(['profasm']), extra_run_opts('+RTS -M1m -RTS'), exit_code(251)],
+ [only_ways(['profasm']), extra_run_opts('+RTS -M1m -A1m -RTS'), exit_code(251)],
compile_and_run, [''])
test('T3001', [only_ways(['prof_hb']), extra_ways(['prof_hb'])],
diff --git a/testsuite/tests/rts/T9579/Makefile b/testsuite/tests/rts/T9579/Makefile
index 23177ee125..e1bf2d3079 100644
--- a/testsuite/tests/rts/T9579/Makefile
+++ b/testsuite/tests/rts/T9579/Makefile
@@ -3,43 +3,43 @@ include $(TOP)/mk/boilerplate.mk
include $(TOP)/mk/test.mk
T9579_stackoverflow_rtsnone:
- '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=none -fforce-recomp -with-rtsopts -K1m \
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=none -fforce-recomp -with-rtsopts "-A1M -K1m" \
-outputdir tmp_T9579_stackoverflow_rtsnone \
StackOverflow.hs -o T9579_stackoverflow_rtsnone
T9579_stackoverflow_rtssome:
- '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=some -fforce-recomp -with-rtsopts -K1m \
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=some -fforce-recomp -with-rtsopts "-A1M -K1m" \
-outputdir tmp_T9579_stackoverflow_rtssome \
StackOverflow.hs -o T9579_stackoverflow_rtssome
T9579_stackoverflow_rtsall:
- '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts -K1m \
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts "-A1M -K1m" \
-outputdir tmp_T9579_stackoverflow_rtsall \
StackOverflow.hs -o T9579_stackoverflow_rtsall
T9579_stackoverflow_rtsall_no_suggestions:
- '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts -K1m \
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts "-A1M -K1m" \
-no-rtsopts-suggestions \
-outputdir tmp_T9579_stackoverflow_rtsall_no_suggestions \
StackOverflow.hs -o T9579_stackoverflow_rtsall_no_suggestions
T9579_outofheap_rtsnone:
- '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=none -fforce-recomp -with-rtsopts -M1m \
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=none -fforce-recomp -with-rtsopts "-A1M -M1m" \
-outputdir tmp_T9579_outofheap_rtsnone \
OutOfHeap.hs -o T9579_outofheap_rtsnone
T9579_outofheap_rtssome:
- '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=some -fforce-recomp -with-rtsopts -M1m \
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=some -fforce-recomp -with-rtsopts "-A1M -M1m" \
-outputdir tmp_T9579_outofheap_rtssome \
OutOfHeap.hs -o T9579_outofheap_rtssome
T9579_outofheap_rtsall:
- '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts -M1m \
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts "-A1M -M1m" \
-outputdir tmp_T9579_outofheap_rtsall \
OutOfHeap.hs -o T9579_outofheap_rtsall
T9579_outofheap_rtsall_no_suggestions:
- '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts -M1m \
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts "-A1M -M1m" \
-no-rtsopts-suggestions \
-outputdir tmp_T9579_outofheap_rtsall_no_suggestions \
OutOfHeap.hs -o T9579_outofheap_rtsall_no_suggestions