diff options
author | Andreas Klebinger <klebinger.andreas@gmx.at> | 2020-07-11 06:37:12 -0400 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-12-22 22:10:06 -0500 |
commit | 553c59ca5581bcba79ea7d9672cffd438905d846 (patch) | |
tree | 5484c37ffdef7266ae91166a642d83461965493e | |
parent | 4c3fae472c0223dbbf8062cd7ab1e24b3e9c01c6 (diff) | |
download | haskell-553c59ca5581bcba79ea7d9672cffd438905d846.tar.gz |
Increase -A default to 4MB.
This gives a small increase in performance under most circumstances.
For single-threaded GC the improvement is on the order of 1-2%.
For multi-threaded GC the results are quite noisy but seem to
fall into the same ballpark.
Fixes #16499
-rw-r--r-- | docs/users_guide/runtime_control.rst | 15 | ||||
-rw-r--r-- | rts/RtsFlags.c | 5 | ||||
-rw-r--r-- | testsuite/tests/profiling/should_run/all.T | 2 | ||||
-rw-r--r-- | testsuite/tests/rts/T9579/Makefile | 16 |
4 files changed, 23 insertions, 15 deletions
```diff
diff --git a/docs/users_guide/runtime_control.rst b/docs/users_guide/runtime_control.rst
index 5d3b24adac..49dc6a5441 100644
--- a/docs/users_guide/runtime_control.rst
+++ b/docs/users_guide/runtime_control.rst
@@ -420,7 +420,7 @@ performance.
 
 .. rts-flag:: -A ⟨size⟩
 
-    :default: 1MB
+    :default: 4MB
 
     .. index::
        single: allocation area, size
@@ -429,15 +429,22 @@ performance.
     collector. The allocation area (actually generation 0 step 0) is
     fixed and is never resized (unless you use :rts-flag:`-H [⟨size⟩]`, below).
 
-    Increasing the allocation area size may or may not give better
-    performance (a bigger allocation area means worse cache behaviour
-    but fewer garbage collections and less promotion).
+    Optimal settings depend on the actual machine, program, and other RTS options.
+    Increasing the allocation area size means worse cache behaviour
+    but fewer garbage collections and less promotion.
+
+    In general settings >= 4MB can reduce performance in some cases, in particular for single
+    threaded operation. However in a parallel setting increasing the allocation area
+    to ``16MB``, or even ``64MB`` can increase gc throughput significantly.
 
     With only 1 generation (e.g. ``-G1``, see :rts-flag:`-G ⟨generations⟩`) the
     ``-A`` option specifies the minimum allocation area, since the actual size
     of the allocation area will be resized according to the amount of data in
     the heap (see :rts-flag:`-F ⟨factor⟩`, below).
 
+    When heap profiling using a smaller allocation area can increase accuracy as more frequent
+    major garbage collections also results in more frequent heap snapshots
+
 .. rts-flag:: -AL ⟨size⟩
 
     :default: :rts-flag:`-A <-A ⟨size⟩>` value
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index b23b19752b..044e7742c4 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -153,10 +153,11 @@ void initRtsFlagsDefaults(void)
     RtsFlags.GcFlags.stkChunkSize = (32 * 1024) / sizeof(W_);
     RtsFlags.GcFlags.stkChunkBufferSize = (1 * 1024) / sizeof(W_);
 
-    RtsFlags.GcFlags.minAllocAreaSize = (1024 * 1024) / BLOCK_SIZE;
+    /* -A default. See #16499 for a discussion about the tradeoffs */
+    RtsFlags.GcFlags.minAllocAreaSize = (4 * 1024 * 1024) / BLOCK_SIZE;
     RtsFlags.GcFlags.largeAllocLim = 0; /* defaults to minAllocAreasize */
     RtsFlags.GcFlags.nurseryChunkSize = 0;
-    RtsFlags.GcFlags.minOldGenSize = (1024 * 1024) / BLOCK_SIZE;
+    RtsFlags.GcFlags.minOldGenSize = (1024 * 1024) / BLOCK_SIZE; /* -O default */
     RtsFlags.GcFlags.maxHeapSize = 0; /* off by default */
     RtsFlags.GcFlags.heapLimitGrace = (1024 * 1024);
     RtsFlags.GcFlags.heapSizeSuggestion = 0; /* none */
diff --git a/testsuite/tests/profiling/should_run/all.T b/testsuite/tests/profiling/should_run/all.T
index dec77add28..ad10baac13 100644
--- a/testsuite/tests/profiling/should_run/all.T
+++ b/testsuite/tests/profiling/should_run/all.T
@@ -28,7 +28,7 @@ test('heapprof001',
      compile_and_run, [''])
 
 test('T2592',
-     [only_ways(['profasm']), extra_run_opts('+RTS -M1m -RTS'), exit_code(251)],
+     [only_ways(['profasm']), extra_run_opts('+RTS -M1m -A1m -RTS'), exit_code(251)],
      compile_and_run, [''])
 
 test('T3001', [only_ways(['prof_hb']), extra_ways(['prof_hb'])],
diff --git a/testsuite/tests/rts/T9579/Makefile b/testsuite/tests/rts/T9579/Makefile
index 23177ee125..e1bf2d3079 100644
--- a/testsuite/tests/rts/T9579/Makefile
+++ b/testsuite/tests/rts/T9579/Makefile
@@ -3,43 +3,43 @@ include $(TOP)/mk/boilerplate.mk
 include $(TOP)/mk/test.mk
 
 T9579_stackoverflow_rtsnone:
-	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=none -fforce-recomp -with-rtsopts -K1m \
+	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=none -fforce-recomp -with-rtsopts "-A1M -K1m" \
 		-outputdir tmp_T9579_stackoverflow_rtsnone \
 		StackOverflow.hs -o T9579_stackoverflow_rtsnone
 
 T9579_stackoverflow_rtssome:
-	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=some -fforce-recomp -with-rtsopts -K1m \
+	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=some -fforce-recomp -with-rtsopts "-A1M -K1m" \
 		-outputdir tmp_T9579_stackoverflow_rtssome \
 		StackOverflow.hs -o T9579_stackoverflow_rtssome
 
 T9579_stackoverflow_rtsall:
-	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts -K1m \
+	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts "-A1M -K1m" \
 		-outputdir tmp_T9579_stackoverflow_rtsall \
 		StackOverflow.hs -o T9579_stackoverflow_rtsall
 
 T9579_stackoverflow_rtsall_no_suggestions:
-	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts -K1m \
+	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts "-A1M -K1m" \
 		-no-rtsopts-suggestions \
 		-outputdir tmp_T9579_stackoverflow_rtsall_no_suggestions \
 		StackOverflow.hs -o T9579_stackoverflow_rtsall_no_suggestions
 
 T9579_outofheap_rtsnone:
-	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=none -fforce-recomp -with-rtsopts -M1m \
+	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=none -fforce-recomp -with-rtsopts "-A1M -M1m" \
 		-outputdir tmp_T9579_outofheap_rtsnone \
 		OutOfHeap.hs -o T9579_outofheap_rtsnone
 
 T9579_outofheap_rtssome:
-	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=some -fforce-recomp -with-rtsopts -M1m \
+	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=some -fforce-recomp -with-rtsopts "-A1M -M1m" \
 		-outputdir tmp_T9579_outofheap_rtssome \
 		OutOfHeap.hs -o T9579_outofheap_rtssome
 
 T9579_outofheap_rtsall:
-	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts -M1m \
+	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts "-A1M -M1m" \
 		-outputdir tmp_T9579_outofheap_rtsall \
 		OutOfHeap.hs -o T9579_outofheap_rtsall
 
 T9579_outofheap_rtsall_no_suggestions:
-	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts -M1m \
+	'$(TEST_HC)' $(TEST_HC_OPTS) -v0 -rtsopts=all -fforce-recomp -with-rtsopts "-A1M -M1m" \
 		-no-rtsopts-suggestions \
 		-outputdir tmp_T9579_outofheap_rtsall_no_suggestions \
 		OutOfHeap.hs -o T9579_outofheap_rtsall_no_suggestions
```