summaryrefslogtreecommitdiff
path: root/rts/sm/GC.c
diff options
context:
space:
mode:
author: Douglas Wilson <douglas.wilson@gmail.com> 2018-03-19 11:55:37 -0400
committer: Ben Gamari <ben@smart-cactus.org> 2018-03-19 12:05:07 -0400
commit 2918abf75594001deed51ee252a05b146f844489 (patch)
tree ecb789d307060dc887e2dfe01eb390a9c475e8f7 /rts/sm/GC.c
parent f9a6d4207fb0e551821fee847ac064ac31d96bba (diff)
download haskell-2918abf75594001deed51ee252a05b146f844489.tar.gz
rts: Add --internal-counters RTS flag and several counters
The existing internal counters: * gc_alloc_block_sync * whitehole_spin * gen[g].sync * gen[1].sync are now not shown in the -s report unless --internal-counters is also passed. If --internal-counters is passed we now show the counters above, reformatted, as well as several other counters. In particular, we now count the yieldThread() calls that SpinLocks do as well as their spins. The added counters are: * gc_spin (spin and yield) * mut_spin (spin and yield) * whitehole_threadPaused (spin only) * whitehole_executeMessage (spin only) * whitehole_lockClosure (spin only) * waitForGcThreads (spin and yield) As well as the following, which are not SpinLock-like things: * any_work * no_work * scav_find_work See the Note for descriptions of what these counters are. We add busy_wait_nops in these loops along with the counter increment where it was absent. Old internal counters output: ``` gc_alloc_block_sync: 0 whitehole_gc_spin: 0 gen[0].sync: 0 gen[1].sync: 0 ``` New internal counters output: ``` Internal Counters: Spins Yields gc_alloc_block_sync 323 0 gc_spin 9016713 752 mut_spin 57360944 47716 whitehole_gc 0 n/a whitehole_threadPaused 0 n/a whitehole_executeMessage 0 n/a whitehole_lockClosure 0 0 waitForGcThreads 2 415 gen[0].sync 6 0 gen[1].sync 1 0 any_work 2017 no_work 2014 scav_find_work 1004 ``` Test Plan: ./validate Check it builds with #define PROF_SPIN removed from includes/rts/Config.h Reviewers: bgamari, erikd, simonmar, hvr Reviewed By: simonmar Subscribers: rwbarton, thomie, carter GHC Trac Issues: #3553, #9221 Differential Revision: https://phabricator.haskell.org/D4302
Diffstat (limited to 'rts/sm/GC.c')
-rw-r--r--  rts/sm/GC.c  71
1 files changed, 55 insertions, 16 deletions
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 54797ba0f0..d61ca41a6b 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -139,6 +139,9 @@ uint32_t n_gc_threads;
static long copied; // *words* copied & scavenged during this GC
#if defined(PROF_SPIN) && defined(THREADED_RTS)
+// spin and yield counts for the quasi-SpinLock in waitForGcThreads
+volatile StgWord64 waitForGcThreads_spin = 0;
+volatile StgWord64 waitForGcThreads_yield = 0;
volatile StgWord64 whitehole_gc_spin = 0;
#endif
@@ -198,7 +201,9 @@ GarbageCollect (uint32_t collect_gen,
{
bdescr *bd;
generation *gen;
- StgWord live_blocks, live_words, par_max_copied, par_balanced_copied;
+ StgWord live_blocks, live_words, par_max_copied, par_balanced_copied,
+ gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
+ any_work, no_work, scav_find_work;
#if defined(THREADED_RTS)
gc_thread *saved_gct;
#endif
@@ -471,32 +476,53 @@ GarbageCollect (uint32_t collect_gen,
copied = 0;
par_max_copied = 0;
par_balanced_copied = 0;
+ gc_spin_spin = 0;
+ gc_spin_yield = 0;
+ mut_spin_spin = 0;
+ mut_spin_yield = 0;
+ any_work = 0;
+ no_work = 0;
+ scav_find_work = 0;
{
uint32_t i;
uint64_t par_balanced_copied_acc = 0;
+ const gc_thread* thread;
for (i=0; i < n_gc_threads; i++) {
copied += gc_threads[i]->copied;
}
for (i=0; i < n_gc_threads; i++) {
+ thread = gc_threads[i];
if (n_gc_threads > 1) {
debugTrace(DEBUG_gc,"thread %d:", i);
- debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
- debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
- debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
- debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
- debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
+ debugTrace(DEBUG_gc," copied %ld",
+ thread->copied * sizeof(W_));
+ debugTrace(DEBUG_gc," scanned %ld",
+ thread->scanned * sizeof(W_));
+ debugTrace(DEBUG_gc," any_work %ld",
+ thread->any_work);
+ debugTrace(DEBUG_gc," no_work %ld",
+ thread->no_work);
+ debugTrace(DEBUG_gc," scav_find_work %ld",
+ thread->scav_find_work);
+
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+ gc_spin_spin += thread->gc_spin.spin;
+ gc_spin_yield += thread->gc_spin.yield;
+ mut_spin_spin += thread->mut_spin.spin;
+ mut_spin_yield += thread->mut_spin.yield;
+#endif
+
+ any_work += thread->any_work;
+ no_work += thread->no_work;
+ scav_find_work += thread->scav_find_work;
+
+ par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
+ par_balanced_copied_acc +=
+ stg_min(n_gc_threads * gc_threads[i]->copied, copied);
}
- par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
- par_balanced_copied_acc +=
- stg_min(n_gc_threads * gc_threads[i]->copied, copied);
- }
- if (n_gc_threads == 1) {
- par_max_copied = 0;
- par_balanced_copied = 0;
}
- else
- {
+ if (n_gc_threads > 1) {
// See Note [Work Balance] for an explanation of this computation
par_balanced_copied =
(par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
@@ -834,7 +860,9 @@ GarbageCollect (uint32_t collect_gen,
// ok, GC over: tell the stats department what happened.
stat_endGC(cap, gct, live_words, copied,
live_blocks * BLOCK_SIZE_W - live_words /* slop */,
- N, n_gc_threads, par_max_copied, par_balanced_copied);
+ N, n_gc_threads, par_max_copied, par_balanced_copied,
+ gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
+ any_work, no_work, scav_find_work);
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
@@ -1186,6 +1214,9 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
}
}
if (!retry) break;
+#if defined(PROF_SPIN)
+ waitForGcThreads_yield++;
+#endif
yieldThread();
}
@@ -1196,6 +1227,14 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
rtsConfig.longGCSync(cap->no, t2 - t0);
t1 = t2;
}
+ if (retry) {
+#if defined(PROF_SPIN)
+ // This is a bit strange, we'll get more yields than spins.
+ // I guess that means it's not a spin-lock at all, but these
+ // numbers are still useful (I think).
+ waitForGcThreads_spin++;
+#endif
+ }
}
if (RtsFlags.GcFlags.longGCSync != 0 &&