diff options
author | Douglas Wilson <douglas.wilson@gmail.com> | 2018-03-19 11:55:37 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2018-03-19 12:05:07 -0400 |
commit | 2918abf75594001deed51ee252a05b146f844489 (patch) | |
tree | ecb789d307060dc887e2dfe01eb390a9c475e8f7 /rts/sm/GC.c | |
parent | f9a6d4207fb0e551821fee847ac064ac31d96bba (diff) | |
download | haskell-2918abf75594001deed51ee252a05b146f844489.tar.gz |
rts: Add --internal-counters RTS flag and several counters
The existing internal counters:
* gc_alloc_block_sync
* whitehole_gc_spin
* gen[0].sync
* gen[1].sync
are now not shown in the -s report unless --internal-counters is also passed.
If --internal-counters is passed we now show the counters above, reformatted, as
well as several other counters. In particular, we now count the yieldThread()
calls that SpinLocks do as well as their spins.
The added counters are:
* gc_spin (spin and yield)
* mut_spin (spin and yield)
* whitehole_threadPaused (spin only)
* whitehole_executeMessage (spin only)
* whitehole_lockClosure (spin only)
* waitForGcThreads (spin and yield)
As well as the following, which are not SpinLock-like things:
* any_work
* no_work
* scav_find_work
See the Note for descriptions of what these counters are.
We add busy_wait_nops in these loops along with the counter increment where it
was absent.
Old internal counters output:
```
gc_alloc_block_sync: 0
whitehole_gc_spin: 0
gen[0].sync: 0
gen[1].sync: 0
```
New internal counters output:
```
Internal Counters:
Spins Yields
gc_alloc_block_sync 323 0
gc_spin 9016713 752
mut_spin 57360944 47716
whitehole_gc 0 n/a
whitehole_threadPaused 0 n/a
whitehole_executeMessage 0 n/a
whitehole_lockClosure 0 0
waitForGcThreads 2 415
gen[0].sync 6 0
gen[1].sync 1 0
any_work 2017
no_work 2014
scav_find_work 1004
```
Test Plan:
./validate
Check it builds with #define PROF_SPIN removed from includes/rts/Config.h
Reviewers: bgamari, erikd, simonmar, hvr
Reviewed By: simonmar
Subscribers: rwbarton, thomie, carter
GHC Trac Issues: #3553, #9221
Differential Revision: https://phabricator.haskell.org/D4302
Diffstat (limited to 'rts/sm/GC.c')
-rw-r--r-- | rts/sm/GC.c | 71 |
1 files changed, 55 insertions, 16 deletions
diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 54797ba0f0..d61ca41a6b 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -139,6 +139,9 @@ uint32_t n_gc_threads; static long copied; // *words* copied & scavenged during this GC #if defined(PROF_SPIN) && defined(THREADED_RTS) +// spin and yield counts for the quasi-SpinLock in waitForGcThreads +volatile StgWord64 waitForGcThreads_spin = 0; +volatile StgWord64 waitForGcThreads_yield = 0; volatile StgWord64 whitehole_gc_spin = 0; #endif @@ -198,7 +201,9 @@ GarbageCollect (uint32_t collect_gen, { bdescr *bd; generation *gen; - StgWord live_blocks, live_words, par_max_copied, par_balanced_copied; + StgWord live_blocks, live_words, par_max_copied, par_balanced_copied, + gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield, + any_work, no_work, scav_find_work; #if defined(THREADED_RTS) gc_thread *saved_gct; #endif @@ -471,32 +476,53 @@ GarbageCollect (uint32_t collect_gen, copied = 0; par_max_copied = 0; par_balanced_copied = 0; + gc_spin_spin = 0; + gc_spin_yield = 0; + mut_spin_spin = 0; + mut_spin_yield = 0; + any_work = 0; + no_work = 0; + scav_find_work = 0; { uint32_t i; uint64_t par_balanced_copied_acc = 0; + const gc_thread* thread; for (i=0; i < n_gc_threads; i++) { copied += gc_threads[i]->copied; } for (i=0; i < n_gc_threads; i++) { + thread = gc_threads[i]; if (n_gc_threads > 1) { debugTrace(DEBUG_gc,"thread %d:", i); - debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_)); - debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_)); - debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work); - debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work); - debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work); + debugTrace(DEBUG_gc," copied %ld", + thread->copied * sizeof(W_)); + debugTrace(DEBUG_gc," scanned %ld", + thread->scanned * sizeof(W_)); + debugTrace(DEBUG_gc," any_work %ld", + thread->any_work); + debugTrace(DEBUG_gc," no_work %ld", + 
thread->no_work); + debugTrace(DEBUG_gc," scav_find_work %ld", + thread->scav_find_work); + +#if defined(THREADED_RTS) && defined(PROF_SPIN) + gc_spin_spin += thread->gc_spin.spin; + gc_spin_yield += thread->gc_spin.yield; + mut_spin_spin += thread->mut_spin.spin; + mut_spin_yield += thread->mut_spin.yield; +#endif + + any_work += thread->any_work; + no_work += thread->no_work; + scav_find_work += thread->scav_find_work; + + par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied); + par_balanced_copied_acc += + stg_min(n_gc_threads * gc_threads[i]->copied, copied); } - par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied); - par_balanced_copied_acc += - stg_min(n_gc_threads * gc_threads[i]->copied, copied); - } - if (n_gc_threads == 1) { - par_max_copied = 0; - par_balanced_copied = 0; } - else - { + if (n_gc_threads > 1) { // See Note [Work Balance] for an explanation of this computation par_balanced_copied = (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) / @@ -834,7 +860,9 @@ GarbageCollect (uint32_t collect_gen, // ok, GC over: tell the stats department what happened. stat_endGC(cap, gct, live_words, copied, live_blocks * BLOCK_SIZE_W - live_words /* slop */, - N, n_gc_threads, par_max_copied, par_balanced_copied); + N, n_gc_threads, par_max_copied, par_balanced_copied, + gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield, + any_work, no_work, scav_find_work); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { @@ -1186,6 +1214,9 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[]) } } if (!retry) break; +#if defined(PROF_SPIN) + waitForGcThreads_yield++; +#endif yieldThread(); } @@ -1196,6 +1227,14 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[]) rtsConfig.longGCSync(cap->no, t2 - t0); t1 = t2; } + if (retry) { +#if defined(PROF_SPIN) + // This is a bit strange, we'll get more yields than spins. 
+ // I guess that means it's not a spin-lock at all, but these + // numbers are still useful (I think). + waitForGcThreads_spin++; +#endif + } } if (RtsFlags.GcFlags.longGCSync != 0 && |