author     Douglas Wilson <douglas.wilson@gmail.com>   2020-12-14 13:55:54 +0000
committer  Marge Bot <ben+marge-bot@smart-cactus.org>  2021-01-17 05:49:54 -0500
commit     345ae06b3334a64e9d6db9ea69573ef3227e535a (patch)
tree       55fff9e7dccbaf5b8181ce2534c0624a00a5b161 /rts
parent     f2d118c0a018dccd3c82e885f500d4e57ff94f82 (diff)
download   haskell-345ae06b3334a64e9d6db9ea69573ef3227e535a.tar.gz
rts: add max_n_todo_overflow internal counter
I've never observed this counter taking a non-zero value; however, I do
think its existence is justified by the comment in grab_local_todo_block.
I've not added it to RTSStats in GHC.Stats, as it doesn't seem worth the
API churn.
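For orientation, the sketch below shows the high-water-mark pattern the patch follows, stripped of RTS types: each GC thread records the longest todo_overflow list it has seen, the per-thread maxima are folded together at the end of a collection, and the result is folded into the process-lifetime statistics. It is a simplified, self-contained illustration in plain C with hypothetical names (sketch_gc_thread, record_overflow_push, end_of_gc), not code taken from the patch itself.

/* Simplified sketch of the max_n_todo_overflow bookkeeping; plain C types
 * stand in for the RTS's W_, gc_thread and RTSStats. */
#include <stdint.h>
#include <stdio.h>

#define STG_MAX(a, b) ((a) > (b) ? (a) : (b))

typedef struct {
    uint64_t n_todo_overflow;      /* current length of this thread's overflow list */
    uint64_t max_n_todo_overflow;  /* per-thread high-water mark */
} sketch_gc_thread;

static uint64_t global_max_n_todo_overflow = 0;  /* process-lifetime maximum */

/* Called whenever a block lands on the overflow list. */
static void record_overflow_push(sketch_gc_thread *t)
{
    t->n_todo_overflow++;
    t->max_n_todo_overflow =
        STG_MAX(t->max_n_todo_overflow, t->n_todo_overflow);
}

/* At the end of a collection, fold the per-thread maxima together and then
 * fold the result into the global statistics. */
static void end_of_gc(sketch_gc_thread *threads, int n_threads)
{
    uint64_t gc_max = 0;
    for (int i = 0; i < n_threads; i++) {
        gc_max = STG_MAX(gc_max, threads[i].max_n_todo_overflow);
    }
    global_max_n_todo_overflow =
        STG_MAX(global_max_n_todo_overflow, gc_max);
}

int main(void)
{
    sketch_gc_thread threads[2] = {{0, 0}, {0, 0}};
    record_overflow_push(&threads[1]);
    record_overflow_push(&threads[1]);
    end_of_gc(threads, 2);
    printf("max_n_todo_overflow: %llu\n",
           (unsigned long long)global_max_n_todo_overflow);
    return 0;
}

In the patch the corresponding roles are played by push_todo_block in rts/sm/GCUtils.c, the per-thread accumulation loop in GarbageCollect (rts/sm/GC.c), and stat_endGC in rts/Stats.c.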
Diffstat (limited to 'rts')
-rw-r--r--  rts/Stats.c       | 25
-rw-r--r--  rts/Stats.h       |  3
-rw-r--r--  rts/sm/GC.c       |  7
-rw-r--r--  rts/sm/GCThread.h |  1
-rw-r--r--  rts/sm/GCUtils.c  | 12
5 files changed, 37 insertions, 11 deletions
diff --git a/rts/Stats.c b/rts/Stats.c
index 02616094b8..45d40ddcad 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -160,6 +160,7 @@ initStats0(void)
         .mut_spin_yield = 0,
         .any_work = 0,
         .scav_find_work = 0,
+        .max_n_todo_overflow = 0,
         .init_cpu_ns = 0,
         .init_elapsed_ns = 0,
         .mutator_cpu_ns = 0,
@@ -460,7 +461,8 @@ void
 stat_endGC (Capability *cap, gc_thread *initiating_gct, W_ live, W_ copied, W_ slop,
             uint32_t gen, uint32_t par_n_threads, gc_thread **gc_threads,
             W_ par_max_copied, W_ par_balanced_copied, W_ gc_spin_spin, W_ gc_spin_yield,
-            W_ mut_spin_spin, W_ mut_spin_yield, W_ any_work, W_ scav_find_work)
+            W_ mut_spin_spin, W_ mut_spin_yield, W_ any_work, W_ scav_find_work,
+            W_ max_n_todo_overflow)
 {
     ACQUIRE_LOCK(&stats_mutex);
 
@@ -541,6 +543,7 @@ stat_endGC (Capability *cap, gc_thread *initiating_gct, W_ live, W_ copied, W_ s
             stats.gc.par_balanced_copied_bytes;
         stats.any_work += any_work;
         stats.scav_find_work += scav_find_work;
+        stats.max_n_todo_overflow += stg_max(max_n_todo_overflow, stats.max_n_todo_overflow);
         stats.gc_spin_spin += gc_spin_spin;
         stats.gc_spin_yield += gc_spin_yield;
         stats.mut_spin_spin += mut_spin_spin;
@@ -1026,6 +1029,10 @@ static void report_summary(const RTSSummaryStats* sum)
                     , col_width[0], ""
                     , col_width[1], "scav_find_work"
                     , col_width[2], stats.scav_find_work);
+        statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n"
+                    , col_width[0], ""
+                    , col_width[1], "max_n_todo_overflow"
+                    , col_width[2], stats.max_n_todo_overflow);
 #elif defined(THREADED_RTS) // THREADED_RTS && PROF_SPIN
         statsPrintf("Internal Counters require the RTS to be built "
                 "with PROF_SPIN"); // PROF_SPIN is not #defined here
@@ -1167,6 +1174,8 @@ static void report_machine_readable (const RTSSummaryStats * sum)
             stats.any_work);
     MR_STAT("scav_find_work", FMT_Word64,
             stats.scav_find_work);
+    MR_STAT("max_n_todo_overflow", FMT_Word64,
+            stats.max_n_todo_overflow);
 #endif // PROF_SPIN
 #endif // THREADED_RTS
 
@@ -1558,7 +1567,7 @@ See #13830
 */
 
 /*
-Note [Internal Counter Stats]
+Note [Internal Counters Stats]
 -----------------------------
 What do the counts at the end of a '+RTS -s --internal-counters' report mean?
 They are detailed below. Most of these counters are used by multiple threads
@@ -1596,7 +1605,6 @@ don't. We count these white-hole spins and include them in the SpinLocks
 table. If a particular loop does not yield, we put "n/a" in the table. They are
 named for the function that has the spinning loop except that several loops in
 the garbage collector accumulate into whitehole_gc.
-TODO: Should these counters be more or less granular?
 
 white-hole spin counters:
 * whitehole_gc
@@ -1604,16 +1612,17 @@ white-hole spin counters:
 * whitehole_lockClosure
 * whitehole_executeMessage
 * whitehole_threadPaused
-
-We count the number of calls of several functions in the parallel garbage
-collector.
+We have several stats allowing us to observe the internals of the parallel
+garbage collector:
 
 Parallel garbage collector counters:
 * any_work:
   Incremented whenever a parallel GC looks for work to steal.
 * scav_find_work:
-  Called to do work when any_work return true.
-
+  Counts iterations of the scavenge loop.
+* max_n_todo_overflow:
+  Tracks the maximum length of the todo_overflow lists in the gc_thread structure.
+  See the comment in grab_local_todo_block.
 
 */
 /* -----------------------------------------------------------------------------
diff --git a/rts/Stats.h b/rts/Stats.h
index 64aec2d5ee..19bd707302 100644
--- a/rts/Stats.h
+++ b/rts/Stats.h
@@ -37,7 +37,8 @@ void stat_endGC (Capability *cap, struct gc_thread_ *initiating_gct, W_ li
                   uint32_t n_gc_threads, struct gc_thread_ **gc_threads,
                   W_ par_max_copied, W_ par_balanced_copied, W_ gc_spin_spin,
                   W_ gc_spin_yield, W_ mut_spin_spin,
-                  W_ mut_spin_yield, W_ any_work, W_ scav_find_work);
+                  W_ mut_spin_yield, W_ any_work, W_ scav_find_work,
+                  W_ max_n_todo_overflow);
 
 void stat_startNonmovingGcSync(void);
 void stat_endNonmovingGcSync(void);
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index b78f993260..5e986f2296 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -270,7 +270,7 @@ GarbageCollect (uint32_t collect_gen,
   generation *gen;
   StgWord live_blocks, live_words, par_max_copied, par_balanced_copied,
       gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
-      any_work, scav_find_work;
+      any_work, scav_find_work, max_n_todo_overflow;
 #if defined(THREADED_RTS)
   gc_thread *saved_gct;
 #endif
@@ -594,6 +594,7 @@ GarbageCollect (uint32_t collect_gen,
   mut_spin_yield = 0;
   any_work = 0;
   scav_find_work = 0;
+  max_n_todo_overflow = 0;
   {
       uint32_t i;
       uint64_t par_balanced_copied_acc = 0;
@@ -625,6 +626,7 @@ GarbageCollect (uint32_t collect_gen,
 
           any_work += RELAXED_LOAD(&thread->any_work);
           scav_find_work += RELAXED_LOAD(&thread->scav_find_work);
+          max_n_todo_overflow = stg_max(RELAXED_LOAD(&thread->max_n_todo_overflow), max_n_todo_overflow);
 
           par_max_copied = stg_max(RELAXED_LOAD(&thread->copied), par_max_copied);
           par_balanced_copied_acc +=
@@ -1043,7 +1045,7 @@ GarbageCollect (uint32_t collect_gen,
               N, n_gc_threads, gc_threads,
               par_max_copied, par_balanced_copied,
               gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
-              any_work, scav_find_work);
+              any_work, scav_find_work, max_n_todo_overflow);
 
 #if defined(RTS_USER_SIGNALS)
   if (RtsFlags.MiscFlags.install_signal_handlers) {
@@ -1803,6 +1805,7 @@ init_gc_thread (gc_thread *t)
     t->scanned = 0;
     t->any_work = 0;
     t->scav_find_work = 0;
+    t->max_n_todo_overflow = 0;
 }
 
 /* -----------------------------------------------------------------------------
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
index 90d15c69c5..31719ca020 100644
--- a/rts/sm/GCThread.h
+++ b/rts/sm/GCThread.h
@@ -183,6 +183,7 @@ typedef struct gc_thread_ {
     W_ scanned;
     W_ any_work;
     W_ scav_find_work;
+    W_ max_n_todo_overflow;
 
     Time gc_start_cpu;   // thread CPU time
     Time gc_end_cpu;     // thread CPU time
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 89a92bc837..52ea27f263 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -183,6 +183,18 @@ push_todo_block(bdescr *bd, gen_workspace *ws)
         bd->link = ws->todo_overflow;
         ws->todo_overflow = bd;
         ws->n_todo_overflow++;
+
+        // In theory, if a gc thread pushes more blocks to its todo_q than it
+        // pops, the todo_overflow list will continue to grow. Other gc threads
+        // can't steal from the todo_overflow list, so they may be idle as the
+        // first gc thread works diligently on its todo_overflow list. In
+        // practice this has not been observed to occur.
+        //
+        // The max_n_todo_overflow counter will allow us to observe large
+        // todo_overflow lists if they ever arise. As of now I've not observed
+        // any nonzero max_n_todo_overflow samples.
+        gct->max_n_todo_overflow =
+            stg_max(gct->max_n_todo_overflow, ws->n_todo_overflow);
     }
 
 #if defined(THREADED_RTS)