summaryrefslogtreecommitdiff
path: root/rts
diff options
context:
space:
mode:
authorDouglas Wilson <douglas.wilson@gmail.com>2020-12-14 13:55:54 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-01-17 05:49:54 -0500
commit345ae06b3334a64e9d6db9ea69573ef3227e535a (patch)
tree55fff9e7dccbaf5b8181ce2534c0624a00a5b161 /rts
parentf2d118c0a018dccd3c82e885f500d4e57ff94f82 (diff)
downloadhaskell-345ae06b3334a64e9d6db9ea69573ef3227e535a.tar.gz
rts: add max_n_todo_overflow internal counter
I've never observed this counter taking a non-zero value; however, I do think its existence is justified by the comment in grab_local_todo_block. I've not added it to RTSStats in GHC.Stats, as it doesn't seem worth the API churn.
Diffstat (limited to 'rts')
-rw-r--r--rts/Stats.c25
-rw-r--r--rts/Stats.h3
-rw-r--r--rts/sm/GC.c7
-rw-r--r--rts/sm/GCThread.h1
-rw-r--r--rts/sm/GCUtils.c12
5 files changed, 37 insertions, 11 deletions
diff --git a/rts/Stats.c b/rts/Stats.c
index 02616094b8..45d40ddcad 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -160,6 +160,7 @@ initStats0(void)
.mut_spin_yield = 0,
.any_work = 0,
.scav_find_work = 0,
+ .max_n_todo_overflow = 0,
.init_cpu_ns = 0,
.init_elapsed_ns = 0,
.mutator_cpu_ns = 0,
@@ -460,7 +461,8 @@ void
stat_endGC (Capability *cap, gc_thread *initiating_gct, W_ live, W_ copied, W_ slop,
uint32_t gen, uint32_t par_n_threads, gc_thread **gc_threads,
W_ par_max_copied, W_ par_balanced_copied, W_ gc_spin_spin, W_ gc_spin_yield,
- W_ mut_spin_spin, W_ mut_spin_yield, W_ any_work, W_ scav_find_work)
+ W_ mut_spin_spin, W_ mut_spin_yield, W_ any_work, W_ scav_find_work,
+ W_ max_n_todo_overflow)
{
ACQUIRE_LOCK(&stats_mutex);
@@ -541,6 +543,7 @@ stat_endGC (Capability *cap, gc_thread *initiating_gct, W_ live, W_ copied, W_ s
stats.gc.par_balanced_copied_bytes;
stats.any_work += any_work;
stats.scav_find_work += scav_find_work;
+ stats.max_n_todo_overflow = stg_max(max_n_todo_overflow, stats.max_n_todo_overflow); // running maximum across GCs, not a sum
stats.gc_spin_spin += gc_spin_spin;
stats.gc_spin_yield += gc_spin_yield;
stats.mut_spin_spin += mut_spin_spin;
@@ -1026,6 +1029,10 @@ static void report_summary(const RTSSummaryStats* sum)
, col_width[0], ""
, col_width[1], "scav_find_work"
, col_width[2], stats.scav_find_work);
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "max_n_todo_overflow"
+ , col_width[2], stats.max_n_todo_overflow);
#elif defined(THREADED_RTS) // THREADED_RTS && PROF_SPIN
statsPrintf("Internal Counters require the RTS to be built "
"with PROF_SPIN"); // PROF_SPIN is not #defined here
@@ -1167,6 +1174,8 @@ static void report_machine_readable (const RTSSummaryStats * sum)
stats.any_work);
MR_STAT("scav_find_work", FMT_Word64,
stats.scav_find_work);
+ MR_STAT("max_n_todo_overflow", FMT_Word64,
+ stats.max_n_todo_overflow);
#endif // PROF_SPIN
#endif // THREADED_RTS
@@ -1558,7 +1567,7 @@ See #13830
*/
/*
-Note [Internal Counter Stats]
+Note [Internal Counters Stats]
-----------------------------
What do the counts at the end of a '+RTS -s --internal-counters' report mean?
They are detailed below. Most of these counters are used by multiple threads
@@ -1596,7 +1605,6 @@ don't. We count these white-hole spins and include them in the SpinLocks table.
If a particular loop does not yield, we put "n/a" in the table. They are named
for the function that has the spinning loop except that several loops in the
garbage collector accumulate into whitehole_gc.
-TODO: Should these counters be more or less granular?
white-hole spin counters:
* whitehole_gc
@@ -1604,16 +1612,17 @@ white-hole spin counters:
* whitehole_executeMessage
* whitehole_threadPaused
-
-We count the number of calls of several functions in the parallel garbage
-collector.
+We have several stats allowing us to observe the internals of the parallel
+garbage collector:
Parallel garbage collector counters:
* any_work:
Incremented whenever a parallel GC looks for work to steal.
* scav_find_work:
- Called to do work when any_work return true.
-
+ Counts iterations of scavenge loop
+* max_n_todo_overflow:
+ Tracks the maximum length of todo_overflow lists in the gc_thread structure.
+ See comment in grab_local_todo_block.
*/
/* -----------------------------------------------------------------------------
diff --git a/rts/Stats.h b/rts/Stats.h
index 64aec2d5ee..19bd707302 100644
--- a/rts/Stats.h
+++ b/rts/Stats.h
@@ -37,7 +37,8 @@ void stat_endGC (Capability *cap, struct gc_thread_ *initiating_gct, W_ li
uint32_t n_gc_threads, struct gc_thread_ **gc_threads,
W_ par_max_copied, W_ par_balanced_copied,
W_ gc_spin_spin, W_ gc_spin_yield, W_ mut_spin_spin,
- W_ mut_spin_yield, W_ any_work, W_ scav_find_work);
+ W_ mut_spin_yield, W_ any_work, W_ scav_find_work,
+ W_ max_n_todo_overflow);
void stat_startNonmovingGcSync(void);
void stat_endNonmovingGcSync(void);
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index b78f993260..5e986f2296 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -270,7 +270,7 @@ GarbageCollect (uint32_t collect_gen,
generation *gen;
StgWord live_blocks, live_words, par_max_copied, par_balanced_copied,
gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
- any_work, scav_find_work;
+ any_work, scav_find_work, max_n_todo_overflow;
#if defined(THREADED_RTS)
gc_thread *saved_gct;
#endif
@@ -594,6 +594,7 @@ GarbageCollect (uint32_t collect_gen,
mut_spin_yield = 0;
any_work = 0;
scav_find_work = 0;
+ max_n_todo_overflow = 0;
{
uint32_t i;
uint64_t par_balanced_copied_acc = 0;
@@ -625,6 +626,7 @@ GarbageCollect (uint32_t collect_gen,
any_work += RELAXED_LOAD(&thread->any_work);
scav_find_work += RELAXED_LOAD(&thread->scav_find_work);
+ max_n_todo_overflow = stg_max(RELAXED_LOAD(&thread->max_n_todo_overflow), max_n_todo_overflow);
par_max_copied = stg_max(RELAXED_LOAD(&thread->copied), par_max_copied);
par_balanced_copied_acc +=
@@ -1043,7 +1045,7 @@ GarbageCollect (uint32_t collect_gen,
N, n_gc_threads, gc_threads,
par_max_copied, par_balanced_copied,
gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
- any_work, scav_find_work);
+ any_work, scav_find_work, max_n_todo_overflow);
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
@@ -1803,6 +1805,7 @@ init_gc_thread (gc_thread *t)
t->scanned = 0;
t->any_work = 0;
t->scav_find_work = 0;
+ t->max_n_todo_overflow = 0;
}
/* -----------------------------------------------------------------------------
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
index 90d15c69c5..31719ca020 100644
--- a/rts/sm/GCThread.h
+++ b/rts/sm/GCThread.h
@@ -183,6 +183,7 @@ typedef struct gc_thread_ {
W_ scanned;
W_ any_work;
W_ scav_find_work;
+ W_ max_n_todo_overflow;
Time gc_start_cpu; // thread CPU time
Time gc_end_cpu; // thread CPU time
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 89a92bc837..52ea27f263 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -183,6 +183,18 @@ push_todo_block(bdescr *bd, gen_workspace *ws)
bd->link = ws->todo_overflow;
ws->todo_overflow = bd;
ws->n_todo_overflow++;
+
+ // In theory, if a gc thread pushes more blocks to its todo_q than it
+ // pops, the todo_overflow list will continue to grow. Other gc threads
+ // can't steal from the todo_overflow list, so they may be idle as the
+ // first gc thread works diligently on its todo_overflow list. In
+ // practice this has not been observed to occur.
+ //
+ // The max_n_todo_overflow counter will allow us to observe large
+ // todo_overflow lists if they ever arise. As of now I've not observed
+ // any nonzero max_n_todo_overflow samples.
+ gct->max_n_todo_overflow =
+ stg_max(gct->max_n_todo_overflow, ws->n_todo_overflow);
}
#if defined(THREADED_RTS)