diff options
author | Douglas Wilson <douglas.wilson@gmail.com> | 2021-01-01 12:45:06 +0000 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2021-05-04 19:12:10 -0400 |
commit | f4ac60b95c8eed9d6c3c905338156bfe42351db5 (patch) | |
tree | 632f1bebf3dbaed7c40248d14eb303e84c9ec600 | |
parent | fa0888718ddf74586da6e42bda662340e6f0e1c5 (diff) | |
download | haskell-f4ac60b95c8eed9d6c3c905338156bfe42351db5.tar.gz |
rts: stats: Some fixes to stats for sequential gcs
Solves #19147. When n_capabilities > 1 we were not correctly accounting
for gc time for sequential collections. In this case par_n_gcthreads ==
1, however it is not guaranteed that the single gc thread is capability 0.
A similar issue for copied is addressed as well.
(cherry picked from commit f49d6fb27336297d6d7a46269a22dd98c131b4a8)
-rw-r--r-- | rts/Stats.c | 16 | ||||
-rw-r--r-- | rts/sm/GC.c | 35 |
2 files changed, 37 insertions, 14 deletions
diff --git a/rts/Stats.c b/rts/Stats.c index 282e799d23..895ff3f040 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -523,10 +523,18 @@ stat_endGC (Capability *cap, gc_thread *initiating_gct, W_ live, W_ copied, W_ s initiating_gct->gc_start_elapsed - initiating_gct->gc_sync_start_elapsed; stats.gc.elapsed_ns = current_elapsed - initiating_gct->gc_start_elapsed; stats.gc.cpu_ns = 0; - for (unsigned int i=0; i < par_n_threads; i++) { - gc_thread *gct = gc_threads[i]; - ASSERT(gct->gc_end_cpu >= gct->gc_start_cpu); - stats.gc.cpu_ns += gct->gc_end_cpu - gct->gc_start_cpu; + // see Note [n_gc_threads] + // par_n_threads is set to n_gc_threads at the single callsite of this + // function + if (par_n_threads == 1) { + ASSERT(initiating_gct->gc_end_cpu >= initiating_gct->gc_start_cpu); + stats.gc.cpu_ns += initiating_gct->gc_end_cpu - initiating_gct->gc_start_cpu; + } else { + for (unsigned int i=0; i < par_n_threads; i++) { + gc_thread *gct = gc_threads[i]; + ASSERT(gct->gc_end_cpu >= gct->gc_start_cpu); + stats.gc.cpu_ns += gct->gc_end_cpu - gct->gc_start_cpu; + } } } // ------------------------------------------------- diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 7a6345f469..1e8b2e1254 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -134,8 +134,22 @@ StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)] ATTRIBUTE_ALIGNED(64); #endif -// Number of threads running in *this* GC. Affects how many -// step->todos[] lists we have to look in to find work. +/* Note [n_gc_threads] +This is a global variable that originally tracked the number of threads +participating in the current gc. It's meaing has diverged from this somewhate. +In practise, it now takes one of the values {1, n_capabilities}. Don't be +tricked into thinking this means garbage collections must have 1 or +n_capabilities participating: idle capabilities (idle_cap[cap->no]) are included +in the n_gc_thread count. + +Clearly this is in need of some tidying up, but for now we tread carefully. We +check n_gc_threads > 1 to see whether we are in a parallel or sequential. We +ensure n_gc_threads > 1 before iterating over gc_threads a la: + +for(i=0;i<n_gc_threads;++i) { foo(gc_thread[i]; )} + +Omitting this check has led to issues such as #19147. +*/ uint32_t n_gc_threads; // For stats: @@ -550,12 +564,13 @@ GarbageCollect (uint32_t collect_gen, uint64_t par_balanced_copied_acc = 0; const gc_thread* thread; - for (i=0; i < n_gc_threads; i++) { - copied += RELAXED_LOAD(&gc_threads[i]->copied); - } - for (i=0; i < n_gc_threads; i++) { - thread = gc_threads[i]; - if (n_gc_threads > 1) { + // see Note [n_gc_threads] + if (n_gc_threads > 1) { + for (i=0; i < n_gc_threads; i++) { + copied += RELAXED_LOAD(&gc_threads[i]->copied); + } + for (i=0; i < n_gc_threads; i++) { + thread = gc_threads[i]; debugTrace(DEBUG_gc,"thread %d:", i); debugTrace(DEBUG_gc," copied %ld", RELAXED_LOAD(&thread->copied) * sizeof(W_)); @@ -583,12 +598,12 @@ GarbageCollect (uint32_t collect_gen, par_balanced_copied_acc += stg_min(n_gc_threads * RELAXED_LOAD(&thread->copied), copied); } - } - if (n_gc_threads > 1) { // See Note [Work Balance] for an explanation of this computation par_balanced_copied = (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) / (n_gc_threads - 1); + } else { + copied += gct->copied; } } |