From 18d6dc07834c76b5fc8126ee7559a43798f67bee Mon Sep 17 00:00:00 2001 From: Douglas Wilson Date: Fri, 1 Jan 2021 12:45:06 +0000 Subject: rts: stats: Some fixes to stats for sequential gcs Solves #19147. When n_capabilities > 1 we were not correctly accounting for gc time for sequential collections. In this case par_n_gcthreads == 1, however it is not guaranteed that the single gc thread is capability 0. A similar issue for copied is addressed as well. (cherry picked from commit f49d6fb27336297d6d7a46269a22dd98c131b4a8) --- rts/Stats.c | 16 ++++++++++++---- rts/sm/GC.c | 35 +++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/rts/Stats.c b/rts/Stats.c index 71dcf8a9d0..6c5e7c5c37 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -507,10 +507,18 @@ stat_endGC (Capability *cap, gc_thread *initiating_gct, W_ live, W_ copied, W_ s initiating_gct->gc_start_elapsed - initiating_gct->gc_sync_start_elapsed; stats.gc.elapsed_ns = current_elapsed - initiating_gct->gc_start_elapsed; stats.gc.cpu_ns = 0; - for (unsigned int i=0; i < par_n_threads; i++) { - gc_thread *gct = gc_threads[i]; - ASSERT(gct->gc_end_cpu >= gct->gc_start_cpu); - stats.gc.cpu_ns += gct->gc_end_cpu - gct->gc_start_cpu; + // see Note [n_gc_threads] + // par_n_threads is set to n_gc_threads at the single callsite of this + // function + if (par_n_threads == 1) { + ASSERT(initiating_gct->gc_end_cpu >= initiating_gct->gc_start_cpu); + stats.gc.cpu_ns += initiating_gct->gc_end_cpu - initiating_gct->gc_start_cpu; + } else { + for (unsigned int i=0; i < par_n_threads; i++) { + gc_thread *gct = gc_threads[i]; + ASSERT(gct->gc_end_cpu >= gct->gc_start_cpu); + stats.gc.cpu_ns += gct->gc_end_cpu - gct->gc_start_cpu; + } } } // ------------------------------------------------- diff --git a/rts/sm/GC.c b/rts/sm/GC.c index d6d2c3f594..2a519c67f1 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -134,8 +134,22 @@ StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)] ATTRIBUTE_ALIGNED(64); #endif -// Number of threads running in *this* GC. Affects how many -// step->todos[] lists we have to look in to find work. +/* Note [n_gc_threads] +This is a global variable that originally tracked the number of threads +participating in the current gc. It's meaing has diverged from this somewhate. +In practise, it now takes one of the values {1, n_capabilities}. Don't be +tricked into thinking this means garbage collections must have 1 or +n_capabilities participating: idle capabilities (idle_cap[cap->no]) are included +in the n_gc_thread count. + +Clearly this is in need of some tidying up, but for now we tread carefully. We +check n_gc_threads > 1 to see whether we are in a parallel or sequential. We +ensure n_gc_threads > 1 before iterating over gc_threads a la: + +for(i=0;icopied); - } - for (i=0; i < n_gc_threads; i++) { - thread = gc_threads[i]; - if (n_gc_threads > 1) { + // see Note [n_gc_threads] + if (n_gc_threads > 1) { + for (i=0; i < n_gc_threads; i++) { + copied += RELAXED_LOAD(&gc_threads[i]->copied); + } + for (i=0; i < n_gc_threads; i++) { + thread = gc_threads[i]; debugTrace(DEBUG_gc,"thread %d:", i); debugTrace(DEBUG_gc," copied %ld", RELAXED_LOAD(&thread->copied) * sizeof(W_)); @@ -583,12 +598,12 @@ GarbageCollect (uint32_t collect_gen, par_balanced_copied_acc += stg_min(n_gc_threads * RELAXED_LOAD(&thread->copied), copied); } - } - if (n_gc_threads > 1) { // See Note [Work Balance] for an explanation of this computation par_balanced_copied = (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) / (n_gc_threads - 1); + } else { + copied += gct->copied; } } -- cgit v1.2.1