summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouglas Wilson <douglas.wilson@gmail.com>2021-01-01 12:45:06 +0000
committerBen Gamari <ben@smart-cactus.org>2021-05-04 19:12:10 -0400
commitf4ac60b95c8eed9d6c3c905338156bfe42351db5 (patch)
tree632f1bebf3dbaed7c40248d14eb303e84c9ec600
parentfa0888718ddf74586da6e42bda662340e6f0e1c5 (diff)
downloadhaskell-f4ac60b95c8eed9d6c3c905338156bfe42351db5.tar.gz
rts: stats: Some fixes to stats for sequential gcs
Solves #19147. When n_capabilities > 1 we were not correctly accounting for gc time for sequential collections. In this case par_n_gcthreads == 1, however it is not guaranteed that the single gc thread is capability 0. A similar issue for copied is addressed as well. (cherry picked from commit f49d6fb27336297d6d7a46269a22dd98c131b4a8)
-rw-r--r--rts/Stats.c16
-rw-r--r--rts/sm/GC.c35
2 files changed, 37 insertions, 14 deletions
diff --git a/rts/Stats.c b/rts/Stats.c
index 282e799d23..895ff3f040 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -523,10 +523,18 @@ stat_endGC (Capability *cap, gc_thread *initiating_gct, W_ live, W_ copied, W_ s
initiating_gct->gc_start_elapsed - initiating_gct->gc_sync_start_elapsed;
stats.gc.elapsed_ns = current_elapsed - initiating_gct->gc_start_elapsed;
stats.gc.cpu_ns = 0;
- for (unsigned int i=0; i < par_n_threads; i++) {
- gc_thread *gct = gc_threads[i];
- ASSERT(gct->gc_end_cpu >= gct->gc_start_cpu);
- stats.gc.cpu_ns += gct->gc_end_cpu - gct->gc_start_cpu;
+ // see Note [n_gc_threads]
+ // par_n_threads is set to n_gc_threads at the single callsite of this
+ // function
+ if (par_n_threads == 1) {
+ ASSERT(initiating_gct->gc_end_cpu >= initiating_gct->gc_start_cpu);
+ stats.gc.cpu_ns += initiating_gct->gc_end_cpu - initiating_gct->gc_start_cpu;
+ } else {
+ for (unsigned int i=0; i < par_n_threads; i++) {
+ gc_thread *gct = gc_threads[i];
+ ASSERT(gct->gc_end_cpu >= gct->gc_start_cpu);
+ stats.gc.cpu_ns += gct->gc_end_cpu - gct->gc_start_cpu;
+ }
}
}
// -------------------------------------------------
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 7a6345f469..1e8b2e1254 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -134,8 +134,22 @@ StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]
ATTRIBUTE_ALIGNED(64);
#endif
-// Number of threads running in *this* GC. Affects how many
-// step->todos[] lists we have to look in to find work.
+/* Note [n_gc_threads]
+This is a global variable that originally tracked the number of threads
+participating in the current gc. Its meaning has diverged from this somewhat.
+In practice, it now takes one of the values {1, n_capabilities}. Don't be
+tricked into thinking this means garbage collections must have 1 or
+n_capabilities participating: idle capabilities (idle_cap[cap->no]) are included
+in the n_gc_thread count.
+
+Clearly this is in need of some tidying up, but for now we tread carefully. We
+check n_gc_threads > 1 to see whether we are in a parallel or sequential gc. We
+ensure n_gc_threads > 1 before iterating over gc_threads a la:
+
+for (i = 0; i < n_gc_threads; ++i) { foo(gc_threads[i]); }
+
+Omitting this check has led to issues such as #19147.
+*/
uint32_t n_gc_threads;
// For stats:
@@ -550,12 +564,13 @@ GarbageCollect (uint32_t collect_gen,
uint64_t par_balanced_copied_acc = 0;
const gc_thread* thread;
- for (i=0; i < n_gc_threads; i++) {
- copied += RELAXED_LOAD(&gc_threads[i]->copied);
- }
- for (i=0; i < n_gc_threads; i++) {
- thread = gc_threads[i];
- if (n_gc_threads > 1) {
+ // see Note [n_gc_threads]
+ if (n_gc_threads > 1) {
+ for (i=0; i < n_gc_threads; i++) {
+ copied += RELAXED_LOAD(&gc_threads[i]->copied);
+ }
+ for (i=0; i < n_gc_threads; i++) {
+ thread = gc_threads[i];
debugTrace(DEBUG_gc,"thread %d:", i);
debugTrace(DEBUG_gc," copied %ld",
RELAXED_LOAD(&thread->copied) * sizeof(W_));
@@ -583,12 +598,12 @@ GarbageCollect (uint32_t collect_gen,
par_balanced_copied_acc +=
stg_min(n_gc_threads * RELAXED_LOAD(&thread->copied), copied);
}
- }
- if (n_gc_threads > 1) {
// See Note [Work Balance] for an explanation of this computation
par_balanced_copied =
(par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
(n_gc_threads - 1);
+ } else {
+ copied += gct->copied;
}
}