summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/users_guide/runtime_control.rst6
-rw-r--r--includes/RtsAPI.h24
-rw-r--r--includes/rts/Flags.h1
-rw-r--r--includes/rts/SpinLock.h5
-rw-r--r--includes/rts/storage/GC.h2
-rw-r--r--libraries/base/GHC/RTS/Flags.hsc3
-rw-r--r--rts/Messages.c3
-rw-r--r--rts/Messages.h4
-rw-r--r--rts/RtsFlags.c6
-rw-r--r--rts/SMPClosureOps.h12
-rw-r--r--rts/Stats.c189
-rw-r--r--rts/Stats.h5
-rw-r--r--rts/StgMiscClosures.cmm10
-rw-r--r--rts/ThreadPaused.c4
-rw-r--r--rts/ThreadPaused.h8
-rw-r--r--rts/sm/Evac.c9
-rw-r--r--rts/sm/GC.c71
-rw-r--r--rts/sm/GC.h2
-rw-r--r--testsuite/tests/rts/InternalCounters.stdout1
-rw-r--r--testsuite/tests/rts/Makefile19
-rw-r--r--testsuite/tests/rts/all.T2
21 files changed, 346 insertions, 40 deletions
diff --git a/docs/users_guide/runtime_control.rst b/docs/users_guide/runtime_control.rst
index 009e3ae887..92bc739dfe 100644
--- a/docs/users_guide/runtime_control.rst
+++ b/docs/users_guide/runtime_control.rst
@@ -776,6 +776,7 @@ RTS options to produce runtime statistics
-s [⟨file⟩]
-S [⟨file⟩]
--machine-readable
+ --internal-counters
These options produce runtime-system statistics, such as the amount
of time spent executing the program and in the garbage collector,
@@ -785,7 +786,10 @@ RTS options to produce runtime statistics
line of output in the same format as GHC's ``-Rghc-timing`` option,
``-s`` produces a more detailed summary at the end of the program,
and ``-S`` additionally produces information about each and every
- garbage collection.
+ garbage collection. Passing ``--internal-counters`` to a threaded
+ runtime will cause a detailed summary to include various internal
+ counts accumulated during the run; note that these are unspecified
+ and may change between releases.
The output is placed in ⟨file⟩. If ⟨file⟩ is omitted, then the
output is sent to ``stderr``.
diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h
index 27a5080220..6f011cbf6e 100644
--- a/includes/RtsAPI.h
+++ b/includes/RtsAPI.h
@@ -210,6 +210,30 @@ typedef struct _RTSStats {
GCDetails gc;
+ // -----------------------------------
+ // Internal Counters
+
+ // The number of times a GC thread spun on its 'gc_spin' lock.
+ // Will be zero if the rts was not built with PROF_SPIN
+ uint64_t gc_spin_spin;
+ // The number of times a GC thread yielded on its 'gc_spin' lock.
+ // Will be zero if the rts was not built with PROF_SPIN
+ uint64_t gc_spin_yield;
+ // The number of times a GC thread spun on its 'mut_spin' lock.
+ // Will be zero if the rts was not built with PROF_SPIN
+ uint64_t mut_spin_spin;
+ // The number of times a GC thread yielded on its 'mut_spin' lock.
+ // Will be zero if the rts was not built with PROF_SPIN
+ uint64_t mut_spin_yield;
+ // The number of times a GC thread has checked for work across all parallel
+ // GCs
+ uint64_t any_work;
+ // The number of times a GC thread has checked for work and found none across
+ // all parallel GCs
+ uint64_t no_work;
+ // The number of times a GC thread has iterated it's outer loop across all
+ // parallel GCs
+ uint64_t scav_find_work;
} RTSStats;
void getRTSStats (RTSStats *s);
diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h
index aed4dca384..6487947749 100644
--- a/includes/rts/Flags.h
+++ b/includes/rts/Flags.h
@@ -195,6 +195,7 @@ typedef struct _MISC_FLAGS {
bool generate_dump_file;
bool generate_stack_trace;
bool machineReadable;
+ bool internalCounters; /* See Note [Internal Counter Stats] */
StgWord linkerMemBase; /* address to ask the OS for memory
* for the linker, NULL ==> off */
} MISC_FLAGS;
diff --git a/includes/rts/SpinLock.h b/includes/rts/SpinLock.h
index 6530a3a2f0..1dca02f795 100644
--- a/includes/rts/SpinLock.h
+++ b/includes/rts/SpinLock.h
@@ -27,7 +27,8 @@
typedef struct SpinLock_
{
StgWord lock;
- StgWord64 spin; // DEBUG version counts how much it spins
+ StgWord64 spin; // incremented every time we spin in ACQUIRE_SPIN_LOCK
+ StgWord64 yield; // incremented every time we yield in ACQUIRE_SPIN_LOCK
} SpinLock;
#else
typedef StgWord SpinLock;
@@ -49,6 +50,7 @@ INLINE_HEADER void ACQUIRE_SPIN_LOCK(SpinLock * p)
p->spin++;
busy_wait_nop();
}
+ p->yield++;
yieldThread();
} while (1);
}
@@ -66,6 +68,7 @@ INLINE_HEADER void initSpinLock(SpinLock * p)
write_barrier();
p->lock = 1;
p->spin = 0;
+ p->yield = 0;
}
#else
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
index 2aed7c57ee..d4182dd7f9 100644
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -120,7 +120,7 @@ typedef struct generation_ {
// stats information
uint32_t collections;
uint32_t par_collections;
- uint32_t failed_promotions;
+ uint32_t failed_promotions; // Currently unused
// ------------------------------------
// Fields below are used during GC only
diff --git a/libraries/base/GHC/RTS/Flags.hsc b/libraries/base/GHC/RTS/Flags.hsc
index 3e712ca900..046975577e 100644
--- a/libraries/base/GHC/RTS/Flags.hsc
+++ b/libraries/base/GHC/RTS/Flags.hsc
@@ -138,6 +138,7 @@ data MiscFlags = MiscFlags
, generateCrashDumpFile :: Bool
, generateStackTrace :: Bool
, machineReadable :: Bool
+ , internalCounters :: Bool
, linkerMemBase :: Word
-- ^ address to ask the OS for memory for the linker, 0 ==> off
} deriving ( Show -- ^ @since 4.8.0.0
@@ -441,6 +442,8 @@ getMiscFlags = do
(#{peek MISC_FLAGS, generate_stack_trace} ptr :: IO CBool))
<*> (toBool <$>
(#{peek MISC_FLAGS, machineReadable} ptr :: IO CBool))
+ <*> (toBool <$>
+ (#{peek MISC_FLAGS, internalCounters} ptr :: IO CBool))
<*> #{peek MISC_FLAGS, linkerMemBase} ptr
getDebugFlags :: IO DebugFlags
diff --git a/rts/Messages.c b/rts/Messages.c
index 8fab314bc4..a9c794d823 100644
--- a/rts/Messages.c
+++ b/rts/Messages.c
@@ -129,6 +129,9 @@ loop:
}
else if (i == &stg_WHITEHOLE_info)
{
+#if defined(PROF_SPIN)
+ ++whitehole_executeMessage_spin;
+#endif
goto loop;
}
else
diff --git a/rts/Messages.h b/rts/Messages.h
index e60f19dc1d..18371564c4 100644
--- a/rts/Messages.h
+++ b/rts/Messages.h
@@ -31,3 +31,7 @@ doneWithMsgThrowTo (MessageThrowTo *m)
}
#include "EndPrivate.h"
+
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+extern volatile StgWord64 whitehole_executeMessage_spin;
+#endif
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 7b38bbd6fd..b674e9b685 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -232,6 +232,7 @@ void initRtsFlagsDefaults(void)
RtsFlags.MiscFlags.generate_stack_trace = true;
RtsFlags.MiscFlags.generate_dump_file = false;
RtsFlags.MiscFlags.machineReadable = false;
+ RtsFlags.MiscFlags.internalCounters = false;
RtsFlags.MiscFlags.linkerMemBase = 0;
#if defined(THREADED_RTS)
@@ -888,6 +889,11 @@ error = true;
OPTION_UNSAFE;
RtsFlags.MiscFlags.machineReadable = true;
}
+ else if (strequal("internal-counters",
+ &rts_argv[arg][2])) {
+ OPTION_SAFE;
+ RtsFlags.MiscFlags.internalCounters = true;
+ }
else if (strequal("info",
&rts_argv[arg][2])) {
OPTION_SAFE;
diff --git a/rts/SMPClosureOps.h b/rts/SMPClosureOps.h
index 4ea1c55976..1d18e1b018 100644
--- a/rts/SMPClosureOps.h
+++ b/rts/SMPClosureOps.h
@@ -38,6 +38,11 @@ EXTERN_INLINE void unlockClosure(StgClosure *p, const StgInfoTable *info);
#if defined(THREADED_RTS)
+#if defined(PROF_SPIN)
+extern volatile StgWord64 whitehole_lockClosure_spin;
+extern volatile StgWord64 whitehole_lockClosure_yield;
+#endif
+
/* -----------------------------------------------------------------------------
* Locking/unlocking closures
*
@@ -56,7 +61,14 @@ EXTERN_INLINE StgInfoTable *reallyLockClosure(StgClosure *p)
do {
info = xchg((P_)(void *)&p->header.info, (W_)&stg_WHITEHOLE_info);
if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info;
+#if defined(PROF_SPIN)
+ ++whitehole_lockClosure_spin;
+#endif
+ busy_wait_nop();
} while (++i < SPIN_COUNT);
+#if defined(PROF_SPIN)
+ ++whitehole_lockClosure_yield;
+#endif
yieldThread();
} while (1);
}
diff --git a/rts/Stats.c b/rts/Stats.c
index 26bdac0ea5..7eb93bef33 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -16,10 +16,15 @@
#include "Profiling.h"
#include "GetTime.h"
#include "sm/Storage.h"
-#include "sm/GC.h" // gc_alloc_block_sync, whitehole_gc_spin
#include "sm/GCThread.h"
#include "sm/BlockAlloc.h"
+// for spin/yield counters
+#include "sm/GC.h"
+#include "ThreadPaused.h"
+#include "Messages.h"
+
+
#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
static Time
@@ -43,6 +48,13 @@ static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
#define PROF_VAL(x) 0
#endif
+#if defined(PROF_SPIN)
+volatile StgWord64 whitehole_lockClosure_spin = 0;
+volatile StgWord64 whitehole_lockClosure_yield = 0;
+volatile StgWord64 whitehole_threadPaused_spin = 0;
+volatile StgWord64 whitehole_executeMessage_spin = 0;
+#endif
+
//
// All the stats!
//
@@ -150,6 +162,13 @@ initStats0(void)
.par_copied_bytes = 0,
.cumulative_par_max_copied_bytes = 0,
.cumulative_par_balanced_copied_bytes = 0,
+ .gc_spin_spin = 0,
+ .gc_spin_yield = 0,
+ .mut_spin_spin = 0,
+ .mut_spin_yield = 0,
+ .any_work = 0,
+ .no_work = 0,
+ .scav_find_work = 0,
.mutator_cpu_ns = 0,
.mutator_elapsed_ns = 0,
.gc_cpu_ns = 0,
@@ -283,10 +302,11 @@ stat_startGC (Capability *cap, gc_thread *gct)
-------------------------------------------------------------------------- */
void
-stat_endGC (Capability *cap, gc_thread *gct,
- W_ live, W_ copied, W_ slop, uint32_t gen,
- uint32_t par_n_threads, W_ par_max_copied,
- W_ par_balanced_copied)
+stat_endGC (Capability *cap, gc_thread *gct, W_ live, W_ copied, W_ slop,
+ uint32_t gen, uint32_t par_n_threads, W_ par_max_copied,
+ W_ par_balanced_copied, W_ gc_spin_spin, W_ gc_spin_yield,
+ W_ mut_spin_spin, W_ mut_spin_yield, W_ any_work, W_ no_work,
+ W_ scav_find_work)
{
// -------------------------------------------------
// Collect all the stats about this GC in stats.gc. We always do this since
@@ -350,6 +370,13 @@ stat_endGC (Capability *cap, gc_thread *gct,
stats.gc.par_max_copied_bytes;
stats.cumulative_par_balanced_copied_bytes +=
stats.gc.par_balanced_copied_bytes;
+ stats.any_work += any_work;
+ stats.no_work += no_work;
+ stats.scav_find_work += scav_find_work;
+ stats.gc_spin_spin += gc_spin_spin;
+ stats.gc_spin_yield += gc_spin_yield;
+ stats.mut_spin_spin += mut_spin_spin;
+ stats.mut_spin_yield += mut_spin_yield;
}
stats.gc_cpu_ns += stats.gc.cpu_ns;
stats.gc_elapsed_ns += stats.gc.elapsed_ns;
@@ -764,18 +791,96 @@ stat_exit (void)
PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100
/ TimeToSecondsDbl(tot_elapsed));
+ // See Note [Internal Counter Stats] for a description of the
+ // following counters. If you add a counter here, please remember
+ // to update the Note.
+ if (RtsFlags.MiscFlags.internalCounters) {
#if defined(THREADED_RTS) && defined(PROF_SPIN)
- {
uint32_t g;
+ const int32_t col_width[] = {4, -30, 14, 14};
+ statsPrintf("Internal Counters:\n");
+ statsPrintf("%*s" "%*s" "%*s" "%*s" "\n"
+ , col_width[0], ""
+ , col_width[1], "SpinLock"
+ , col_width[2], "Spins"
+ , col_width[3], "Yields");
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "gc_alloc_block_sync"
+ , col_width[2], gc_alloc_block_sync.spin
+ , col_width[3], gc_alloc_block_sync.yield);
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "gc_spin"
+ , col_width[2], stats.gc_spin_spin
+ , col_width[3], stats.gc_spin_yield);
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "mut_spin"
+ , col_width[2], stats.mut_spin_spin
+ , col_width[3], stats.mut_spin_yield);
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
+ , col_width[0], ""
+ , col_width[1], "whitehole_gc"
+ , col_width[2], whitehole_gc_spin
+ , col_width[3], "n/a");
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
+ , col_width[0], ""
+ , col_width[1], "whitehole_threadPaused"
+ , col_width[2], whitehole_threadPaused_spin
+ , col_width[3], "n/a");
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n"
+ , col_width[0], ""
+ , col_width[1], "whitehole_executeMessage"
+ , col_width[2], whitehole_executeMessage_spin
+ , col_width[3], "n/a");
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "whitehole_lockClosure"
+ , col_width[2], whitehole_lockClosure_spin
+ , col_width[3], whitehole_lockClosure_yield);
+ // waitForGcThreads isn't really spin-locking(see the function)
+ // but these numbers still seem useful.
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "waitForGcThreads"
+ , col_width[2], waitForGcThreads_spin
+ , col_width[3], waitForGcThreads_yield);
- statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
- statsPrintf("whitehole_gc_spin: %"FMT_Word64"\n"
- , whitehole_gc_spin);
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
+ int prefix_length = 0;
+ statsPrintf("%*s" "gen[%" FMT_Word32 "%n",
+ col_width[0], "", g, &prefix_length);
+ prefix_length -= col_width[0];
+ int suffix_length = col_width[1] + prefix_length;
+ suffix_length =
+ suffix_length > 0 ? col_width[1] : suffix_length;
+
+ statsPrintf("%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n"
+ , suffix_length, "].sync"
+ , col_width[2], generations[g].sync.spin
+ , col_width[3], generations[g].sync.yield);
}
- }
+ statsPrintf("\n");
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "any_work"
+ , col_width[2], stats.any_work);
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "no_work"
+ , col_width[2], stats.no_work);
+ statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n"
+ , col_width[0], ""
+ , col_width[1], "scav_find_work"
+ , col_width[2], stats.scav_find_work);
+#elif defined(THREADED_RTS) // THREADED_RTS && PROF_SPIN
+ statsPrintf("Internal Counters require the RTS to be built "
+ "with PROF_SPIN"); // PROF_SPIN is not #defined here
+#else // THREADED_RTS
+ statsPrintf("Internal Counters require the threaded RTS");
#endif
+ }
}
if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
@@ -917,6 +1022,68 @@ the number of gc threads is limited to the number of cores.
See #13830
*/
+/*
+Note [Internal Counter Stats]
+-----------------------------
+What do the counts at the end of a '+RTS -s --internal-counters' report mean?
+They are detailed below. Most of these counters are used by multiple threads
+with no attempt at synchronisation. This means that reported values may be
+lower than the true value and this becomes more likely and more severe as
+contention increases.
+
+The first counters are for various SpinLock-like constructs in the RTS. See
+Spinlock.h for the definition of a SpinLock. We maintain up two counters per
+SpinLock:
+* spin: The number of busy-spins over the length of the program.
+* yield: The number of times the SpinLock spun SPIN_COUNT times without success
+ and called yieldThread().
+Not all of these are actual SpinLocks, see the details below.
+
+Actual SpinLocks:
+* gc_alloc_block:
+ This SpinLock protects the block allocator and free list manager. See
+ BlockAlloc.c.
+* gc_spin and mut_spin:
+ These SpinLocks are used to herd gc worker threads during parallel garbage
+ collection. See gcWorkerThread, wakeup_gc_threads and releaseGCThreads.
+* gen[g].sync:
+ These SpinLocks, one per generation, protect the generations[g] data
+ structure during garbage collection.
+
+waitForGcThreads:
+ These counters are incremented while we wait for all threads to be ready
+ for a parallel garbage collection. We yield more than we spin in this case.
+
+In several places in the runtime we must take a lock on a closure. To do this,
+we replace it's info table with stg_WHITEHOLE_info, spinning if it is already
+a white-hole. Sometimes we yieldThread() if we spin too long, sometimes we
+don't. We count these white-hole spins and include them in the SpinLocks table.
+If a particular loop does not yield, we put "n/a" in the table. They are named
+for the function that has the spinning loop except that several loops in the
+garbage collector accumulate into whitehole_gc.
+TODO: Should these counters be more or less granular?
+
+white-hole spin counters:
+* whitehole_gc
+* whitehole_lockClosure
+* whitehole_executeMessage
+* whitehole_threadPaused
+
+
+We count the number of calls of several functions in the parallel garbage
+collector.
+
+Parallel garbage collector counters:
+* any_work:
+ A cheap function called whenever a gc_thread is ready for work. Does
+ not do any work.
+* no_work:
+ Incremented whenever any_work finds no work.
+* scav_find_work:
+ Called to do work when any_work return true.
+
+*/
+
/* -----------------------------------------------------------------------------
stat_describe_gens
diff --git a/rts/Stats.h b/rts/Stats.h
index 5d9cf04fa7..1c56344f81 100644
--- a/rts/Stats.h
+++ b/rts/Stats.h
@@ -30,7 +30,10 @@ void stat_startGCSync(struct gc_thread_ *_gct);
void stat_startGC(Capability *cap, struct gc_thread_ *_gct);
void stat_endGC (Capability *cap, struct gc_thread_ *_gct, W_ live,
W_ copied, W_ slop, uint32_t gen, uint32_t n_gc_threads,
- W_ par_max_copied, W_ par_balanced_copied);
+ W_ par_max_copied, W_ par_balanced_copied,
+ W_ gc_spin_spin, W_ gc_spin_yield, W_ mut_spin_spin,
+ W_ mut_spin_yield, W_ any_work, W_ no_work,
+ W_ scav_find_work);
#if defined(PROFILING)
void stat_startRP(void);
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 361989d0d2..595d3ce6c2 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -375,11 +375,21 @@ loop:
// spin until the WHITEHOLE is updated
info = StgHeader_info(node);
if (info == stg_WHITEHOLE_info) {
+#if defined(PROF_SPIN)
+ W_[whitehole_lockClosure_spin] =
+ W_[whitehole_lockClosure_spin] + 1;
+#endif
i = i + 1;
if (i == SPIN_COUNT) {
i = 0;
+#if defined(PROF_SPIN)
+ W_[whitehole_lockClosure_yield] =
+ W_[whitehole_lockClosure_yield] + 1;
+#endif
ccall yieldThread();
}
+ // TODO: We should busy_wait_nop() here, but that's not currently
+ // defined in CMM.
goto loop;
}
jump %ENTRY_CODE(info) (node);
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index 7ade0a64c5..3f7bddeb79 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -329,6 +329,10 @@ threadPaused(Capability *cap, StgTSO *tso)
if (cur_bh_info != bh_info) {
bh_info = cur_bh_info;
+#if defined(PROF_SPIN)
+ ++whitehole_threadPaused_spin;
+#endif
+ busy_wait_nop();
goto retry;
}
#endif
diff --git a/rts/ThreadPaused.h b/rts/ThreadPaused.h
index 4d762f9aed..ee25189c20 100644
--- a/rts/ThreadPaused.h
+++ b/rts/ThreadPaused.h
@@ -8,4 +8,12 @@
#pragma once
+#include "BeginPrivate.h"
+
RTS_PRIVATE void threadPaused ( Capability *cap, StgTSO * );
+
+#include "EndPrivate.h"
+
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+extern volatile StgWord64 whitehole_threadPaused_spin;
+#endif
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index bb54c7efc3..27f280665e 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -1058,9 +1058,14 @@ selector_chain:
// In threaded mode, we'll use WHITEHOLE to lock the selector
// thunk while we evaluate it.
{
- do {
+ while(true) {
info_ptr = xchg((StgPtr)&p->header.info, (W_)&stg_WHITEHOLE_info);
- } while (info_ptr == (W_)&stg_WHITEHOLE_info);
+ if (info_ptr != (W_)&stg_WHITEHOLE_info) { break; }
+#if defined(PROF_SPIN)
+ ++whitehole_gc_spin;
+#endif
+ busy_wait_nop();
+ }
// make sure someone else didn't get here first...
if (IS_FORWARDING_PTR(info_ptr) ||
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 54797ba0f0..d61ca41a6b 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -139,6 +139,9 @@ uint32_t n_gc_threads;
static long copied; // *words* copied & scavenged during this GC
#if defined(PROF_SPIN) && defined(THREADED_RTS)
+// spin and yield counts for the quasi-SpinLock in waitForGcThreads
+volatile StgWord64 waitForGcThreads_spin = 0;
+volatile StgWord64 waitForGcThreads_yield = 0;
volatile StgWord64 whitehole_gc_spin = 0;
#endif
@@ -198,7 +201,9 @@ GarbageCollect (uint32_t collect_gen,
{
bdescr *bd;
generation *gen;
- StgWord live_blocks, live_words, par_max_copied, par_balanced_copied;
+ StgWord live_blocks, live_words, par_max_copied, par_balanced_copied,
+ gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
+ any_work, no_work, scav_find_work;
#if defined(THREADED_RTS)
gc_thread *saved_gct;
#endif
@@ -471,32 +476,53 @@ GarbageCollect (uint32_t collect_gen,
copied = 0;
par_max_copied = 0;
par_balanced_copied = 0;
+ gc_spin_spin = 0;
+ gc_spin_yield = 0;
+ mut_spin_spin = 0;
+ mut_spin_yield = 0;
+ any_work = 0;
+ no_work = 0;
+ scav_find_work = 0;
{
uint32_t i;
uint64_t par_balanced_copied_acc = 0;
+ const gc_thread* thread;
for (i=0; i < n_gc_threads; i++) {
copied += gc_threads[i]->copied;
}
for (i=0; i < n_gc_threads; i++) {
+ thread = gc_threads[i];
if (n_gc_threads > 1) {
debugTrace(DEBUG_gc,"thread %d:", i);
- debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
- debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
- debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
- debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
- debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
+ debugTrace(DEBUG_gc," copied %ld",
+ thread->copied * sizeof(W_));
+ debugTrace(DEBUG_gc," scanned %ld",
+ thread->scanned * sizeof(W_));
+ debugTrace(DEBUG_gc," any_work %ld",
+ thread->any_work);
+ debugTrace(DEBUG_gc," no_work %ld",
+ thread->no_work);
+ debugTrace(DEBUG_gc," scav_find_work %ld",
+ thread->scav_find_work);
+
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+ gc_spin_spin += thread->gc_spin.spin;
+ gc_spin_yield += thread->gc_spin.yield;
+ mut_spin_spin += thread->mut_spin.spin;
+ mut_spin_yield += thread->mut_spin.yield;
+#endif
+
+ any_work += thread->any_work;
+ no_work += thread->no_work;
+ scav_find_work += thread->scav_find_work;
+
+ par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
+ par_balanced_copied_acc +=
+ stg_min(n_gc_threads * gc_threads[i]->copied, copied);
}
- par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
- par_balanced_copied_acc +=
- stg_min(n_gc_threads * gc_threads[i]->copied, copied);
- }
- if (n_gc_threads == 1) {
- par_max_copied = 0;
- par_balanced_copied = 0;
}
- else
- {
+ if (n_gc_threads > 1) {
// See Note [Work Balance] for an explanation of this computation
par_balanced_copied =
(par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
@@ -834,7 +860,9 @@ GarbageCollect (uint32_t collect_gen,
// ok, GC over: tell the stats department what happened.
stat_endGC(cap, gct, live_words, copied,
live_blocks * BLOCK_SIZE_W - live_words /* slop */,
- N, n_gc_threads, par_max_copied, par_balanced_copied);
+ N, n_gc_threads, par_max_copied, par_balanced_copied,
+ gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
+ any_work, no_work, scav_find_work);
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
@@ -1186,6 +1214,9 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
}
}
if (!retry) break;
+#if defined(PROF_SPIN)
+ waitForGcThreads_yield++;
+#endif
yieldThread();
}
@@ -1196,6 +1227,14 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
rtsConfig.longGCSync(cap->no, t2 - t0);
t1 = t2;
}
+ if (retry) {
+#if defined(PROF_SPIN)
+ // This is a bit strange, we'll get more yields than spins.
+ // I guess that means it's not a spin-lock at all, but these
+ // numbers are still useful (I think).
+ waitForGcThreads_spin++;
+#endif
+ }
}
if (RtsFlags.GcFlags.longGCSync != 0 &&
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index 78f054931a..7fce87edd4 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -47,6 +47,8 @@ extern uint32_t mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS,
#if defined(PROF_SPIN) && defined(THREADED_RTS)
extern volatile StgWord64 whitehole_gc_spin;
+extern volatile StgWord64 waitForGcThreads_spin;
+extern volatile StgWord64 waitForGcThreads_yield;
#endif
void gcWorkerThread (Capability *cap);
diff --git a/testsuite/tests/rts/InternalCounters.stdout b/testsuite/tests/rts/InternalCounters.stdout
new file mode 100644
index 0000000000..d764d7bc19
--- /dev/null
+++ b/testsuite/tests/rts/InternalCounters.stdout
@@ -0,0 +1 @@
+Internal Counters: \ No newline at end of file
diff --git a/testsuite/tests/rts/Makefile b/testsuite/tests/rts/Makefile
index ded3be1b3b..630508542d 100644
--- a/testsuite/tests/rts/Makefile
+++ b/testsuite/tests/rts/Makefile
@@ -48,13 +48,13 @@ T5423:
.PHONY: T9405
T9405:
@'$(TEST_HC)' $(TEST_HC_OPTS) -ticky -rtsopts T9405.hs; \
- ./T9405 +RTS -rT9405.ticky & \
- sleep 0.2; \
- kill -2 $$!; \
- wait $$!; \
- [ -e T9405.ticky ] || echo "Error: Ticky profile doesn't exist"; \
- [ -s T9405.ticky ] || echo "Error: Ticky profile is empty"; \
- echo Ticky-Ticky;
+ ./T9405 +RTS -rT9405.ticky & \
+ sleep 0.2; \
+ kill -2 $$!; \
+ wait $$!; \
+ [ -e T9405.ticky ] || echo "Error: Ticky profile doesn't exist"; \
+ [ -s T9405.ticky ] || echo "Error: Ticky profile is empty"; \
+ echo Ticky-Ticky;
# Naming convention: 'T5423_' obj-way '_' obj-src
# obj-way ::= v | dyn
@@ -178,3 +178,8 @@ T12497:
.PHONY: T14695
T14695:
echo ":quit" | LD_LIBRARY_PATH="foo:" "$(TEST_HC)" $(filter-out -rtsopts, $(TEST_HC_OPTS_INTERACTIVE))
+
+.PHONY: InternalCounters
+InternalCounters:
+ "$(TEST_HC)" +RTS -s --internal-counters -RTS 2>&1 | grep "Internal Counters"
+ -"$(TEST_HC)" +RTS -s -RTS 2>&1 | grep "Internal Counters"
diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T
index 5000a914db..cd70132610 100644
--- a/testsuite/tests/rts/all.T
+++ b/testsuite/tests/rts/all.T
@@ -390,3 +390,5 @@ test('T14702', [ ignore_stdout
, compile_and_run, [''])
test('T14900', normal, compile_and_run, ['-package ghc-compact'])
+test('InternalCounters', normal, run_command,
+ ['$MAKE -s --no-print-directory InternalCounters'])