diff options
-rw-r--r-- | docs/users_guide/runtime_control.rst | 6 | ||||
-rw-r--r-- | includes/RtsAPI.h | 24 | ||||
-rw-r--r-- | includes/rts/Flags.h | 1 | ||||
-rw-r--r-- | includes/rts/SpinLock.h | 5 | ||||
-rw-r--r-- | includes/rts/storage/GC.h | 2 | ||||
-rw-r--r-- | libraries/base/GHC/RTS/Flags.hsc | 3 | ||||
-rw-r--r-- | rts/Messages.c | 3 | ||||
-rw-r--r-- | rts/Messages.h | 4 | ||||
-rw-r--r-- | rts/RtsFlags.c | 6 | ||||
-rw-r--r-- | rts/SMPClosureOps.h | 12 | ||||
-rw-r--r-- | rts/Stats.c | 189 | ||||
-rw-r--r-- | rts/Stats.h | 5 | ||||
-rw-r--r-- | rts/StgMiscClosures.cmm | 10 | ||||
-rw-r--r-- | rts/ThreadPaused.c | 4 | ||||
-rw-r--r-- | rts/ThreadPaused.h | 8 | ||||
-rw-r--r-- | rts/sm/Evac.c | 9 | ||||
-rw-r--r-- | rts/sm/GC.c | 71 | ||||
-rw-r--r-- | rts/sm/GC.h | 2 | ||||
-rw-r--r-- | testsuite/tests/rts/InternalCounters.stdout | 1 | ||||
-rw-r--r-- | testsuite/tests/rts/Makefile | 19 | ||||
-rw-r--r-- | testsuite/tests/rts/all.T | 2 |
21 files changed, 346 insertions, 40 deletions
diff --git a/docs/users_guide/runtime_control.rst b/docs/users_guide/runtime_control.rst index 009e3ae887..92bc739dfe 100644 --- a/docs/users_guide/runtime_control.rst +++ b/docs/users_guide/runtime_control.rst @@ -776,6 +776,7 @@ RTS options to produce runtime statistics -s [⟨file⟩] -S [⟨file⟩] --machine-readable + --internal-counters These options produce runtime-system statistics, such as the amount of time spent executing the program and in the garbage collector, @@ -785,7 +786,10 @@ RTS options to produce runtime statistics line of output in the same format as GHC's ``-Rghc-timing`` option, ``-s`` produces a more detailed summary at the end of the program, and ``-S`` additionally produces information about each and every - garbage collection. + garbage collection. Passing ``--internal-counters`` to a threaded + runtime will cause a detailed summary to include various internal + counts accumulated during the run; note that these are unspecified + and may change between releases. The output is placed in ⟨file⟩. If ⟨file⟩ is omitted, then the output is sent to ``stderr``. diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h index 27a5080220..6f011cbf6e 100644 --- a/includes/RtsAPI.h +++ b/includes/RtsAPI.h @@ -210,6 +210,30 @@ typedef struct _RTSStats { GCDetails gc; + // ----------------------------------- + // Internal Counters + + // The number of times a GC thread spun on its 'gc_spin' lock. + // Will be zero if the rts was not built with PROF_SPIN + uint64_t gc_spin_spin; + // The number of times a GC thread yielded on its 'gc_spin' lock. + // Will be zero if the rts was not built with PROF_SPIN + uint64_t gc_spin_yield; + // The number of times a GC thread spun on its 'mut_spin' lock. + // Will be zero if the rts was not built with PROF_SPIN + uint64_t mut_spin_spin; + // The number of times a GC thread yielded on its 'mut_spin' lock. + // Will be zero if the rts was not built with PROF_SPIN + uint64_t mut_spin_yield; + // The number of times a GC thread has checked for work across all parallel + // GCs + uint64_t any_work; + // The number of times a GC thread has checked for work and found none across + // all parallel GCs + uint64_t no_work; + // The number of times a GC thread has iterated it's outer loop across all + // parallel GCs + uint64_t scav_find_work; } RTSStats; void getRTSStats (RTSStats *s); diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h index aed4dca384..6487947749 100644 --- a/includes/rts/Flags.h +++ b/includes/rts/Flags.h @@ -195,6 +195,7 @@ typedef struct _MISC_FLAGS { bool generate_dump_file; bool generate_stack_trace; bool machineReadable; + bool internalCounters; /* See Note [Internal Counter Stats] */ StgWord linkerMemBase; /* address to ask the OS for memory * for the linker, NULL ==> off */ } MISC_FLAGS; diff --git a/includes/rts/SpinLock.h b/includes/rts/SpinLock.h index 6530a3a2f0..1dca02f795 100644 --- a/includes/rts/SpinLock.h +++ b/includes/rts/SpinLock.h @@ -27,7 +27,8 @@ typedef struct SpinLock_ { StgWord lock; - StgWord64 spin; // DEBUG version counts how much it spins + StgWord64 spin; // incremented every time we spin in ACQUIRE_SPIN_LOCK + StgWord64 yield; // incremented every time we yield in ACQUIRE_SPIN_LOCK } SpinLock; #else typedef StgWord SpinLock; @@ -49,6 +50,7 @@ INLINE_HEADER void ACQUIRE_SPIN_LOCK(SpinLock * p) p->spin++; busy_wait_nop(); } + p->yield++; yieldThread(); } while (1); } @@ -66,6 +68,7 @@ INLINE_HEADER void initSpinLock(SpinLock * p) write_barrier(); p->lock = 1; p->spin = 0; + p->yield = 0; } #else diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index 2aed7c57ee..d4182dd7f9 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -120,7 +120,7 @@ typedef struct generation_ { // stats information uint32_t collections; uint32_t par_collections; - uint32_t failed_promotions; + uint32_t failed_promotions; // Currently unused // ------------------------------------ // Fields below are used during GC only diff --git a/libraries/base/GHC/RTS/Flags.hsc b/libraries/base/GHC/RTS/Flags.hsc index 3e712ca900..046975577e 100644 --- a/libraries/base/GHC/RTS/Flags.hsc +++ b/libraries/base/GHC/RTS/Flags.hsc @@ -138,6 +138,7 @@ data MiscFlags = MiscFlags , generateCrashDumpFile :: Bool , generateStackTrace :: Bool , machineReadable :: Bool + , internalCounters :: Bool , linkerMemBase :: Word -- ^ address to ask the OS for memory for the linker, 0 ==> off } deriving ( Show -- ^ @since 4.8.0.0 @@ -441,6 +442,8 @@ getMiscFlags = do (#{peek MISC_FLAGS, generate_stack_trace} ptr :: IO CBool)) <*> (toBool <$> (#{peek MISC_FLAGS, machineReadable} ptr :: IO CBool)) + <*> (toBool <$> + (#{peek MISC_FLAGS, internalCounters} ptr :: IO CBool)) <*> #{peek MISC_FLAGS, linkerMemBase} ptr getDebugFlags :: IO DebugFlags diff --git a/rts/Messages.c b/rts/Messages.c index 8fab314bc4..a9c794d823 100644 --- a/rts/Messages.c +++ b/rts/Messages.c @@ -129,6 +129,9 @@ loop: } else if (i == &stg_WHITEHOLE_info) { +#if defined(PROF_SPIN) + ++whitehole_executeMessage_spin; +#endif goto loop; } else diff --git a/rts/Messages.h b/rts/Messages.h index e60f19dc1d..18371564c4 100644 --- a/rts/Messages.h +++ b/rts/Messages.h @@ -31,3 +31,7 @@ doneWithMsgThrowTo (MessageThrowTo *m) } #include "EndPrivate.h" + +#if defined(THREADED_RTS) && defined(PROF_SPIN) +extern volatile StgWord64 whitehole_executeMessage_spin; +#endif diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index 7b38bbd6fd..b674e9b685 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -232,6 +232,7 @@ void initRtsFlagsDefaults(void) RtsFlags.MiscFlags.generate_stack_trace = true; RtsFlags.MiscFlags.generate_dump_file = false; RtsFlags.MiscFlags.machineReadable = false; + RtsFlags.MiscFlags.internalCounters = false; RtsFlags.MiscFlags.linkerMemBase = 0; #if defined(THREADED_RTS) @@ -888,6 +889,11 @@ error = true; OPTION_UNSAFE; RtsFlags.MiscFlags.machineReadable = true; } + else if (strequal("internal-counters", + &rts_argv[arg][2])) { + OPTION_SAFE; + RtsFlags.MiscFlags.internalCounters = true; + } else if (strequal("info", &rts_argv[arg][2])) { OPTION_SAFE; diff --git a/rts/SMPClosureOps.h b/rts/SMPClosureOps.h index 4ea1c55976..1d18e1b018 100644 --- a/rts/SMPClosureOps.h +++ b/rts/SMPClosureOps.h @@ -38,6 +38,11 @@ EXTERN_INLINE void unlockClosure(StgClosure *p, const StgInfoTable *info); #if defined(THREADED_RTS) +#if defined(PROF_SPIN) +extern volatile StgWord64 whitehole_lockClosure_spin; +extern volatile StgWord64 whitehole_lockClosure_yield; +#endif + /* ----------------------------------------------------------------------------- * Locking/unlocking closures * @@ -56,7 +61,14 @@ EXTERN_INLINE StgInfoTable *reallyLockClosure(StgClosure *p) do { info = xchg((P_)(void *)&p->header.info, (W_)&stg_WHITEHOLE_info); if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info; +#if defined(PROF_SPIN) + ++whitehole_lockClosure_spin; +#endif + busy_wait_nop(); } while (++i < SPIN_COUNT); +#if defined(PROF_SPIN) + ++whitehole_lockClosure_yield; +#endif yieldThread(); } while (1); } diff --git a/rts/Stats.c b/rts/Stats.c index 26bdac0ea5..7eb93bef33 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -16,10 +16,15 @@ #include "Profiling.h" #include "GetTime.h" #include "sm/Storage.h" -#include "sm/GC.h" // gc_alloc_block_sync, whitehole_gc_spin #include "sm/GCThread.h" #include "sm/BlockAlloc.h" +// for spin/yield counters +#include "sm/GC.h" +#include "ThreadPaused.h" +#include "Messages.h" + + #define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION) static Time @@ -43,6 +48,13 @@ static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time #define PROF_VAL(x) 0 #endif +#if defined(PROF_SPIN) +volatile StgWord64 whitehole_lockClosure_spin = 0; +volatile StgWord64 whitehole_lockClosure_yield = 0; +volatile StgWord64 whitehole_threadPaused_spin = 0; +volatile StgWord64 whitehole_executeMessage_spin = 0; +#endif + // // All the stats! // @@ -150,6 +162,13 @@ initStats0(void) .par_copied_bytes = 0, .cumulative_par_max_copied_bytes = 0, .cumulative_par_balanced_copied_bytes = 0, + .gc_spin_spin = 0, + .gc_spin_yield = 0, + .mut_spin_spin = 0, + .mut_spin_yield = 0, + .any_work = 0, + .no_work = 0, + .scav_find_work = 0, .mutator_cpu_ns = 0, .mutator_elapsed_ns = 0, .gc_cpu_ns = 0, @@ -283,10 +302,11 @@ stat_startGC (Capability *cap, gc_thread *gct) -------------------------------------------------------------------------- */ void -stat_endGC (Capability *cap, gc_thread *gct, - W_ live, W_ copied, W_ slop, uint32_t gen, - uint32_t par_n_threads, W_ par_max_copied, - W_ par_balanced_copied) +stat_endGC (Capability *cap, gc_thread *gct, W_ live, W_ copied, W_ slop, + uint32_t gen, uint32_t par_n_threads, W_ par_max_copied, + W_ par_balanced_copied, W_ gc_spin_spin, W_ gc_spin_yield, + W_ mut_spin_spin, W_ mut_spin_yield, W_ any_work, W_ no_work, + W_ scav_find_work) { // ------------------------------------------------- // Collect all the stats about this GC in stats.gc. We always do this since @@ -350,6 +370,13 @@ stat_endGC (Capability *cap, gc_thread *gct, stats.gc.par_max_copied_bytes; stats.cumulative_par_balanced_copied_bytes += stats.gc.par_balanced_copied_bytes; + stats.any_work += any_work; + stats.no_work += no_work; + stats.scav_find_work += scav_find_work; + stats.gc_spin_spin += gc_spin_spin; + stats.gc_spin_yield += gc_spin_yield; + stats.mut_spin_spin += mut_spin_spin; + stats.mut_spin_yield += mut_spin_yield; } stats.gc_cpu_ns += stats.gc.cpu_ns; stats.gc_elapsed_ns += stats.gc.elapsed_ns; @@ -764,18 +791,96 @@ stat_exit (void) PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100 / TimeToSecondsDbl(tot_elapsed)); + // See Note [Internal Counter Stats] for a description of the + // following counters. If you add a counter here, please remember + // to update the Note. + if (RtsFlags.MiscFlags.internalCounters) { #if defined(THREADED_RTS) && defined(PROF_SPIN) - { uint32_t g; + const int32_t col_width[] = {4, -30, 14, 14}; + statsPrintf("Internal Counters:\n"); + statsPrintf("%*s" "%*s" "%*s" "%*s" "\n" + , col_width[0], "" + , col_width[1], "SpinLock" + , col_width[2], "Spins" + , col_width[3], "Yields"); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "gc_alloc_block_sync" + , col_width[2], gc_alloc_block_sync.spin + , col_width[3], gc_alloc_block_sync.yield); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "gc_spin" + , col_width[2], stats.gc_spin_spin + , col_width[3], stats.gc_spin_yield); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "mut_spin" + , col_width[2], stats.mut_spin_spin + , col_width[3], stats.mut_spin_yield); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n" + , col_width[0], "" + , col_width[1], "whitehole_gc" + , col_width[2], whitehole_gc_spin + , col_width[3], "n/a"); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n" + , col_width[0], "" + , col_width[1], "whitehole_threadPaused" + , col_width[2], whitehole_threadPaused_spin + , col_width[3], "n/a"); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*s\n" + , col_width[0], "" + , col_width[1], "whitehole_executeMessage" + , col_width[2], whitehole_executeMessage_spin + , col_width[3], "n/a"); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "whitehole_lockClosure" + , col_width[2], whitehole_lockClosure_spin + , col_width[3], whitehole_lockClosure_yield); + // waitForGcThreads isn't really spin-locking(see the function) + // but these numbers still seem useful. + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "waitForGcThreads" + , col_width[2], waitForGcThreads_spin + , col_width[3], waitForGcThreads_yield); - statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin); - statsPrintf("whitehole_gc_spin: %"FMT_Word64"\n" - , whitehole_gc_spin); for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin); + int prefix_length = 0; + statsPrintf("%*s" "gen[%" FMT_Word32 "%n", + col_width[0], "", g, &prefix_length); + prefix_length -= col_width[0]; + int suffix_length = col_width[1] + prefix_length; + suffix_length = + suffix_length > 0 ? col_width[1] : suffix_length; + + statsPrintf("%*s" "%*" FMT_Word64 "%*" FMT_Word64 "\n" + , suffix_length, "].sync" + , col_width[2], generations[g].sync.spin + , col_width[3], generations[g].sync.yield); } - } + statsPrintf("\n"); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "any_work" + , col_width[2], stats.any_work); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "no_work" + , col_width[2], stats.no_work); + statsPrintf("%*s" "%*s" "%*" FMT_Word64 "\n" + , col_width[0], "" + , col_width[1], "scav_find_work" + , col_width[2], stats.scav_find_work); +#elif defined(THREADED_RTS) // THREADED_RTS && PROF_SPIN + statsPrintf("Internal Counters require the RTS to be built " + "with PROF_SPIN"); // PROF_SPIN is not #defined here +#else // THREADED_RTS + statsPrintf("Internal Counters require the threaded RTS"); #endif + } } if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) { @@ -917,6 +1022,68 @@ the number of gc threads is limited to the number of cores. See #13830 */ +/* +Note [Internal Counter Stats] +----------------------------- +What do the counts at the end of a '+RTS -s --internal-counters' report mean? +They are detailed below. Most of these counters are used by multiple threads +with no attempt at synchronisation. This means that reported values may be +lower than the true value and this becomes more likely and more severe as +contention increases. + +The first counters are for various SpinLock-like constructs in the RTS. See +Spinlock.h for the definition of a SpinLock. We maintain up two counters per +SpinLock: +* spin: The number of busy-spins over the length of the program. +* yield: The number of times the SpinLock spun SPIN_COUNT times without success + and called yieldThread(). +Not all of these are actual SpinLocks, see the details below. + +Actual SpinLocks: +* gc_alloc_block: + This SpinLock protects the block allocator and free list manager. See + BlockAlloc.c. +* gc_spin and mut_spin: + These SpinLocks are used to herd gc worker threads during parallel garbage + collection. See gcWorkerThread, wakeup_gc_threads and releaseGCThreads. +* gen[g].sync: + These SpinLocks, one per generation, protect the generations[g] data + structure during garbage collection. + +waitForGcThreads: + These counters are incremented while we wait for all threads to be ready + for a parallel garbage collection. We yield more than we spin in this case. + +In several places in the runtime we must take a lock on a closure. To do this, +we replace it's info table with stg_WHITEHOLE_info, spinning if it is already +a white-hole. Sometimes we yieldThread() if we spin too long, sometimes we +don't. We count these white-hole spins and include them in the SpinLocks table. +If a particular loop does not yield, we put "n/a" in the table. They are named +for the function that has the spinning loop except that several loops in the +garbage collector accumulate into whitehole_gc. +TODO: Should these counters be more or less granular? + +white-hole spin counters: +* whitehole_gc +* whitehole_lockClosure +* whitehole_executeMessage +* whitehole_threadPaused + + +We count the number of calls of several functions in the parallel garbage +collector. + +Parallel garbage collector counters: +* any_work: + A cheap function called whenever a gc_thread is ready for work. Does + not do any work. +* no_work: + Incremented whenever any_work finds no work. +* scav_find_work: + Called to do work when any_work return true. + +*/ + /* ----------------------------------------------------------------------------- stat_describe_gens diff --git a/rts/Stats.h b/rts/Stats.h index 5d9cf04fa7..1c56344f81 100644 --- a/rts/Stats.h +++ b/rts/Stats.h @@ -30,7 +30,10 @@ void stat_startGCSync(struct gc_thread_ *_gct); void stat_startGC(Capability *cap, struct gc_thread_ *_gct); void stat_endGC (Capability *cap, struct gc_thread_ *_gct, W_ live, W_ copied, W_ slop, uint32_t gen, uint32_t n_gc_threads, - W_ par_max_copied, W_ par_balanced_copied); + W_ par_max_copied, W_ par_balanced_copied, + W_ gc_spin_spin, W_ gc_spin_yield, W_ mut_spin_spin, + W_ mut_spin_yield, W_ any_work, W_ no_work, + W_ scav_find_work); #if defined(PROFILING) void stat_startRP(void); diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index 361989d0d2..595d3ce6c2 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -375,11 +375,21 @@ loop: // spin until the WHITEHOLE is updated info = StgHeader_info(node); if (info == stg_WHITEHOLE_info) { +#if defined(PROF_SPIN) + W_[whitehole_lockClosure_spin] = + W_[whitehole_lockClosure_spin] + 1; +#endif i = i + 1; if (i == SPIN_COUNT) { i = 0; +#if defined(PROF_SPIN) + W_[whitehole_lockClosure_yield] = + W_[whitehole_lockClosure_yield] + 1; +#endif ccall yieldThread(); } + // TODO: We should busy_wait_nop() here, but that's not currently + // defined in CMM. goto loop; } jump %ENTRY_CODE(info) (node); diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c index 7ade0a64c5..3f7bddeb79 100644 --- a/rts/ThreadPaused.c +++ b/rts/ThreadPaused.c @@ -329,6 +329,10 @@ threadPaused(Capability *cap, StgTSO *tso) if (cur_bh_info != bh_info) { bh_info = cur_bh_info; +#if defined(PROF_SPIN) + ++whitehole_threadPaused_spin; +#endif + busy_wait_nop(); goto retry; } #endif diff --git a/rts/ThreadPaused.h b/rts/ThreadPaused.h index 4d762f9aed..ee25189c20 100644 --- a/rts/ThreadPaused.h +++ b/rts/ThreadPaused.h @@ -8,4 +8,12 @@ #pragma once +#include "BeginPrivate.h" + RTS_PRIVATE void threadPaused ( Capability *cap, StgTSO * ); + +#include "EndPrivate.h" + +#if defined(THREADED_RTS) && defined(PROF_SPIN) +extern volatile StgWord64 whitehole_threadPaused_spin; +#endif diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index bb54c7efc3..27f280665e 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -1058,9 +1058,14 @@ selector_chain: // In threaded mode, we'll use WHITEHOLE to lock the selector // thunk while we evaluate it. { - do { + while(true) { info_ptr = xchg((StgPtr)&p->header.info, (W_)&stg_WHITEHOLE_info); - } while (info_ptr == (W_)&stg_WHITEHOLE_info); + if (info_ptr != (W_)&stg_WHITEHOLE_info) { break; } +#if defined(PROF_SPIN) + ++whitehole_gc_spin; +#endif + busy_wait_nop(); + } // make sure someone else didn't get here first... if (IS_FORWARDING_PTR(info_ptr) || diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 54797ba0f0..d61ca41a6b 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -139,6 +139,9 @@ uint32_t n_gc_threads; static long copied; // *words* copied & scavenged during this GC #if defined(PROF_SPIN) && defined(THREADED_RTS) +// spin and yield counts for the quasi-SpinLock in waitForGcThreads +volatile StgWord64 waitForGcThreads_spin = 0; +volatile StgWord64 waitForGcThreads_yield = 0; volatile StgWord64 whitehole_gc_spin = 0; #endif @@ -198,7 +201,9 @@ GarbageCollect (uint32_t collect_gen, { bdescr *bd; generation *gen; - StgWord live_blocks, live_words, par_max_copied, par_balanced_copied; + StgWord live_blocks, live_words, par_max_copied, par_balanced_copied, + gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield, + any_work, no_work, scav_find_work; #if defined(THREADED_RTS) gc_thread *saved_gct; #endif @@ -471,32 +476,53 @@ GarbageCollect (uint32_t collect_gen, copied = 0; par_max_copied = 0; par_balanced_copied = 0; + gc_spin_spin = 0; + gc_spin_yield = 0; + mut_spin_spin = 0; + mut_spin_yield = 0; + any_work = 0; + no_work = 0; + scav_find_work = 0; { uint32_t i; uint64_t par_balanced_copied_acc = 0; + const gc_thread* thread; for (i=0; i < n_gc_threads; i++) { copied += gc_threads[i]->copied; } for (i=0; i < n_gc_threads; i++) { + thread = gc_threads[i]; if (n_gc_threads > 1) { debugTrace(DEBUG_gc,"thread %d:", i); - debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_)); - debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_)); - debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work); - debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work); - debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work); + debugTrace(DEBUG_gc," copied %ld", + thread->copied * sizeof(W_)); + debugTrace(DEBUG_gc," scanned %ld", + thread->scanned * sizeof(W_)); + debugTrace(DEBUG_gc," any_work %ld", + thread->any_work); + debugTrace(DEBUG_gc," no_work %ld", + thread->no_work); + debugTrace(DEBUG_gc," scav_find_work %ld", + thread->scav_find_work); + +#if defined(THREADED_RTS) && defined(PROF_SPIN) + gc_spin_spin += thread->gc_spin.spin; + gc_spin_yield += thread->gc_spin.yield; + mut_spin_spin += thread->mut_spin.spin; + mut_spin_yield += thread->mut_spin.yield; +#endif + + any_work += thread->any_work; + no_work += thread->no_work; + scav_find_work += thread->scav_find_work; + + par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied); + par_balanced_copied_acc += + stg_min(n_gc_threads * gc_threads[i]->copied, copied); } - par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied); - par_balanced_copied_acc += - stg_min(n_gc_threads * gc_threads[i]->copied, copied); - } - if (n_gc_threads == 1) { - par_max_copied = 0; - par_balanced_copied = 0; } - else - { + if (n_gc_threads > 1) { // See Note [Work Balance] for an explanation of this computation par_balanced_copied = (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) / @@ -834,7 +860,9 @@ GarbageCollect (uint32_t collect_gen, // ok, GC over: tell the stats department what happened. stat_endGC(cap, gct, live_words, copied, live_blocks * BLOCK_SIZE_W - live_words /* slop */, - N, n_gc_threads, par_max_copied, par_balanced_copied); + N, n_gc_threads, par_max_copied, par_balanced_copied, + gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield, + any_work, no_work, scav_find_work); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { @@ -1186,6 +1214,9 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[]) } } if (!retry) break; +#if defined(PROF_SPIN) + waitForGcThreads_yield++; +#endif yieldThread(); } @@ -1196,6 +1227,14 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[]) rtsConfig.longGCSync(cap->no, t2 - t0); t1 = t2; } + if (retry) { +#if defined(PROF_SPIN) + // This is a bit strange, we'll get more yields than spins. + // I guess that means it's not a spin-lock at all, but these + // numbers are still useful (I think). + waitForGcThreads_spin++; +#endif + } } if (RtsFlags.GcFlags.longGCSync != 0 && diff --git a/rts/sm/GC.h b/rts/sm/GC.h index 78f054931a..7fce87edd4 100644 --- a/rts/sm/GC.h +++ b/rts/sm/GC.h @@ -47,6 +47,8 @@ extern uint32_t mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS, #if defined(PROF_SPIN) && defined(THREADED_RTS) extern volatile StgWord64 whitehole_gc_spin; +extern volatile StgWord64 waitForGcThreads_spin; +extern volatile StgWord64 waitForGcThreads_yield; #endif void gcWorkerThread (Capability *cap); diff --git a/testsuite/tests/rts/InternalCounters.stdout b/testsuite/tests/rts/InternalCounters.stdout new file mode 100644 index 0000000000..d764d7bc19 --- /dev/null +++ b/testsuite/tests/rts/InternalCounters.stdout @@ -0,0 +1 @@ +Internal Counters:
\ No newline at end of file diff --git a/testsuite/tests/rts/Makefile b/testsuite/tests/rts/Makefile index ded3be1b3b..630508542d 100644 --- a/testsuite/tests/rts/Makefile +++ b/testsuite/tests/rts/Makefile @@ -48,13 +48,13 @@ T5423: .PHONY: T9405 T9405: @'$(TEST_HC)' $(TEST_HC_OPTS) -ticky -rtsopts T9405.hs; \ - ./T9405 +RTS -rT9405.ticky & \ - sleep 0.2; \ - kill -2 $$!; \ - wait $$!; \ - [ -e T9405.ticky ] || echo "Error: Ticky profile doesn't exist"; \ - [ -s T9405.ticky ] || echo "Error: Ticky profile is empty"; \ - echo Ticky-Ticky; + ./T9405 +RTS -rT9405.ticky & \ + sleep 0.2; \ + kill -2 $$!; \ + wait $$!; \ + [ -e T9405.ticky ] || echo "Error: Ticky profile doesn't exist"; \ + [ -s T9405.ticky ] || echo "Error: Ticky profile is empty"; \ + echo Ticky-Ticky; # Naming convention: 'T5423_' obj-way '_' obj-src # obj-way ::= v | dyn @@ -178,3 +178,8 @@ T12497: .PHONY: T14695 T14695: echo ":quit" | LD_LIBRARY_PATH="foo:" "$(TEST_HC)" $(filter-out -rtsopts, $(TEST_HC_OPTS_INTERACTIVE)) + +.PHONY: InternalCounters +InternalCounters: + "$(TEST_HC)" +RTS -s --internal-counters -RTS 2>&1 | grep "Internal Counters" + -"$(TEST_HC)" +RTS -s -RTS 2>&1 | grep "Internal Counters" diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T index 5000a914db..cd70132610 100644 --- a/testsuite/tests/rts/all.T +++ b/testsuite/tests/rts/all.T @@ -390,3 +390,5 @@ test('T14702', [ ignore_stdout , compile_and_run, ['']) test('T14900', normal, compile_and_run, ['-package ghc-compact']) +test('InternalCounters', normal, run_command, + ['$MAKE -s --no-print-directory InternalCounters']) |