diff options
author | Simon Marlow <marlowsd@gmail.com> | 2016-11-25 16:45:43 +0000 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2016-12-06 15:25:50 +0000 |
commit | 24e6594cc7890babe69b8ba122d171affabad2d1 (patch) | |
tree | 0efef02a3e03787e9e6ee9822cb20efc7d48fec5 /includes | |
parent | eec02ab7c8433465cc8d6be0a8889e7c6a222fb0 (diff) | |
download | haskell-24e6594cc7890babe69b8ba122d171affabad2d1.tar.gz |
Overhaul GC stats
Summary:
Visible API changes:
* The C struct `GCDetails` gives the stats about a single GC. This is
passed to the `gcDone()` callback if one is set via the
RtsConfig. (previously we just passed a collection of values, so this
is more extensible, at the expense of breaking the existing API)
* `RTSStats` gives cumulative stats since the start of the program,
and includes the `GCDetails` for the most recent GC. This struct
can be obtained via `getRTSStats()` (the old `getGCStats()` has been
removed, and `getGCStatsEnabled()` has been renamed to
`getRTSStatsEnabled()`)
Improvements:
* The per-GC stats and cumulative stats are now cleanly separated.
* Inside the RTS we have a top-level `RTSStats` struct to keep all our
stats in; previously this was just a collection of strangely-named
variables. This struct is mostly just copied in `getRTSStats()`, so
the implementation of that function is a lot shorter.
* Types are more consistent. We use a uint64_t byte count for all
memory values, and Time for all time values.
* Names are more consistent. We use a suffix `_bytes` for all byte
counts and `_ns` for all time values.
* We now collect information about the amount of memory in large
objects and compact objects in `GCDetails`. (the latter was the reason
I started doing this patch but it seems to have ballooned a bit!)
* I fixed a bug in the calculation of the elapsed MUT time, and added
an ASSERT to stop the calculations going wrong in the future.
For now I kept the Haskell API in `GHC.Stats` the same, by
impedance-matching with the new API. We could either break that API
and make it match the C API more closely, or we could add a new API
and deprecate the old one. Opinions welcome.
This stuff is very easy to get wrong, and it's hard to test. Reviews
welcome!
Test Plan:
manual testing
validate
Reviewers: bgamari, niteria, austin, ezyang, hvr, erikd, rwbarton, Phyx
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D2756
Diffstat (limited to 'includes')
-rw-r--r-- | includes/Rts.h | 33 | ||||
-rw-r--r-- | includes/RtsAPI.h | 115 | ||||
-rw-r--r-- | includes/rts/Time.h | 43 | ||||
-rw-r--r-- | includes/rts/storage/GC.h | 55 |
4 files changed, 150 insertions, 96 deletions
diff --git a/includes/Rts.h b/includes/Rts.h index be81b0d9c7..0599df655c 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -39,6 +39,7 @@ extern "C" { #endif #include "rts/Types.h" +#include "rts/Time.h" #if __GNUC__ >= 3 #define ATTRIBUTE_ALIGNED(n) __attribute__((aligned(n))) @@ -145,38 +146,6 @@ void _assertFail(const char *filename, unsigned int linenum) #define FMT_HexSizeT "zx" /* ----------------------------------------------------------------------------- - Time values in the RTS - -------------------------------------------------------------------------- */ - -// For most time values in the RTS we use a fixed resolution of nanoseconds, -// normalising the time we get from platform-dependent APIs to this -// resolution. -#define TIME_RESOLUTION 1000000000 -typedef StgInt64 Time; - -#define TIME_MAX HS_INT64_MAX - -#if TIME_RESOLUTION == 1000000000 -// I'm being lazy, but it's awkward to define fully general versions of these -#define TimeToUS(t) ((t) / 1000) -#define TimeToNS(t) (t) -#define USToTime(t) ((Time)(t) * 1000) -#define NSToTime(t) ((Time)(t)) -#else -#error Fix TimeToNS(), TimeToUS() etc. -#endif - -#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION) -#define TimeToSeconds(t) ((t) / TIME_RESOLUTION) - -// Use instead of SecondsToTime() when we have a floating-point -// seconds value, to avoid truncating it. 
-INLINE_HEADER Time fsecondsToTime (double t) -{ - return (Time)(t * TIME_RESOLUTION); -} - -/* ----------------------------------------------------------------------------- Include everything STG-ish -------------------------------------------------------------------------- */ diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h index 4dccb84fd2..2c68219b8b 100644 --- a/includes/RtsAPI.h +++ b/includes/RtsAPI.h @@ -17,6 +17,7 @@ extern "C" { #endif #include "HsFFI.h" +#include "rts/Time.h" /* * Running the scheduler @@ -56,6 +57,8 @@ typedef enum { RtsOptsAll // all RTS options allowed } RtsOptsEnabledEnum; +struct GCDetails_; + // The RtsConfig struct is passed (by value) to hs_init_ghc(). The // reason for using a struct is extensibility: we can add more // fields to this later without breaking existing client code. @@ -93,15 +96,7 @@ typedef struct { void (* mallocFailHook) (W_ request_size /* in bytes */, const char *msg); // Called for every GC - void (* gcDoneHook) (unsigned int gen, - W_ allocated_bytes, /* since last GC */ - W_ live_bytes, - W_ copied_bytes, - W_ max_copied_per_thread_bytes, - W_ total_bytes, - W_ slop_bytes, - W_ sync_elapsed_ns, W_ elapsed_ns, W_ cpu_ns); - + void (* gcDoneHook) (const struct GCDetails_ *stats); } RtsConfig; // Clients should start with defaultRtsConfig and then customise it. @@ -109,6 +104,108 @@ typedef struct { // you can't do that in C (it generates code). 
extern const RtsConfig defaultRtsConfig; +/* ----------------------------------------------------------------------------- + Statistics + -------------------------------------------------------------------------- */ + +// +// Stats about a single GC +// +typedef struct GCDetails_ { + // The generation number of this GC + uint32_t gen; + // Number of threads used in this GC + uint32_t threads; + // Number of bytes allocated since the previous GC + uint64_t allocated_bytes; + // Total amount of live data in the heap (incliudes large + compact data) + uint64_t live_bytes; + // Total amount of live data in large objects + uint64_t large_objects_bytes; + // Total amount of live data in compact regions + uint64_t compact_bytes; + // Total amount of slop (wasted memory) + uint64_t slop_bytes; + // Total amount of memory in use by the RTS + uint64_t mem_in_use_bytes; + // Total amount of data copied during this GC + uint64_t copied_bytes; + // In parallel GC, the max amount of data copied by any one thread + uint64_t par_max_copied_bytes; + // The time elapsed during synchronisation before GC + Time sync_elapsed_ns; + // The CPU time used during GC itself + Time cpu_ns; + // The time elapsed during GC itself + Time elapsed_ns; +} GCDetails; + +// +// Stats about the RTS currently, and since the start of execution +// +typedef struct _RTSStats { + + // ----------------------------------- + // Cumulative stats about memory use + + // Total number of GCs + uint32_t gcs; + // Total number of major (oldest generation) GCs + uint32_t major_gcs; + // Total bytes allocated + uint64_t allocated_bytes; + // Maximum live data (including large objects + compact regions) + uint64_t max_live_bytes; + // Maximum live data in large objects + uint64_t max_large_objects_bytes; + // Maximum live data in compact regions + uint64_t max_compact_bytes; + // Maximum slop + uint64_t max_slop_bytes; + // Maximum memory in use by the RTS + uint64_t max_mem_in_use_bytes; + // Sum of live bytes across 
all major GCs. Divided by major_gcs + // gives the average live data over the lifetime of the program. + uint64_t cumulative_live_bytes; + // Sum of copied_bytes across all GCs + uint64_t copied_bytes; + // Sum of copied_bytes across all parallel GCs + uint64_t par_copied_bytes; + // Sum of par_max_copied_bytes across all parallel GCs + uint64_t cumulative_par_max_copied_bytes; + + // ----------------------------------- + // Cumulative stats about time use + // (we use signed values here because due to inacuracies in timers + // the values can occasionally go slightly negative) + + // Total CPU time used by the mutator + Time mutator_cpu_ns; + // Total elapsed time used by the mutator + Time mutator_elapsed_ns; + // Total CPU time used by the GC + Time gc_cpu_ns; + // Total elapsed time used by the GC + Time gc_elapsed_ns; + // Total CPU time (at the previous GC) + Time cpu_ns; + // Total elapsed time (at the previous GC) + Time elapsed_ns; + + // ----------------------------------- + // Stats about the most recent GC + + GCDetails gc; + +} RTSStats; + +void getRTSStats (RTSStats *s); +int getRTSStatsEnabled (void); + +// Returns the total number of bytes allocated since the start of the program. +// TODO: can we remove this? +uint64_t getAllocations (void); + /* ---------------------------------------------------------------------------- Starting up and shutting down the Haskell RTS. 
------------------------------------------------------------------------- */ diff --git a/includes/rts/Time.h b/includes/rts/Time.h new file mode 100644 index 0000000000..a1debedea0 --- /dev/null +++ b/includes/rts/Time.h @@ -0,0 +1,43 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2004 + * + * Time values in the RTS + * + * To understand the structure of the RTS headers, see the wiki: + * http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes + * + * --------------------------------------------------------------------------*/ + +#ifndef RTSTIME_H +#define RTSTIME_H + +// For most time values in the RTS we use a fixed resolution of nanoseconds, +// normalising the time we get from platform-dependent APIs to this +// resolution. +#define TIME_RESOLUTION 1000000000 +typedef int64_t Time; + +#define TIME_MAX HS_INT64_MAX + +#if TIME_RESOLUTION == 1000000000 +// I'm being lazy, but it's awkward to define fully general versions of these +#define TimeToUS(t) ((t) / 1000) +#define TimeToNS(t) (t) +#define USToTime(t) ((Time)(t) * 1000) +#define NSToTime(t) ((Time)(t)) +#else +#error Fix TimeToNS(), TimeToUS() etc. +#endif + +#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION) +#define TimeToSeconds(t) ((t) / TIME_RESOLUTION) + +// Use instead of SecondsToTime() when we have a floating-point +// seconds value, to avoid truncating it. 
+INLINE_HEADER Time fsecondsToTime (double t) +{ + return (Time)(t * TIME_RESOLUTION); +} + +#endif // RTSTIME_H diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index f15fd2a7cf..ddc4238592 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -223,61 +223,6 @@ void revertCAFs (void); void setKeepCAFs (void); /* ----------------------------------------------------------------------------- - Stats - -------------------------------------------------------------------------- */ - -typedef struct _GCStats { - StgWord64 bytes_allocated; - StgWord64 num_gcs; - StgWord64 num_byte_usage_samples; - StgWord64 max_bytes_used; - StgWord64 cumulative_bytes_used; - StgWord64 bytes_copied; - StgWord64 current_bytes_used; - StgWord64 current_bytes_slop; - StgWord64 max_bytes_slop; - StgWord64 peak_megabytes_allocated; - StgWord64 mblocks_allocated; - StgWord64 par_tot_bytes_copied; - StgWord64 par_max_bytes_copied; - StgDouble mutator_cpu_seconds; - StgDouble mutator_wall_seconds; - StgDouble gc_cpu_seconds; - StgDouble gc_wall_seconds; - StgDouble cpu_seconds; - StgDouble wall_seconds; -} GCStats; -void getGCStats (GCStats *s); -bool getGCStatsEnabled (void); - -// These don't change over execution, so do them elsewhere -// StgDouble init_cpu_seconds; -// StgDouble init_wall_seconds; - -typedef struct _ParGCStats { - StgWord64 tot_copied; - StgWord64 max_copied; -} ParGCStats; -void getParGCStats (ParGCStats *s); - -/* -typedef struct _TaskStats { - StgWord64 mut_time; - StgWord64 mut_etime; - StgWord64 gc_time; - StgWord64 gc_etime; -} TaskStats; -// would need to allocate arbitrarily large amount of memory -// because it's a linked list of results -void getTaskStats (TaskStats **s); -// Need to stuff SparkCounters in a public header file... -void getSparkStats (SparkCounters *s); -*/ - -// Returns the total number of bytes allocated since the start of the program. 
-HsInt64 getAllocations (void); - -/* ----------------------------------------------------------------------------- This is the write barrier for MUT_VARs, a.k.a. IORefs. A MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY |