summaryrefslogtreecommitdiff
path: root/includes
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2016-11-25 16:45:43 +0000
committerSimon Marlow <marlowsd@gmail.com>2016-12-06 15:25:50 +0000
commit24e6594cc7890babe69b8ba122d171affabad2d1 (patch)
tree0efef02a3e03787e9e6ee9822cb20efc7d48fec5 /includes
parenteec02ab7c8433465cc8d6be0a8889e7c6a222fb0 (diff)
downloadhaskell-24e6594cc7890babe69b8ba122d171affabad2d1.tar.gz
Overhaul GC stats
Summary: Visible API changes: * The C struct `GCDetails` gives the stats about a single GC. This is passed to the `gcDoneHook()` callback if one is set via the RtsConfig. (previously we just passed a collection of values, so this is more extensible, at the expense of breaking the existing API) * `RTSStats` gives cumulative stats since the start of the program, and includes the `GCDetails` for the most recent GC. This struct can be obtained via `getRTSStats()` (the old `getGCStats()` has been removed, and `getGCStatsEnabled()` has been renamed to `getRTSStatsEnabled()`) Improvements: * The per-GC stats and cumulative stats are now cleanly separated. * Inside the RTS we have a top-level `RTSStats` struct to keep all our stats in, previously this was just a collection of strangely-named variables. This struct is mostly just copied in `getRTSStats()`, so the implementation of that function is a lot shorter. * Types are more consistent. We use a uint64_t byte count for all memory values, and Time for all time values. * Names are more consistent. We use a suffix `_bytes` for all byte counts and `_ns` for all time values. * We now collect information about the amount of memory in large objects and compact objects in `GCDetails`. (the latter was the reason I started doing this patch but it seems to have ballooned a bit!) * I fixed a bug in the calculation of the elapsed MUT time, and added an ASSERT to stop the calculations going wrong in the future. For now I kept the Haskell API in `GHC.Stats` the same, by impedance-matching with the new API. We could either break that API and make it match the C API more closely, or we could add a new API and deprecate the old one. Opinions welcome. This stuff is very easy to get wrong, and it's hard to test. Reviews welcome! Test Plan: manual testing validate Reviewers: bgamari, niteria, austin, ezyang, hvr, erikd, rwbarton, Phyx Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D2756
Diffstat (limited to 'includes')
-rw-r--r--includes/Rts.h33
-rw-r--r--includes/RtsAPI.h115
-rw-r--r--includes/rts/Time.h43
-rw-r--r--includes/rts/storage/GC.h55
4 files changed, 150 insertions, 96 deletions
diff --git a/includes/Rts.h b/includes/Rts.h
index be81b0d9c7..0599df655c 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -39,6 +39,7 @@ extern "C" {
#endif
#include "rts/Types.h"
+#include "rts/Time.h"
#if __GNUC__ >= 3
#define ATTRIBUTE_ALIGNED(n) __attribute__((aligned(n)))
@@ -145,38 +146,6 @@ void _assertFail(const char *filename, unsigned int linenum)
#define FMT_HexSizeT "zx"
/* -----------------------------------------------------------------------------
- Time values in the RTS
- -------------------------------------------------------------------------- */
-
-// For most time values in the RTS we use a fixed resolution of nanoseconds,
-// normalising the time we get from platform-dependent APIs to this
-// resolution.
-#define TIME_RESOLUTION 1000000000
-typedef StgInt64 Time;
-
-#define TIME_MAX HS_INT64_MAX
-
-#if TIME_RESOLUTION == 1000000000
-// I'm being lazy, but it's awkward to define fully general versions of these
-#define TimeToUS(t) ((t) / 1000)
-#define TimeToNS(t) (t)
-#define USToTime(t) ((Time)(t) * 1000)
-#define NSToTime(t) ((Time)(t))
-#else
-#error Fix TimeToNS(), TimeToUS() etc.
-#endif
-
-#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION)
-#define TimeToSeconds(t) ((t) / TIME_RESOLUTION)
-
-// Use instead of SecondsToTime() when we have a floating-point
-// seconds value, to avoid truncating it.
-INLINE_HEADER Time fsecondsToTime (double t)
-{
- return (Time)(t * TIME_RESOLUTION);
-}
-
-/* -----------------------------------------------------------------------------
Include everything STG-ish
-------------------------------------------------------------------------- */
diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h
index 4dccb84fd2..2c68219b8b 100644
--- a/includes/RtsAPI.h
+++ b/includes/RtsAPI.h
@@ -17,6 +17,7 @@ extern "C" {
#endif
#include "HsFFI.h"
+#include "rts/Time.h"
/*
* Running the scheduler
@@ -56,6 +57,8 @@ typedef enum {
RtsOptsAll // all RTS options allowed
} RtsOptsEnabledEnum;
+struct GCDetails_;
+
// The RtsConfig struct is passed (by value) to hs_init_ghc(). The
// reason for using a struct is extensibility: we can add more
// fields to this later without breaking existing client code.
@@ -93,15 +96,7 @@ typedef struct {
void (* mallocFailHook) (W_ request_size /* in bytes */, const char *msg);
// Called for every GC
- void (* gcDoneHook) (unsigned int gen,
- W_ allocated_bytes, /* since last GC */
- W_ live_bytes,
- W_ copied_bytes,
- W_ max_copied_per_thread_bytes,
- W_ total_bytes,
- W_ slop_bytes,
- W_ sync_elapsed_ns, W_ elapsed_ns, W_ cpu_ns);
-
+ void (* gcDoneHook) (const struct GCDetails_ *stats);
} RtsConfig;
// Clients should start with defaultRtsConfig and then customise it.
@@ -109,6 +104,108 @@ typedef struct {
// you can't do that in C (it generates code).
extern const RtsConfig defaultRtsConfig;
+/* -----------------------------------------------------------------------------
+ Statistics
+ -------------------------------------------------------------------------- */
+
+//
+// Stats about a single GC
+//
+typedef struct GCDetails_ {
+ // The generation number of this GC
+ uint32_t gen;
+ // Number of threads used in this GC
+ uint32_t threads;
+ // Number of bytes allocated since the previous GC
+ uint64_t allocated_bytes;
+ // Total amount of live data in the heap (includes large + compact data)
+ uint64_t live_bytes;
+ // Total amount of live data in large objects
+ uint64_t large_objects_bytes;
+ // Total amount of live data in compact regions
+ uint64_t compact_bytes;
+ // Total amount of slop (wasted memory)
+ uint64_t slop_bytes;
+ // Total amount of memory in use by the RTS
+ uint64_t mem_in_use_bytes;
+ // Total amount of data copied during this GC
+ uint64_t copied_bytes;
+ // In parallel GC, the max amount of data copied by any one thread
+ uint64_t par_max_copied_bytes;
+ // The time elapsed during synchronisation before GC
+ Time sync_elapsed_ns;
+ // The CPU time used during GC itself
+ Time cpu_ns;
+ // The time elapsed during GC itself
+ Time elapsed_ns;
+} GCDetails;
+
+//
+// Stats about the RTS currently, and since the start of execution
+//
+typedef struct _RTSStats {
+
+ // -----------------------------------
+ // Cumulative stats about memory use
+
+ // Total number of GCs
+ uint32_t gcs;
+ // Total number of major (oldest generation) GCs
+ uint32_t major_gcs;
+ // Total bytes allocated
+ uint64_t allocated_bytes;
+ // Maximum live data (including large objects + compact regions)
+ uint64_t max_live_bytes;
+ // Maximum live data in large objects
+ uint64_t max_large_objects_bytes;
+ // Maximum live data in compact regions
+ uint64_t max_compact_bytes;
+ // Maximum slop
+ uint64_t max_slop_bytes;
+ // Maximum memory in use by the RTS
+ uint64_t max_mem_in_use_bytes;
+ // Sum of live bytes across all major GCs. Divided by major_gcs
+ // gives the average live data over the lifetime of the program.
+ uint64_t cumulative_live_bytes;
+ // Sum of copied_bytes across all GCs
+ uint64_t copied_bytes;
+ // Sum of copied_bytes across all parallel GCs
+ uint64_t par_copied_bytes;
+ // Sum of par_max_copied_bytes across all parallel GCs
+ uint64_t cumulative_par_max_copied_bytes;
+
+ // -----------------------------------
+ // Cumulative stats about time use
+ // (we use signed values here because due to inaccuracies in timers
+ // the values can occasionally go slightly negative)
+
+ // Total CPU time used by the mutator
+ Time mutator_cpu_ns;
+ // Total elapsed time used by the mutator
+ Time mutator_elapsed_ns;
+ // Total CPU time used by the GC
+ Time gc_cpu_ns;
+ // Total elapsed time used by the GC
+ Time gc_elapsed_ns;
+ // Total CPU time (at the previous GC)
+ Time cpu_ns;
+ // Total elapsed time (at the previous GC)
+ Time elapsed_ns;
+
+ // -----------------------------------
+ // Stats about the most recent GC
+
+ GCDetails gc;
+
+} RTSStats;
+
+void getRTSStats (RTSStats *s);
+int getRTSStatsEnabled (void);
+
+// Returns the total number of bytes allocated since the start of the program.
+// TODO: can we remove this?
+uint64_t getAllocations (void);
+
/* ----------------------------------------------------------------------------
Starting up and shutting down the Haskell RTS.
------------------------------------------------------------------------- */
diff --git a/includes/rts/Time.h b/includes/rts/Time.h
new file mode 100644
index 0000000000..a1debedea0
--- /dev/null
+++ b/includes/rts/Time.h
@@ -0,0 +1,43 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Time values in the RTS
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
+ *
+ * --------------------------------------------------------------------------*/
+
+#ifndef RTSTIME_H
+#define RTSTIME_H
+
+// For most time values in the RTS we use a fixed resolution of nanoseconds,
+// normalising the time we get from platform-dependent APIs to this
+// resolution.
+#define TIME_RESOLUTION 1000000000
+typedef int64_t Time;
+
+#define TIME_MAX HS_INT64_MAX
+
+#if TIME_RESOLUTION == 1000000000
+// I'm being lazy, but it's awkward to define fully general versions of these
+#define TimeToUS(t) ((t) / 1000)
+#define TimeToNS(t) (t)
+#define USToTime(t) ((Time)(t) * 1000)
+#define NSToTime(t) ((Time)(t))
+#else
+#error Fix TimeToNS(), TimeToUS() etc.
+#endif
+
+#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION)
+#define TimeToSeconds(t) ((t) / TIME_RESOLUTION)
+
+// Use instead of SecondsToTime() when we have a floating-point
+// seconds value, to avoid truncating it.
+INLINE_HEADER Time fsecondsToTime (double t)
+{
+ return (Time)(t * TIME_RESOLUTION);
+}
+
+#endif // RTSTIME_H
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
index f15fd2a7cf..ddc4238592 100644
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -223,61 +223,6 @@ void revertCAFs (void);
void setKeepCAFs (void);
/* -----------------------------------------------------------------------------
- Stats
- -------------------------------------------------------------------------- */
-
-typedef struct _GCStats {
- StgWord64 bytes_allocated;
- StgWord64 num_gcs;
- StgWord64 num_byte_usage_samples;
- StgWord64 max_bytes_used;
- StgWord64 cumulative_bytes_used;
- StgWord64 bytes_copied;
- StgWord64 current_bytes_used;
- StgWord64 current_bytes_slop;
- StgWord64 max_bytes_slop;
- StgWord64 peak_megabytes_allocated;
- StgWord64 mblocks_allocated;
- StgWord64 par_tot_bytes_copied;
- StgWord64 par_max_bytes_copied;
- StgDouble mutator_cpu_seconds;
- StgDouble mutator_wall_seconds;
- StgDouble gc_cpu_seconds;
- StgDouble gc_wall_seconds;
- StgDouble cpu_seconds;
- StgDouble wall_seconds;
-} GCStats;
-void getGCStats (GCStats *s);
-bool getGCStatsEnabled (void);
-
-// These don't change over execution, so do them elsewhere
-// StgDouble init_cpu_seconds;
-// StgDouble init_wall_seconds;
-
-typedef struct _ParGCStats {
- StgWord64 tot_copied;
- StgWord64 max_copied;
-} ParGCStats;
-void getParGCStats (ParGCStats *s);
-
-/*
-typedef struct _TaskStats {
- StgWord64 mut_time;
- StgWord64 mut_etime;
- StgWord64 gc_time;
- StgWord64 gc_etime;
-} TaskStats;
-// would need to allocate arbitrarily large amount of memory
-// because it's a linked list of results
-void getTaskStats (TaskStats **s);
-// Need to stuff SparkCounters in a public header file...
-void getSparkStats (SparkCounters *s);
-*/
-
-// Returns the total number of bytes allocated since the start of the program.
-HsInt64 getAllocations (void);
-
-/* -----------------------------------------------------------------------------
This is the write barrier for MUT_VARs, a.k.a. IORefs. A
MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY