summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--includes/Rts.h33
-rw-r--r--includes/RtsAPI.h115
-rw-r--r--includes/rts/Time.h43
-rw-r--r--includes/rts/storage/GC.h55
-rw-r--r--libraries/base/GHC/Stats.hsc260
-rw-r--r--rts/RtsSymbols.c4
-rw-r--r--rts/Stats.c437
-rw-r--r--rts/Stats.h2
-rw-r--r--rts/sm/GC.c7
-rw-r--r--rts/sm/Storage.c22
-rw-r--r--rts/sm/Storage.h4
11 files changed, 598 insertions, 384 deletions
diff --git a/includes/Rts.h b/includes/Rts.h
index be81b0d9c7..0599df655c 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -39,6 +39,7 @@ extern "C" {
#endif
#include "rts/Types.h"
+#include "rts/Time.h"
#if __GNUC__ >= 3
#define ATTRIBUTE_ALIGNED(n) __attribute__((aligned(n)))
@@ -145,38 +146,6 @@ void _assertFail(const char *filename, unsigned int linenum)
#define FMT_HexSizeT "zx"
/* -----------------------------------------------------------------------------
- Time values in the RTS
- -------------------------------------------------------------------------- */
-
-// For most time values in the RTS we use a fixed resolution of nanoseconds,
-// normalising the time we get from platform-dependent APIs to this
-// resolution.
-#define TIME_RESOLUTION 1000000000
-typedef StgInt64 Time;
-
-#define TIME_MAX HS_INT64_MAX
-
-#if TIME_RESOLUTION == 1000000000
-// I'm being lazy, but it's awkward to define fully general versions of these
-#define TimeToUS(t) ((t) / 1000)
-#define TimeToNS(t) (t)
-#define USToTime(t) ((Time)(t) * 1000)
-#define NSToTime(t) ((Time)(t))
-#else
-#error Fix TimeToNS(), TimeToUS() etc.
-#endif
-
-#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION)
-#define TimeToSeconds(t) ((t) / TIME_RESOLUTION)
-
-// Use instead of SecondsToTime() when we have a floating-point
-// seconds value, to avoid truncating it.
-INLINE_HEADER Time fsecondsToTime (double t)
-{
- return (Time)(t * TIME_RESOLUTION);
-}
-
-/* -----------------------------------------------------------------------------
Include everything STG-ish
-------------------------------------------------------------------------- */
diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h
index 4dccb84fd2..2c68219b8b 100644
--- a/includes/RtsAPI.h
+++ b/includes/RtsAPI.h
@@ -17,6 +17,7 @@ extern "C" {
#endif
#include "HsFFI.h"
+#include "rts/Time.h"
/*
* Running the scheduler
@@ -56,6 +57,8 @@ typedef enum {
RtsOptsAll // all RTS options allowed
} RtsOptsEnabledEnum;
+struct GCDetails_;
+
// The RtsConfig struct is passed (by value) to hs_init_ghc(). The
// reason for using a struct is extensibility: we can add more
// fields to this later without breaking existing client code.
@@ -93,15 +96,7 @@ typedef struct {
void (* mallocFailHook) (W_ request_size /* in bytes */, const char *msg);
// Called for every GC
- void (* gcDoneHook) (unsigned int gen,
- W_ allocated_bytes, /* since last GC */
- W_ live_bytes,
- W_ copied_bytes,
- W_ max_copied_per_thread_bytes,
- W_ total_bytes,
- W_ slop_bytes,
- W_ sync_elapsed_ns, W_ elapsed_ns, W_ cpu_ns);
-
+ void (* gcDoneHook) (const struct GCDetails_ *stats);
} RtsConfig;
// Clients should start with defaultRtsConfig and then customise it.
@@ -109,6 +104,108 @@ typedef struct {
// you can't do that in C (it generates code).
extern const RtsConfig defaultRtsConfig;
+/* -----------------------------------------------------------------------------
+ Statistics
+ -------------------------------------------------------------------------- */
+
+//
+// Stats about a single GC
+//
+typedef struct GCDetails_ {
+ // The generation number of this GC
+ uint32_t gen;
+ // Number of threads used in this GC
+ uint32_t threads;
+ // Number of bytes allocated since the previous GC
+ uint64_t allocated_bytes;
+ // Total amount of live data in the heap (includes large + compact data)
+ uint64_t live_bytes;
+ // Total amount of live data in large objects
+ uint64_t large_objects_bytes;
+ // Total amount of live data in compact regions
+ uint64_t compact_bytes;
+ // Total amount of slop (wasted memory)
+ uint64_t slop_bytes;
+ // Total amount of memory in use by the RTS
+ uint64_t mem_in_use_bytes;
+ // Total amount of data copied during this GC
+ uint64_t copied_bytes;
+ // In parallel GC, the max amount of data copied by any one thread
+ uint64_t par_max_copied_bytes;
+ // The time elapsed during synchronisation before GC
+ Time sync_elapsed_ns;
+ // The CPU time used during GC itself
+ Time cpu_ns;
+ // The time elapsed during GC itself
+ Time elapsed_ns;
+} GCDetails;
+
+//
+// Stats about the RTS currently, and since the start of execution
+//
+typedef struct _RTSStats {
+
+ // -----------------------------------
+ // Cumulative stats about memory use
+
+ // Total number of GCs
+ uint32_t gcs;
+ // Total number of major (oldest generation) GCs
+ uint32_t major_gcs;
+ // Total bytes allocated
+ uint64_t allocated_bytes;
+ // Maximum live data (including large objects + compact regions)
+ uint64_t max_live_bytes;
+ // Maximum live data in large objects
+ uint64_t max_large_objects_bytes;
+ // Maximum live data in compact regions
+ uint64_t max_compact_bytes;
+ // Maximum slop
+ uint64_t max_slop_bytes;
+ // Maximum memory in use by the RTS
+ uint64_t max_mem_in_use_bytes;
+ // Sum of live bytes across all major GCs. Divided by major_gcs
+ // gives the average live data over the lifetime of the program.
+ uint64_t cumulative_live_bytes;
+ // Sum of copied_bytes across all GCs
+ uint64_t copied_bytes;
+ // Sum of copied_bytes across all parallel GCs
+ uint64_t par_copied_bytes;
+ // Sum of par_max_copied_bytes across all parallel GCs
+ uint64_t cumulative_par_max_copied_bytes;
+
+ // -----------------------------------
+ // Cumulative stats about time use
+ // (we use signed values here because due to inaccuracies in timers
+ // the values can occasionally go slightly negative)
+
+ // Total CPU time used by the mutator
+ Time mutator_cpu_ns;
+ // Total elapsed time used by the mutator
+ Time mutator_elapsed_ns;
+ // Total CPU time used by the GC
+ Time gc_cpu_ns;
+ // Total elapsed time used by the GC
+ Time gc_elapsed_ns;
+ // Total CPU time (at the previous GC)
+ Time cpu_ns;
+ // Total elapsed time (at the previous GC)
+ Time elapsed_ns;
+
+ // -----------------------------------
+ // Stats about the most recent GC
+
+ GCDetails gc;
+
+} RTSStats;
+
+void getRTSStats (RTSStats *s);
+int getRTSStatsEnabled (void);
+
+// Returns the total number of bytes allocated since the start of the program.
+// TODO: can we remove this?
+uint64_t getAllocations (void);
+
/* ----------------------------------------------------------------------------
Starting up and shutting down the Haskell RTS.
------------------------------------------------------------------------- */
diff --git a/includes/rts/Time.h b/includes/rts/Time.h
new file mode 100644
index 0000000000..a1debedea0
--- /dev/null
+++ b/includes/rts/Time.h
@@ -0,0 +1,43 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Time values in the RTS
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
+ *
+ * --------------------------------------------------------------------------*/
+
+#ifndef RTSTIME_H
+#define RTSTIME_H
+
+// For most time values in the RTS we use a fixed resolution of nanoseconds,
+// normalising the time we get from platform-dependent APIs to this
+// resolution.
+#define TIME_RESOLUTION 1000000000
+typedef int64_t Time;
+
+#define TIME_MAX HS_INT64_MAX
+
+#if TIME_RESOLUTION == 1000000000
+// I'm being lazy, but it's awkward to define fully general versions of these
+#define TimeToUS(t) ((t) / 1000)
+#define TimeToNS(t) (t)
+#define USToTime(t) ((Time)(t) * 1000)
+#define NSToTime(t) ((Time)(t))
+#else
+#error Fix TimeToNS(), TimeToUS() etc.
+#endif
+
+#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION)
+#define TimeToSeconds(t) ((t) / TIME_RESOLUTION)
+
+// Use instead of SecondsToTime() when we have a floating-point
+// seconds value, to avoid truncating it.
+INLINE_HEADER Time fsecondsToTime (double t)
+{
+ return (Time)(t * TIME_RESOLUTION);
+}
+
+#endif // RTSTIME_H
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
index f15fd2a7cf..ddc4238592 100644
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -223,61 +223,6 @@ void revertCAFs (void);
void setKeepCAFs (void);
/* -----------------------------------------------------------------------------
- Stats
- -------------------------------------------------------------------------- */
-
-typedef struct _GCStats {
- StgWord64 bytes_allocated;
- StgWord64 num_gcs;
- StgWord64 num_byte_usage_samples;
- StgWord64 max_bytes_used;
- StgWord64 cumulative_bytes_used;
- StgWord64 bytes_copied;
- StgWord64 current_bytes_used;
- StgWord64 current_bytes_slop;
- StgWord64 max_bytes_slop;
- StgWord64 peak_megabytes_allocated;
- StgWord64 mblocks_allocated;
- StgWord64 par_tot_bytes_copied;
- StgWord64 par_max_bytes_copied;
- StgDouble mutator_cpu_seconds;
- StgDouble mutator_wall_seconds;
- StgDouble gc_cpu_seconds;
- StgDouble gc_wall_seconds;
- StgDouble cpu_seconds;
- StgDouble wall_seconds;
-} GCStats;
-void getGCStats (GCStats *s);
-bool getGCStatsEnabled (void);
-
-// These don't change over execution, so do them elsewhere
-// StgDouble init_cpu_seconds;
-// StgDouble init_wall_seconds;
-
-typedef struct _ParGCStats {
- StgWord64 tot_copied;
- StgWord64 max_copied;
-} ParGCStats;
-void getParGCStats (ParGCStats *s);
-
-/*
-typedef struct _TaskStats {
- StgWord64 mut_time;
- StgWord64 mut_etime;
- StgWord64 gc_time;
- StgWord64 gc_etime;
-} TaskStats;
-// would need to allocate arbitrarily large amount of memory
-// because it's a linked list of results
-void getTaskStats (TaskStats **s);
-// Need to stuff SparkCounters in a public header file...
-void getSparkStats (SparkCounters *s);
-*/
-
-// Returns the total number of bytes allocated since the start of the program.
-HsInt64 getAllocations (void);
-
-/* -----------------------------------------------------------------------------
This is the write barrier for MUT_VARs, a.k.a. IORefs. A
MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
diff --git a/libraries/base/GHC/Stats.hsc b/libraries/base/GHC/Stats.hsc
index a8b43efc8a..ab4012da08 100644
--- a/libraries/base/GHC/Stats.hsc
+++ b/libraries/base/GHC/Stats.hsc
@@ -13,14 +13,25 @@
-- @since 4.5.0.0
-----------------------------------------------------------------------------
module GHC.Stats
- ( GCStats(..)
+ (
+ -- * Runtime statistics
+ RTSStats(..), GCDetails(..)
+ , getRTSStats
+ , getRTSStatsEnabled
+
+ -- * DEPRECATED, don't use
+ , GCStats(..)
, getGCStats
, getGCStatsEnabled
) where
+import Control.Applicative
import Control.Monad
import Data.Int
+import Data.Word
import GHC.Base
+import GHC.Num (Num(..))
+import GHC.Real (quot, fromIntegral, (/))
import GHC.Read ( Read )
import GHC.Show ( Show )
import GHC.IO.Exception
@@ -30,12 +41,163 @@ import Foreign.Ptr
#include "Rts.h"
-foreign import ccall "getGCStats" getGCStats_ :: Ptr () -> IO ()
+foreign import ccall "getRTSStats" getRTSStats_ :: Ptr () -> IO ()
-- | Returns whether GC stats have been enabled (with @+RTS -T@, for example).
--
--- @since 4.6.0.0
-foreign import ccall "getGCStatsEnabled" getGCStatsEnabled :: IO Bool
+-- @since 4.9.0.0
+foreign import ccall "getRTSStatsEnabled" getRTSStatsEnabled :: IO Bool
+
+--
+-- | Statistics about runtime activity since the start of the
+-- program. This is a mirror of the C @struct RTSStats@ in @RtsAPI.h@
+--
+-- @since 4.9.0.0
+--
+data RTSStats = RTSStats {
+ -- -----------------------------------
+ -- Cumulative stats about memory use
+
+ -- | Total number of GCs
+ gcs :: Word32
+ -- | Total number of major (oldest generation) GCs
+ , major_gcs :: Word32
+ -- | Total bytes allocated
+ , allocated_bytes :: Word64
+ -- | Maximum live data (including large objects + compact regions)
+ , max_live_bytes :: Word64
+ -- | Maximum live data in large objects
+ , max_large_objects_bytes :: Word64
+ -- | Maximum live data in compact regions
+ , max_compact_bytes :: Word64
+ -- | Maximum slop
+ , max_slop_bytes :: Word64
+ -- | Maximum memory in use by the RTS
+ , max_mem_in_use_bytes :: Word64
+ -- | Sum of live bytes across all major GCs. Divided by major_gcs
+ -- gives the average live data over the lifetime of the program.
+ , cumulative_live_bytes :: Word64
+ -- | Sum of copied_bytes across all GCs
+ , copied_bytes :: Word64
+ -- | Sum of copied_bytes across all parallel GCs
+ , par_copied_bytes :: Word64
+ -- | Sum of par_max_copied_bytes across all parallel GCs
+ , cumulative_par_max_copied_bytes :: Word64
+
+ -- -----------------------------------
+ -- Cumulative stats about time use
+ -- (we use signed values here because due to inaccuracies in timers
+ -- the values can occasionally go slightly negative)
+
+ -- | Total CPU time used by the mutator
+ , mutator_cpu_ns :: RtsTime
+ -- | Total elapsed time used by the mutator
+ , mutator_elapsed_ns :: RtsTime
+ -- | Total CPU time used by the GC
+ , gc_cpu_ns :: RtsTime
+ -- | Total elapsed time used by the GC
+ , gc_elapsed_ns :: RtsTime
+ -- | Total CPU time (at the previous GC)
+ , cpu_ns :: RtsTime
+ -- | Total elapsed time (at the previous GC)
+ , elapsed_ns :: RtsTime
+
+ -- | Details about the most recent GC
+ , gc :: GCDetails
+ }
+
+--
+-- | Statistics about a single GC. This is a mirror of the C @struct
+-- GCDetails@ in @RtsAPI.h@, with the fields prefixed with @gcdetails_@ to
+-- avoid collisions with 'RTSStats'.
+--
+data GCDetails = GCDetails {
+ -- | The generation number of this GC
+ gcdetails_gen :: Word32
+ -- | Number of threads used in this GC
+ , gcdetails_threads :: Word32
+ -- | Number of bytes allocated since the previous GC
+ , gcdetails_allocated_bytes :: Word64
+ -- | Total amount of live data in the heap (includes large + compact data)
+ , gcdetails_live_bytes :: Word64
+ -- | Total amount of live data in large objects
+ , gcdetails_large_objects_bytes :: Word64
+ -- | Total amount of live data in compact regions
+ , gcdetails_compact_bytes :: Word64
+ -- | Total amount of slop (wasted memory)
+ , gcdetails_slop_bytes :: Word64
+ -- | Total amount of memory in use by the RTS
+ , gcdetails_mem_in_use_bytes :: Word64
+ -- | Total amount of data copied during this GC
+ , gcdetails_copied_bytes :: Word64
+ -- | In parallel GC, the max amount of data copied by any one thread
+ , gcdetails_par_max_copied_bytes :: Word64
+ -- | The time elapsed during synchronisation before GC
+ , gcdetails_sync_elapsed_ns :: RtsTime
+ -- | The CPU time used during GC itself
+ , gcdetails_cpu_ns :: RtsTime
+ -- | The time elapsed during GC itself
+ , gcdetails_elapsed_ns :: RtsTime
+ }
+
+
+type RtsTime = Int64
+
+-- | Retrieve the runtime system's statistics by calling the C function
+-- @getRTSStats@ and copying every field of the C @RTSStats@ struct,
+-- including the nested 'GCDetails' for the most recent GC, into Haskell.
+--
+-- Throws an 'IOError' of type 'UnsupportedOperation' if
+-- 'getRTSStatsEnabled' reports that statistics are not enabled.
+--
+-- @since 4.9.0.0
+--
+getRTSStats :: IO RTSStats
+getRTSStats = do
+  -- Ask the RTS directly rather than via the deprecated getGCStatsEnabled
+  -- alias, and name this function (not getGCStats) in the error message.
+  statsEnabled <- getRTSStatsEnabled
+  unless statsEnabled . ioError $ IOError
+    Nothing
+    UnsupportedOperation
+    ""
+    "getRTSStats: GC stats not enabled. Use `+RTS -T -RTS' to enable them."
+    Nothing
+    Nothing
+  -- Let the RTS fill a temporary C struct, then peek each field out of it.
+  allocaBytes (#size RTSStats) $ \p -> do
+    getRTSStats_ p
+    gcs <- (# peek RTSStats, gcs) p
+    major_gcs <- (# peek RTSStats, major_gcs) p
+    allocated_bytes <- (# peek RTSStats, allocated_bytes) p
+    max_live_bytes <- (# peek RTSStats, max_live_bytes) p
+    max_large_objects_bytes <- (# peek RTSStats, max_large_objects_bytes) p
+    max_compact_bytes <- (# peek RTSStats, max_compact_bytes) p
+    max_slop_bytes <- (# peek RTSStats, max_slop_bytes) p
+    max_mem_in_use_bytes <- (# peek RTSStats, max_mem_in_use_bytes) p
+    cumulative_live_bytes <- (# peek RTSStats, cumulative_live_bytes) p
+    copied_bytes <- (# peek RTSStats, copied_bytes) p
+    par_copied_bytes <- (# peek RTSStats, par_copied_bytes) p
+    cumulative_par_max_copied_bytes <-
+      (# peek RTSStats, cumulative_par_max_copied_bytes) p
+    mutator_cpu_ns <- (# peek RTSStats, mutator_cpu_ns) p
+    mutator_elapsed_ns <- (# peek RTSStats, mutator_elapsed_ns) p
+    gc_cpu_ns <- (# peek RTSStats, gc_cpu_ns) p
+    gc_elapsed_ns <- (# peek RTSStats, gc_elapsed_ns) p
+    cpu_ns <- (# peek RTSStats, cpu_ns) p
+    elapsed_ns <- (# peek RTSStats, elapsed_ns) p
+    -- Pointer to the embedded GCDetails struct (the @gc@ member).
+    let pgc = (# ptr RTSStats, gc) p
+    gc <- do
+      gcdetails_gen <- (# peek GCDetails, gen) pgc
+      gcdetails_threads <- (# peek GCDetails, threads) pgc
+      gcdetails_allocated_bytes <- (# peek GCDetails, allocated_bytes) pgc
+      gcdetails_live_bytes <- (# peek GCDetails, live_bytes) pgc
+      gcdetails_large_objects_bytes <-
+        (# peek GCDetails, large_objects_bytes) pgc
+      gcdetails_compact_bytes <- (# peek GCDetails, compact_bytes) pgc
+      gcdetails_slop_bytes <- (# peek GCDetails, slop_bytes) pgc
+      gcdetails_mem_in_use_bytes <- (# peek GCDetails, mem_in_use_bytes) pgc
+      gcdetails_copied_bytes <- (# peek GCDetails, copied_bytes) pgc
+      gcdetails_par_max_copied_bytes <-
+        (# peek GCDetails, par_max_copied_bytes) pgc
+      gcdetails_sync_elapsed_ns <- (# peek GCDetails, sync_elapsed_ns) pgc
+      gcdetails_cpu_ns <- (# peek GCDetails, cpu_ns) pgc
+      gcdetails_elapsed_ns <- (# peek GCDetails, elapsed_ns) pgc
+      return GCDetails{..}
+    return RTSStats{..}
+
+-- -----------------------------------------------------------------------------
+-- DEPRECATED API
-- I'm probably violating a bucket of constraints here... oops.
@@ -44,6 +206,7 @@ foreign import ccall "getGCStatsEnabled" getGCStatsEnabled :: IO Bool
-- the program started.
--
-- @since 4.5.0.0
+{-# DEPRECATED GCStats "Use RTSStats instead. This will be removed in GHC 8.4.1" #-}
data GCStats = GCStats
{ -- | Total number of bytes allocated
bytesAllocated :: !Int64
@@ -100,16 +263,13 @@ data GCStats = GCStats
, parMaxBytesCopied :: !Int64
} deriving (Show, Read)
- {-
- , initCpuSeconds :: !Double
- , initWallSeconds :: !Double
- -}
-
-- | Retrieves garbage collection and memory statistics as of the last
-- garbage collection. If you would like your statistics as recent as
-- possible, first run a 'System.Mem.performGC'.
--
-- @since 4.5.0.0
+{-# DEPRECATED getGCStats
+ "Use getRTSStats instead. This will be removed in GHC 8.4.1" #-}
getGCStats :: IO GCStats
getGCStats = do
statsEnabled <- getGCStatsEnabled
@@ -120,56 +280,38 @@ getGCStats = do
"getGCStats: GC stats not enabled. Use `+RTS -T -RTS' to enable them."
Nothing
Nothing
- allocaBytes (#size GCStats) $ \p -> do
- getGCStats_ p
- bytesAllocated <- (# peek GCStats, bytes_allocated) p
- numGcs <- (# peek GCStats, num_gcs ) p
- numByteUsageSamples <- (# peek GCStats, num_byte_usage_samples ) p
- maxBytesUsed <- (# peek GCStats, max_bytes_used ) p
- cumulativeBytesUsed <- (# peek GCStats, cumulative_bytes_used ) p
- bytesCopied <- (# peek GCStats, bytes_copied ) p
- currentBytesUsed <- (# peek GCStats, current_bytes_used ) p
- currentBytesSlop <- (# peek GCStats, current_bytes_slop) p
- maxBytesSlop <- (# peek GCStats, max_bytes_slop) p
- peakMegabytesAllocated <- (# peek GCStats, peak_megabytes_allocated ) p
- mblocksAllocated <- (# peek GCStats, mblocks_allocated) p
- {-
- initCpuSeconds <- (# peek GCStats, init_cpu_seconds) p
- initWallSeconds <- (# peek GCStats, init_wall_seconds) p
- -}
- mutatorCpuSeconds <- (# peek GCStats, mutator_cpu_seconds) p
- mutatorWallSeconds <- (# peek GCStats, mutator_wall_seconds) p
- gcCpuSeconds <- (# peek GCStats, gc_cpu_seconds) p
- gcWallSeconds <- (# peek GCStats, gc_wall_seconds) p
- cpuSeconds <- (# peek GCStats, cpu_seconds) p
- wallSeconds <- (# peek GCStats, wall_seconds) p
- parTotBytesCopied <- (# peek GCStats, par_tot_bytes_copied) p
- parMaxBytesCopied <- (# peek GCStats, par_max_bytes_copied) p
+ allocaBytes (#size RTSStats) $ \p -> do
+ getRTSStats_ p
+ bytesAllocated <- (# peek RTSStats, allocated_bytes) p
+ numGcs <- (# peek RTSStats, gcs ) p
+ numByteUsageSamples <- (# peek RTSStats, major_gcs ) p
+ maxBytesUsed <- (# peek RTSStats, max_live_bytes ) p
+ cumulativeBytesUsed <- (# peek RTSStats, cumulative_live_bytes ) p
+ bytesCopied <- (# peek RTSStats, copied_bytes ) p
+ currentBytesUsed <- (# peek RTSStats, gc.live_bytes ) p
+ currentBytesSlop <- (# peek RTSStats, gc.slop_bytes) p
+ maxBytesSlop <- (# peek RTSStats, max_slop_bytes) p
+ peakMegabytesAllocated <- do
+ bytes <- (# peek RTSStats, max_mem_in_use_bytes ) p
+ return (bytes `quot` (1024*1024))
+ mblocksAllocated <- do
+ bytes <- (# peek RTSStats, gc.mem_in_use_bytes) p
+ return (bytes `quot` (1024*1024))
+ mutatorCpuSeconds <- nsToSecs <$> (# peek RTSStats, mutator_cpu_ns) p
+ mutatorWallSeconds <-
+ nsToSecs <$> (# peek RTSStats, mutator_elapsed_ns) p
+ gcCpuSeconds <- nsToSecs <$> (# peek RTSStats, gc_cpu_ns) p
+ gcWallSeconds <- nsToSecs <$> (# peek RTSStats, gc_elapsed_ns) p
+ cpuSeconds <- nsToSecs <$> (# peek RTSStats, cpu_ns) p
+ wallSeconds <- nsToSecs <$> (# peek RTSStats, elapsed_ns) p
+ parTotBytesCopied <- (# peek RTSStats, par_copied_bytes) p
+ parMaxBytesCopied <- (# peek RTSStats, cumulative_par_max_copied_bytes) p
return GCStats { .. }
-{-
-
--- Nontrivial to implement: TaskStats needs arbitrarily large
--- amounts of memory, spark stats wants to use SparkCounters
--- but that needs a new rts/ header.
-
-data TaskStats = TaskStats
- { taskMutCpuSeconds :: Int64
- , taskMutWallSeconds :: Int64
- , taskGcCpuSeconds :: Int64
- , taskGcWallSeconds :: Int64
- } deriving (Show, Read)
-
-data SparkStats = SparkStats
- { sparksCreated :: Int64
- , sparksDud :: Int64
- , sparksOverflowed :: Int64
- , sparksConverted :: Int64
- , sparksGcd :: Int64
- , sparksFizzled :: Int64
- } deriving (Show, Read)
-
--- We also could get per-generation stats, which requires a
--- non-constant but at runtime known about of memory.
+nsToSecs :: Int64 -> Double
+nsToSecs ns = fromIntegral ns / (# const TIME_RESOLUTION)
--}
+{-# DEPRECATED getGCStatsEnabled
+ "use getRTSStatsEnabled instead. This will be removed in GHC 8.4.1" #-}
+getGCStatsEnabled :: IO Bool
+getGCStatsEnabled = getRTSStatsEnabled
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index e50159642d..4f618df33d 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -567,8 +567,8 @@
SymI_HasProto(getOrSetSystemTimerThreadEventManagerStore) \
SymI_HasProto(getOrSetSystemTimerThreadIOManagerThreadStore) \
SymI_HasProto(getOrSetLibHSghcFastStringTable) \
- SymI_HasProto(getGCStats) \
- SymI_HasProto(getGCStatsEnabled) \
+ SymI_HasProto(getRTSStats) \
+ SymI_HasProto(getRTSStatsEnabled) \
SymI_HasProto(genericRaise) \
SymI_HasProto(getProgArgv) \
SymI_HasProto(getFullProgArgv) \
diff --git a/rts/Stats.c b/rts/Stats.c
index 8fe9adf304..95511f2c35 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -20,25 +20,15 @@
#include "sm/GCThread.h"
#include "sm/BlockAlloc.h"
-/* huh? */
-#define BIG_STRING_LEN 512
-
#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
static Time
start_init_cpu, start_init_elapsed,
end_init_cpu, end_init_elapsed,
start_exit_cpu, start_exit_elapsed,
+ start_exit_gc_elapsed, start_exit_gc_cpu,
end_exit_cpu, end_exit_elapsed;
-static Time GC_tot_cpu = 0;
-
-static StgWord64 GC_tot_alloc = 0;
-static StgWord64 GC_tot_copied = 0;
-
-static StgWord64 GC_par_max_copied = 0;
-static StgWord64 GC_par_tot_copied = 0;
-
#ifdef PROFILING
static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time
@@ -53,13 +43,13 @@ static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
#define PROF_VAL(x) 0
#endif
-// current = current as of last GC
-static W_ current_residency = 0; // in words; for stats only
-static W_ max_residency = 0;
-static W_ cumulative_residency = 0;
-static W_ residency_samples = 0; // for stats only
-static W_ current_slop = 0;
-static W_ max_slop = 0;
+//
+// All the stats!
+//
+// This is where we accumulate all the stats during execution, and it's also
+// in a convenient form that we can copy over to a caller of getRTSStats().
+//
+static RTSStats stats;
static W_ GC_end_faults = 0;
@@ -87,7 +77,7 @@ Time stat_getElapsedTime(void)
double
mut_user_time_until( Time t )
{
- return TimeToSecondsDbl(t - GC_tot_cpu);
+ return TimeToSecondsDbl(t - stats.gc_cpu_ns);
// heapCensus() time is included in GC_tot_cpu, so we don't need
// to subtract it here.
}
@@ -108,7 +98,7 @@ mut_user_time( void )
static double
mut_user_time_during_RP( void )
{
- return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time);
+ return TimeToSecondsDbl(RP_start_time - stats.gc_cpu_ns - RP_tot_time);
}
#endif /* PROFILING */
@@ -127,15 +117,11 @@ initStats0(void)
start_exit_cpu = 0;
start_exit_elapsed = 0;
+ start_exit_gc_cpu = 0;
+ start_exit_gc_elapsed = 0;
end_exit_cpu = 0;
end_exit_elapsed = 0;
- GC_tot_alloc = 0;
- GC_tot_copied = 0;
- GC_par_max_copied = 0;
- GC_par_tot_copied = 0;
- GC_tot_cpu = 0;
-
#ifdef PROFILING
RP_start_time = 0;
RP_tot_time = 0;
@@ -148,12 +134,43 @@ initStats0(void)
HCe_tot_time = 0;
#endif
- max_residency = 0;
- cumulative_residency = 0;
- residency_samples = 0;
- max_slop = 0;
-
GC_end_faults = 0;
+
+ stats = (RTSStats) {
+ .gcs = 0,
+ .major_gcs = 0,
+ .allocated_bytes = 0,
+ .max_live_bytes = 0,
+ .max_large_objects_bytes = 0,
+ .max_compact_bytes = 0,
+ .max_slop_bytes = 0,
+ .max_mem_in_use_bytes = 0,
+ .cumulative_live_bytes = 0,
+ .copied_bytes = 0,
+ .par_copied_bytes = 0,
+ .cumulative_par_max_copied_bytes = 0,
+ .mutator_cpu_ns = 0,
+ .mutator_elapsed_ns = 0,
+ .gc_cpu_ns = 0,
+ .gc_elapsed_ns = 0,
+ .cpu_ns = 0,
+ .elapsed_ns = 0,
+ .gc = {
+ .gen = 0,
+ .threads = 0,
+ .allocated_bytes = 0,
+ .live_bytes = 0,
+ .large_objects_bytes = 0,
+ .compact_bytes = 0,
+ .slop_bytes = 0,
+ .mem_in_use_bytes = 0,
+ .copied_bytes = 0,
+ .par_max_copied_bytes = 0,
+ .sync_elapsed_ns = 0,
+ .cpu_ns = 0,
+ .elapsed_ns = 0
+ }
+ };
}
/* ---------------------------------------------------------------------------
@@ -214,6 +231,8 @@ void
stat_startExit(void)
{
getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
+ start_exit_gc_elapsed = stats.gc_elapsed_ns;
+ start_exit_gc_cpu = stats.gc_cpu_ns;
}
void
@@ -264,17 +283,82 @@ stat_startGC (Capability *cap, gc_thread *gct)
void
stat_endGC (Capability *cap, gc_thread *gct,
W_ live, W_ copied, W_ slop, uint32_t gen,
- uint32_t par_n_threads, W_ par_max_copied, W_ par_tot_copied)
+ uint32_t par_n_threads, W_ par_max_copied)
{
- W_ tot_alloc;
- W_ alloc;
-
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
rtsConfig.gcDoneHook != NULL ||
- RtsFlags.ProfFlags.doHeapProfile)
- // heap profiling needs GC_tot_time
+ RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time
{
- Time cpu, elapsed, gc_cpu, gc_elapsed, gc_sync_elapsed;
+ // -------------------------------------------------
+ // Collect all the stats about this GC in stats.gc
+
+ stats.gc.gen = gen;
+ stats.gc.threads = par_n_threads;
+
+ uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);
+
+ // allocated since the last GC
+ stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
+
+ stats.gc.live_bytes = live * sizeof(W_);
+ stats.gc.large_objects_bytes = calcTotalLargeObjectsW() * sizeof(W_);
+ stats.gc.compact_bytes = calcTotalCompactW() * sizeof(W_);
+ stats.gc.slop_bytes = slop * sizeof(W_);
+ stats.gc.mem_in_use_bytes = mblocks_allocated * MBLOCK_SIZE;
+ stats.gc.copied_bytes = copied * sizeof(W_);
+ stats.gc.par_max_copied_bytes = par_max_copied * sizeof(W_);
+
+ Time current_cpu, current_elapsed;
+ getProcessTimes(&current_cpu, &current_elapsed);
+ stats.cpu_ns = current_cpu - start_init_cpu;
+ stats.elapsed_ns = current_elapsed - start_init_elapsed;
+
+ stats.gc.sync_elapsed_ns =
+ gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
+ stats.gc.elapsed_ns = current_elapsed - gct->gc_start_elapsed;
+ stats.gc.cpu_ns = current_cpu - gct->gc_start_cpu;
+
+ // -------------------------------------------------
+ // Update the cumulative stats
+
+ stats.gcs++;
+ stats.allocated_bytes = tot_alloc_bytes;
+ stats.max_mem_in_use_bytes = peak_mblocks_allocated * MBLOCK_SIZE;
+
+ GC_coll_cpu[gen] += stats.gc.cpu_ns;
+ GC_coll_elapsed[gen] += stats.gc.elapsed_ns;
+ if (GC_coll_max_pause[gen] < stats.gc.elapsed_ns) {
+ GC_coll_max_pause[gen] = stats.gc.elapsed_ns;
+ }
+
+ stats.copied_bytes += stats.gc.copied_bytes;
+ if (par_n_threads > 1) {
+ stats.par_copied_bytes += stats.gc.copied_bytes;
+ stats.cumulative_par_max_copied_bytes +=
+ stats.gc.par_max_copied_bytes;
+ }
+ stats.gc_cpu_ns += stats.gc.cpu_ns;
+ stats.gc_elapsed_ns += stats.gc.elapsed_ns;
+
+ if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
+ stats.major_gcs++;
+ if (stats.gc.live_bytes > stats.max_live_bytes) {
+ stats.max_live_bytes = stats.gc.live_bytes;
+ }
+ if (stats.gc.large_objects_bytes > stats.max_large_objects_bytes) {
+ stats.max_large_objects_bytes = stats.gc.large_objects_bytes;
+ }
+ if (stats.gc.compact_bytes > stats.max_compact_bytes) {
+ stats.max_compact_bytes = stats.gc.compact_bytes;
+ }
+ if (stats.gc.slop_bytes > stats.max_slop_bytes) {
+ stats.max_slop_bytes = stats.gc.slop_bytes;
+ }
+ stats.cumulative_live_bytes += stats.gc.live_bytes;
+ }
+
+ // -------------------------------------------------
+ // Emit events to the event log
// Has to be emitted while all caps stopped for GC, but before GC_END.
// See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents
@@ -285,51 +369,45 @@ stat_endGC (Capability *cap, gc_thread *gct,
// Emitted before GC_END on all caps, which simplifies tools code.
traceEventGcStats(cap,
CAPSET_HEAP_DEFAULT,
- gen,
- copied * sizeof(W_),
- slop * sizeof(W_),
+ stats.gc.gen,
+ stats.gc.copied_bytes,
+ stats.gc.slop_bytes,
/* current loss due to fragmentation */
(mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
* BLOCK_SIZE,
par_n_threads,
- par_max_copied * sizeof(W_),
- par_tot_copied * sizeof(W_));
-
- getProcessTimes(&cpu, &elapsed);
+ stats.gc.par_max_copied_bytes,
+ stats.gc.copied_bytes);
// Post EVENT_GC_END with the same timestamp as used for stats
// (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
// Here, as opposed to other places, the event is emitted on the cap
// that initiates the GC and external tools expect it to have the same
// timestamp as used in +RTS -s calculcations.
- traceEventGcEndAtT(cap, TimeToNS(elapsed - start_init_elapsed));
-
- gc_sync_elapsed = gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
- gc_elapsed = elapsed - gct->gc_start_elapsed;
- gc_cpu = cpu - gct->gc_start_cpu;
+ traceEventGcEndAtT(cap, TimeToNS(stats.elapsed_ns));
- /* For the moment we calculate both per-HEC and total allocation.
- * There is thus redundancy here, but for the moment we will calculate
- * it both the old and new way and assert they're the same.
- * When we're sure it's working OK then we can simplify things.
- */
- tot_alloc = calcTotalAllocated();
+ if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
+ traceEventHeapLive(cap,
+ CAPSET_HEAP_DEFAULT,
+ stats.gc.live_bytes);
+ }
- // allocated since the last GC
- alloc = tot_alloc - GC_tot_alloc;
- GC_tot_alloc = tot_alloc;
+ // -------------------------------------------------
+ // Print GC stats to stdout or a file (+RTS -S/-s)
if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
W_ faults = getPageFaults();
statsPrintf("%9" FMT_Word " %9" FMT_Word " %9" FMT_Word,
- alloc*sizeof(W_), copied*sizeof(W_),
- live*sizeof(W_));
- statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4" FMT_Word " %4" FMT_Word " (Gen: %2d)\n",
- TimeToSecondsDbl(gc_cpu),
- TimeToSecondsDbl(gc_elapsed),
- TimeToSecondsDbl(cpu),
- TimeToSecondsDbl(elapsed - start_init_elapsed),
+ stats.gc.allocated_bytes, stats.gc.copied_bytes,
+ stats.gc.live_bytes);
+
+ statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4"
+ FMT_Word " %4" FMT_Word " (Gen: %2d)\n",
+ TimeToSecondsDbl(stats.gc.cpu_ns),
+ TimeToSecondsDbl(stats.gc.elapsed_ns),
+ TimeToSecondsDbl(stats.cpu_ns),
+ TimeToSecondsDbl(stats.elapsed_ns),
faults - gct->gc_start_faults,
gct->gc_start_faults - GC_end_faults,
gen);
@@ -340,47 +418,12 @@ stat_endGC (Capability *cap, gc_thread *gct,
if (rtsConfig.gcDoneHook != NULL) {
- rtsConfig.gcDoneHook(gen,
- alloc*sizeof(W_),
- live*sizeof(W_),
- copied*sizeof(W_),
- par_max_copied * sizeof(W_),
- mblocks_allocated * BLOCKS_PER_MBLOCK
- * BLOCK_SIZE,
- slop * sizeof(W_),
- TimeToNS(gc_sync_elapsed),
- TimeToNS(gc_elapsed),
- TimeToNS(gc_cpu));
- }
-
- GC_coll_cpu[gen] += gc_cpu;
- GC_coll_elapsed[gen] += gc_elapsed;
- if (GC_coll_max_pause[gen] < gc_elapsed) {
- GC_coll_max_pause[gen] = gc_elapsed;
+ rtsConfig.gcDoneHook(&stats.gc);
}
- GC_tot_copied += (StgWord64) copied;
- GC_par_max_copied += (StgWord64) par_max_copied;
- GC_par_tot_copied += (StgWord64) par_tot_copied;
- GC_tot_cpu += gc_cpu;
-
traceEventHeapSize(cap,
CAPSET_HEAP_DEFAULT,
- mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_));
-
- if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
- if (live > max_residency) {
- max_residency = live;
- }
- current_residency = live;
- residency_samples++;
- cumulative_residency += live;
- traceEventHeapLive(cap,
- CAPSET_HEAP_DEFAULT,
- live * sizeof(W_));
- }
-
- if (slop > max_slop) max_slop = slop;
+ mblocks_allocated * MBLOCK_SIZE);
}
}
@@ -502,8 +545,13 @@ StgInt TOTAL_CALLS=1;
statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
-static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
-static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
+STATIC_INLINE Time get_init_cpu(void) { // CPU time between start_init_cpu and end_init_cpu (reported as INIT by stat_exit)
+ return end_init_cpu - start_init_cpu;
+}
+
+STATIC_INLINE Time get_init_elapsed(void) { // Elapsed time between start_init_elapsed and end_init_elapsed
+ return end_init_elapsed - start_init_elapsed;
+}
void
@@ -518,81 +566,86 @@ stat_exit (void)
Time mut_elapsed = 0;
Time exit_cpu = 0;
Time exit_elapsed = 0;
- W_ tot_alloc;
- W_ alloc;
+ Time exit_gc_cpu = 0;
+ Time exit_gc_elapsed = 0;
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
- char temp[BIG_STRING_LEN];
+ char temp[512];
Time tot_cpu;
Time tot_elapsed;
- uint32_t i, g, total_collections = 0;
+ uint32_t g;
getProcessTimes( &tot_cpu, &tot_elapsed );
+ tot_cpu -= start_init_cpu;
tot_elapsed -= start_init_elapsed;
- tot_alloc = calcTotalAllocated();
+ uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);
// allocated since the last GC
- alloc = tot_alloc - GC_tot_alloc;
- GC_tot_alloc = tot_alloc;
-
- /* Count total garbage collections */
- for (g = 0; g < RtsFlags.GcFlags.generations; g++)
- total_collections += generations[g].collections;
+ stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
+ stats.allocated_bytes = tot_alloc_bytes;
/* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
- if (tot_cpu == 0.0) tot_cpu = 1;
- if (tot_elapsed == 0.0) tot_elapsed = 1;
+ if (tot_cpu <= 0) tot_cpu = 1;
+ if (tot_elapsed <= 0) tot_elapsed = 1;
if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
- statsPrintf("%9" FMT_Word " %9.9s %9.9s", (W_)alloc*sizeof(W_), "", "");
+ statsPrintf("%9" FMT_Word " %9.9s %9.9s",
+ (W_)stats.gc.allocated_bytes, "", "");
statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
}
- for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
- gc_cpu += GC_coll_cpu[i];
- gc_elapsed += GC_coll_elapsed[i];
- }
-
// heapCensus() is called by the GC, so RP and HC time are
// included in the GC stats. We therefore subtract them to
// obtain the actual GC cpu time.
- gc_cpu -= PROF_VAL(RP_tot_time + HC_tot_time);
- gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time);
+ gc_cpu = stats.gc_cpu_ns - PROF_VAL(RP_tot_time + HC_tot_time);
+ gc_elapsed = stats.gc_elapsed_ns - PROF_VAL(RPe_tot_time + HCe_tot_time);
init_cpu = get_init_cpu();
init_elapsed = get_init_elapsed();
- exit_cpu = end_exit_cpu - start_exit_cpu;
- exit_elapsed = end_exit_elapsed - start_exit_elapsed;
+ // We do a GC during the EXIT phase. We'll attribute the cost of that
+ // to GC instead of EXIT, so carefully subtract it from the EXIT time.
+ exit_gc_cpu = stats.gc_cpu_ns - start_exit_gc_cpu;
+ exit_gc_elapsed = stats.gc_elapsed_ns - start_exit_gc_elapsed;
+ exit_cpu = end_exit_cpu - start_exit_cpu - exit_gc_cpu;
+ exit_elapsed = end_exit_elapsed - start_exit_elapsed - exit_gc_elapsed;
- mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;
+ mut_elapsed = start_exit_elapsed - end_init_elapsed -
+ (gc_elapsed - exit_gc_elapsed);
- mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
+ mut_cpu = start_exit_cpu - end_init_cpu - (gc_cpu - exit_gc_cpu)
- PROF_VAL(RP_tot_time + HC_tot_time);
if (mut_cpu < 0) { mut_cpu = 0; }
+ // The subdivision of runtime into INIT/EXIT/GC/MUT is just adding and
+ // subtracting, so the parts should add up to the total exactly. Note
+ // that tot_elapsed is captured a tiny bit later than end_exit_elapsed,
+ // so we don't use it here.
+ ASSERT(init_elapsed + mut_elapsed + gc_elapsed + exit_elapsed
+ == end_exit_elapsed - start_init_elapsed);
+
+
if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
- showStgWord64(GC_tot_alloc*sizeof(W_),
- temp, true/*commas*/);
+ showStgWord64(stats.allocated_bytes, temp, true/*commas*/);
statsPrintf("%16s bytes allocated in the heap\n", temp);
- showStgWord64(GC_tot_copied*sizeof(W_),
- temp, true/*commas*/);
+ showStgWord64(stats.copied_bytes, temp, true/*commas*/);
statsPrintf("%16s bytes copied during GC\n", temp);
- if ( residency_samples > 0 ) {
- showStgWord64(max_residency*sizeof(W_),
- temp, true/*commas*/);
- statsPrintf("%16s bytes maximum residency (%" FMT_Word " sample(s))\n",
- temp, residency_samples);
+ if ( stats.major_gcs > 0 ) {
+ showStgWord64(stats.max_live_bytes, temp, true/*commas*/);
+ statsPrintf("%16s bytes maximum residency (%" FMT_Word32
+ " sample(s))\n",
+ temp, stats.major_gcs);
}
- showStgWord64(max_slop*sizeof(W_), temp, true/*commas*/);
+ showStgWord64(stats.max_slop_bytes, temp, true/*commas*/);
statsPrintf("%16s bytes maximum slop\n", temp);
- statsPrintf("%16" FMT_SizeT " MB total memory in use (%" FMT_SizeT " MB lost due to fragmentation)\n\n",
+ statsPrintf("%16" FMT_SizeT " MB total memory in use (%"
+ FMT_SizeT " MB lost due to fragmentation)\n\n",
(size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)),
(size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
@@ -613,7 +666,7 @@ stat_exit (void)
#if defined(THREADED_RTS)
if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
- 100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
+ 100 * (((double)stats.par_copied_bytes / (double)stats.cumulative_par_max_copied_bytes) - 1)
/ (n_capabilities - 1)
);
}
@@ -675,7 +728,8 @@ stat_exit (void)
showStgWord64(0, temp, true/*commas*/);
} else {
showStgWord64(
- (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)),
+ (StgWord64)((double)stats.allocated_bytes /
+ TimeToSecondsDbl(mut_cpu)),
temp, true/*commas*/);
}
@@ -689,14 +743,6 @@ stat_exit (void)
PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100
/ TimeToSecondsDbl(tot_elapsed));
- /*
- TICK_PRINT(1);
- TICK_PRINT(2);
- REPORT(TOTAL_CALLS);
- TICK_PRINT_TOT(1);
- TICK_PRINT_TOT(2);
- */
-
#if defined(THREADED_RTS) && defined(PROF_SPIN)
{
uint32_t g;
@@ -732,13 +778,13 @@ stat_exit (void)
fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.3f INIT (%.3f elapsed), %.3f MUT (%.3f elapsed), %.3f GC (%.3f elapsed) :ghc>>\n";
}
/* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
- statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
+ statsPrintf(fmt1, stats.allocated_bytes);
statsPrintf(fmt2,
- total_collections,
- residency_samples == 0 ? 0 :
- cumulative_residency*sizeof(W_)/residency_samples,
- max_residency*sizeof(W_),
- residency_samples,
+ stats.gcs,
+ stats.major_gcs == 0 ? 0 :
+ stats.cumulative_live_bytes/stats.major_gcs,
+ stats.max_live_bytes,
+ stats.major_gcs,
(unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
@@ -833,81 +879,32 @@ statDescribeGens(void)
each compilation and expression evaluation.
-------------------------------------------------------------------------- */
-extern HsInt64 getAllocations( void )
-{ return (HsInt64)GC_tot_alloc * sizeof(W_); }
-
-/* EZY: I'm not convinced I got all the casting right. */
+uint64_t getAllocations( void ) // Returns the byte count last stored in stats.allocated_bytes; nothing is recomputed here
+{
+ return stats.allocated_bytes;
+}
-extern bool getGCStatsEnabled( void )
+int getRTSStatsEnabled( void ) // Nonzero iff RtsFlags.GcFlags.giveStats is anything other than NO_GC_STATS
{
return RtsFlags.GcFlags.giveStats != NO_GC_STATS;
}
-extern void getGCStats( GCStats *s )
+void getRTSStats( RTSStats *s ) // Copies the global stats into *s, then overwrites the time fields with freshly sampled values
{
- uint32_t total_collections = 0;
- uint32_t g;
- Time gc_cpu = 0;
- Time gc_elapsed = 0;
Time current_elapsed = 0;
Time current_cpu = 0;
- getProcessTimes(&current_cpu, &current_elapsed);
+ *s = stats; // start from the accumulated counters; the four time fields below are then replaced
- /* EZY: static inline'ify these */
- for (g = 0; g < RtsFlags.GcFlags.generations; g++)
- total_collections += generations[g].collections;
-
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- gc_cpu += GC_coll_cpu[g];
- gc_elapsed += GC_coll_elapsed[g];
- }
+ getProcessTimes(&current_cpu, &current_elapsed); // fills current_cpu / current_elapsed through the pointers
+ s->cpu_ns = current_cpu - end_init_cpu; // measured from end_init_cpu, not from process start
+ s->elapsed_ns = current_elapsed - end_init_elapsed; // measured from end_init_elapsed
- s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
- s->num_gcs = total_collections;
- s->num_byte_usage_samples = residency_samples;
- s->max_bytes_used = max_residency*sizeof(W_);
- s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
- s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L));
- s->mblocks_allocated = (StgWord64)mblocks_allocated;
- s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
- s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
- s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
- s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
- /*
- s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu());
- s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed());
- */
- s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
- s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed);
- s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu);
- s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed);
- /* EZY: Being consistent with incremental output, but maybe should also discount init */
- s->cpu_seconds = TimeToSecondsDbl(current_cpu);
- s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
- s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
- s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
+ s->mutator_cpu_ns = current_cpu - end_init_cpu - stats.gc_cpu_ns - // mutator CPU = CPU since init, less GC CPU, less the PROF_VAL(RP/HC) term
+ PROF_VAL(RP_tot_time + HC_tot_time);
+ s->mutator_elapsed_ns = current_elapsed - end_init_elapsed - // mutator elapsed = elapsed since init, less GC elapsed
+ stats.gc_elapsed_ns;
}
-// extern void getTaskStats( TaskStats **s ) {}
-#if 0
-extern void getSparkStats( SparkCounters *s ) {
- uint32_t i;
- s->created = 0;
- s->dud = 0;
- s->overflowed = 0;
- s->converted = 0;
- s->gcd = 0;
- s->fizzled = 0;
- for (i = 0; i < n_capabilities; i++) {
- s->created += capabilities[i]->spark_stats.created;
- s->dud += capabilities[i]->spark_stats.dud;
- s->overflowed+= capabilities[i]->spark_stats.overflowed;
- s->converted += capabilities[i]->spark_stats.converted;
- s->gcd += capabilities[i]->spark_stats.gcd;
- s->fizzled += capabilities[i]->spark_stats.fizzled;
- }
-}
-#endif
/* -----------------------------------------------------------------------------
Dumping stuff in the stats file, or via the debug message interface
diff --git a/rts/Stats.h b/rts/Stats.h
index 1d95170f2c..537f5695a5 100644
--- a/rts/Stats.h
+++ b/rts/Stats.h
@@ -31,7 +31,7 @@ void stat_startGCSync(struct gc_thread_ *_gct);
void stat_startGC(Capability *cap, struct gc_thread_ *_gct);
void stat_endGC (Capability *cap, struct gc_thread_ *_gct, W_ live,
W_ copied, W_ slop, uint32_t gen, uint32_t n_gc_threads,
- W_ par_max_copied, W_ par_tot_copied);
+ W_ par_max_copied);
#ifdef PROFILING
void stat_startRP(void);
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index ea80d6dec1..c41c9791dc 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -187,7 +187,7 @@ GarbageCollect (uint32_t collect_gen,
{
bdescr *bd;
generation *gen;
- StgWord live_blocks, live_words, par_max_copied, par_tot_copied;
+ StgWord live_blocks, live_words, par_max_copied;
#if defined(THREADED_RTS)
gc_thread *saved_gct;
#endif
@@ -459,7 +459,6 @@ GarbageCollect (uint32_t collect_gen,
copied = 0;
par_max_copied = 0;
- par_tot_copied = 0;
{
uint32_t i;
for (i=0; i < n_gc_threads; i++) {
@@ -474,10 +473,8 @@ GarbageCollect (uint32_t collect_gen,
copied += gc_threads[i]->copied;
par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
}
- par_tot_copied = copied;
if (n_gc_threads == 1) {
par_max_copied = 0;
- par_tot_copied = 0;
}
}
@@ -773,7 +770,7 @@ GarbageCollect (uint32_t collect_gen,
// ok, GC over: tell the stats department what happened.
stat_endGC(cap, gct, live_words, copied,
live_blocks * BLOCK_SIZE_W - live_words /* slop */,
- N, n_gc_threads, par_max_copied, par_tot_copied);
+ N, n_gc_threads, par_max_copied);
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index ad2519588b..70a5621806 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -1293,6 +1293,28 @@ calcNeeded (bool force_major, memcount *blocks_needed)
return N;
}
+StgWord calcTotalLargeObjectsW (void) // Returns the sum of n_large_words over all RtsFlags.GcFlags.generations generations
+{
+ uint32_t g;
+ StgWord totalW = 0;
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ totalW += generations[g].n_large_words;
+ }
+ return totalW;
+}
+
+StgWord calcTotalCompactW (void) // Returns the sum over all generations of n_compact_blocks scaled by BLOCK_SIZE_W
+{
+ uint32_t g;
+ StgWord totalW = 0;
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ totalW += generations[g].n_compact_blocks * BLOCK_SIZE_W; // block count -> words (assumes BLOCK_SIZE_W is words per block; confirm)
+ }
+ return totalW;
+}
+
/* ----------------------------------------------------------------------------
Executable memory
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index a4e928a3eb..69901fd6ed 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -100,7 +100,6 @@ StgWord calcTotalAllocated (void);
Stats 'n' DEBUG stuff
-------------------------------------------------------------------------- */
-StgWord countLargeAllocated (void);
StgWord countOccupied (bdescr *bd);
StgWord calcNeeded (bool force_major, StgWord *blocks_needed);
@@ -110,6 +109,9 @@ StgWord gcThreadLiveBlocks (uint32_t i, uint32_t g);
StgWord genLiveWords (generation *gen);
StgWord genLiveBlocks (generation *gen);
+StgWord calcTotalLargeObjectsW (void);
+StgWord calcTotalCompactW (void);
+
/* ----------------------------------------------------------------------------
Storage manager internal APIs and globals
------------------------------------------------------------------------- */