diff options
-rw-r--r-- | includes/Rts.h | 33 | ||||
-rw-r--r-- | includes/RtsAPI.h | 115 | ||||
-rw-r--r-- | includes/rts/Time.h | 43 | ||||
-rw-r--r-- | includes/rts/storage/GC.h | 55 | ||||
-rw-r--r-- | libraries/base/GHC/Stats.hsc | 260 | ||||
-rw-r--r-- | rts/RtsSymbols.c | 4 | ||||
-rw-r--r-- | rts/Stats.c | 437 | ||||
-rw-r--r-- | rts/Stats.h | 2 | ||||
-rw-r--r-- | rts/sm/GC.c | 7 | ||||
-rw-r--r-- | rts/sm/Storage.c | 22 | ||||
-rw-r--r-- | rts/sm/Storage.h | 4 |
11 files changed, 598 insertions, 384 deletions
diff --git a/includes/Rts.h b/includes/Rts.h index be81b0d9c7..0599df655c 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -39,6 +39,7 @@ extern "C" { #endif #include "rts/Types.h" +#include "rts/Time.h" #if __GNUC__ >= 3 #define ATTRIBUTE_ALIGNED(n) __attribute__((aligned(n))) @@ -145,38 +146,6 @@ void _assertFail(const char *filename, unsigned int linenum) #define FMT_HexSizeT "zx" /* ----------------------------------------------------------------------------- - Time values in the RTS - -------------------------------------------------------------------------- */ - -// For most time values in the RTS we use a fixed resolution of nanoseconds, -// normalising the time we get from platform-dependent APIs to this -// resolution. -#define TIME_RESOLUTION 1000000000 -typedef StgInt64 Time; - -#define TIME_MAX HS_INT64_MAX - -#if TIME_RESOLUTION == 1000000000 -// I'm being lazy, but it's awkward to define fully general versions of these -#define TimeToUS(t) ((t) / 1000) -#define TimeToNS(t) (t) -#define USToTime(t) ((Time)(t) * 1000) -#define NSToTime(t) ((Time)(t)) -#else -#error Fix TimeToNS(), TimeToUS() etc. -#endif - -#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION) -#define TimeToSeconds(t) ((t) / TIME_RESOLUTION) - -// Use instead of SecondsToTime() when we have a floating-point -// seconds value, to avoid truncating it. 
-INLINE_HEADER Time fsecondsToTime (double t) -{ - return (Time)(t * TIME_RESOLUTION); -} - -/* ----------------------------------------------------------------------------- Include everything STG-ish -------------------------------------------------------------------------- */ diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h index 4dccb84fd2..2c68219b8b 100644 --- a/includes/RtsAPI.h +++ b/includes/RtsAPI.h @@ -17,6 +17,7 @@ extern "C" { #endif #include "HsFFI.h" +#include "rts/Time.h" /* * Running the scheduler @@ -56,6 +57,8 @@ typedef enum { RtsOptsAll // all RTS options allowed } RtsOptsEnabledEnum; +struct GCDetails_; + // The RtsConfig struct is passed (by value) to hs_init_ghc(). The // reason for using a struct is extensibility: we can add more // fields to this later without breaking existing client code. @@ -93,15 +96,7 @@ typedef struct { void (* mallocFailHook) (W_ request_size /* in bytes */, const char *msg); // Called for every GC - void (* gcDoneHook) (unsigned int gen, - W_ allocated_bytes, /* since last GC */ - W_ live_bytes, - W_ copied_bytes, - W_ max_copied_per_thread_bytes, - W_ total_bytes, - W_ slop_bytes, - W_ sync_elapsed_ns, W_ elapsed_ns, W_ cpu_ns); - + void (* gcDoneHook) (const struct GCDetails_ *stats); } RtsConfig; // Clients should start with defaultRtsConfig and then customise it. @@ -109,6 +104,108 @@ typedef struct { // you can't do that in C (it generates code). 
extern const RtsConfig defaultRtsConfig; +/* ----------------------------------------------------------------------------- + Statistics + -------------------------------------------------------------------------- */ + +// +// Stats about a single GC +// +typedef struct GCDetails_ { + // The generation number of this GC + uint32_t gen; + // Number of threads used in this GC + uint32_t threads; + // Number of bytes allocated since the previous GC + uint64_t allocated_bytes; + // Total amount of live data in the heap (includes large + compact data) + uint64_t live_bytes; + // Total amount of live data in large objects + uint64_t large_objects_bytes; + // Total amount of live data in compact regions + uint64_t compact_bytes; + // Total amount of slop (wasted memory) + uint64_t slop_bytes; + // Total amount of memory in use by the RTS + uint64_t mem_in_use_bytes; + // Total amount of data copied during this GC + uint64_t copied_bytes; + // In parallel GC, the max amount of data copied by any one thread + uint64_t par_max_copied_bytes; + // The time elapsed during synchronisation before GC + Time sync_elapsed_ns; + // The CPU time used during GC itself + Time cpu_ns; + // The time elapsed during GC itself + Time elapsed_ns; +} GCDetails; + +// +// Stats about the RTS currently, and since the start of execution +// +typedef struct _RTSStats { + + // ----------------------------------- + // Cumulative stats about memory use + + // Total number of GCs + uint32_t gcs; + // Total number of major (oldest generation) GCs + uint32_t major_gcs; + // Total bytes allocated + uint64_t allocated_bytes; + // Maximum live data (including large objects + compact regions) + uint64_t max_live_bytes; + // Maximum live data in large objects + uint64_t max_large_objects_bytes; + // Maximum live data in compact regions + uint64_t max_compact_bytes; + // Maximum slop + uint64_t max_slop_bytes; + // Maximum memory in use by the RTS + uint64_t max_mem_in_use_bytes; + // Sum of live bytes across 
all major GCs. Divided by major_gcs + // gives the average live data over the lifetime of the program. + uint64_t cumulative_live_bytes; + // Sum of copied_bytes across all GCs + uint64_t copied_bytes; + // Sum of copied_bytes across all parallel GCs + uint64_t par_copied_bytes; + // Sum of par_max_copied_bytes across all parallel GCs + uint64_t cumulative_par_max_copied_bytes; + + // ----------------------------------- + // Cumulative stats about time use + // (we use signed values here because due to inaccuracies in timers + // the values can occasionally go slightly negative) + + // Total CPU time used by the mutator + Time mutator_cpu_ns; + // Total elapsed time used by the mutator + Time mutator_elapsed_ns; + // Total CPU time used by the GC + Time gc_cpu_ns; + // Total elapsed time used by the GC + Time gc_elapsed_ns; + // Total CPU time (at the previous GC) + Time cpu_ns; + // Total elapsed time (at the previous GC) + Time elapsed_ns; + + // ----------------------------------- + // Stats about the most recent GC + + GCDetails gc; + +} RTSStats; + +void getRTSStats (RTSStats *s); +int getRTSStatsEnabled (void); + +// Returns the total number of bytes allocated since the start of the program. +// TODO: can we remove this? +uint64_t getAllocations (void); + /* ---------------------------------------------------------------------------- Starting up and shutting down the Haskell RTS. 
------------------------------------------------------------------------- */ diff --git a/includes/rts/Time.h b/includes/rts/Time.h new file mode 100644 index 0000000000..a1debedea0 --- /dev/null +++ b/includes/rts/Time.h @@ -0,0 +1,43 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2004 + * + * Time values in the RTS + * + * To understand the structure of the RTS headers, see the wiki: + * http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes + * + * --------------------------------------------------------------------------*/ + +#ifndef RTSTIME_H +#define RTSTIME_H + +// For most time values in the RTS we use a fixed resolution of nanoseconds, +// normalising the time we get from platform-dependent APIs to this +// resolution. +#define TIME_RESOLUTION 1000000000 +typedef int64_t Time; + +#define TIME_MAX HS_INT64_MAX + +#if TIME_RESOLUTION == 1000000000 +// I'm being lazy, but it's awkward to define fully general versions of these +#define TimeToUS(t) ((t) / 1000) +#define TimeToNS(t) (t) +#define USToTime(t) ((Time)(t) * 1000) +#define NSToTime(t) ((Time)(t)) +#else +#error Fix TimeToNS(), TimeToUS() etc. +#endif + +#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION) +#define TimeToSeconds(t) ((t) / TIME_RESOLUTION) + +// Use instead of SecondsToTime() when we have a floating-point +// seconds value, to avoid truncating it. 
+INLINE_HEADER Time fsecondsToTime (double t) +{ + return (Time)(t * TIME_RESOLUTION); +} + +#endif // RTSTIME_H diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index f15fd2a7cf..ddc4238592 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -223,61 +223,6 @@ void revertCAFs (void); void setKeepCAFs (void); /* ----------------------------------------------------------------------------- - Stats - -------------------------------------------------------------------------- */ - -typedef struct _GCStats { - StgWord64 bytes_allocated; - StgWord64 num_gcs; - StgWord64 num_byte_usage_samples; - StgWord64 max_bytes_used; - StgWord64 cumulative_bytes_used; - StgWord64 bytes_copied; - StgWord64 current_bytes_used; - StgWord64 current_bytes_slop; - StgWord64 max_bytes_slop; - StgWord64 peak_megabytes_allocated; - StgWord64 mblocks_allocated; - StgWord64 par_tot_bytes_copied; - StgWord64 par_max_bytes_copied; - StgDouble mutator_cpu_seconds; - StgDouble mutator_wall_seconds; - StgDouble gc_cpu_seconds; - StgDouble gc_wall_seconds; - StgDouble cpu_seconds; - StgDouble wall_seconds; -} GCStats; -void getGCStats (GCStats *s); -bool getGCStatsEnabled (void); - -// These don't change over execution, so do them elsewhere -// StgDouble init_cpu_seconds; -// StgDouble init_wall_seconds; - -typedef struct _ParGCStats { - StgWord64 tot_copied; - StgWord64 max_copied; -} ParGCStats; -void getParGCStats (ParGCStats *s); - -/* -typedef struct _TaskStats { - StgWord64 mut_time; - StgWord64 mut_etime; - StgWord64 gc_time; - StgWord64 gc_etime; -} TaskStats; -// would need to allocate arbitrarily large amount of memory -// because it's a linked list of results -void getTaskStats (TaskStats **s); -// Need to stuff SparkCounters in a public header file... -void getSparkStats (SparkCounters *s); -*/ - -// Returns the total number of bytes allocated since the start of the program. 
-HsInt64 getAllocations (void); - -/* ----------------------------------------------------------------------------- This is the write barrier for MUT_VARs, a.k.a. IORefs. A MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY diff --git a/libraries/base/GHC/Stats.hsc b/libraries/base/GHC/Stats.hsc index a8b43efc8a..ab4012da08 100644 --- a/libraries/base/GHC/Stats.hsc +++ b/libraries/base/GHC/Stats.hsc @@ -13,14 +13,25 @@ -- @since 4.5.0.0 ----------------------------------------------------------------------------- module GHC.Stats - ( GCStats(..) + ( + -- * Runtime statistics + RTSStats(..), GCDetails(..) + , getRTSStats + , getRTSStatsEnabled + + -- * DEPRECATED, don't use + , GCStats(..) , getGCStats , getGCStatsEnabled ) where +import Control.Applicative import Control.Monad import Data.Int +import Data.Word import GHC.Base +import GHC.Num (Num(..)) +import GHC.Real (quot, fromIntegral, (/)) import GHC.Read ( Read ) import GHC.Show ( Show ) import GHC.IO.Exception @@ -30,12 +41,163 @@ import Foreign.Ptr #include "Rts.h" -foreign import ccall "getGCStats" getGCStats_ :: Ptr () -> IO () +foreign import ccall "getRTSStats" getRTSStats_ :: Ptr () -> IO () -- | Returns whether GC stats have been enabled (with @+RTS -T@, for example). -- --- @since 4.6.0.0 -foreign import ccall "getGCStatsEnabled" getGCStatsEnabled :: IO Bool +-- @since 4.9.0.0 +foreign import ccall "getRTSStatsEnabled" getRTSStatsEnabled :: IO Bool + +-- +-- | Statistics about runtime activity since the start of the +-- program. 
This is a mirror of the C @struct RTSStats@ in @RtsAPI.h@ +-- +-- @since 4.9.0.0 +-- +data RTSStats = RTSStats { + -- ----------------------------------- + -- Cumulative stats about memory use + + -- | Total number of GCs + gcs :: Word32 + -- | Total number of major (oldest generation) GCs + , major_gcs :: Word32 + -- | Total bytes allocated + , allocated_bytes :: Word64 + -- | Maximum live data (including large objects + compact regions) + , max_live_bytes :: Word64 + -- | Maximum live data in large objects + , max_large_objects_bytes :: Word64 + -- | Maximum live data in compact regions + , max_compact_bytes :: Word64 + -- | Maximum slop + , max_slop_bytes :: Word64 + -- | Maximum memory in use by the RTS + , max_mem_in_use_bytes :: Word64 + -- | Sum of live bytes across all major GCs. Divided by major_gcs + -- gives the average live data over the lifetime of the program. + , cumulative_live_bytes :: Word64 + -- | Sum of copied_bytes across all GCs + , copied_bytes :: Word64 + -- | Sum of copied_bytes across all parallel GCs + , par_copied_bytes :: Word64 + -- | Sum of par_max_copied_bytes across all parallel GCs + , cumulative_par_max_copied_bytes :: Word64 + + -- ----------------------------------- + -- Cumulative stats about time use + -- (we use signed values here because due to inaccuracies in timers + -- the values can occasionally go slightly negative) + + -- | Total CPU time used by the mutator + , mutator_cpu_ns :: RtsTime + -- | Total elapsed time used by the mutator + , mutator_elapsed_ns :: RtsTime + -- | Total CPU time used by the GC + , gc_cpu_ns :: RtsTime + -- | Total elapsed time used by the GC + , gc_elapsed_ns :: RtsTime + -- | Total CPU time (at the previous GC) + , cpu_ns :: RtsTime + -- | Total elapsed time (at the previous GC) + , elapsed_ns :: RtsTime + + -- | Details about the most recent GC + , gc :: GCDetails + } + +-- +-- | Statistics about a single GC. 
This is a mirror of the C @struct +-- GCDetails@ in @RtsAPI.h@, with the fields prefixed with @gcdetails_@ to +-- avoid collisions with 'RTSStats'. +-- +data GCDetails = GCDetails { + -- | The generation number of this GC + gcdetails_gen :: Word32 + -- | Number of threads used in this GC + , gcdetails_threads :: Word32 + -- | Number of bytes allocated since the previous GC + , gcdetails_allocated_bytes :: Word64 + -- | Total amount of live data in the heap (includes large + compact data) + , gcdetails_live_bytes :: Word64 + -- | Total amount of live data in large objects + , gcdetails_large_objects_bytes :: Word64 + -- | Total amount of live data in compact regions + , gcdetails_compact_bytes :: Word64 + -- | Total amount of slop (wasted memory) + , gcdetails_slop_bytes :: Word64 + -- | Total amount of memory in use by the RTS + , gcdetails_mem_in_use_bytes :: Word64 + -- | Total amount of data copied during this GC + , gcdetails_copied_bytes :: Word64 + -- | In parallel GC, the max amount of data copied by any one thread + , gcdetails_par_max_copied_bytes :: Word64 + -- | The time elapsed during synchronisation before GC + , gcdetails_sync_elapsed_ns :: RtsTime + -- | The CPU time used during GC itself + , gcdetails_cpu_ns :: RtsTime + -- | The time elapsed during GC itself + , gcdetails_elapsed_ns :: RtsTime + } + + +type RtsTime = Int64 + +-- @since 4.9.0.0 +-- +getRTSStats :: IO RTSStats +getRTSStats = do + statsEnabled <- getGCStatsEnabled + unless statsEnabled . ioError $ IOError + Nothing + UnsupportedOperation + "" + "getGCStats: GC stats not enabled. Use `+RTS -T -RTS' to enable them." 
+ Nothing + Nothing + allocaBytes (#size RTSStats) $ \p -> do + getRTSStats_ p + gcs <- (# peek RTSStats, gcs) p + major_gcs <- (# peek RTSStats, major_gcs) p + allocated_bytes <- (# peek RTSStats, allocated_bytes) p + max_live_bytes <- (# peek RTSStats, max_live_bytes) p + max_large_objects_bytes <- (# peek RTSStats, max_large_objects_bytes) p + max_compact_bytes <- (# peek RTSStats, max_compact_bytes) p + max_slop_bytes <- (# peek RTSStats, max_slop_bytes) p + max_mem_in_use_bytes <- (# peek RTSStats, max_mem_in_use_bytes) p + cumulative_live_bytes <- (# peek RTSStats, cumulative_live_bytes) p + copied_bytes <- (# peek RTSStats, copied_bytes) p + par_copied_bytes <- (# peek RTSStats, par_copied_bytes) p + cumulative_par_max_copied_bytes <- + (# peek RTSStats, cumulative_par_max_copied_bytes) p + mutator_cpu_ns <- (# peek RTSStats, mutator_cpu_ns) p + mutator_elapsed_ns <- (# peek RTSStats, mutator_elapsed_ns) p + gc_cpu_ns <- (# peek RTSStats, gc_cpu_ns) p + gc_elapsed_ns <- (# peek RTSStats, gc_elapsed_ns) p + cpu_ns <- (# peek RTSStats, cpu_ns) p + elapsed_ns <- (# peek RTSStats, elapsed_ns) p + let pgc = (# ptr RTSStats, gc) p + gc <- do + gcdetails_gen <- (# peek GCDetails, gen) pgc + gcdetails_threads <- (# peek GCDetails, threads) pgc + gcdetails_allocated_bytes <- (# peek GCDetails, allocated_bytes) pgc + gcdetails_live_bytes <- (# peek GCDetails, live_bytes) pgc + gcdetails_large_objects_bytes <- + (# peek GCDetails, large_objects_bytes) pgc + gcdetails_compact_bytes <- (# peek GCDetails, compact_bytes) pgc + gcdetails_slop_bytes <- (# peek GCDetails, slop_bytes) pgc + gcdetails_mem_in_use_bytes <- (# peek GCDetails, mem_in_use_bytes) pgc + gcdetails_copied_bytes <- (# peek GCDetails, copied_bytes) pgc + gcdetails_par_max_copied_bytes <- + (# peek GCDetails, par_max_copied_bytes) pgc + gcdetails_sync_elapsed_ns <- (# peek GCDetails, sync_elapsed_ns) pgc + gcdetails_cpu_ns <- (# peek GCDetails, cpu_ns) pgc + gcdetails_elapsed_ns <- (# peek GCDetails, 
elapsed_ns) pgc + return GCDetails{..} + return RTSStats{..} + +-- ----------------------------------------------------------------------------- +-- DEPRECATED API -- I'm probably violating a bucket of constraints here... oops. @@ -44,6 +206,7 @@ foreign import ccall "getGCStatsEnabled" getGCStatsEnabled :: IO Bool -- the program started. -- -- @since 4.5.0.0 +{-# DEPRECATED GCStats "Use RTSStats instead. This will be removed in GHC 8.4.1" #-} data GCStats = GCStats { -- | Total number of bytes allocated bytesAllocated :: !Int64 @@ -100,16 +263,13 @@ data GCStats = GCStats , parMaxBytesCopied :: !Int64 } deriving (Show, Read) - {- - , initCpuSeconds :: !Double - , initWallSeconds :: !Double - -} - -- | Retrieves garbage collection and memory statistics as of the last -- garbage collection. If you would like your statistics as recent as -- possible, first run a 'System.Mem.performGC'. -- -- @since 4.5.0.0 +{-# DEPRECATED getGCStats + "Use getRTSStats instead. This will be removed in GHC 8.4.1" #-} getGCStats :: IO GCStats getGCStats = do statsEnabled <- getGCStatsEnabled @@ -120,56 +280,38 @@ getGCStats = do "getGCStats: GC stats not enabled. Use `+RTS -T -RTS' to enable them." 
Nothing Nothing - allocaBytes (#size GCStats) $ \p -> do - getGCStats_ p - bytesAllocated <- (# peek GCStats, bytes_allocated) p - numGcs <- (# peek GCStats, num_gcs ) p - numByteUsageSamples <- (# peek GCStats, num_byte_usage_samples ) p - maxBytesUsed <- (# peek GCStats, max_bytes_used ) p - cumulativeBytesUsed <- (# peek GCStats, cumulative_bytes_used ) p - bytesCopied <- (# peek GCStats, bytes_copied ) p - currentBytesUsed <- (# peek GCStats, current_bytes_used ) p - currentBytesSlop <- (# peek GCStats, current_bytes_slop) p - maxBytesSlop <- (# peek GCStats, max_bytes_slop) p - peakMegabytesAllocated <- (# peek GCStats, peak_megabytes_allocated ) p - mblocksAllocated <- (# peek GCStats, mblocks_allocated) p - {- - initCpuSeconds <- (# peek GCStats, init_cpu_seconds) p - initWallSeconds <- (# peek GCStats, init_wall_seconds) p - -} - mutatorCpuSeconds <- (# peek GCStats, mutator_cpu_seconds) p - mutatorWallSeconds <- (# peek GCStats, mutator_wall_seconds) p - gcCpuSeconds <- (# peek GCStats, gc_cpu_seconds) p - gcWallSeconds <- (# peek GCStats, gc_wall_seconds) p - cpuSeconds <- (# peek GCStats, cpu_seconds) p - wallSeconds <- (# peek GCStats, wall_seconds) p - parTotBytesCopied <- (# peek GCStats, par_tot_bytes_copied) p - parMaxBytesCopied <- (# peek GCStats, par_max_bytes_copied) p + allocaBytes (#size RTSStats) $ \p -> do + getRTSStats_ p + bytesAllocated <- (# peek RTSStats, allocated_bytes) p + numGcs <- (# peek RTSStats, gcs ) p + numByteUsageSamples <- (# peek RTSStats, major_gcs ) p + maxBytesUsed <- (# peek RTSStats, max_live_bytes ) p + cumulativeBytesUsed <- (# peek RTSStats, cumulative_live_bytes ) p + bytesCopied <- (# peek RTSStats, copied_bytes ) p + currentBytesUsed <- (# peek RTSStats, gc.live_bytes ) p + currentBytesSlop <- (# peek RTSStats, gc.slop_bytes) p + maxBytesSlop <- (# peek RTSStats, max_slop_bytes) p + peakMegabytesAllocated <- do + bytes <- (# peek RTSStats, max_mem_in_use_bytes ) p + return (bytes `quot` (1024*1024)) + 
mblocksAllocated <- do + bytes <- (# peek RTSStats, gc.mem_in_use_bytes) p + return (bytes `quot` (1024*1024)) + mutatorCpuSeconds <- nsToSecs <$> (# peek RTSStats, mutator_cpu_ns) p + mutatorWallSeconds <- + nsToSecs <$> (# peek RTSStats, mutator_elapsed_ns) p + gcCpuSeconds <- nsToSecs <$> (# peek RTSStats, gc_cpu_ns) p + gcWallSeconds <- nsToSecs <$> (# peek RTSStats, gc_elapsed_ns) p + cpuSeconds <- nsToSecs <$> (# peek RTSStats, cpu_ns) p + wallSeconds <- nsToSecs <$> (# peek RTSStats, elapsed_ns) p + parTotBytesCopied <- (# peek RTSStats, par_copied_bytes) p + parMaxBytesCopied <- (# peek RTSStats, cumulative_par_max_copied_bytes) p return GCStats { .. } -{- - --- Nontrivial to implement: TaskStats needs arbitrarily large --- amounts of memory, spark stats wants to use SparkCounters --- but that needs a new rts/ header. - -data TaskStats = TaskStats - { taskMutCpuSeconds :: Int64 - , taskMutWallSeconds :: Int64 - , taskGcCpuSeconds :: Int64 - , taskGcWallSeconds :: Int64 - } deriving (Show, Read) - -data SparkStats = SparkStats - { sparksCreated :: Int64 - , sparksDud :: Int64 - , sparksOverflowed :: Int64 - , sparksConverted :: Int64 - , sparksGcd :: Int64 - , sparksFizzled :: Int64 - } deriving (Show, Read) - --- We also could get per-generation stats, which requires a --- non-constant but at runtime known about of memory. +nsToSecs :: Int64 -> Double +nsToSecs ns = fromIntegral ns / (# const TIME_RESOLUTION) --} +{-# DEPRECATED getGCStatsEnabled + "use getRTSStatsEnabled instead. 
This will be removed in GHC 8.4.1" #-} +getGCStatsEnabled :: IO Bool +getGCStatsEnabled = getRTSStatsEnabled diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index e50159642d..4f618df33d 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -567,8 +567,8 @@ SymI_HasProto(getOrSetSystemTimerThreadEventManagerStore) \ SymI_HasProto(getOrSetSystemTimerThreadIOManagerThreadStore) \ SymI_HasProto(getOrSetLibHSghcFastStringTable) \ - SymI_HasProto(getGCStats) \ - SymI_HasProto(getGCStatsEnabled) \ + SymI_HasProto(getRTSStats) \ + SymI_HasProto(getRTSStatsEnabled) \ SymI_HasProto(genericRaise) \ SymI_HasProto(getProgArgv) \ SymI_HasProto(getFullProgArgv) \ diff --git a/rts/Stats.c b/rts/Stats.c index 8fe9adf304..95511f2c35 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -20,25 +20,15 @@ #include "sm/GCThread.h" #include "sm/BlockAlloc.h" -/* huh? */ -#define BIG_STRING_LEN 512 - #define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION) static Time start_init_cpu, start_init_elapsed, end_init_cpu, end_init_elapsed, start_exit_cpu, start_exit_elapsed, + start_exit_gc_elapsed, start_exit_gc_cpu, end_exit_cpu, end_exit_elapsed; -static Time GC_tot_cpu = 0; - -static StgWord64 GC_tot_alloc = 0; -static StgWord64 GC_tot_copied = 0; - -static StgWord64 GC_par_max_copied = 0; -static StgWord64 GC_par_tot_copied = 0; - #ifdef PROFILING static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time @@ -53,13 +43,13 @@ static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time #define PROF_VAL(x) 0 #endif -// current = current as of last GC -static W_ current_residency = 0; // in words; for stats only -static W_ max_residency = 0; -static W_ cumulative_residency = 0; -static W_ residency_samples = 0; // for stats only -static W_ current_slop = 0; -static W_ max_slop = 0; +// +// All the stats! 
+// +// This is where we accumulate all the stats during execution, and it's also +// in a convenient form that we can copy over to a caller of getRTSStats(). +// +static RTSStats stats; static W_ GC_end_faults = 0; @@ -87,7 +77,7 @@ Time stat_getElapsedTime(void) double mut_user_time_until( Time t ) { - return TimeToSecondsDbl(t - GC_tot_cpu); + return TimeToSecondsDbl(t - stats.gc_cpu_ns); // heapCensus() time is included in GC_tot_cpu, so we don't need // to subtract it here. } @@ -108,7 +98,7 @@ mut_user_time( void ) static double mut_user_time_during_RP( void ) { - return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time); + return TimeToSecondsDbl(RP_start_time - stats.gc_cpu_ns - RP_tot_time); } #endif /* PROFILING */ @@ -127,15 +117,11 @@ initStats0(void) start_exit_cpu = 0; start_exit_elapsed = 0; + start_exit_gc_cpu = 0; + start_exit_gc_elapsed = 0; end_exit_cpu = 0; end_exit_elapsed = 0; - GC_tot_alloc = 0; - GC_tot_copied = 0; - GC_par_max_copied = 0; - GC_par_tot_copied = 0; - GC_tot_cpu = 0; - #ifdef PROFILING RP_start_time = 0; RP_tot_time = 0; @@ -148,12 +134,43 @@ initStats0(void) HCe_tot_time = 0; #endif - max_residency = 0; - cumulative_residency = 0; - residency_samples = 0; - max_slop = 0; - GC_end_faults = 0; + + stats = (RTSStats) { + .gcs = 0, + .major_gcs = 0, + .allocated_bytes = 0, + .max_live_bytes = 0, + .max_large_objects_bytes = 0, + .max_compact_bytes = 0, + .max_slop_bytes = 0, + .max_mem_in_use_bytes = 0, + .cumulative_live_bytes = 0, + .copied_bytes = 0, + .par_copied_bytes = 0, + .cumulative_par_max_copied_bytes = 0, + .mutator_cpu_ns = 0, + .mutator_elapsed_ns = 0, + .gc_cpu_ns = 0, + .gc_elapsed_ns = 0, + .cpu_ns = 0, + .elapsed_ns = 0, + .gc = { + .gen = 0, + .threads = 0, + .allocated_bytes = 0, + .live_bytes = 0, + .large_objects_bytes = 0, + .compact_bytes = 0, + .slop_bytes = 0, + .mem_in_use_bytes = 0, + .copied_bytes = 0, + .par_max_copied_bytes = 0, + .sync_elapsed_ns = 0, + .cpu_ns = 0, + .elapsed_ns = 0 + } + 
}; } /* --------------------------------------------------------------------------- @@ -214,6 +231,8 @@ void stat_startExit(void) { getProcessTimes(&start_exit_cpu, &start_exit_elapsed); + start_exit_gc_elapsed = stats.gc_elapsed_ns; + start_exit_gc_cpu = stats.gc_cpu_ns; } void @@ -264,17 +283,82 @@ stat_startGC (Capability *cap, gc_thread *gct) void stat_endGC (Capability *cap, gc_thread *gct, W_ live, W_ copied, W_ slop, uint32_t gen, - uint32_t par_n_threads, W_ par_max_copied, W_ par_tot_copied) + uint32_t par_n_threads, W_ par_max_copied) { - W_ tot_alloc; - W_ alloc; - if (RtsFlags.GcFlags.giveStats != NO_GC_STATS || rtsConfig.gcDoneHook != NULL || - RtsFlags.ProfFlags.doHeapProfile) - // heap profiling needs GC_tot_time + RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time { - Time cpu, elapsed, gc_cpu, gc_elapsed, gc_sync_elapsed; + // ------------------------------------------------- + // Collect all the stats about this GC in stats.gc + + stats.gc.gen = gen; + stats.gc.threads = par_n_threads; + + uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_); + + // allocated since the last GC + stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes; + + stats.gc.live_bytes = live * sizeof(W_); + stats.gc.large_objects_bytes = calcTotalLargeObjectsW() * sizeof(W_); + stats.gc.compact_bytes = calcTotalCompactW() * sizeof(W_); + stats.gc.slop_bytes = slop * sizeof(W_); + stats.gc.mem_in_use_bytes = mblocks_allocated * MBLOCK_SIZE; + stats.gc.copied_bytes = copied * sizeof(W_); + stats.gc.par_max_copied_bytes = par_max_copied * sizeof(W_); + + Time current_cpu, current_elapsed; + getProcessTimes(&current_cpu, &current_elapsed); + stats.cpu_ns = current_cpu - start_init_cpu; + stats.elapsed_ns = current_elapsed - start_init_elapsed; + + stats.gc.sync_elapsed_ns = + gct->gc_start_elapsed - gct->gc_sync_start_elapsed; + stats.gc.elapsed_ns = current_elapsed - gct->gc_start_elapsed; + stats.gc.cpu_ns = current_cpu - gct->gc_start_cpu; + + // 
------------------------------------------------- + // Update the cumulative stats + + stats.gcs++; + stats.allocated_bytes = tot_alloc_bytes; + stats.max_mem_in_use_bytes = peak_mblocks_allocated * MBLOCK_SIZE; + + GC_coll_cpu[gen] += stats.gc.cpu_ns; + GC_coll_elapsed[gen] += stats.gc.elapsed_ns; + if (GC_coll_max_pause[gen] < stats.gc.elapsed_ns) { + GC_coll_max_pause[gen] = stats.gc.elapsed_ns; + } + + stats.copied_bytes += stats.gc.copied_bytes; + if (par_n_threads > 1) { + stats.par_copied_bytes += stats.gc.copied_bytes; + stats.cumulative_par_max_copied_bytes += + stats.gc.par_max_copied_bytes; + } + stats.gc_cpu_ns += stats.gc.cpu_ns; + stats.gc_elapsed_ns += stats.gc.elapsed_ns; + + if (gen == RtsFlags.GcFlags.generations-1) { // major GC? + stats.major_gcs++; + if (stats.gc.live_bytes > stats.max_live_bytes) { + stats.max_live_bytes = stats.gc.live_bytes; + } + if (stats.gc.large_objects_bytes > stats.max_large_objects_bytes) { + stats.max_large_objects_bytes = stats.gc.large_objects_bytes; + } + if (stats.gc.compact_bytes > stats.max_compact_bytes) { + stats.max_compact_bytes = stats.gc.compact_bytes; + } + if (stats.gc.slop_bytes > stats.max_slop_bytes) { + stats.max_slop_bytes = stats.gc.slop_bytes; + } + stats.cumulative_live_bytes += stats.gc.live_bytes; + } + + // ------------------------------------------------- + // Emit events to the event log // Has to be emitted while all caps stopped for GC, but before GC_END. // See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents @@ -285,51 +369,45 @@ stat_endGC (Capability *cap, gc_thread *gct, // Emitted before GC_END on all caps, which simplifies tools code. 
traceEventGcStats(cap, CAPSET_HEAP_DEFAULT, - gen, - copied * sizeof(W_), - slop * sizeof(W_), + stats.gc.gen, + stats.gc.copied_bytes, + stats.gc.slop_bytes, /* current loss due to fragmentation */ (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks) * BLOCK_SIZE, par_n_threads, - par_max_copied * sizeof(W_), - par_tot_copied * sizeof(W_)); - - getProcessTimes(&cpu, &elapsed); + stats.gc.par_max_copied_bytes, + stats.gc.copied_bytes); // Post EVENT_GC_END with the same timestamp as used for stats // (though converted from Time=StgInt64 to EventTimestamp=StgWord64). // Here, as opposed to other places, the event is emitted on the cap // that initiates the GC and external tools expect it to have the same // timestamp as used in +RTS -s calculations. - traceEventGcEndAtT(cap, TimeToNS(elapsed - start_init_elapsed)); - - gc_sync_elapsed = gct->gc_start_elapsed - gct->gc_sync_start_elapsed; - gc_elapsed = elapsed - gct->gc_start_elapsed; - gc_cpu = cpu - gct->gc_start_cpu; + traceEventGcEndAtT(cap, TimeToNS(stats.elapsed_ns)); - /* For the moment we calculate both per-HEC and total allocation. - * There is thus redundancy here, but for the moment we will calculate - * it both the old and new way and assert they're the same. - * When we're sure it's working OK then we can simplify things. - */ - tot_alloc = calcTotalAllocated(); + if (gen == RtsFlags.GcFlags.generations-1) { // major GC? 
+ traceEventHeapLive(cap, + CAPSET_HEAP_DEFAULT, + stats.gc.live_bytes); + } - // allocated since the last GC - alloc = tot_alloc - GC_tot_alloc; - GC_tot_alloc = tot_alloc; + // ------------------------------------------------- + // Print GC stats to stdout or a file (+RTS -S/-s) if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) { W_ faults = getPageFaults(); statsPrintf("%9" FMT_Word " %9" FMT_Word " %9" FMT_Word, - alloc*sizeof(W_), copied*sizeof(W_), - live*sizeof(W_)); - statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4" FMT_Word " %4" FMT_Word " (Gen: %2d)\n", - TimeToSecondsDbl(gc_cpu), - TimeToSecondsDbl(gc_elapsed), - TimeToSecondsDbl(cpu), - TimeToSecondsDbl(elapsed - start_init_elapsed), + stats.gc.allocated_bytes, stats.gc.copied_bytes, + stats.gc.live_bytes); + + statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4" + FMT_Word " %4" FMT_Word " (Gen: %2d)\n", + TimeToSecondsDbl(stats.gc.cpu_ns), + TimeToSecondsDbl(stats.gc.elapsed_ns), + TimeToSecondsDbl(stats.cpu_ns), + TimeToSecondsDbl(stats.elapsed_ns), faults - gct->gc_start_faults, gct->gc_start_faults - GC_end_faults, gen); @@ -340,47 +418,12 @@ stat_endGC (Capability *cap, gc_thread *gct, if (rtsConfig.gcDoneHook != NULL) { - rtsConfig.gcDoneHook(gen, - alloc*sizeof(W_), - live*sizeof(W_), - copied*sizeof(W_), - par_max_copied * sizeof(W_), - mblocks_allocated * BLOCKS_PER_MBLOCK - * BLOCK_SIZE, - slop * sizeof(W_), - TimeToNS(gc_sync_elapsed), - TimeToNS(gc_elapsed), - TimeToNS(gc_cpu)); - } - - GC_coll_cpu[gen] += gc_cpu; - GC_coll_elapsed[gen] += gc_elapsed; - if (GC_coll_max_pause[gen] < gc_elapsed) { - GC_coll_max_pause[gen] = gc_elapsed; + rtsConfig.gcDoneHook(&stats.gc); } - GC_tot_copied += (StgWord64) copied; - GC_par_max_copied += (StgWord64) par_max_copied; - GC_par_tot_copied += (StgWord64) par_tot_copied; - GC_tot_cpu += gc_cpu; - traceEventHeapSize(cap, CAPSET_HEAP_DEFAULT, - mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_)); - - if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? 
*/ - if (live > max_residency) { - max_residency = live; - } - current_residency = live; - residency_samples++; - cumulative_residency += live; - traceEventHeapLive(cap, - CAPSET_HEAP_DEFAULT, - live * sizeof(W_)); - } - - if (slop > max_slop) max_slop = slop; + mblocks_allocated * MBLOCK_SIZE); } } @@ -502,8 +545,13 @@ StgInt TOTAL_CALLS=1; statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \ SLOW_CALLS_##arity * 100.0/TOTAL_CALLS) -static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; } -static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; } +STATIC_INLINE Time get_init_cpu(void) { + return end_init_cpu - start_init_cpu; +} + +STATIC_INLINE Time get_init_elapsed(void) { + return end_init_elapsed - start_init_elapsed; +} void @@ -518,81 +566,86 @@ stat_exit (void) Time mut_elapsed = 0; Time exit_cpu = 0; Time exit_elapsed = 0; - W_ tot_alloc; - W_ alloc; + Time exit_gc_cpu = 0; + Time exit_gc_elapsed = 0; if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { - char temp[BIG_STRING_LEN]; + char temp[512]; Time tot_cpu; Time tot_elapsed; - uint32_t i, g, total_collections = 0; + uint32_t g; getProcessTimes( &tot_cpu, &tot_elapsed ); + tot_cpu -= start_init_cpu; tot_elapsed -= start_init_elapsed; - tot_alloc = calcTotalAllocated(); + uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_); // allocated since the last GC - alloc = tot_alloc - GC_tot_alloc; - GC_tot_alloc = tot_alloc; - - /* Count total garbage collections */ - for (g = 0; g < RtsFlags.GcFlags.generations; g++) - total_collections += generations[g].collections; + stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes; + stats.allocated_bytes = tot_alloc_bytes; /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */ - if (tot_cpu == 0.0) tot_cpu = 1; - if (tot_elapsed == 0.0) tot_elapsed = 1; + if (tot_cpu <= 0) tot_cpu = 1; + if (tot_elapsed <= 0) tot_elapsed = 1; if (RtsFlags.GcFlags.giveStats 
>= VERBOSE_GC_STATS) { - statsPrintf("%9" FMT_Word " %9.9s %9.9s", (W_)alloc*sizeof(W_), "", ""); + statsPrintf("%9" FMT_Word " %9.9s %9.9s", + (W_)stats.gc.allocated_bytes, "", ""); statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0); } - for (i = 0; i < RtsFlags.GcFlags.generations; i++) { - gc_cpu += GC_coll_cpu[i]; - gc_elapsed += GC_coll_elapsed[i]; - } - // heapCensus() is called by the GC, so RP and HC time are // included in the GC stats. We therefore subtract them to // obtain the actual GC cpu time. - gc_cpu -= PROF_VAL(RP_tot_time + HC_tot_time); - gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time); + gc_cpu = stats.gc_cpu_ns - PROF_VAL(RP_tot_time + HC_tot_time); + gc_elapsed = stats.gc_elapsed_ns - PROF_VAL(RPe_tot_time + HCe_tot_time); init_cpu = get_init_cpu(); init_elapsed = get_init_elapsed(); - exit_cpu = end_exit_cpu - start_exit_cpu; - exit_elapsed = end_exit_elapsed - start_exit_elapsed; + // We do a GC during the EXIT phase. We'll attribute the cost of that + // to GC instead of EXIT, so carefully subtract it from the EXIT time. + exit_gc_cpu = stats.gc_cpu_ns - start_exit_gc_cpu; + exit_gc_elapsed = stats.gc_elapsed_ns - start_exit_gc_elapsed; + exit_cpu = end_exit_cpu - start_exit_cpu - exit_gc_cpu; + exit_elapsed = end_exit_elapsed - start_exit_elapsed - exit_gc_elapsed; - mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed; + mut_elapsed = start_exit_elapsed - end_init_elapsed - + (gc_elapsed - exit_gc_elapsed); - mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu + mut_cpu = start_exit_cpu - end_init_cpu - (gc_cpu - exit_gc_cpu) - PROF_VAL(RP_tot_time + HC_tot_time); if (mut_cpu < 0) { mut_cpu = 0; } + // The subdivision of runtime into INIT/EXIT/GC/MUT is just adding and + // subtracting, so the parts should add up to the total exactly. Note + // that tot_elapsed is captured a tiny bit later than end_exit_elapsed, + // so we don't use it here. 
+ ASSERT(init_elapsed + mut_elapsed + gc_elapsed + exit_elapsed + == end_exit_elapsed - start_init_elapsed); + + if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) { - showStgWord64(GC_tot_alloc*sizeof(W_), - temp, true/*commas*/); + showStgWord64(stats.allocated_bytes, temp, true/*commas*/); statsPrintf("%16s bytes allocated in the heap\n", temp); - showStgWord64(GC_tot_copied*sizeof(W_), - temp, true/*commas*/); + showStgWord64(stats.copied_bytes, temp, true/*commas*/); statsPrintf("%16s bytes copied during GC\n", temp); - if ( residency_samples > 0 ) { - showStgWord64(max_residency*sizeof(W_), - temp, true/*commas*/); - statsPrintf("%16s bytes maximum residency (%" FMT_Word " sample(s))\n", - temp, residency_samples); + if ( stats.major_gcs > 0 ) { + showStgWord64(stats.max_live_bytes, temp, true/*commas*/); + statsPrintf("%16s bytes maximum residency (%" FMT_Word32 + " sample(s))\n", + temp, stats.major_gcs); } - showStgWord64(max_slop*sizeof(W_), temp, true/*commas*/); + showStgWord64(stats.max_slop_bytes, temp, true/*commas*/); statsPrintf("%16s bytes maximum slop\n", temp); - statsPrintf("%16" FMT_SizeT " MB total memory in use (%" FMT_SizeT " MB lost due to fragmentation)\n\n", + statsPrintf("%16" FMT_SizeT " MB total memory in use (%" + FMT_SizeT " MB lost due to fragmentation)\n\n", (size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)), (size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_))); @@ -613,7 +666,7 @@ stat_exit (void) #if defined(THREADED_RTS) if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) { statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n", - 100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1) + 100 * (((double)stats.par_copied_bytes / (double)stats.cumulative_par_max_copied_bytes) - 1) / (n_capabilities - 1) ); } @@ -675,7 +728,8 @@ stat_exit (void) showStgWord64(0, temp, true/*commas*/); 
} else { showStgWord64( - (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)), + (StgWord64)((double)stats.allocated_bytes / + TimeToSecondsDbl(mut_cpu)), temp, true/*commas*/); } @@ -689,14 +743,6 @@ stat_exit (void) PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100 / TimeToSecondsDbl(tot_elapsed)); - /* - TICK_PRINT(1); - TICK_PRINT(2); - REPORT(TOTAL_CALLS); - TICK_PRINT_TOT(1); - TICK_PRINT_TOT(2); - */ - #if defined(THREADED_RTS) && defined(PROF_SPIN) { uint32_t g; @@ -732,13 +778,13 @@ stat_exit (void) fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.3f INIT (%.3f elapsed), %.3f MUT (%.3f elapsed), %.3f GC (%.3f elapsed) :ghc>>\n"; } /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */ - statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_)); + statsPrintf(fmt1, stats.allocated_bytes); statsPrintf(fmt2, - total_collections, - residency_samples == 0 ? 0 : - cumulative_residency*sizeof(W_)/residency_samples, - max_residency*sizeof(W_), - residency_samples, + stats.gcs, + stats.major_gcs == 0 ? 0 : + stats.cumulative_live_bytes/stats.major_gcs, + stats.max_live_bytes, + stats.major_gcs, (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)), TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed), TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed), @@ -833,81 +879,32 @@ statDescribeGens(void) each compilation and expression evaluation. -------------------------------------------------------------------------- */ -extern HsInt64 getAllocations( void ) -{ return (HsInt64)GC_tot_alloc * sizeof(W_); } - -/* EZY: I'm not convinced I got all the casting right. 
*/ +uint64_t getAllocations( void ) +{ + return stats.allocated_bytes; +} -extern bool getGCStatsEnabled( void ) +int getRTSStatsEnabled( void ) { return RtsFlags.GcFlags.giveStats != NO_GC_STATS; } -extern void getGCStats( GCStats *s ) +void getRTSStats( RTSStats *s ) { - uint32_t total_collections = 0; - uint32_t g; - Time gc_cpu = 0; - Time gc_elapsed = 0; Time current_elapsed = 0; Time current_cpu = 0; - getProcessTimes(&current_cpu, &current_elapsed); + *s = stats; - /* EZY: static inline'ify these */ - for (g = 0; g < RtsFlags.GcFlags.generations; g++) - total_collections += generations[g].collections; - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - gc_cpu += GC_coll_cpu[g]; - gc_elapsed += GC_coll_elapsed[g]; - } + getProcessTimes(&current_cpu, &current_elapsed); + s->cpu_ns = current_cpu - end_init_cpu; + s->elapsed_ns = current_elapsed - end_init_elapsed; - s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_); - s->num_gcs = total_collections; - s->num_byte_usage_samples = residency_samples; - s->max_bytes_used = max_residency*sizeof(W_); - s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_); - s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)); - s->mblocks_allocated = (StgWord64)mblocks_allocated; - s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_); - s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_); - s->current_bytes_used = current_residency*(StgWord64)sizeof(W_); - s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_); - /* - s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu()); - s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed()); - */ - s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time)); - s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed); - s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu); - s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed); - /* EZY: Being consistent 
with incremental output, but maybe should also discount init */ - s->cpu_seconds = TimeToSecondsDbl(current_cpu); - s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed); - s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_); - s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_); + s->mutator_cpu_ns = current_cpu - end_init_cpu - stats.gc_cpu_ns - + PROF_VAL(RP_tot_time + HC_tot_time); + s->mutator_elapsed_ns = current_elapsed - end_init_elapsed - + stats.gc_elapsed_ns; } -// extern void getTaskStats( TaskStats **s ) {} -#if 0 -extern void getSparkStats( SparkCounters *s ) { - uint32_t i; - s->created = 0; - s->dud = 0; - s->overflowed = 0; - s->converted = 0; - s->gcd = 0; - s->fizzled = 0; - for (i = 0; i < n_capabilities; i++) { - s->created += capabilities[i]->spark_stats.created; - s->dud += capabilities[i]->spark_stats.dud; - s->overflowed+= capabilities[i]->spark_stats.overflowed; - s->converted += capabilities[i]->spark_stats.converted; - s->gcd += capabilities[i]->spark_stats.gcd; - s->fizzled += capabilities[i]->spark_stats.fizzled; - } -} -#endif /* ----------------------------------------------------------------------------- Dumping stuff in the stats file, or via the debug message interface diff --git a/rts/Stats.h b/rts/Stats.h index 1d95170f2c..537f5695a5 100644 --- a/rts/Stats.h +++ b/rts/Stats.h @@ -31,7 +31,7 @@ void stat_startGCSync(struct gc_thread_ *_gct); void stat_startGC(Capability *cap, struct gc_thread_ *_gct); void stat_endGC (Capability *cap, struct gc_thread_ *_gct, W_ live, W_ copied, W_ slop, uint32_t gen, uint32_t n_gc_threads, - W_ par_max_copied, W_ par_tot_copied); + W_ par_max_copied); #ifdef PROFILING void stat_startRP(void); diff --git a/rts/sm/GC.c b/rts/sm/GC.c index ea80d6dec1..c41c9791dc 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -187,7 +187,7 @@ GarbageCollect (uint32_t collect_gen, { bdescr *bd; generation *gen; - StgWord live_blocks, live_words, par_max_copied, 
par_tot_copied; + StgWord live_blocks, live_words, par_max_copied; #if defined(THREADED_RTS) gc_thread *saved_gct; #endif @@ -459,7 +459,6 @@ GarbageCollect (uint32_t collect_gen, copied = 0; par_max_copied = 0; - par_tot_copied = 0; { uint32_t i; for (i=0; i < n_gc_threads; i++) { @@ -474,10 +473,8 @@ GarbageCollect (uint32_t collect_gen, copied += gc_threads[i]->copied; par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied); } - par_tot_copied = copied; if (n_gc_threads == 1) { par_max_copied = 0; - par_tot_copied = 0; } } @@ -773,7 +770,7 @@ GarbageCollect (uint32_t collect_gen, // ok, GC over: tell the stats department what happened. stat_endGC(cap, gct, live_words, copied, live_blocks * BLOCK_SIZE_W - live_words /* slop */, - N, n_gc_threads, par_max_copied, par_tot_copied); + N, n_gc_threads, par_max_copied); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index ad2519588b..70a5621806 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -1293,6 +1293,28 @@ calcNeeded (bool force_major, memcount *blocks_needed) return N; } +StgWord calcTotalLargeObjectsW (void) +{ + uint32_t g; + StgWord totalW = 0; + + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + totalW += generations[g].n_large_words; + } + return totalW; +} + +StgWord calcTotalCompactW (void) +{ + uint32_t g; + StgWord totalW = 0; + + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + totalW += generations[g].n_compact_blocks * BLOCK_SIZE_W; + } + return totalW; +} + /* ---------------------------------------------------------------------------- Executable memory diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index a4e928a3eb..69901fd6ed 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -100,7 +100,6 @@ StgWord calcTotalAllocated (void); Stats 'n' DEBUG stuff -------------------------------------------------------------------------- */ -StgWord countLargeAllocated (void); StgWord countOccupied 
(bdescr *bd); StgWord calcNeeded (bool force_major, StgWord *blocks_needed); @@ -110,6 +109,9 @@ StgWord gcThreadLiveBlocks (uint32_t i, uint32_t g); StgWord genLiveWords (generation *gen); StgWord genLiveBlocks (generation *gen); +StgWord calcTotalLargeObjectsW (void); +StgWord calcTotalCompactW (void); + /* ---------------------------------------------------------------------------- Storage manager internal APIs and globals ------------------------------------------------------------------------- */ |