path: root/rts/Stats.c
author     Simon Marlow <marlowsd@gmail.com>   2016-11-25 16:45:43 +0000
committer  Simon Marlow <marlowsd@gmail.com>   2016-12-06 15:25:50 +0000
commit     24e6594cc7890babe69b8ba122d171affabad2d1 (patch)
tree       0efef02a3e03787e9e6ee9822cb20efc7d48fec5 /rts/Stats.c
parent     eec02ab7c8433465cc8d6be0a8889e7c6a222fb0 (diff)
download   haskell-24e6594cc7890babe69b8ba122d171affabad2d1.tar.gz
Overhaul GC stats
Summary:

Visible API changes:

* The C struct `GCDetails` gives the stats about a single GC. This is
  passed to the `gcDone()` callback, if one is set via the RtsConfig.
  (Previously we just passed a collection of values, so this is more
  extensible, at the expense of breaking the existing API.)

* `RTSStats` gives cumulative stats since the start of the program,
  and includes the `GCDetails` for the most recent GC. This struct can
  be obtained via `getRTSStats()`. (The old `getGCStats()` has been
  removed, and `getGCStatsEnabled()` has been renamed to
  `getRTSStatsEnabled()`.)

Improvements:

* The per-GC stats and cumulative stats are now cleanly separated.

* Inside the RTS we have a top-level `RTSStats` struct to keep all our
  stats in; previously this was just a collection of strangely-named
  variables. This struct is mostly just copied in `getRTSStats()`, so
  the implementation of that function is a lot shorter.

* Types are more consistent: we use a uint64_t byte count for all
  memory values, and Time for all time values.

* Names are more consistent: we use the suffix `_bytes` for all byte
  counts and `_ns` for all time values.

* We now collect information about the amount of memory in large
  objects and compact objects in `GCDetails`. (The latter was the
  reason I started doing this patch, but it seems to have ballooned a
  bit!)

* I fixed a bug in the calculation of the elapsed MUT time, and added
  an ASSERT to stop the calculations going wrong in the future.

For now I kept the Haskell API in `GHC.Stats` the same, by
impedance-matching with the new API. We could either break that API
and make it match the C API more closely, or we could add a new API
and deprecate the old one. Opinions welcome.

This stuff is very easy to get wrong, and it's hard to test. Reviews
welcome!

Test Plan: manual testing; validate

Reviewers: bgamari, niteria, austin, ezyang, hvr, erikd, rwbarton, Phyx

Subscribers: thomie

Differential Revision: https://phabricator.haskell.org/D2756
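For readers who want to see the new callback API in action, here is a minimal host-program sketch. It is hedged: the hook type is inferred from the call site `rtsConfig.gcDoneHook(&stats.gc)` in the diff below, the `GCDetails` field names come from the initializer in `initStats0`, and `defaultRtsConfig`/`hs_init_ghc` are the usual RtsAPI entry points.

#include <stdio.h>
#include "Rts.h"

// Invoked by the RTS at the end of every GC with the per-GC details.
static void myGcDone(const struct GCDetails_ *d)
{
    fprintf(stderr, "GC of gen %u: %llu bytes live, %llu bytes copied\n",
            (unsigned)d->gen,
            (unsigned long long)d->live_bytes,
            (unsigned long long)d->copied_bytes);
}

int main(int argc, char *argv[])
{
    RtsConfig conf = defaultRtsConfig;
    conf.gcDoneHook = myGcDone;      // replaces the old many-argument hook
    hs_init_ghc(&argc, &argv, conf);
    /* ... call into Haskell ... */
    hs_exit();
    return 0;
}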
Diffstat (limited to 'rts/Stats.c')
-rw-r--r--  rts/Stats.c  437
1 file changed, 217 insertions, 220 deletions
diff --git a/rts/Stats.c b/rts/Stats.c
index 8fe9adf304..95511f2c35 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -20,25 +20,15 @@
#include "sm/GCThread.h"
#include "sm/BlockAlloc.h"
-/* huh? */
-#define BIG_STRING_LEN 512
-
#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
static Time
start_init_cpu, start_init_elapsed,
end_init_cpu, end_init_elapsed,
start_exit_cpu, start_exit_elapsed,
+ start_exit_gc_elapsed, start_exit_gc_cpu,
end_exit_cpu, end_exit_elapsed;
-static Time GC_tot_cpu = 0;
-
-static StgWord64 GC_tot_alloc = 0;
-static StgWord64 GC_tot_copied = 0;
-
-static StgWord64 GC_par_max_copied = 0;
-static StgWord64 GC_par_tot_copied = 0;
-
#ifdef PROFILING
static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time
static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time
@@ -53,13 +43,13 @@ static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
#define PROF_VAL(x) 0
#endif
-// current = current as of last GC
-static W_ current_residency = 0; // in words; for stats only
-static W_ max_residency = 0;
-static W_ cumulative_residency = 0;
-static W_ residency_samples = 0; // for stats only
-static W_ current_slop = 0;
-static W_ max_slop = 0;
+//
+// All the stats!
+//
+// This is where we accumulate all the stats during execution, and it's also
+// in a convenient form that we can copy over to a caller of getRTSStats().
+//
+static RTSStats stats;
static W_ GC_end_faults = 0;
@@ -87,7 +77,7 @@ Time stat_getElapsedTime(void)
double
mut_user_time_until( Time t )
{
- return TimeToSecondsDbl(t - GC_tot_cpu);
+ return TimeToSecondsDbl(t - stats.gc_cpu_ns);
// heapCensus() time is included in stats.gc_cpu_ns, so we don't need
// to subtract it here.
}
@@ -108,7 +98,7 @@ mut_user_time( void )
static double
mut_user_time_during_RP( void )
{
- return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time);
+ return TimeToSecondsDbl(RP_start_time - stats.gc_cpu_ns - RP_tot_time);
}
#endif /* PROFILING */
@@ -127,15 +117,11 @@ initStats0(void)
start_exit_cpu = 0;
start_exit_elapsed = 0;
+ start_exit_gc_cpu = 0;
+ start_exit_gc_elapsed = 0;
end_exit_cpu = 0;
end_exit_elapsed = 0;
- GC_tot_alloc = 0;
- GC_tot_copied = 0;
- GC_par_max_copied = 0;
- GC_par_tot_copied = 0;
- GC_tot_cpu = 0;
-
#ifdef PROFILING
RP_start_time = 0;
RP_tot_time = 0;
@@ -148,12 +134,43 @@ initStats0(void)
HCe_tot_time = 0;
#endif
- max_residency = 0;
- cumulative_residency = 0;
- residency_samples = 0;
- max_slop = 0;
-
GC_end_faults = 0;
+
+ stats = (RTSStats) {
+ .gcs = 0,
+ .major_gcs = 0,
+ .allocated_bytes = 0,
+ .max_live_bytes = 0,
+ .max_large_objects_bytes = 0,
+ .max_compact_bytes = 0,
+ .max_slop_bytes = 0,
+ .max_mem_in_use_bytes = 0,
+ .cumulative_live_bytes = 0,
+ .copied_bytes = 0,
+ .par_copied_bytes = 0,
+ .cumulative_par_max_copied_bytes = 0,
+ .mutator_cpu_ns = 0,
+ .mutator_elapsed_ns = 0,
+ .gc_cpu_ns = 0,
+ .gc_elapsed_ns = 0,
+ .cpu_ns = 0,
+ .elapsed_ns = 0,
+ .gc = {
+ .gen = 0,
+ .threads = 0,
+ .allocated_bytes = 0,
+ .live_bytes = 0,
+ .large_objects_bytes = 0,
+ .compact_bytes = 0,
+ .slop_bytes = 0,
+ .mem_in_use_bytes = 0,
+ .copied_bytes = 0,
+ .par_max_copied_bytes = 0,
+ .sync_elapsed_ns = 0,
+ .cpu_ns = 0,
+ .elapsed_ns = 0
+ }
+ };
}
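For orientation, the zero-initializer above pins down the shape of the new structs. A reconstruction (field names from the initializer; the commit message says byte counts are uint64_t and times are Time, but the authoritative definitions live in the public RTS headers, not in this file):

typedef struct GCDetails_ {
    uint32_t gen;                  // generation collected
    uint32_t threads;              // number of GC threads used
    uint64_t allocated_bytes;      // allocated since the previous GC
    uint64_t live_bytes;           // live data after this GC
    uint64_t large_objects_bytes;  // live data in large objects
    uint64_t compact_bytes;        // live data in compact regions
    uint64_t slop_bytes;           // wasted space (slop)
    uint64_t mem_in_use_bytes;     // memory currently held from the OS
    uint64_t copied_bytes;         // bytes copied during this GC
    uint64_t par_max_copied_bytes; // most copied by any single thread
    Time     sync_elapsed_ns;      // elapsed time waiting for caps to stop
    Time     cpu_ns;               // CPU time used by this GC
    Time     elapsed_ns;           // wall-clock time taken by this GC
} GCDetails;

RTSStats carries the cumulative counterparts (gcs, major_gcs, allocated_bytes, the max_*/cumulative_* fields, and the mutator/GC time totals) plus an embedded `GCDetails gc` for the most recent collection.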
/* ---------------------------------------------------------------------------
@@ -214,6 +231,8 @@ void
stat_startExit(void)
{
getProcessTimes(&start_exit_cpu, &start_exit_elapsed);
+ start_exit_gc_elapsed = stats.gc_elapsed_ns;
+ start_exit_gc_cpu = stats.gc_cpu_ns;
}
void
@@ -264,17 +283,82 @@ stat_startGC (Capability *cap, gc_thread *gct)
void
stat_endGC (Capability *cap, gc_thread *gct,
W_ live, W_ copied, W_ slop, uint32_t gen,
- uint32_t par_n_threads, W_ par_max_copied, W_ par_tot_copied)
+ uint32_t par_n_threads, W_ par_max_copied)
{
- W_ tot_alloc;
- W_ alloc;
-
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
rtsConfig.gcDoneHook != NULL ||
- RtsFlags.ProfFlags.doHeapProfile)
- // heap profiling needs GC_tot_time
+ RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time
{
- Time cpu, elapsed, gc_cpu, gc_elapsed, gc_sync_elapsed;
+ // -------------------------------------------------
+ // Collect all the stats about this GC in stats.gc
+
+ stats.gc.gen = gen;
+ stats.gc.threads = par_n_threads;
+
+ uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);
+
+ // allocated since the last GC
+ stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
+
+ stats.gc.live_bytes = live * sizeof(W_);
+ stats.gc.large_objects_bytes = calcTotalLargeObjectsW() * sizeof(W_);
+ stats.gc.compact_bytes = calcTotalCompactW() * sizeof(W_);
+ stats.gc.slop_bytes = slop * sizeof(W_);
+ stats.gc.mem_in_use_bytes = mblocks_allocated * MBLOCK_SIZE;
+ stats.gc.copied_bytes = copied * sizeof(W_);
+ stats.gc.par_max_copied_bytes = par_max_copied * sizeof(W_);
+
+ Time current_cpu, current_elapsed;
+ getProcessTimes(&current_cpu, &current_elapsed);
+ stats.cpu_ns = current_cpu - start_init_cpu;
+ stats.elapsed_ns = current_elapsed - start_init_elapsed;
+
+ stats.gc.sync_elapsed_ns =
+ gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
+ stats.gc.elapsed_ns = current_elapsed - gct->gc_start_elapsed;
+ stats.gc.cpu_ns = current_cpu - gct->gc_start_cpu;
+
+ // -------------------------------------------------
+ // Update the cumulative stats
+
+ stats.gcs++;
+ stats.allocated_bytes = tot_alloc_bytes;
+ stats.max_mem_in_use_bytes = peak_mblocks_allocated * MBLOCK_SIZE;
+
+ GC_coll_cpu[gen] += stats.gc.cpu_ns;
+ GC_coll_elapsed[gen] += stats.gc.elapsed_ns;
+ if (GC_coll_max_pause[gen] < stats.gc.elapsed_ns) {
+ GC_coll_max_pause[gen] = stats.gc.elapsed_ns;
+ }
+
+ stats.copied_bytes += stats.gc.copied_bytes;
+ if (par_n_threads > 1) {
+ stats.par_copied_bytes += stats.gc.copied_bytes;
+ stats.cumulative_par_max_copied_bytes +=
+ stats.gc.par_max_copied_bytes;
+ }
+ stats.gc_cpu_ns += stats.gc.cpu_ns;
+ stats.gc_elapsed_ns += stats.gc.elapsed_ns;
+
+ if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
+ stats.major_gcs++;
+ if (stats.gc.live_bytes > stats.max_live_bytes) {
+ stats.max_live_bytes = stats.gc.live_bytes;
+ }
+ if (stats.gc.large_objects_bytes > stats.max_large_objects_bytes) {
+ stats.max_large_objects_bytes = stats.gc.large_objects_bytes;
+ }
+ if (stats.gc.compact_bytes > stats.max_compact_bytes) {
+ stats.max_compact_bytes = stats.gc.compact_bytes;
+ }
+ if (stats.gc.slop_bytes > stats.max_slop_bytes) {
+ stats.max_slop_bytes = stats.gc.slop_bytes;
+ }
+ stats.cumulative_live_bytes += stats.gc.live_bytes;
+ }
+
+ // -------------------------------------------------
+ // Emit events to the event log
// Has to be emitted while all caps stopped for GC, but before GC_END.
// See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents
@@ -285,51 +369,45 @@ stat_endGC (Capability *cap, gc_thread *gct,
// Emitted before GC_END on all caps, which simplifies tools code.
traceEventGcStats(cap,
CAPSET_HEAP_DEFAULT,
- gen,
- copied * sizeof(W_),
- slop * sizeof(W_),
+ stats.gc.gen,
+ stats.gc.copied_bytes,
+ stats.gc.slop_bytes,
/* current loss due to fragmentation */
(mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks)
* BLOCK_SIZE,
par_n_threads,
- par_max_copied * sizeof(W_),
- par_tot_copied * sizeof(W_));
-
- getProcessTimes(&cpu, &elapsed);
+ stats.gc.par_max_copied_bytes,
+ stats.gc.copied_bytes);
// Post EVENT_GC_END with the same timestamp as used for stats
// (though converted from Time=StgInt64 to EventTimestamp=StgWord64).
// Here, as opposed to other places, the event is emitted on the cap
// that initiates the GC and external tools expect it to have the same
timestamp as used in +RTS -s calculations.
- traceEventGcEndAtT(cap, TimeToNS(elapsed - start_init_elapsed));
-
- gc_sync_elapsed = gct->gc_start_elapsed - gct->gc_sync_start_elapsed;
- gc_elapsed = elapsed - gct->gc_start_elapsed;
- gc_cpu = cpu - gct->gc_start_cpu;
+ traceEventGcEndAtT(cap, TimeToNS(stats.elapsed_ns));
- /* For the moment we calculate both per-HEC and total allocation.
- * There is thus redundancy here, but for the moment we will calculate
- * it both the old and new way and assert they're the same.
- * When we're sure it's working OK then we can simplify things.
- */
- tot_alloc = calcTotalAllocated();
+ if (gen == RtsFlags.GcFlags.generations-1) { // major GC?
+ traceEventHeapLive(cap,
+ CAPSET_HEAP_DEFAULT,
+ stats.gc.live_bytes);
+ }
- // allocated since the last GC
- alloc = tot_alloc - GC_tot_alloc;
- GC_tot_alloc = tot_alloc;
+ // -------------------------------------------------
+ // Print GC stats to stdout or a file (+RTS -S/-s)
if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
W_ faults = getPageFaults();
statsPrintf("%9" FMT_Word " %9" FMT_Word " %9" FMT_Word,
- alloc*sizeof(W_), copied*sizeof(W_),
- live*sizeof(W_));
- statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4" FMT_Word " %4" FMT_Word " (Gen: %2d)\n",
- TimeToSecondsDbl(gc_cpu),
- TimeToSecondsDbl(gc_elapsed),
- TimeToSecondsDbl(cpu),
- TimeToSecondsDbl(elapsed - start_init_elapsed),
+ stats.gc.allocated_bytes, stats.gc.copied_bytes,
+ stats.gc.live_bytes);
+
+ statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4"
+ FMT_Word " %4" FMT_Word " (Gen: %2d)\n",
+ TimeToSecondsDbl(stats.gc.cpu_ns),
+ TimeToSecondsDbl(stats.gc.elapsed_ns),
+ TimeToSecondsDbl(stats.cpu_ns),
+ TimeToSecondsDbl(stats.elapsed_ns),
faults - gct->gc_start_faults,
gct->gc_start_faults - GC_end_faults,
gen);
@@ -340,47 +418,12 @@ stat_endGC (Capability *cap, gc_thread *gct,
if (rtsConfig.gcDoneHook != NULL) {
- rtsConfig.gcDoneHook(gen,
- alloc*sizeof(W_),
- live*sizeof(W_),
- copied*sizeof(W_),
- par_max_copied * sizeof(W_),
- mblocks_allocated * BLOCKS_PER_MBLOCK
- * BLOCK_SIZE,
- slop * sizeof(W_),
- TimeToNS(gc_sync_elapsed),
- TimeToNS(gc_elapsed),
- TimeToNS(gc_cpu));
- }
-
- GC_coll_cpu[gen] += gc_cpu;
- GC_coll_elapsed[gen] += gc_elapsed;
- if (GC_coll_max_pause[gen] < gc_elapsed) {
- GC_coll_max_pause[gen] = gc_elapsed;
+ rtsConfig.gcDoneHook(&stats.gc);
}
- GC_tot_copied += (StgWord64) copied;
- GC_par_max_copied += (StgWord64) par_max_copied;
- GC_par_tot_copied += (StgWord64) par_tot_copied;
- GC_tot_cpu += gc_cpu;
-
traceEventHeapSize(cap,
CAPSET_HEAP_DEFAULT,
- mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_));
-
- if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
- if (live > max_residency) {
- max_residency = live;
- }
- current_residency = live;
- residency_samples++;
- cumulative_residency += live;
- traceEventHeapLive(cap,
- CAPSET_HEAP_DEFAULT,
- live * sizeof(W_));
- }
-
- if (slop > max_slop) max_slop = slop;
+ mblocks_allocated * MBLOCK_SIZE);
}
}
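One subtlety in the hunk above: `calcTotalAllocated()` is cumulative over the whole run, so the per-GC allocation figure is computed as a delta against the cumulative total recorded at the previous GC. A toy trace with hypothetical numbers:

// GC #1: calcTotalAllocated() * sizeof(W_)  ->  10,000,000 bytes so far
//        stats.gc.allocated_bytes = 10,000,000 - 0          = 10,000,000
//        stats.allocated_bytes    = 10,000,000
// GC #2: calcTotalAllocated() * sizeof(W_)  ->  14,000,000 bytes so far
//        stats.gc.allocated_bytes = 14,000,000 - 10,000,000 =  4,000,000
//        stats.allocated_bytes    = 14,000,000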
@@ -502,8 +545,13 @@ StgInt TOTAL_CALLS=1;
statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
-static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; }
-static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; }
+STATIC_INLINE Time get_init_cpu(void) {
+ return end_init_cpu - start_init_cpu;
+}
+
+STATIC_INLINE Time get_init_elapsed(void) {
+ return end_init_elapsed - start_init_elapsed;
+}
void
@@ -518,81 +566,86 @@ stat_exit (void)
Time mut_elapsed = 0;
Time exit_cpu = 0;
Time exit_elapsed = 0;
- W_ tot_alloc;
- W_ alloc;
+ Time exit_gc_cpu = 0;
+ Time exit_gc_elapsed = 0;
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
- char temp[BIG_STRING_LEN];
+ char temp[512];
Time tot_cpu;
Time tot_elapsed;
- uint32_t i, g, total_collections = 0;
+ uint32_t g;
getProcessTimes( &tot_cpu, &tot_elapsed );
+ tot_cpu -= start_init_cpu;
tot_elapsed -= start_init_elapsed;
- tot_alloc = calcTotalAllocated();
+ uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_);
// allocated since the last GC
- alloc = tot_alloc - GC_tot_alloc;
- GC_tot_alloc = tot_alloc;
-
- /* Count total garbage collections */
- for (g = 0; g < RtsFlags.GcFlags.generations; g++)
- total_collections += generations[g].collections;
+ stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes;
+ stats.allocated_bytes = tot_alloc_bytes;
/* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
- if (tot_cpu == 0.0) tot_cpu = 1;
- if (tot_elapsed == 0.0) tot_elapsed = 1;
+ if (tot_cpu <= 0) tot_cpu = 1;
+ if (tot_elapsed <= 0) tot_elapsed = 1;
if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
- statsPrintf("%9" FMT_Word " %9.9s %9.9s", (W_)alloc*sizeof(W_), "", "");
+ statsPrintf("%9" FMT_Word " %9.9s %9.9s",
+ (W_)stats.gc.allocated_bytes, "", "");
statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
}
- for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
- gc_cpu += GC_coll_cpu[i];
- gc_elapsed += GC_coll_elapsed[i];
- }
-
// heapCensus() is called by the GC, so RP and HC time are
// included in the GC stats. We therefore subtract them to
// obtain the actual GC cpu time.
- gc_cpu -= PROF_VAL(RP_tot_time + HC_tot_time);
- gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time);
+ gc_cpu = stats.gc_cpu_ns - PROF_VAL(RP_tot_time + HC_tot_time);
+ gc_elapsed = stats.gc_elapsed_ns - PROF_VAL(RPe_tot_time + HCe_tot_time);
init_cpu = get_init_cpu();
init_elapsed = get_init_elapsed();
- exit_cpu = end_exit_cpu - start_exit_cpu;
- exit_elapsed = end_exit_elapsed - start_exit_elapsed;
+ // We do a GC during the EXIT phase. We'll attribute the cost of that
+ // to GC instead of EXIT, so carefully subtract it from the EXIT time.
+ exit_gc_cpu = stats.gc_cpu_ns - start_exit_gc_cpu;
+ exit_gc_elapsed = stats.gc_elapsed_ns - start_exit_gc_elapsed;
+ exit_cpu = end_exit_cpu - start_exit_cpu - exit_gc_cpu;
+ exit_elapsed = end_exit_elapsed - start_exit_elapsed - exit_gc_elapsed;
- mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;
+ mut_elapsed = start_exit_elapsed - end_init_elapsed -
+ (gc_elapsed - exit_gc_elapsed);
- mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
+ mut_cpu = start_exit_cpu - end_init_cpu - (gc_cpu - exit_gc_cpu)
- PROF_VAL(RP_tot_time + HC_tot_time);
if (mut_cpu < 0) { mut_cpu = 0; }
+ // The subdivision of runtime into INIT/EXIT/GC/MUT is just adding and
+ // subtracting, so the parts should add up to the total exactly. Note
+ // that tot_elapsed is captured a tiny bit later than end_exit_elapsed,
+ // so we don't use it here.
+ ASSERT(init_elapsed + mut_elapsed + gc_elapsed + exit_elapsed
+ == end_exit_elapsed - start_init_elapsed);
+
+
if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
- showStgWord64(GC_tot_alloc*sizeof(W_),
- temp, true/*commas*/);
+ showStgWord64(stats.allocated_bytes, temp, true/*commas*/);
statsPrintf("%16s bytes allocated in the heap\n", temp);
- showStgWord64(GC_tot_copied*sizeof(W_),
- temp, true/*commas*/);
+ showStgWord64(stats.copied_bytes, temp, true/*commas*/);
statsPrintf("%16s bytes copied during GC\n", temp);
- if ( residency_samples > 0 ) {
- showStgWord64(max_residency*sizeof(W_),
- temp, true/*commas*/);
- statsPrintf("%16s bytes maximum residency (%" FMT_Word " sample(s))\n",
- temp, residency_samples);
+ if ( stats.major_gcs > 0 ) {
+ showStgWord64(stats.max_live_bytes, temp, true/*commas*/);
+ statsPrintf("%16s bytes maximum residency (%" FMT_Word32
+ " sample(s))\n",
+ temp, stats.major_gcs);
}
- showStgWord64(max_slop*sizeof(W_), temp, true/*commas*/);
+ showStgWord64(stats.max_slop_bytes, temp, true/*commas*/);
statsPrintf("%16s bytes maximum slop\n", temp);
- statsPrintf("%16" FMT_SizeT " MB total memory in use (%" FMT_SizeT " MB lost due to fragmentation)\n\n",
+ statsPrintf("%16" FMT_SizeT " MB total memory in use (%"
+ FMT_SizeT " MB lost due to fragmentation)\n\n",
(size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)),
(size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
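The INIT/MUT/GC/EXIT bookkeeping above is easiest to sanity-check with made-up numbers. Suppose (elapsed times in ms, all hypothetical): start_init = 0, end_init = 2, start_exit = 90, end_exit = 100, total gc_elapsed = 30, of which exit_gc_elapsed = 5 occurred during the EXIT phase. Then:

// init = 2 - 0               =  2
// exit = (100 - 90) - 5      =  5   // EXIT-phase GC re-attributed to GC
// mut  = 90 - 2 - (30 - 5)   = 63   // only pre-EXIT GC is subtracted here
// gc   =                       30
// sum  = 2 + 63 + 30 + 5     = 100  // == end_exit - start_init, per the ASSERT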
@@ -613,7 +666,7 @@ stat_exit (void)
#if defined(THREADED_RTS)
if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
- 100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
+ 100 * (((double)stats.par_copied_bytes / (double)stats.cumulative_par_max_copied_bytes) - 1)
/ (n_capabilities - 1)
);
}
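The work-balance expression is compact enough to misread, so here it is factored out as a standalone sketch (not code from the patch). With n capabilities, perfect balance means every thread copied an equal share, so tot/max == n and the metric is 100%; a fully serial GC means tot == max, giving 0%.

// Sketch: parallel GC work balance as a percentage.
static double par_gc_work_balance(double tot_copied_bytes,
                                  double max_copied_bytes,
                                  uint32_t n_caps)
{
    if (max_copied_bytes <= 0 || n_caps <= 1) return 0.0; // degenerate cases
    return 100 * ((tot_copied_bytes / max_copied_bytes) - 1) / (n_caps - 1);
}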
@@ -675,7 +728,8 @@ stat_exit (void)
showStgWord64(0, temp, true/*commas*/);
} else {
showStgWord64(
- (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)),
+ (StgWord64)((double)stats.allocated_bytes /
+ TimeToSecondsDbl(mut_cpu)),
temp, true/*commas*/);
}
@@ -689,14 +743,6 @@ stat_exit (void)
PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100
/ TimeToSecondsDbl(tot_elapsed));
- /*
- TICK_PRINT(1);
- TICK_PRINT(2);
- REPORT(TOTAL_CALLS);
- TICK_PRINT_TOT(1);
- TICK_PRINT_TOT(2);
- */
-
#if defined(THREADED_RTS) && defined(PROF_SPIN)
{
uint32_t g;
@@ -732,13 +778,13 @@ stat_exit (void)
fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.3f INIT (%.3f elapsed), %.3f MUT (%.3f elapsed), %.3f GC (%.3f elapsed) :ghc>>\n";
}
/* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
- statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
+ statsPrintf(fmt1, stats.allocated_bytes);
statsPrintf(fmt2,
- total_collections,
- residency_samples == 0 ? 0 :
- cumulative_residency*sizeof(W_)/residency_samples,
- max_residency*sizeof(W_),
- residency_samples,
+ stats.gcs,
+ stats.major_gcs == 0 ? 0 :
+ stats.cumulative_live_bytes/stats.major_gcs,
+ stats.max_live_bytes,
+ stats.major_gcs,
(unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
@@ -833,81 +879,32 @@ statDescribeGens(void)
each compilation and expression evaluation.
-------------------------------------------------------------------------- */
-extern HsInt64 getAllocations( void )
-{ return (HsInt64)GC_tot_alloc * sizeof(W_); }
-
-/* EZY: I'm not convinced I got all the casting right. */
+uint64_t getAllocations( void )
+{
+ return stats.allocated_bytes;
+}
-extern bool getGCStatsEnabled( void )
+int getRTSStatsEnabled( void )
{
return RtsFlags.GcFlags.giveStats != NO_GC_STATS;
}
-extern void getGCStats( GCStats *s )
+void getRTSStats( RTSStats *s )
{
- uint32_t total_collections = 0;
- uint32_t g;
- Time gc_cpu = 0;
- Time gc_elapsed = 0;
Time current_elapsed = 0;
Time current_cpu = 0;
- getProcessTimes(&current_cpu, &current_elapsed);
+ *s = stats;
- /* EZY: static inline'ify these */
- for (g = 0; g < RtsFlags.GcFlags.generations; g++)
- total_collections += generations[g].collections;
-
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- gc_cpu += GC_coll_cpu[g];
- gc_elapsed += GC_coll_elapsed[g];
- }
+ getProcessTimes(&current_cpu, &current_elapsed);
+ s->cpu_ns = current_cpu - end_init_cpu;
+ s->elapsed_ns = current_elapsed - end_init_elapsed;
- s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
- s->num_gcs = total_collections;
- s->num_byte_usage_samples = residency_samples;
- s->max_bytes_used = max_residency*sizeof(W_);
- s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
- s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L));
- s->mblocks_allocated = (StgWord64)mblocks_allocated;
- s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
- s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
- s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
- s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
- /*
- s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu());
- s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed());
- */
- s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
- s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed);
- s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu);
- s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed);
- /* EZY: Being consistent with incremental output, but maybe should also discount init */
- s->cpu_seconds = TimeToSecondsDbl(current_cpu);
- s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
- s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
- s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
+ s->mutator_cpu_ns = current_cpu - end_init_cpu - stats.gc_cpu_ns -
+ PROF_VAL(RP_tot_time + HC_tot_time);
+ s->mutator_elapsed_ns = current_elapsed - end_init_elapsed -
+ stats.gc_elapsed_ns;
}
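A caller of the reworked query API might look like the following sketch (hedged: the field names are read off the code above, and the integer widths -- uint32_t counters, uint64_t byte counts -- are assumptions based on the commit message):

#include <inttypes.h>
#include <stdio.h>
#include "Rts.h"

void dumpRtsStats(void)
{
    if (getRTSStatsEnabled()) {
        RTSStats s;
        getRTSStats(&s);
        printf("%" PRIu32 " GCs (%" PRIu32 " major), %" PRIu64
               " bytes allocated, max residency %" PRIu64 " bytes\n",
               s.gcs, s.major_gcs, s.allocated_bytes, s.max_live_bytes);
    }
}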
-// extern void getTaskStats( TaskStats **s ) {}
-#if 0
-extern void getSparkStats( SparkCounters *s ) {
- uint32_t i;
- s->created = 0;
- s->dud = 0;
- s->overflowed = 0;
- s->converted = 0;
- s->gcd = 0;
- s->fizzled = 0;
- for (i = 0; i < n_capabilities; i++) {
- s->created += capabilities[i]->spark_stats.created;
- s->dud += capabilities[i]->spark_stats.dud;
- s->overflowed+= capabilities[i]->spark_stats.overflowed;
- s->converted += capabilities[i]->spark_stats.converted;
- s->gcd += capabilities[i]->spark_stats.gcd;
- s->fizzled += capabilities[i]->spark_stats.fizzled;
- }
-}
-#endif
/* -----------------------------------------------------------------------------
Dumping stuff in the stats file, or via the debug message interface