diff options
author | Simon Marlow <marlowsd@gmail.com> | 2016-11-25 16:45:43 +0000 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2016-12-06 15:25:50 +0000 |
commit | 24e6594cc7890babe69b8ba122d171affabad2d1 (patch) | |
tree | 0efef02a3e03787e9e6ee9822cb20efc7d48fec5 /rts/Stats.c | |
parent | eec02ab7c8433465cc8d6be0a8889e7c6a222fb0 (diff) | |
download | haskell-24e6594cc7890babe69b8ba122d171affabad2d1.tar.gz |
Overhaul GC stats
Summary:
Visible API changes:
* The C struct `GCDetails` gives the stats about a single GC. This is
passed to the `gcDone()` callback if one is set via the
RtsConfig. (previously we just passed a collection of values, so this
is more extensible, at the expense of breaking the existing API)
* `RTSStats` gives cumulative stats since the start of the program,
and includes the `GCDetails` for the most recent GC. This struct
can be obtained via `getRTSStats()` (the old `getGCStats()` has been
removed, and `getGCStatsEnabled()` has been renamed to
`getRTSStatsEnabled()`)
Improvements:
* The per-GC stats and cumulative stats are now cleanly separated.
* Inside the RTS we have a top-level `RTSStats` struct to keep all our
stats in, previously this was just a collection of strangely-named
variables. This struct is mostly just copied in `getRTSStats()`, so
the implementation of that function is a lot shorter.
* Types are more consistent. We use a uint64_t byte count for all
memory values, and Time for all time values.
* Names are more consistent. We use a suffix `_bytes` for all byte
counts and `_ns` for all time values.
* We now collect information about the amount of memory in large
objects and compact objects in `GCDetails`. (the latter was the reason
I started doing this patch but it seems to have ballooned a bit!)
* I fixed a bug in the calculation of the elapsed MUT time, and added
an ASSERT to stop the calculations going wrong in the future.
For now I kept the Haskell API in `GHC.Stats` the same, by
impedance-matching with the new API. We could either break that API
and make it match the C API more closely, or we could add a new API
and deprecate the old one. Opinions welcome.
This stuff is very easy to get wrong, and it's hard to test. Reviews
welcome!
Test Plan:
manual testing
validate
Reviewers: bgamari, niteria, austin, ezyang, hvr, erikd, rwbarton, Phyx
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D2756
Diffstat (limited to 'rts/Stats.c')
-rw-r--r-- | rts/Stats.c | 437 |
1 files changed, 217 insertions, 220 deletions
diff --git a/rts/Stats.c b/rts/Stats.c index 8fe9adf304..95511f2c35 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -20,25 +20,15 @@ #include "sm/GCThread.h" #include "sm/BlockAlloc.h" -/* huh? */ -#define BIG_STRING_LEN 512 - #define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION) static Time start_init_cpu, start_init_elapsed, end_init_cpu, end_init_elapsed, start_exit_cpu, start_exit_elapsed, + start_exit_gc_elapsed, start_exit_gc_cpu, end_exit_cpu, end_exit_elapsed; -static Time GC_tot_cpu = 0; - -static StgWord64 GC_tot_alloc = 0; -static StgWord64 GC_tot_copied = 0; - -static StgWord64 GC_par_max_copied = 0; -static StgWord64 GC_par_tot_copied = 0; - #ifdef PROFILING static Time RP_start_time = 0, RP_tot_time = 0; // retainer prof user time static Time RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time @@ -53,13 +43,13 @@ static Time HCe_start_time, HCe_tot_time = 0; // heap census prof elap time #define PROF_VAL(x) 0 #endif -// current = current as of last GC -static W_ current_residency = 0; // in words; for stats only -static W_ max_residency = 0; -static W_ cumulative_residency = 0; -static W_ residency_samples = 0; // for stats only -static W_ current_slop = 0; -static W_ max_slop = 0; +// +// All the stats! +// +// This is where we accumulate all the stats during execution, and it's also +// in a convenient form that we can copy over to a caller of getRTSStats(). +// +static RTSStats stats; static W_ GC_end_faults = 0; @@ -87,7 +77,7 @@ Time stat_getElapsedTime(void) double mut_user_time_until( Time t ) { - return TimeToSecondsDbl(t - GC_tot_cpu); + return TimeToSecondsDbl(t - stats.gc_cpu_ns); // heapCensus() time is included in GC_tot_cpu, so we don't need // to subtract it here. 
} @@ -108,7 +98,7 @@ mut_user_time( void ) static double mut_user_time_during_RP( void ) { - return TimeToSecondsDbl(RP_start_time - GC_tot_cpu - RP_tot_time); + return TimeToSecondsDbl(RP_start_time - stats.gc_cpu_ns - RP_tot_time); } #endif /* PROFILING */ @@ -127,15 +117,11 @@ initStats0(void) start_exit_cpu = 0; start_exit_elapsed = 0; + start_exit_gc_cpu = 0; + start_exit_gc_elapsed = 0; end_exit_cpu = 0; end_exit_elapsed = 0; - GC_tot_alloc = 0; - GC_tot_copied = 0; - GC_par_max_copied = 0; - GC_par_tot_copied = 0; - GC_tot_cpu = 0; - #ifdef PROFILING RP_start_time = 0; RP_tot_time = 0; @@ -148,12 +134,43 @@ initStats0(void) HCe_tot_time = 0; #endif - max_residency = 0; - cumulative_residency = 0; - residency_samples = 0; - max_slop = 0; - GC_end_faults = 0; + + stats = (RTSStats) { + .gcs = 0, + .major_gcs = 0, + .allocated_bytes = 0, + .max_live_bytes = 0, + .max_large_objects_bytes = 0, + .max_compact_bytes = 0, + .max_slop_bytes = 0, + .max_mem_in_use_bytes = 0, + .cumulative_live_bytes = 0, + .copied_bytes = 0, + .par_copied_bytes = 0, + .cumulative_par_max_copied_bytes = 0, + .mutator_cpu_ns = 0, + .mutator_elapsed_ns = 0, + .gc_cpu_ns = 0, + .gc_elapsed_ns = 0, + .cpu_ns = 0, + .elapsed_ns = 0, + .gc = { + .gen = 0, + .threads = 0, + .allocated_bytes = 0, + .live_bytes = 0, + .large_objects_bytes = 0, + .compact_bytes = 0, + .slop_bytes = 0, + .mem_in_use_bytes = 0, + .copied_bytes = 0, + .par_max_copied_bytes = 0, + .sync_elapsed_ns = 0, + .cpu_ns = 0, + .elapsed_ns = 0 + } + }; } /* --------------------------------------------------------------------------- @@ -214,6 +231,8 @@ void stat_startExit(void) { getProcessTimes(&start_exit_cpu, &start_exit_elapsed); + start_exit_gc_elapsed = stats.gc_elapsed_ns; + start_exit_gc_cpu = stats.gc_cpu_ns; } void @@ -264,17 +283,82 @@ stat_startGC (Capability *cap, gc_thread *gct) void stat_endGC (Capability *cap, gc_thread *gct, W_ live, W_ copied, W_ slop, uint32_t gen, - uint32_t par_n_threads, W_ 
par_max_copied, W_ par_tot_copied) + uint32_t par_n_threads, W_ par_max_copied) { - W_ tot_alloc; - W_ alloc; - if (RtsFlags.GcFlags.giveStats != NO_GC_STATS || rtsConfig.gcDoneHook != NULL || - RtsFlags.ProfFlags.doHeapProfile) - // heap profiling needs GC_tot_time + RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time { - Time cpu, elapsed, gc_cpu, gc_elapsed, gc_sync_elapsed; + // ------------------------------------------------- + // Collect all the stats about this GC in stats.gc + + stats.gc.gen = gen; + stats.gc.threads = par_n_threads; + + uint64_t tot_alloc_bytes = calcTotalAllocated() * sizeof(W_); + + // allocated since the last GC + stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes; + + stats.gc.live_bytes = live * sizeof(W_); + stats.gc.large_objects_bytes = calcTotalLargeObjectsW() * sizeof(W_); + stats.gc.compact_bytes = calcTotalCompactW() * sizeof(W_); + stats.gc.slop_bytes = slop * sizeof(W_); + stats.gc.mem_in_use_bytes = mblocks_allocated * MBLOCK_SIZE; + stats.gc.copied_bytes = copied * sizeof(W_); + stats.gc.par_max_copied_bytes = par_max_copied * sizeof(W_); + + Time current_cpu, current_elapsed; + getProcessTimes(¤t_cpu, ¤t_elapsed); + stats.cpu_ns = current_cpu - start_init_cpu; + stats.elapsed_ns = current_elapsed - start_init_elapsed; + + stats.gc.sync_elapsed_ns = + gct->gc_start_elapsed - gct->gc_sync_start_elapsed; + stats.gc.elapsed_ns = current_elapsed - gct->gc_start_elapsed; + stats.gc.cpu_ns = current_cpu - gct->gc_start_cpu; + + // ------------------------------------------------- + // Update the cumulative stats + + stats.gcs++; + stats.allocated_bytes = tot_alloc_bytes; + stats.max_mem_in_use_bytes = peak_mblocks_allocated * MBLOCK_SIZE; + + GC_coll_cpu[gen] += stats.gc.cpu_ns; + GC_coll_elapsed[gen] += stats.gc.elapsed_ns; + if (GC_coll_max_pause[gen] < stats.gc.elapsed_ns) { + GC_coll_max_pause[gen] = stats.gc.elapsed_ns; + } + + stats.copied_bytes += stats.gc.copied_bytes; + if 
(par_n_threads > 1) { + stats.par_copied_bytes += stats.gc.copied_bytes; + stats.cumulative_par_max_copied_bytes += + stats.gc.par_max_copied_bytes; + } + stats.gc_cpu_ns += stats.gc.cpu_ns; + stats.gc_elapsed_ns += stats.gc.elapsed_ns; + + if (gen == RtsFlags.GcFlags.generations-1) { // major GC? + stats.major_gcs++; + if (stats.gc.live_bytes > stats.max_live_bytes) { + stats.max_live_bytes = stats.gc.live_bytes; + } + if (stats.gc.large_objects_bytes > stats.max_large_objects_bytes) { + stats.max_large_objects_bytes = stats.gc.large_objects_bytes; + } + if (stats.gc.compact_bytes > stats.max_compact_bytes) { + stats.max_compact_bytes = stats.gc.compact_bytes; + } + if (stats.gc.slop_bytes > stats.max_slop_bytes) { + stats.max_slop_bytes = stats.gc.slop_bytes; + } + stats.cumulative_live_bytes += stats.gc.live_bytes; + } + + // ------------------------------------------------- + // Emit events to the event log // Has to be emitted while all caps stopped for GC, but before GC_END. // See trac.haskell.org/ThreadScope/wiki/RTSsummaryEvents @@ -285,51 +369,45 @@ stat_endGC (Capability *cap, gc_thread *gct, // Emitted before GC_END on all caps, which simplifies tools code. traceEventGcStats(cap, CAPSET_HEAP_DEFAULT, - gen, - copied * sizeof(W_), - slop * sizeof(W_), + stats.gc.gen, + stats.gc.copied_bytes, + stats.gc.slop_bytes, /* current loss due to fragmentation */ (mblocks_allocated * BLOCKS_PER_MBLOCK - n_alloc_blocks) * BLOCK_SIZE, par_n_threads, - par_max_copied * sizeof(W_), - par_tot_copied * sizeof(W_)); - - getProcessTimes(&cpu, &elapsed); + stats.gc.par_max_copied_bytes, + stats.gc.copied_bytes); // Post EVENT_GC_END with the same timestamp as used for stats // (though converted from Time=StgInt64 to EventTimestamp=StgWord64). // Here, as opposed to other places, the event is emitted on the cap // that initiates the GC and external tools expect it to have the same // timestamp as used in +RTS -s calculcations. 
- traceEventGcEndAtT(cap, TimeToNS(elapsed - start_init_elapsed)); - - gc_sync_elapsed = gct->gc_start_elapsed - gct->gc_sync_start_elapsed; - gc_elapsed = elapsed - gct->gc_start_elapsed; - gc_cpu = cpu - gct->gc_start_cpu; + traceEventGcEndAtT(cap, TimeToNS(stats.elapsed_ns)); - /* For the moment we calculate both per-HEC and total allocation. - * There is thus redundancy here, but for the moment we will calculate - * it both the old and new way and assert they're the same. - * When we're sure it's working OK then we can simplify things. - */ - tot_alloc = calcTotalAllocated(); + if (gen == RtsFlags.GcFlags.generations-1) { // major GC? + traceEventHeapLive(cap, + CAPSET_HEAP_DEFAULT, + stats.gc.live_bytes); + } - // allocated since the last GC - alloc = tot_alloc - GC_tot_alloc; - GC_tot_alloc = tot_alloc; + // ------------------------------------------------- + // Print GC stats to stdout or a file (+RTS -S/-s) if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) { W_ faults = getPageFaults(); statsPrintf("%9" FMT_Word " %9" FMT_Word " %9" FMT_Word, - alloc*sizeof(W_), copied*sizeof(W_), - live*sizeof(W_)); - statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4" FMT_Word " %4" FMT_Word " (Gen: %2d)\n", - TimeToSecondsDbl(gc_cpu), - TimeToSecondsDbl(gc_elapsed), - TimeToSecondsDbl(cpu), - TimeToSecondsDbl(elapsed - start_init_elapsed), + stats.gc.allocated_bytes, stats.gc.copied_bytes, + stats.gc.live_bytes); + + statsPrintf(" %6.3f %6.3f %8.3f %8.3f %4" + FMT_Word " %4" FMT_Word " (Gen: %2d)\n", + TimeToSecondsDbl(stats.gc.cpu_ns), + TimeToSecondsDbl(stats.gc.elapsed_ns), + TimeToSecondsDbl(stats.cpu_ns), + TimeToSecondsDbl(stats.elapsed_ns), faults - gct->gc_start_faults, gct->gc_start_faults - GC_end_faults, gen); @@ -340,47 +418,12 @@ stat_endGC (Capability *cap, gc_thread *gct, if (rtsConfig.gcDoneHook != NULL) { - rtsConfig.gcDoneHook(gen, - alloc*sizeof(W_), - live*sizeof(W_), - copied*sizeof(W_), - par_max_copied * sizeof(W_), - mblocks_allocated * BLOCKS_PER_MBLOCK 
- * BLOCK_SIZE, - slop * sizeof(W_), - TimeToNS(gc_sync_elapsed), - TimeToNS(gc_elapsed), - TimeToNS(gc_cpu)); - } - - GC_coll_cpu[gen] += gc_cpu; - GC_coll_elapsed[gen] += gc_elapsed; - if (GC_coll_max_pause[gen] < gc_elapsed) { - GC_coll_max_pause[gen] = gc_elapsed; + rtsConfig.gcDoneHook(&stats.gc); } - GC_tot_copied += (StgWord64) copied; - GC_par_max_copied += (StgWord64) par_max_copied; - GC_par_tot_copied += (StgWord64) par_tot_copied; - GC_tot_cpu += gc_cpu; - traceEventHeapSize(cap, CAPSET_HEAP_DEFAULT, - mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_)); - - if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */ - if (live > max_residency) { - max_residency = live; - } - current_residency = live; - residency_samples++; - cumulative_residency += live; - traceEventHeapLive(cap, - CAPSET_HEAP_DEFAULT, - live * sizeof(W_)); - } - - if (slop > max_slop) max_slop = slop; + mblocks_allocated * MBLOCK_SIZE); } } @@ -502,8 +545,13 @@ StgInt TOTAL_CALLS=1; statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \ SLOW_CALLS_##arity * 100.0/TOTAL_CALLS) -static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; } -static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; } +STATIC_INLINE Time get_init_cpu(void) { + return end_init_cpu - start_init_cpu; +} + +STATIC_INLINE Time get_init_elapsed(void) { + return end_init_elapsed - start_init_elapsed; +} void @@ -518,81 +566,86 @@ stat_exit (void) Time mut_elapsed = 0; Time exit_cpu = 0; Time exit_elapsed = 0; - W_ tot_alloc; - W_ alloc; + Time exit_gc_cpu = 0; + Time exit_gc_elapsed = 0; if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { - char temp[BIG_STRING_LEN]; + char temp[512]; Time tot_cpu; Time tot_elapsed; - uint32_t i, g, total_collections = 0; + uint32_t g; getProcessTimes( &tot_cpu, &tot_elapsed ); + tot_cpu -= start_init_cpu; tot_elapsed -= start_init_elapsed; - tot_alloc = calcTotalAllocated(); + uint64_t tot_alloc_bytes = 
calcTotalAllocated() * sizeof(W_); // allocated since the last GC - alloc = tot_alloc - GC_tot_alloc; - GC_tot_alloc = tot_alloc; - - /* Count total garbage collections */ - for (g = 0; g < RtsFlags.GcFlags.generations; g++) - total_collections += generations[g].collections; + stats.gc.allocated_bytes = tot_alloc_bytes - stats.allocated_bytes; + stats.allocated_bytes = tot_alloc_bytes; /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */ - if (tot_cpu == 0.0) tot_cpu = 1; - if (tot_elapsed == 0.0) tot_elapsed = 1; + if (tot_cpu <= 0) tot_cpu = 1; + if (tot_elapsed <= 0) tot_elapsed = 1; if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) { - statsPrintf("%9" FMT_Word " %9.9s %9.9s", (W_)alloc*sizeof(W_), "", ""); + statsPrintf("%9" FMT_Word " %9.9s %9.9s", + (W_)stats.gc.allocated_bytes, "", ""); statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0); } - for (i = 0; i < RtsFlags.GcFlags.generations; i++) { - gc_cpu += GC_coll_cpu[i]; - gc_elapsed += GC_coll_elapsed[i]; - } - // heapCensus() is called by the GC, so RP and HC time are // included in the GC stats. We therefore subtract them to // obtain the actual GC cpu time. - gc_cpu -= PROF_VAL(RP_tot_time + HC_tot_time); - gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time); + gc_cpu = stats.gc_cpu_ns - PROF_VAL(RP_tot_time + HC_tot_time); + gc_elapsed = stats.gc_elapsed_ns - PROF_VAL(RPe_tot_time + HCe_tot_time); init_cpu = get_init_cpu(); init_elapsed = get_init_elapsed(); - exit_cpu = end_exit_cpu - start_exit_cpu; - exit_elapsed = end_exit_elapsed - start_exit_elapsed; + // We do a GC during the EXIT phase. We'll attribute the cost of that + // to GC instead of EXIT, so carefully subtract it from the EXIT time. 
+ exit_gc_cpu = stats.gc_cpu_ns - start_exit_gc_cpu; + exit_gc_elapsed = stats.gc_elapsed_ns - start_exit_gc_elapsed; + exit_cpu = end_exit_cpu - start_exit_cpu - exit_gc_cpu; + exit_elapsed = end_exit_elapsed - start_exit_elapsed - exit_gc_elapsed; - mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed; + mut_elapsed = start_exit_elapsed - end_init_elapsed - + (gc_elapsed - exit_gc_elapsed); - mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu + mut_cpu = start_exit_cpu - end_init_cpu - (gc_cpu - exit_gc_cpu) - PROF_VAL(RP_tot_time + HC_tot_time); if (mut_cpu < 0) { mut_cpu = 0; } + // The subdivision of runtime into INIT/EXIT/GC/MUT is just adding and + // subtracting, so the parts should add up to the total exactly. Note + // that tot_elapsed is captured a tiny bit later than end_exit_elapsed, + // so we don't use it here. + ASSERT(init_elapsed + mut_elapsed + gc_elapsed + exit_elapsed + == end_exit_elapsed - start_init_elapsed); + + if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) { - showStgWord64(GC_tot_alloc*sizeof(W_), - temp, true/*commas*/); + showStgWord64(stats.allocated_bytes, temp, true/*commas*/); statsPrintf("%16s bytes allocated in the heap\n", temp); - showStgWord64(GC_tot_copied*sizeof(W_), - temp, true/*commas*/); + showStgWord64(stats.copied_bytes, temp, true/*commas*/); statsPrintf("%16s bytes copied during GC\n", temp); - if ( residency_samples > 0 ) { - showStgWord64(max_residency*sizeof(W_), - temp, true/*commas*/); - statsPrintf("%16s bytes maximum residency (%" FMT_Word " sample(s))\n", - temp, residency_samples); + if ( stats.major_gcs > 0 ) { + showStgWord64(stats.max_live_bytes, temp, true/*commas*/); + statsPrintf("%16s bytes maximum residency (%" FMT_Word32 + " sample(s))\n", + temp, stats.major_gcs); } - showStgWord64(max_slop*sizeof(W_), temp, true/*commas*/); + showStgWord64(stats.max_slop_bytes, temp, true/*commas*/); statsPrintf("%16s bytes maximum slop\n", temp); - statsPrintf("%16" FMT_SizeT " MB total memory in 
use (%" FMT_SizeT " MB lost due to fragmentation)\n\n", + statsPrintf("%16" FMT_SizeT " MB total memory in use (%" + FMT_SizeT " MB lost due to fragmentation)\n\n", (size_t)(peak_mblocks_allocated * MBLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)), (size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_))); @@ -613,7 +666,7 @@ stat_exit (void) #if defined(THREADED_RTS) if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) { statsPrintf("\n Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n", - 100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1) + 100 * (((double)stats.par_copied_bytes / (double)stats.cumulative_par_max_copied_bytes) - 1) / (n_capabilities - 1) ); } @@ -675,7 +728,8 @@ stat_exit (void) showStgWord64(0, temp, true/*commas*/); } else { showStgWord64( - (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)), + (StgWord64)((double)stats.allocated_bytes / + TimeToSecondsDbl(mut_cpu)), temp, true/*commas*/); } @@ -689,14 +743,6 @@ stat_exit (void) PROF_VAL(RPe_tot_time + HCe_tot_time) - init_elapsed) * 100 / TimeToSecondsDbl(tot_elapsed)); - /* - TICK_PRINT(1); - TICK_PRINT(2); - REPORT(TOTAL_CALLS); - TICK_PRINT_TOT(1); - TICK_PRINT_TOT(2); - */ - #if defined(THREADED_RTS) && defined(PROF_SPIN) { uint32_t g; @@ -732,13 +778,13 @@ stat_exit (void) fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.3f INIT (%.3f elapsed), %.3f MUT (%.3f elapsed), %.3f GC (%.3f elapsed) :ghc>>\n"; } /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */ - statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_)); + statsPrintf(fmt1, stats.allocated_bytes); statsPrintf(fmt2, - total_collections, - residency_samples == 0 ? 0 : - cumulative_residency*sizeof(W_)/residency_samples, - max_residency*sizeof(W_), - residency_samples, + stats.gcs, + stats.major_gcs == 0 ? 
0 : + stats.cumulative_live_bytes/stats.major_gcs, + stats.max_live_bytes, + stats.major_gcs, (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)), TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed), TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed), @@ -833,81 +879,32 @@ statDescribeGens(void) each compilation and expression evaluation. -------------------------------------------------------------------------- */ -extern HsInt64 getAllocations( void ) -{ return (HsInt64)GC_tot_alloc * sizeof(W_); } - -/* EZY: I'm not convinced I got all the casting right. */ +uint64_t getAllocations( void ) +{ + return stats.allocated_bytes; +} -extern bool getGCStatsEnabled( void ) +int getRTSStatsEnabled( void ) { return RtsFlags.GcFlags.giveStats != NO_GC_STATS; } -extern void getGCStats( GCStats *s ) +void getRTSStats( RTSStats *s ) { - uint32_t total_collections = 0; - uint32_t g; - Time gc_cpu = 0; - Time gc_elapsed = 0; Time current_elapsed = 0; Time current_cpu = 0; - getProcessTimes(¤t_cpu, ¤t_elapsed); + *s = stats; - /* EZY: static inline'ify these */ - for (g = 0; g < RtsFlags.GcFlags.generations; g++) - total_collections += generations[g].collections; - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - gc_cpu += GC_coll_cpu[g]; - gc_elapsed += GC_coll_elapsed[g]; - } + getProcessTimes(¤t_cpu, ¤t_elapsed); + s->cpu_ns = current_cpu - end_init_cpu; + s->elapsed_ns = current_elapsed - end_init_elapsed; - s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_); - s->num_gcs = total_collections; - s->num_byte_usage_samples = residency_samples; - s->max_bytes_used = max_residency*sizeof(W_); - s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_); - s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)); - s->mblocks_allocated = (StgWord64)mblocks_allocated; - s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_); - s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_); - 
s->current_bytes_used = current_residency*(StgWord64)sizeof(W_); - s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_); - /* - s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu()); - s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed()); - */ - s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time)); - s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed- end_init_elapsed - gc_elapsed); - s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu); - s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed); - /* EZY: Being consistent with incremental output, but maybe should also discount init */ - s->cpu_seconds = TimeToSecondsDbl(current_cpu); - s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed); - s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_); - s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_); + s->mutator_cpu_ns = current_cpu - end_init_cpu - stats.gc_cpu_ns - + PROF_VAL(RP_tot_time + HC_tot_time); + s->mutator_elapsed_ns = current_elapsed - end_init_elapsed - + stats.gc_elapsed_ns; } -// extern void getTaskStats( TaskStats **s ) {} -#if 0 -extern void getSparkStats( SparkCounters *s ) { - uint32_t i; - s->created = 0; - s->dud = 0; - s->overflowed = 0; - s->converted = 0; - s->gcd = 0; - s->fizzled = 0; - for (i = 0; i < n_capabilities; i++) { - s->created += capabilities[i]->spark_stats.created; - s->dud += capabilities[i]->spark_stats.dud; - s->overflowed+= capabilities[i]->spark_stats.overflowed; - s->converted += capabilities[i]->spark_stats.converted; - s->gcd += capabilities[i]->spark_stats.gcd; - s->fizzled += capabilities[i]->spark_stats.fizzled; - } -} -#endif /* ----------------------------------------------------------------------------- Dumping stuff in the stats file, or via the debug message interface |