diff options
author | Simon Marlow <marlowsd@gmail.com> | 2013-02-14 10:06:44 +0000 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2013-02-14 10:06:53 +0000 |
commit | 65a0e1eb88fb48d085f8da498a7acc2fd345c2a8 (patch) | |
tree | 3d5c6489c1b51d085a9f8b313aae5daa3330bcf2 | |
parent | e5085db5d16f904f9307445fbafc206283f630c7 (diff) | |
download | haskell-65a0e1eb88fb48d085f8da498a7acc2fd345c2a8.tar.gz |
Simplify the allocation stats accounting
We were doing it in two different ways and asserting that the results
were the same. In most cases they were, but I found one case where
they weren't: the GC itself allocates some memory for running
finalizers, and this memory was accounted for one way but not the
other.
It was simpler to remove the old way of counting allocation than to
try to fix it up, so I did that.
-rw-r--r-- | includes/rts/storage/GC.h | 2 | ||||
-rw-r--r-- | rts/Stats.c | 71 | ||||
-rw-r--r-- | rts/Stats.h | 4 | ||||
-rw-r--r-- | rts/sm/GC.c | 33 | ||||
-rw-r--r-- | rts/sm/GCThread.h | 1 | ||||
-rw-r--r-- | rts/sm/Storage.c | 32 | ||||
-rw-r--r-- | rts/sm/Storage.h | 4 |
7 files changed, 67 insertions, 80 deletions
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index a5f4ed6f36..80f11d3ee1 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -77,7 +77,7 @@ typedef struct generation_ { memcount n_large_blocks; // no. of blocks used by large objs memcount n_large_words; // no. of words used by large objs memcount n_new_large_words; // words of new large objects - // (for allocation stats) + // (for doYouWantToGC()) memcount max_blocks; // max blocks diff --git a/rts/Stats.c b/rts/Stats.c index 6c8efd638d..3dc1ebe0fb 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -335,14 +335,38 @@ stat_gcWorkerThreadDone (gc_thread *gct STG_UNUSED) } /* ----------------------------------------------------------------------------- + * Calculate the total allocated memory since the start of the + * program. Also emits events reporting the per-cap allocation + * totals. + * -------------------------------------------------------------------------- */ + +static StgWord +calcTotalAllocated(void) +{ + W_ tot_alloc = 0; + W_ n; + for (n = 0; n < n_capabilities; n++) { + tot_alloc += capabilities[n].total_allocated; + traceEventHeapAllocated(&capabilities[n], + CAPSET_HEAP_DEFAULT, + capabilities[n].total_allocated * sizeof(W_)); + } + + return tot_alloc; +} + +/* ----------------------------------------------------------------------------- Called at the end of each GC -------------------------------------------------------------------------- */ void stat_endGC (Capability *cap, gc_thread *gct, - W_ alloc, W_ live, W_ copied, W_ slop, nat gen, + W_ live, W_ copied, W_ slop, nat gen, nat par_n_threads, W_ par_max_copied, W_ par_tot_copied) { + W_ tot_alloc; + W_ alloc; + if (RtsFlags.GcFlags.giveStats != NO_GC_STATS || RtsFlags.ProfFlags.doHeapProfile) // heap profiling needs GC_tot_time @@ -380,6 +404,17 @@ stat_endGC (Capability *cap, gc_thread *gct, gc_elapsed = elapsed - gct->gc_start_elapsed; gc_cpu = cpu - gct->gc_start_cpu; + /* For the moment we calculate 
both per-HEC and total allocation. + * There is thus redundancy here, but for the moment we will calculate + * it both the old and new way and assert they're the same. + * When we're sure it's working OK then we can simplify things. + */ + tot_alloc = calcTotalAllocated(); + + // allocated since the last GC + alloc = tot_alloc - GC_tot_alloc; + GC_tot_alloc = tot_alloc; + if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) { W_ faults = getPageFaults(); @@ -406,29 +441,10 @@ stat_endGC (Capability *cap, gc_thread *gct, } GC_tot_copied += (StgWord64) copied; - GC_tot_alloc += (StgWord64) alloc; GC_par_max_copied += (StgWord64) par_max_copied; GC_par_tot_copied += (StgWord64) par_tot_copied; GC_tot_cpu += gc_cpu; - /* For the moment we calculate both per-HEC and total allocation. - * There is thus redundancy here, but for the moment we will calculate - * it both the old and new way and assert they're the same. - * When we're sure it's working OK then we can simplify things. - * TODO: simplify calcAllocated and clearNurseries so they don't have - * to calculate the total - */ - { - W_ tot_alloc = 0; - W_ n; - for (n = 0; n < n_capabilities; n++) { - tot_alloc += capabilities[n].total_allocated; - traceEventHeapAllocated(&capabilities[n], - CAPSET_HEAP_DEFAULT, - capabilities[n].total_allocated * sizeof(W_)); - } - ASSERT(GC_tot_alloc == tot_alloc); - } traceEventHeapSize(cap, CAPSET_HEAP_DEFAULT, mblocks_allocated * MBLOCK_SIZE_W * sizeof(W_)); @@ -587,8 +603,9 @@ StgInt TOTAL_CALLS=1; static inline Time get_init_cpu(void) { return end_init_cpu - start_init_cpu; } static inline Time get_init_elapsed(void) { return end_init_elapsed - start_init_elapsed; } + void -stat_exit(int alloc) +stat_exit (void) { generation *gen; Time gc_cpu = 0; @@ -599,6 +616,8 @@ stat_exit(int alloc) Time mut_elapsed = 0; Time exit_cpu = 0; Time exit_elapsed = 0; + W_ tot_alloc; + W_ alloc; if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { @@ -610,13 +629,11 @@ stat_exit(int alloc) 
getProcessTimes( &tot_cpu, &tot_elapsed ); tot_elapsed -= start_init_elapsed; - GC_tot_alloc += alloc; + tot_alloc = calcTotalAllocated(); - for (i = 0; i < n_capabilities; i++) { - traceEventHeapAllocated(&capabilities[i], - CAPSET_HEAP_DEFAULT, - capabilities[i].total_allocated * sizeof(W_)); - } + // allocated since the last GC + alloc = tot_alloc - GC_tot_alloc; + GC_tot_alloc = tot_alloc; /* Count total garbage collections */ for (g = 0; g < RtsFlags.GcFlags.generations; g++) diff --git a/rts/Stats.h b/rts/Stats.h index 0961f3faf1..9839e5cf2a 100644 --- a/rts/Stats.h +++ b/rts/Stats.h @@ -29,7 +29,7 @@ void stat_endInit(void); void stat_startGC(Capability *cap, struct gc_thread_ *_gct); void stat_endGC (Capability *cap, struct gc_thread_ *_gct, - W_ alloc, W_ live, W_ copied, W_ slop, nat gen, + W_ live, W_ copied, W_ slop, nat gen, nat n_gc_threads, W_ par_max_copied, W_ par_tot_copied); void stat_gcWorkerThreadStart (struct gc_thread_ *_gct); @@ -52,7 +52,7 @@ void stat_endHeapCensus(void); void stat_startExit(void); void stat_endExit(void); -void stat_exit(int alloc); +void stat_exit(void); void stat_workerStop(void); void initStats0(void); diff --git a/rts/sm/GC.c b/rts/sm/GC.c index f4a479ec63..ea0e4030bd 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -156,7 +156,7 @@ static StgWord dec_running (void); static void wakeup_gc_threads (nat me); static void shutdown_gc_threads (nat me); static void collect_gct_blocks (void); -static StgWord collect_pinned_object_blocks (void); +static void collect_pinned_object_blocks (void); #if 0 && defined(DEBUG) static void gcCAFs (void); @@ -186,7 +186,7 @@ GarbageCollect (nat collect_gen, { bdescr *bd; generation *gen; - StgWord live_blocks, live_words, allocated, par_max_copied, par_tot_copied; + StgWord live_blocks, live_words, par_max_copied, par_tot_copied; #if defined(THREADED_RTS) gc_thread *saved_gct; #endif @@ -243,11 +243,6 @@ GarbageCollect (nat collect_gen, } #endif - /* Approximate how much we allocated. 
- * Todo: only when generating stats? - */ - allocated = countLargeAllocated(); /* don't count the nursery yet */ - /* Figure out which generation to collect */ N = collect_gen; @@ -304,7 +299,7 @@ GarbageCollect (nat collect_gen, // gather blocks allocated using allocatePinned() from each capability // and put them on the g0->large_object list. - allocated += collect_pinned_object_blocks(); + collect_pinned_object_blocks(); // Initialise all the generations/steps that we're collecting. for (g = 0; g <= N; g++) { @@ -419,7 +414,7 @@ GarbageCollect (nat collect_gen, } if (!DEBUG_IS_ON && n_gc_threads != 1) { - gct->allocated = clearNursery(cap); + clearNursery(cap); } shutdown_gc_threads(gct->thread_index); @@ -659,17 +654,14 @@ GarbageCollect (nat collect_gen, // Reset the nursery: make the blocks empty if (DEBUG_IS_ON || n_gc_threads == 1) { for (n = 0; n < n_capabilities; n++) { - allocated += clearNursery(&capabilities[n]); + clearNursery(&capabilities[n]); } } else { // When doing parallel GC, clearNursery() is called by the - // worker threads, and the value returned is stored in - // gct->allocated. + // worker threads for (n = 0; n < n_capabilities; n++) { if (gc_threads[n]->idle) { - allocated += clearNursery(&capabilities[n]); - } else { - allocated += gc_threads[n]->allocated; + clearNursery(&capabilities[n]); } } } @@ -781,7 +773,7 @@ GarbageCollect (nat collect_gen, #endif // ok, GC over: tell the stats department what happened. - stat_endGC(cap, gct, allocated, live_words, copied, + stat_endGC(cap, gct, live_words, copied, live_blocks * BLOCK_SIZE_W - live_words /* slop */, N, n_gc_threads, par_max_copied, par_tot_copied); @@ -1094,7 +1086,7 @@ gcWorkerThread (Capability *cap) scavenge_until_all_done(); if (!DEBUG_IS_ON) { - gct->allocated = clearNursery(cap); + clearNursery(cap); } #ifdef THREADED_RTS @@ -1439,17 +1431,15 @@ collect_gct_blocks (void) purposes. 
-------------------------------------------------------------------------- */ -static StgWord +static void collect_pinned_object_blocks (void) { nat n; bdescr *bd, *prev; - StgWord allocated = 0; for (n = 0; n < n_capabilities; n++) { prev = NULL; for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) { - allocated += bd->free - bd->start; prev = bd; } if (prev != NULL) { @@ -1461,8 +1451,6 @@ collect_pinned_object_blocks (void) capabilities[n].pinned_object_blocks = 0; } } - - return allocated; } /* ----------------------------------------------------------------------------- @@ -1480,7 +1468,6 @@ init_gc_thread (gc_thread *t) t->failed_to_evac = rtsFalse; t->eager_promotion = rtsTrue; t->thunk_selector_depth = 0; - t->allocated = 0; t->copied = 0; t->scanned = 0; t->any_work = 0; diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h index 59327bc20b..7aacb4eb51 100644 --- a/rts/sm/GCThread.h +++ b/rts/sm/GCThread.h @@ -176,7 +176,6 @@ typedef struct gc_thread_ { // ------------------- // stats - W_ allocated; // result of clearNursery() W_ copied; W_ scanned; W_ any_work; diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 518ae0d0c8..f14b3b0c9d 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -240,8 +240,8 @@ void storageAddCapabilities (nat from, nat to) void exitStorage (void) { - W_ allocated = updateNurseriesStats(); - stat_exit(allocated); + updateNurseriesStats(); + stat_exit(); } void @@ -508,22 +508,18 @@ allocNurseries (nat from, nat to) assignNurseriesToCapabilities(from, to); } -W_ +void clearNursery (Capability *cap) { bdescr *bd; - W_ allocated = 0; for (bd = nurseries[cap->no].blocks; bd; bd = bd->link) { - allocated += (W_)(bd->free - bd->start); cap->total_allocated += (W_)(bd->free - bd->start); bd->free = bd->start; ASSERT(bd->gen_no == 0); ASSERT(bd->gen == g0); IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE)); } - - return allocated; } void @@ -771,6 +767,7 @@ allocatePinned (Capability *cap, W_ n) // 
g0->large_objects. if (bd != NULL) { dbl_link_onto(bd, &cap->pinned_object_blocks); + // add it to the allocation stats when the block is full cap->total_allocated += bd->free - bd->start; } @@ -927,32 +924,19 @@ dirty_MVAR(StgRegTable *reg, StgClosure *p) * updateNurseriesStats() * * Update the per-cap total_allocated numbers with an approximation of - * the amount of memory used in each cap's nursery. Also return the - * total across all caps. - * + * the amount of memory used in each cap's nursery. + * * Since this update is also performed by clearNurseries() then we only * need this function for the final stats when the RTS is shutting down. * -------------------------------------------------------------------------- */ -W_ -updateNurseriesStats (void) +void updateNurseriesStats (void) { - W_ allocated = 0; nat i; for (i = 0; i < n_capabilities; i++) { - int cap_allocated = countOccupied(nurseries[i].blocks); - capabilities[i].total_allocated += cap_allocated; - allocated += cap_allocated; + capabilities[i].total_allocated += countOccupied(nurseries[i].blocks); } - - return allocated; -} - -W_ -countLargeAllocated (void) -{ - return g0->n_new_large_words; } W_ countOccupied (bdescr *bd) diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index 65f5242c31..c4f8709847 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -82,7 +82,7 @@ void dirty_TVAR(Capability *cap, StgTVar *p); extern nursery *nurseries; void resetNurseries ( void ); -W_ clearNursery ( Capability *cap ); +void clearNursery ( Capability *cap ); void resizeNurseries ( W_ blocks ); void resizeNurseriesFixed ( W_ blocks ); W_ countNurseryBlocks ( void ); @@ -91,7 +91,7 @@ W_ countNurseryBlocks ( void ); Stats 'n' DEBUG stuff -------------------------------------------------------------------------- */ -W_ updateNurseriesStats (void); +void updateNurseriesStats (void); W_ countLargeAllocated (void); W_ countOccupied (bdescr *bd); W_ calcNeeded (rtsBool force_major, W_ *blocks_needed); |