Diffstat (limited to 'rts/sm/GC.c')
-rw-r--r-- | rts/sm/GC.c | 187
1 file changed, 86 insertions, 101 deletions
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index dfebd55334..ddb538472f 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -6,7 +6,7 @@
  *
  * Documentation on the architecture of the Garbage Collector can be
  * found in the online commentary:
- * 
+ *
  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  *
  * ---------------------------------------------------------------------------*/
@@ -41,9 +41,6 @@
 #include "Prelude.h"
 #include "RtsSignals.h"
 #include "STM.h"
-#if defined(RTS_GTK_FRONTPANEL)
-#include "FrontPanel.h"
-#endif
 #include "Trace.h"
 #include "RetainerProfile.h"
 #include "LdvProfile.h"
@@ -75,13 +72,13 @@
  * linking objects on to the list.  We use a stack-type list, consing
  * objects on the front as they are added (this means that the
  * scavenge phase is depth-first, not breadth-first, but that
- * shouldn't matter). 
+ * shouldn't matter).
  *
  * A separate list is kept for objects that have been scavenged
  * already - this is so that we can zero all the marks afterwards.
  *
  * An object is on the list if its static link field is non-zero; this
- * means that we have to mark the end of the list with '1', not NULL. 
+ * means that we have to mark the end of the list with '1', not NULL.
  *
  * Extra notes for generational GC:
  *
@@ -103,7 +100,7 @@ rtsBool major_gc;
 
 /* Data used for allocation area sizing. */
-static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC 
+static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
 
 /* Mut-list stats */
 #ifdef DEBUG
@@ -216,7 +213,7 @@ GarbageCollect (nat collect_gen,
   // this is the main thread
   SET_GCT(gc_threads[cap->no]);
 
-  // tell the stats department that we've started a GC 
+  // tell the stats department that we've started a GC
   stat_startGC(cap, gct);
 
   // lock the StablePtr table
@@ -235,7 +232,7 @@ GarbageCollect (nat collect_gen,
       mutlist_OTHERS = 0;
 #endif
 
-  // attribute any costs to CCS_GC 
+  // attribute any costs to CCS_GC
 #ifdef PROFILING
   for (n = 0; n < n_capabilities; n++) {
       save_CCS[n] = capabilities[n].r.rCCCS;
@@ -254,7 +251,7 @@ GarbageCollect (nat collect_gen,
   // It's not always a good idea to do load balancing in parallel
   // GC.  In particular, for a parallel program we don't want to
   // lose locality by moving cached data into another CPU's cache
-  // (this effect can be quite significant). 
+  // (this effect can be quite significant).
   //
   // We could have a more complex way to deterimine whether to do
   // work stealing or not, e.g. it might be a good idea to do it
@@ -283,14 +280,8 @@ GarbageCollect (nat collect_gen,
   debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
         N, n_gc_threads);
 
-#ifdef RTS_GTK_FRONTPANEL
-  if (RtsFlags.GcFlags.frontpanel) {
-      updateFrontPanelBeforeGC(N);
-  }
-#endif
-
 #ifdef DEBUG
-  // check for memory leaks if DEBUG is on 
+  // check for memory leaks if DEBUG is on
   memInventory(DEBUG_gc);
 #endif
 
@@ -401,10 +392,10 @@ GarbageCollect (nat collect_gen,
       scavenge_until_all_done();
       // The other threads are now stopped.  We might recurse back to
      // here, but from now on this is the only thread.
-      
+
      // must be last...  invariant is that everything is fully
      // scavenged at this point.
-      if (traverseWeakPtrList()) { // returns rtsTrue if evaced something 
+      if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
          inc_running();
          continue;
       }
@@ -451,7 +442,7 @@ GarbageCollect (nat collect_gen,
 
   // Finally: compact or sweep the oldest generation.
   if (major_gc && oldest_gen->mark) {
-      if (oldest_gen->compact) 
+      if (oldest_gen->compact)
           compact(gct->scavenged_static_objects);
       else
           sweep(oldest_gen);
@@ -460,7 +451,7 @@ GarbageCollect (nat collect_gen,
   copied = 0;
   par_max_copied = 0;
   par_tot_copied = 0;
-  { 
+  {
      nat i;
      for (i=0; i < n_gc_threads; i++) {
          if (n_gc_threads > 1) {
@@ -494,7 +485,7 @@ GarbageCollect (nat collect_gen,
 
   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
     if (g == N) {
-      generations[g].collections++; // for stats 
+      generations[g].collections++; // for stats
       if (n_gc_threads > 1) generations[g].par_collections++;
     }
 
@@ -521,7 +512,7 @@ GarbageCollect (nat collect_gen,
     bdescr *next, *prev;
     gen = &generations[g];
 
-    // for generations we collected... 
+    // for generations we collected...
     if (g <= N) {
 
       /* free old memory and shift to-space into from-space for all
@@ -532,12 +523,12 @@ GarbageCollect (nat collect_gen,
       {
         // tack the new blocks on the end of the existing blocks
         if (gen->old_blocks != NULL) {
-            
+
            prev = NULL;
            for (bd = gen->old_blocks; bd != NULL; bd = next) {
-                
+
               next = bd->link;
-                
+
               if (!(bd->flags & BF_MARKED))
               {
                   if (prev == NULL) {
@@ -551,17 +542,17 @@ GarbageCollect (nat collect_gen,
               else
               {
                   gen->n_words += bd->free - bd->start;
-                    
+
                   // NB. this step might not be compacted next
                   // time, so reset the BF_MARKED flags.
                   // They are set before GC if we're going to
                   // compact.  (search for BF_MARKED above).
                   bd->flags &= ~BF_MARKED;
-                    
+
                   // between GCs, all blocks in the heap except
                   // for the nursery have the BF_EVACUATED flag set.
                   bd->flags |= BF_EVACUATED;
-                    
+
                   prev = bd;
               }
            }
@@ -606,8 +597,8 @@ GarbageCollect (nat collect_gen,
           dbl_link_onto(bd, &gen->large_objects);
           gen->n_large_words += bd->free - bd->start;
       }
-      
-      // add the new blocks we promoted during this GC 
+
+      // add the new blocks we promoted during this GC
       gen->n_large_blocks += gen->n_scavenged_large_blocks;
   }
 
@@ -634,7 +625,7 @@ GarbageCollect (nat collect_gen,
 
   // update the max size of older generations after a major GC
   resize_generations();
-  
+
   // Free the mark stack.
   if (mark_stack_top_bd != NULL) {
       debugTrace(DEBUG_gc, "mark stack: %d blocks",
@@ -671,10 +662,10 @@ GarbageCollect (nat collect_gen,
   resetNurseries();
 
   // mark the garbage collected CAFs as dead
-#if 0 && defined(DEBUG) // doesn't work at the moment 
+#if 0 && defined(DEBUG) // doesn't work at the moment
   if (major_gc) { gcCAFs(); }
 #endif
-  
+
 #ifdef PROFILING
   // resetStaticObjectForRetainerProfiling() must be called before
   // zeroing below.
@@ -683,7 +674,7 @@ GarbageCollect (nat collect_gen,
   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 #endif
 
-  // zero the scavenged static object list 
+  // zero the scavenged static object list
   if (major_gc) {
       nat i;
       if (n_gc_threads == 1) {
@@ -750,11 +741,11 @@ GarbageCollect (nat collect_gen,
   IF_DEBUG(gc, statDescribeGens());
 
 #ifdef DEBUG
-  // symbol-table based profiling 
+  // symbol-table based profiling
   /* heapCensus(to_blocks); */ /* ToDo */
 #endif
 
-  // restore enclosing cost centre 
+  // restore enclosing cost centre
 #ifdef PROFILING
   for (n = 0; n < n_capabilities; n++) {
       capabilities[n].r.rCCCS = save_CCS[n];
@@ -762,17 +753,11 @@ GarbageCollect (nat collect_gen,
 #endif
 
 #ifdef DEBUG
-  // check for memory leaks if DEBUG is on 
+  // check for memory leaks if DEBUG is on
   memInventory(DEBUG_gc);
 #endif
 
-#ifdef RTS_GTK_FRONTPANEL
-  if (RtsFlags.GcFlags.frontpanel) {
-      updateFrontPanelAfterGC( N, live );
-  }
-#endif
-
-  // ok, GC over: tell the stats department what happened. 
+  // ok, GC over: tell the stats department what happened.
   stat_endGC(cap, gct, live_words, copied,
              live_blocks * BLOCK_SIZE_W - live_words /* slop */,
              N, n_gc_threads, par_max_copied, par_tot_copied);
@@ -821,7 +806,7 @@ new_gc_thread (nat n, gc_thread *t)
     t->gc_count = 0;
 
     init_gc_thread(t);
-    
+
#ifdef USE_PAPI
     t->papi_events = -1;
 #endif
@@ -832,7 +817,7 @@ new_gc_thread (nat n, gc_thread *t)
         ws->gen = &generations[g];
         ASSERT(g == ws->gen->no);
         ws->my_gct = t;
-        
+
         // We want to call
         //   alloc_todo_block(ws,0);
         // but can't, because it uses gct which isn't set up at this point.
@@ -960,7 +945,7 @@ any_work (void)
     if (mark_stack_bd != NULL && !mark_stack_empty()) {
         return rtsTrue;
     }
-    
+
     // Check for global work in any step.  We don't need to check for
     // local work, because we have already exited scavenge_loop(),
     // which means there is no local work for this thread.
@@ -991,13 +976,13 @@ any_work (void)
 #endif
 
     return rtsFalse;
-} 
+}
 
 static void
 scavenge_until_all_done (void)
 {
     DEBUG_ONLY( nat r );
-    
+
 loop:
 #if defined(THREADED_RTS)
@@ -1023,7 +1008,7 @@ loop:
     traceEventGcIdle(gct->cap);
 
     debugTrace(DEBUG_gc, "%d GC threads still running", r);
-    
+
     while (gc_running_threads != 0) {
         // usleep(1);
         if (any_work()) {
@@ -1033,10 +1018,10 @@ loop:
         }
         // any_work() does not remove the work from the queue, it
         // just checks for the presence of work.  If we find any,
-        // then we increment gc_running_threads and go back to 
+        // then we increment gc_running_threads and go back to
         // scavenge_loop() to perform any pending work.
     }
-    
+
     traceEventGcDone(gct->cap);
 }
 
@@ -1065,7 +1050,7 @@ gcWorkerThread (Capability *cap)
     gct->wakeup = GC_THREAD_STANDING_BY;
     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
-    
+
 #ifdef USE_PAPI
     // start performance counters in this thread...
     if (gct->papi_events == -1) {
@@ -1084,7 +1069,7 @@ gcWorkerThread (Capability *cap)
     scavenge_capability_mut_lists(cap);
 
     scavenge_until_all_done();
-    
+
     if (!DEBUG_IS_ON) {
         clearNursery(cap);
     }
@@ -1107,7 +1092,7 @@ gcWorkerThread (Capability *cap)
     // Wait until we're told to continue
     RELEASE_SPIN_LOCK(&gct->gc_spin);
     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
-    debugTrace(DEBUG_gc, "GC thread %d waiting to continue...", 
+    debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
                gct->thread_index);
     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
@@ -1213,7 +1198,7 @@ releaseGCThreads (Capability *cap USED_IF_THREADS)
         if (i == me || gc_threads[i]->idle) continue;
         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
             barf("releaseGCThreads");
-        
+
         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
@@ -1222,7 +1207,7 @@ releaseGCThreads (Capability *cap USED_IF_THREADS)
 #endif
 
 /* ----------------------------------------------------------------------------
-   Initialise a generation that is to be collected 
+   Initialise a generation that is to be collected
    ------------------------------------------------------------------------- */
 
 static void
@@ -1293,7 +1278,7 @@ prepare_collected_gen (generation *gen)
     for (bd = gen->old_blocks; bd; bd = bd->link) {
         bd->flags &= ~BF_EVACUATED;
     }
-    
+
     // mark the large objects as from-space
     for (bd = gen->large_objects; bd; bd = bd->link) {
         bd->flags &= ~BF_EVACUATED;
@@ -1304,7 +1289,7 @@ prepare_collected_gen (generation *gen)
         StgWord bitmap_size; // in bytes
         bdescr *bitmap_bdescr;
         StgWord *bitmap;
-        
+
         bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
 
         if (bitmap_size > 0) {
@@ -1312,19 +1297,19 @@ prepare_collected_gen (generation *gen)
                                        / BLOCK_SIZE);
             gen->bitmap = bitmap_bdescr;
             bitmap = bitmap_bdescr->start;
-            
+
             debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
                        bitmap_size, bitmap);
-            
+
             // don't forget to fill it with zeros!
             memset(bitmap, 0, bitmap_size);
-            
+
             // For each block in this step, point to its bitmap from the
             // block descriptor.
             for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
                 bd->u.bitmap = bitmap;
                 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
-                
+
                 // Also at this point we set the BF_MARKED flag
                 // for this block.  The invariant is that
                 // BF_MARKED is always unset, except during GC
@@ -1355,7 +1340,7 @@ stash_mut_list (Capability *cap, nat gen_no)
 }
 
 /* ----------------------------------------------------------------------------
-   Initialise a generation that is *not* to be collected 
+   Initialise a generation that is *not* to be collected
   ------------------------------------------------------------------------- */
 
 static void
@@ -1388,10 +1373,10 @@ collect_gct_blocks (void)
     nat g;
     gen_workspace *ws;
     bdescr *bd, *prev;
-    
+
     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
         ws = &gct->gens[g];
-        
+
         // there may still be a block attached to ws->todo_bd;
         // leave it there to use next time.
@@ -1400,7 +1385,7 @@ collect_gct_blocks (void)
 
         ASSERT(gct->scan_bd == NULL);
         ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
-        
+
         prev = NULL;
         for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
             ws->gen->n_words += bd->free - bd->start;
@@ -1409,7 +1394,7 @@ collect_gct_blocks (void)
         if (prev != NULL) {
             prev->link = ws->gen->blocks;
             ws->gen->blocks = ws->scavd_list;
-        } 
+        }
 
         ws->gen->n_blocks += ws->n_scavd_blocks;
         ws->scavd_list = NULL;
@@ -1492,9 +1477,9 @@ mark_root(void *user USED_IF_THREADS, StgClosure **root)
     saved_gct = gct;
 #endif
     SET_GCT(user);
-    
+
     evacuate(root);
-    
+
     SET_GCT(saved_gct);
 }
 
@@ -1519,7 +1504,7 @@ zero_static_object_list(StgClosure* first_static)
 /* ----------------------------------------------------------------------------
    Reset the sizes of the older generations when we do a major
   collection.
-  
+
   CURRENT STRATEGY: make all generations except zero the same size.
   We have to stay within the maximum heap size, and leave a certain
   percentage of the maximum heap size available to allocate into.
@@ -1534,7 +1519,7 @@ resize_generations (void)
         W_ live, size, min_alloc, words;
         const W_ max  = RtsFlags.GcFlags.maxHeapSize;
         const W_ gens = RtsFlags.GcFlags.generations;
-        
+
         // live in the oldest generations
         if (oldest_gen->live_estimate != 0) {
             words = oldest_gen->live_estimate;
@@ -1543,11 +1528,11 @@ resize_generations (void)
         }
         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
             oldest_gen->n_large_blocks;
-        
+
         // default max size for all generations except zero
         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
                        RtsFlags.GcFlags.minOldGenSize);
-        
+
         if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
             if (max > 0) {
                 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
@@ -1564,7 +1549,7 @@ resize_generations (void)
         // certain percentage of the maximum heap size (default: 30%).
         if (RtsFlags.GcFlags.compact ||
                 (max > 0 &&
-                 oldest_gen->n_blocks > 
+                 oldest_gen->n_blocks >
                  (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
             oldest_gen->mark = 1;
             oldest_gen->compact = 1;
@@ -1584,14 +1569,14 @@ resize_generations (void)
         // different if compaction is turned on, because we don't need
         // to double the space required to collect the old generation.
         if (max != 0) {
-            
+
             // this test is necessary to ensure that the calculations
            // below don't have any negative results - we're working
            // with unsigned values here.
            if (max < min_alloc) {
                heapOverflow();
            }
-            
+
            if (oldest_gen->compact) {
                if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
                    size = (max - min_alloc) / ((gens - 1) * 2 - 1);
@@ -1601,17 +1586,17 @@ resize_generations (void)
                    size = (max - min_alloc) / ((gens - 1) * 2);
                }
            }
-            
+
            if (size < live) {
                heapOverflow();
            }
        }
-        
+
 #if 0
        debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
                   min_alloc, size, max);
 #endif
-        
+
        for (g = 0; g < gens; g++) {
            generations[g].max_blocks = size;
        }
@@ -1630,7 +1615,7 @@ resize_nursery (void)
    if (RtsFlags.GcFlags.generations == 1)
    {   // Two-space collector:
        W_ blocks;
-        
+
        /* set up a new nursery.  Allocate a nursery size based on a
        * function of the amount of live data (by default a factor of 2)
        * Use the blocks from the old nursery if possible, freeing up any
@@ -1640,25 +1625,25 @@ resize_nursery (void)
        * size accordingly.  If the nursery is the same size as the live
        * data (L), then we need 3L bytes.  We can reduce the size of the
        * nursery to bring the required memory down near 2L bytes.
-        * 
+        *
        * A normal 2-space collector would need 4L bytes to give the same
        * performance we get from 3L bytes, reducing to the same
        * performance at 2L bytes.
        */
       blocks = generations[0].n_blocks;
-        
+
       if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
-            blocks * RtsFlags.GcFlags.oldGenFactor * 2 > 
+            blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
            RtsFlags.GcFlags.maxHeapSize )
       {
-           long adjusted_blocks;  // signed on purpose 
-           int pc_free; 
-           
+           long adjusted_blocks;  // signed on purpose
+           int pc_free;
+
           adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
-           
-           debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld", 
+
+           debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
                      RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
-           
+
           pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
           if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even
                                                       * be < 0 */
          {
@@ -1678,7 +1663,7 @@ resize_nursery (void)
    }
    else  // Generational collector
    {
-        /* 
+        /*
        * If the user has given us a suggested heap size, adjust our
        * allocation area to make best use of the memory available.
        */
@@ -1688,7 +1673,7 @@ resize_nursery (void)
           StgWord needed;
 
           calcNeeded(rtsFalse, &needed); // approx blocks needed at next GC
-            
+
           /* Guess how much will be live in generation 0 step 0 next time.
           * A good approximation is obtained by finding the
           * percentage of g0 that was live at the last minor GC.
@@ -1703,7 +1688,7 @@ resize_nursery (void)
              g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
                  / countNurseryBlocks();
          }
-            
+
          /* Estimate a size for the allocation area based on the
          * information available.  We might end up going slightly under
         * or over the suggested heap size, but we should be pretty
@@ -1716,14 +1701,14 @@ resize_nursery (void)
         * where 'needed' is the amount of memory needed at the next
         * collection for collecting all gens except g0.
         */
-           blocks = 
+           blocks =
             (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) *
              100) / (100 + (long)g0_pcnt_kept);
-            
+
         if (blocks < (long)min_nursery) {
            blocks = min_nursery;
        }
-            
+
        resizeNurseries((W_)blocks);
     }
     else
@@ -1744,7 +1729,7 @@ resize_nursery (void)
   whenever the program tries to enter a garbage collected CAF.
 
   Any garbage collected CAFs are taken off the CAF list at the same
-   time. 
+   time.
   -------------------------------------------------------------------------- */
 
 #if 0 && defined(DEBUG)
@@ -1762,14 +1747,14 @@ gcCAFs(void)
    pp = &caf_list;
 
    while (p != NULL) {
-       
+
       info = get_itbl(p);
       ASSERT(info->type == IND_STATIC);
 
      if (STATIC_LINK(info,p) == NULL) {
         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
-           // black hole it 
+           // black hole it
        SET_INFO(p,&stg_BLACKHOLE_info);
        p = STATIC_LINK2(info,p);
        *pp = p;
@@ -1782,6 +1767,6 @@ gcCAFs(void)
      }
    }
 
-    debugTrace(DEBUG_gccafs, "%d CAFs live", i); 
+    debugTrace(DEBUG_gccafs, "%d CAFs live", i);
 }
 
 #endif
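The resize_nursery() hunks above include the allocation-area sizing formula: the suggested heap size minus the blocks needed for the older generations at the next GC, scaled down by the fraction of generation 0 expected to survive (g0_pcnt_kept). A minimal standalone sketch of that arithmetic is given below; it is not part of the patch, and the numeric figures for the heap-size suggestion, the blocks needed at the next GC, and the minimum nursery size are made up for illustration.

```c
/* Standalone sketch of the allocation-area sizing formula used in
 * resize_nursery() above.  Only the formula comes from GC.c; all
 * numbers here are invented example inputs. */
#include <stdio.h>

int main(void)
{
    long heap_size_suggestion = 16384; /* blocks, e.g. from the -H RTS flag */
    long needed               = 2048;  /* approx blocks needed at next GC   */
    long g0_pcnt_kept         = 30;    /* % of g0 live at the last minor GC */
    long min_nursery          = 256;   /* assumed lower bound, in blocks    */

    /* blocks = (suggested - needed) / (1 + g0_pcnt_kept/100),
     * computed in integer arithmetic as in GC.c */
    long blocks = ((heap_size_suggestion - needed) * 100)
                      / (100 + g0_pcnt_kept);

    if (blocks < min_nursery) {
        blocks = min_nursery;
    }

    printf("nursery size: %ld blocks\n", blocks);
    return 0;
}
```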