author     Simon Marlow <marlowsd@gmail.com>    2011-02-02 15:49:55 +0000
committer  Simon Marlow <marlowsd@gmail.com>    2011-02-02 15:49:55 +0000
commit     18896fa2b06844407fd1e0d3f85cd3db97a96ff4 (patch)
tree       ecea68990c0891e25a21bf25a26435e70e8e42fa /rts
parent     8d809e0a8354e00078529eecb00db843c1602abe (diff)
download   haskell-18896fa2b06844407fd1e0d3f85cd3db97a96ff4.tar.gz
GC refactoring and cleanup
Now we keep any partially-full blocks in the gc_thread[] structs after each GC, rather than moving them to the generation. This should give us slightly better locality (though I wasn't able to measure any difference).

Also in this patch: better sanity checking with THREADED.
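The structures involved, reduced to the fields this patch touches (a simplified sketch, not the real definitions; the actual gen_workspace lives in rts/sm/GCThread.h and carries more fields):

    /* Sketch only. Each GC thread owns one gen_workspace per
     * generation; after this patch the blocks on part_list (and the
     * current todo_bd) survive from one GC to the next instead of
     * being pushed back onto the generation. */
    typedef struct bdescr_ {
        struct bdescr_ *link;        /* blocks are chained together   */
        /* ... start, free, flags, ... */
    } bdescr;

    typedef struct {
        bdescr *todo_bd;             /* block currently being filled  */
        bdescr *part_list;           /* partially-full blocks: kept   */
        bdescr *scavd_list;          /* scavenged blocks: handed back */
    } gen_workspace;                 /* to the generation each GC     */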
Diffstat (limited to 'rts')
-rw-r--r--   rts/RaiseAsync.c     6
-rw-r--r--   rts/Stats.c         34
-rw-r--r--   rts/Threads.c        2
-rw-r--r--   rts/sm/Evac.c       10
-rw-r--r--   rts/sm/GC.c        340
-rw-r--r--   rts/sm/Sanity.c    155
-rw-r--r--   rts/sm/Sanity.h      7
-rw-r--r--   rts/sm/Scav.c        4
-rw-r--r--   rts/sm/Storage.c    74
-rw-r--r--   rts/sm/Storage.h    11
10 files changed, 350 insertions, 293 deletions
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index 550f703e8d..775505f887 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -592,7 +592,7 @@ removeFromMVarBlockedQueue (StgTSO *tso)
if (mvar->head == q) {
mvar->head = q->link;
- q->header.info = &stg_IND_info;
+ OVERWRITE_INFO(q, &stg_IND_info);
if (mvar->tail == q) {
mvar->tail = (StgMVarTSOQueue*)END_TSO_QUEUE;
}
@@ -602,10 +602,10 @@ removeFromMVarBlockedQueue (StgTSO *tso)
// we lose the tail pointer when the GC shorts out the IND.
// So we use MSG_NULL as a kind of non-dupable indirection;
// these are ignored by takeMVar/putMVar.
- q->header.info = &stg_MSG_NULL_info;
+ OVERWRITE_INFO(q, &stg_MSG_NULL_info);
}
else {
- q->header.info = &stg_IND_info;
+ OVERWRITE_INFO(q, &stg_IND_info);
}
// revoke the MVar operation
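OVERWRITE_INFO replaces the bare info-pointer assignments above. Its definition is not part of this diff; the assumption, consistent with the "better sanity checking with THREADED" note in the commit message, is that in debug builds it also fills the slop left behind by the smaller closure, keeping the heap walkable. A plausible shape:

    /* Assumed shape only -- the real macro lives in the RTS headers.
     * OVERWRITING_CLOSURE is presumed to zero the dead words ("slop")
     * of the old closure in debug builds; SET_INFO writes the new
     * info pointer. */
    #if defined(DEBUG)
    #define OVERWRITE_INFO(c, new_info)                 \
        do {                                            \
            OVERWRITING_CLOSURE((StgClosure *)(c));     \
            SET_INFO((StgClosure *)(c), (new_info));    \
        } while (0)
    #else
    #define OVERWRITE_INFO(c, new_info)                 \
        SET_INFO((StgClosure *)(c), (new_info))
    #endif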
diff --git a/rts/Stats.c b/rts/Stats.c
index cbd02cd688..4b9f6d8651 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -710,7 +710,7 @@ stat_exit(int alloc)
statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- statsPrintf("gen[%d].sync_large_objects: %"FMT_Word64"\n", g, generations[g].sync_large_objects.spin);
+ statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
}
}
#endif
@@ -772,8 +772,9 @@ void
statDescribeGens(void)
{
nat g, mut, lge, i;
- lnat live, slop;
+ lnat gen_slop;
lnat tot_live, tot_slop;
+ lnat gen_live, gen_blocks;
bdescr *bd;
generation *gen;
@@ -785,25 +786,32 @@ statDescribeGens(void)
tot_live = 0;
tot_slop = 0;
+
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ gen = &generations[g];
+
+ for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
+ lge++;
+ }
+
+ gen_live = genLiveWords(gen);
+ gen_blocks = genLiveBlocks(gen);
+
mut = 0;
for (i = 0; i < n_capabilities; i++) {
mut += countOccupied(capabilities[i].mut_lists[g]);
+ gen_live += gcThreadLiveWords(i,g);
+ gen_blocks += gcThreadLiveBlocks(i,g);
}
- gen = &generations[g];
-
debugBelch("%5d %7d %9d", g, gen->max_blocks, mut);
- for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
- lge++;
- }
- live = gen->n_words + countOccupied(gen->large_objects);
- slop = (gen->n_blocks + gen->n_large_blocks) * BLOCK_SIZE_W - live;
- debugBelch("%8d %8d %8ld %8ld\n", gen->n_blocks, lge,
- live*sizeof(W_), slop*sizeof(W_));
- tot_live += live;
- tot_slop += slop;
+ gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;
+
+ debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge,
+ gen_live*sizeof(W_), gen_slop*sizeof(W_));
+ tot_live += gen_live;
+ tot_slop += gen_slop;
}
debugBelch("----------------------------------------------------------\n");
debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
diff --git a/rts/Threads.c b/rts/Threads.c
index dcb916a92b..3e1c5cff0b 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -628,7 +628,7 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
// will be discarded after the first overflow, being replaced by a
// non-moving 32k chunk.
if (old_stack->sp == old_stack->stack + old_stack->stack_size) {
- frame->next_chunk = new_stack;
+ frame->next_chunk = (StgStack*)END_TSO_QUEUE; // dummy
}
tso->stackobj = new_stack;
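The dummy END_TSO_QUEUE value gives the chain of stack chunks a recognisable terminator, which the new sanity check in this patch relies on when hopping from chunk to chunk via the underflow frames (see the Sanity.c hunk below). The walk, condensed from that hunk:

    /* Condensed from checkGlobalTSOList below: follow the underflow
     * frame at the top of each chunk until the sentinel. */
    stack = tso->stackobj;
    while (1) {
        frame = (StgUnderflowFrame*)(stack->stack + stack->stack_size
                                     - sizeofW(StgUnderflowFrame));
        if (frame->info != &stg_stack_underflow_frame_info
            || frame->next_chunk == (StgStack*)END_TSO_QUEUE) break;
        stack = frame->next_chunk;
    }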
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 18ace21b9e..d049f98bc8 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -248,7 +248,7 @@ evacuate_large(StgPtr p)
bd = Bdescr(p);
gen = bd->gen;
gen_no = bd->gen_no;
- ACQUIRE_SPIN_LOCK(&gen->sync_large_objects);
+ ACQUIRE_SPIN_LOCK(&gen->sync);
// already evacuated?
if (bd->flags & BF_EVACUATED) {
@@ -259,7 +259,7 @@ evacuate_large(StgPtr p)
gct->failed_to_evac = rtsTrue;
TICK_GC_FAILED_PROMOTION();
}
- RELEASE_SPIN_LOCK(&gen->sync_large_objects);
+ RELEASE_SPIN_LOCK(&gen->sync);
return;
}
@@ -297,16 +297,16 @@ evacuate_large(StgPtr p)
// them straight on the scavenged_large_objects list.
if (bd->flags & BF_PINNED) {
ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS);
- if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync_large_objects); }
+ if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); }
dbl_link_onto(bd, &new_gen->scavenged_large_objects);
new_gen->n_scavenged_large_blocks += bd->blocks;
- if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync_large_objects); }
+ if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync); }
} else {
bd->link = ws->todo_large_objects;
ws->todo_large_objects = bd;
}
- RELEASE_SPIN_LOCK(&gen->sync_large_objects);
+ RELEASE_SPIN_LOCK(&gen->sync);
}
/* ----------------------------------------------------------------------------
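One detail worth noting in the pinned-object path above: gen->sync is already held, so the destination generation's lock is taken only when it is a different generation, avoiding a re-acquire of the same spinlock. The pattern in isolation, with a pthread mutex standing in for the RTS SpinLock:

    #include <pthread.h>

    typedef struct generation_ {
        pthread_mutex_t sync;      /* stand-in for the RTS SpinLock */
        /* ... block lists, counters, ... */
    } generation;

    /* Hold both generations' locks while moving a block between
     * them, but never lock the same generation twice. */
    static void with_both_gens (generation *gen, generation *new_gen)
    {
        pthread_mutex_lock(&gen->sync);
        if (new_gen != gen) { pthread_mutex_lock(&new_gen->sync); }
        /* ... splice the block onto new_gen's list ... */
        if (new_gen != gen) { pthread_mutex_unlock(&new_gen->sync); }
        pthread_mutex_unlock(&gen->sync);
    }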
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index c9908359b8..221f24a5af 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -137,8 +137,8 @@ DECLARE_GCT
static void mark_root (void *user, StgClosure **root);
static void zero_static_object_list (StgClosure* first_static);
static nat initialise_N (rtsBool force_major_gc);
-static void init_collected_gen (nat g, nat threads);
-static void init_uncollected_gen (nat g, nat threads);
+static void prepare_collected_gen (generation *gen);
+static void prepare_uncollected_gen (generation *gen);
static void init_gc_thread (gc_thread *t);
static void resize_generations (void);
static void resize_nursery (void);
@@ -148,6 +148,7 @@ static StgWord inc_running (void);
static StgWord dec_running (void);
static void wakeup_gc_threads (nat n_threads, nat me);
static void shutdown_gc_threads (nat n_threads, nat me);
+static void collect_gct_blocks (void);
#if 0 && defined(DEBUG)
static void gcCAFs (void);
@@ -174,7 +175,7 @@ GarbageCollect (rtsBool force_major_gc,
{
bdescr *bd;
generation *gen;
- lnat live, allocated, max_copied, avg_copied, slop;
+ lnat live_blocks, live_words, allocated, max_copied, avg_copied;
gc_thread *saved_gct;
nat g, t, n;
@@ -274,7 +275,7 @@ GarbageCollect (rtsBool force_major_gc,
#endif
// check sanity *before* GC
- IF_DEBUG(sanity, checkSanity(rtsTrue));
+ IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
// Initialise all our gc_thread structures
for (t = 0; t < n_gc_threads; t++) {
@@ -283,12 +284,11 @@ GarbageCollect (rtsBool force_major_gc,
// Initialise all the generations/steps that we're collecting.
for (g = 0; g <= N; g++) {
- init_collected_gen(g,n_gc_threads);
+ prepare_collected_gen(&generations[g]);
}
-
// Initialise all the generations/steps that we're *not* collecting.
for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
- init_uncollected_gen(g,n_gc_threads);
+ prepare_uncollected_gen(&generations[g]);
}
/* Allocate a mark stack if we're doing a major collection.
@@ -420,76 +420,6 @@ SET_GCT(gc_threads[0]);
}
}
- // For each workspace, in each thread, move the copied blocks to the step
- {
- gc_thread *thr;
- gen_workspace *ws;
- bdescr *prev, *next;
-
- for (t = 0; t < n_gc_threads; t++) {
- thr = gc_threads[t];
-
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- ws = &thr->gens[g];
-
- // Push the final block
- if (ws->todo_bd) {
- push_scanned_block(ws->todo_bd, ws);
- }
-
- ASSERT(gct->scan_bd == NULL);
- ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
-
- prev = NULL;
- for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
- ws->gen->n_words += bd->free - bd->start;
- prev = bd;
- }
- if (prev != NULL) {
- prev->link = ws->gen->blocks;
- ws->gen->blocks = ws->scavd_list;
- }
- ws->gen->n_blocks += ws->n_scavd_blocks;
- }
- }
-
- // Add all the partial blocks *after* we've added all the full
- // blocks. This is so that we can grab the partial blocks back
- // again and try to fill them up in the next GC.
- for (t = 0; t < n_gc_threads; t++) {
- thr = gc_threads[t];
-
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- ws = &thr->gens[g];
-
- prev = NULL;
- for (bd = ws->part_list; bd != NULL; bd = next) {
- next = bd->link;
- if (bd->free == bd->start) {
- if (prev == NULL) {
- ws->part_list = next;
- } else {
- prev->link = next;
- }
- freeGroup(bd);
- ws->n_part_blocks--;
- } else {
- ws->gen->n_words += bd->free - bd->start;
- prev = bd;
- }
- }
- if (prev != NULL) {
- prev->link = ws->gen->blocks;
- ws->gen->blocks = ws->part_list;
- }
- ws->gen->n_blocks += ws->n_part_blocks;
-
- ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks);
- ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words);
- }
- }
- }
-
// Finally: compact or sweep the oldest generation.
if (major_gc && oldest_gen->mark) {
if (oldest_gen->compact)
@@ -498,8 +428,6 @@ SET_GCT(gc_threads[0]);
sweep(oldest_gen);
}
- /* run through all the generations/steps and tidy up
- */
copied = 0;
max_copied = 0;
avg_copied = 0;
@@ -525,6 +453,16 @@ SET_GCT(gc_threads[0]);
}
}
+ // Run through all the generations/steps and tidy up.
+ // We're going to:
+ // - count the amount of "live" data (live_words, live_blocks)
+ // - count the amount of "copied" data in this GC (copied)
+ // - free from-space
+ // - make to-space the new from-space (set BF_EVACUATED on all blocks)
+ //
+ live_words = 0;
+ live_blocks = 0;
+
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
if (g == N) {
@@ -622,7 +560,6 @@ SET_GCT(gc_threads[0]);
gen->large_objects = gen->scavenged_large_objects;
gen->n_large_blocks = gen->n_scavenged_large_blocks;
gen->n_new_large_words = 0;
- ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
}
else // for generations > N
{
@@ -637,16 +574,31 @@ SET_GCT(gc_threads[0]);
// add the new blocks we promoted during this GC
gen->n_large_blocks += gen->n_scavenged_large_blocks;
- ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
+ }
+
+ ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
+
+ gen->scavenged_large_objects = NULL;
+ gen->n_scavenged_large_blocks = 0;
+
+ // Count "live" data
+ live_words += genLiveWords(gen);
+ live_blocks += genLiveBlocks(gen);
+
+ // add in the partial blocks in the gen_workspaces, but ignore gen 0
+ // if this is a local GC (we can't count another capability's part_list)
+ {
+ nat i;
+ for (i = 0; i < n_capabilities; i++) {
+ live_words += gcThreadLiveWords(i, gen->no);
+ live_blocks += gcThreadLiveBlocks(i, gen->no);
+ }
}
} // for all generations
// update the max size of older generations after a major GC
resize_generations();
- // Calculate the amount of live data for stats.
- live = calcLiveWords();
-
// Start a new pinned_object_block
for (n = 0; n < n_capabilities; n++) {
capabilities[n].pinned_object_block = NULL;
@@ -698,11 +650,6 @@ SET_GCT(gc_threads[0]);
}
}
- // send exceptions to any threads which were about to die
- RELEASE_SM_LOCK;
- resurrectThreads(resurrected_threads);
- ACQUIRE_SM_LOCK;
-
// Update the stable pointer hash table.
updateStablePtrTable(major_gc);
@@ -717,6 +664,17 @@ SET_GCT(gc_threads[0]);
scheduleFinalizers(cap, old_weak_ptr_list);
ACQUIRE_SM_LOCK;
+ // check sanity after GC
+ // before resurrectThreads(), because that might overwrite some
+ // closures, which will cause problems with THREADED where we don't
+ // fill slop.
+ IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
+
+ // send exceptions to any threads which were about to die
+ RELEASE_SM_LOCK;
+ resurrectThreads(resurrected_threads);
+ ACQUIRE_SM_LOCK;
+
if (major_gc) {
nat need, got;
need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
@@ -730,10 +688,7 @@ SET_GCT(gc_threads[0]);
}
}
- // check sanity after GC
- IF_DEBUG(sanity, checkSanity(rtsTrue));
-
- // extra GC trace info
+ // extra GC trace info
IF_DEBUG(gc, statDescribeGens());
#ifdef DEBUG
@@ -758,8 +713,8 @@ SET_GCT(gc_threads[0]);
#endif
// ok, GC over: tell the stats department what happened.
- slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
- stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
+ stat_endGC(allocated, live_words, copied, N, max_copied, avg_copied,
+ live_blocks * BLOCK_SIZE_W - live_words /* slop */);
// Guess which generation we'll collect *next* time
initialise_N(force_major_gc);
@@ -858,7 +813,21 @@ new_gc_thread (nat n, gc_thread *t)
ASSERT(g == ws->gen->no);
ws->my_gct = t;
- ws->todo_bd = NULL;
+ // We want to call
+ // alloc_todo_block(ws,0);
+ // but can't, because it uses gct which isn't set up at this point.
+ // Hence, allocate a block for todo_bd manually:
+ {
+ bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
+ initBdescr(bd, ws->gen, ws->gen->to);
+ bd->flags = BF_EVACUATED;
+ bd->u.scan = bd->free = bd->start;
+
+ ws->todo_bd = bd;
+ ws->todo_free = bd->free;
+ ws->todo_lim = bd->start + BLOCK_SIZE_W;
+ }
+
ws->todo_q = newWSDeque(128);
ws->todo_overflow = NULL;
ws->n_todo_overflow = 0;
@@ -1012,6 +981,8 @@ loop:
scavenge_loop();
#endif
+ collect_gct_blocks();
+
// scavenge_loop() only exits when there's no work to do
r = dec_running();
@@ -1197,16 +1168,16 @@ releaseGCThreads (Capability *cap USED_IF_THREADS)
------------------------------------------------------------------------- */
static void
-init_collected_gen (nat g, nat n_threads)
+prepare_collected_gen (generation *gen)
{
- nat t, i;
+ nat i, g, n;
gen_workspace *ws;
- generation *gen;
- bdescr *bd;
+ bdescr *bd, *next;
// Throw away the current mutable list. Invariant: the mutable
// list always has at least one block; this means we can avoid a
// check for NULL in recordMutable().
+ g = gen->no;
if (g != 0) {
for (i = 0; i < n_capabilities; i++) {
freeChain(capabilities[i].mut_lists[g]);
@@ -1231,9 +1202,35 @@ init_collected_gen (nat g, nat n_threads)
gen->live_estimate = 0;
// initialise the large object queues.
- gen->scavenged_large_objects = NULL;
- gen->n_scavenged_large_blocks = 0;
-
+ ASSERT(gen->scavenged_large_objects == NULL);
+ ASSERT(gen->n_scavenged_large_blocks == 0);
+
+ // grab all the partial blocks stashed in the gc_thread workspaces and
+ // move them to the old_blocks list of this gen.
+ for (n = 0; n < n_capabilities; n++) {
+ ws = &gc_threads[n]->gens[gen->no];
+
+ for (bd = ws->part_list; bd != NULL; bd = next) {
+ next = bd->link;
+ bd->link = gen->old_blocks;
+ gen->old_blocks = bd;
+ gen->n_old_blocks += bd->blocks;
+ }
+ ws->part_list = NULL;
+ ws->n_part_blocks = 0;
+
+ ASSERT(ws->scavd_list == NULL);
+ ASSERT(ws->n_scavd_blocks == 0);
+
+ if (ws->todo_free != ws->todo_bd->start) {
+ ws->todo_bd->free = ws->todo_free;
+ ws->todo_bd->link = gen->old_blocks;
+ gen->old_blocks = ws->todo_bd;
+ gen->n_old_blocks += ws->todo_bd->blocks;
+ alloc_todo_block(ws,0); // always has one block.
+ }
+ }
+
// mark the small objects as from-space
for (bd = gen->old_blocks; bd; bd = bd->link) {
bd->flags &= ~BF_EVACUATED;
@@ -1285,106 +1282,83 @@ init_collected_gen (nat g, nat n_threads)
}
}
}
-
- // For each GC thread, for each step, allocate a "todo" block to
- // store evacuated objects to be scavenged, and a block to store
- // evacuated objects that do not need to be scavenged.
- for (t = 0; t < n_threads; t++) {
- ws = &gc_threads[t]->gens[g];
-
- ws->todo_large_objects = NULL;
-
- ws->part_list = NULL;
- ws->n_part_blocks = 0;
-
- // allocate the first to-space block; extra blocks will be
- // chained on as necessary.
- ws->todo_bd = NULL;
- ASSERT(looksEmptyWSDeque(ws->todo_q));
- alloc_todo_block(ws,0);
-
- ws->todo_overflow = NULL;
- ws->n_todo_overflow = 0;
-
- ws->scavd_list = NULL;
- ws->n_scavd_blocks = 0;
- }
}
/* ----------------------------------------------------------------------------
+ Save the mutable lists in saved_mut_lists
+ ------------------------------------------------------------------------- */
+
+static void
+stash_mut_list (Capability *cap, nat gen_no)
+{
+ cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
+ cap->mut_lists[gen_no] = allocBlock_sync();
+}
+
+/* ----------------------------------------------------------------------------
Initialise a generation that is *not* to be collected
------------------------------------------------------------------------- */
static void
-init_uncollected_gen (nat g, nat threads)
+prepare_uncollected_gen (generation *gen)
{
- nat t, n;
- gen_workspace *ws;
- generation *gen;
- bdescr *bd;
+ nat i;
+
+
+ ASSERT(gen->no > 0);
// save the current mutable lists for this generation, and
// allocate a fresh block for each one. We'll traverse these
// mutable lists as roots early on in the GC.
- for (n = 0; n < n_capabilities; n++) {
- capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
- capabilities[n].mut_lists[g] = allocBlock();
+ for (i = 0; i < n_capabilities; i++) {
+ stash_mut_list(&capabilities[i], gen->no);
}
- gen = &generations[g];
+ ASSERT(gen->scavenged_large_objects == NULL);
+ ASSERT(gen->n_scavenged_large_blocks == 0);
+}
- gen->scavenged_large_objects = NULL;
- gen->n_scavenged_large_blocks = 0;
+/* -----------------------------------------------------------------------------
+ Collect the completed blocks from a GC thread and attach them to
+ the generation.
+ -------------------------------------------------------------------------- */
- for (t = 0; t < threads; t++) {
- ws = &gc_threads[t]->gens[g];
-
- ASSERT(looksEmptyWSDeque(ws->todo_q));
- ws->todo_large_objects = NULL;
-
- ws->part_list = NULL;
- ws->n_part_blocks = 0;
+static void
+collect_gct_blocks (void)
+{
+ nat g;
+ gen_workspace *ws;
+ bdescr *bd, *prev;
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ ws = &gct->gens[g];
- ws->scavd_list = NULL;
- ws->n_scavd_blocks = 0;
+ // there may still be a block attached to ws->todo_bd;
+ // leave it there to use next time.
+
+ if (ws->scavd_list != NULL) {
+ ACQUIRE_SPIN_LOCK(&ws->gen->sync);
+
+ ASSERT(gct->scan_bd == NULL);
+ ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
- // If the block at the head of the list in this generation
- // is less than 3/4 full, then use it as a todo block.
- if (gen->blocks && isPartiallyFull(gen->blocks))
- {
- ws->todo_bd = gen->blocks;
- ws->todo_free = ws->todo_bd->free;
- ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
- gen->blocks = gen->blocks->link;
- gen->n_blocks -= 1;
- gen->n_words -= ws->todo_bd->free - ws->todo_bd->start;
- ws->todo_bd->link = NULL;
- // we must scan from the current end point.
- ws->todo_bd->u.scan = ws->todo_bd->free;
- }
- else
- {
- ws->todo_bd = NULL;
- alloc_todo_block(ws,0);
- }
- }
+ prev = NULL;
+ for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
+ ws->gen->n_words += bd->free - bd->start;
+ prev = bd;
+ }
+ if (prev != NULL) {
+ prev->link = ws->gen->blocks;
+ ws->gen->blocks = ws->scavd_list;
+ }
+ ws->gen->n_blocks += ws->n_scavd_blocks;
- // deal out any more partial blocks to the threads' part_lists
- t = 0;
- while (gen->blocks && isPartiallyFull(gen->blocks))
- {
- bd = gen->blocks;
- gen->blocks = bd->link;
- ws = &gc_threads[t]->gens[g];
- bd->link = ws->part_list;
- ws->part_list = bd;
- ws->n_part_blocks += 1;
- bd->u.scan = bd->free;
- gen->n_blocks -= 1;
- gen->n_words -= bd->free - bd->start;
- t++;
- if (t == n_gc_threads) t = 0;
+ ws->scavd_list = NULL;
+ ws->n_scavd_blocks = 0;
+
+ RELEASE_SPIN_LOCK(&ws->gen->sync);
+ }
}
}
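prepare_collected_gen and collect_gct_blocks are both built on the same primitive: consing a chain of block descriptors onto the front of a generation's list while keeping the counters in step. A miniature, self-contained version of the part_list grab above:

    typedef unsigned long W_;

    typedef struct bdescr_ {
        struct bdescr_ *link;
        W_ blocks;                 /* size of this block group */
    } bdescr;

    typedef struct {
        bdescr *old_blocks;
        W_ n_old_blocks;
    } generation;

    /* Mirror of the loop in prepare_collected_gen: empty the
     * workspace's part_list into gen->old_blocks. */
    static void grab_part_list (generation *gen, bdescr **part_list)
    {
        bdescr *bd, *next;
        for (bd = *part_list; bd != NULL; bd = next) {
            next = bd->link;
            bd->link = gen->old_blocks;
            gen->old_blocks = bd;
            gen->n_old_blocks += bd->blocks;
        }
        *part_list = NULL;
    }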
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 65a70fa05c..8ebb9a2513 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -21,6 +21,7 @@
#include "RtsUtils.h"
#include "sm/Storage.h"
#include "sm/BlockAlloc.h"
+#include "GCThread.h"
#include "Sanity.h"
#include "Schedule.h"
#include "Apply.h"
@@ -468,17 +469,10 @@ checkClosure( StgClosure* p )
all the objects in the remainder of the chain.
-------------------------------------------------------------------------- */
-void
-checkHeap(bdescr *bd)
+void checkHeapChain (bdescr *bd)
{
StgPtr p;
-#if defined(THREADED_RTS)
- // heap sanity checking doesn't work with SMP, because we can't
- // zero the slop (see Updates.h).
- return;
-#endif
-
for (; bd != NULL; bd = bd->link) {
if(!(bd->flags & BF_SWEPT)) {
p = bd->start;
@@ -496,7 +490,7 @@ checkHeap(bdescr *bd)
}
}
-void
+void
checkHeapChunk(StgPtr start, StgPtr end)
{
StgPtr p;
@@ -587,6 +581,24 @@ checkGlobalTSOList (rtsBool checkTSOs)
ASSERT(Bdescr((P_)tso)->gen_no == 0 || (tso->flags & TSO_MARKED));
tso->flags &= ~TSO_MARKED;
}
+
+ {
+ StgStack *stack;
+ StgUnderflowFrame *frame;
+
+ stack = tso->stackobj;
+ while (1) {
+ if (stack->dirty & 1) {
+ ASSERT(Bdescr((P_)stack)->gen_no == 0 || (stack->dirty & TSO_MARKED));
+ stack->dirty &= ~TSO_MARKED;
+ }
+ frame = (StgUnderflowFrame*) (stack->stack + stack->stack_size
+ - sizeofW(StgUnderflowFrame));
+ if (frame->info != &stg_stack_underflow_frame_info
+ || frame->next_chunk == (StgStack*)END_TSO_QUEUE) break;
+ stack = frame->next_chunk;
+ }
+ }
}
}
}
@@ -595,7 +607,7 @@ checkGlobalTSOList (rtsBool checkTSOs)
Check mutable list sanity.
-------------------------------------------------------------------------- */
-void
+static void
checkMutableList( bdescr *mut_bd, nat gen )
{
bdescr *bd;
@@ -605,25 +617,37 @@ checkMutableList( bdescr *mut_bd, nat gen )
for (bd = mut_bd; bd != NULL; bd = bd->link) {
for (q = bd->start; q < bd->free; q++) {
p = (StgClosure *)*q;
- ASSERT(!HEAP_ALLOCED(p) || Bdescr((P_)p)->gen_no == gen);
- if (get_itbl(p)->type == TSO) {
+ ASSERT(!HEAP_ALLOCED(p) || Bdescr((P_)p)->gen_no == gen);
+ checkClosure(p);
+
+ switch (get_itbl(p)->type) {
+ case TSO:
((StgTSO *)p)->flags |= TSO_MARKED;
+ break;
+ case STACK:
+ ((StgStack *)p)->dirty |= TSO_MARKED;
+ break;
}
- }
+ }
}
}
-void
-checkMutableLists (rtsBool checkTSOs)
+static void
+checkLocalMutableLists (nat cap_no)
{
- nat g, i;
+ nat g;
+ for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
+ checkMutableList(capabilities[cap_no].mut_lists[g], g);
+ }
+}
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- for (i = 0; i < n_capabilities; i++) {
- checkMutableList(capabilities[i].mut_lists[g], g);
- }
+static void
+checkMutableLists (void)
+{
+ nat i;
+ for (i = 0; i < n_capabilities; i++) {
+ checkLocalMutableLists(i);
}
- checkGlobalTSOList(checkTSOs);
}
/*
@@ -677,7 +701,8 @@ checkNurserySanity (nursery *nursery)
prev = NULL;
for (bd = nursery->blocks; bd != NULL; bd = bd->link) {
- ASSERT(bd->u.back == prev);
+ ASSERT(bd->gen == g0);
+ ASSERT(bd->u.back == prev);
prev = bd;
blocks += bd->blocks;
}
@@ -685,41 +710,59 @@ checkNurserySanity (nursery *nursery)
ASSERT(blocks == nursery->n_blocks);
}
+static void checkGeneration (generation *gen,
+ rtsBool after_major_gc USED_IF_THREADS)
+{
+ nat n;
+ gen_workspace *ws;
+
+ ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
+ ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
+
+#if defined(THREADED_RTS)
+ // heap sanity checking doesn't work with SMP, because we can't
+ // zero the slop (see Updates.h). However, we can sanity-check
+ // the heap after a major gc, because there is no slop.
+ if (!after_major_gc) return;
+#endif
+
+ checkHeapChain(gen->blocks);
+
+ for (n = 0; n < n_capabilities; n++) {
+ ws = &gc_threads[n]->gens[gen->no];
+ checkHeapChain(ws->todo_bd);
+ checkHeapChain(ws->part_list);
+ checkHeapChain(ws->scavd_list);
+ }
+
+ checkLargeObjects(gen->large_objects);
+}
/* Full heap sanity check. */
-void
-checkSanity( rtsBool check_heap )
+static void checkFullHeap (rtsBool after_major_gc)
{
nat g, n;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- ASSERT(countBlocks(generations[g].blocks)
- == generations[g].n_blocks);
- ASSERT(countBlocks(generations[g].large_objects)
- == generations[g].n_large_blocks);
- if (check_heap) {
- checkHeap(generations[g].blocks);
- }
- checkLargeObjects(generations[g].large_objects);
+ checkGeneration(&generations[g], after_major_gc);
}
-
for (n = 0; n < n_capabilities; n++) {
checkNurserySanity(&nurseries[n]);
}
-
+}
+
+void checkSanity (rtsBool after_gc, rtsBool major_gc)
+{
+ checkFullHeap(after_gc && major_gc);
+
checkFreeListSanity();
-#if defined(THREADED_RTS)
// always check the stacks in threaded mode, because checkHeap()
// does nothing in this case.
- checkMutableLists(rtsTrue);
-#else
- if (check_heap) {
- checkMutableLists(rtsFalse);
- } else {
- checkMutableLists(rtsTrue);
+ if (after_gc) {
+ checkMutableLists();
+ checkGlobalTSOList(rtsTrue);
}
-#endif
}
// If memInventory() calculates that we have a memory leak, this
@@ -732,18 +775,21 @@ checkSanity( rtsBool check_heap )
static void
findMemoryLeak (void)
{
- nat g, i;
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- for (i = 0; i < n_capabilities; i++) {
- markBlocks(capabilities[i].mut_lists[g]);
- }
- markBlocks(generations[g].blocks);
- markBlocks(generations[g].large_objects);
- }
+ nat g, i;
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (i = 0; i < n_capabilities; i++) {
+ markBlocks(capabilities[i].mut_lists[g]);
+ markBlocks(gc_threads[i]->gens[g].part_list);
+ markBlocks(gc_threads[i]->gens[g].scavd_list);
+ markBlocks(gc_threads[i]->gens[g].todo_bd);
+ }
+ markBlocks(generations[g].blocks);
+ markBlocks(generations[g].large_objects);
+ }
- for (i = 0; i < n_capabilities; i++) {
- markBlocks(nurseries[i].blocks);
- }
+ for (i = 0; i < n_capabilities; i++) {
+ markBlocks(nurseries[i].blocks);
+ }
#ifdef PROFILING
// TODO:
@@ -823,6 +869,9 @@ memInventory (rtsBool show)
gen_blocks[g] = 0;
for (i = 0; i < n_capabilities; i++) {
gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
+ gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].part_list);
+ gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].scavd_list);
+ gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].todo_bd);
}
gen_blocks[g] += genBlocks(&generations[g]);
}
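The mutable-list and TSO-list checks cooperate through a mark bit: checkMutableList sets TSO_MARKED on every TSO (and, new in this patch, every STACK) it meets, and checkGlobalTSOList then asserts that each dirty object in an old generation carries the mark before clearing it, i.e. that everything dirty really is on some capability's mutable list. In miniature, with stand-in types and flag value:

    #include <assert.h>

    #define TSO_MARKED 2    /* stand-in bit; the real value lives in
                             * the RTS headers */

    typedef struct { unsigned flags; int dirty; unsigned gen_no; } obj_t;

    /* pass 1: walking the mutable lists marks every object found */
    static void mark_from_mut_list (obj_t *o) { o->flags |= TSO_MARKED; }

    /* pass 2: a dirty object outside gen 0 must have been marked,
     * otherwise a mutable-list entry is missing */
    static void check_object (obj_t *o)
    {
        if (o->dirty) {
            assert(o->gen_no == 0 || (o->flags & TSO_MARKED));
            o->flags &= ~TSO_MARKED;
        }
    }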
diff --git a/rts/sm/Sanity.h b/rts/sm/Sanity.h
index 602be54724..f302bc22b1 100644
--- a/rts/sm/Sanity.h
+++ b/rts/sm/Sanity.h
@@ -21,9 +21,9 @@
# endif
/* debugging routines */
-void checkSanity ( rtsBool check_heap );
+void checkSanity ( rtsBool after_gc, rtsBool major_gc );
void checkNurserySanity ( nursery *nursery );
-void checkHeap ( bdescr *bd );
+void checkHeapChain ( bdescr *bd );
void checkHeapChunk ( StgPtr start, StgPtr end );
void checkLargeObjects ( bdescr *bd );
void checkTSO ( StgTSO* tso );
@@ -33,9 +33,6 @@ void checkStackChunk ( StgPtr sp, StgPtr stack_end );
StgOffset checkStackFrame ( StgPtr sp );
StgOffset checkClosure ( StgClosure* p );
-void checkMutableList ( bdescr *bd, nat gen );
-void checkMutableLists ( rtsBool checkTSOs );
-
void checkRunQueue (Capability *cap);
void memInventory (rtsBool show);
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 9ac152af53..d77734fd05 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -1755,10 +1755,10 @@ scavenge_large (gen_workspace *ws)
// the front when evacuating.
ws->todo_large_objects = bd->link;
- ACQUIRE_SPIN_LOCK(&ws->gen->sync_large_objects);
+ ACQUIRE_SPIN_LOCK(&ws->gen->sync);
dbl_link_onto(bd, &ws->gen->scavenged_large_objects);
ws->gen->n_scavenged_large_blocks += bd->blocks;
- RELEASE_SPIN_LOCK(&ws->gen->sync_large_objects);
+ RELEASE_SPIN_LOCK(&ws->gen->sync);
p = bd->start;
if (scavenge_one(p)) {
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 1b8a720870..6c11065670 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -15,6 +15,7 @@
#include "Rts.h"
#include "Storage.h"
+#include "GCThread.h"
#include "RtsUtils.h"
#include "Stats.h"
#include "BlockAlloc.h"
@@ -84,7 +85,7 @@ initGeneration (generation *gen, int g)
gen->compact = 0;
gen->bitmap = NULL;
#ifdef THREADED_RTS
- initSpinLock(&gen->sync_large_objects);
+ initSpinLock(&gen->sync);
#endif
gen->threads = END_TSO_QUEUE;
gen->old_threads = END_TSO_QUEUE;
@@ -766,7 +767,6 @@ lnat
calcAllocated (rtsBool include_nurseries)
{
nat allocated = 0;
- bdescr *bd;
nat i;
// When called from GC.c, we already have the allocation count for
@@ -775,9 +775,7 @@ calcAllocated (rtsBool include_nurseries)
if (include_nurseries)
{
for (i = 0; i < n_capabilities; i++) {
- for (bd = nurseries[i].blocks; bd; bd = bd->link) {
- allocated += (lnat)(bd->free - bd->start);
- }
+ allocated += countOccupied(nurseries[i].blocks);
}
}
@@ -787,25 +785,6 @@ calcAllocated (rtsBool include_nurseries)
return allocated;
}
-/* Approximate the amount of live data in the heap. To be called just
- * after garbage collection (see GarbageCollect()).
- */
-lnat calcLiveBlocks (void)
-{
- nat g;
- lnat live = 0;
- generation *gen;
-
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- /* approximate amount of live data (doesn't take into account slop
- * at end of each block).
- */
- gen = &generations[g];
- live += gen->n_large_blocks + gen->n_blocks;
- }
- return live;
-}
-
lnat countOccupied (bdescr *bd)
{
lnat words;
@@ -818,6 +797,38 @@ lnat countOccupied (bdescr *bd)
return words;
}
+lnat genLiveWords (generation *gen)
+{
+ return gen->n_words + countOccupied(gen->large_objects);
+}
+
+lnat genLiveBlocks (generation *gen)
+{
+ return gen->n_blocks + gen->n_large_blocks;
+}
+
+lnat gcThreadLiveWords (nat i, nat g)
+{
+ lnat words;
+
+ words = countOccupied(gc_threads[i]->gens[g].todo_bd);
+ words += countOccupied(gc_threads[i]->gens[g].part_list);
+ words += countOccupied(gc_threads[i]->gens[g].scavd_list);
+
+ return words;
+}
+
+lnat gcThreadLiveBlocks (nat i, nat g)
+{
+ lnat blocks;
+
+ blocks = countBlocks(gc_threads[i]->gens[g].todo_bd);
+ blocks += gc_threads[i]->gens[g].n_part_blocks;
+ blocks += gc_threads[i]->gens[g].n_scavd_blocks;
+
+ return blocks;
+}
+
// Return an accurate count of the live data in the heap, excluding
// generation 0.
lnat calcLiveWords (void)
@@ -828,8 +839,19 @@ lnat calcLiveWords (void)
live = 0;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- gen = &generations[g];
- live += gen->n_words + countOccupied(gen->large_objects);
+ live += genLiveWords(&generations[g]);
+ }
+ return live;
+}
+
+lnat calcLiveBlocks (void)
+{
+ nat g;
+ lnat live;
+
+ live = 0;
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ live += genLiveBlocks(&generations[g]);
}
return live;
}
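All the new accounting helpers (genLiveWords, gcThreadLiveWords, and the rewritten calcLiveWords/calcLiveBlocks) bottom out in countOccupied, which just sums free - start over a chain of blocks. A self-contained rendering:

    typedef unsigned long lnat;
    typedef unsigned long W_;

    typedef struct bdescr_ {
        struct bdescr_ *link;
        W_ *start, *free;     /* occupied words are [start, free) */
    } bdescr;

    /* Sum of occupied words over a block chain, as in
     * rts/sm/Storage.c; tolerates an empty (NULL) chain. */
    static lnat countOccupied (bdescr *bd)
    {
        lnat words = 0;
        for (; bd != NULL; bd = bd->link) {
            words += (lnat)(bd->free - bd->start);
        }
        return words;
    }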
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index 8927ed69b0..d463d1a2ba 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -87,11 +87,18 @@ lnat countNurseryBlocks ( void );
-------------------------------------------------------------------------- */
lnat calcAllocated (rtsBool count_nurseries);
-lnat calcLiveBlocks (void);
-lnat calcLiveWords (void);
lnat countOccupied (bdescr *bd);
lnat calcNeeded (void);
+lnat gcThreadLiveWords (nat i, nat g);
+lnat gcThreadLiveBlocks (nat i, nat g);
+
+lnat genLiveWords (generation *gen);
+lnat genLiveBlocks (generation *gen);
+
+lnat calcLiveBlocks (void);
+lnat calcLiveWords (void);
+
/* ----------------------------------------------------------------------------
Storage manager internal APIs and globals
------------------------------------------------------------------------- */