author    Simon Marlow <simonmarhaskell@gmail.com>    2008-04-16 22:03:47 +0000
committer Simon Marlow <simonmarhaskell@gmail.com>    2008-04-16 22:03:47 +0000
commit    dbbf15c0f141357aa49b583286174867baadb821 (patch)
tree      cbe7320ab2f07b2695e515d165efe45f060732a6 /rts
parent    4b123ceba0c0a2f72494479a03ac9c94b6166c92 (diff)
download  haskell-dbbf15c0f141357aa49b583286174867baadb821.tar.gz
Allow work units smaller than a block to improve load balancing
Diffstat (limited to 'rts')
 -rw-r--r--  rts/sm/Evac.c      |   2
 -rw-r--r--  rts/sm/GC.c        |  50
 -rw-r--r--  rts/sm/GC.h        |  13
 -rw-r--r--  rts/sm/GCUtils.c   | 178
 -rw-r--r--  rts/sm/GCUtils.h   |  10
 -rw-r--r--  rts/sm/Scav.c      |  37
 -rw-r--r--  rts/sm/Scav.c-inc  |  17
7 files changed, 193 insertions(+), 114 deletions(-)
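
In outline: the parallel GC previously handed out work to other threads only in units of whole blocks, so with few blocks in flight some threads could sit idle. After this patch the bump-allocation limit of a todo block (ws->todo_lim) is advanced in steps of WORK_UNIT_WORDS (128 words), and a block with enough unscanned work can be exported to the step's global todos list before it is full. The core limit computation, as a minimal standalone sketch (stand-in types; BLOCK_SIZE_W = 1024 words is assumed only for the demo):

    /* Illustrative sketch of the new todo_lim computation (cf.
     * alloc_todo_block in rts/sm/GCUtils.c below); types and constants
     * are stand-ins, not the RTS's own. */
    #include <stdio.h>
    #include <stdint.h>

    typedef uintptr_t W_;
    typedef W_ *StgPtr;
    typedef struct { StgPtr start; StgPtr free; } bdescr;

    #define BLOCK_SIZE_W    1024u   /* assumed words per block, for the demo */
    #define WORK_UNIT_WORDS  128u
    #define stg_min(a,b) ((a) <= (b) ? (a) : (b))
    #define stg_max(a,b) ((a) >= (b) ? (a) : (b))

    /* Limit is one work unit (or one object, if larger) past 'free',
     * capped at the end of the block. */
    static StgPtr todo_limit (bdescr *bd, unsigned size)
    {
        return stg_min(bd->start + BLOCK_SIZE_W,
                       bd->free + stg_max(WORK_UNIT_WORDS, size));
    }

    int main (void)
    {
        static W_ mem[BLOCK_SIZE_W];
        bdescr bd = { mem, mem + 100 };
        printf("%td\n", todo_limit(&bd, 16) - bd.start);  /* 228 = 100 + 128 */
        bd.free = mem + 1000;
        printf("%td\n", todo_limit(&bd, 16) - bd.start);  /* 1024: capped at block end */
        return 0;
    }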
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index f117526f3a..daa60186d1 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -63,7 +63,7 @@ alloc_for_copy (nat size, step *stp)
*/
to = ws->todo_free;
if (to + size > ws->todo_lim) {
- to = gc_alloc_todo_block(ws);
+ to = todo_block_full(size, ws);
}
ws->todo_free = to + size;
ASSERT(ws->todo_free >= ws->todo_bd->free && ws->todo_free <= ws->todo_lim);
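
The effect on Evac.c's fast path is minimal: the bump allocator is unchanged, and only the slow path's name (and meaning) changes, since todo_block_full may now simply raise todo_lim rather than always starting a new block. A paraphrase of the code above (not the full alloc_for_copy, which also selects the step and looks up the workspace):

    /* Paraphrase only; relies on the RTS declarations shown in this patch. */
    static StgPtr alloc_for_copy_sketch (nat size, step_workspace *ws)
    {
        StgPtr to = ws->todo_free;
        if (to + size > ws->todo_lim) {
            /* Slow path: may just extend todo_lim within the same
             * block, or retire it and install a fresh todo block. */
            to = todo_block_full(size, ws);
        }
        ws->todo_free = to + size;
        return to;
    }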
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 4d0c4ef613..a3611757d8 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -422,22 +422,34 @@ GarbageCollect ( rtsBool force_major_gc )
ws = &thr->steps[s];
// Not true?
// ASSERT( ws->scan_bd == ws->todo_bd );
- ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
+ ASSERT( ws->scan_bd ? ws->scan_bd->u.scan == ws->scan_bd->free : 1 );
// Push the final block
- if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); }
+ if (ws->scan_bd) { push_scanned_block(ws->scan_bd, ws); }
ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
- prev = ws->scavd_list;
+ prev = ws->part_list;
+ for (bd = ws->part_list; bd != NULL; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED; // now from-space
+ prev = bd;
+ }
+ if (prev != NULL) {
+ prev->link = ws->scavd_list;
+ }
for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
bd->flags &= ~BF_EVACUATED; // now from-space
prev = bd;
}
- prev->link = ws->stp->blocks;
- ws->stp->blocks = ws->scavd_list;
- ws->stp->n_blocks += ws->n_scavd_blocks;
- ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks);
+ prev->link = ws->step->blocks;
+ if (ws->part_list != NULL) {
+ ws->step->blocks = ws->part_list;
+ } else {
+ ws->step->blocks = ws->scavd_list;
+ }
+ ws->step->n_blocks += ws->n_part_blocks;
+ ws->step->n_blocks += ws->n_scavd_blocks;
+ ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
}
}
}
@@ -981,16 +993,18 @@ alloc_gc_thread (int n)
for (s = 0; s < total_steps; s++)
{
ws = &t->steps[s];
- ws->stp = &all_steps[s];
- ASSERT(s == ws->stp->abs_no);
+ ws->step = &all_steps[s];
+ ASSERT(s == ws->step->abs_no);
ws->gct = t;
ws->scan_bd = NULL;
- ws->scan = NULL;
ws->todo_bd = NULL;
ws->buffer_todo_bd = NULL;
+ ws->part_list = NULL;
+ ws->n_part_blocks = 0;
+
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
}
@@ -1304,15 +1318,17 @@ init_collected_gen (nat g, nat n_threads)
ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
ws->scan_bd = NULL;
- ws->scan = NULL;
ws->todo_large_objects = NULL;
+ ws->part_list = NULL;
+ ws->n_part_blocks = 0;
+
// allocate the first to-space block; extra blocks will be
// chained on as necessary.
ws->todo_bd = NULL;
ws->buffer_todo_bd = NULL;
- gc_alloc_todo_block(ws);
+ alloc_todo_block(ws,0);
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
@@ -1343,11 +1359,14 @@ init_uncollected_gen (nat g, nat threads)
for (s = 0; s < generations[g].n_steps; s++) {
ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
- stp = ws->stp;
+ stp = ws->step;
ws->buffer_todo_bd = NULL;
ws->todo_large_objects = NULL;
+ ws->part_list = NULL;
+ ws->n_part_blocks = 0;
+
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
@@ -1365,7 +1384,7 @@ init_uncollected_gen (nat g, nat threads)
// this block is also the scan block; we must scan
// from the current end point.
ws->scan_bd = ws->todo_bd;
- ws->scan = ws->scan_bd->free;
+ ws->scan_bd->u.scan = ws->scan_bd->free;
// subtract the contents of this block from the stats,
// because we'll count the whole block later.
@@ -1374,9 +1393,8 @@ init_uncollected_gen (nat g, nat threads)
else
{
ws->scan_bd = NULL;
- ws->scan = NULL;
ws->todo_bd = NULL;
- gc_alloc_todo_block(ws);
+ alloc_todo_block(ws,0);
}
}
}
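
The end-of-GC bookkeeping above now has to splice two per-workspace lists, not one, onto the step's block list. A standalone sketch of just the list surgery (stand-in bdescr with only a link field; the real loop also clears BF_EVACUATED and updates n_blocks):

    #include <stddef.h>

    typedef struct bdescr_ { struct bdescr_ *link; } bdescr;

    /* part_list ++ scavd_list ++ blocks, returning the new head. */
    static bdescr *
    splice (bdescr *part_list, bdescr *scavd_list, bdescr *blocks)
    {
        bdescr *bd, *prev = NULL;

        for (bd = part_list; bd != NULL; bd = bd->link) prev = bd;
        if (prev != NULL) prev->link = scavd_list;   /* part ++ scavd */

        for (bd = scavd_list; bd != NULL; bd = bd->link) prev = bd;
        if (prev != NULL) prev->link = blocks;       /* ... ++ old blocks */

        if (part_list  != NULL) return part_list;
        if (scavd_list != NULL) return scavd_list;
        return blocks;
    }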
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index 5c2bff96f7..bc14840132 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -73,12 +73,11 @@
------------------------------------------------------------------------- */
typedef struct step_workspace_ {
- step * stp; // the step for this workspace
+ step * step; // the step for this workspace
struct gc_thread_ * gct; // the gc_thread that contains this workspace
// block that is currently being scanned
bdescr * scan_bd;
- StgPtr scan; // the scan pointer
// where objects to be scavenged go
bdescr * todo_bd;
@@ -91,10 +90,14 @@ typedef struct step_workspace_ {
// where large objects to be scavenged go
bdescr * todo_large_objects;
- // Objects that need not be, or have already been, scavenged.
+ // Objects that have already been scavenged.
bdescr * scavd_list;
lnat n_scavd_blocks; // count of blocks in this list
+ // Partially-full, scavenged, blocks
+ bdescr * part_list;
+ unsigned int n_part_blocks; // count of above
+
} step_workspace;
/* ----------------------------------------------------------------------------
@@ -137,7 +140,7 @@ typedef struct gc_thread_ {
// variable).
rtsBool failed_to_evac; // failure to evacuate an object typically
- // causes it to be recorded in the mutable
+ // causes it to be recorded in the mutable
// object list
rtsBool eager_promotion; // forces promotion to the evac gen
@@ -194,4 +197,6 @@ extern nat mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS;
StgClosure * isAlive(StgClosure *p);
+#define WORK_UNIT_WORDS 128
+
#endif /* GC_H */
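
For orientation, the fields above give each workspace block one of four roles, which a block typically moves through in this order (an editorial summary, not RTS code):

    /* Roles of a to-space block in the new scheme; names mirror the
     * struct above, the enum itself is illustrative only. */
    typedef enum {
        TODO,      /* ws->todo_bd: being allocated into, up to todo_lim       */
        SCANNING,  /* ws->scan_bd: objects in [u.scan, free) await scavenging */
        PART,      /* ws->part_list: scanned, > WORK_UNIT_WORDS still free    */
        SCAVD      /* ws->scavd_list: scanned and nearly full                 */
    } block_role;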
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 98e6f8aa35..184b540dca 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -51,7 +51,7 @@ grab_todo_block (step_workspace *ws)
bdescr *bd;
step *stp;
- stp = ws->stp;
+ stp = ws->step;
bd = NULL;
if (ws->buffer_todo_bd)
@@ -76,90 +76,144 @@ grab_todo_block (step_workspace *ws)
return bd;
}
-static void
-push_todo_block (bdescr *bd, step *stp)
+void
+push_scanned_block (bdescr *bd, step_workspace *ws)
{
+ ASSERT(bd != NULL);
ASSERT(bd->link == NULL);
- ACQUIRE_SPIN_LOCK(&stp->sync_todo);
- if (stp->todos_last == NULL) {
- stp->todos_last = bd;
- stp->todos = bd;
- } else {
- stp->todos_last->link = bd;
- stp->todos_last = bd;
+ ASSERT(bd->step == ws->step);
+ ASSERT(bd->u.scan == bd->free);
+
+ if (bd->start + BLOCK_SIZE_W - bd->free > WORK_UNIT_WORDS)
+ {
+ // a partially full block: put it on the part_list.
+ bd->link = ws->part_list;
+ ws->part_list = bd;
+ ws->n_part_blocks++;
+ IF_DEBUG(sanity,
+ ASSERT(countBlocks(ws->part_list) == ws->n_part_blocks));
+ }
+ else
+ {
+ // put the scan block on the ws->scavd_list.
+ bd->link = ws->scavd_list;
+ ws->scavd_list = bd;
+ ws->n_scavd_blocks ++;
+ IF_DEBUG(sanity,
+ ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks));
}
- stp->n_todos++;
- trace(TRACE_gc|DEBUG_gc, "step %d, n_todos: %d", stp->abs_no, stp->n_todos);
- RELEASE_SPIN_LOCK(&stp->sync_todo);
}
-void
-push_scan_block (bdescr *bd, step_workspace *ws)
+StgPtr
+todo_block_full (nat size, step_workspace *ws)
{
+ bdescr *bd;
+
+ bd = ws->todo_bd;
+
ASSERT(bd != NULL);
ASSERT(bd->link == NULL);
+ ASSERT(bd->step == ws->step);
+
+ bd->free = ws->todo_free;
+
+ // If the global list is not empty, or there's not much work in
+ // this block to push, then just increase the limit, provided
+ // there's enough room in this block to evacuate the current
+ // object.
+ if (ws->step->todos != NULL ||
+ (bd->free - bd->u.scan < WORK_UNIT_WORDS / 2)) {
+ if (bd->free + size < bd->start + BLOCK_SIZE_W) {
+ debugTrace(DEBUG_gc, "increasing limit for %p", bd->start);
+ ws->todo_lim = stg_min(bd->start + BLOCK_SIZE_W,
+ ws->todo_lim + stg_max(WORK_UNIT_WORDS,size));
+ return ws->todo_free;
+ }
+ }
+
+ ASSERT(bd->u.scan >= bd->start && bd->u.scan <= bd->free);
+
+ // If this block is not the scan block, we want to push it out and
+ // make room for a new todo block.
+ if (bd != ws->scan_bd)
+ {
+ // If this block does not have enough space to allocate the
+ // current object, but it also doesn't have any work to push, then
+ // push it on to the scanned list. It cannot be empty, because
+ // then there would be enough room to copy the current object.
+ if (bd->u.scan == bd->free)
+ {
+ ASSERT(bd->free != bd->start);
+ push_scanned_block(bd, ws);
+ }
+ // Otherwise, push this block out to the global list.
+ else
+ {
+ step *stp;
+ stp = ws->step;
+ trace(TRACE_gc|DEBUG_gc, "push todo block %p (%d words), step %d, n_todos: %d",
+ bd->start, bd->free - bd->u.scan, stp->abs_no, stp->n_todos);
+ // ToDo: use buffer_todo
+ ACQUIRE_SPIN_LOCK(&stp->sync_todo);
+ if (stp->todos_last == NULL) {
+ stp->todos_last = bd;
+ stp->todos = bd;
+ } else {
+ stp->todos_last->link = bd;
+ stp->todos_last = bd;
+ }
+ stp->n_todos++;
+ RELEASE_SPIN_LOCK(&stp->sync_todo);
+ }
+ }
- // update stats: this is a block that has been copied & scavenged
- gct->copied += bd->free - bd->start;
+ ws->todo_bd = NULL;
+ ws->todo_free = NULL;
+ ws->todo_lim = NULL;
- // put the scan block on the ws->scavd_list.
- bd->link = ws->scavd_list;
- ws->scavd_list = bd;
- ws->n_scavd_blocks ++;
+ alloc_todo_block(ws, size);
- IF_DEBUG(sanity,
- ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks));
+ return ws->todo_free;
}
StgPtr
-gc_alloc_todo_block (step_workspace *ws)
+alloc_todo_block (step_workspace *ws, nat size)
{
bdescr *bd;
- if (ws->todo_bd != NULL) {
- ws->todo_bd->free = ws->todo_free;
+ // Grab a part block if we have one, and it has enough room
+ if (ws->part_list != NULL &&
+ ws->part_list->start + BLOCK_SIZE_W - ws->part_list->free > (int)size)
+ {
+ bd = ws->part_list;
+ ws->part_list = bd->link;
+ ws->n_part_blocks--;
}
-
- // If we already have a todo block, it must be full, so we push it
- // out: first to the buffer_todo_bd, then to the step. BUT, don't
- // push out the block out if it is already the scan block.
- if (ws->todo_bd != NULL && ws->scan_bd != ws->todo_bd) {
- ASSERT(ws->todo_bd->link == NULL);
- if (ws->buffer_todo_bd == NULL) {
- // If the global todo list is empty, push this block
- // out immediately rather than caching it in
- // buffer_todo_bd, because there might be other threads
- // waiting for work.
- if (ws->stp->todos == NULL) {
- push_todo_block(ws->todo_bd, ws->stp);
- } else {
- ws->buffer_todo_bd = ws->todo_bd;
- }
- } else {
- ASSERT(ws->buffer_todo_bd->link == NULL);
- push_todo_block(ws->buffer_todo_bd, ws->stp);
- ws->buffer_todo_bd = ws->todo_bd;
+ else
+ {
+ bd = allocBlock_sync();
+ bd->gen_no = ws->step->gen_no;
+ bd->step = ws->step;
+ bd->u.scan = bd->start;
+
+ // blocks in to-space in generations up to and including N
+ // get the BF_EVACUATED flag.
+ if (ws->step->gen_no <= N) {
+ bd->flags = BF_EVACUATED;
+ } else {
+ bd->flags = 0;
}
- ws->todo_bd = NULL;
- }
-
- bd = allocBlock_sync();
+ }
- bd->gen_no = ws->stp->gen_no;
- bd->step = ws->stp;
bd->link = NULL;
- // blocks in to-space in generations up to and including N
- // get the BF_EVACUATED flag.
- if (ws->stp->gen_no <= N) {
- bd->flags = BF_EVACUATED;
- } else {
- bd->flags = 0;
- }
-
ws->todo_bd = bd;
- ws->todo_free = bd->start;
- ws->todo_lim = bd->start + BLOCK_SIZE_W;
+ ws->todo_free = bd->free;
+ ws->todo_lim = stg_min(bd->start + BLOCK_SIZE_W,
+ bd->free + stg_max(WORK_UNIT_WORDS,size));
+
+ debugTrace(DEBUG_gc, "alloc new todo block %p for step %d",
+ bd->start, ws->step->abs_no);
return ws->todo_free;
}
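
The decision logic in todo_block_full above can be summarised as a three-way choice. A standalone paraphrase (the real function also handles the case where the block is the current scan block, which is neither retired nor exported):

    #define WORK_UNIT_WORDS 128

    typedef enum { EXTEND_LIMIT, PUSH_SCANNED, PUSH_GLOBAL } todo_action;

    /* unscanned = bd->free - bd->u.scan
     * room      = (bd->start + BLOCK_SIZE_W) - bd->free */
    static todo_action classify (int global_todos_nonempty,
                                  long unscanned, long room, long size)
    {
        if ((global_todos_nonempty || unscanned < WORK_UNIT_WORDS / 2)
            && room > size) {
            return EXTEND_LIMIT;  /* raise todo_lim, keep allocating here */
        }
        if (unscanned == 0) {
            return PUSH_SCANNED;  /* fully scanned: part_list or scavd_list */
        }
        return PUSH_GLOBAL;       /* export unscanned work to step->todos */
    }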
diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h
index a121dbd38c..34657c23d3 100644
--- a/rts/sm/GCUtils.h
+++ b/rts/sm/GCUtils.h
@@ -20,18 +20,18 @@ extern SpinLock gc_alloc_block_sync;
bdescr *allocBlock_sync(void);
void freeChain_sync(bdescr *bd);
-void push_scan_block (bdescr *bd, step_workspace *ws);
+void push_scanned_block (bdescr *bd, step_workspace *ws);
bdescr *grab_todo_block (step_workspace *ws);
-StgPtr gc_alloc_todo_block (step_workspace *ws);
-bdescr *gc_alloc_scavd_block (step_workspace *ws);
+StgPtr todo_block_full (nat size, step_workspace *ws);
+StgPtr alloc_todo_block (step_workspace *ws, nat size);
-// Returns true if a block is 3/4 full. This predicate is used to try
+// Returns true if a block is partially full. This predicate is used to try
// to re-use partial blocks wherever possible, and to reduce wastage.
// We might need to tweak the actual value.
INLINE_HEADER rtsBool
isPartiallyFull(bdescr *bd)
{
- return (bd->free + BLOCK_SIZE_W/4 < bd->start + BLOCK_SIZE_W);
+ return (bd->free + WORK_UNIT_WORDS < bd->start + BLOCK_SIZE_W);
}
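
One consequence worth noting: with 4KB blocks and 8-byte words (BLOCK_SIZE_W = 512, an assumption for the arithmetic below), BLOCK_SIZE_W/4 equals WORK_UNIT_WORDS, so on such a platform the old and new predicates coincide; with 4-byte words (BLOCK_SIZE_W = 1024) the threshold drops from 256 free words to 128. A check of the 8-byte case:

    /* Worked comparison of the old and new isPartiallyFull thresholds,
     * assuming BLOCK_SIZE_W = 512 (4KB blocks, 8-byte words). */
    #include <assert.h>

    #define BLOCK_SIZE_W    512L
    #define WORK_UNIT_WORDS 128L

    int main (void)
    {
        long used;  /* words already allocated in the block */
        for (used = 0; used <= BLOCK_SIZE_W; used++) {
            int old_pred = used + BLOCK_SIZE_W/4  < BLOCK_SIZE_W;  /* > 1/4 block free  */
            int new_pred = used + WORK_UNIT_WORDS < BLOCK_SIZE_W;  /* > 1 work unit free */
            assert(old_pred == new_pred);  /* identical when BLOCK_SIZE_W/4
                                              == WORK_UNIT_WORDS */
        }
        return 0;
    }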
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 6eba1e01a9..b7bd7f384b 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -1355,7 +1355,7 @@ scavenge_large (step_workspace *ws)
bdescr *bd;
StgPtr p;
- gct->evac_step = ws->stp;
+ gct->evac_step = ws->step;
bd = ws->todo_large_objects;
@@ -1367,15 +1367,15 @@ scavenge_large (step_workspace *ws)
// the front when evacuating.
ws->todo_large_objects = bd->link;
- ACQUIRE_SPIN_LOCK(&ws->stp->sync_large_objects);
- dbl_link_onto(bd, &ws->stp->scavenged_large_objects);
- ws->stp->n_scavenged_large_blocks += bd->blocks;
- RELEASE_SPIN_LOCK(&ws->stp->sync_large_objects);
+ ACQUIRE_SPIN_LOCK(&ws->step->sync_large_objects);
+ dbl_link_onto(bd, &ws->step->scavenged_large_objects);
+ ws->step->n_scavenged_large_blocks += bd->blocks;
+ RELEASE_SPIN_LOCK(&ws->step->sync_large_objects);
p = bd->start;
if (scavenge_one(p)) {
- if (ws->stp->gen_no > 0) {
- recordMutableGen_GC((StgClosure *)p, ws->stp->gen);
+ if (ws->step->gen_no > 0) {
+ recordMutableGen_GC((StgClosure *)p, ws->step->gen);
}
}
}
@@ -1439,30 +1439,27 @@ loop:
if (ws->scan_bd == NULL && ws->todo_bd != NULL)
{
ws->scan_bd = ws->todo_bd;
- ws->scan = ws->scan_bd->start;
}
// If we have a scan block with some work to do,
// scavenge everything up to the free pointer.
- if (ws->scan != NULL && ws->scan < ws->scan_bd->free)
+ if (ws->scan_bd != NULL && ws->scan_bd->u.scan < ws->scan_bd->free)
{
if (n_gc_threads == 1) {
- scavenge_block1(ws->scan_bd, ws->scan);
+ scavenge_block1(ws->scan_bd);
} else {
- scavenge_block(ws->scan_bd, ws->scan);
+ scavenge_block(ws->scan_bd);
}
- ws->scan = ws->scan_bd->free;
did_something = rtsTrue;
}
- if (ws->scan_bd != NULL && ws->scan == ws->scan_bd->free
- && ws->scan_bd != ws->todo_bd)
+ if (ws->scan_bd != NULL && ws->scan_bd != ws->todo_bd)
{
+ ASSERT(ws->scan_bd->u.scan == ws->scan_bd->free);
// we're not going to evac any more objects into
// this block, so push it now.
- push_scan_block(ws->scan_bd, ws);
+ push_scanned_block(ws->scan_bd, ws);
ws->scan_bd = NULL;
- ws->scan = NULL;
// we might be able to scan the todo block now.
did_something = rtsTrue;
}
@@ -1482,11 +1479,11 @@ loop:
// our scavd list. This saves pushing out the
// scan_bd block, which might be partial.
if (n_gc_threads == 1) {
- scavenge_block1(bd, bd->start);
+ scavenge_block1(bd);
} else {
- scavenge_block(bd, bd->start);
+ scavenge_block(bd);
}
- push_scan_block(bd, ws);
+ push_scanned_block(bd, ws);
did_something = rtsTrue;
break;
}
@@ -1561,7 +1558,7 @@ any_work (void)
}
ws = &gct->steps[s];
if (ws->todo_large_objects) return rtsTrue;
- if (ws->stp->todos) return rtsTrue;
+ if (ws->step->todos) return rtsTrue;
}
gct->no_work++;
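
The recurring change in Scav.c is that the scan pointer now lives in the block descriptor (bd->u.scan) rather than in the workspace, so a partially-scanned block pushed to the global list carries its own resume point and any GC thread can continue it. As a sketch (RTS types as in this patch; scavenge_one_object is a hypothetical helper standing in for the large dispatch loop in Scav.c-inc):

    static void scavenge_block_sketch (bdescr *bd)
    {
        StgPtr p = bd->u.scan;        /* resume where the last scan stopped */
        while (p < bd->free) {
            /* hypothetical helper: scavenge the object at p, return its size */
            p += scavenge_one_object(p);
        }
        bd->u.scan = bd->free;        /* record progress for the next holder */
    }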
diff --git a/rts/sm/Scav.c-inc b/rts/sm/Scav.c-inc
index 09cdf363ad..6f852035b5 100644
--- a/rts/sm/Scav.c-inc
+++ b/rts/sm/Scav.c-inc
@@ -15,7 +15,7 @@
// defined, the second time without.
#ifndef PARALLEL_GC
-#define scavenge_block(a,b) scavenge_block1(a,b)
+#define scavenge_block(a) scavenge_block1(a)
#define evacuate(a) evacuate1(a)
#define recordMutableGen_GC(a,b) recordMutableGen(a,b)
#else
@@ -24,7 +24,7 @@
#undef recordMutableGen_GC
#endif
-static void scavenge_block (bdescr *bd, StgPtr scan);
+static void scavenge_block (bdescr *bd);
/* -----------------------------------------------------------------------------
Scavenge a block from the given scan pointer up to bd->free.
@@ -39,7 +39,7 @@ static void scavenge_block (bdescr *bd, StgPtr scan);
-------------------------------------------------------------------------- */
static void
-scavenge_block (bdescr *bd, StgPtr scan)
+scavenge_block (bdescr *bd)
{
StgPtr p, q;
StgInfoTable *info;
@@ -47,10 +47,10 @@ scavenge_block (bdescr *bd, StgPtr scan)
rtsBool saved_eager_promotion;
step_workspace *ws;
- p = scan;
+ p = bd->u.scan;
debugTrace(DEBUG_gc, "scavenging block %p (gen %d, step %d) @ %p",
- bd->start, bd->gen_no, bd->step->no, scan);
+ bd->start, bd->gen_no, bd->step->no, p);
gct->evac_step = bd->step;
saved_evac_step = gct->evac_step;
@@ -448,7 +448,12 @@ scavenge_block (bdescr *bd, StgPtr scan)
}
debugTrace(DEBUG_gc, " scavenged %ld bytes",
- (unsigned long)((bd->free - scan) * sizeof(W_)));
+ (unsigned long)((bd->free - bd->u.scan) * sizeof(W_)));
+
+ // update stats: this is a block that has been copied & scavenged
+ gct->copied += bd->free - bd->u.scan;
+
+ bd->u.scan = bd->free;
}
#undef scavenge_block
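
For context on the scavenge_block/scavenge_block1 pairing: Scav.c-inc is textually included twice, first with PARALLEL_GC defined and then without, and the #define at its top renames the second copy. Roughly (paraphrased; the real Scav.c arranges the includes with its own guards):

    #define PARALLEL_GC
    #include "Scav.c-inc"   /* emits scavenge_block(), using the
                               synchronised evacuate() */
    #undef PARALLEL_GC
    #include "Scav.c-inc"   /* emits scavenge_block1(), via the
                               scavenge_block -> scavenge_block1 mapping */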