-rw-r--r--  includes/ClosureTypes.h  |   3
-rw-r--r--  rts/ClosureFlags.c       |   5
-rw-r--r--  rts/StgMiscClosures.cmm  |   5
-rw-r--r--  rts/sm/BlockAlloc.c      |   6
-rw-r--r--  rts/sm/Evac.c            | 351
-rw-r--r--  rts/sm/GC.c              | 241
-rw-r--r--  rts/sm/GC.h              |  11
-rw-r--r--  rts/sm/Scav.c            |  65
-rw-r--r--  rts/sm/Scav.h            |   5
9 files changed, 487 insertions, 205 deletions
diff --git a/includes/ClosureTypes.h b/includes/ClosureTypes.h
index b7bebd6c6b..8247a1516f 100644
--- a/includes/ClosureTypes.h
+++ b/includes/ClosureTypes.h
@@ -93,6 +93,7 @@
#define ATOMICALLY_FRAME 69
#define CATCH_RETRY_FRAME 70
#define CATCH_STM_FRAME 71
-#define N_CLOSURE_TYPES 72
+#define WHITEHOLE 72
+#define N_CLOSURE_TYPES 73
#endif /* CLOSURETYPES_H */
diff --git a/rts/ClosureFlags.c b/rts/ClosureFlags.c
index 12e6632f24..eea609eff7 100644
--- a/rts/ClosureFlags.c
+++ b/rts/ClosureFlags.c
@@ -97,9 +97,10 @@ StgWord16 closure_flags[] = {
/* TREC_HEADER = */ ( _NS| _MUT|_UPT ),
/* ATOMICALLY_FRAME = */ ( _BTM ),
/* CATCH_RETRY_FRAME = */ ( _BTM ),
-/* CATCH_STM_FRAME = */ ( _BTM )
+/* CATCH_STM_FRAME = */ ( _BTM ),
+/* WHITEHOLE = */ ( 0 )
};
-#if N_CLOSURE_TYPES != 72
+#if N_CLOSURE_TYPES != 73
#error Closure types changed: update ClosureFlags.c!
#endif
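
The #if/#error guard above is how the RTS keeps the flags table in lock-step with ClosureTypes.h. On a C11 compiler the same invariant could be checked against the table itself rather than the macro; a minimal sketch, not what the RTS uses (the RTS predates C11):

    /* Sketch: C11 equivalent of the #if/#error guard, checking the
     * actual table length instead of trusting N_CLOSURE_TYPES. */
    _Static_assert(sizeof(closure_flags) / sizeof(closure_flags[0])
                   == N_CLOSURE_TYPES,
                   "Closure types changed: update ClosureFlags.c!");
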
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 270c600f7c..f9076ca9a9 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -394,12 +394,9 @@ INFO_TABLE(stg_SE_CAF_BLACKHOLE,0,1,SE_CAF_BLACKHOLE,"SE_CAF_BLACKHOLE","SE_CAF_
/* ----------------------------------------------------------------------------
Whiteholes are used for the "locked" state of a closure (see lockClosure())
-
- The closure type is BLACKHOLE, just because we need a valid closure type
- for sanity checking.
------------------------------------------------------------------------- */
-INFO_TABLE(stg_WHITEHOLE, 0,0, BLACKHOLE, "WHITEHOLE", "WHITEHOLE")
+INFO_TABLE(stg_WHITEHOLE, 0,0, WHITEHOLE, "WHITEHOLE", "WHITEHOLE")
{ foreign "C" barf("WHITEHOLE object entered!") never returns; }
/* ----------------------------------------------------------------------------
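
lockClosure(), mentioned in the comment, is why WHITEHOLE needs to be a valid closure type at all: it is the sentinel swapped into a closure's info pointer while the closure is locked. A sketch of the idiom, assuming the RTS's xchg() and write_barrier() primitives; the real definition lives in the SMP header and may differ in detail (e.g. bounded spinning with a yield):

    INLINE_HEADER StgInfoTable *
    lockClosure (StgClosure *p)
    {
        StgWord info;
        do {
            // swap the WHITEHOLE in; if we got the real info
            // pointer back, we now own the closure
            info = xchg((P_)(void *)&p->header.info,
                        (W_)&stg_WHITEHOLE_info);
            if (info != (W_)&stg_WHITEHOLE_info)
                return (StgInfoTable *)info;
            // otherwise another thread holds the lock: spin
        } while (1);
    }

    INLINE_HEADER void
    unlockClosure (StgClosure *p, const StgInfoTable *info)
    {
        // all writes to the closure must be visible before we unlock
        write_barrier();
        p->header.info = info;
    }
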
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 1c4899e37e..8b8595606b 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -310,7 +310,8 @@ allocGroup (nat n)
{
bdescr *bd, *rem;
- ASSERT_SM_LOCK();
+ // Todo: not true in multithreaded GC, where we use allocBlock_sync().
+ // ASSERT_SM_LOCK();
if (n == 0) barf("allocGroup: requested zero blocks");
@@ -439,7 +440,8 @@ freeGroup(bdescr *p)
{
nat p_on_free_list = 0;
- ASSERT_SM_LOCK();
+ // Todo: not true in multithreaded GC
+ // ASSERT_SM_LOCK();
ASSERT(p->free != (P_)-1);
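
The disabled assertions record that, in the threaded GC, block allocation is no longer serialised by sm_mutex but by a dedicated spinlock. A plausible shape for the allocBlock_sync() the comment names, assuming the gc_alloc_block_sync spinlock this patch initialises in GarbageCollect() (hypothetical sketch, not code from the patch):

    static bdescr *
    allocBlock_sync (void)
    {
        bdescr *bd;
        // serialise on the GC's own spinlock instead of sm_mutex
        ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
        bd = allocBlock();
        RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
        return bd;
    }
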
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 5b37729749..fe788cdb6f 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -27,68 +27,130 @@
static StgClosure * eval_thunk_selector (StgSelector * p, rtsBool);
-STATIC_INLINE void
-upd_evacuee(StgClosure *p, StgClosure *dest)
+STATIC_INLINE StgPtr
+alloc_for_copy (nat size, step *stp)
{
- // not true: (ToDo: perhaps it should be)
- // ASSERT(Bdescr((P_)dest)->flags & BF_EVACUATED);
- SET_INFO(p, &stg_EVACUATED_info);
- ((StgEvacuated *)p)->evacuee = dest;
+ StgPtr to;
+ step_workspace *ws;
+ bdescr *bd;
+
+ /* Find out where we're going, using the handy "to" pointer in
+ * the step of the source object. If it turns out we need to
+ * evacuate to an older generation, adjust it here (see comment
+ * by evacuate()).
+ */
+ if (stp->gen_no < gct->evac_gen) {
+ if (gct->eager_promotion) {
+ stp = &generations[gct->evac_gen].steps[0];
+ } else {
+ gct->failed_to_evac = rtsTrue;
+ }
+ }
+
+ ws = &gct->steps[stp->gen_no][stp->no];
+
+ /* chain a new block onto the to-space for the destination step if
+ * necessary.
+ */
+ bd = ws->todo_bd;
+ to = bd->free;
+ if (to + size >= bd->start + BLOCK_SIZE_W) {
+ bd = gc_alloc_todo_block(ws);
+ to = bd->free;
+ }
+ bd->free = to + size;
+
+ return to;
}
+
+STATIC_INLINE StgPtr
+alloc_for_copy_noscav (nat size, step *stp)
+{
+ StgPtr to;
+ step_workspace *ws;
+ bdescr *bd;
+ /* Find out where we're going, using the handy "to" pointer in
+ * the step of the source object. If it turns out we need to
+ * evacuate to an older generation, adjust it here (see comment
+ * by evacuate()).
+ */
+ if (stp->gen_no < gct->evac_gen) {
+ if (gct->eager_promotion) {
+ stp = &generations[gct->evac_gen].steps[0];
+ } else {
+ gct->failed_to_evac = rtsTrue;
+ }
+ }
+
+ ws = &gct->steps[stp->gen_no][stp->no];
+
+ /* chain a new block onto the to-space for the destination step if
+ * necessary.
+ */
+ bd = ws->scavd_list;
+ to = bd->free;
+ if (to + size >= bd->start + BLOCK_SIZE_W) {
+ bd = gc_alloc_scavd_block(ws);
+ to = bd->free;
+ }
+ bd->free = to + size;
+ return to;
+}
+
STATIC_INLINE StgClosure *
copy_tag(StgClosure *src, nat size, step *stp, StgWord tag)
{
StgPtr to, from;
nat i;
- step_workspace *ws;
- bdescr *bd;
-
- TICK_GC_WORDS_COPIED(size);
- /* Find out where we're going, using the handy "to" pointer in
- * the step of the source object. If it turns out we need to
- * evacuate to an older generation, adjust it here (see comment
- * by evacuate()).
- */
- if (stp->gen_no < gct->evac_gen) {
- if (gct->eager_promotion) {
- stp = &generations[gct->evac_gen].steps[0];
- } else {
- gct->failed_to_evac = rtsTrue;
- }
- }
-
- ws = &gct->steps[stp->gen_no][stp->no];
+ StgWord info;
+
+#ifdef THREADED_RTS
+ do {
+ info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
+ // spin while another thread is copying this closure
+ } while (info == (W_)&stg_WHITEHOLE_info);
+ if (info == (W_)&stg_EVACUATED_info) {
+ src->header.info = (const StgInfoTable *)info;
+ return evacuate(src); // does the failed_to_evac stuff
+ }
+#else
+ info = (W_)src->header.info;
+ src->header.info = &stg_EVACUATED_info;
+#endif
- /* chain a new block onto the to-space for the destination step if
- * necessary.
- */
- bd = ws->todo_bd;
- to = bd->free;
- if (to + size >= bd->start + BLOCK_SIZE_W) {
- bd = gc_alloc_todo_block(ws);
- to = bd->free;
- }
+ to = alloc_for_copy(size,stp);
+
+ TICK_GC_WORDS_COPIED(size);
- from = (StgPtr)src;
- bd->free = to + size;
- for (i = 0; i < size; i++) { // unroll for small i
- to[i] = from[i];
- }
+ from = (StgPtr)src;
+ to[0] = info;
+ for (i = 1; i < size; i++) { // unroll for small i
+ to[i] = from[i];
+ }
+
+ // retag pointer before updating EVACUATE closure and returning
+ to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to);
- /* retag pointer before updating EVACUATE closure and returning */
- to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to);
+// if (to+size+2 < bd->start + BLOCK_SIZE_W) {
+// __builtin_prefetch(to + size + 2, 1);
+// }
- upd_evacuee((StgClosure *)from,(StgClosure *)to);
+ ((StgEvacuated*)from)->evacuee = (StgClosure *)to;
+#ifdef THREADED_RTS
+ write_barrier();
+ ((StgEvacuated*)from)->header.info = &stg_EVACUATED_info;
+#endif
#ifdef PROFILING
- // We store the size of the just evacuated object in the LDV word so that
- // the profiler can guess the position of the next object later.
- SET_EVACUAEE_FOR_LDV(from, size);
+ // We store the size of the just evacuated object in the LDV word so that
+ // the profiler can guess the position of the next object later.
+ SET_EVACUAEE_FOR_LDV(from, size);
#endif
- return (StgClosure *)to;
+ return (StgClosure *)to;
}
+
// Same as copy() above, except the object will be allocated in memory
// that will not be scavenged. Used for objects that have no pointer
@@ -96,54 +158,48 @@ copy_tag(StgClosure *src, nat size, step *stp,StgWord tag)
STATIC_INLINE StgClosure *
copy_noscav_tag(StgClosure *src, nat size, step *stp, StgWord tag)
{
- StgPtr to, from;
- nat i;
- step_workspace *ws;
- bdescr *bd;
-
- TICK_GC_WORDS_COPIED(size);
- /* Find out where we're going, using the handy "to" pointer in
- * the step of the source object. If it turns out we need to
- * evacuate to an older generation, adjust it here (see comment
- * by evacuate()).
- */
- if (stp->gen_no < gct->evac_gen) {
- if (gct->eager_promotion) {
- stp = &generations[gct->evac_gen].steps[0];
- } else {
- gct->failed_to_evac = rtsTrue;
- }
- }
-
- ws = &gct->steps[stp->gen_no][stp->no];
-
- /* chain a new block onto the to-space for the destination step if
- * necessary.
- */
- bd = ws->scavd_list;
- to = bd->free;
- if (to + size >= bd->start + BLOCK_SIZE_W) {
- bd = gc_alloc_scavd_block(ws);
- to = bd->free;
- }
-
- from = (StgPtr)src;
- bd->free = to + size;
- for (i = 0; i < size; i++) { // unroll for small i
- to[i] = from[i];
- }
+ StgPtr to, from;
+ nat i;
+ StgWord info;
+
+#ifdef THREADED_RTS
+ do {
+ info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
+ } while (info == (W_)&stg_WHITEHOLE_info);
+ if (info == (W_)&stg_EVACUATED_info) {
+ src->header.info = (const StgInfoTable *)info;
+ return evacuate(src); // does the failed_to_evac stuff
+ }
+#else
+ info = (W_)src->header.info;
+ src->header.info = &stg_EVACUATED_info;
+#endif
+
+ to = alloc_for_copy_noscav(size,stp);
- /* retag pointer before updating EVACUATE closure and returning */
- to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to);
+ TICK_GC_WORDS_COPIED(size);
+
+ from = (StgPtr)src;
+ to[0] = info;
+ for (i = 1; i < size; i++) { // unroll for small i
+ to[i] = from[i];
+ }
- upd_evacuee((StgClosure *)from,(StgClosure *)to);
+ // retag pointer before updating EVACUATE closure and returning
+ to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to);
+ ((StgEvacuated*)from)->evacuee = (StgClosure *)to;
+#ifdef THREADED_RTS
+ write_barrier();
+ ((StgEvacuated*)from)->header.info = &stg_EVACUATED_info;
+#endif
+
#ifdef PROFILING
- // We store the size of the just evacuated object in the LDV word so that
- // the profiler can guess the position of the next object later.
- SET_EVACUAEE_FOR_LDV(from, size);
+ // We store the size of the just evacuated object in the LDV word so that
+ // the profiler can guess the position of the next object later.
+ SET_EVACUAEE_FOR_LDV(from, size);
#endif
- return (StgClosure *)to;
+ return (StgClosure *)to;
}
@@ -154,46 +210,48 @@ copy_noscav_tag(StgClosure *src, nat size, step *stp, StgWord tag)
static StgClosure *
copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
{
- StgPtr to, from;
- nat i;
- step_workspace *ws;
- bdescr *bd;
-
- TICK_GC_WORDS_COPIED(size_to_copy);
- if (stp->gen_no < gct->evac_gen) {
- if (gct->eager_promotion) {
- stp = &generations[gct->evac_gen].steps[0];
- } else {
- gct->failed_to_evac = rtsTrue;
- }
- }
-
- ws = &gct->steps[stp->gen_no][stp->no];
-
- bd = ws->todo_bd;
- to = bd->free;
- if (to + size_to_reserve >= bd->start + BLOCK_SIZE_W) {
- bd = gc_alloc_todo_block(ws);
- to = bd->free;
- }
+ StgPtr to, from;
+ nat i;
+ StgWord info;
+
+#ifdef THREADED_RTS
+ do {
+ info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
+ } while (info == (W_)&stg_WHITEHOLE_info);
+ if (info == (W_)&stg_EVACUATED_info) {
+ src->header.info = (const StgInfoTable *)info;
+ return evacuate(src); // does the failed_to_evac stuff
+ }
+#else
+ info = (W_)src->header.info;
+ src->header.info = &stg_EVACUATED_info;
+#endif
+
+ to = alloc_for_copy(size_to_reserve, stp);
- from = (StgPtr)src;
- bd->free = to + size_to_reserve;
- for (i = 0; i < size_to_copy; i++) { // unroll for small i
- to[i] = from[i];
- }
-
- upd_evacuee((StgClosure *)from,(StgClosure *)to);
+ TICK_GC_WORDS_COPIED(size_to_copy);
+ from = (StgPtr)src;
+ to[0] = info;
+ for (i = 1; i < size_to_copy; i++) { // unroll for small i
+ to[i] = from[i];
+ }
+
+ ((StgEvacuated*)from)->evacuee = (StgClosure *)to;
+#ifdef THREADED_RTS
+ write_barrier();
+ ((StgEvacuated*)from)->header.info = &stg_EVACUATED_info;
+#endif
+
#ifdef PROFILING
- // We store the size of the just evacuated object in the LDV word so that
- // the profiler can guess the position of the next object later.
- SET_EVACUAEE_FOR_LDV(from, size_to_reserve);
- // fill the slop
- if (size_to_reserve - size_to_copy > 0)
- LDV_FILL_SLOP(to + size_to_copy - 1, (int)(size_to_reserve - size_to_copy));
+ // We store the size of the just evacuated object in the LDV word so that
+ // the profiler can guess the position of the next object later.
+ SET_EVACUAEE_FOR_LDV(from, size_to_reserve);
+ // fill the slop
+ if (size_to_reserve - size_to_copy > 0)
+ LDV_FILL_SLOP(to + size_to_copy - 1, (int)(size_to_reserve - size_to_copy));
#endif
- return (StgClosure *)to;
+ return (StgClosure *)to;
}
@@ -343,18 +401,24 @@ loop:
switch (info->type) {
case THUNK_STATIC:
- if (info->srt_bitmap != 0 &&
- *THUNK_STATIC_LINK((StgClosure *)q) == NULL) {
- *THUNK_STATIC_LINK((StgClosure *)q) = static_objects;
- static_objects = (StgClosure *)q;
+ if (info->srt_bitmap != 0) {
+ ACQUIRE_SPIN_LOCK(&static_objects_sync);
+ if (*THUNK_STATIC_LINK((StgClosure *)q) == NULL) {
+ *THUNK_STATIC_LINK((StgClosure *)q) = static_objects;
+ static_objects = (StgClosure *)q;
+ }
+ RELEASE_SPIN_LOCK(&static_objects_sync);
}
return q;
case FUN_STATIC:
- if (info->srt_bitmap != 0 &&
- *FUN_STATIC_LINK((StgClosure *)q) == NULL) {
- *FUN_STATIC_LINK((StgClosure *)q) = static_objects;
- static_objects = (StgClosure *)q;
+ if (info->srt_bitmap != 0) {
+ ACQUIRE_SPIN_LOCK(&static_objects_sync);
+ if (*FUN_STATIC_LINK((StgClosure *)q) == NULL) {
+ *FUN_STATIC_LINK((StgClosure *)q) = static_objects;
+ static_objects = (StgClosure *)q;
+ }
+ RELEASE_SPIN_LOCK(&static_objects_sync);
}
return q;
@@ -363,17 +427,25 @@ loop:
* on the CAF list, so don't do anything with it here (we'll
* scavenge it later).
*/
- if (((StgIndStatic *)q)->saved_info == NULL
- && *IND_STATIC_LINK((StgClosure *)q) == NULL) {
- *IND_STATIC_LINK((StgClosure *)q) = static_objects;
- static_objects = (StgClosure *)q;
+ if (((StgIndStatic *)q)->saved_info == NULL) {
+ ACQUIRE_SPIN_LOCK(&static_objects_sync);
+ if (*IND_STATIC_LINK((StgClosure *)q) == NULL) {
+ *IND_STATIC_LINK((StgClosure *)q) = static_objects;
+ static_objects = (StgClosure *)q;
+ }
+ RELEASE_SPIN_LOCK(&static_objects_sync);
}
return q;
case CONSTR_STATIC:
if (*STATIC_LINK(info,(StgClosure *)q) == NULL) {
- *STATIC_LINK(info,(StgClosure *)q) = static_objects;
- static_objects = (StgClosure *)q;
+ ACQUIRE_SPIN_LOCK(&static_objects_sync);
+ // re-test, after acquiring lock
+ if (*STATIC_LINK(info,(StgClosure *)q) == NULL) {
+ *STATIC_LINK(info,(StgClosure *)q) = static_objects;
+ static_objects = (StgClosure *)q;
+ }
+ RELEASE_SPIN_LOCK(&static_objects_sync);
/* I am assuming that static_objects pointers are not
* written to other objects, and thus, no need to retag. */
}
@@ -456,6 +528,9 @@ loop:
switch (info->type) {
+ case WHITEHOLE:
+ goto loop;
+
case MUT_VAR_CLEAN:
case MUT_VAR_DIRTY:
case MVAR_CLEAN:
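
The heart of the new Evac.c is the claim/forward protocol at the top of each copy routine: swap a WHITEHOLE into the source closure's header, spin if another thread already holds it, and if the closure turns out to be EVACUATED, restore the header and follow the forwarding pointer instead. A standalone model of that protocol, using GCC builtins in place of the RTS's xchg()/write_barrier(); all names here are hypothetical stand-ins:

    #include <stdint.h>

    #define WHITEHOLE ((uintptr_t)1)  /* stand-in for &stg_WHITEHOLE_info */
    #define EVACUATED ((uintptr_t)2)  /* stand-in for &stg_EVACUATED_info */

    typedef struct closure_ {
        volatile uintptr_t info;
        void *evacuee;                /* forwarding pointer once copied */
    } closure;

    void *claim_and_copy (closure *src,
                          void *(*do_copy)(closure *, uintptr_t))
    {
        uintptr_t info;
        do {
            /* atomically swap the lock in; spin while another
             * thread holds it */
            info = __sync_lock_test_and_set(&src->info, WHITEHOLE);
        } while (info == WHITEHOLE);

        if (info == EVACUATED) {       /* lost the race */
            src->info = info;          /* undo our claim... */
            return src->evacuee;       /* ...and follow the forwarder */
        }

        void *to = do_copy(src, info); /* copy payload; word 0 gets info */
        src->evacuee = to;
        __sync_synchronize();          /* evacuee visible before info */
        src->info = EVACUATED;         /* publish the forwarding pointer */
        return to;
    }

The write barrier is what makes publication safe: any thread that reads EVACUATED from the header is guaranteed to see the evacuee field written before it.
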
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 17bc2041ef..eed2da7020 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -47,6 +47,7 @@
#include "Scav.h"
#include "GCUtils.h"
#include "MarkWeak.h"
+#include "Sparks.h"
#include <string.h> // for memset()
@@ -117,12 +118,16 @@ nat mutlist_MUTVARS,
/* Thread-local data for each GC thread
*/
gc_thread *gc_threads = NULL;
-gc_thread *gct = NULL; // this thread's gct TODO: make thread-local
+// gc_thread *gct = NULL; // this thread's gct TODO: make thread-local
// For stats:
long copied; // *words* copied & scavenged during this GC
long scavd_copied; // *words* copied only during this GC
+#ifdef THREADED_RTS
+SpinLock recordMutableGen_sync;
+#endif
+
/* -----------------------------------------------------------------------------
Static function declarations
-------------------------------------------------------------------------- */
@@ -137,6 +142,11 @@ static void init_gc_thread (gc_thread *t);
static void update_task_list (void);
static void resize_generations (void);
static void resize_nursery (void);
+static void start_gc_threads (void);
+static void gc_thread_work (void);
+static nat inc_running (void);
+static nat dec_running (void);
+static void wakeup_gc_threads (nat n_threads);
#if 0 && defined(DEBUG)
static void gcCAFs (void);
@@ -227,6 +237,10 @@ GarbageCollect ( rtsBool force_major_gc )
*/
alloc_gc_threads();
+ /* Start threads, so they can be spinning up while we finish initialisation.
+ */
+ start_gc_threads();
+
/* How many threads will be participating in this GC?
* We don't try to parallelise minor GC.
*/
@@ -253,8 +267,11 @@ GarbageCollect ( rtsBool force_major_gc )
*/
static_objects = END_OF_STATIC_LIST;
scavenged_static_objects = END_OF_STATIC_LIST;
+
#ifdef THREADED_RTS
initSpinLock(&static_objects_sync);
+ initSpinLock(&recordMutableGen_sync);
+ initSpinLock(&gc_alloc_block_sync);
#endif
// Initialise all the generations/steps that we're collecting.
@@ -283,12 +300,16 @@ GarbageCollect ( rtsBool force_major_gc )
init_gc_thread(&gc_threads[t]);
}
+ // the main thread is running: this prevents any other threads from
+ // exiting prematurely, so we can start them now.
+ inc_running();
+ wakeup_gc_threads(n_threads);
+
// Initialise stats
copied = 0;
scavd_copied = 0;
- // start threads etc.
- // For now, we just have one thread, and set gct to gc_threads[0]
+ // this is the main thread
gct = &gc_threads[0];
/* -----------------------------------------------------------------------
@@ -329,25 +350,28 @@ GarbageCollect ( rtsBool force_major_gc )
* Repeatedly scavenge all the areas we know about until there's no
* more scavenging to be done.
*/
- {
- rtsBool flag;
- loop:
- flag = rtsFalse;
-
- scavenge_loop();
-
- // if any blackholes are alive, make the threads that wait on
- // them alive too.
- if (traverseBlackholeQueue())
- flag = rtsTrue;
-
- if (flag) { goto loop; }
+ for (;;)
+ {
+ gc_thread_work();
+ // The other threads are now stopped. We might recurse back to
+ // here, but from now on this is the only thread.
+
+ // if any blackholes are alive, make the threads that wait on
+ // them alive too.
+ if (traverseBlackholeQueue()) {
+ inc_running();
+ continue;
+ }
+
+ // must be last... invariant is that everything is fully
+ // scavenged at this point.
+ if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
+ inc_running();
+ continue;
+ }
- // must be last... invariant is that everything is fully
- // scavenged at this point.
- if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
- goto loop;
- }
+ // If we get to here, there's really nothing left to do.
+ break;
}
// Update pointers from the Task list
@@ -400,7 +424,8 @@ GarbageCollect ( rtsBool force_major_gc )
ws = &thr->steps[g][s];
if (g==0 && s==0) continue;
- ASSERT( ws->scan_bd == ws->todo_bd );
+ // Not true?
+ // ASSERT( ws->scan_bd == ws->todo_bd );
ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
// Push the final block
@@ -679,25 +704,6 @@ GetRoots( evac_fn evac )
Capability *cap;
Task *task;
-#if defined(GRAN)
- for (i=0; i<=RtsFlags.GranFlags.proc; i++) {
- if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL)))
- evac((StgClosure **)&run_queue_hds[i]);
- if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL)))
- evac((StgClosure **)&run_queue_tls[i]);
-
- if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL)))
- evac((StgClosure **)&blocked_queue_hds[i]);
- if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL)))
- evac((StgClosure **)&blocked_queue_tls[i]);
- if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL)))
- evac((StgClosure **)&ccalling_threads[i]);
- }
-
- markEventQueue();
-
-#else /* !GRAN */
-
for (i = 0; i < n_capabilities; i++) {
cap = &capabilities[i];
evac((StgClosure **)(void *)&cap->run_queue_hd);
@@ -715,17 +721,15 @@ GetRoots( evac_fn evac )
}
-
#if !defined(THREADED_RTS)
evac((StgClosure **)(void *)&blocked_queue_hd);
evac((StgClosure **)(void *)&blocked_queue_tl);
evac((StgClosure **)(void *)&sleeping_queue);
#endif
-#endif
// evac((StgClosure **)&blackhole_queue);
-#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL) || defined(GRAN)
+#if defined(THREADED_RTS)
markSparkQueue(evac);
#endif
@@ -856,6 +860,14 @@ alloc_gc_thread (gc_thread *t, int n)
nat g, s;
step_workspace *ws;
+#ifdef THREADED_RTS
+ t->id = 0;
+ initCondition(&t->wake_cond);
+ initMutex(&t->wake_mutex);
+ t->wakeup = rtsFalse;
+ t->exit = rtsFalse;
+#endif
+
t->thread_index = n;
t->free_blocks = NULL;
t->gc_count = 0;
@@ -897,7 +909,6 @@ alloc_gc_threads (void)
if (gc_threads == NULL) {
#if defined(THREADED_RTS)
nat i;
-
gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
sizeof(gc_thread),
"alloc_gc_threads");
@@ -915,6 +926,146 @@ alloc_gc_threads (void)
}
/* ----------------------------------------------------------------------------
+ Start GC threads
+ ------------------------------------------------------------------------- */
+
+static nat gc_running_threads;
+
+#if defined(THREADED_RTS)
+static Mutex gc_running_mutex;
+#endif
+
+static nat
+inc_running (void)
+{
+ nat n_running;
+ ACQUIRE_LOCK(&gc_running_mutex);
+ n_running = ++gc_running_threads;
+ RELEASE_LOCK(&gc_running_mutex);
+ return n_running;
+}
+
+static nat
+dec_running (void)
+{
+ nat n_running;
+ ACQUIRE_LOCK(&gc_running_mutex);
+ n_running = --gc_running_threads;
+ RELEASE_LOCK(&gc_running_mutex);
+ return n_running;
+}
+
+//
+// gc_thread_work(): Scavenge until there's no work left to do and all
+// the running threads are idle.
+//
+static void
+gc_thread_work (void)
+{
+ nat r;
+
+ debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
+
+ // gc_running_threads has already been incremented for us; either
+ // this is the main thread and we incremented it inside
+ // GarbageCollect(), or this is a worker thread and the main
+ // thread bumped gc_running_threads before waking us up.
+
+loop:
+ scavenge_loop();
+ // scavenge_loop() only exits when there's no work to do
+ r = dec_running();
+
+ debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
+ gct->thread_index, r);
+
+ while (gc_running_threads != 0) {
+ if (any_work()) {
+ inc_running();
+ goto loop;
+ }
+ // any_work() does not remove the work from the queue, it
+ // just checks for the presence of work. If we find any,
+ // then we increment gc_running_threads and go back to
+ // scavenge_loop() to perform any pending work.
+ }
+
+ // All threads are now stopped
+ debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
+}
+
+
+#if defined(THREADED_RTS)
+static void
+gc_thread_mainloop (void)
+{
+ while (!gct->exit) {
+
+ // Wait until we're told to wake up
+ ACQUIRE_LOCK(&gct->wake_mutex);
+ while (!gct->wakeup) {
+ debugTrace(DEBUG_gc, "GC thread %d standing by...",
+ gct->thread_index);
+ waitCondition(&gct->wake_cond, &gct->wake_mutex);
+ }
+ RELEASE_LOCK(&gct->wake_mutex);
+ gct->wakeup = rtsFalse;
+ if (gct->exit) break;
+
+ gc_thread_work();
+ }
+}
+#endif
+
+#if defined(THREADED_RTS)
+static void
+gc_thread_entry (gc_thread *my_gct)
+{
+ gct = my_gct;
+ debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
+ gct->id = osThreadId();
+ gc_thread_mainloop();
+}
+#endif
+
+static void
+start_gc_threads (void)
+{
+#if defined(THREADED_RTS)
+ nat i;
+ OSThreadId id;
+ static rtsBool done = rtsFalse;
+
+ gc_running_threads = 0;
+ initMutex(&gc_running_mutex);
+
+ if (!done) {
+ // Start from 1: the main thread is 0
+ for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
+ createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
+ &gc_threads[i]);
+ }
+ done = rtsTrue;
+ }
+#endif
+}
+
+static void
+wakeup_gc_threads (nat n_threads USED_IF_THREADS)
+{
+#if defined(THREADED_RTS)
+ nat i;
+ for (i=1; i < n_threads; i++) {
+ inc_running();
+ ACQUIRE_LOCK(&gc_threads[i].wake_mutex);
+ gc_threads[i].wakeup = rtsTrue;
+ signalCondition(&gc_threads[i].wake_cond);
+ RELEASE_LOCK(&gc_threads[i].wake_mutex);
+ }
+#endif
+}
+
+/* ----------------------------------------------------------------------------
Initialise a generation that is to be collected
------------------------------------------------------------------------- */
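
The worker wakeup path in gc_thread_mainloop()/wakeup_gc_threads() is the classic mutex-plus-condition-variable handshake with a flag to guard against spurious wakeups. The same pattern in bare pthreads, for reference (names hypothetical; the RTS wraps these in its own Mutex/Condition types):

    #include <pthread.h>
    #include <stdbool.h>

    typedef struct {
        pthread_mutex_t wake_mutex;
        pthread_cond_t  wake_cond;
        bool wakeup;
        bool exit;
    } worker;

    static void worker_mainloop (worker *w)
    {
        for (;;) {
            pthread_mutex_lock(&w->wake_mutex);
            while (!w->wakeup)   /* flag guards against spurious wakeups */
                pthread_cond_wait(&w->wake_cond, &w->wake_mutex);
            w->wakeup = false;
            pthread_mutex_unlock(&w->wake_mutex);
            if (w->exit) break;
            /* ... one GC's worth of work ... */
        }
    }

    static void wake (worker *w)
    {
        pthread_mutex_lock(&w->wake_mutex);
        w->wakeup = true;                    /* set the flag... */
        pthread_cond_signal(&w->wake_cond);  /* ...signal under the lock */
        pthread_mutex_unlock(&w->wake_mutex);
    }
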
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index 69fdc10447..013a901329 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -110,6 +110,10 @@ typedef struct step_workspace_ {
typedef struct gc_thread_ {
#ifdef THREADED_RTS
OSThreadId id; // The OS thread that this struct belongs to
+ Mutex wake_mutex;
+ Condition wake_cond; // So we can go to sleep between GCs
+ rtsBool wakeup;
+ rtsBool exit;
#endif
nat thread_index; // a zero based index identifying the thread
@@ -148,7 +152,8 @@ extern nat N;
extern rtsBool major_gc;
extern gc_thread *gc_threads;
-extern gc_thread *gct; // this thread's gct TODO: make thread-local
+register gc_thread *gct __asm__("%rbx");
+// extern gc_thread *gct; // this thread's gct TODO: make thread-local
extern StgClosure* static_objects;
extern StgClosure* scavenged_static_objects;
@@ -165,6 +170,10 @@ extern StgPtr oldgen_scan;
extern long copied;
extern long scavd_copied;
+#ifdef THREADED_RTS
+extern SpinLock static_objects_sync;
+#endif
+
#ifdef DEBUG
extern nat mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS;
#endif
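
Pinning gct to %rbx is a gcc global-register-variable trick: every access to the GC thread's state becomes a register read with no memory load, but it is x86-64/gcc specific and permanently steals a callee-saved register from the compiler. The commented-out extern shows the global this replaces; a thread-local variable is the other obvious fallback. A hypothetical sketch of the portable arrangement (the patch itself only supports the register form):

    #if defined(__x86_64__) && defined(__GNUC__)
    register gc_thread *gct __asm__("%rbx");  /* no load, not portable */
    #else
    extern __thread gc_thread *gct;           /* one copy per OS thread */
    #endif
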
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 2a6ea389e6..8d7e5825ee 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -1494,7 +1494,7 @@ scavenge_mutable_list(generation *gen)
static void
scavenge_static(void)
{
- StgClosure* p = static_objects;
+ StgClosure* p;
const StgInfoTable *info;
/* Always evacuate straight to the oldest generation for static
@@ -1503,13 +1503,26 @@ scavenge_static(void)
/* keep going until we've scavenged all the objects on the linked
list... */
- while (p != END_OF_STATIC_LIST) {
+ while (1) {
+
+ ACQUIRE_SPIN_LOCK(&static_objects_sync);
+
+ /* get the next static object from the list. Remember, there might
+ * be more stuff on this list after each evacuation...
+ * (static_objects is a global)
+ */
+ p = static_objects;
+ if (p == END_OF_STATIC_LIST) {
+ RELEASE_SPIN_LOCK(&static_objects_sync);
+ break;
+ }
+
ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
info = get_itbl(p);
/*
- if (info->type==RBH)
- info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure
+ if (info->type==RBH)
+ info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure
*/
// make sure the info pointer is into text space
@@ -1520,6 +1533,8 @@ scavenge_static(void)
*STATIC_LINK(info,p) = scavenged_static_objects;
scavenged_static_objects = p;
+ RELEASE_SPIN_LOCK(&static_objects_sync);
+
switch (info -> type) {
case IND_STATIC:
@@ -1564,12 +1579,6 @@ scavenge_static(void)
}
ASSERT(gct->failed_to_evac == rtsFalse);
-
- /* get the next static object from the list. Remember, there might
- * be more stuff on this list now that we've done some evacuating!
- * (static_objects is a global)
- */
- p = static_objects;
}
}
@@ -1947,3 +1956,39 @@ loop:
if (work_to_do) goto loop;
}
+
+rtsBool
+any_work (void)
+{
+ int g, s;
+ step_workspace *ws;
+
+ write_barrier();
+
+ // scavenge static objects
+ if (major_gc && static_objects != END_OF_STATIC_LIST) {
+ return rtsTrue;
+ }
+
+ // scavenge objects in compacted generation
+ if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
+ (mark_stack_bdescr != NULL && !mark_stack_empty())) {
+ return rtsTrue;
+ }
+
+ // Check for global work in any step. We don't need to check for
+ // local work, because we have already exited scavenge_loop(),
+ // which means there is no local work for this thread.
+ for (g = RtsFlags.GcFlags.generations; --g >= 0; ) {
+ for (s = generations[g].n_steps; --s >= 0; ) {
+ if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
+ continue;
+ }
+ ws = &gct->steps[g][s];
+ if (ws->todo_large_objects) return rtsTrue;
+ if (ws->stp->todos) return rtsTrue;
+ }
+ }
+
+ return rtsFalse;
+}
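
any_work() is the other half of the termination protocol begun by gc_thread_work() in GC.c: an idle thread decrements the running count and then polls; if it spots global work it re-registers with inc_running() before scavenging, so the count can only reach zero when every queue is empty and every thread is idle. A condensed standalone model of the loop (names hypothetical):

    #include <stdbool.h>

    extern int  running;                 /* gc_running_threads, mutex-protected */
    extern void inc_running (void);
    extern void dec_running (void);
    extern bool queues_nonempty (void);  /* plays the role of any_work() */
    extern void scavenge (void);         /* plays the role of scavenge_loop() */

    static void thread_work (void)
    {
        bool rejoined;
        for (;;) {
            scavenge();                  /* returns when local work is done */
            dec_running();
            rejoined = false;
            while (running != 0) {
                if (queues_nonempty()) { /* saw global work: rejoin first */
                    inc_running();
                    rejoined = true;
                    break;
                }
            }
            if (!rejoined) return;       /* count hit zero: all threads idle */
        }
    }
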
diff --git a/rts/sm/Scav.h b/rts/sm/Scav.h
index ab66775641..f149b667ac 100644
--- a/rts/sm/Scav.h
+++ b/rts/sm/Scav.h
@@ -11,5 +11,6 @@
*
* ---------------------------------------------------------------------------*/
-void scavenge_loop (void);
-void scavenge_mutable_list (generation *g);
+void scavenge_loop (void);
+rtsBool any_work (void);
+void scavenge_mutable_list (generation *g);