-rw-r--r--   includes/ClosureTypes.h |   3
-rw-r--r--   rts/ClosureFlags.c      |   5
-rw-r--r--   rts/StgMiscClosures.cmm |   5
-rw-r--r--   rts/sm/BlockAlloc.c     |   6
-rw-r--r--   rts/sm/Evac.c           | 351
-rw-r--r--   rts/sm/GC.c             | 241
-rw-r--r--   rts/sm/GC.h             |  11
-rw-r--r--   rts/sm/Scav.c           |  65
-rw-r--r--   rts/sm/Scav.h           |   5
9 files changed, 487 insertions(+), 205 deletions(-)
diff --git a/includes/ClosureTypes.h b/includes/ClosureTypes.h
index b7bebd6c6b..8247a1516f 100644
--- a/includes/ClosureTypes.h
+++ b/includes/ClosureTypes.h
@@ -93,6 +93,7 @@
 #define ATOMICALLY_FRAME        69
 #define CATCH_RETRY_FRAME       70
 #define CATCH_STM_FRAME         71
-#define N_CLOSURE_TYPES         72
+#define WHITEHOLE               72
+#define N_CLOSURE_TYPES         73
 
 #endif /* CLOSURETYPES_H */
diff --git a/rts/ClosureFlags.c b/rts/ClosureFlags.c
index 12e6632f24..eea609eff7 100644
--- a/rts/ClosureFlags.c
+++ b/rts/ClosureFlags.c
@@ -97,9 +97,10 @@ StgWord16 closure_flags[] = {
 /* TREC_HEADER       = */ ( _NS| _MUT|_UPT ),
 /* ATOMICALLY_FRAME  = */ ( _BTM ),
 /* CATCH_RETRY_FRAME = */ ( _BTM ),
-/* CATCH_STM_FRAME   = */ ( _BTM )
+/* CATCH_STM_FRAME   = */ ( _BTM ),
+/* WHITEHOLE         = */ ( 0 )
 };
 
-#if N_CLOSURE_TYPES != 72
+#if N_CLOSURE_TYPES != 73
 #error Closure types changed: update ClosureFlags.c!
 #endif
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 270c600f7c..f9076ca9a9 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -394,12 +394,9 @@ INFO_TABLE(stg_SE_CAF_BLACKHOLE,0,1,SE_CAF_BLACKHOLE,"SE_CAF_BLACKHOLE","SE_CAF_
 /* ----------------------------------------------------------------------------
    Whiteholes are used for the "locked" state of a closure (see lockClosure())
-
-   The closure type is BLAKCHOLE, just because we need a valid closure
-   type for sanity checking.
    ------------------------------------------------------------------------- */
 
-INFO_TABLE(stg_WHITEHOLE, 0,0, BLACKHOLE, "WHITEHOLE", "WHITEHOLE")
+INFO_TABLE(stg_WHITEHOLE, 0,0, WHITEHOLE, "WHITEHOLE", "WHITEHOLE")
 { foreign "C" barf("WHITEHOLE object entered!") never returns; }
 
 /* ----------------------------------------------------------------------------
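The copy functions in Evac.c below take a closure with the same protocol lockClosure() uses: atomically swap the info pointer with stg_WHITEHOLE_info, spin while another thread holds it, and store a real info pointer to release. A minimal sketch of that protocol, assuming the RTS's xchg() atomic-exchange and write_barrier() primitives; the two helper names are illustrative, not part of this patch:

    // Claim a closure by swapping in the WHITEHOLE info pointer;
    // returns the closure's real info pointer once we own it.
    static const StgInfoTable *
    lock_closure_sketch (StgClosure *p)
    {
        StgWord info;
        do {
            // atomic exchange; spin while someone else holds the closure
            info = xchg((StgPtr)&p->header.info, (W_)&stg_WHITEHOLE_info);
        } while (info == (W_)&stg_WHITEHOLE_info);
        return (const StgInfoTable *)info;
    }

    // Release: make payload writes visible before republishing the info
    // pointer, so no thread sees the unlocked closure half-written.
    static void
    unlock_closure_sketch (StgClosure *p, const StgInfoTable *info)
    {
        write_barrier();
        p->header.info = info;
    }

Giving WHITEHOLE its own closure type (instead of reusing BLACKHOLE, as the removed comment describes) lets evacuate() recognise a locked closure and retry, which is exactly what the new "case WHITEHOLE: goto loop;" in Evac.c does.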
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 1c4899e37e..8b8595606b 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -310,7 +310,8 @@ allocGroup (nat n)
 {
     bdescr *bd, *rem;
 
-    ASSERT_SM_LOCK();
+    // Todo: not true in multithreaded GC, where we use allocBlock_sync().
+    // ASSERT_SM_LOCK();
 
     if (n == 0) barf("allocGroup: requested zero blocks");
@@ -439,7 +440,8 @@ freeGroup(bdescr *p)
 {
     nat p_on_free_list = 0;
 
-    ASSERT_SM_LOCK();
+    // Todo: not true in multithreaded GC
+    // ASSERT_SM_LOCK();
 
     ASSERT(p->free != (P_)-1);
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 5b37729749..fe788cdb6f 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -27,68 +27,130 @@
 static StgClosure * eval_thunk_selector (StgSelector * p, rtsBool);
 
-STATIC_INLINE void
-upd_evacuee(StgClosure *p, StgClosure *dest)
+STATIC_INLINE StgPtr
+alloc_for_copy (nat size, step *stp)
 {
-    // not true: (ToDo: perhaps it should be)
-    // ASSERT(Bdescr((P_)dest)->flags & BF_EVACUATED);
-    SET_INFO(p, &stg_EVACUATED_info);
-    ((StgEvacuated *)p)->evacuee = dest;
+    StgPtr to;
+    step_workspace *ws;
+    bdescr *bd;
+
+    /* Find out where we're going, using the handy "to" pointer in
+     * the step of the source object.  If it turns out we need to
+     * evacuate to an older generation, adjust it here (see comment
+     * by evacuate()).
+     */
+    if (stp->gen_no < gct->evac_gen) {
+        if (gct->eager_promotion) {
+            stp = &generations[gct->evac_gen].steps[0];
+        } else {
+            gct->failed_to_evac = rtsTrue;
+        }
+    }
+
+    ws = &gct->steps[stp->gen_no][stp->no];
+
+    /* chain a new block onto the to-space for the destination step if
+     * necessary.
+     */
+    bd = ws->todo_bd;
+    to = bd->free;
+    if (to + size >= bd->start + BLOCK_SIZE_W) {
+        bd = gc_alloc_todo_block(ws);
+        to = bd->free;
+    }
+    bd->free = to + size;
+
+    return to;
 }
+
+STATIC_INLINE StgPtr
+alloc_for_copy_noscav (nat size, step *stp)
+{
+    StgPtr to;
+    step_workspace *ws;
+    bdescr *bd;
+
+    /* Find out where we're going, using the handy "to" pointer in
+     * the step of the source object.  If it turns out we need to
+     * evacuate to an older generation, adjust it here (see comment
+     * by evacuate()).
+     */
+    if (stp->gen_no < gct->evac_gen) {
+        if (gct->eager_promotion) {
+            stp = &generations[gct->evac_gen].steps[0];
+        } else {
+            gct->failed_to_evac = rtsTrue;
+        }
+    }
+
+    ws = &gct->steps[stp->gen_no][stp->no];
+
+    /* chain a new block onto the to-space for the destination step if
+     * necessary.
+     */
+    bd = ws->scavd_list;
+    to = bd->free;
+    if (to + size >= bd->start + BLOCK_SIZE_W) {
+        bd = gc_alloc_scavd_block(ws);
+        to = bd->free;
+    }
+    bd->free = to + size;
+
+    return to;
+}
+
 STATIC_INLINE StgClosure *
 copy_tag(StgClosure *src, nat size, step *stp, StgWord tag)
 {
     StgPtr to, from;
     nat i;
-    step_workspace *ws;
-    bdescr *bd;
-
-    TICK_GC_WORDS_COPIED(size);
-    /* Find out where we're going, using the handy "to" pointer in
-     * the step of the source object.  If it turns out we need to
-     * evacuate to an older generation, adjust it here (see comment
-     * by evacuate()).
-     */
-    if (stp->gen_no < gct->evac_gen) {
-        if (gct->eager_promotion) {
-            stp = &generations[gct->evac_gen].steps[0];
-        } else {
-            gct->failed_to_evac = rtsTrue;
-        }
-    }
-
-    ws = &gct->steps[stp->gen_no][stp->no];
+    StgWord info;
+
+#ifdef THREADED_RTS
+    do {
+        info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
+        // so..  what is it?
+    } while (info == (W_)&stg_WHITEHOLE_info);
+    if (info == (W_)&stg_EVACUATED_info) {
+        src->header.info = (const StgInfoTable *)info;
+        return evacuate(src); // does the failed_to_evac stuff
+    }
+#else
+    info = (W_)src->header.info;
+    src->header.info = &stg_EVACUATED_info;
+#endif
 
-    /* chain a new block onto the to-space for the destination step if
-     * necessary.
-     */
-    bd = ws->todo_bd;
-    to = bd->free;
-    if (to + size >= bd->start + BLOCK_SIZE_W) {
-        bd = gc_alloc_todo_block(ws);
-        to = bd->free;
-    }
+    to = alloc_for_copy(size,stp);
+
+    TICK_GC_WORDS_COPIED(size);
 
-    from = (StgPtr)src;
-    bd->free = to + size;
-    for (i = 0; i < size; i++) { // unroll for small i
-        to[i] = from[i];
-    }
+    from = (StgPtr)src;
+    to[0] = info;
+    for (i = 1; i < size; i++) { // unroll for small i
+        to[i] = from[i];
+    }
+
+    // retag pointer before updating EVACUATE closure and returning
+    to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to);
 
-    /* retag pointer before updating EVACUATE closure and returning */
-    to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to);
+//  if (to+size+2 < bd->start + BLOCK_SIZE_W) {
+//      __builtin_prefetch(to + size + 2, 1);
+//  }
 
-    upd_evacuee((StgClosure *)from,(StgClosure *)to);
+    ((StgEvacuated*)from)->evacuee = (StgClosure *)to;
+#ifdef THREADED_RTS
+    write_barrier();
+    ((StgEvacuated*)from)->header.info = &stg_EVACUATED_info;
+#endif
 
 #ifdef PROFILING
-    // We store the size of the just evacuated object in the LDV word so that
-    // the profiler can guess the position of the next object later.
-    SET_EVACUAEE_FOR_LDV(from, size);
+    // We store the size of the just evacuated object in the LDV word so that
+    // the profiler can guess the position of the next object later.
+    SET_EVACUAEE_FOR_LDV(from, size);
 #endif
-    return (StgClosure *)to;
+    return (StgClosure *)to;
 }
 
+
 // Same as copy() above, except the object will be allocated in memory
 // that will not be scavenged. Used for object that have no pointer
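The THREADED_RTS path in copy_tag() above is a claim/copy/publish sequence: claim the object by swapping in WHITEHOLE, copy it (stashing the real info pointer in to[0]), publish the forwarding pointer, and only then release the object as EVACUATED. The write_barrier() is what keeps another GC thread from observing the EVACUATED header before the evacuee field. A standalone model of just that ordering, with C11 atomics standing in for the RTS's xchg() and write_barrier(); every name here is illustrative:

    #include <stdatomic.h>
    #include <stdint.h>

    enum { WHITEHOLE = 1, EVACUATED = 2 };  // stand-ins for RTS info pointers

    typedef struct {
        _Atomic uintptr_t info;     // models the closure's header.info word
        uintptr_t         evacuee;  // models StgEvacuated.evacuee
    } closure_model;

    // Writer: claim, then publish the forwarding pointer with release
    // ordering, so no reader can observe EVACUATED without also observing
    // evacuee -- this plays the role of write_barrier() in copy_tag().
    void forward (closure_model *c, uintptr_t to)
    {
        uintptr_t info;
        do {
            info = atomic_exchange(&c->info, (uintptr_t)WHITEHOLE);
        } while (info == WHITEHOLE);   // spin: another thread owns it
        // ... copy the payload to the to-space object here ...
        c->evacuee = to;               // forwarding pointer first
        atomic_store_explicit(&c->info, (uintptr_t)EVACUATED,
                              memory_order_release);
    }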
@@ -96,54 +158,48 @@ copy_tag(StgClosure *src, nat size, step *stp,StgWord tag)
 STATIC_INLINE StgClosure *
 copy_noscav_tag(StgClosure *src, nat size, step *stp, StgWord tag)
 {
-    StgPtr to, from;
-    nat i;
-    step_workspace *ws;
-    bdescr *bd;
-
-    TICK_GC_WORDS_COPIED(size);
-    /* Find out where we're going, using the handy "to" pointer in
-     * the step of the source object.  If it turns out we need to
-     * evacuate to an older generation, adjust it here (see comment
-     * by evacuate()).
-     */
-    if (stp->gen_no < gct->evac_gen) {
-        if (gct->eager_promotion) {
-            stp = &generations[gct->evac_gen].steps[0];
-        } else {
-            gct->failed_to_evac = rtsTrue;
-        }
-    }
-
-    ws = &gct->steps[stp->gen_no][stp->no];
-
-    /* chain a new block onto the to-space for the destination step if
-     * necessary.
-     */
-    bd = ws->scavd_list;
-    to = bd->free;
-    if (to + size >= bd->start + BLOCK_SIZE_W) {
-        bd = gc_alloc_scavd_block(ws);
-        to = bd->free;
-    }
-
-    from = (StgPtr)src;
-    bd->free = to + size;
-    for (i = 0; i < size; i++) { // unroll for small i
-        to[i] = from[i];
-    }
+    StgPtr to, from;
+    nat i;
+    StgWord info;
+
+#ifdef THREADED_RTS
+    do {
+        info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
+    } while (info == (W_)&stg_WHITEHOLE_info);
+    if (info == (W_)&stg_EVACUATED_info) {
+        src->header.info = (const StgInfoTable *)info;
+        return evacuate(src); // does the failed_to_evac stuff
+    }
+#else
+    info = (W_)src->header.info;
+    src->header.info = &stg_EVACUATED_info;
+#endif
+
+    to = alloc_for_copy_noscav(size,stp);
 
-    /* retag pointer before updating EVACUATE closure and returning */
-    to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to);
+    TICK_GC_WORDS_COPIED(size);
+
+    from = (StgPtr)src;
+    to[0] = info;
+    for (i = 1; i < size; i++) { // unroll for small i
+        to[i] = from[i];
+    }
 
-    upd_evacuee((StgClosure *)from,(StgClosure *)to);
+    // retag pointer before updating EVACUATE closure and returning
+    to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to);
+
+    ((StgEvacuated*)from)->evacuee = (StgClosure *)to;
+#ifdef THREADED_RTS
+    write_barrier();
+    ((StgEvacuated*)from)->header.info = &stg_EVACUATED_info;
+#endif
 
 #ifdef PROFILING
-    // We store the size of the just evacuated object in the LDV word so that
-    // the profiler can guess the position of the next object later.
-    SET_EVACUAEE_FOR_LDV(from, size);
+    // We store the size of the just evacuated object in the LDV word so that
+    // the profiler can guess the position of the next object later.
+    SET_EVACUAEE_FOR_LDV(from, size);
 #endif
-    return (StgClosure *)to;
+    return (StgClosure *)to;
 }
 
@@ -154,46 +210,48 @@ copy_noscav_tag(StgClosure *src, nat size, step *stp, StgWord tag)
 static StgClosure *
 copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
 {
-    StgPtr to, from;
-    nat i;
-    step_workspace *ws;
-    bdescr *bd;
-
-    TICK_GC_WORDS_COPIED(size_to_copy);
-    if (stp->gen_no < gct->evac_gen) {
-        if (gct->eager_promotion) {
-            stp = &generations[gct->evac_gen].steps[0];
-        } else {
-            gct->failed_to_evac = rtsTrue;
-        }
-    }
-
-    ws = &gct->steps[stp->gen_no][stp->no];
-
-    bd = ws->todo_bd;
-    to = bd->free;
-    if (to + size_to_reserve >= bd->start + BLOCK_SIZE_W) {
-        bd = gc_alloc_todo_block(ws);
-        to = bd->free;
-    }
+    StgPtr to, from;
+    nat i;
+    StgWord info;
+
+#ifdef THREADED_RTS
+    do {
+        info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
+    } while (info == (W_)&stg_WHITEHOLE_info);
+    if (info == (W_)&stg_EVACUATED_info) {
+        src->header.info = (const StgInfoTable *)info;
+        return evacuate(src); // does the failed_to_evac stuff
+    }
+#else
+    info = (W_)src->header.info;
+    src->header.info = &stg_EVACUATED_info;
+#endif
+
+    to = alloc_for_copy(size_to_reserve, stp);
 
-    from = (StgPtr)src;
-    bd->free = to + size_to_reserve;
-    for (i = 0; i < size_to_copy; i++) { // unroll for small i
-        to[i] = from[i];
-    }
-
-    upd_evacuee((StgClosure *)from,(StgClosure *)to);
+    TICK_GC_WORDS_COPIED(size_to_copy);
 
+    from = (StgPtr)src;
+    to[0] = info;
+    for (i = 1; i < size_to_copy; i++) { // unroll for small i
+        to[i] = from[i];
+    }
+
+    ((StgEvacuated*)from)->evacuee = (StgClosure *)to;
+#ifdef THREADED_RTS
+    write_barrier();
+    ((StgEvacuated*)from)->header.info = &stg_EVACUATED_info;
+#endif
 
 #ifdef PROFILING
-    // We store the size of the just evacuated object in the LDV word so that
-    // the profiler can guess the position of the next object later.
-    SET_EVACUAEE_FOR_LDV(from, size_to_reserve);
-    // fill the slop
-    if (size_to_reserve - size_to_copy > 0)
-        LDV_FILL_SLOP(to + size_to_copy - 1, (int)(size_to_reserve - size_to_copy));
+    // We store the size of the just evacuated object in the LDV word so that
+    // the profiler can guess the position of the next object later.
+    SET_EVACUAEE_FOR_LDV(from, size_to_reserve);
+    // fill the slop
+    if (size_to_reserve - size_to_copy > 0)
+        LDV_FILL_SLOP(to + size_to_copy - 1, (int)(size_to_reserve - size_to_copy));
 #endif
-    return (StgClosure *)to;
+    return (StgClosure *)to;
 }
 
@@ -343,18 +401,24 @@ loop:
   switch (info->type) {
 
   case THUNK_STATIC:
-      if (info->srt_bitmap != 0 &&
-          *THUNK_STATIC_LINK((StgClosure *)q) == NULL) {
-          *THUNK_STATIC_LINK((StgClosure *)q) = static_objects;
-          static_objects = (StgClosure *)q;
+      if (info->srt_bitmap != 0) {
+          ACQUIRE_SPIN_LOCK(&static_objects_sync);
+          if (*THUNK_STATIC_LINK((StgClosure *)q) == NULL) {
+              *THUNK_STATIC_LINK((StgClosure *)q) = static_objects;
+              static_objects = (StgClosure *)q;
+          }
+          RELEASE_SPIN_LOCK(&static_objects_sync);
       }
      return q;
 
  case FUN_STATIC:
-      if (info->srt_bitmap != 0 &&
-          *FUN_STATIC_LINK((StgClosure *)q) == NULL) {
-          *FUN_STATIC_LINK((StgClosure *)q) = static_objects;
-          static_objects = (StgClosure *)q;
+      if (info->srt_bitmap != 0) {
+          ACQUIRE_SPIN_LOCK(&static_objects_sync);
+          if (*FUN_STATIC_LINK((StgClosure *)q) == NULL) {
+              *FUN_STATIC_LINK((StgClosure *)q) = static_objects;
+              static_objects = (StgClosure *)q;
+          }
+          RELEASE_SPIN_LOCK(&static_objects_sync);
      }
      return q;
 
@@ -363,17 +427,25 @@
       * on the CAF list, so don't do anything with it here (we'll
       * scavenge it later).
       */
-      if (((StgIndStatic *)q)->saved_info == NULL
-          && *IND_STATIC_LINK((StgClosure *)q) == NULL) {
-          *IND_STATIC_LINK((StgClosure *)q) = static_objects;
-          static_objects = (StgClosure *)q;
+      if (((StgIndStatic *)q)->saved_info == NULL) {
+          ACQUIRE_SPIN_LOCK(&static_objects_sync);
+          if (*IND_STATIC_LINK((StgClosure *)q) == NULL) {
+              *IND_STATIC_LINK((StgClosure *)q) = static_objects;
+              static_objects = (StgClosure *)q;
+          }
+          RELEASE_SPIN_LOCK(&static_objects_sync);
      }
      return q;
 
  case CONSTR_STATIC:
      if (*STATIC_LINK(info,(StgClosure *)q) == NULL) {
-          *STATIC_LINK(info,(StgClosure *)q) = static_objects;
-          static_objects = (StgClosure *)q;
+          ACQUIRE_SPIN_LOCK(&static_objects_sync);
+          // re-test, after acquiring lock
+          if (*STATIC_LINK(info,(StgClosure *)q) == NULL) {
+              *STATIC_LINK(info,(StgClosure *)q) = static_objects;
+              static_objects = (StgClosure *)q;
+          }
+          RELEASE_SPIN_LOCK(&static_objects_sync);
          /* I am assuming that static_objects pointers are not
           * written to other objects, and thus, no need to retag.
           */
      }
 
@@ -456,6 +528,9 @@ loop:
 
   switch (info->type) {
 
+  case WHITEHOLE:
+      goto loop;
+
   case MUT_VAR_CLEAN:
   case MUT_VAR_DIRTY:
   case MVAR_CLEAN:
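Every static-object case above follows the same shape: an unlocked test of the link field, then a second test after taking static_objects_sync, because another GC thread may have linked the object onto the list in between. A sketch of the idiom in isolation; the helper name and signature are illustrative, not from the patch:

    // Double-checked push onto a shared singly-linked list guarded by a
    // spinlock: the cheap unlocked test keeps the common
    // already-on-the-list case lock-free.
    static void
    link_static_once (StgClosure **link_field, StgClosure *q,
                      StgClosure **list, SpinLock *lock)
    {
        if (*link_field == NULL) {         // unlocked fast path
            ACQUIRE_SPIN_LOCK(lock);
            if (*link_field == NULL) {     // re-test under the lock
                *link_field = *list;
                *list = q;
            }
            RELEASE_SPIN_LOCK(lock);
        }
    }

This is safe here because a link field only ever goes from NULL to non-NULL during a GC, so a stale unlocked read can only send a thread into the locked slow path, never skip a push that is still needed.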
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 17bc2041ef..eed2da7020 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -47,6 +47,7 @@
 #include "Scav.h"
 #include "GCUtils.h"
 #include "MarkWeak.h"
+#include "Sparks.h"
 
 #include <string.h> // for memset()
@@ -117,12 +118,16 @@ nat mutlist_MUTVARS,
 /* Thread-local data for each GC thread
  */
 gc_thread *gc_threads = NULL;
-gc_thread *gct = NULL; // this thread's gct TODO: make thread-local
+// gc_thread *gct = NULL; // this thread's gct TODO: make thread-local
 
 // For stats:
 long copied;        // *words* copied & scavenged during this GC
 long scavd_copied;  // *words* copied only during this GC
 
+#ifdef THREADED_RTS
+SpinLock recordMutableGen_sync;
+#endif
+
 /* -----------------------------------------------------------------------------
    Static function declarations
    -------------------------------------------------------------------------- */
@@ -137,6 +142,11 @@
 static void init_gc_thread (gc_thread *t);
 static void update_task_list (void);
 static void resize_generations (void);
 static void resize_nursery (void);
+static void start_gc_threads (void);
+static void gc_thread_work (void);
+static nat inc_running (void);
+static nat dec_running (void);
+static void wakeup_gc_threads (nat n_threads);
 
 #if 0 && defined(DEBUG)
 static void gcCAFs (void);
@@ -227,6 +237,10 @@ GarbageCollect ( rtsBool force_major_gc )
   */
  alloc_gc_threads();
 
+  /* Start threads, so they can be spinning up while we finish initialisation.
+   */
+  start_gc_threads();
+
  /* How many threads will be participating in this GC?
   * We don't try to parallelise minor GC.
   */
@@ -253,8 +267,11 @@
  static_objects = END_OF_STATIC_LIST;
  scavenged_static_objects = END_OF_STATIC_LIST;
+
 #ifdef THREADED_RTS
  initSpinLock(&static_objects_sync);
+  initSpinLock(&recordMutableGen_sync);
+  initSpinLock(&gc_alloc_block_sync);
 #endif
 
  // Initialise all the generations/steps that we're collecting.
@@ -283,12 +300,16 @@
      init_gc_thread(&gc_threads[t]);
  }
 
+  // the main thread is running: this prevents any other threads from
+  // exiting prematurely, so we can start them now.
+  inc_running();
+  wakeup_gc_threads(n_threads);
+
  // Initialise stats
  copied = 0;
  scavd_copied = 0;
 
-  // start threads etc.
-  // For now, we just have one thread, and set gct to gc_threads[0]
+  // this is the main thread
  gct = &gc_threads[0];
 
  /* -----------------------------------------------------------------------
@@ -329,25 +350,28 @@
   * Repeatedly scavenge all the areas we know about until there's no
   * more scavenging to be done.
   */
-  {
-    rtsBool flag;
-  loop:
-    flag = rtsFalse;
-
-    scavenge_loop();
-
-    // if any blackholes are alive, make the threads that wait on
-    // them alive too.
-    if (traverseBlackholeQueue())
-        flag = rtsTrue;
-
-    if (flag) { goto loop; }
+  for (;;)
+  {
+      gc_thread_work();
+      // The other threads are now stopped.  We might recurse back to
+      // here, but from now on this is the only thread.
+
+      // if any blackholes are alive, make the threads that wait on
+      // them alive too.
+      if (traverseBlackholeQueue()) {
+          inc_running();
+          continue;
+      }
+
+      // must be last...  invariant is that everything is fully
+      // scavenged at this point.
+      if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
+          inc_running();
+          continue;
+      }
 
-    // must be last...  invariant is that everything is fully
-    // scavenged at this point.
-    if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
-      goto loop;
-    }
+      // If we get to here, there's really nothing left to do.
+      break;
  }
 
  // Update pointers from the Task list
@@ -400,7 +424,8 @@
            ws = &thr->steps[g][s];
            if (g==0 && s==0) continue;
 
-            ASSERT( ws->scan_bd == ws->todo_bd );
+            // Not true?
+            // ASSERT( ws->scan_bd == ws->todo_bd );
            ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
 
            // Push the final block
@@ -679,25 +704,6 @@ GetRoots( evac_fn evac )
    Capability *cap;
    Task *task;
 
-#if defined(GRAN)
-    for (i=0; i<=RtsFlags.GranFlags.proc; i++) {
-        if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL)))
-            evac((StgClosure **)&run_queue_hds[i]);
-        if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL)))
-            evac((StgClosure **)&run_queue_tls[i]);
-
-        if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL)))
-            evac((StgClosure **)&blocked_queue_hds[i]);
-        if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL)))
-            evac((StgClosure **)&blocked_queue_tls[i]);
-        if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL)))
-            evac((StgClosure **)&ccalling_threads[i]);
-    }
-
-    markEventQueue();
-
-#else /* !GRAN */
-
    for (i = 0; i < n_capabilities; i++) {
        cap = &capabilities[i];
        evac((StgClosure **)(void *)&cap->run_queue_hd);
@@ -715,17 +721,15 @@
    }
 
-
 #if !defined(THREADED_RTS)
    evac((StgClosure **)(void *)&blocked_queue_hd);
    evac((StgClosure **)(void *)&blocked_queue_tl);
    evac((StgClosure **)(void *)&sleeping_queue);
 #endif
-#endif
 
    // evac((StgClosure **)&blackhole_queue);
 
-#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL) || defined(GRAN)
+#if defined(THREADED_RTS)
    markSparkQueue(evac);
 #endif
@@ -856,6 +860,14 @@ alloc_gc_thread (gc_thread *t, int n)
    nat g, s;
    step_workspace *ws;
 
+#ifdef THREADED_RTS
+    t->id = 0;
+    initCondition(&t->wake_cond);
+    initMutex(&t->wake_mutex);
+    t->wakeup = rtsFalse;
+    t->exit = rtsFalse;
+#endif
+
    t->thread_index = n;
    t->free_blocks = NULL;
    t->gc_count = 0;
@@ -897,7 +909,6 @@ alloc_gc_threads (void)
    if (gc_threads == NULL) {
 #if defined(THREADED_RTS)
        nat i;
-
        gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
                                     sizeof(gc_thread),
                                     "alloc_gc_threads");
@@ -915,6 +926,146 @@
 }
 
 /* ----------------------------------------------------------------------------
+   Start GC threads
+   ------------------------------------------------------------------------- */
+
+static nat gc_running_threads;
+
+#if defined(THREADED_RTS)
+static Mutex gc_running_mutex;
+#endif
+
+static nat
+inc_running (void)
+{
+    nat n_running;
+    ACQUIRE_LOCK(&gc_running_mutex);
+    n_running = ++gc_running_threads;
+    RELEASE_LOCK(&gc_running_mutex);
+    return n_running;
+}
+
+static nat
+dec_running (void)
+{
+    nat n_running;
+    ACQUIRE_LOCK(&gc_running_mutex);
+    n_running = --gc_running_threads;
+    RELEASE_LOCK(&gc_running_mutex);
+    return n_running;
+}
+
+//
+// gc_thread_work(): Scavenge until there's no work left to do and all
+// the running threads are idle.
+//
+static void
+gc_thread_work (void)
+{
+    nat r;
+
+    debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
+
+    // gc_running_threads has already been incremented for us; either
+    // this is the main thread and we incremented it inside
+    // GarbageCollect(), or this is a worker thread and the main
+    // thread bumped gc_running_threads before waking us up.
+
+loop:
+    scavenge_loop();
+    // scavenge_loop() only exits when there's no work to do
+    r = dec_running();
+
+    debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
+               gct->thread_index, r);
+
+    while (gc_running_threads != 0) {
+        if (any_work()) {
+            inc_running();
+            goto loop;
+        }
+        // any_work() does not remove the work from the queue, it
+        // just checks for the presence of work.  If we find any,
+        // then we increment gc_running_threads and go back to
+        // scavenge_loop() to perform any pending work.
+    }
+
+    // All threads are now stopped
+    debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
+}
+
+
+#if defined(THREADED_RTS)
+static void
+gc_thread_mainloop (void)
+{
+    while (!gct->exit) {
+
+        // Wait until we're told to wake up
+        ACQUIRE_LOCK(&gct->wake_mutex);
+        while (!gct->wakeup) {
+            debugTrace(DEBUG_gc, "GC thread %d standing by...",
+                       gct->thread_index);
+            waitCondition(&gct->wake_cond, &gct->wake_mutex);
+        }
+        RELEASE_LOCK(&gct->wake_mutex);
+        gct->wakeup = rtsFalse;
+        if (gct->exit) break;
+
+        gc_thread_work();
+    }
+}
+#endif
+
+#if defined(THREADED_RTS)
+static void
+gc_thread_entry (gc_thread *my_gct)
+{
+    gct = my_gct;
+    debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
+    gct->id = osThreadId();
+    gc_thread_mainloop();
+}
+#endif
+
+static void
+start_gc_threads (void)
+{
+#if defined(THREADED_RTS)
+    nat i;
+    OSThreadId id;
+    static rtsBool done = rtsFalse;
+
+    gc_running_threads = 0;
+    initMutex(&gc_running_mutex);
+
+    if (!done) {
+        // Start from 1: the main thread is 0
+        for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
+            createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
+                           &gc_threads[i]);
+        }
+        done = rtsTrue;
+    }
+#endif
+}
+
+static void
+wakeup_gc_threads (nat n_threads USED_IF_THREADS)
+{
+#if defined(THREADED_RTS)
+    nat i;
+    for (i=1; i < n_threads; i++) {
+        inc_running();
+        ACQUIRE_LOCK(&gc_threads[i].wake_mutex);
+        gc_threads[i].wakeup = rtsTrue;
+        signalCondition(&gc_threads[i].wake_cond);
+        RELEASE_LOCK(&gc_threads[i].wake_mutex);
+    }
+#endif
+}
+
+/* ----------------------------------------------------------------------------
    Initialise a generation that is to be collected
    ------------------------------------------------------------------------- */
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index 69fdc10447..013a901329 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -110,6 +110,10 @@ typedef struct step_workspace_ {
 typedef struct gc_thread_ {
 #ifdef THREADED_RTS
     OSThreadId id;                 // The OS thread that this struct belongs to
+    Mutex      wake_mutex;
+    Condition  wake_cond;          // So we can go to sleep between GCs
+    rtsBool    wakeup;
+    rtsBool    exit;
 #endif
     nat thread_index;              // a zero based index identifying the thread
 
@@ -148,7 +152,8 @@ extern nat N;
 extern rtsBool major_gc;
 
 extern gc_thread *gc_threads;
-extern gc_thread *gct;  // this thread's gct TODO: make thread-local
+register gc_thread *gct __asm__("%rbx");
+// extern gc_thread *gct; // this thread's gct TODO: make thread-local
 
 extern StgClosure* static_objects;
 extern StgClosure* scavenged_static_objects;
@@ -165,6 +170,10 @@ extern StgPtr oldgen_scan;
 extern long copied;
 extern long scavd_copied;
 
+#ifdef THREADED_RTS
+extern SpinLock static_objects_sync;
+#endif
+
 #ifdef DEBUG
 extern nat mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS;
 #endif
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 2a6ea389e6..8d7e5825ee 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -1494,7 +1494,7 @@ scavenge_mutable_list(generation *gen)
 static void
 scavenge_static(void)
 {
-  StgClosure* p = static_objects;
+  StgClosure* p;
   const StgInfoTable *info;
 
  /* Always evacuate straight to the oldest generation for static
@@ -1503,13 +1503,26 @@
 
  /* keep going until we've scavenged all the objects on the linked
     list... */
-  while (p != END_OF_STATIC_LIST) {
+
+  while (1) {
+
+    ACQUIRE_SPIN_LOCK(&static_objects_sync);
+
+    /* get the next static object from the list.  Remember, there might
+     * be more stuff on this list after each evacuation...
+     * (static_objects is a global)
+     */
+    p = static_objects;
+    if (p == END_OF_STATIC_LIST) {
+        RELEASE_SPIN_LOCK(&static_objects_sync);
+        break;
+    }
+
    ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
    info = get_itbl(p);
    /*
-    if (info->type==RBH)
-      info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure
+        if (info->type==RBH)
+            info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure
    */
    // make sure the info pointer is into text space
@@ -1520,6 +1533,8 @@
    *STATIC_LINK(info,p) = scavenged_static_objects;
    scavenged_static_objects = p;
 
+    RELEASE_SPIN_LOCK(&static_objects_sync);
+
    switch (info -> type) {
 
    case IND_STATIC:
@@ -1564,12 +1579,6 @@
    }
 
    ASSERT(gct->failed_to_evac == rtsFalse);
-
-    /* get the next static object from the list.  Remember, there might
-     * be more stuff on this list now that we've done some evacuating!
-     * (static_objects is a global)
-     */
-    p = static_objects;
  }
 }
@@ -1947,3 +1956,39 @@
 loop:
     if (work_to_do) goto loop;
 }
+
+rtsBool
+any_work (void)
+{
+    int g, s;
+    step_workspace *ws;
+
+    write_barrier();
+
+    // scavenge static objects
+    if (major_gc && static_objects != END_OF_STATIC_LIST) {
+        return rtsTrue;
+    }
+
+    // scavenge objects in compacted generation
+    if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
+        (mark_stack_bdescr != NULL && !mark_stack_empty())) {
+        return rtsTrue;
+    }
+
+    // Check for global work in any step.  We don't need to check for
+    // local work, because we have already exited scavenge_loop(),
+    // which means there is no local work for this thread.
+    for (g = RtsFlags.GcFlags.generations; --g >= 0; ) {
+        for (s = generations[g].n_steps; --s >= 0; ) {
+            if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
+                continue;
+            }
+            ws = &gct->steps[g][s];
+            if (ws->todo_large_objects) return rtsTrue;
+            if (ws->stp->todos) return rtsTrue;
+        }
+    }
+
+    return rtsFalse;
+}
diff --git a/rts/sm/Scav.h b/rts/sm/Scav.h
index ab66775641..f149b667ac 100644
--- a/rts/sm/Scav.h
+++ b/rts/sm/Scav.h
@@ -11,5 +11,6 @@
  *
  * ---------------------------------------------------------------------------*/
 
-void scavenge_loop (void);
-void scavenge_mutable_list (generation *g);
+void    scavenge_loop (void);
+rtsBool any_work (void);
+void    scavenge_mutable_list (generation *g);
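Taken together, gc_thread_work() in GC.c and any_work() above form the termination protocol for the parallel phase: a thread with no local work decrements gc_running_threads and polls; if global work appears it re-increments the counter before taking any, so the counter can only reach zero when nothing is left anywhere (wakeup_gc_threads() likewise bumps the counter before signalling each worker). A standalone model of that loop, with a C11 atomic standing in for the mutex-protected counter; all names are illustrative:

    #include <stdatomic.h>
    #include <stdbool.h>

    static _Atomic int running;        // models gc_running_threads

    extern bool any_work(void);        // models Scav.c's any_work()
    extern void scavenge_loop(void);   // models Scav.c's scavenge_loop()

    // Each worker runs this once per GC; all workers return only when
    // every thread is idle and no work remains anywhere.
    void worker (void)
    {
        bool have_work = true;
        while (have_work) {
            scavenge_loop();                        // drain local work
            atomic_fetch_sub(&running, 1);          // announce "idle"
            have_work = false;
            while (atomic_load(&running) != 0) {
                if (any_work()) {                   // global work appeared
                    atomic_fetch_add(&running, 1);  // re-register first,
                    have_work = true;               // then go take it
                    break;
                }
            }
        }
        // counter hit zero with no work left: this GC phase is complete
    }

Incrementing before claiming the work is the load-bearing detail: claim-then-increment would let the counter hit zero while work is still queued, releasing threads early.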