diff options
Diffstat (limited to 'rts/sm')
-rw-r--r-- | rts/sm/Compact.c | 2 | ||||
-rw-r--r-- | rts/sm/Evac.c | 1 | ||||
-rw-r--r-- | rts/sm/GC.c | 100 | ||||
-rw-r--r-- | rts/sm/GCAux.c | 2 | ||||
-rw-r--r-- | rts/sm/GCTDecl.h | 98 | ||||
-rw-r--r-- | rts/sm/GCThread.h | 96 | ||||
-rw-r--r-- | rts/sm/GCUtils.c | 1 | ||||
-rw-r--r-- | rts/sm/GCUtils.h | 2 | ||||
-rw-r--r-- | rts/sm/MarkWeak.c | 1 |
9 files changed, 174 insertions, 129 deletions
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index ff7480cd57..1b57c53805 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -942,6 +942,8 @@ compact(StgClosure *static_objects) // 1. thread the roots markCapabilities((evac_fn)thread_root, NULL); + markScheduler((evac_fn)thread_root, NULL); + // the weak pointer lists... if (weak_ptr_list != NULL) { thread((void *)&weak_ptr_list); diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index d049f98bc8..fdb5477a38 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -18,6 +18,7 @@ #include "Storage.h" #include "GC.h" #include "GCThread.h" +#include "GCTDecl.h" #include "GCUtils.h" #include "Compact.h" #include "MarkStack.h" diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 4ba05bf5b4..d0dd44dd8a 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -40,6 +40,7 @@ #include "GC.h" #include "GCThread.h" +#include "GCTDecl.h" #include "Compact.h" #include "Evac.h" #include "Scav.h" @@ -146,8 +147,8 @@ static void start_gc_threads (void); static void scavenge_until_all_done (void); static StgWord inc_running (void); static StgWord dec_running (void); -static void wakeup_gc_threads (nat n_threads, nat me); -static void shutdown_gc_threads (nat n_threads, nat me); +static void wakeup_gc_threads (nat me); +static void shutdown_gc_threads (nat me); static void collect_gct_blocks (void); #if 0 && defined(DEBUG) @@ -177,7 +178,7 @@ GarbageCollect (rtsBool force_major_gc, generation *gen; lnat live_blocks, live_words, allocated, max_copied, avg_copied; gc_thread *saved_gct; - nat g, t, n; + nat g, n; // necessary if we stole a callee-saves register for gct: saved_gct = gct; @@ -198,11 +199,11 @@ GarbageCollect (rtsBool force_major_gc, ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord)); // otherwise adjust the padding in gen_workspace. - // tell the stats department that we've started a GC - stat_startGC(); + // this is the main thread + SET_GCT(gc_threads[cap->no]); - // tell the STM to discard any cached closures it's hoping to re-use - stmPreGCHook(); + // tell the stats department that we've started a GC + stat_startGC(gct); // lock the StablePtr table stablePtrPreGC(); @@ -277,11 +278,6 @@ GarbageCollect (rtsBool force_major_gc, // check sanity *before* GC IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc)); - // Initialise all our gc_thread structures - for (t = 0; t < n_gc_threads; t++) { - init_gc_thread(gc_threads[t]); - } - // Initialise all the generations/steps that we're collecting. for (g = 0; g <= N; g++) { prepare_collected_gen(&generations[g]); @@ -291,6 +287,9 @@ GarbageCollect (rtsBool force_major_gc, prepare_uncollected_gen(&generations[g]); } + // Prepare this gc_thread + init_gc_thread(gct); + /* Allocate a mark stack if we're doing a major collection. */ if (major_gc && oldest_gen->mark) { @@ -305,17 +304,6 @@ GarbageCollect (rtsBool force_major_gc, mark_sp = NULL; } - // this is the main thread -#ifdef THREADED_RTS - if (n_gc_threads == 1) { - SET_GCT(gc_threads[0]); - } else { - SET_GCT(gc_threads[cap->no]); - } -#else -SET_GCT(gc_threads[0]); -#endif - /* ----------------------------------------------------------------------- * follow all the roots that we know about: */ @@ -325,7 +313,9 @@ SET_GCT(gc_threads[0]); // NB. do this after the mutable lists have been saved above, otherwise // the other GC threads will be writing into the old mutable lists. inc_running(); - wakeup_gc_threads(n_gc_threads, gct->thread_index); + wakeup_gc_threads(gct->thread_index); + + traceEventGcWork(gct->cap); // scavenge the capability-private mutable lists. This isn't part // of markSomeCapabilities() because markSomeCapabilities() can only @@ -340,7 +330,7 @@ SET_GCT(gc_threads[0]); #endif } } else { - scavenge_capability_mut_lists(&capabilities[gct->thread_index]); + scavenge_capability_mut_lists(gct->cap); } // follow roots from the CAF list (used by GHCi) @@ -349,8 +339,16 @@ SET_GCT(gc_threads[0]); // follow all the roots that the application knows about. gct->evac_gen_no = 0; - markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads, - rtsTrue/*prune sparks*/); + if (n_gc_threads == 1) { + for (n = 0; n < n_capabilities; n++) { + markCapability(mark_root, gct, &capabilities[n], + rtsTrue/*don't mark sparks*/); + } + } else { + markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/); + } + + markScheduler(mark_root, gct); #if defined(RTS_USER_SIGNALS) // mark the signal handlers (signals should be already blocked) @@ -385,7 +383,7 @@ SET_GCT(gc_threads[0]); break; } - shutdown_gc_threads(n_gc_threads, gct->thread_index); + shutdown_gc_threads(gct->thread_index); // Now see which stable names are still alive. gcStablePtrTable(); @@ -396,7 +394,7 @@ SET_GCT(gc_threads[0]); pruneSparkQueue(&capabilities[n]); } } else { - pruneSparkQueue(&capabilities[gct->thread_index]); + pruneSparkQueue(gct->cap); } #endif @@ -713,7 +711,8 @@ SET_GCT(gc_threads[0]); #endif // ok, GC over: tell the stats department what happened. - stat_endGC(allocated, live_words, copied, N, max_copied, avg_copied, + stat_endGC(gct, allocated, live_words, + copied, N, max_copied, avg_copied, live_blocks * BLOCK_SIZE_W - live_words /* slop */); // Guess which generation we'll collect *next* time @@ -787,6 +786,8 @@ new_gc_thread (nat n, gc_thread *t) nat g; gen_workspace *ws; + t->cap = &capabilities[n]; + #ifdef THREADED_RTS t->id = 0; initSpinLock(&t->gc_spin); @@ -970,8 +971,6 @@ scavenge_until_all_done (void) loop: - traceEventGcWork(&capabilities[gct->thread_index]); - #if defined(THREADED_RTS) if (n_gc_threads > 1) { scavenge_loop(); @@ -987,7 +986,7 @@ loop: // scavenge_loop() only exits when there's no work to do r = dec_running(); - traceEventGcIdle(&capabilities[gct->thread_index]); + traceEventGcIdle(gct->cap); debugTrace(DEBUG_gc, "%d GC threads still running", r); @@ -995,6 +994,7 @@ loop: // usleep(1); if (any_work()) { inc_running(); + traceEventGcWork(gct->cap); goto loop; } // any_work() does not remove the work from the queue, it @@ -1003,7 +1003,7 @@ loop: // scavenge_loop() to perform any pending work. } - traceEventGcDone(&capabilities[gct->thread_index]); + traceEventGcDone(gct->cap); } #if defined(THREADED_RTS) @@ -1019,6 +1019,8 @@ gcWorkerThread (Capability *cap) gct = gc_threads[cap->no]; gct->id = osThreadId(); + stat_gcWorkerThreadStart(gct); + // Wait until we're told to wake up RELEASE_SPIN_LOCK(&gct->mut_spin); gct->wakeup = GC_THREAD_STANDING_BY; @@ -1032,12 +1034,15 @@ gcWorkerThread (Capability *cap) } papi_thread_start_gc1_count(gct->papi_events); #endif - + + init_gc_thread(gct); + + traceEventGcWork(gct->cap); + // Every thread evacuates some roots. gct->evac_gen_no = 0; - markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads, - rtsTrue/*prune sparks*/); - scavenge_capability_mut_lists(&capabilities[gct->thread_index]); + markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/); + scavenge_capability_mut_lists(cap); scavenge_until_all_done(); @@ -1064,6 +1069,9 @@ gcWorkerThread (Capability *cap) ACQUIRE_SPIN_LOCK(&gct->mut_spin); debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index); + // record the time spent doing GC in the Task structure + stat_gcWorkerThreadDone(gct); + SET_GCT(saved_gct); } @@ -1113,11 +1121,14 @@ start_gc_threads (void) } static void -wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS) +wakeup_gc_threads (nat me USED_IF_THREADS) { #if defined(THREADED_RTS) nat i; - for (i=0; i < n_threads; i++) { + + if (n_gc_threads == 1) return; + + for (i=0; i < n_gc_threads; i++) { if (i == me) continue; inc_running(); debugTrace(DEBUG_gc, "waking up gc thread %d", i); @@ -1134,11 +1145,14 @@ wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS) // standby state, otherwise they may still be executing inside // any_work(), and may even remain awake until the next GC starts. static void -shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS) +shutdown_gc_threads (nat me USED_IF_THREADS) { #if defined(THREADED_RTS) nat i; - for (i=0; i < n_threads; i++) { + + if (n_gc_threads == 1) return; + + for (i=0; i < n_gc_threads; i++) { if (i == me) continue; while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); } } @@ -1373,7 +1387,7 @@ init_gc_thread (gc_thread *t) t->static_objects = END_OF_STATIC_LIST; t->scavenged_static_objects = END_OF_STATIC_LIST; t->scan_bd = NULL; - t->mut_lists = capabilities[t->thread_index].mut_lists; + t->mut_lists = t->cap->mut_lists; t->evac_gen_no = 0; t->failed_to_evac = rtsFalse; t->eager_promotion = rtsTrue; diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c index 97af17a02c..7f3968faae 100644 --- a/rts/sm/GCAux.c +++ b/rts/sm/GCAux.c @@ -17,7 +17,7 @@ #include "Capability.h" #include "Trace.h" #include "Schedule.h" -// DO NOT include "GCThread.h", we don't want the register variable +// DO NOT include "GCTDecl.h", we don't want the register variable /* ----------------------------------------------------------------------------- isAlive determines whether the given closure is still alive (after diff --git a/rts/sm/GCTDecl.h b/rts/sm/GCTDecl.h new file mode 100644 index 0000000000..11795ca7fd --- /dev/null +++ b/rts/sm/GCTDecl.h @@ -0,0 +1,98 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team 1998-2009 + * + * Documentation on the architecture of the Garbage Collector can be + * found in the online commentary: + * + * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC + * + * ---------------------------------------------------------------------------*/ + +#ifndef SM_GCTDECL_H +#define SM_GCTDECL_H + +#include "BeginPrivate.h" + +/* ----------------------------------------------------------------------------- + The gct variable is thread-local and points to the current thread's + gc_thread structure. It is heavily accessed, so we try to put gct + into a global register variable if possible; if we don't have a + register then use gcc's __thread extension to create a thread-local + variable. + -------------------------------------------------------------------------- */ + +#if defined(THREADED_RTS) + +#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg); + +#define SET_GCT(to) gct = (to) + + + +#if (defined(i386_HOST_ARCH) && defined(linux_HOST_OS)) +// Using __thread is better than stealing a register on x86/Linux, because +// we have too few registers available. In my tests it was worth +// about 5% in GC performance, but of course that might change as gcc +// improves. -- SDM 2009/04/03 +// +// We ought to do the same on MacOS X, but __thread is not +// supported there yet (gcc 4.0.1). + +extern __thread gc_thread* gct; +#define DECLARE_GCT __thread gc_thread* gct; + + +#elif defined(sparc_HOST_ARCH) +// On SPARC we can't pin gct to a register. Names like %l1 are just offsets +// into the register window, which change on each function call. +// +// There are eight global (non-window) registers, but they're used for other purposes. +// %g0 -- always zero +// %g1 -- volatile over function calls, used by the linker +// %g2-%g3 -- used as scratch regs by the C compiler (caller saves) +// %g4 -- volatile over function calls, used by the linker +// %g5-%g7 -- reserved by the OS + +extern __thread gc_thread* gct; +#define DECLARE_GCT __thread gc_thread* gct; + + +#elif defined(REG_Base) && !defined(i386_HOST_ARCH) +// on i386, REG_Base is %ebx which is also used for PIC, so we don't +// want to steal it + +GLOBAL_REG_DECL(gc_thread*, gct, REG_Base) +#define DECLARE_GCT /* nothing */ + + +#elif defined(REG_R1) + +GLOBAL_REG_DECL(gc_thread*, gct, REG_R1) +#define DECLARE_GCT /* nothing */ + + +#elif defined(__GNUC__) + +extern __thread gc_thread* gct; +#define DECLARE_GCT __thread gc_thread* gct; + +#else + +#error Cannot find a way to declare the thread-local gct + +#endif + +#else // not the threaded RTS + +extern StgWord8 the_gc_thread[]; + +#define gct ((gc_thread*)&the_gc_thread) +#define SET_GCT(to) /*nothing*/ +#define DECLARE_GCT /*nothing*/ + +#endif // THREADED_RTS + +#include "EndPrivate.h" + +#endif // SM_GCTDECL_H diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h index 62dd1fb73a..e42a3a1239 100644 --- a/rts/sm/GCThread.h +++ b/rts/sm/GCThread.h @@ -15,6 +15,7 @@ #define SM_GCTHREAD_H #include "WSDeque.h" +#include "GetTime.h" // for Ticks #include "BeginPrivate.h" @@ -115,6 +116,8 @@ typedef struct gen_workspace_ { ------------------------------------------------------------------------- */ typedef struct gc_thread_ { + Capability *cap; + #ifdef THREADED_RTS OSThreadId id; // The OS thread that this struct belongs to SpinLock gc_spin; @@ -162,7 +165,8 @@ typedef struct gc_thread_ { // instead of the to-space // corresponding to the object - lnat thunk_selector_depth; // ummm.... not used as of now + lnat thunk_selector_depth; // used to avoid unbounded recursion in + // evacuate() for THUNK_SELECTOR #ifdef USE_PAPI int papi_events; @@ -177,10 +181,15 @@ typedef struct gc_thread_ { lnat no_work; lnat scav_find_work; + Ticks gc_start_cpu; // process CPU time + Ticks gc_start_elapsed; // process elapsed time + Ticks gc_start_thread_cpu; // thread CPU time + lnat gc_start_faults; + // ------------------- // workspaces - // array of workspaces, indexed by stp->abs_no. This is placed + // array of workspaces, indexed by gen->abs_no. This is placed // directly at the end of the gc_thread structure so that we can get from // the gc_thread pointer to a workspace using only pointer // arithmetic, no memory access. This happens in the inner loop @@ -191,91 +200,8 @@ typedef struct gc_thread_ { extern nat n_gc_threads; -/* ----------------------------------------------------------------------------- - The gct variable is thread-local and points to the current thread's - gc_thread structure. It is heavily accessed, so we try to put gct - into a global register variable if possible; if we don't have a - register then use gcc's __thread extension to create a thread-local - variable. - - Even on x86 where registers are scarce, it is worthwhile using a - register variable here: I measured about a 2-5% slowdown with the - __thread version. - -------------------------------------------------------------------------- */ - extern gc_thread **gc_threads; -#if defined(THREADED_RTS) - -#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg); - -#define SET_GCT(to) gct = (to) - - - -#if (defined(i386_HOST_ARCH) && defined(linux_HOST_OS)) -// Using __thread is better than stealing a register on x86/Linux, because -// we have too few registers available. In my tests it was worth -// about 5% in GC performance, but of course that might change as gcc -// improves. -- SDM 2009/04/03 -// -// We ought to do the same on MacOS X, but __thread is not -// supported there yet (gcc 4.0.1). - -extern __thread gc_thread* gct; -#define DECLARE_GCT __thread gc_thread* gct; - - -#elif defined(sparc_HOST_ARCH) -// On SPARC we can't pin gct to a register. Names like %l1 are just offsets -// into the register window, which change on each function call. -// -// There are eight global (non-window) registers, but they're used for other purposes. -// %g0 -- always zero -// %g1 -- volatile over function calls, used by the linker -// %g2-%g3 -- used as scratch regs by the C compiler (caller saves) -// %g4 -- volatile over function calls, used by the linker -// %g5-%g7 -- reserved by the OS - -extern __thread gc_thread* gct; -#define DECLARE_GCT __thread gc_thread* gct; - - -#elif defined(REG_Base) && !defined(i386_HOST_ARCH) -// on i386, REG_Base is %ebx which is also used for PIC, so we don't -// want to steal it - -GLOBAL_REG_DECL(gc_thread*, gct, REG_Base) -#define DECLARE_GCT /* nothing */ - - -#elif defined(REG_R1) - -GLOBAL_REG_DECL(gc_thread*, gct, REG_R1) -#define DECLARE_GCT /* nothing */ - - -#elif defined(__GNUC__) - -extern __thread gc_thread* gct; -#define DECLARE_GCT __thread gc_thread* gct; - -#else - -#error Cannot find a way to declare the thread-local gct - -#endif - -#else // not the threaded RTS - -extern StgWord8 the_gc_thread[]; - -#define gct ((gc_thread*)&the_gc_thread) -#define SET_GCT(to) /*nothing*/ -#define DECLARE_GCT /*nothing*/ - -#endif - #include "EndPrivate.h" #endif // SM_GCTHREAD_H diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c index 8b63674c77..ef8d0bd56d 100644 --- a/rts/sm/GCUtils.c +++ b/rts/sm/GCUtils.c @@ -18,6 +18,7 @@ #include "Storage.h" #include "GC.h" #include "GCThread.h" +#include "GCTDecl.h" #include "GCUtils.h" #include "Printer.h" #include "Trace.h" diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h index 3fe78a3310..d47375d946 100644 --- a/rts/sm/GCUtils.h +++ b/rts/sm/GCUtils.h @@ -16,6 +16,8 @@ #include "BeginPrivate.h" +#include "GCTDecl.h" + bdescr *allocBlock_sync(void); void freeChain_sync(bdescr *bd); diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c index f4b576ac73..f9275ecb62 100644 --- a/rts/sm/MarkWeak.c +++ b/rts/sm/MarkWeak.c @@ -17,6 +17,7 @@ #include "MarkWeak.h" #include "GC.h" #include "GCThread.h" +#include "GCTDecl.h" #include "Evac.h" #include "Trace.h" #include "Schedule.h" |