diff options
author | Simon Marlow <marlowsd@gmail.com> | 2011-04-11 14:48:49 +0100 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2011-04-11 14:49:12 +0100 |
commit | 1fb38442d3a55ac92795aa6c5ed4df82011df724 (patch) | |
tree | 3e4aa00f970fb58b4f7b6bb27f16eb9d8b7dbad2 /rts/sm/GCThread.h | |
parent | 7bf5bf37e7f4f140c883016e9da50106535d2a94 (diff) | |
download | haskell-1fb38442d3a55ac92795aa6c5ed4df82011df724.tar.gz |
Refactoring and tidy up
This is a port of some of the changes from my private local-GC branch
(which is still in darcs, I haven't converted it to git yet). There
are a couple of small functional differences in the GC stats: first,
per-thread GC timings should now be more accurate, and secondly we now
report average and maximum pause times. e.g. from minimax +RTS -N8 -s:
Tot time (elapsed) Avg pause Max pause
Gen 0 2755 colls, 2754 par 13.16s 0.93s 0.0003s 0.0150s
Gen 1 769 colls, 769 par 3.71s 0.26s 0.0003s 0.0059s
Diffstat (limited to 'rts/sm/GCThread.h')
-rw-r--r-- | rts/sm/GCThread.h | 96 |
1 files changed, 11 insertions, 85 deletions
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h index 62dd1fb73a..e42a3a1239 100644 --- a/rts/sm/GCThread.h +++ b/rts/sm/GCThread.h @@ -15,6 +15,7 @@ #define SM_GCTHREAD_H #include "WSDeque.h" +#include "GetTime.h" // for Ticks #include "BeginPrivate.h" @@ -115,6 +116,8 @@ typedef struct gen_workspace_ { ------------------------------------------------------------------------- */ typedef struct gc_thread_ { + Capability *cap; + #ifdef THREADED_RTS OSThreadId id; // The OS thread that this struct belongs to SpinLock gc_spin; @@ -162,7 +165,8 @@ typedef struct gc_thread_ { // instead of the to-space // corresponding to the object - lnat thunk_selector_depth; // ummm.... not used as of now + lnat thunk_selector_depth; // used to avoid unbounded recursion in + // evacuate() for THUNK_SELECTOR #ifdef USE_PAPI int papi_events; @@ -177,10 +181,15 @@ typedef struct gc_thread_ { lnat no_work; lnat scav_find_work; + Ticks gc_start_cpu; // process CPU time + Ticks gc_start_elapsed; // process elapsed time + Ticks gc_start_thread_cpu; // thread CPU time + lnat gc_start_faults; + // ------------------- // workspaces - // array of workspaces, indexed by stp->abs_no. This is placed + // array of workspaces, indexed by gen->abs_no. This is placed // directly at the end of the gc_thread structure so that we can get from // the gc_thread pointer to a workspace using only pointer // arithmetic, no memory access. This happens in the inner loop @@ -191,91 +200,8 @@ typedef struct gc_thread_ { extern nat n_gc_threads; -/* ----------------------------------------------------------------------------- - The gct variable is thread-local and points to the current thread's - gc_thread structure. It is heavily accessed, so we try to put gct - into a global register variable if possible; if we don't have a - register then use gcc's __thread extension to create a thread-local - variable. - - Even on x86 where registers are scarce, it is worthwhile using a - register variable here: I measured about a 2-5% slowdown with the - __thread version. - -------------------------------------------------------------------------- */ - extern gc_thread **gc_threads; -#if defined(THREADED_RTS) - -#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg); - -#define SET_GCT(to) gct = (to) - - - -#if (defined(i386_HOST_ARCH) && defined(linux_HOST_OS)) -// Using __thread is better than stealing a register on x86/Linux, because -// we have too few registers available. In my tests it was worth -// about 5% in GC performance, but of course that might change as gcc -// improves. -- SDM 2009/04/03 -// -// We ought to do the same on MacOS X, but __thread is not -// supported there yet (gcc 4.0.1). - -extern __thread gc_thread* gct; -#define DECLARE_GCT __thread gc_thread* gct; - - -#elif defined(sparc_HOST_ARCH) -// On SPARC we can't pin gct to a register. Names like %l1 are just offsets -// into the register window, which change on each function call. -// -// There are eight global (non-window) registers, but they're used for other purposes. -// %g0 -- always zero -// %g1 -- volatile over function calls, used by the linker -// %g2-%g3 -- used as scratch regs by the C compiler (caller saves) -// %g4 -- volatile over function calls, used by the linker -// %g5-%g7 -- reserved by the OS - -extern __thread gc_thread* gct; -#define DECLARE_GCT __thread gc_thread* gct; - - -#elif defined(REG_Base) && !defined(i386_HOST_ARCH) -// on i386, REG_Base is %ebx which is also used for PIC, so we don't -// want to steal it - -GLOBAL_REG_DECL(gc_thread*, gct, REG_Base) -#define DECLARE_GCT /* nothing */ - - -#elif defined(REG_R1) - -GLOBAL_REG_DECL(gc_thread*, gct, REG_R1) -#define DECLARE_GCT /* nothing */ - - -#elif defined(__GNUC__) - -extern __thread gc_thread* gct; -#define DECLARE_GCT __thread gc_thread* gct; - -#else - -#error Cannot find a way to declare the thread-local gct - -#endif - -#else // not the threaded RTS - -extern StgWord8 the_gc_thread[]; - -#define gct ((gc_thread*)&the_gc_thread) -#define SET_GCT(to) /*nothing*/ -#define DECLARE_GCT /*nothing*/ - -#endif - #include "EndPrivate.h" #endif // SM_GCTHREAD_H |