summary refs log tree commit diff
path: root/rts/sm
diff options
context:
space:
mode:
Diffstat (limited to 'rts/sm')
-rw-r--r-- rts/sm/Compact.c 2
-rw-r--r-- rts/sm/Evac.c 1
-rw-r--r-- rts/sm/GC.c 100
-rw-r--r-- rts/sm/GCAux.c 2
-rw-r--r-- rts/sm/GCTDecl.h 98
-rw-r--r-- rts/sm/GCThread.h 96
-rw-r--r-- rts/sm/GCUtils.c 1
-rw-r--r-- rts/sm/GCUtils.h 2
-rw-r--r-- rts/sm/MarkWeak.c 1
9 files changed, 174 insertions, 129 deletions
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index ff7480cd57..1b57c53805 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -942,6 +942,8 @@ compact(StgClosure *static_objects)
// 1. thread the roots
markCapabilities((evac_fn)thread_root, NULL);
+ markScheduler((evac_fn)thread_root, NULL);
+
// the weak pointer lists...
if (weak_ptr_list != NULL) {
thread((void *)&weak_ptr_list);
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index d049f98bc8..fdb5477a38 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -18,6 +18,7 @@
#include "Storage.h"
#include "GC.h"
#include "GCThread.h"
+#include "GCTDecl.h"
#include "GCUtils.h"
#include "Compact.h"
#include "MarkStack.h"
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 4ba05bf5b4..d0dd44dd8a 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -40,6 +40,7 @@
#include "GC.h"
#include "GCThread.h"
+#include "GCTDecl.h"
#include "Compact.h"
#include "Evac.h"
#include "Scav.h"
@@ -146,8 +147,8 @@ static void start_gc_threads (void);
static void scavenge_until_all_done (void);
static StgWord inc_running (void);
static StgWord dec_running (void);
-static void wakeup_gc_threads (nat n_threads, nat me);
-static void shutdown_gc_threads (nat n_threads, nat me);
+static void wakeup_gc_threads (nat me);
+static void shutdown_gc_threads (nat me);
static void collect_gct_blocks (void);
#if 0 && defined(DEBUG)
@@ -177,7 +178,7 @@ GarbageCollect (rtsBool force_major_gc,
generation *gen;
lnat live_blocks, live_words, allocated, max_copied, avg_copied;
gc_thread *saved_gct;
- nat g, t, n;
+ nat g, n;
// necessary if we stole a callee-saves register for gct:
saved_gct = gct;
@@ -198,11 +199,11 @@ GarbageCollect (rtsBool force_major_gc,
ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
// otherwise adjust the padding in gen_workspace.
- // tell the stats department that we've started a GC
- stat_startGC();
+ // this is the main thread
+ SET_GCT(gc_threads[cap->no]);
- // tell the STM to discard any cached closures it's hoping to re-use
- stmPreGCHook();
+ // tell the stats department that we've started a GC
+ stat_startGC(gct);
// lock the StablePtr table
stablePtrPreGC();
@@ -277,11 +278,6 @@ GarbageCollect (rtsBool force_major_gc,
// check sanity *before* GC
IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
- // Initialise all our gc_thread structures
- for (t = 0; t < n_gc_threads; t++) {
- init_gc_thread(gc_threads[t]);
- }
-
// Initialise all the generations/steps that we're collecting.
for (g = 0; g <= N; g++) {
prepare_collected_gen(&generations[g]);
@@ -291,6 +287,9 @@ GarbageCollect (rtsBool force_major_gc,
prepare_uncollected_gen(&generations[g]);
}
+ // Prepare this gc_thread
+ init_gc_thread(gct);
+
/* Allocate a mark stack if we're doing a major collection.
*/
if (major_gc && oldest_gen->mark) {
@@ -305,17 +304,6 @@ GarbageCollect (rtsBool force_major_gc,
mark_sp = NULL;
}
- // this is the main thread
-#ifdef THREADED_RTS
- if (n_gc_threads == 1) {
- SET_GCT(gc_threads[0]);
- } else {
- SET_GCT(gc_threads[cap->no]);
- }
-#else
-SET_GCT(gc_threads[0]);
-#endif
-
/* -----------------------------------------------------------------------
* follow all the roots that we know about:
*/
@@ -325,7 +313,9 @@ SET_GCT(gc_threads[0]);
// NB. do this after the mutable lists have been saved above, otherwise
// the other GC threads will be writing into the old mutable lists.
inc_running();
- wakeup_gc_threads(n_gc_threads, gct->thread_index);
+ wakeup_gc_threads(gct->thread_index);
+
+ traceEventGcWork(gct->cap);
// scavenge the capability-private mutable lists. This isn't part
// of markSomeCapabilities() because markSomeCapabilities() can only
@@ -340,7 +330,7 @@ SET_GCT(gc_threads[0]);
#endif
}
} else {
- scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
+ scavenge_capability_mut_lists(gct->cap);
}
// follow roots from the CAF list (used by GHCi)
@@ -349,8 +339,16 @@ SET_GCT(gc_threads[0]);
// follow all the roots that the application knows about.
gct->evac_gen_no = 0;
- markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
- rtsTrue/*prune sparks*/);
+ if (n_gc_threads == 1) {
+ for (n = 0; n < n_capabilities; n++) {
+ markCapability(mark_root, gct, &capabilities[n],
+ rtsTrue/*don't mark sparks*/);
+ }
+ } else {
+ markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
+ }
+
+ markScheduler(mark_root, gct);
#if defined(RTS_USER_SIGNALS)
// mark the signal handlers (signals should be already blocked)
@@ -385,7 +383,7 @@ SET_GCT(gc_threads[0]);
break;
}
- shutdown_gc_threads(n_gc_threads, gct->thread_index);
+ shutdown_gc_threads(gct->thread_index);
// Now see which stable names are still alive.
gcStablePtrTable();
@@ -396,7 +394,7 @@ SET_GCT(gc_threads[0]);
pruneSparkQueue(&capabilities[n]);
}
} else {
- pruneSparkQueue(&capabilities[gct->thread_index]);
+ pruneSparkQueue(gct->cap);
}
#endif
@@ -713,7 +711,8 @@ SET_GCT(gc_threads[0]);
#endif
// ok, GC over: tell the stats department what happened.
- stat_endGC(allocated, live_words, copied, N, max_copied, avg_copied,
+ stat_endGC(gct, allocated, live_words,
+ copied, N, max_copied, avg_copied,
live_blocks * BLOCK_SIZE_W - live_words /* slop */);
// Guess which generation we'll collect *next* time
@@ -787,6 +786,8 @@ new_gc_thread (nat n, gc_thread *t)
nat g;
gen_workspace *ws;
+ t->cap = &capabilities[n];
+
#ifdef THREADED_RTS
t->id = 0;
initSpinLock(&t->gc_spin);
@@ -970,8 +971,6 @@ scavenge_until_all_done (void)
loop:
- traceEventGcWork(&capabilities[gct->thread_index]);
-
#if defined(THREADED_RTS)
if (n_gc_threads > 1) {
scavenge_loop();
@@ -987,7 +986,7 @@ loop:
// scavenge_loop() only exits when there's no work to do
r = dec_running();
- traceEventGcIdle(&capabilities[gct->thread_index]);
+ traceEventGcIdle(gct->cap);
debugTrace(DEBUG_gc, "%d GC threads still running", r);
@@ -995,6 +994,7 @@ loop:
// usleep(1);
if (any_work()) {
inc_running();
+ traceEventGcWork(gct->cap);
goto loop;
}
// any_work() does not remove the work from the queue, it
@@ -1003,7 +1003,7 @@ loop:
// scavenge_loop() to perform any pending work.
}
- traceEventGcDone(&capabilities[gct->thread_index]);
+ traceEventGcDone(gct->cap);
}
#if defined(THREADED_RTS)
@@ -1019,6 +1019,8 @@ gcWorkerThread (Capability *cap)
gct = gc_threads[cap->no];
gct->id = osThreadId();
+ stat_gcWorkerThreadStart(gct);
+
// Wait until we're told to wake up
RELEASE_SPIN_LOCK(&gct->mut_spin);
gct->wakeup = GC_THREAD_STANDING_BY;
@@ -1032,12 +1034,15 @@ gcWorkerThread (Capability *cap)
}
papi_thread_start_gc1_count(gct->papi_events);
#endif
-
+
+ init_gc_thread(gct);
+
+ traceEventGcWork(gct->cap);
+
// Every thread evacuates some roots.
gct->evac_gen_no = 0;
- markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
- rtsTrue/*prune sparks*/);
- scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
+ markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
+ scavenge_capability_mut_lists(cap);
scavenge_until_all_done();
@@ -1064,6 +1069,9 @@ gcWorkerThread (Capability *cap)
ACQUIRE_SPIN_LOCK(&gct->mut_spin);
debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
+ // record the time spent doing GC in the Task structure
+ stat_gcWorkerThreadDone(gct);
+
SET_GCT(saved_gct);
}
@@ -1113,11 +1121,14 @@ start_gc_threads (void)
}
static void
-wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
+wakeup_gc_threads (nat me USED_IF_THREADS)
{
#if defined(THREADED_RTS)
nat i;
- for (i=0; i < n_threads; i++) {
+
+ if (n_gc_threads == 1) return;
+
+ for (i=0; i < n_gc_threads; i++) {
if (i == me) continue;
inc_running();
debugTrace(DEBUG_gc, "waking up gc thread %d", i);
@@ -1134,11 +1145,14 @@ wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
// standby state, otherwise they may still be executing inside
// any_work(), and may even remain awake until the next GC starts.
static void
-shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
+shutdown_gc_threads (nat me USED_IF_THREADS)
{
#if defined(THREADED_RTS)
nat i;
- for (i=0; i < n_threads; i++) {
+
+ if (n_gc_threads == 1) return;
+
+ for (i=0; i < n_gc_threads; i++) {
if (i == me) continue;
while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
}
@@ -1373,7 +1387,7 @@ init_gc_thread (gc_thread *t)
t->static_objects = END_OF_STATIC_LIST;
t->scavenged_static_objects = END_OF_STATIC_LIST;
t->scan_bd = NULL;
- t->mut_lists = capabilities[t->thread_index].mut_lists;
+ t->mut_lists = t->cap->mut_lists;
t->evac_gen_no = 0;
t->failed_to_evac = rtsFalse;
t->eager_promotion = rtsTrue;
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index 97af17a02c..7f3968faae 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -17,7 +17,7 @@
#include "Capability.h"
#include "Trace.h"
#include "Schedule.h"
-// DO NOT include "GCThread.h", we don't want the register variable
+// DO NOT include "GCTDecl.h", we don't want the register variable
/* -----------------------------------------------------------------------------
isAlive determines whether the given closure is still alive (after
diff --git a/rts/sm/GCTDecl.h b/rts/sm/GCTDecl.h
new file mode 100644
index 0000000000..11795ca7fd
--- /dev/null
+++ b/rts/sm/GCTDecl.h
@@ -0,0 +1,98 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2009
+ *
+ * Documentation on the architecture of the Garbage Collector can be
+ * found in the online commentary:
+ *
+ * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef SM_GCTDECL_H
+#define SM_GCTDECL_H
+
+#include "BeginPrivate.h"
+
+/* -----------------------------------------------------------------------------
+ The gct variable is thread-local and points to the current thread's
+ gc_thread structure. It is heavily accessed, so where possible we keep
+ it in a global register variable. On i386/Linux and SPARC, where
+ stealing a register is too costly or impossible, and on targets with
+ no suitable register, we use gcc's __thread extension instead.
+ -------------------------------------------------------------------------- */
+
+#if defined(THREADED_RTS)
+
+#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg);
+
+#define SET_GCT(to) gct = (to)
+
+
+
+#if (defined(i386_HOST_ARCH) && defined(linux_HOST_OS))
+// Using __thread is better than stealing a register on x86/Linux, because
+// we have too few registers available. In my tests it was worth
+// about 5% in GC performance, but of course that might change as gcc
+// improves. -- SDM 2009/04/03
+//
+// We ought to do the same on MacOS X, but __thread is not
+// supported there yet (gcc 4.0.1).
+
+extern __thread gc_thread* gct;
+#define DECLARE_GCT __thread gc_thread* gct;
+
+
+#elif defined(sparc_HOST_ARCH)
+// On SPARC we can't pin gct to a register. Names like %l1 are just offsets
+// into the register window, which change on each function call.
+//
+// There are eight global (non-window) registers, but they're used for other purposes.
+// %g0 -- always zero
+// %g1 -- volatile over function calls, used by the linker
+// %g2-%g3 -- used as scratch regs by the C compiler (caller saves)
+// %g4 -- volatile over function calls, used by the linker
+// %g5-%g7 -- reserved by the OS
+
+extern __thread gc_thread* gct;
+#define DECLARE_GCT __thread gc_thread* gct;
+
+
+#elif defined(REG_Base) && !defined(i386_HOST_ARCH)
+// on i386, REG_Base is %ebx which is also used for PIC, so we don't
+// want to steal it
+
+GLOBAL_REG_DECL(gc_thread*, gct, REG_Base)
+#define DECLARE_GCT /* nothing */
+
+
+#elif defined(REG_R1)
+
+GLOBAL_REG_DECL(gc_thread*, gct, REG_R1)
+#define DECLARE_GCT /* nothing */
+
+
+#elif defined(__GNUC__)
+
+extern __thread gc_thread* gct;
+#define DECLARE_GCT __thread gc_thread* gct;
+
+#else
+
+#error Cannot find a way to declare the thread-local gct
+
+#endif
+
+#else // not the threaded RTS
+
+extern StgWord8 the_gc_thread[];
+
+#define gct ((gc_thread*)&the_gc_thread)
+#define SET_GCT(to) /*nothing*/
+#define DECLARE_GCT /*nothing*/
+
+#endif // THREADED_RTS
+
+#include "EndPrivate.h"
+
+#endif // SM_GCTDECL_H
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
index 62dd1fb73a..e42a3a1239 100644
--- a/rts/sm/GCThread.h
+++ b/rts/sm/GCThread.h
@@ -15,6 +15,7 @@
#define SM_GCTHREAD_H
#include "WSDeque.h"
+#include "GetTime.h" // for Ticks
#include "BeginPrivate.h"
@@ -115,6 +116,8 @@ typedef struct gen_workspace_ {
------------------------------------------------------------------------- */
typedef struct gc_thread_ {
+ Capability *cap;
+
#ifdef THREADED_RTS
OSThreadId id; // The OS thread that this struct belongs to
SpinLock gc_spin;
@@ -162,7 +165,8 @@ typedef struct gc_thread_ {
// instead of the to-space
// corresponding to the object
- lnat thunk_selector_depth; // ummm.... not used as of now
+ lnat thunk_selector_depth; // used to avoid unbounded recursion in
+ // evacuate() for THUNK_SELECTOR
#ifdef USE_PAPI
int papi_events;
@@ -177,10 +181,15 @@ typedef struct gc_thread_ {
lnat no_work;
lnat scav_find_work;
+ Ticks gc_start_cpu; // process CPU time
+ Ticks gc_start_elapsed; // process elapsed time
+ Ticks gc_start_thread_cpu; // thread CPU time
+ lnat gc_start_faults;
+
// -------------------
// workspaces
- // array of workspaces, indexed by stp->abs_no. This is placed
+ // array of workspaces, indexed by gen->abs_no. This is placed
// directly at the end of the gc_thread structure so that we can get from
// the gc_thread pointer to a workspace using only pointer
// arithmetic, no memory access. This happens in the inner loop
@@ -191,91 +200,8 @@ typedef struct gc_thread_ {
extern nat n_gc_threads;
-/* -----------------------------------------------------------------------------
- The gct variable is thread-local and points to the current thread's
- gc_thread structure. It is heavily accessed, so we try to put gct
- into a global register variable if possible; if we don't have a
- register then use gcc's __thread extension to create a thread-local
- variable.
-
- Even on x86 where registers are scarce, it is worthwhile using a
- register variable here: I measured about a 2-5% slowdown with the
- __thread version.
- -------------------------------------------------------------------------- */
-
extern gc_thread **gc_threads;
-#if defined(THREADED_RTS)
-
-#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg);
-
-#define SET_GCT(to) gct = (to)
-
-
-
-#if (defined(i386_HOST_ARCH) && defined(linux_HOST_OS))
-// Using __thread is better than stealing a register on x86/Linux, because
-// we have too few registers available. In my tests it was worth
-// about 5% in GC performance, but of course that might change as gcc
-// improves. -- SDM 2009/04/03
-//
-// We ought to do the same on MacOS X, but __thread is not
-// supported there yet (gcc 4.0.1).
-
-extern __thread gc_thread* gct;
-#define DECLARE_GCT __thread gc_thread* gct;
-
-
-#elif defined(sparc_HOST_ARCH)
-// On SPARC we can't pin gct to a register. Names like %l1 are just offsets
-// into the register window, which change on each function call.
-//
-// There are eight global (non-window) registers, but they're used for other purposes.
-// %g0 -- always zero
-// %g1 -- volatile over function calls, used by the linker
-// %g2-%g3 -- used as scratch regs by the C compiler (caller saves)
-// %g4 -- volatile over function calls, used by the linker
-// %g5-%g7 -- reserved by the OS
-
-extern __thread gc_thread* gct;
-#define DECLARE_GCT __thread gc_thread* gct;
-
-
-#elif defined(REG_Base) && !defined(i386_HOST_ARCH)
-// on i386, REG_Base is %ebx which is also used for PIC, so we don't
-// want to steal it
-
-GLOBAL_REG_DECL(gc_thread*, gct, REG_Base)
-#define DECLARE_GCT /* nothing */
-
-
-#elif defined(REG_R1)
-
-GLOBAL_REG_DECL(gc_thread*, gct, REG_R1)
-#define DECLARE_GCT /* nothing */
-
-
-#elif defined(__GNUC__)
-
-extern __thread gc_thread* gct;
-#define DECLARE_GCT __thread gc_thread* gct;
-
-#else
-
-#error Cannot find a way to declare the thread-local gct
-
-#endif
-
-#else // not the threaded RTS
-
-extern StgWord8 the_gc_thread[];
-
-#define gct ((gc_thread*)&the_gc_thread)
-#define SET_GCT(to) /*nothing*/
-#define DECLARE_GCT /*nothing*/
-
-#endif
-
#include "EndPrivate.h"
#endif // SM_GCTHREAD_H
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 8b63674c77..ef8d0bd56d 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -18,6 +18,7 @@
#include "Storage.h"
#include "GC.h"
#include "GCThread.h"
+#include "GCTDecl.h"
#include "GCUtils.h"
#include "Printer.h"
#include "Trace.h"
diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h
index 3fe78a3310..d47375d946 100644
--- a/rts/sm/GCUtils.h
+++ b/rts/sm/GCUtils.h
@@ -16,6 +16,8 @@
#include "BeginPrivate.h"
+#include "GCTDecl.h"
+
bdescr *allocBlock_sync(void);
void freeChain_sync(bdescr *bd);
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index f4b576ac73..f9275ecb62 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -17,6 +17,7 @@
#include "MarkWeak.h"
#include "GC.h"
#include "GCThread.h"
+#include "GCTDecl.h"
#include "Evac.h"
#include "Trace.h"
#include "Schedule.h"