summaryrefslogtreecommitdiff
path: root/rts/sm/GC.c
diff options
context:
space:
mode:
Diffstat (limited to 'rts/sm/GC.c')
-rw-r--r--rts/sm/GC.c137
1 files changed, 81 insertions, 56 deletions
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 0fa927f2ad..8a8acb1b53 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -112,14 +112,8 @@ static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
/* Mut-list stats */
#if defined(DEBUG)
-uint32_t mutlist_MUTVARS,
- mutlist_MUTARRS,
- mutlist_MVARS,
- mutlist_TVAR,
- mutlist_TVAR_WATCH_QUEUE,
- mutlist_TREC_CHUNK,
- mutlist_TREC_HEADER,
- mutlist_OTHERS;
+// For lack of a better option we protect mutlist_scav_stats with oldest_gen->sync
+MutListScavStats mutlist_scav_stats;
#endif
/* Thread-local data for each GC thread
@@ -184,6 +178,36 @@ bdescr *mark_stack_top_bd; // topmost block in the mark stack
bdescr *mark_stack_bd; // current block in the mark stack
StgPtr mark_sp; // pointer to the next unallocated mark stack entry
+
+/* -----------------------------------------------------------------------------
+ Statistics from mut_list scavenging
+ -------------------------------------------------------------------------- */
+
+#if defined(DEBUG)
+void
+zeroMutListScavStats(MutListScavStats *src)
+{
+ memset(src, 0, sizeof(MutListScavStats));
+}
+
+void
+addMutListScavStats(const MutListScavStats *src,
+ MutListScavStats *dest)
+{
+#define ADD_STATS(field) dest->field += src->field;
+ ADD_STATS(n_MUTVAR);
+ ADD_STATS(n_MUTARR);
+ ADD_STATS(n_MVAR);
+ ADD_STATS(n_TVAR);
+ ADD_STATS(n_TREC_CHUNK);
+ ADD_STATS(n_TVAR_WATCH_QUEUE);
+ ADD_STATS(n_TREC_HEADER);
+ ADD_STATS(n_OTHERS);
+#undef ADD_STATS
+}
+#endif /* DEBUG */
+
+
/* -----------------------------------------------------------------------------
GarbageCollect: the main entry point to the garbage collector.
@@ -250,14 +274,7 @@ GarbageCollect (uint32_t collect_gen,
stablePtrLock();
#if defined(DEBUG)
- mutlist_MUTVARS = 0;
- mutlist_MUTARRS = 0;
- mutlist_MVARS = 0;
- mutlist_TVAR = 0;
- mutlist_TVAR_WATCH_QUEUE = 0;
- mutlist_TREC_CHUNK = 0;
- mutlist_TREC_HEADER = 0;
- mutlist_OTHERS = 0;
+ zeroMutListScavStats(&mutlist_scav_stats);
#endif
// attribute any costs to CCS_GC
@@ -520,37 +537,37 @@ GarbageCollect (uint32_t collect_gen,
const gc_thread* thread;
for (i=0; i < n_gc_threads; i++) {
- copied += gc_threads[i]->copied;
+ copied += RELAXED_LOAD(&gc_threads[i]->copied);
}
for (i=0; i < n_gc_threads; i++) {
thread = gc_threads[i];
if (n_gc_threads > 1) {
debugTrace(DEBUG_gc,"thread %d:", i);
debugTrace(DEBUG_gc," copied %ld",
- thread->copied * sizeof(W_));
+ RELAXED_LOAD(&thread->copied) * sizeof(W_));
debugTrace(DEBUG_gc," scanned %ld",
- thread->scanned * sizeof(W_));
+ RELAXED_LOAD(&thread->scanned) * sizeof(W_));
debugTrace(DEBUG_gc," any_work %ld",
- thread->any_work);
+ RELAXED_LOAD(&thread->any_work));
debugTrace(DEBUG_gc," no_work %ld",
- thread->no_work);
+ RELAXED_LOAD(&thread->no_work));
debugTrace(DEBUG_gc," scav_find_work %ld",
- thread->scav_find_work);
+ RELAXED_LOAD(&thread->scav_find_work));
#if defined(THREADED_RTS) && defined(PROF_SPIN)
- gc_spin_spin += thread->gc_spin.spin;
- gc_spin_yield += thread->gc_spin.yield;
- mut_spin_spin += thread->mut_spin.spin;
- mut_spin_yield += thread->mut_spin.yield;
+ gc_spin_spin += RELAXED_LOAD(&thread->gc_spin.spin);
+ gc_spin_yield += RELAXED_LOAD(&thread->gc_spin.yield);
+ mut_spin_spin += RELAXED_LOAD(&thread->mut_spin.spin);
+ mut_spin_yield += RELAXED_LOAD(&thread->mut_spin.yield);
#endif
- any_work += thread->any_work;
- no_work += thread->no_work;
- scav_find_work += thread->scav_find_work;
+ any_work += RELAXED_LOAD(&thread->any_work);
+ no_work += RELAXED_LOAD(&thread->no_work);
+ scav_find_work += RELAXED_LOAD(&thread->scav_find_work);
- par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
+ par_max_copied = stg_max(RELAXED_LOAD(&thread->copied), par_max_copied);
par_balanced_copied_acc +=
- stg_min(n_gc_threads * gc_threads[i]->copied, copied);
+ stg_min(n_gc_threads * RELAXED_LOAD(&thread->copied), copied);
}
}
if (n_gc_threads > 1) {
@@ -590,10 +607,14 @@ GarbageCollect (uint32_t collect_gen,
debugTrace(DEBUG_gc,
"mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d others)",
(unsigned long)(mut_list_size * sizeof(W_)),
- mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
- mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
- mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
- mutlist_OTHERS);
+ mutlist_scav_stats.n_MUTVAR,
+ mutlist_scav_stats.n_MUTARR,
+ mutlist_scav_stats.n_MVAR,
+ mutlist_scav_stats.n_TVAR,
+ mutlist_scav_stats.n_TVAR_WATCH_QUEUE,
+ mutlist_scav_stats.n_TREC_CHUNK,
+ mutlist_scav_stats.n_TREC_HEADER,
+ mutlist_scav_stats.n_OTHERS);
}
bdescr *next, *prev;
@@ -1109,7 +1130,7 @@ inc_running (void)
static StgWord
dec_running (void)
{
- ASSERT(gc_running_threads != 0);
+ ASSERT(RELAXED_LOAD(&gc_running_threads) != 0);
return atomic_dec(&gc_running_threads);
}
@@ -1119,7 +1140,7 @@ any_work (void)
int g;
gen_workspace *ws;
- gct->any_work++;
+ NONATOMIC_ADD(&gct->any_work, 1);
write_barrier();
@@ -1152,7 +1173,7 @@ any_work (void)
}
#endif
- gct->no_work++;
+ __atomic_fetch_add(&gct->no_work, 1, __ATOMIC_RELAXED);
#if defined(THREADED_RTS)
yieldThread();
#endif
@@ -1193,7 +1214,7 @@ loop:
debugTrace(DEBUG_gc, "%d GC threads still running", r);
- while (gc_running_threads != 0) {
+ while (SEQ_CST_LOAD(&gc_running_threads) != 0) {
// usleep(1);
if (any_work()) {
inc_running();
@@ -1230,7 +1251,7 @@ gcWorkerThread (Capability *cap)
// measurements more accurate on Linux, perhaps because it syncs
// the CPU time across the multiple cores. Without this, CPU time
// is heavily skewed towards GC rather than MUT.
- gct->wakeup = GC_THREAD_STANDING_BY;
+ SEQ_CST_STORE(&gct->wakeup, GC_THREAD_STANDING_BY);
debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
ACQUIRE_SPIN_LOCK(&gct->gc_spin);
@@ -1257,10 +1278,13 @@ gcWorkerThread (Capability *cap)
// Wait until we're told to continue
RELEASE_SPIN_LOCK(&gct->gc_spin);
- gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
gct->thread_index);
stat_endGCWorker (cap, gct);
+ // This must come *after* stat_endGCWorker since it serves to
+ // synchronize us with the GC leader, which will later aggregate the
+ // GC statistics.
+ SEQ_CST_STORE(&gct->wakeup, GC_THREAD_WAITING_TO_CONTINUE);
ACQUIRE_SPIN_LOCK(&gct->mut_spin);
debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
@@ -1285,7 +1309,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
while(retry) {
for (i=0; i < n_threads; i++) {
if (i == me || idle_cap[i]) continue;
- if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+ if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
prodCapability(capabilities[i], cap->running_task);
}
}
@@ -1295,7 +1319,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
if (i == me || idle_cap[i]) continue;
write_barrier();
interruptCapability(capabilities[i]);
- if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+ if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
retry = true;
}
}
@@ -1352,10 +1376,10 @@ wakeup_gc_threads (uint32_t me USED_IF_THREADS,
if (i == me || idle_cap[i]) continue;
inc_running();
debugTrace(DEBUG_gc, "waking up gc thread %d", i);
- if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY)
+ if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY)
barf("wakeup_gc_threads");
- gc_threads[i]->wakeup = GC_THREAD_RUNNING;
+ SEQ_CST_STORE(&gc_threads[i]->wakeup, GC_THREAD_RUNNING);
ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
}
@@ -1376,9 +1400,8 @@ shutdown_gc_threads (uint32_t me USED_IF_THREADS,
for (i=0; i < n_gc_threads; i++) {
if (i == me || idle_cap[i]) continue;
- while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
+ while (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE) {
busy_wait_nop();
- write_barrier();
}
}
#endif
@@ -1393,10 +1416,10 @@ releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
uint32_t i;
for (i=0; i < n_threads; i++) {
if (i == me || idle_cap[i]) continue;
- if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
+ if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE)
barf("releaseGCThreads");
- gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
+ SEQ_CST_STORE(&gc_threads[i]->wakeup, GC_THREAD_INACTIVE);
ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
}
@@ -1412,7 +1435,7 @@ static void
stash_mut_list (Capability *cap, uint32_t gen_no)
{
cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
- cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
+ RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_sync(cap->node));
}
/* ----------------------------------------------------------------------------
@@ -1438,9 +1461,11 @@ prepare_collected_gen (generation *gen)
// mutable list always has at least one block; this means we can avoid
// a check for NULL in recordMutable().
for (i = 0; i < n_capabilities; i++) {
- freeChain(capabilities[i]->mut_lists[g]);
- capabilities[i]->mut_lists[g] =
- allocBlockOnNode(capNoToNumaNode(i));
+ bdescr *old = RELAXED_LOAD(&capabilities[i]->mut_lists[g]);
+ freeChain(old);
+
+ bdescr *new = allocBlockOnNode(capNoToNumaNode(i));
+ RELAXED_STORE(&capabilities[i]->mut_lists[g], new);
}
}
@@ -1654,7 +1679,7 @@ collect_pinned_object_blocks (void)
bdescr *last = NULL;
if (use_nonmoving && gen == oldest_gen) {
// Mark objects as belonging to the nonmoving heap
- for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
+ for (bdescr *bd = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks); bd != NULL; bd = bd->link) {
bd->flags |= BF_NONMOVING;
bd->gen = oldest_gen;
bd->gen_no = oldest_gen->no;
@@ -1673,8 +1698,8 @@ collect_pinned_object_blocks (void)
if (gen->large_objects != NULL) {
gen->large_objects->u.back = last;
}
- gen->large_objects = capabilities[n]->pinned_object_blocks;
- capabilities[n]->pinned_object_blocks = NULL;
+ g0->large_objects = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks);
+ RELAXED_STORE(&capabilities[n]->pinned_object_blocks, NULL);
}
}
}