-rw-r--r--  includes/Rts.h                    |   4
-rw-r--r--  includes/rts/EventLogFormat.h     |  11
-rw-r--r--  includes/rts/Flags.h              |   1
-rw-r--r--  libraries/base/GHC/RTS/Flags.hsc  |   4
-rw-r--r--  rts/RtsFlags.c                    |   5
-rw-r--r--  rts/Schedule.c                    |  23
-rw-r--r--  rts/Trace.c                       |  53
-rw-r--r--  rts/Trace.h                       |  21
-rw-r--r--  rts/eventlog/EventLog.c           |  75
-rw-r--r--  rts/eventlog/EventLog.h           |  10
-rw-r--r--  rts/rts.cabal.in                  |   1
-rw-r--r--  rts/sm/Evac.c                     |  67
-rw-r--r--  rts/sm/GC.c                       |  35
-rw-r--r--  rts/sm/GC.h                       |   9
-rw-r--r--  rts/sm/GCAux.c                    |   4
-rw-r--r--  rts/sm/NonMoving.c                | 245
-rw-r--r--  rts/sm/NonMoving.h                |  47
-rw-r--r--  rts/sm/NonMovingCensus.c          | 129
-rw-r--r--  rts/sm/NonMovingCensus.h          |  28
-rw-r--r--  rts/sm/NonMovingMark.c            | 274
-rw-r--r--  rts/sm/NonMovingMark.h            |  44
-rw-r--r--  rts/sm/NonMovingScav.c            |  45
-rw-r--r--  rts/sm/NonMovingSweep.c           | 159
-rw-r--r--  rts/sm/NonMovingSweep.h           |   6
-rw-r--r--  rts/sm/Storage.c                  |  70
25 files changed, 1154 insertions, 216 deletions
diff --git a/includes/Rts.h b/includes/Rts.h
index def06de90d..7ad6988f3b 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -74,6 +74,10 @@ extern "C" {
#define RTS_UNREACHABLE abort()
#endif
+/* Prefetch primitives */
+#define prefetchForRead(ptr) __builtin_prefetch(ptr, 0)
+#define prefetchForWrite(ptr) __builtin_prefetch(ptr, 1)
+
/* Fix for mingw stat problem (done here so it's early enough) */
#if defined(mingw32_HOST_OS)
#define __MSVCRT__ 1
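
For context, a minimal standalone illustration (not from the patch) of how these wrappers are intended to be used: issue a read-intent prefetch for the next element while the current one is still being processed. The list type and loop below are hypothetical.

    #define prefetchForRead(ptr)  __builtin_prefetch(ptr, 0)
    #define prefetchForWrite(ptr) __builtin_prefetch(ptr, 1)

    /* Hypothetical linked-list walk: prefetch the next node's cache line
     * while the current node is still being examined. */
    struct node { struct node *link; unsigned long payload; };

    static unsigned long sum_list(const struct node *n)
    {
        unsigned long acc = 0;
        while (n != NULL) {
            if (n->link != NULL)
                prefetchForRead(n->link);   /* hide the latency of the next load */
            acc += n->payload;
            n = n->link;
        }
        return acc;
    }
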
diff --git a/includes/rts/EventLogFormat.h b/includes/rts/EventLogFormat.h
index ad983e70b3..0ffa77a2df 100644
--- a/includes/rts/EventLogFormat.h
+++ b/includes/rts/EventLogFormat.h
@@ -183,12 +183,21 @@
#define EVENT_USER_BINARY_MSG 181
+#define EVENT_CONC_MARK_BEGIN 200
+#define EVENT_CONC_MARK_END 201
+#define EVENT_CONC_SYNC_BEGIN 202
+#define EVENT_CONC_SYNC_END 203
+#define EVENT_CONC_SWEEP_BEGIN 204
+#define EVENT_CONC_SWEEP_END 205
+#define EVENT_CONC_UPD_REM_SET_FLUSH 206
+#define EVENT_NONMOVING_HEAP_CENSUS 207
+
/*
* The highest event code +1 that ghc itself emits. Note that some event
* ranges higher than this are reserved but not currently emitted by ghc.
* This must match the size of the EventDesc[] array in EventLog.c
*/
-#define NUM_GHC_EVENT_TAGS 182
+#define NUM_GHC_EVENT_TAGS 208
#if 0 /* DEPRECATED EVENTS: */
/* we don't actually need to record the thread, it's implicit */
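
As a sanity check on the numbering above: the new highest tag, EVENT_NONMOVING_HEAP_CENSUS (207), must sit one below NUM_GHC_EVENT_TAGS (208). A compile-time restatement of that invariant (a sketch; the RTS itself checks the related EventDesc[] length at runtime in initEventLogging):

    #define EVENT_NONMOVING_HEAP_CENSUS 207
    #define NUM_GHC_EVENT_TAGS          208
    _Static_assert(EVENT_NONMOVING_HEAP_CENSUS + 1 == NUM_GHC_EVENT_TAGS,
                   "NUM_GHC_EVENT_TAGS must be one past the highest emitted tag");
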
diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h
index 4499af9da6..9a039fd95c 100644
--- a/includes/rts/Flags.h
+++ b/includes/rts/Flags.h
@@ -170,6 +170,7 @@ typedef struct _TRACE_FLAGS {
bool timestamp; /* show timestamp in stderr output */
bool scheduler; /* trace scheduler events */
bool gc; /* trace GC events */
+ bool nonmoving_gc; /* trace nonmoving GC events */
bool sparks_sampled; /* trace spark events by a sampled method */
bool sparks_full; /* trace spark events 100% accurately */
bool user; /* trace user events (emitted from Haskell code) */
diff --git a/libraries/base/GHC/RTS/Flags.hsc b/libraries/base/GHC/RTS/Flags.hsc
index 4778eac397..913344c166 100644
--- a/libraries/base/GHC/RTS/Flags.hsc
+++ b/libraries/base/GHC/RTS/Flags.hsc
@@ -292,6 +292,8 @@ data TraceFlags = TraceFlags
, timestamp :: Bool -- ^ show timestamp in stderr output
, traceScheduler :: Bool -- ^ trace scheduler events
, traceGc :: Bool -- ^ trace GC events
+ , traceNonmovingGc
+ :: Bool -- ^ trace nonmoving GC heap census samples
, sparksSampled :: Bool -- ^ trace spark events by a sampled method
, sparksFull :: Bool -- ^ trace spark events 100% accurately
, user :: Bool -- ^ trace user events (emitted from Haskell code)
@@ -526,6 +528,8 @@ getTraceFlags = do
<*> (toBool <$>
(#{peek TRACE_FLAGS, gc} ptr :: IO CBool))
<*> (toBool <$>
+ (#{peek TRACE_FLAGS, nonmoving_gc} ptr :: IO CBool))
+ <*> (toBool <$>
(#{peek TRACE_FLAGS, sparks_sampled} ptr :: IO CBool))
<*> (toBool <$>
(#{peek TRACE_FLAGS, sparks_full} ptr :: IO CBool))
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 7d486824ab..c606d86418 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -222,6 +222,7 @@ void initRtsFlagsDefaults(void)
RtsFlags.TraceFlags.timestamp = false;
RtsFlags.TraceFlags.scheduler = false;
RtsFlags.TraceFlags.gc = false;
+ RtsFlags.TraceFlags.nonmoving_gc = false;
RtsFlags.TraceFlags.sparks_sampled= false;
RtsFlags.TraceFlags.sparks_full = false;
RtsFlags.TraceFlags.user = false;
@@ -2131,6 +2132,10 @@ static void read_trace_flags(const char *arg)
RtsFlags.TraceFlags.gc = enabled;
enabled = true;
break;
+ case 'n':
+ RtsFlags.TraceFlags.nonmoving_gc = enabled;
+ enabled = true;
+ break;
case 'u':
RtsFlags.TraceFlags.user = enabled;
enabled = true;
diff --git a/rts/Schedule.c b/rts/Schedule.c
index f66a276602..5387bc615e 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -164,7 +164,8 @@ static void scheduleHandleThreadBlocked( StgTSO *t );
static bool scheduleHandleThreadFinished( Capability *cap, Task *task,
StgTSO *t );
static bool scheduleNeedHeapProfile(bool ready_to_gc);
-static void scheduleDoGC(Capability **pcap, Task *task, bool force_major);
+static void scheduleDoGC( Capability **pcap, Task *task,
+ bool force_major, bool deadlock_detect );
static void deleteThread (StgTSO *tso);
static void deleteAllThreads (void);
@@ -264,7 +265,7 @@ schedule (Capability *initialCapability, Task *task)
case SCHED_INTERRUPTING:
debugTrace(DEBUG_sched, "SCHED_INTERRUPTING");
/* scheduleDoGC() deletes all the threads */
- scheduleDoGC(&cap,task,true);
+ scheduleDoGC(&cap,task,true,false);
// after scheduleDoGC(), we must be shutting down. Either some
// other Capability did the final GC, or we did it above,
@@ -561,7 +562,7 @@ run_thread:
}
if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) {
- scheduleDoGC(&cap,task,false);
+ scheduleDoGC(&cap,task,false,false);
}
} /* end of while() */
}
@@ -935,7 +936,7 @@ scheduleDetectDeadlock (Capability **pcap, Task *task)
// they are unreachable and will therefore be sent an
// exception. Any threads thus released will be immediately
// runnable.
- scheduleDoGC (pcap, task, true/*force major GC*/);
+ scheduleDoGC (pcap, task, true/*force major GC*/, true/*deadlock detection*/);
cap = *pcap;
// when force_major == true. scheduleDoGC sets
// recent_activity to ACTIVITY_DONE_GC and turns off the timer
@@ -1005,7 +1006,7 @@ scheduleProcessInbox (Capability **pcap USED_IF_THREADS)
while (!emptyInbox(cap)) {
// Executing messages might use heap, so we should check for GC.
if (doYouWantToGC(cap)) {
- scheduleDoGC(pcap, cap->running_task, false);
+ scheduleDoGC(pcap, cap->running_task, false, false);
cap = *pcap;
}
@@ -1552,9 +1553,11 @@ void releaseAllCapabilities(uint32_t n, Capability *keep_cap, Task *task)
* Perform a garbage collection if necessary
* -------------------------------------------------------------------------- */
+// N.B. See Note [Deadlock detection under nonmoving collector] for rationale
+// behind deadlock_detect argument.
static void
scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,
- bool force_major)
+ bool force_major, bool deadlock_detect)
{
Capability *cap = *pcap;
bool heap_census;
@@ -1847,9 +1850,9 @@ delete_threads_and_gc:
// emerge they don't immediately re-enter the GC.
pending_sync = 0;
signalCondition(&sync_finished_cond);
- GarbageCollect(collect_gen, heap_census, gc_type, cap, idle_cap);
+ GarbageCollect(collect_gen, heap_census, deadlock_detect, gc_type, cap, idle_cap);
#else
- GarbageCollect(collect_gen, heap_census, 0, cap, NULL);
+ GarbageCollect(collect_gen, heap_census, deadlock_detect, 0, cap, NULL);
#endif
// If we're shutting down, don't leave any idle GC work to do.
@@ -2717,7 +2720,7 @@ exitScheduler (bool wait_foreign USED_IF_THREADS)
nonmovingStop();
Capability *cap = task->cap;
waitForCapability(&cap,task);
- scheduleDoGC(&cap,task,true);
+ scheduleDoGC(&cap,task,true,false);
ASSERT(task->incall->tso == NULL);
releaseCapability(cap);
}
@@ -2785,7 +2788,7 @@ performGC_(bool force_major)
// TODO: do we need to traceTask*() here?
waitForCapability(&cap,task);
- scheduleDoGC(&cap,task,force_major);
+ scheduleDoGC(&cap,task,force_major,false);
releaseCapability(cap);
boundTaskExiting(task);
}
diff --git a/rts/Trace.c b/rts/Trace.c
index de647f762b..ecc28d8fec 100644
--- a/rts/Trace.c
+++ b/rts/Trace.c
@@ -30,6 +30,7 @@
// events
int TRACE_sched;
int TRACE_gc;
+int TRACE_nonmoving_gc;
int TRACE_spark_sampled;
int TRACE_spark_full;
int TRACE_user;
@@ -72,6 +73,9 @@ void initTracing (void)
RtsFlags.GcFlags.giveStats = COLLECT_GC_STATS;
}
+ TRACE_nonmoving_gc =
+ RtsFlags.TraceFlags.nonmoving_gc;
+
TRACE_spark_sampled =
RtsFlags.TraceFlags.sparks_sampled;
@@ -802,6 +806,55 @@ void traceThreadLabel_(Capability *cap,
}
}
+void traceConcMarkBegin()
+{
+ if (eventlog_enabled)
+ postEventNoCap(EVENT_CONC_MARK_BEGIN);
+}
+
+void traceConcMarkEnd(StgWord32 marked_obj_count)
+{
+ if (eventlog_enabled)
+ postConcMarkEnd(marked_obj_count);
+}
+
+void traceConcSyncBegin()
+{
+ if (eventlog_enabled)
+ postEventNoCap(EVENT_CONC_SYNC_BEGIN);
+}
+
+void traceConcSyncEnd()
+{
+ if (eventlog_enabled)
+ postEventNoCap(EVENT_CONC_SYNC_END);
+}
+
+void traceConcSweepBegin()
+{
+ if (eventlog_enabled)
+ postEventNoCap(EVENT_CONC_SWEEP_BEGIN);
+}
+
+void traceConcSweepEnd()
+{
+ if (eventlog_enabled)
+ postEventNoCap(EVENT_CONC_SWEEP_END);
+}
+
+void traceConcUpdRemSetFlush(Capability *cap)
+{
+ if (eventlog_enabled)
+ postConcUpdRemSetFlush(cap);
+}
+
+void traceNonmovingHeapCensus(uint32_t log_blk_size,
+ const struct NonmovingAllocCensus *census)
+{
+ if (eventlog_enabled && TRACE_nonmoving_gc)
+ postNonmovingHeapCensus(log_blk_size, census);
+}
+
void traceThreadStatus_ (StgTSO *tso USED_IF_DEBUG)
{
#if defined(DEBUG)
diff --git a/rts/Trace.h b/rts/Trace.h
index be4d844a6b..7f72fd8093 100644
--- a/rts/Trace.h
+++ b/rts/Trace.h
@@ -9,6 +9,7 @@
#pragma once
#include "rts/EventLogFormat.h"
+#include "sm/NonMovingCensus.h"
#include "Capability.h"
#if defined(DTRACE)
@@ -72,6 +73,7 @@ extern int TRACE_spark_sampled;
extern int TRACE_spark_full;
/* extern int TRACE_user; */ // only used in Trace.c
extern int TRACE_cap;
+extern int TRACE_nonmoving_gc;
// -----------------------------------------------------------------------------
// Posting events
@@ -304,6 +306,16 @@ void traceHeapProfSampleCostCentre(StgWord8 profile_id,
CostCentreStack *stack, StgWord residency);
#endif /* PROFILING */
+void traceConcMarkBegin(void);
+void traceConcMarkEnd(StgWord32 marked_obj_count);
+void traceConcSyncBegin(void);
+void traceConcSyncEnd(void);
+void traceConcSweepBegin(void);
+void traceConcSweepEnd(void);
+void traceConcUpdRemSetFlush(Capability *cap);
+void traceNonmovingHeapCensus(uint32_t log_blk_size,
+ const struct NonmovingAllocCensus *census);
+
void flushTrace(void);
#else /* !TRACING */
@@ -344,6 +356,15 @@ void flushTrace(void);
#define traceHeapProfSampleCostCentre(profile_id, stack, residency) /* nothing */
#define traceHeapProfSampleString(profile_id, label, residency) /* nothing */
+#define traceConcMarkBegin() /* nothing */
+#define traceConcMarkEnd(marked_obj_count) /* nothing */
+#define traceConcSyncBegin() /* nothing */
+#define traceConcSyncEnd() /* nothing */
+#define traceConcSweepBegin() /* nothing */
+#define traceConcSweepEnd() /* nothing */
+#define traceConcUpdRemSetFlush(cap) /* nothing */
+#define traceNonmovingHeapCensus(blk_size, census) /* nothing */
+
#define flushTrace() /* nothing */
#endif /* TRACING */
diff --git a/rts/eventlog/EventLog.c b/rts/eventlog/EventLog.c
index 5c6a1ca48a..8683ad9972 100644
--- a/rts/eventlog/EventLog.c
+++ b/rts/eventlog/EventLog.c
@@ -107,7 +107,15 @@ char *EventDesc[] = {
[EVENT_HEAP_PROF_SAMPLE_END] = "End of heap profile sample",
[EVENT_HEAP_PROF_SAMPLE_STRING] = "Heap profile string sample",
[EVENT_HEAP_PROF_SAMPLE_COST_CENTRE] = "Heap profile cost-centre sample",
- [EVENT_USER_BINARY_MSG] = "User binary message"
+ [EVENT_USER_BINARY_MSG] = "User binary message",
+ [EVENT_CONC_MARK_BEGIN] = "Begin concurrent mark phase",
+ [EVENT_CONC_MARK_END] = "End concurrent mark phase",
+ [EVENT_CONC_SYNC_BEGIN] = "Begin concurrent GC synchronisation",
+ [EVENT_CONC_SYNC_END] = "End concurrent GC synchronisation",
+ [EVENT_CONC_SWEEP_BEGIN] = "Begin concurrent sweep",
+ [EVENT_CONC_SWEEP_END] = "End concurrent sweep",
+ [EVENT_CONC_UPD_REM_SET_FLUSH] = "Update remembered set flushed",
+ [EVENT_NONMOVING_HEAP_CENSUS] = "Nonmoving heap census"
};
// Event type.
@@ -446,6 +454,27 @@ init_event_types(void)
eventTypes[t].size = EVENT_SIZE_DYNAMIC;
break;
+ case EVENT_CONC_MARK_BEGIN:
+ case EVENT_CONC_SYNC_BEGIN:
+ case EVENT_CONC_SYNC_END:
+ case EVENT_CONC_SWEEP_BEGIN:
+ case EVENT_CONC_SWEEP_END:
+ eventTypes[t].size = 0;
+ break;
+
+ case EVENT_CONC_MARK_END:
+ eventTypes[t].size = 4;
+ break;
+
+ case EVENT_CONC_UPD_REM_SET_FLUSH: // (cap)
+ eventTypes[t].size =
+ sizeof(EventCapNo);
+ break;
+
+ case EVENT_NONMOVING_HEAP_CENSUS: // (blk_size, active_segs, filled_segs, live_blks)
+ eventTypes[t].size = 13;
+ break;
+
default:
continue; /* ignore deprecated events */
}
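
The 13-byte size recorded for EVENT_NONMOVING_HEAP_CENSUS follows from the payload written by postNonmovingHeapCensus below: one byte for the log2 block size plus three 32-bit counters. A consumer-side sketch of that layout (the struct name is chosen here for illustration; eventlog fields are serialised big-endian, so treat this as a logical layout rather than something to cast onto the byte stream):

    #include <stdint.h>

    /* Logical payload of EVENT_NONMOVING_HEAP_CENSUS:
     *   1 byte   log2 of the block size
     *   4 bytes  n_active_segs
     *   4 bytes  n_filled_segs
     *   4 bytes  n_live_blocks
     * = 13 bytes, matching eventTypes[t].size above. */
    struct nonmoving_census_payload {
        uint8_t  log_blk_size;
        uint32_t n_active_segs;
        uint32_t n_filled_segs;
        uint32_t n_live_blocks;
    };
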
@@ -487,8 +516,10 @@ initEventLogging(const EventLogWriter *ev_writer)
event_log_writer = ev_writer;
initEventLogWriter();
- if (sizeof(EventDesc) / sizeof(char*) != NUM_GHC_EVENT_TAGS) {
- barf("EventDesc array has the wrong number of elements");
+ int num_descs = sizeof(EventDesc) / sizeof(char*);
+ if (num_descs != NUM_GHC_EVENT_TAGS) {
+ barf("EventDesc array has the wrong number of elements (%d, NUM_GHC_EVENT_TAGS=%d)",
+ num_descs, NUM_GHC_EVENT_TAGS);
}
/*
@@ -1005,6 +1036,15 @@ void postTaskDeleteEvent (EventTaskId taskId)
}
void
+postEventNoCap (EventTypeNum tag)
+{
+ ACQUIRE_LOCK(&eventBufMutex);
+ ensureRoomForEvent(&eventBuf, tag);
+ postEventHeader(&eventBuf, tag);
+ RELEASE_LOCK(&eventBufMutex);
+}
+
+void
postEvent (Capability *cap, EventTypeNum tag)
{
EventsBuf *eb = &capEventBuf[cap->no];
@@ -1130,6 +1170,35 @@ void postThreadLabel(Capability *cap,
postBuf(eb, (StgWord8*) label, strsize);
}
+void postConcUpdRemSetFlush(Capability *cap)
+{
+ EventsBuf *eb = &capEventBuf[cap->no];
+ ensureRoomForEvent(eb, EVENT_CONC_UPD_REM_SET_FLUSH);
+ postEventHeader(eb, EVENT_CONC_UPD_REM_SET_FLUSH);
+ postCapNo(eb, cap->no);
+}
+
+void postConcMarkEnd(StgWord32 marked_obj_count)
+{
+ ACQUIRE_LOCK(&eventBufMutex);
+ ensureRoomForEvent(&eventBuf, EVENT_CONC_MARK_END);
+ postEventHeader(&eventBuf, EVENT_CONC_MARK_END);
+ postWord32(&eventBuf, marked_obj_count);
+ RELEASE_LOCK(&eventBufMutex);
+}
+
+void postNonmovingHeapCensus(int log_blk_size,
+ const struct NonmovingAllocCensus *census)
+{
+ ACQUIRE_LOCK(&eventBufMutex);
+ postEventHeader(&eventBuf, EVENT_NONMOVING_HEAP_CENSUS);
+ postWord8(&eventBuf, log_blk_size);
+ postWord32(&eventBuf, census->n_active_segs);
+ postWord32(&eventBuf, census->n_filled_segs);
+ postWord32(&eventBuf, census->n_live_blocks);
+ RELEASE_LOCK(&eventBufMutex);
+}
+
void closeBlockMarker (EventsBuf *ebuf)
{
if (ebuf->marker)
diff --git a/rts/eventlog/EventLog.h b/rts/eventlog/EventLog.h
index d8a614b45c..0d439b836a 100644
--- a/rts/eventlog/EventLog.h
+++ b/rts/eventlog/EventLog.h
@@ -11,6 +11,7 @@
#include "rts/EventLogFormat.h"
#include "rts/EventLogWriter.h"
#include "Capability.h"
+#include "sm/NonMovingCensus.h"
#include "BeginPrivate.h"
@@ -39,6 +40,7 @@ void postSchedEvent(Capability *cap, EventTypeNum tag,
* Post a nullary event.
*/
void postEvent(Capability *cap, EventTypeNum tag);
+void postEventNoCap(EventTypeNum tag);
void postEventAtTimestamp (Capability *cap, EventTimestamp ts,
EventTypeNum tag);
@@ -159,6 +161,11 @@ void postHeapProfSampleCostCentre(StgWord8 profile_id,
StgWord64 residency);
#endif /* PROFILING */
+void postConcUpdRemSetFlush(Capability *cap);
+void postConcMarkEnd(StgWord32 marked_obj_count);
+void postNonmovingHeapCensus(int log_blk_size,
+ const struct NonmovingAllocCensus *census);
+
#else /* !TRACING */
INLINE_HEADER void postSchedEvent (Capability *cap STG_UNUSED,
@@ -172,6 +179,9 @@ INLINE_HEADER void postEvent (Capability *cap STG_UNUSED,
EventTypeNum tag STG_UNUSED)
{ /* nothing */ }
+INLINE_HEADER void postEventNoCap (EventTypeNum tag STG_UNUSED)
+{ /* nothing */ }
+
INLINE_HEADER void postMsg (char *msg STG_UNUSED,
va_list ap STG_UNUSED)
{ /* nothing */ }
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index 2c28426d75..a53c166849 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -467,6 +467,7 @@ library
sm/MBlock.c
sm/MarkWeak.c
sm/NonMoving.c
+ sm/NonMovingCensus.c
sm/NonMovingMark.c
sm/NonMovingScav.c
sm/NonMovingSweep.c
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index c2aaaacffa..f8ee2630b6 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -69,12 +69,6 @@ alloc_for_copy (uint32_t size, uint32_t gen_no)
{
ASSERT(gen_no < RtsFlags.GcFlags.generations);
- if (RtsFlags.GcFlags.useNonmoving && major_gc) {
- // unconditionally promote to non-moving heap in major gc
- gct->copied += size;
- return nonmovingAllocate(gct->cap, size);
- }
-
StgPtr to;
gen_workspace *ws;
@@ -91,9 +85,34 @@ alloc_for_copy (uint32_t size, uint32_t gen_no)
}
}
- if (RtsFlags.GcFlags.useNonmoving && gen_no == oldest_gen->no) {
- gct->copied += size;
- return nonmovingAllocate(gct->cap, size);
+ if (RtsFlags.GcFlags.useNonmoving) {
+ /* See Note [Deadlock detection under nonmoving collector]. */
+ if (deadlock_detect_gc)
+ gen_no = oldest_gen->no;
+
+ if (gen_no == oldest_gen->no) {
+ gct->copied += size;
+ to = nonmovingAllocate(gct->cap, size);
+
+ // Add segment to the todo list unless it's already there
+ // current->todo_link == NULL means not in todo list
+ struct NonmovingSegment *seg = nonmovingGetSegment(to);
+ if (!seg->todo_link) {
+ gen_workspace *ws = &gct->gens[oldest_gen->no];
+ seg->todo_link = ws->todo_seg;
+ ws->todo_seg = seg;
+ }
+
+ // The object which refers to this closure may have been aged (i.e.
+ // retained in a younger generation). Consequently, we must add the
+ // closure to the mark queue to ensure that it will be marked.
+ //
+ // However, if we are in a deadlock detection GC then we disable aging
+ // so there is no need.
+ if (major_gc && !deadlock_detect_gc)
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, (StgClosure *) to);
+ return to;
+ }
}
ws = &gct->gens[gen_no]; // zero memory references here
@@ -312,9 +331,10 @@ evacuate_large(StgPtr p)
*/
new_gen_no = bd->dest_no;
- if (RtsFlags.GcFlags.useNonmoving && major_gc) {
+ if (deadlock_detect_gc) {
+ /* See Note [Deadlock detection under nonmoving collector]. */
new_gen_no = oldest_gen->no;
- } else if (new_gen_no < gct->evac_gen_no) {
+ } else if (new_gen_no < gct->evac_gen_no) {
if (gct->eager_promotion) {
new_gen_no = gct->evac_gen_no;
} else {
@@ -363,6 +383,13 @@ evacuate_large(StgPtr p)
STATIC_INLINE void
evacuate_static_object (StgClosure **link_field, StgClosure *q)
{
+ if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving)) {
+ // See Note [Static objects under the nonmoving collector] in Storage.c.
+ if (major_gc && !deadlock_detect_gc)
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
+ return;
+ }
+
StgWord link = (StgWord)*link_field;
// See Note [STATIC_LINK fields] for how the link field bits work
@@ -603,6 +630,8 @@ loop:
// NOTE: large objects in nonmoving heap are also marked with
// BF_NONMOVING. Those are moved to scavenged_large_objects list in
// mark phase.
+ if (major_gc && !deadlock_detect_gc)
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
return;
}
@@ -629,6 +658,13 @@ loop:
// they are not)
if (bd->flags & BF_COMPACT) {
evacuate_compact((P_)q);
+
+ // We may have evacuated the block to the nonmoving generation. If so
+ // we need to make sure it is added to the mark queue since the only
+ // reference to it may be from the moving heap.
+ if (major_gc && bd->flags & BF_NONMOVING && !deadlock_detect_gc) {
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
+ }
return;
}
@@ -636,6 +672,13 @@ loop:
*/
if (bd->flags & BF_LARGE) {
evacuate_large((P_)q);
+
+ // We may have evacuated the block to the nonmoving generation. If so
+ // we need to make sure it is added to the mark queue since the only
+ // reference to it may be from the moving heap.
+ if (major_gc && bd->flags & BF_NONMOVING && !deadlock_detect_gc) {
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
+ }
return;
}
@@ -937,6 +980,8 @@ evacuate_BLACKHOLE(StgClosure **p)
ASSERT((bd->flags & BF_COMPACT) == 0);
if (bd->flags & BF_NONMOVING) {
+ if (major_gc && !deadlock_detect_gc)
+ markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
return;
}
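
Taken together, the Evac.c changes amount to a small decision rule for the preparatory (moving) collector. A hedged paraphrase with a hypothetical helper and simplified arguments (the real code works directly on block flags and the major_gc/deadlock_detect_gc globals):

    #include <stdbool.h>

    /* Should the preparatory GC push an object that lives in (or was just
     * evacuated to) the nonmoving heap onto the mark queue?  Mirrors the
     * `major_gc && !deadlock_detect_gc` guards added above. */
    static bool push_to_mark_queue_p(bool major_gc, bool deadlock_detect_gc,
                                     bool in_nonmoving_heap)
    {
        if (!in_nonmoving_heap)
            return false;   /* moving-heap objects are handled by the copying GC */
        if (deadlock_detect_gc)
            return false;   /* aging is disabled, so everything live is already
                               in the nonmoving heap; no push is needed */
        return major_gc;    /* during the preparatory major GC, push so the
                               concurrent mark sees the reference */
    }
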
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 37f9811aaf..83e9c97bd9 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -104,6 +104,7 @@
*/
uint32_t N;
bool major_gc;
+bool deadlock_detect_gc;
/* Data used for allocation area sizing.
*/
@@ -194,6 +195,7 @@ StgPtr mark_sp; // pointer to the next unallocated mark stack entry
void
GarbageCollect (uint32_t collect_gen,
const bool do_heap_census,
+ const bool deadlock_detect,
uint32_t gc_type USED_IF_THREADS,
Capability *cap,
bool idle_cap[])
@@ -263,7 +265,25 @@ GarbageCollect (uint32_t collect_gen,
N = collect_gen;
major_gc = (N == RtsFlags.GcFlags.generations-1);
- if (major_gc) {
+ /* See Note [Deadlock detection under nonmoving collector]. */
+ deadlock_detect_gc = deadlock_detect;
+
+#if defined(THREADED_RTS)
+ if (major_gc && RtsFlags.GcFlags.useNonmoving && concurrent_coll_running) {
+ /* If there is already a concurrent major collection running then
+ * there is no benefit to starting another.
+ * TODO: Catch heap-size runaway.
+ */
+ N--;
+ collect_gen--;
+ major_gc = false;
+ }
+#endif
+
+ /* N.B. The nonmoving collector works a bit differently. See
+ * Note [Static objects under the nonmoving collector].
+ */
+ if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
prev_static_flag = static_flag;
static_flag =
static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
@@ -710,6 +730,14 @@ GarbageCollect (uint32_t collect_gen,
}
} // for all generations
+ // Flush the update remembered set. See Note [Eager update remembered set
+ // flushing] in NonMovingMark.c
+ if (RtsFlags.GcFlags.useNonmoving) {
+ RELEASE_SM_LOCK;
+ nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
+ ACQUIRE_SM_LOCK;
+ }
+
// Mark and sweep the oldest generation.
// N.B. This can only happen after we've moved
// oldest_gen->scavenged_large_objects back to oldest_gen->large_objects.
@@ -736,6 +764,11 @@ GarbageCollect (uint32_t collect_gen,
// so we need to mark those too.
// Note that in sequential case these lists will be appended with more
// weaks and threads found to be dead in mark.
+#if !defined(THREADED_RTS)
+ // In the non-threaded runtime this is the only time we push to the
+ // upd_rem_set
+ nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
+#endif
nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
ACQUIRE_SM_LOCK;
}
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index ed19b8bddf..bde006913b 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -17,9 +17,12 @@
#include "HeapAlloc.h"
-void GarbageCollect (uint32_t force_major_gc,
+void GarbageCollect (uint32_t collect_gen,
bool do_heap_census,
- uint32_t gc_type, Capability *cap, bool idle_cap[]);
+ bool deadlock_detect,
+ uint32_t gc_type,
+ Capability *cap,
+ bool idle_cap[]);
typedef void (*evac_fn)(void *user, StgClosure **root);
@@ -30,6 +33,8 @@ bool doIdleGCWork(Capability *cap, bool all);
extern uint32_t N;
extern bool major_gc;
+/* See Note [Deadlock detection under nonmoving collector]. */
+extern bool deadlock_detect_gc;
extern bdescr *mark_stack_bd;
extern bdescr *mark_stack_top_bd;
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index 6076f61d6f..11080c1f22 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -148,14 +148,14 @@ markCAFs (evac_fn evac, void *user)
StgIndStatic *c;
for (c = dyn_caf_list;
- c != (StgIndStatic*)END_OF_CAF_LIST;
+ ((StgWord) c | STATIC_FLAG_LIST) != (StgWord)END_OF_CAF_LIST;
c = (StgIndStatic *)c->static_link)
{
c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
evac(user, &c->indirectee);
}
for (c = revertible_caf_list;
- c != (StgIndStatic*)END_OF_CAF_LIST;
+ ((StgWord) c | STATIC_FLAG_LIST) != (StgWord)END_OF_CAF_LIST;
c = (StgIndStatic *)c->static_link)
{
c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index 24464a43f9..6dc1010d43 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -21,6 +21,7 @@
#include "NonMoving.h"
#include "NonMovingMark.h"
#include "NonMovingSweep.h"
+#include "NonMovingCensus.h"
#include "StablePtr.h" // markStablePtrTable
#include "Schedule.h" // markScheduler
#include "Weak.h" // dead_weak_ptr_list
@@ -68,6 +69,113 @@ Mutex concurrent_coll_finished_lock;
* stopAllCapabilitiesWith(SYNC_FLUSH_UPD_REM_SET). Capabilities are held until
* the final mark has concluded.
*
+ * Note that possibility of concurrent minor and non-moving collections
+ * requires that we handle static objects a bit specially. See
+ * Note [Static objects under the nonmoving collector] in Storage.c
+ * for details.
+ *
+ *
+ * Note [Aging under the non-moving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The initial design of the non-moving collector mandated that all live data
+ * be evacuated to the non-moving heap prior to a major collection. This
+ * simplified certain bits of implementation and eased reasoning. However, it
+ * was (unsurprisingly) also found to result in significant amounts of
+ * unnecessary copying.
+ *
+ * Consequently, we now allow aging. Aging allows the preparatory GC leading up
+ * to a major collection to evacuate some objects into the young generation.
+ * However, this introduces the following tricky case that might arise after
+ * we have finished the preparatory GC:
+ *
+ * moving heap ┆ non-moving heap
+ * ───────────────┆──────────────────
+ * ┆
+ * B ←────────────── A ←─────────────── root
+ * │ ┆ ↖─────────────── gen1 mut_list
+ * ╰───────────────→ C
+ * ┆
+ *
+ * In this case C is clearly live, but the non-moving collector can only see
+ * this by walking through B, which lives in the moving heap. However, doing so
+ * would require that we synchronize with the mutator/minor GC to ensure that it
+ * isn't in the middle of moving B. What to do?
+ *
+ * The solution we use here is to teach the preparatory moving collector to
+ * "evacuate" objects it encounters in the non-moving heap by adding them to
+ * the mark queue. This is implemented by pushing the object to the update
+ * remembered set of the capability held by the evacuating gc_thread
+ * (implemented by markQueuePushClosureGC)
+ *
+ * Consequently collection of the case above would proceed as follows:
+ *
+ * 1. Initial state:
+ * * A lives in the non-moving heap and is reachable from the root set
+ * * A is on the oldest generation's mut_list, since it contains a pointer
+ * to B, which lives in a younger generation
+ * * B lives in the moving collector's from space
+ * * C lives in the non-moving heap
+ *
+ * 2. Preparatory GC: Scavenging mut_lists:
+ *
+ * The mut_list of the oldest generation is scavenged, resulting in B being
+ * evacuated (aged) into the moving collector's to-space.
+ *
+ * 3. Preparatory GC: Scavenge B
+ *
+ * B (now in to-space) is scavenged, resulting in evacuation of C.
+ * evacuate(C) pushes a reference to C to the mark queue.
+ *
+ * 4. Non-moving GC: C is marked
+ *
+ * The non-moving collector will come to C in the mark queue and mark it.
+ *
+ *
+ * Note [Deadlock detection under the non-moving collector]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * In GHC the garbage collector is responsible for identifying deadlocked
+ * programs. Providing for this responsibility is slightly tricky in the
+ * non-moving collector due to the existence of aging. In particular, the
+ * non-moving collector cannot traverse objects living in a young generation
+ * but reachable from the non-moving generation, as described in Note [Aging
+ * under the non-moving collector].
+ *
+ * However, this can pose trouble for deadlock detection since it means that we
+ * may conservatively mark dead closures as live. Consider this case:
+ *
+ * moving heap ┆ non-moving heap
+ * ───────────────┆──────────────────
+ * ┆
+ * MVAR_QUEUE ←───── TSO ←───────────── gen1 mut_list
+ * ↑ │ ╰────────↗ │
+ * │ │ ┆ │
+ * │ │ ┆ ↓
+ * │ ╰──────────→ MVAR
+ * ╰─────────────────╯
+ * ┆
+ *
+ * In this case we have a TSO blocked on a dead MVar. Because the MVAR_TSO_QUEUE on
+ * which it is blocked lives in the moving heap, the TSO is necessarily on the
+ * oldest generation's mut_list. As in Note [Aging under the non-moving
+ * collector], the MVAR_TSO_QUEUE will be evacuated. If MVAR_TSO_QUEUE is aged
+ * (e.g. evacuated to the young generation) then the MVAR will be added to the
+ * mark queue. Consequently, we will falsely conclude that the MVAR is still
+ * alive and fail to spot the deadlock.
+ *
+ * To avoid this sort of situation we disable aging when we are starting a
+ * major GC specifically for deadlock detection (as done by
+ * scheduleDetectDeadlock). This condition is recorded by the
+ * deadlock_detect_gc global variable declared in GC.h. Setting this has a few
+ * effects on the preparatory GC:
+ *
+ * - Evac.c:alloc_for_copy forces evacuation to the non-moving generation.
+ *
+ * - The evacuation logic usually responsible for pushing objects living in
+ * the non-moving heap to the mark queue is disabled. This is safe because
+ * we know that all live objects will be in the non-moving heap by the end
+ * of the preparatory moving collection.
+ *
*
* Note [Live data accounting in nonmoving collector]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -146,10 +254,26 @@ static struct NonmovingSegment *nonmovingPopFreeSegment(void)
}
}
+unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size)
+{
+ // We compute the overwhelmingly common size cases directly to avoid a very
+ // expensive integer division.
+ switch (log_block_size) {
+ case 3: return nonmovingBlockCount(3);
+ case 4: return nonmovingBlockCount(4);
+ case 5: return nonmovingBlockCount(5);
+ case 6: return nonmovingBlockCount(6);
+ case 7: return nonmovingBlockCount(7);
+ default: return nonmovingBlockCount(log_block_size);
+ }
+}
+
/*
* Request a fresh segment from the free segment list or allocate one on the
* given node.
*
+ * Caller must hold SM_MUTEX (although we take the gc_alloc_block_sync spinlock
+ * under the assumption that we are in a GC context).
*/
static struct NonmovingSegment *nonmovingAllocSegment(uint32_t node)
{
@@ -192,10 +316,12 @@ static inline unsigned long log2_ceil(unsigned long x)
}
// Advance a segment's next_free pointer. Returns true if the segment is full.
-static bool advance_next_free(struct NonmovingSegment *seg)
+static bool advance_next_free(struct NonmovingSegment *seg, const unsigned int blk_count)
{
- uint8_t *bitmap = seg->bitmap;
- unsigned int blk_count = nonmovingSegmentBlockCount(seg);
+ const uint8_t *bitmap = seg->bitmap;
+ ASSERT(blk_count == nonmovingSegmentBlockCount(seg));
+#if defined(NAIVE_ADVANCE_FREE)
+ // reference implementation
for (unsigned int i = seg->next_free+1; i < blk_count; i++) {
if (!bitmap[i]) {
seg->next_free = i;
@@ -204,6 +330,16 @@ static bool advance_next_free(struct NonmovingSegment *seg)
}
seg->next_free = blk_count;
return true;
+#else
+ const uint8_t *c = memchr(&bitmap[seg->next_free+1], 0, blk_count - seg->next_free - 1);
+ if (c == NULL) {
+ seg->next_free = blk_count;
+ return true;
+ } else {
+ seg->next_free = c - bitmap;
+ return false;
+ }
+#endif
}
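
A standalone sketch of the memchr-based scan used above, assuming a byte-per-block bitmap where zero means free (the bitmap contents and block count here are made up):

    #include <stdio.h>
    #include <string.h>

    /* Find the next free (zero) byte after `start` in a byte-per-block bitmap,
     * returning `blk_count` when the segment is full -- the same contract as
     * advance_next_free above. */
    static unsigned int next_free_block(const unsigned char *bitmap,
                                        unsigned int blk_count,
                                        unsigned int start)
    {
        const unsigned char *c = memchr(&bitmap[start + 1], 0,
                                        blk_count - start - 1);
        return c ? (unsigned int)(c - bitmap) : blk_count;
    }

    int main(void)
    {
        unsigned char bitmap[8] = {1, 1, 0, 1, 0, 1, 1, 1};  /* blocks 2 and 4 free */
        printf("%u\n", next_free_block(bitmap, 8, 0));       /* prints 2 */
        printf("%u\n", next_free_block(bitmap, 8, 2));       /* prints 4 */
        printf("%u\n", next_free_block(bitmap, 8, 4));       /* prints 8: full */
        return 0;
    }
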
static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *alloca)
@@ -221,34 +357,27 @@ static struct NonmovingSegment *pop_active_segment(struct NonmovingAllocator *al
}
}
-/* sz is in words */
+/* Allocate a block in the nonmoving heap. Caller must hold SM_MUTEX. sz is in words */
GNUC_ATTR_HOT
void *nonmovingAllocate(Capability *cap, StgWord sz)
{
- unsigned int allocator_idx = log2_ceil(sz * sizeof(StgWord)) - NONMOVING_ALLOCA0;
+ unsigned int log_block_size = log2_ceil(sz * sizeof(StgWord));
+ unsigned int block_count = nonmovingBlockCountFromSize(log_block_size);
// The max we ever allocate is 3276 bytes (anything larger is a large
// object and not moved) which is covered by allocator 9.
- ASSERT(allocator_idx < NONMOVING_ALLOCA_CNT);
+ ASSERT(log_block_size < NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT);
- struct NonmovingAllocator *alloca = nonmovingHeap.allocators[allocator_idx];
+ struct NonmovingAllocator *alloca = nonmovingHeap.allocators[log_block_size - NONMOVING_ALLOCA0];
// Allocate into current segment
struct NonmovingSegment *current = alloca->current[cap->no];
ASSERT(current); // current is never NULL
- void *ret = nonmovingSegmentGetBlock(current, current->next_free);
+ void *ret = nonmovingSegmentGetBlock_(current, log_block_size, current->next_free);
ASSERT(GET_CLOSURE_TAG(ret) == 0); // check alignment
- // Add segment to the todo list unless it's already there
- // current->todo_link == NULL means not in todo list
- if (!current->todo_link) {
- gen_workspace *ws = &gct->gens[oldest_gen->no];
- current->todo_link = ws->todo_seg;
- ws->todo_seg = current;
- }
-
// Advance the current segment's next_free or allocate a new segment if full
- bool full = advance_next_free(current);
+ bool full = advance_next_free(current, block_count);
if (full) {
// Current segment is full: update live data estimate, link it to
// filled, take an active segment if one exists, otherwise allocate a
@@ -256,8 +385,9 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
// Update live data estimate.
// See Note [Live data accounting in nonmoving collector].
- unsigned int new_blocks = nonmovingSegmentBlockCount(current) - current->next_free_snap;
- atomic_inc(&oldest_gen->live_estimate, new_blocks * nonmovingSegmentBlockSize(current) / sizeof(W_));
+ unsigned int new_blocks = block_count - current->next_free_snap;
+ unsigned int block_size = 1 << log_block_size;
+ atomic_inc(&oldest_gen->live_estimate, new_blocks * block_size / sizeof(W_));
// push the current segment to the filled list
nonmovingPushFilledSegment(current);
@@ -268,7 +398,7 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
// there are no active segments, allocate new segment
if (new_current == NULL) {
new_current = nonmovingAllocSegment(cap->node);
- nonmovingInitSegment(new_current, NONMOVING_ALLOCA0 + allocator_idx);
+ nonmovingInitSegment(new_current, log_block_size);
}
// make it current
@@ -375,37 +505,23 @@ void nonmovingAddCapabilities(uint32_t new_n_caps)
nonmovingHeap.n_caps = new_n_caps;
}
-static void nonmovingClearBitmap(struct NonmovingSegment *seg)
+static inline void nonmovingClearBitmap(struct NonmovingSegment *seg)
{
unsigned int n = nonmovingSegmentBlockCount(seg);
memset(seg->bitmap, 0, n);
}
-static void nonmovingClearSegmentBitmaps(struct NonmovingSegment *seg)
-{
- while (seg) {
- nonmovingClearBitmap(seg);
- seg = seg->link;
- }
-}
-
-static void nonmovingClearAllBitmaps(void)
-{
- for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
- struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
- nonmovingClearSegmentBitmaps(alloca->filled);
- }
-
- // Clear large object bits
- for (bdescr *bd = nonmoving_large_objects; bd; bd = bd->link) {
- bd->flags &= ~BF_MARKED;
- }
-}
-
/* Prepare the heap bitmaps and snapshot metadata for a mark */
static void nonmovingPrepareMark(void)
{
- nonmovingClearAllBitmaps();
+ // See Note [Static objects under the nonmoving collector].
+ prev_static_flag = static_flag;
+ static_flag =
+ static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
+
+ // Should have been cleared by the last sweep
+ ASSERT(nonmovingHeap.sweep_list == NULL);
+
nonmovingBumpEpoch();
for (int alloca_idx = 0; alloca_idx < NONMOVING_ALLOCA_CNT; ++alloca_idx) {
struct NonmovingAllocator *alloca = nonmovingHeap.allocators[alloca_idx];
@@ -416,11 +532,28 @@ static void nonmovingPrepareMark(void)
seg->next_free_snap = seg->next_free;
}
- // Update filled segments' snapshot pointers
- struct NonmovingSegment *seg = alloca->filled;
- while (seg) {
- seg->next_free_snap = seg->next_free;
- seg = seg->link;
+ // Update filled segments' snapshot pointers and move to sweep_list
+ uint32_t n_filled = 0;
+ struct NonmovingSegment *const filled = alloca->filled;
+ alloca->filled = NULL;
+ if (filled) {
+ struct NonmovingSegment *seg = filled;
+ while (true) {
+ n_filled++;
+ prefetchForRead(seg->link);
+ // Clear bitmap
+ prefetchForWrite(seg->link->bitmap);
+ nonmovingClearBitmap(seg);
+ // Set snapshot
+ seg->next_free_snap = seg->next_free;
+ if (seg->link)
+ seg = seg->link;
+ else
+ break;
+ }
+ // add filled segments to sweep_list
+ seg->link = nonmovingHeap.sweep_list;
+ nonmovingHeap.sweep_list = filled;
}
// N.B. It's not necessary to update snapshot pointers of active segments;
@@ -441,6 +574,12 @@ static void nonmovingPrepareMark(void)
oldest_gen->n_large_blocks = 0;
nonmoving_live_words = 0;
+ // Clear large object bits
+ for (bdescr *bd = nonmoving_large_objects; bd; bd = bd->link) {
+ bd->flags &= ~BF_MARKED;
+ }
+
+
#if defined(DEBUG)
debug_caf_list_snapshot = debug_caf_list;
debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
@@ -491,7 +630,6 @@ void nonmovingCollect(StgWeak **dead_weaks, StgTSO **resurrected_threads)
resizeGenerations();
nonmovingPrepareMark();
- nonmovingPrepareSweep();
// N.B. These should have been cleared at the end of the last sweep.
ASSERT(nonmoving_marked_large_objects == NULL);
@@ -688,7 +826,7 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
#if defined(DEBUG)
// Zap CAFs that we will sweep
- nonmovingGcCafs(mark_queue);
+ nonmovingGcCafs();
#endif
ASSERT(mark_queue->top->head == 0);
@@ -735,6 +873,8 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
* Sweep
****************************************************/
+ traceConcSweepBegin();
+
// Because we can't mark large object blocks (no room for mark bit) we
// collect them in a map in mark_queue and we pass it here to sweep large
// objects
@@ -744,6 +884,11 @@ static void nonmovingMark_(MarkQueue *mark_queue, StgWeak **dead_weaks, StgTSO *
nonmovingSweep();
ASSERT(nonmovingHeap.sweep_list == NULL);
debugTrace(DEBUG_nonmoving_gc, "Finished sweeping.");
+ traceConcSweepEnd();
+#if defined(DEBUG)
+ if (RtsFlags.DebugFlags.nonmoving_gc)
+ nonmovingPrintAllocatorCensus();
+#endif
// TODO: Remainder of things done by GarbageCollect (update stats)
diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h
index 0f9cfa0e48..17adfd6666 100644
--- a/rts/sm/NonMoving.h
+++ b/rts/sm/NonMoving.h
@@ -93,6 +93,10 @@ extern struct NonmovingHeap nonmovingHeap;
extern memcount nonmoving_live_words;
+#if defined(THREADED_RTS)
+extern bool concurrent_coll_running;
+#endif
+
void nonmovingInit(void);
void nonmovingStop(void);
void nonmovingExit(void);
@@ -161,22 +165,34 @@ INLINE_HEADER unsigned int nonmovingSegmentBlockSize(struct NonmovingSegment *se
return 1 << seg->block_size;
}
-// How many blocks does the given segment contain? Also the size of the bitmap.
-INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *seg)
+// How many blocks does a segment with the given block size have?
+INLINE_HEADER unsigned int nonmovingBlockCount(uint8_t log_block_size)
{
- unsigned int sz = nonmovingSegmentBlockSize(seg);
unsigned int segment_data_size = NONMOVING_SEGMENT_SIZE - sizeof(struct NonmovingSegment);
segment_data_size -= segment_data_size % SIZEOF_VOID_P;
- return segment_data_size / (sz + 1);
+ unsigned int blk_size = 1 << log_block_size;
+ // N.B. +1 accounts for the byte in the mark bitmap.
+ return segment_data_size / (blk_size + 1);
}
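
A worked example of this formula. The 32 kB segment size and 24-byte header are assumptions made here for illustration; the real values come from NONMOVING_SEGMENT_SIZE and sizeof(struct NonmovingSegment).

    #include <stdio.h>

    int main(void)
    {
        const unsigned int segment_size = 32 * 1024;   /* assumed segment size */
        const unsigned int header_size  = 24;          /* assumed header size  */
        unsigned int data = segment_size - header_size;
        data -= data % sizeof(void*);                  /* word-align, as above */

        for (unsigned int log_block_size = 3; log_block_size <= 7; log_block_size++) {
            unsigned int blk_size = 1u << log_block_size;
            /* each block also costs one bitmap byte, hence the +1 */
            printf("2^%u-byte blocks: %u per segment\n",
                   log_block_size, data / (blk_size + 1));
        }
        return 0;
    }
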
-// Get a pointer to the given block index
-INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmoving_block_idx i)
+unsigned int nonmovingBlockCountFromSize(uint8_t log_block_size);
+
+// How many blocks does the given segment contain? Also the size of the bitmap.
+INLINE_HEADER unsigned int nonmovingSegmentBlockCount(struct NonmovingSegment *seg)
+{
+ return nonmovingBlockCountFromSize(seg->block_size);
+}
+
+// Get a pointer to the given block index assuming that the block size is as
+// given (avoiding a potential cache miss when this information is already
+// available). The log_block_size argument must be equal to seg->block_size.
+INLINE_HEADER void *nonmovingSegmentGetBlock_(struct NonmovingSegment *seg, uint8_t log_block_size, nonmoving_block_idx i)
{
+ ASSERT(log_block_size == seg->block_size);
// Block size in bytes
- unsigned int blk_size = nonmovingSegmentBlockSize(seg);
+ unsigned int blk_size = 1 << log_block_size;
// Bitmap size in bytes
- W_ bitmap_size = nonmovingSegmentBlockCount(seg) * sizeof(uint8_t);
+ W_ bitmap_size = nonmovingBlockCountFromSize(log_block_size) * sizeof(uint8_t);
// Where the actual data starts (address of the first block).
// Use ROUNDUP_BYTES_TO_WDS to align to word size. Note that
// ROUNDUP_BYTES_TO_WDS returns in _words_, not in _bytes_, so convert it back
@@ -185,15 +201,26 @@ INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmo
return (void*)(data + i*blk_size);
}
+// Get a pointer to the given block index.
+INLINE_HEADER void *nonmovingSegmentGetBlock(struct NonmovingSegment *seg, nonmoving_block_idx i)
+{
+ return nonmovingSegmentGetBlock_(seg, seg->block_size, i);
+}
+
// Get the segment which a closure resides in. Assumes that pointer points into
// non-moving heap.
-INLINE_HEADER struct NonmovingSegment *nonmovingGetSegment(StgPtr p)
+INLINE_HEADER struct NonmovingSegment *nonmovingGetSegment_unchecked(StgPtr p)
{
- ASSERT(HEAP_ALLOCED_GC(p) && (Bdescr(p)->flags & BF_NONMOVING));
const uintptr_t mask = ~NONMOVING_SEGMENT_MASK;
return (struct NonmovingSegment *) (((uintptr_t) p) & mask);
}
+INLINE_HEADER struct NonmovingSegment *nonmovingGetSegment(StgPtr p)
+{
+ ASSERT(HEAP_ALLOCED_GC(p) && (Bdescr(p)->flags & BF_NONMOVING));
+ return nonmovingGetSegment_unchecked(p);
+}
+
INLINE_HEADER nonmoving_block_idx nonmovingGetBlockIdx(StgPtr p)
{
ASSERT(HEAP_ALLOCED_GC(p) && (Bdescr(p)->flags & BF_NONMOVING));
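
Since nonmoving segments are aligned to their own size, the segment containing a heap pointer can be recovered with a single mask, which is what nonmovingGetSegment_unchecked does. A small sketch assuming 32 kB, 32 kB-aligned segments (the size and alignment are properties of the existing allocator, not something introduced by this patch):

    #include <stdint.h>
    #include <stdio.h>

    #define SEGMENT_SIZE  (32u * 1024u)           /* assumed, for illustration */
    #define SEGMENT_MASK  (SEGMENT_SIZE - 1)

    int main(void)
    {
        uintptr_t p   = 0x42008098;                       /* hypothetical heap pointer */
        uintptr_t seg = p & ~(uintptr_t)SEGMENT_MASK;     /* segment base */
        printf("segment base: %#lx, offset in segment: %#lx\n",
               (unsigned long)seg, (unsigned long)(p & SEGMENT_MASK));
        return 0;
    }
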
diff --git a/rts/sm/NonMovingCensus.c b/rts/sm/NonMovingCensus.c
new file mode 100644
index 0000000000..670d51263c
--- /dev/null
+++ b/rts/sm/NonMovingCensus.c
@@ -0,0 +1,129 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2018
+ *
+ * Non-moving garbage collector and allocator: Accounting census
+ *
+ * This is a simple space accounting census useful for characterising
+ * fragmentation in the nonmoving heap.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "NonMoving.h"
+#include "Trace.h"
+#include "NonMovingCensus.h"
+
+// N.B. This may miss segments in the event of concurrent mutation (e.g. if a
+// mutator retires its current segment to the filled list).
+//
+// collect_live_words is whether we can guarantee that all mutators and minor
+// GCs are stopped. In this case it is safe to look at active and current
+// segments so we can also collect statistics on live words.
+static inline struct NonmovingAllocCensus
+nonmovingAllocatorCensus_(struct NonmovingAllocator *alloc, bool collect_live_words)
+{
+ struct NonmovingAllocCensus census = {0, 0, 0, 0};
+
+ for (struct NonmovingSegment *seg = alloc->filled;
+ seg != NULL;
+ seg = seg->link)
+ {
+ unsigned int n = nonmovingSegmentBlockCount(seg);
+ census.n_filled_segs++;
+ census.n_live_blocks += n;
+ if (collect_live_words) {
+ for (unsigned int i=0; i < n; i++) {
+ StgClosure *c = (StgClosure *) nonmovingSegmentGetBlock(seg, i);
+ census.n_live_words += closure_sizeW(c);
+ }
+ }
+ }
+
+ for (struct NonmovingSegment *seg = alloc->active;
+ seg != NULL;
+ seg = seg->link)
+ {
+ census.n_active_segs++;
+ unsigned int n = nonmovingSegmentBlockCount(seg);
+ for (unsigned int i=0; i < n; i++) {
+ if (nonmovingGetMark(seg, i)) {
+ StgClosure *c = (StgClosure *) nonmovingSegmentGetBlock(seg, i);
+ if (collect_live_words)
+ census.n_live_words += closure_sizeW(c);
+ census.n_live_blocks++;
+ }
+ }
+ }
+
+ for (unsigned int cap=0; cap < n_capabilities; cap++)
+ {
+ struct NonmovingSegment *seg = alloc->current[cap];
+ unsigned int n = nonmovingSegmentBlockCount(seg);
+ for (unsigned int i=0; i < n; i++) {
+ if (nonmovingGetMark(seg, i)) {
+ StgClosure *c = (StgClosure *) nonmovingSegmentGetBlock(seg, i);
+ if (collect_live_words)
+ census.n_live_words += closure_sizeW(c);
+ census.n_live_blocks++;
+ }
+ }
+ }
+ return census;
+}
+
+/* This must not be used when mutators are active since it assumes that
+ * all blocks in nonmoving heap are valid closures.
+ */
+struct NonmovingAllocCensus
+nonmovingAllocatorCensusWithWords(struct NonmovingAllocator *alloc)
+{
+ return nonmovingAllocatorCensus_(alloc, true);
+}
+
+struct NonmovingAllocCensus
+nonmovingAllocatorCensus(struct NonmovingAllocator *alloc)
+{
+ return nonmovingAllocatorCensus_(alloc, false);
+}
+
+
+void nonmovingPrintAllocatorCensus()
+{
+ if (!RtsFlags.GcFlags.useNonmoving)
+ return;
+
+ for (int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
+ struct NonmovingAllocCensus census =
+ nonmovingAllocatorCensus(nonmovingHeap.allocators[i]);
+
+ uint32_t blk_size = 1 << (i + NONMOVING_ALLOCA0);
+ // We define occupancy as the fraction of space that is used for useful
+ // data (that is, live and not slop).
+ double occupancy = 100.0 * census.n_live_words * sizeof(W_)
+ / (census.n_live_blocks * blk_size);
+ if (census.n_live_blocks == 0) occupancy = 100;
+ (void) occupancy; // silence warning if !DEBUG
+ debugTrace(DEBUG_nonmoving_gc, "Allocator %d (%d bytes - %d bytes): "
+ "%d active segs, %d filled segs, %d live blocks, %d live words "
+ "(%2.1f%% occupancy)",
+ i, 1 << (i + NONMOVING_ALLOCA0 - 1), 1 << (i + NONMOVING_ALLOCA0),
+ census.n_active_segs, census.n_filled_segs, census.n_live_blocks, census.n_live_words,
+ occupancy);
+ }
+}
+
+void nonmovingTraceAllocatorCensus()
+{
+#if defined(TRACING)
+ if (!RtsFlags.GcFlags.useNonmoving || !TRACE_nonmoving_gc)
+ return;
+
+ for (int i=0; i < NONMOVING_ALLOCA_CNT; i++) {
+ const struct NonmovingAllocCensus census =
+ nonmovingAllocatorCensus(nonmovingHeap.allocators[i]);
+ const uint32_t log_blk_size = i + NONMOVING_ALLOCA0;
+ traceNonmovingHeapCensus(log_blk_size, &census);
+ }
+#endif
+}
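
The occupancy figure printed by nonmovingPrintAllocatorCensus is simply live bytes over the bytes consumed by live blocks. A worked example with made-up numbers for the 64-byte allocator on a 64-bit machine:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical census result for the 64-byte allocator. */
        unsigned int n_live_blocks = 1000;
        unsigned int n_live_words  = 6000;
        unsigned int blk_size      = 64;
        unsigned int word_size     = 8;    /* 64-bit machine */

        double occupancy = 100.0 * n_live_words * word_size
                         / ((double)n_live_blocks * blk_size);
        printf("occupancy: %.1f%%\n", occupancy);   /* 75.0%; the rest is slop */
        return 0;
    }
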
diff --git a/rts/sm/NonMovingCensus.h b/rts/sm/NonMovingCensus.h
new file mode 100644
index 0000000000..7a66dc9b69
--- /dev/null
+++ b/rts/sm/NonMovingCensus.h
@@ -0,0 +1,28 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2018
+ *
+ * Non-moving garbage collector and allocator: Accounting census
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "NonMoving.h"
+
+struct NonmovingAllocCensus {
+ uint32_t n_active_segs;
+ uint32_t n_filled_segs;
+ uint32_t n_live_blocks;
+ uint32_t n_live_words;
+};
+
+
+struct NonmovingAllocCensus
+nonmovingAllocatorCensusWithWords(struct NonmovingAllocator *alloc);
+
+struct NonmovingAllocCensus
+nonmovingAllocatorCensus(struct NonmovingAllocator *alloc);
+
+void nonmovingPrintAllocatorCensus(void);
+void nonmovingTraceAllocatorCensus(void);
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 72a3ca69cb..bb5d72bbf1 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -131,6 +131,49 @@ StgIndStatic *debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
* The mark phase is responsible for freeing update remembered set block
* allocations.
*
+ * Note that we eagerly flush update remembered sets during minor GCs as
+ * described in Note [Eager update remembered set flushing].
+ *
+ *
+ * Note [Eager update remembered set flushing]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * We eagerly flush update remembered sets during minor GCs to avoid scenarios
+ * like the following which could result in long sync pauses:
+ *
+ * 1. We start a major GC, all thread stacks are added to the mark queue.
+ * 2. The concurrent mark thread starts.
+ * 3. The mutator is allowed to resume. One mutator thread T is scheduled and marks its
+ * stack to its local update remembered set.
+ * 4. The mark thread eventually encounters the mutator thread's stack but
+ * sees that it has already been marked; skips it.
+ * 5. Thread T continues running but does not push enough to its update
+ * remembered set to require a flush.
+ * 6. Eventually the mark thread finishes marking and requests a final sync.
+ * 7. The thread T flushes its update remembered set.
+ * 8. We find that a large fraction of the heap (namely the subset that is
+ * only reachable from the thread T's stack) needs to be marked, incurring
+ * a large sync pause.
+ *
+ * We avoid this by periodically (during minor GC) forcing a flush of the
+ * update remembered set.
+ *
+ * A better (but more complex) approach that would be worthwhile trying in the
+ * future would be to do the following:
+ *
+ * 1. Concurrent mark phase is on-going
+ * 2. Mark thread runs out of things to mark
+ * 3. Mark thread sends a signal to capabilities requesting that they send
+ * their update remembered sets without suspending their execution
+ * 4. The mark thread marks everything it was sent; runs out of things to mark
+ * 5. Mark thread initiates a sync
+ * 6. Capabilities send their final update remembered sets and suspend execution
+ * 7. Mark thread marks everything it was sent
+ * 8. Mark thread allows capabilities to resume.
+ *
+ * However, this is obviously a fair amount of complexity and so far the
+ * periodic eager flushing approach has been sufficient.
+ *
*
* Note [Concurrent read barrier on deRefWeak#]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -205,7 +248,7 @@ static void init_mark_queue_(MarkQueue *queue);
* Really the argument type should be UpdRemSet* but this would be rather
* inconvenient without polymorphism.
*/
-static void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
+void nonmovingAddUpdRemSetBlocks(MarkQueue *rset)
{
if (markQueueIsEmpty(rset)) return;
@@ -238,6 +281,7 @@ void nonmovingFlushCapUpdRemSetBlocks(Capability *cap)
debugTrace(DEBUG_nonmoving_gc,
"Capability %d flushing update remembered set: %d",
cap->no, markQueueLength(&cap->upd_rem_set.queue));
+ traceConcUpdRemSetFlush(cap);
nonmovingAddUpdRemSetBlocks(&cap->upd_rem_set.queue);
atomic_inc(&upd_rem_set_flush_count, 1);
signalCondition(&upd_rem_set_flushed_cond);
@@ -251,6 +295,7 @@ void nonmovingFlushCapUpdRemSetBlocks(Capability *cap)
void nonmovingBeginFlush(Task *task)
{
debugTrace(DEBUG_nonmoving_gc, "Starting update remembered set flush...");
+ traceConcSyncBegin();
upd_rem_set_flush_count = 0;
stopAllCapabilitiesWith(NULL, task, SYNC_FLUSH_UPD_REM_SET);
@@ -342,6 +387,7 @@ void nonmovingFinishFlush(Task *task)
upd_rem_set_block_list = NULL;
debugTrace(DEBUG_nonmoving_gc, "Finished update remembered set flush...");
+ traceConcSyncEnd();
releaseAllCapabilities(n_capabilities, NULL, task);
}
#endif
@@ -361,7 +407,7 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
} else {
// allocate a fresh block.
ACQUIRE_SM_LOCK;
- bdescr *bd = allocGroup(1);
+ bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
bd->link = q->blocks;
q->blocks = bd;
q->top = (MarkQueueBlock *) bd->start;
@@ -374,16 +420,49 @@ push (MarkQueue *q, const MarkQueueEnt *ent)
q->top->head++;
}
+/* A variant of push to be used by the minor GC when it encounters a reference
+ * to an object in the non-moving heap. In contrast to the other push
+ * operations this uses the gc_alloc_block_sync spinlock instead of the
+ * SM_LOCK to allocate new blocks in the event that the mark queue is full.
+ */
+void
+markQueuePushClosureGC (MarkQueue *q, StgClosure *p)
+{
+ /* We should not make it here if we are doing a deadlock detect GC.
+ * See Note [Deadlock detection under nonmoving collector].
+ */
+ ASSERT(!deadlock_detect_gc);
+
+ // Are we at the end of the block?
+ if (q->top->head == MARK_QUEUE_BLOCK_ENTRIES) {
+ // Yes, this block is full.
+ // allocate a fresh block.
+ ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
+ bd->link = q->blocks;
+ q->blocks = bd;
+ q->top = (MarkQueueBlock *) bd->start;
+ q->top->head = 0;
+ RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ }
+
+ MarkQueueEnt ent = {
+ .mark_closure = {
+ .p = UNTAG_CLOSURE(p),
+ .origin = NULL,
+ }
+ };
+ q->top->entries[q->top->head] = ent;
+ q->top->head++;
+}
+
static inline
void push_closure (MarkQueue *q,
StgClosure *p,
StgClosure **origin)
{
- // TODO: Push this into callers where they already have the Bdescr
- if (HEAP_ALLOCED_GC(p) && (Bdescr((StgPtr) p)->gen != oldest_gen))
- return;
-
#if defined(DEBUG)
+ ASSERT(!HEAP_ALLOCED_GC(p) || (Bdescr((StgPtr) p)->gen == oldest_gen));
ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
// Commenting out: too slow
// if (RtsFlags.DebugFlags.sanity) {
@@ -393,8 +472,12 @@ void push_closure (MarkQueue *q,
// }
#endif
+ // This must be true as origin points to a pointer and therefore must be
+ // word-aligned. However, we check this as otherwise we would confuse this
+ // with a mark_array entry
+ ASSERT(((uintptr_t) origin & 0x3) == 0);
+
MarkQueueEnt ent = {
- .type = MARK_CLOSURE,
.mark_closure = {
.p = UNTAG_CLOSURE(p),
.origin = origin,
@@ -413,10 +496,9 @@ void push_array (MarkQueue *q,
return;
MarkQueueEnt ent = {
- .type = MARK_ARRAY,
.mark_array = {
.array = array,
- .start_index = start_index
+ .start_index = (start_index << 16) | 0x3,
}
};
push(q, &ent);
@@ -493,15 +575,11 @@ void updateRemembSetPushThunkEager(Capability *cap,
MarkQueue *queue = &cap->upd_rem_set.queue;
push_thunk_srt(queue, &info->i);
- // Don't record the origin of objects living outside of the nonmoving
- // heap; we can't perform the selector optimisation on them anyways.
- bool record_origin = check_in_nonmoving_heap((StgClosure*)thunk);
-
for (StgWord i = 0; i < info->i.layout.payload.ptrs; i++) {
if (check_in_nonmoving_heap(thunk->payload[i])) {
- push_closure(queue,
- thunk->payload[i],
- record_origin ? &thunk->payload[i] : NULL);
+ // Don't bother to push origin; it makes the barrier needlessly
+ // expensive with little benefit.
+ push_closure(queue, thunk->payload[i], NULL);
}
}
break;
@@ -510,7 +588,9 @@ void updateRemembSetPushThunkEager(Capability *cap,
{
MarkQueue *queue = &cap->upd_rem_set.queue;
StgAP *ap = (StgAP *) thunk;
- push_closure(queue, ap->fun, &ap->fun);
+ if (check_in_nonmoving_heap(ap->fun)) {
+ push_closure(queue, ap->fun, NULL);
+ }
mark_PAP_payload(queue, ap->fun, ap->payload, ap->n_args);
break;
}
@@ -531,9 +611,10 @@ void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p)
inline void updateRemembSetPushClosure(Capability *cap, StgClosure *p)
{
- if (!check_in_nonmoving_heap(p)) return;
- MarkQueue *queue = &cap->upd_rem_set.queue;
- push_closure(queue, p, NULL);
+ if (check_in_nonmoving_heap(p)) {
+ MarkQueue *queue = &cap->upd_rem_set.queue;
+ push_closure(queue, p, NULL);
+ }
}
void updateRemembSetPushClosure_(StgRegTable *reg, struct StgClosure_ *p)
@@ -630,7 +711,10 @@ void markQueuePushClosure (MarkQueue *q,
StgClosure *p,
StgClosure **origin)
{
- push_closure(q, p, origin);
+ // TODO: Push this into callers where they already have the Bdescr
+ if (check_in_nonmoving_heap(p)) {
+ push_closure(q, p, origin);
+ }
}
/* TODO: Do we really never want to specify the origin here? */
@@ -667,7 +751,7 @@ void markQueuePushArray (MarkQueue *q,
*********************************************************/
// Returns invalid MarkQueueEnt if queue is empty.
-static MarkQueueEnt markQueuePop (MarkQueue *q)
+static MarkQueueEnt markQueuePop_ (MarkQueue *q)
{
MarkQueueBlock *top;
@@ -679,7 +763,7 @@ again:
// Is this the first block of the queue?
if (q->blocks->link == NULL) {
// Yes, therefore queue is empty...
- MarkQueueEnt none = { .type = NULL_ENTRY };
+ MarkQueueEnt none = { .null_entry = { .p = NULL } };
return none;
} else {
// No, unwind to the previous block and try popping again...
@@ -698,6 +782,47 @@ again:
return ent;
}
+static MarkQueueEnt markQueuePop (MarkQueue *q)
+{
+#if MARK_PREFETCH_QUEUE_DEPTH == 0
+ return markQueuePop_(q);
+#else
+ unsigned int i = q->prefetch_head;
+ while (nonmovingMarkQueueEntryType(&q->prefetch_queue[i]) == NULL_ENTRY) {
+ MarkQueueEnt new = markQueuePop_(q);
+ if (nonmovingMarkQueueEntryType(&new) == NULL_ENTRY) {
+ // Mark queue is empty; look for any valid entries in the prefetch
+ // queue
+ for (unsigned int j = (i+1) % MARK_PREFETCH_QUEUE_DEPTH;
+ j != i;
+ j = (j+1) % MARK_PREFETCH_QUEUE_DEPTH)
+ {
+ if (nonmovingMarkQueueEntryType(&q->prefetch_queue[j]) != NULL_ENTRY) {
+ i = j;
+ goto done;
+ }
+ }
+ return new;
+ }
+
+ // The entry may not be a MARK_CLOSURE but it doesn't matter, our
+ // MarkQueueEnt encoding always places the pointer to the object to be
+ // marked first.
+ prefetchForRead(&new.mark_closure.p->header.info);
+ prefetchForRead(&nonmovingGetSegment_unchecked((StgPtr) new.mark_closure.p)->block_size);
+ q->prefetch_queue[i] = new;
+ i = (i + 1) % MARK_PREFETCH_QUEUE_DEPTH;
+ }
+
+ done:
+ ;
+ MarkQueueEnt ret = q->prefetch_queue[i];
+ q->prefetch_queue[i].null_entry.p = NULL;
+ q->prefetch_head = i;
+ return ret;
+#endif
+}
+
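
A standalone sketch of the ring-buffer scheme used by markQueuePop above: entries are prefetched as they enter the ring and only consumed several pops later, giving the memory system time to bring the object's header into cache. All names below (DEPTH, Entry, pop_with_prefetch) are invented for the example; this is not RTS code.

    #include <stddef.h>
    #include <stdio.h>

    #define DEPTH 5                       /* stands in for MARK_PREFETCH_QUEUE_DEPTH */

    typedef struct { void *p; } Entry;    /* p == NULL marks an empty slot */

    static Entry ring[DEPTH];
    static unsigned int head = 0;

    static Entry pop_with_prefetch(Entry (*pop_underlying)(void))
    {
        unsigned int i = head;
        while (ring[i].p == NULL) {
            Entry e = pop_underlying();
            if (e.p == NULL) {
                /* Underlying queue drained; hand out anything still buffered. */
                for (unsigned int j = (i+1) % DEPTH; j != i; j = (j+1) % DEPTH) {
                    if (ring[j].p != NULL) { i = j; goto done; }
                }
                return e;                 /* ring and queue are both empty */
            }
            __builtin_prefetch(e.p, 0);   /* warm the cache before the entry is used */
            ring[i] = e;
            i = (i + 1) % DEPTH;
        }
    done:
        ;
        Entry ret = ring[i];
        ring[i].p = NULL;                 /* the slot becomes free again */
        head = i;
        return ret;
    }

    static Entry next_from_array(void)    /* toy underlying queue for the demo */
    {
        static int vals[3] = {1, 2, 3};
        static unsigned int next = 0;
        Entry e = { .p = (next < 3) ? &vals[next++] : NULL };
        return e;
    }

    int main(void)
    {
        for (Entry e = pop_with_prefetch(next_from_array); e.p != NULL;
             e = pop_with_prefetch(next_from_array))
            printf("%d\n", *(int *)e.p);  /* prints 1, 2, 3 */
        return 0;
    }
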
/*********************************************************
* Creating and destroying MarkQueues and UpdRemSets
*********************************************************/
@@ -705,17 +830,20 @@ again:
/* Must hold sm_mutex. */
static void init_mark_queue_ (MarkQueue *queue)
{
- bdescr *bd = allocGroup(1);
+ bdescr *bd = allocGroup(MARK_QUEUE_BLOCKS);
queue->blocks = bd;
queue->top = (MarkQueueBlock *) bd->start;
queue->top->head = 0;
+#if MARK_PREFETCH_QUEUE_DEPTH > 0
+ memset(&queue->prefetch_queue, 0, sizeof(queue->prefetch_queue));
+ queue->prefetch_head = 0;
+#endif
}
/* Must hold sm_mutex. */
void initMarkQueue (MarkQueue *queue)
{
init_mark_queue_(queue);
- queue->marked_objects = allocHashTable();
queue->is_upd_rem_set = false;
}
@@ -723,8 +851,6 @@ void initMarkQueue (MarkQueue *queue)
void init_upd_rem_set (UpdRemSet *rset)
{
init_mark_queue_(&rset->queue);
- // Update remembered sets don't have to worry about static objects
- rset->queue.marked_objects = NULL;
rset->queue.is_upd_rem_set = true;
}
@@ -739,7 +865,6 @@ void reset_upd_rem_set (UpdRemSet *rset)
void freeMarkQueue (MarkQueue *queue)
{
freeChain_lock(queue->blocks);
- freeHashTable(queue->marked_objects, NULL);
}
#if defined(THREADED_RTS) && defined(DEBUG)
@@ -986,12 +1111,32 @@ mark_stack (MarkQueue *queue, StgStack *stack)
mark_stack_(queue, stack->sp, stack->stack + stack->stack_size);
}
+/* See Note [Static objects under the nonmoving collector].
+ *
+ * Returns true if the object needs to be marked.
+ */
+static bool
+bump_static_flag(StgClosure **link_field, StgClosure *q STG_UNUSED)
+{
+ while (1) {
+ StgWord link = (StgWord) *link_field;
+ StgWord new = (link & ~STATIC_BITS) | static_flag;
+ if ((link & STATIC_BITS) == static_flag)
+ return false;
+ else if (cas((StgVolatilePtr) link_field, link, new) == link) {
+ return true;
+ }
+ }
+}
+
static GNUC_ATTR_HOT void
mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
{
(void)origin; // TODO: should be used for selector/thunk optimisations
try_again:
+ ;
+ bdescr *bd = NULL;
p = UNTAG_CLOSURE(p);
# define PUSH_FIELD(obj, field) \
@@ -1009,45 +1154,46 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
return;
}
- if (lookupHashTable(queue->marked_objects, (W_)p)) {
- // already marked
- return;
- }
-
- insertHashTable(queue->marked_objects, (W_)p, (P_)1);
-
switch (type) {
case THUNK_STATIC:
if (info->srt != 0) {
- markQueuePushThunkSrt(queue, info); // TODO this function repeats the check above
+ if (bump_static_flag(THUNK_STATIC_LINK((StgClosure *)p), p)) {
+ markQueuePushThunkSrt(queue, info); // TODO this function repeats the check above
+ }
}
return;
case FUN_STATIC:
if (info->srt != 0 || info->layout.payload.ptrs != 0) {
- markQueuePushFunSrt(queue, info); // TODO this function repeats the check above
-
- // a FUN_STATIC can also be an SRT, so it may have pointer
- // fields. See Note [SRTs] in CmmBuildInfoTables, specifically
- // the [FUN] optimisation.
- // TODO (osa) I don't understand this comment
- for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) {
- PUSH_FIELD(p, payload[i]);
+ if (bump_static_flag(STATIC_LINK(info, (StgClosure *)p), p)) {
+ markQueuePushFunSrt(queue, info); // TODO this function repeats the check above
+
+ // a FUN_STATIC can also be an SRT, so it may have pointer
+ // fields. See Note [SRTs] in CmmBuildInfoTables, specifically
+ // the [FUN] optimisation.
+ // TODO (osa) I don't understand this comment
+ for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) {
+ PUSH_FIELD(p, payload[i]);
+ }
}
}
return;
case IND_STATIC:
- PUSH_FIELD((StgInd *) p, indirectee);
+ if (bump_static_flag(IND_STATIC_LINK((StgClosure *)p), p)) {
+ PUSH_FIELD((StgInd *) p, indirectee);
+ }
return;
case CONSTR:
case CONSTR_1_0:
case CONSTR_2_0:
case CONSTR_1_1:
- for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) {
- PUSH_FIELD(p, payload[i]);
+ if (bump_static_flag(STATIC_LINK(info, (StgClosure *)p), p)) {
+ for (StgHalfWord i = 0; i < info->layout.payload.ptrs; ++i) {
+ PUSH_FIELD(p, payload[i]);
+ }
}
return;
@@ -1061,19 +1207,17 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
}
}
- bdescr *bd = Bdescr((StgPtr) p);
+ bd = Bdescr((StgPtr) p);
if (bd->gen != oldest_gen) {
- // Here we have an object living outside of the non-moving heap. Since
- // we moved everything to the non-moving heap before starting the major
- // collection, we know that we don't need to trace it: it was allocated
- // after we took our snapshot.
-#if !defined(THREADED_RTS)
- // This should never happen in the non-concurrent case
- barf("Closure outside of non-moving heap: %p", p);
-#else
+ // Here we have an object living outside of the non-moving heap. While
+ // we likely evacuated nearly everything to the nonmoving heap during
+ // preparation, there are nevertheless a few ways in which we might trace
+ // a reference into younger generations:
+ //
+ // * a mutable object might have been updated
+ // * we might have aged an object
return;
-#endif
}
ASSERTM(LOOKS_LIKE_CLOSURE_PTR(p), "invalid closure, info=%p", p->header.info);
@@ -1434,19 +1578,20 @@ mark_closure (MarkQueue *queue, StgClosure *p, StgClosure **origin)
GNUC_ATTR_HOT void
nonmovingMark (MarkQueue *queue)
{
+ traceConcMarkBegin();
debugTrace(DEBUG_nonmoving_gc, "Starting mark pass");
unsigned int count = 0;
while (true) {
count++;
MarkQueueEnt ent = markQueuePop(queue);
- switch (ent.type) {
+ switch (nonmovingMarkQueueEntryType(&ent)) {
case MARK_CLOSURE:
mark_closure(queue, ent.mark_closure.p, ent.mark_closure.origin);
break;
case MARK_ARRAY: {
const StgMutArrPtrs *arr = ent.mark_array.array;
- StgWord start = ent.mark_array.start_index;
+ StgWord start = ent.mark_array.start_index >> 16;
StgWord end = start + MARK_ARRAY_CHUNK_LENGTH;
if (end < arr->ptrs) {
markQueuePushArray(queue, ent.mark_array.array, end);
@@ -1474,6 +1619,7 @@ nonmovingMark (MarkQueue *queue)
} else {
// Nothing more to do
debugTrace(DEBUG_nonmoving_gc, "Finished mark pass: %d", count);
+ traceConcMarkEnd(count);
return;
}
}
@@ -1691,13 +1837,17 @@ void nonmovingResurrectThreads (struct MarkQueue_ *queue, StgTSO **resurrected_t
void printMarkQueueEntry (MarkQueueEnt *ent)
{
- if (ent->type == MARK_CLOSURE) {
+ switch(nonmovingMarkQueueEntryType(ent)) {
+ case MARK_CLOSURE:
debugBelch("Closure: ");
printClosure(ent->mark_closure.p);
- } else if (ent->type == MARK_ARRAY) {
+ break;
+ case MARK_ARRAY:
debugBelch("Array\n");
- } else {
+ break;
+ case NULL_ENTRY:
debugBelch("End of mark\n");
+ break;
}
}
diff --git a/rts/sm/NonMovingMark.h b/rts/sm/NonMovingMark.h
index 84b6642d6c..806776cdc5 100644
--- a/rts/sm/NonMovingMark.h
+++ b/rts/sm/NonMovingMark.h
@@ -43,21 +43,40 @@ enum EntryType {
*/
typedef struct {
- enum EntryType type;
- // All pointers should be untagged
+ // Which kind of mark queue entry we have is determined by the low bits of
+ // the second word: they must be zero in the case of a mark_closure entry
+ // (since the second word of a mark_closure entry points to a pointer and
+ // pointers must be word-aligned). In the case of a mark_array we set them
+ // to 0x3 (the value of start_index is shifted to the left to accommodate
+ // this). null_entry where p==NULL is used to indicate the end of the queue.
union {
struct {
+ void *p; // must be NULL
+ } null_entry;
+ struct {
StgClosure *p; // the object to be marked
StgClosure **origin; // field where this reference was found.
// See Note [Origin references in the nonmoving collector]
} mark_closure;
struct {
const StgMutArrPtrs *array;
- StgWord start_index;
+ StgWord start_index; // start index is shifted to the left by 16 bits
} mark_array;
};
} MarkQueueEnt;
+INLINE_HEADER enum EntryType nonmovingMarkQueueEntryType(MarkQueueEnt *ent)
+{
+ if (ent->null_entry.p == NULL) {
+ return NULL_ENTRY;
+ } else if (((uintptr_t) ent->mark_closure.origin & TAG_BITS) == 0) {
+ return MARK_CLOSURE;
+ } else {
+ ASSERT((ent->mark_array.start_index & TAG_BITS) == 0x3);
+ return MARK_ARRAY;
+ }
+}
+
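
A tiny self-contained check of the encoding described by this comment and decoded by nonmovingMarkQueueEntryType: an array entry stores (start_index << 16) | 0x3 in its second word, so its low bits never look like a word-aligned origin pointer, and the index comes back with a right shift on pop. The value 0x7 below is an assumed stand-in for TAG_BITS.

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        const uintptr_t tag_bits    = 0x7;   /* assumed value of TAG_BITS */
        const uintptr_t start_index = 42;
        const uintptr_t encoded     = (start_index << 16) | 0x3;

        assert((encoded & tag_bits) == 0x3);      /* classified as MARK_ARRAY */
        assert((encoded >> 16) == start_index);   /* index recovered on pop   */
        assert((encoded & tag_bits) != 0);        /* never mistaken for an
                                                     aligned origin pointer   */
        return 0;
    }
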
typedef struct {
// index of first *unused* queue entry
uint32_t head;
@@ -65,6 +84,9 @@ typedef struct {
MarkQueueEnt entries[];
} MarkQueueBlock;
+// How far ahead in the mark queue should we prefetch?
+#define MARK_PREFETCH_QUEUE_DEPTH 5
+
/* The mark queue is not capable of concurrent read or write.
*
* invariants:
@@ -83,9 +105,12 @@ typedef struct MarkQueue_ {
// Is this a mark queue or a capability-local update remembered set?
bool is_upd_rem_set;
- // Marked objects outside of nonmoving heap, namely large and static
- // objects.
- HashTable *marked_objects;
+#if MARK_PREFETCH_QUEUE_DEPTH > 0
+ // A ring-buffer of entries which we will mark next
+ MarkQueueEnt prefetch_queue[MARK_PREFETCH_QUEUE_DEPTH];
+ // The first free slot in prefetch_queue.
+ uint8_t prefetch_head;
+#endif
} MarkQueue;
/* While it shares its representation with MarkQueue, UpdRemSet differs in
@@ -97,8 +122,11 @@ typedef struct {
MarkQueue queue;
} UpdRemSet;
+// Number of blocks to allocate for a mark queue
+#define MARK_QUEUE_BLOCKS 16
+
// The length of MarkQueueBlock.entries
-#define MARK_QUEUE_BLOCK_ENTRIES ((BLOCK_SIZE - sizeof(MarkQueueBlock)) / sizeof(MarkQueueEnt))
+#define MARK_QUEUE_BLOCK_ENTRIES ((MARK_QUEUE_BLOCKS * BLOCK_SIZE - sizeof(MarkQueueBlock)) / sizeof(MarkQueueEnt))
extern bdescr *nonmoving_large_objects, *nonmoving_marked_large_objects;
extern memcount n_nonmoving_large_blocks, n_nonmoving_marked_large_blocks;
@@ -145,8 +173,10 @@ void nonmovingResurrectThreads(struct MarkQueue_ *queue, StgTSO **resurrected_th
bool nonmovingIsAlive(StgClosure *p);
void nonmovingMarkDeadWeak(struct MarkQueue_ *queue, StgWeak *w);
void nonmovingMarkLiveWeak(struct MarkQueue_ *queue, StgWeak *w);
+void nonmovingAddUpdRemSetBlocks(struct MarkQueue_ *rset);
void markQueuePush(MarkQueue *q, const MarkQueueEnt *ent);
+void markQueuePushClosureGC(MarkQueue *q, StgClosure *p);
void markQueuePushClosure(MarkQueue *q,
StgClosure *p,
StgClosure **origin);
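
For a rough feel of what the enlarged MARK_QUEUE_BLOCK_ENTRIES buys, here is a back-of-the-envelope calculation assuming a 4096-byte BLOCK_SIZE, a 24-byte (three-word) MarkQueueEnt, and an 8-byte MarkQueueBlock header; these sizes are assumptions for illustration, not values read from the headers.

    #include <stdio.h>

    int main(void)
    {
        const unsigned long block_size        = 4096; /* assumed BLOCK_SIZE             */
        const unsigned long mark_queue_blocks = 16;   /* MARK_QUEUE_BLOCKS above        */
        const unsigned long ent_size          = 24;   /* assumed sizeof(MarkQueueEnt)   */
        const unsigned long header_size       = 8;    /* assumed sizeof(MarkQueueBlock) */

        unsigned long entries =
            (mark_queue_blocks * block_size - header_size) / ent_size;
        /* Roughly 2730 entries per group under these assumptions, versus ~170
         * for a single block. */
        printf("entries per %lu-block group: %lu\n", mark_queue_blocks, entries);
        return 0;
    }
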
diff --git a/rts/sm/NonMovingScav.c b/rts/sm/NonMovingScav.c
index 850750b43b..0efbde688c 100644
--- a/rts/sm/NonMovingScav.c
+++ b/rts/sm/NonMovingScav.c
@@ -16,6 +16,7 @@ nonmovingScavengeOne (StgClosure *q)
ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
StgPtr p = (StgPtr)q;
const StgInfoTable *info = get_itbl(q);
+ const bool saved_eager_promotion = gct->eager_promotion;
switch (info->type) {
@@ -23,9 +24,11 @@ nonmovingScavengeOne (StgClosure *q)
case MVAR_DIRTY:
{
StgMVar *mvar = ((StgMVar *)p);
+ gct->eager_promotion = false;
evacuate((StgClosure **)&mvar->head);
evacuate((StgClosure **)&mvar->tail);
evacuate((StgClosure **)&mvar->value);
+ gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
mvar->header.info = &stg_MVAR_DIRTY_info;
} else {
@@ -37,8 +40,10 @@ nonmovingScavengeOne (StgClosure *q)
case TVAR:
{
StgTVar *tvar = ((StgTVar *)p);
+ gct->eager_promotion = false;
evacuate((StgClosure **)&tvar->current_value);
evacuate((StgClosure **)&tvar->first_watch_queue_entry);
+ gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
tvar->header.info = &stg_TVAR_DIRTY_info;
} else {
@@ -122,10 +127,21 @@ nonmovingScavengeOne (StgClosure *q)
break;
}
+ case WEAK:
+ {
+ // We must evacuate the key since it may refer to an object in the
+ // moving heap which may be long gone by the time we call
+ // nonmovingTidyWeaks.
+ StgWeak *weak = (StgWeak *) p;
+ gct->eager_promotion = true;
+ evacuate(&weak->key);
+ gct->eager_promotion = saved_eager_promotion;
+ goto gen_obj;
+ }
+
gen_obj:
case CONSTR:
case CONSTR_NOCAF:
- case WEAK:
case PRIM:
{
StgPtr end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs;
@@ -145,7 +161,9 @@ nonmovingScavengeOne (StgClosure *q)
case MUT_VAR_CLEAN:
case MUT_VAR_DIRTY:
+ gct->eager_promotion = false;
evacuate(&((StgMutVar *)p)->var);
+ gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
} else {
@@ -157,10 +175,12 @@ nonmovingScavengeOne (StgClosure *q)
{
StgBlockingQueue *bq = (StgBlockingQueue *)p;
+ gct->eager_promotion = false;
evacuate(&bq->bh);
evacuate((StgClosure**)&bq->owner);
evacuate((StgClosure**)&bq->queue);
evacuate((StgClosure**)&bq->link);
+ gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
@@ -202,11 +222,9 @@ nonmovingScavengeOne (StgClosure *q)
case MUT_ARR_PTRS_CLEAN:
case MUT_ARR_PTRS_DIRTY:
{
- // We don't eagerly promote objects pointed to by a mutable
- // array, but if we find the array only points to objects in
- // the same or an older generation, we mark it "clean" and
- // avoid traversing it during minor GCs.
+ gct->eager_promotion = false;
scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);
+ gct->eager_promotion = saved_eager_promotion;
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
} else {
@@ -234,14 +252,13 @@ nonmovingScavengeOne (StgClosure *q)
case SMALL_MUT_ARR_PTRS_DIRTY:
// follow everything
{
- // We don't eagerly promote objects pointed to by a mutable
- // array, but if we find the array only points to objects in
- // the same or an older generation, we mark it "clean" and
- // avoid traversing it during minor GCs.
StgPtr next = p + small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs*)p);
+ gct->eager_promotion = false;
for (p = (P_)((StgSmallMutArrPtrs *)p)->payload; p < next; p++) {
evacuate((StgClosure **)p);
}
+ gct->eager_promotion = saved_eager_promotion;
+
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
} else {
@@ -278,21 +295,21 @@ nonmovingScavengeOne (StgClosure *q)
{
StgStack *stack = (StgStack*)p;
+ gct->eager_promotion = false;
scavenge_stack(stack->sp, stack->stack + stack->stack_size);
+ gct->eager_promotion = saved_eager_promotion;
stack->dirty = gct->failed_to_evac;
- // TODO (osa): There may be something special about stacks that we're
- // missing. All other mut objects are marked by using a different info
- // table except stacks.
-
break;
}
case MUT_PRIM:
{
StgPtr end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ gct->eager_promotion = false;
for (p = (P_)((StgClosure *)p)->payload; p < end; p++) {
evacuate((StgClosure **)p);
}
+ gct->eager_promotion = saved_eager_promotion;
gct->failed_to_evac = true; // mutable
break;
}
@@ -302,12 +319,14 @@ nonmovingScavengeOne (StgClosure *q)
StgWord i;
StgTRecChunk *tc = ((StgTRecChunk *) p);
TRecEntry *e = &(tc -> entries[0]);
+ gct->eager_promotion = false;
evacuate((StgClosure **)&tc->prev_chunk);
for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
evacuate((StgClosure **)&e->tvar);
evacuate((StgClosure **)&e->expected_value);
evacuate((StgClosure **)&e->new_value);
}
+ gct->eager_promotion = saved_eager_promotion;
gct->failed_to_evac = true; // mutable
break;
}
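
The hunks above all apply the same idiom: save gct->eager_promotion, clear it while evacuating a mutable object's fields, restore it, then consult gct->failed_to_evac to decide whether the object keeps its dirty info table. A distilled sketch, with invented names, looks like this:

    #include <stdbool.h>

    /* Illustrative types and names only; the real per-thread GC state is the
     * RTS's gct, of which only the two flags used above are modelled here. */
    struct gc_thread_state {
        bool eager_promotion;
        bool failed_to_evac;
    };

    /* Returns true if the object must be left (or marked) dirty. */
    bool scavenge_mutable_object(struct gc_thread_state *gct,
                                 void (*evac_fields)(struct gc_thread_state *))
    {
        const bool saved_eager_promotion = gct->eager_promotion;
        gct->eager_promotion = false;      /* don't promote the referents         */
        evac_fields(gct);                  /* evacuate the object's payload       */
        gct->eager_promotion = saved_eager_promotion;
        return gct->failed_to_evac;        /* true => keep the *_DIRTY info table */
    }
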
diff --git a/rts/sm/NonMovingSweep.c b/rts/sm/NonMovingSweep.c
index fa3e38cca4..7af5508afc 100644
--- a/rts/sm/NonMovingSweep.c
+++ b/rts/sm/NonMovingSweep.c
@@ -17,38 +17,6 @@
#include "Trace.h"
#include "StableName.h"
-static struct NonmovingSegment *pop_all_filled_segments(struct NonmovingAllocator *alloc)
-{
- while (true) {
- struct NonmovingSegment *head = alloc->filled;
- if (cas((StgVolatilePtr) &alloc->filled, (StgWord) head, (StgWord) NULL) == (StgWord) head)
- return head;
- }
-}
-
-void nonmovingPrepareSweep()
-{
- ASSERT(nonmovingHeap.sweep_list == NULL);
-
- // Move blocks in the allocators' filled lists into sweep_list
- for (unsigned int alloc_idx = 0; alloc_idx < NONMOVING_ALLOCA_CNT; alloc_idx++)
- {
- struct NonmovingAllocator *alloc = nonmovingHeap.allocators[alloc_idx];
- struct NonmovingSegment *filled = pop_all_filled_segments(alloc);
-
- // Link filled to sweep_list
- if (filled) {
- struct NonmovingSegment *filled_head = filled;
- // Find end of filled list
- while (filled->link) {
- filled = filled->link;
- }
- filled->link = nonmovingHeap.sweep_list;
- nonmovingHeap.sweep_list = filled_head;
- }
- }
-}
-
// On which list should a particular segment be placed?
enum SweepResult {
SEGMENT_FREE, // segment is empty: place on free list
@@ -102,7 +70,7 @@ nonmovingSweepSegment(struct NonmovingSegment *seg)
#if defined(DEBUG)
-void nonmovingGcCafs(struct MarkQueue_ *queue)
+void nonmovingGcCafs()
{
uint32_t i = 0;
StgIndStatic *next;
@@ -116,7 +84,8 @@ void nonmovingGcCafs(struct MarkQueue_ *queue)
const StgInfoTable *info = get_itbl((StgClosure*)caf);
ASSERT(info->type == IND_STATIC);
- if (lookupHashTable(queue->marked_objects, (StgWord) caf) == NULL) {
+ StgWord flag = ((StgWord) caf->static_link) & STATIC_BITS;
+ if (flag != 0 && flag != static_flag) {
debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", caf);
SET_INFO((StgClosure*)caf, &stg_GCD_CAF_info); // stub it
} else {
@@ -184,6 +153,126 @@ GNUC_ATTR_HOT void nonmovingSweep(void)
}
}
+/* Must a closure remain on the mutable list?
+ *
+ * A closure must remain if any of the following applies:
+ *
+ * 1. it contains references to a younger generation
+ * 2. it's a mutable closure (e.g. mutable array or MUT_PRIM)
+ *
+ * is_closure_clean returns true precisely when neither applies, meaning the
+ * closure can safely be dropped from the mutable list.
+ */
+static bool is_closure_clean(StgClosure *p)
+{
+ const StgInfoTable *info = get_itbl(p);
+
+#define CLEAN(ptr) (!HEAP_ALLOCED((StgClosure*) ptr) || Bdescr((StgPtr) ptr)->gen == oldest_gen)
+
+ switch (info->type) {
+ case MVAR_CLEAN:
+ case MVAR_DIRTY:
+ {
+ StgMVar *mvar = ((StgMVar *)p);
+ if (!CLEAN(mvar->head)) goto dirty_MVAR;
+ if (!CLEAN(mvar->tail)) goto dirty_MVAR;
+ if (!CLEAN(mvar->value)) goto dirty_MVAR;
+ mvar->header.info = &stg_MVAR_CLEAN_info;
+ return true;
+
+dirty_MVAR:
+ mvar->header.info = &stg_MVAR_DIRTY_info;
+ return false;
+ }
+
+ case TVAR:
+ {
+ StgTVar *tvar = ((StgTVar *)p);
+ if (!CLEAN(tvar->current_value)) goto dirty_TVAR;
+ if (!CLEAN(tvar->first_watch_queue_entry)) goto dirty_TVAR;
+ tvar->header.info = &stg_TVAR_CLEAN_info;
+ return true;
+
+dirty_TVAR:
+ tvar->header.info = &stg_TVAR_DIRTY_info;
+ return false;
+ }
+
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_2_0:
+ {
+ StgPtr end = (StgPtr)((StgThunk *)p)->payload + info->layout.payload.ptrs;
+ for (StgPtr q = (StgPtr)((StgThunk *)p)->payload; q < end; q++) {
+ if (!CLEAN(*q)) return false;
+ }
+ return true;
+ }
+
+ case FUN:
+ case FUN_1_0: // hardly worth specialising these guys
+ case FUN_0_1:
+ case FUN_1_1:
+ case FUN_0_2:
+ case FUN_2_0:
+ case CONSTR:
+ case CONSTR_NOCAF:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_2_0:
+ case PRIM:
+ {
+ StgPtr end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ for (StgPtr q = (StgPtr)((StgClosure *)p)->payload; q < end; q++) {
+ if (!CLEAN(*q)) return false;
+ }
+ return true;
+ }
+
+ case WEAK:
+ return false; // TODO
+
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ if (!CLEAN(((StgMutVar *)p)->var)) {
+ p->header.info = &stg_MUT_VAR_DIRTY_info;
+ return false;
+ } else {
+ p->header.info = &stg_MUT_VAR_CLEAN_info;
+ return true;
+ }
+
+ case BLOCKING_QUEUE:
+ {
+ StgBlockingQueue *bq = (StgBlockingQueue *)p;
+
+ if (!CLEAN(bq->bh)) goto dirty_BLOCKING_QUEUE;
+ if (!CLEAN(bq->owner)) goto dirty_BLOCKING_QUEUE;
+ if (!CLEAN(bq->queue)) goto dirty_BLOCKING_QUEUE;
+ if (!CLEAN(bq->link)) goto dirty_BLOCKING_QUEUE;
+ bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info;
+ return true;
+
+dirty_BLOCKING_QUEUE:
+ bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+ return false;
+ }
+
+ case THUNK_SELECTOR:
+ return CLEAN(((StgSelector *) p)->selectee);
+
+ case ARR_WORDS:
+ return true;
+
+ default:
+ // TODO: the rest
+ return false;
+ }
+#undef CLEAN
+}
+
/* N.B. This happens during the pause so we own all capabilities. */
void nonmovingSweepMutLists()
{
@@ -194,7 +283,7 @@ void nonmovingSweepMutLists()
for (bdescr *bd = old_mut_list; bd; bd = bd->link) {
for (StgPtr p = bd->start; p < bd->free; p++) {
StgClosure **q = (StgClosure**)p;
- if (nonmovingIsAlive(*q)) {
+ if (nonmovingIsAlive(*q) && !is_closure_clean(*q)) {
recordMutableCap(*q, cap, oldest_gen->no);
}
}
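
To make the CLEAN predicate concrete, here is a toy, self-contained model (generation numbers and names are invented; this is not RTS code): a reference is clean when it cannot be an old-to-young pointer, so a mutable object whose fields are all clean may be dropped from the mut_list, which is exactly what the nonmovingSweepMutLists change above does.

    #include <stdbool.h>
    #include <stdio.h>

    struct toy_obj { int gen; bool heap_allocated; };

    /* Mirrors CLEAN(ptr): static objects and oldest-generation objects can
     * never create an old-to-young reference. */
    static bool toy_clean(const struct toy_obj *ref, int oldest_gen_no)
    {
        return !ref->heap_allocated || ref->gen == oldest_gen_no;
    }

    int main(void)
    {
        struct toy_obj young = { .gen = 0, .heap_allocated = true };
        struct toy_obj old   = { .gen = 1, .heap_allocated = true };

        printf("points at young gen => stays on mut_list: %d\n",
               !toy_clean(&young, 1));   /* 1: must remain      */
        printf("points at oldest gen => can be dropped:   %d\n",
               toy_clean(&old, 1));      /* 1: clean, droppable */
        return 0;
    }
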
diff --git a/rts/sm/NonMovingSweep.h b/rts/sm/NonMovingSweep.h
index de2d52acb5..5ae5b687e3 100644
--- a/rts/sm/NonMovingSweep.h
+++ b/rts/sm/NonMovingSweep.h
@@ -22,11 +22,7 @@ void nonmovingSweepLargeObjects(void);
// Remove dead entries in the stable name table
void nonmovingSweepStableNameTable(void);
-// Collect the set of segments to be collected during a major GC into
-// nonmovingHeap.sweep_list.
-void nonmovingPrepareSweep(void);
-
#if defined(DEBUG)
// The non-moving equivalent of the moving collector's gcCAFs.
-void nonmovingGcCafs(struct MarkQueue_ *queue);
+void nonmovingGcCafs(void);
#endif
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 44163a353f..f04b3c5929 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -321,7 +321,8 @@ freeStorage (bool free_heap)
}
/* -----------------------------------------------------------------------------
- Note [CAF management].
+ Note [CAF management]
+ ~~~~~~~~~~~~~~~~~~~~~
The entry code for every CAF does the following:
@@ -356,6 +357,7 @@ freeStorage (bool free_heap)
------------------
Note [atomic CAF entry]
+ ~~~~~~~~~~~~~~~~~~~~~~~
With THREADED_RTS, newCAF() is required to be atomic (see
#5558). This is because if two threads happened to enter the same
@@ -369,6 +371,7 @@ freeStorage (bool free_heap)
------------------
Note [GHCi CAFs]
+ ~~~~~~~~~~~~~~~~
For GHCI, we have additional requirements when dealing with CAFs:
@@ -388,6 +391,51 @@ freeStorage (bool free_heap)
-- SDM 29/1/01
+ ------------------
+ Note [Static objects under the nonmoving collector]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Static object management is a bit tricky under the nonmoving collector as we
+ need to maintain a bit more state than in the moving collector. In
+ particular, the moving collector uses the low bits of the STATIC_LINK field
+ to determine whether the object has been moved to the scavenger's work list
+ (see Note [STATIC_LINK fields] in Storage.h).
+
+ However, the nonmoving collector also needs a place to keep its mark bit.
+ This is problematic as we therefore need at least three bits of state
+ but can assume only two bits are available in STATIC_LINK (due to 32-bit
+ systems).
+
+ To accommodate this we move handling of static objects entirely to the
+ oldest generation when the nonmoving collector is in use. To do this safely
+ and efficiently we allocate the blackhole created by lockCAF() directly in
+ the non-moving heap. This means that the moving collector can completely
+ ignore static objects in minor collections since they are guaranteed not to
+ have any references into the moving heap. Of course, the blackhole itself
+ likely will contain a reference into the moving heap but this is
+ significantly easier to handle, being a heap-allocated object (see Note
+ [Aging under the non-moving collector] in NonMoving.c for details).
+
+ During the moving phase of a major collection we treat static objects
+ as we do any other reference into the non-moving heap by pushing them
+ to the non-moving mark queue (see Note [Aging under the non-moving
+ collector]).
+
+ This allows the non-moving collector to have full control over the flags
+ in STATIC_LINK, which it uses as described in Note [STATIC_LINK fields].
+ This is implemented by NonMovingMark.c:bump_static_flag.
+
+ In short, the plan is:
+
+ - lockCAF allocates its blackhole in the nonmoving heap. This is important
+ to ensure that we do not need to place the static object on the mut_list,
+ since we would otherwise need some way to ensure that it is evacuated only
+ once during a moving collection.
+
+ - evacuate_static_object merely pushes objects to the mark queue.
+
+ - the nonmoving collector uses the flags in STATIC_LINK as its mark bit.
+
-------------------------------------------------------------------------- */
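
As a hedged illustration of the flag discipline described in this Note (constants and helpers below are invented stand-ins, not RTS identifiers): static_flag alternates between two values across major collections, bump_static_flag installs the current value when an object is first marked, and nonmovingGcCafs treats set-but-stale flag bits as "not marked this cycle".

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TOY_STATIC_BITS 0x3     /* stand-in for STATIC_BITS */
    #define TOY_FLAG_A      0x1
    #define TOY_FLAG_B      0x2

    static uintptr_t toy_static_flag = TOY_FLAG_A;   /* flips on each major GC */

    /* An object counts as marked in the current cycle iff the low bits of its
     * static link equal the current flag (cf. bump_static_flag). */
    static bool toy_is_marked(uintptr_t static_link)
    {
        return (static_link & TOY_STATIC_BITS) == toy_static_flag;
    }

    /* Mirrors the check in nonmovingGcCafs above: flag bits that are set but
     * stale mean the CAF was not marked this cycle and can be stubbed. */
    static bool toy_is_dead_caf(uintptr_t static_link)
    {
        uintptr_t flag = static_link & TOY_STATIC_BITS;
        return flag != 0 && flag != toy_static_flag;
    }

    int main(void)
    {
        uintptr_t marked_link = 0x1000 | TOY_FLAG_A;  /* marked this cycle */
        uintptr_t stale_link  = 0x2000 | TOY_FLAG_B;  /* marked last cycle */
        printf("marked: %d, dead CAF: %d\n",
               toy_is_marked(marked_link), toy_is_dead_caf(stale_link));
        return 0;
    }
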
STATIC_INLINE StgInd *
@@ -441,7 +489,16 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
caf->saved_info = orig_info;
// Allocate the blackhole indirection closure
- bh = (StgInd *)allocate(cap, sizeofW(*bh));
+ if (RtsFlags.GcFlags.useNonmoving) {
+ // See Note [Static objects under the nonmoving collector].
+ ACQUIRE_SM_LOCK;
+ bh = (StgInd *)nonmovingAllocate(cap, sizeofW(*bh));
+ RELEASE_SM_LOCK;
+ recordMutableCap((StgClosure*)bh,
+ regTableToCapability(reg), oldest_gen->no);
+ } else {
+ bh = (StgInd *)allocate(cap, sizeofW(*bh));
+ }
bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
// Ensure that above writes are visible before we introduce reference as CAF indirectee.
@@ -483,7 +540,9 @@ newCAF(StgRegTable *reg, StgIndStatic *caf)
else
{
// Put this CAF on the mutable list for the old generation.
- if (oldest_gen->no != 0) {
+ // N.B. the nonmoving collector works a bit differently: see
+ // Note [Static objects under the nonmoving collector].
+ if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) {
recordMutableCap((StgClosure*)caf,
regTableToCapability(reg), oldest_gen->no);
}
@@ -560,7 +619,9 @@ StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
if (!bh) return NULL;
// Put this CAF on the mutable list for the old generation.
- if (oldest_gen->no != 0) {
+ // N.B. the nonmoving collector works a bit differently:
+ // see Note [Static objects under the nonmoving collector].
+ if (oldest_gen->no != 0 && !RtsFlags.GcFlags.useNonmoving) {
recordMutableCap((StgClosure*)caf,
regTableToCapability(reg), oldest_gen->no);
}
@@ -1520,6 +1581,7 @@ void flushExec (W_ len, AdjustorExecutable exec_addr)
__clear_cache((void*)begin, (void*)end);
# endif
#elif defined(__GNUC__)
+ /* For all other platforms, fall back to a libgcc builtin. */
unsigned char* begin = (unsigned char*)exec_addr;
unsigned char* end = begin + len;
# if GCC_HAS_BUILTIN_CLEAR_CACHE