summaryrefslogtreecommitdiff
path: root/includes/rts
diff options
context:
space:
mode:
authorBen Gamari <ben@well-typed.com>2019-02-05 11:51:14 -0500
committerBen Gamari <ben@smart-cactus.org>2019-10-20 21:15:52 -0400
commitbd8e3ff43b64a72ed1c820e89691d0a83a1c6e96 (patch)
tree8b07778e3c09460edce24750ae6da4d487eb5774 /includes/rts
parentf8f77a070f4a9a93944dff0b7270162a40931c58 (diff)
downloadhaskell-bd8e3ff43b64a72ed1c820e89691d0a83a1c6e96.tar.gz
rts: Implement concurrent collection in the nonmoving collector
This extends the non-moving collector to allow concurrent collection. The full design of the collector implemented here is described in detail in a technical note B. Gamari. "A Concurrent Garbage Collector For the Glasgow Haskell Compiler" (2018) This extension involves the introduction of a capability-local remembered set, known as the /update remembered set/, which tracks objects which may no longer be visible to the collector due to mutation. To maintain this remembered set we introduce a write barrier on mutations which is enabled while a concurrent mark is underway. The update remembered set representation is similar to that of the nonmoving mark queue, being a chunked array of `MarkEntry`s. Each `Capability` maintains a single accumulator chunk, which it flushed when it (a) is filled, or (b) when the nonmoving collector enters its post-mark synchronization phase. While the write barrier touches a significant amount of code it is conceptually straightforward: the mutator must ensure that the referee of any pointer it overwrites is added to the update remembered set. However, there are a few details: * In the case of objects with a dirty flag (e.g. `MVar`s) we can exploit the fact that only the *first* mutation requires a write barrier. * Weak references, as usual, complicate things. In particular, we must ensure that the referee of a weak object is marked if dereferenced by the mutator. For this we (unfortunately) must introduce a read barrier, as described in Note [Concurrent read barrier on deRefWeak#] (in `NonMovingMark.c`). * Stable names are also a bit tricky as described in Note [Sweeping stable names in the concurrent collector] (`NonMovingSweep.c`). We take quite some pains to ensure that the high thread count often seen in parallel Haskell applications doesn't affect pause times. To this end we allow thread stacks to be marked either by the thread itself (when it is executed or stack-underflows) or the concurrent mark thread (if the thread owning the stack is never scheduled). There is a non-trivial handshake to ensure that this happens without racing which is described in Note [StgStack dirtiness flags and concurrent marking]. Co-Authored-by: Ömer Sinan Ağacan <omer@well-typed.com>
Diffstat (limited to 'includes/rts')
-rw-r--r--includes/rts/NonMoving.h24
-rw-r--r--includes/rts/storage/ClosureMacros.h14
-rw-r--r--includes/rts/storage/GC.h2
-rw-r--r--includes/rts/storage/TSO.h50
4 files changed, 88 insertions, 2 deletions
diff --git a/includes/rts/NonMoving.h b/includes/rts/NonMoving.h
new file mode 100644
index 0000000000..6a6d96b2c8
--- /dev/null
+++ b/includes/rts/NonMoving.h
@@ -0,0 +1,24 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2018-2019
+ *
+ * Non-moving garbage collector
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * http://ghc.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* This is called by the code generator */
+extern DLL_IMPORT_RTS
+void updateRemembSetPushClosure_(StgRegTable *reg, StgClosure *p);
+
+void updateRemembSetPushClosure(Capability *cap, StgClosure *p);
+
+void updateRemembSetPushThunk_(StgRegTable *reg, StgThunk *p);
+
+extern StgWord DLL_IMPORT_DATA_VAR(nonmoving_write_barrier_enabled);
diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h
index a3873cc49d..2af50863d0 100644
--- a/includes/rts/storage/ClosureMacros.h
+++ b/includes/rts/storage/ClosureMacros.h
@@ -107,6 +107,20 @@ INLINE_HEADER const StgConInfoTable *get_con_itbl(const StgClosure *c)
return CON_INFO_PTR_TO_STRUCT((c)->header.info);
}
+/* Used when we expect another thread to be mutating the info table pointer of
+ * a closure (e.g. when busy-waiting on a WHITEHOLE).
+ */
+INLINE_HEADER const StgInfoTable *get_volatile_itbl(StgClosure *c) {
+ // The volatile here is import to ensure that the compiler does not
+ // optimise away multiple loads, e.g. in a busy-wait loop. Note that
+ // we can't use VOLATILE_LOAD here as the casts result in strict aliasing
+ // rule violations and this header may be compiled outside of the RTS
+ // (where we use -fno-strict-aliasing).
+ StgInfoTable * *volatile p = (StgInfoTable * *volatile) &c->header.info;
+ return INFO_PTR_TO_STRUCT(*p);
+}
+
+
INLINE_HEADER StgHalfWord GET_TAG(const StgClosure *con)
{
return get_itbl(con)->srt;
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
index 77dbe60297..7931433019 100644
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -234,7 +234,7 @@ void setKeepCAFs (void);
and is put on the mutable list.
-------------------------------------------------------------------------- */
-void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p);
+void dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mv, StgClosure *old);
/* set to disable CAF garbage collection in GHCi. */
/* (needed when dynamic libraries are used). */
diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h
index 63d2a11e8e..d56ae8ad27 100644
--- a/includes/rts/storage/TSO.h
+++ b/includes/rts/storage/TSO.h
@@ -185,6 +185,53 @@ typedef struct StgTSO_ {
} *StgTSOPtr; // StgTSO defined in rts/Types.h
+/* Note [StgStack dirtiness flags and concurrent marking]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Without concurrent collection by the nonmoving collector the stack dirtiness story
+ * is quite simple: The stack is either STACK_DIRTY (meaning it has been added to mut_list)
+ * or not.
+ *
+ * However, things are considerably more complicated with concurrent collection
+ * (namely, when nonmoving_write_barrier_enabled is set): In addition to adding
+ * the stack to mut_list and flagging it as STACK_DIRTY, we also must ensure
+ * that stacks are marked in accordance with the nonmoving collector's snapshot
+ * invariant. This is: every stack alive at the time the snapshot is taken must
+ * be marked at some point after the moment the snapshot is taken and before it
+ * is mutated or the commencement of the sweep phase.
+ *
+ * This marking may be done by the concurrent mark phase (in the case of a
+ * thread that never runs during the concurrent mark) or by the mutator when
+ * dirtying the stack. However, it is unsafe for the concurrent collector to
+ * traverse the stack while it is under mutation. Consequently, the following
+ * handshake is obeyed by the mutator's write barrier and the concurrent mark to
+ * ensure this doesn't happen:
+ *
+ * 1. The entity seeking to mark first checks that the stack lives in the nonmoving
+ * generation; if not then the stack was not alive at the time the snapshot
+ * was taken and therefore we need not mark it.
+ *
+ * 2. The entity seeking to mark checks the stack's mark bit. If it is set then
+ * no mark is necessary.
+ *
+ * 3. The entity seeking to mark tries to lock the stack for marking by
+ * atomically setting its `marking` field to the current non-moving mark
+ * epoch:
+ *
+ * a. If the mutator finds the concurrent collector has already locked the
+ * stack then it waits until it is finished (indicated by the mark bit
+ * being set) before proceeding with execution.
+ *
+ * b. If the concurrent collector finds that the mutator has locked the stack
+ * then it moves on, leaving the mutator to mark it. There is no need to wait;
+ * the mark is guaranteed to finish before sweep due to the post-mark
+ * synchronization with mutators.
+ *
+ * c. Whoever succeeds in locking the stack is responsible for marking it and
+ * setting the stack's mark bit (either the BF_MARKED bit for large objects
+ * or otherwise its bit in its segment's mark bitmap).
+ *
+ */
#define STACK_DIRTY 1
// used by sanity checker to verify that all dirty stacks are on the mutable list
@@ -193,7 +240,8 @@ typedef struct StgTSO_ {
typedef struct StgStack_ {
StgHeader header;
StgWord32 stack_size; // stack size in *words*
- StgWord32 dirty; // non-zero => dirty
+ StgWord dirty; // non-zero => dirty
+ StgWord marking; // non-zero => someone is currently marking the stack
StgPtr sp; // current stack pointer
StgWord stack[];
} StgStack;