 compiler/cmm/CmmMachOp.hs               |   1
 compiler/cmm/CmmParse.y                 |   1
 compiler/cmm/PprC.hs                    |   1
 compiler/codeGen/StgCmmBind.hs          |   1
 compiler/llvmGen/LlvmCodeGen/CodeGen.hs |  21
 compiler/nativeGen/PPC/CodeGen.hs       |   3
 compiler/nativeGen/SPARC/CodeGen.hs     |   3
 compiler/nativeGen/X86/CodeGen.hs       |   4
 includes/Cmm.h                          |  12
 includes/stg/SMP.h                      | 145
 rts/Apply.cmm                           |   2
 rts/Compact.cmm                         |   3
 rts/Interpreter.c                       |  23
 rts/Messages.c                          |  14
 rts/PrimOps.cmm                         |  76
 rts/RaiseAsync.c                        |   1
 rts/Sparks.c                            |   1
 rts/StgMiscClosures.cmm                 |   6
 rts/ThreadPaused.c                      |   6
 rts/Threads.c                           |  28
 rts/TopHandler.c                        |   1
 rts/Updates.h                           |  10
 rts/Weak.c                              |   5
 rts/sm/CNF.c                            |   5
 rts/sm/Evac.c                           |  12
 rts/sm/GC.c                             |   2
 rts/sm/GCAux.c                          |   1
 rts/sm/MarkWeak.c                       |  10
 rts/sm/Sanity.c                         |   3
 rts/sm/Scav.c                           |   8
 rts/sm/Storage.c                        |   8
 31 files changed, 361 insertions(+), 56 deletions(-)
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 053b425ea1..9740d21bef 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -593,6 +593,7 @@ data CallishMachOp
| MO_SubIntC Width
| MO_U_Mul2 Width
+ | MO_ReadBarrier
| MO_WriteBarrier
| MO_Touch -- Keep variables live (when using interior pointers)
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index f83fb6b000..f563145250 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -1001,6 +1001,7 @@ machOps = listToUFM $
callishMachOps :: UniqFM ([CmmExpr] -> (CallishMachOp, [CmmExpr]))
callishMachOps = listToUFM $
map (\(x, y) -> (mkFastString x, y)) [
+ ( "read_barrier", (MO_ReadBarrier,)),
( "write_barrier", (MO_WriteBarrier,)),
( "memcpy", memcpyLikeTweakArgs MO_Memcpy ),
( "memset", memcpyLikeTweakArgs MO_Memset ),
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index f684968795..7227edd57e 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -812,6 +812,7 @@ pprCallishMachOp_for_C mop
MO_F32_ExpM1 -> text "expm1f"
MO_F32_Sqrt -> text "sqrtf"
MO_F32_Fabs -> text "fabsf"
+ MO_ReadBarrier -> text "load_load_barrier"
MO_WriteBarrier -> text "write_barrier"
MO_Memcpy _ -> text "memcpy"
MO_Memset _ -> text "memset"
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index 6e6e6ac791..68a79878d3 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -632,6 +632,7 @@ emitBlackHoleCode node = do
when eager_blackholing $ do
emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) currentTSOExpr
+ -- See Note [Heap memory barriers] in SMP.h.
emitPrimCall [] MO_WriteBarrier []
emitStore node (CmmReg (CmmGlobal EagerBlackholeInfo))
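The emitted sequence therefore follows the write-side pattern from Note [Heap memory barriers]: payload first, then the barrier, then the info pointer. Rendered as C for clarity (an illustrative sketch assuming the usual RTS headers; the real code is Cmm produced by the code generator):

```c
/* Illustrative C rendering of the eager-blackholing sequence emitted
 * by StgCmmBind.emitBlackHoleCode; not actual RTS code. */
static void eagerly_blackhole(StgInd *bh, StgTSO *tso)
{
    bh->indirectee = (StgClosure *)tso;   // 1. record the owning TSO
    write_barrier();                      // 2. commit the field write
    SET_INFO((StgClosure *)bh, &__stg_EAGER_BLACKHOLE_info);  // 3. publish
}
```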
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index bdf6a2642f..86a59381b2 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -169,17 +169,25 @@ barrier = do
let s = Fence False SyncSeqCst
return (unitOL s, [])
+-- | Insert a 'barrier', unless the target platform is in the provided list of
+-- exceptions (in which case no code is emitted).
+barrierUnless :: [Arch] -> LlvmM StmtData
+barrierUnless exs = do
+ platform <- getLlvmPlatform
+ if platformArch platform `elem` exs
+ then return (nilOL, [])
+ else barrier
+
-- | Foreign Calls
genCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
-> LlvmM StmtData
--- Write barrier needs to be handled specially as it is implemented as an LLVM
--- intrinsic function.
+-- Barriers need to be handled specially as they are implemented as LLVM
+-- intrinsic functions.
+genCall (PrimTarget MO_ReadBarrier) _ _ =
+ barrierUnless [ArchX86, ArchX86_64, ArchSPARC]
genCall (PrimTarget MO_WriteBarrier) _ _ = do
- platform <- getLlvmPlatform
- if platformArch platform `elem` [ArchX86, ArchX86_64, ArchSPARC]
- then return (nilOL, [])
- else barrier
+ barrierUnless [ArchX86, ArchX86_64, ArchSPARC]
genCall (PrimTarget MO_Touch) _ _
= return (nilOL, [])
@@ -831,6 +839,7 @@ cmmPrimOpFunctions mop = do
-- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the
-- appropriate case of genCall.
MO_U_Mul2 {} -> unsupported
+ MO_ReadBarrier -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
MO_UF_Conv _ -> unsupported
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 516cda0eb3..a49526c93a 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -1123,6 +1123,8 @@ genCCall :: ForeignTarget -- function to call
-> [CmmFormal] -- where to put the result
-> [CmmActual] -- arguments (of mixed type)
-> NatM InstrBlock
+genCCall (PrimTarget MO_ReadBarrier) _ _
+ = return $ unitOL LWSYNC
genCCall (PrimTarget MO_WriteBarrier) _ _
= return $ unitOL LWSYNC
@@ -2030,6 +2032,7 @@ genCCall' dflags gcp target dest_regs args
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
MO_U_Mul2 {} -> unsupported
+ MO_ReadBarrier -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
MO_Prefetch_Data _ -> unsupported
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 30a4d6979b..056d0c6fbf 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -401,6 +401,8 @@ genCCall
--
-- In the SPARC case we don't need a barrier.
--
+genCCall (PrimTarget MO_ReadBarrier) _ _
+ = return $ nilOL
genCCall (PrimTarget MO_WriteBarrier) _ _
= return $ nilOL
@@ -691,6 +693,7 @@ outOfLineMachOp_table mop
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
MO_U_Mul2 {} -> unsupported
+ MO_ReadBarrier -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
(MO_Prefetch_Data _) -> unsupported
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 73cfb28d46..13662f6807 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1891,8 +1891,9 @@ genCCall dflags _ (PrimTarget (MO_Memset align)) _
possibleWidth = minimum [left, sizeBytes]
dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
+genCCall _ _ (PrimTarget MO_ReadBarrier) _ _ _ = return nilOL
genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ _ = return nilOL
- -- write barrier compiles to no code on x86/x86-64;
+ -- barriers compile to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.
genCCall _ _ (PrimTarget MO_Touch) _ _ _ = return nilOL
@@ -2948,6 +2949,7 @@ outOfLineCmmOp bid mop res args
MO_AddWordC {} -> unsupported
MO_SubWordC {} -> unsupported
MO_U_Mul2 {} -> unsupported
+ MO_ReadBarrier -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
(MO_Prefetch_Data _ ) -> unsupported
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 99f5233ab5..e7d4b4944f 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -308,7 +308,9 @@
#define ENTER_(ret,x) \
again: \
W_ info; \
- LOAD_INFO(ret,x) \
+ LOAD_INFO(ret,x) \
+ /* See Note [Heap memory barriers] in SMP.h */ \
+ prim_read_barrier; \
switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \
(TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \
case \
@@ -631,6 +633,14 @@
#define OVERWRITING_CLOSURE_OFS(c,n) /* nothing */
#endif
+// Memory barriers.
+// For discussion of how these are used to fence heap object
+// accesses see Note [Heap memory barriers] in SMP.h.
+#if defined(THREADED_RTS)
+#define prim_read_barrier prim %read_barrier()
+#else
+#define prim_read_barrier /* nothing */
+#endif
#if defined(THREADED_RTS)
#define prim_write_barrier prim %write_barrier()
#else
diff --git a/includes/stg/SMP.h b/includes/stg/SMP.h
index 16761516ac..5487a9a4d6 100644
--- a/includes/stg/SMP.h
+++ b/includes/stg/SMP.h
@@ -96,6 +96,151 @@ EXTERN_INLINE void write_barrier(void);
EXTERN_INLINE void store_load_barrier(void);
EXTERN_INLINE void load_load_barrier(void);
+/*
+ * Note [Heap memory barriers]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Machines with weak memory ordering semantics have consequences for how
+ * closures are observed and mutated. For example, consider a thunk that needs
+ * to be updated to an indirection. In order for the indirection to be safe for
+ * concurrent observers to enter, said observers must read the indirection's
+ * info table before they read the indirectee. Furthermore, the indirectee must
+ * be set before the info table pointer. This ensures that if the observer sees
+ * an IND info table then the indirectee is valid.
+ *
+ * When a closure is updated with an indirection, both its info table and its
+ * indirectee must be written. With weak memory ordering, these two writes can
+ * be arbitrarily reordered, and perhaps even interleaved with other threads'
+ * reads and writes (in the absence of memory barrier instructions). Consider
+ * this example of a bad reordering:
+ *
+ * - An updater writes to a closure's info table (INFO_TYPE is now IND).
+ * - A concurrent observer branches upon reading the closure's INFO_TYPE as IND.
+ * - A concurrent observer reads the closure's indirectee and enters it.
+ * - An updater writes the closure's indirectee.
+ *
+ * Here the update to the indirectee comes too late and the concurrent observer
+ * has jumped off into the abyss. Speculative execution can also cause us
+ * issues, consider:
+ *
+ * - An observer is about to case on a value in the closure's info table.
+ * - The observer speculatively reads one or more of the closure's fields.
+ * - An updater writes to the closure's info table.
+ * - The observer takes a branch based on the new info table value, but with the
+ *   old closure fields!
+ * - The updater writes to the closure's other fields, but it's too late.
+ *
+ * Because of these effects, reads and writes to a closure's info table must be
+ * ordered carefully with respect to reads and writes to the closure's other
+ * fields, and memory barriers must be placed to ensure that reads and writes
+ * occur in program order. Specifically, updates to an already existing closure
+ * must follow the following pattern:
+ *
+ * - Update the closure's (non-info table) fields.
+ * - Write barrier.
+ * - Update the closure's info table.
+ *
+ * Observing the fields of an updateable closure (e.g. a THUNK) must follow the
+ * following pattern:
+ *
+ * - Read the closure's info pointer.
+ * - Read barrier.
+ * - Read the closure's (non-info table) fields.
+ *
+ * We must also take care when we expose a newly-allocated closure to other cores
+ * by writing a pointer to it to some shared data structure (e.g. an MVar#, a Message,
+ * or MutVar#). Specifically, we need to ensure that all writes constructing the
+ * closure are visible *before* the write exposing the new closure is made visible:
+ *
+ * - Allocate memory for the closure
+ * - Write the closure's info pointer and fields (the ordering between these doesn't
+ * matter since the closure isn't yet visible to anyone else).
+ * - Write barrier
+ * - Make closure visible to other cores
+ *
+ * Note that thread stacks are inherently thread-local and consequently allocating an
+ * object and introducing a reference to it on our stack needs no barrier.
+ *
+ * There are several ways in which the mutator may make a newly-allocated
+ * closure visible to other cores:
+ *
+ * - Eager blackholing a THUNK:
+ * This is protected by an explicit write barrier in the eager blackholing
+ * code produced by the codegen. See StgCmmBind.emitBlackHoleCode.
+ *
+ * - Lazy blackholing a THUNK:
+ *   This is protected by an explicit write barrier in the thread suspension
+ * code. See ThreadPaused.c:threadPaused.
+ *
+ * - Updating a BLACKHOLE:
+ *   This case is protected by explicit write barriers in the update frame
+ * entry code (see rts/Updates.h).
+ *
+ * - Blocking on an MVar# (e.g. takeMVar#):
+ *   In this case the appropriate MVar primops (e.g. stg_takeMVarzh) include
+ *   explicit memory barriers to ensure that the newly-allocated
+ * MVAR_TSO_QUEUE is visible to other cores.
+ *
+ * - Write to an MVar# (e.g. putMVar#):
+ *   This is protected by the full barrier implied by the CAS in putMVar#.
+ *
+ * - Write to a TVar#:
+ * This is protected by the full barrier implied by the CAS in STM.c:lock_stm.
+ *
+ * - Write to an Array#, ArrayArray#, or SmallArray#:
+ * This case is protected by an explicit write barrier in the code produced
+ * for this primop by the codegen. See StgCmmPrim.doWritePtrArrayOp and
+ * StgCmmPrim.doWriteSmallPtrArrayOp. Relevant issue: #12469.
+ *
+ * - Write to MutVar# via writeMutVar#:
+ * This case is protected by an explicit write barrier in the code produced
+ * for this primop by the codegen.
+ *
+ * - Write to MutVar# via atomicModifyMutVar# or casMutVar#:
+ * This is protected by the full barrier implied by the cmpxchg operations
+ *   in these primops.
+ *
+ * - Sending a Message to another capability:
+ *   This is protected by the acquisition and release of the target capability's
+ * lock in Messages.c:sendMessage.
+ *
+ * Finally, we must ensure that we flush all cores' store buffers before
+ * entering and leaving GC, since stacks may be read by other cores. This
+ * happens as a side-effect of taking and releasing mutexes (which imply
+ * acquire and release barriers, respectively).
+ *
+ * N.B. recordClosureMutated places a reference to the mutated object on
+ * the capability-local mut_list. Consequently this does not require any memory
+ * barrier.
+ *
+ * During parallel GC we need to be careful during evacuation: before replacing
+ * a closure with a forwarding pointer we must issue a write barrier to ensure
+ * that the copy we made in to-space is visible to other cores.
+ *
+ * However, we can be a bit lax when *reading* during GC. Specifically, the GC
+ * can only make a very limited set of changes to existing closures:
+ *
+ * - it can replace a closure's info table with stg_WHITEHOLE.
+ * - it can replace a previously-whitehole'd closure's info table with a
+ *   forwarding pointer.
+ * - it can replace a previously-whitehole'd closure's info table with a
+ *   valid info table pointer (done in eval_thunk_selector).
+ * - it can update the value of a pointer field after evacuating it.
+ *
+ * This is quite nice since we don't need to worry about an interleaving
+ * of writes producing an invalid state: a closure's fields remain valid after
+ * an update of its info table pointer and vice-versa.
+ *
+ * After a round of parallel scavenging we must also ensure that any writes the
+ * GC thread workers made are visible to the main GC thread. This is ensured by
+ * the full barrier implied by the atomic decrement in
+ * GC.c:scavenge_until_all_done.
+ *
+ * The work-stealing queue (WSDeque) also requires barriers; these are
+ * documented in WSDeque.c.
+ *
+ */
+
/* ----------------------------------------------------------------------------
Implementations
------------------------------------------------------------------------- */
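Taken together, the write-side and read-side disciplines of the Note pair up as follows. This is a minimal C sketch with illustrative helper names (assuming Rts.h), not actual RTS functions:

```c
/* Writer: fields first, fence, then publish via the info pointer. */
static void update_to_indirection(StgClosure *p, StgClosure *val)
{
    ((StgInd *)p)->indirectee = val;    // 1. write the non-info fields
    write_barrier();                    // 2. commit them
    SET_INFO(p, &stg_BLACKHOLE_info);   // 3. publish via the info pointer
}

/* Reader: info pointer first, fence, then the fields are safe to read. */
static StgClosure *observe_closure(StgClosure *p)
{
    const StgInfoTable *info = GET_INFO(p);  // 1. read the info pointer
    load_load_barrier();                     // 2. fence
    if (info == &stg_BLACKHOLE_info)
        return ((StgInd *)p)->indirectee;    // 3. read the fields
    return p;
}
```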
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index 13eb135412..8d7fc3c012 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -62,6 +62,8 @@ again:
W_ info;
P_ untaggedfun;
W_ arity;
+ // We must obey the correct heap object observation pattern in
+ // Note [Heap memory barriers] in SMP.h.
untaggedfun = UNTAG(fun);
info = %INFO_PTR(untaggedfun);
switch [INVALID_OBJECT .. N_CLOSURE_TYPES]
diff --git a/rts/Compact.cmm b/rts/Compact.cmm
index 061646846d..bae94a03cd 100644
--- a/rts/Compact.cmm
+++ b/rts/Compact.cmm
@@ -53,6 +53,9 @@ import CLOSURE base_GHCziIOziException_cannotCompactPinned_closure;
// data structure. It takes the location to store the address of the
// compacted object as an argument, so that it can be tail-recursive.
//
+// N.B. No memory barrier (see Note [Heap memory barriers] in SMP.h) is needed
+// here since this is essentially an allocation of a new object which won't
+// be visible to other cores until after we return.
stg_compactAddWorkerzh (
P_ compact, // The Compact# object
P_ p, // The object to compact
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index e4b9d5696e..2a886ff8a4 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -266,7 +266,6 @@ StgClosure * copyPAP (Capability *cap, StgPAP *oldpap)
uint32_t size = PAP_sizeW(oldpap->n_args);
StgPAP *pap = (StgPAP *)allocate(cap, size);
enterFunCCS(&cap->r, oldpap->header.prof.ccs);
- SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
pap->arity = oldpap->arity;
pap->n_args = oldpap->n_args;
pap->fun = oldpap->fun;
@@ -274,6 +273,8 @@ StgClosure * copyPAP (Capability *cap, StgPAP *oldpap)
for (i = 0; i < ((StgPAP *)pap)->n_args; i++) {
pap->payload[i] = oldpap->payload[i];
}
+ // No write barrier is needed here as this is a new allocation
+ SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
return (StgClosure *)pap;
}
@@ -799,7 +800,6 @@ do_apply:
// build a new PAP and return it.
StgPAP *new_pap;
new_pap = (StgPAP *)allocate(cap, PAP_sizeW(pap->n_args + m));
- SET_HDR(new_pap,&stg_PAP_info,cap->r.rCCCS);
new_pap->arity = pap->arity - n;
new_pap->n_args = pap->n_args + m;
new_pap->fun = pap->fun;
@@ -809,6 +809,8 @@ do_apply:
for (i = 0; i < m; i++) {
new_pap->payload[pap->n_args + i] = (StgClosure *)SpW(i);
}
+ // No write barrier is needed here as this is a new allocation
+ SET_HDR(new_pap,&stg_PAP_info,cap->r.rCCCS);
tagged_obj = (StgClosure *)new_pap;
Sp_addW(m);
goto do_return;
@@ -844,13 +846,14 @@ do_apply:
StgPAP *pap;
uint32_t i;
pap = (StgPAP *)allocate(cap, PAP_sizeW(m));
- SET_HDR(pap, &stg_PAP_info,cap->r.rCCCS);
pap->arity = arity - n;
pap->fun = obj;
pap->n_args = m;
for (i = 0; i < m; i++) {
pap->payload[i] = (StgClosure *)SpW(i);
}
+ // No write barrier is needed here as this is a new allocation
+ SET_HDR(pap, &stg_PAP_info,cap->r.rCCCS);
tagged_obj = (StgClosure *)pap;
Sp_addW(m);
goto do_return;
@@ -1081,7 +1084,6 @@ run_BCO:
// the BCO
size_words = BCO_BITMAP_SIZE(obj) + 2;
new_aps = (StgAP_STACK *) allocate(cap, AP_STACK_sizeW(size_words));
- SET_HDR(new_aps,&stg_AP_STACK_info,cap->r.rCCCS);
new_aps->size = size_words;
new_aps->fun = &stg_dummy_ret_closure;
@@ -1095,6 +1097,9 @@ run_BCO:
new_aps->payload[i] = (StgClosure *)SpW(i-2);
}
+ // No write barrier is needed here as this is a new allocation
+ SET_HDR(new_aps,&stg_AP_STACK_info,cap->r.rCCCS);
+
// Arrange the stack to call the breakpoint IO action, and
// continue execution of this BCO when the IO action returns.
//
@@ -1423,6 +1428,8 @@ run_BCO:
ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
SpW(-1) = (W_)ap;
ap->n_args = n_payload;
+ // No write barrier is needed here as this is a new allocation
+ // visible only from our stack
SET_HDR(ap, &stg_AP_info, cap->r.rCCCS)
Sp_subW(1);
goto nextInsn;
@@ -1434,6 +1441,8 @@ run_BCO:
ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
SpW(-1) = (W_)ap;
ap->n_args = n_payload;
+ // No write barrier is needed here as this is a new allocation
+ // visible only from our stack
SET_HDR(ap, &stg_AP_NOUPD_info, cap->r.rCCCS)
Sp_subW(1);
goto nextInsn;
@@ -1447,6 +1456,8 @@ run_BCO:
SpW(-1) = (W_)pap;
pap->n_args = n_payload;
pap->arity = arity;
+ // No write barrier is needed here as this is a new allocation
+ // visible only from our stack
SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS)
Sp_subW(1);
goto nextInsn;
@@ -1522,12 +1533,14 @@ run_BCO:
itbl->layout.payload.nptrs );
StgClosure* con = (StgClosure*)allocate_NONUPD(cap,request);
ASSERT( itbl->layout.payload.ptrs + itbl->layout.payload.nptrs > 0);
- SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), cap->r.rCCCS);
for (i = 0; i < n_words; i++) {
con->payload[i] = (StgClosure*)SpW(i);
}
Sp_addW(n_words);
Sp_subW(1);
+ // No write barrier is needed here as this is a new allocation
+ // visible only from our stack
+ SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), cap->r.rCCCS);
SpW(0) = (W_)con;
IF_DEBUG(interpreter,
debugBelch("\tBuilt ");
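All of the interpreter changes above share one shape: fully initialise the freshly allocated closure, write its header last, and rely on the closure being reachable only from the current thread's stack until it is later published. Schematically, with hypothetical parameter names rather than a verbatim excerpt:

```c
/* Sketch of the allocation pattern used above; names are hypothetical. */
static StgPAP *alloc_pap(Capability *cap, StgClosure *fun,
                         StgHalfWord arity, StgHalfWord n_args,
                         StgClosure **args)
{
    StgPAP *pap = (StgPAP *)allocate(cap, PAP_sizeW(n_args));
    pap->arity  = arity;                 // initialise payload fields first
    pap->n_args = n_args;
    pap->fun    = fun;
    for (StgHalfWord i = 0; i < n_args; i++)
        pap->payload[i] = args[i];
    // Header written last; no barrier needed since the PAP is so far
    // visible only from our own stack.
    SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
    return pap;
}
```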
diff --git a/rts/Messages.c b/rts/Messages.c
index 2b13b6306c..d878db5eda 100644
--- a/rts/Messages.c
+++ b/rts/Messages.c
@@ -173,6 +173,7 @@ uint32_t messageBlackHole(Capability *cap, MessageBlackHole *msg)
"blackhole %p", (W_)msg->tso->id, msg->bh);
info = bh->header.info;
+ load_load_barrier(); // See Note [Heap memory barriers] in SMP.h
// If we got this message in our inbox, it might be that the
// BLACKHOLE has already been updated, and GC has shorted out the
@@ -196,6 +197,7 @@ loop:
// and turns this into an infinite loop.
p = UNTAG_CLOSURE((StgClosure*)VOLATILE_LOAD(&((StgInd*)bh)->indirectee));
info = p->header.info;
+ load_load_barrier(); // See Note [Heap memory barriers] in SMP.h
if (info == &stg_IND_info)
{
@@ -226,7 +228,6 @@ loop:
bq = (StgBlockingQueue*)allocate(cap, sizeofW(StgBlockingQueue));
// initialise the BLOCKING_QUEUE object
- SET_HDR(bq, &stg_BLOCKING_QUEUE_DIRTY_info, CCS_SYSTEM);
bq->bh = bh;
bq->queue = msg;
bq->owner = owner;
@@ -238,6 +239,11 @@ loop:
// a collision to update a BLACKHOLE and a BLOCKING_QUEUE
// becomes orphaned (see updateThunk()).
bq->link = owner->bq;
+ SET_HDR(bq, &stg_BLOCKING_QUEUE_DIRTY_info, CCS_SYSTEM);
+    // We are about to make the newly-constructed BLOCKING_QUEUE visible to other cores;
+ // a barrier is necessary to ensure that all writes are visible.
+ // See Note [Heap memory barriers] in SMP.h.
+ write_barrier();
owner->bq = bq;
dirty_TSO(cap, owner); // we modified owner->bq
@@ -255,7 +261,7 @@ loop:
}
// point to the BLOCKING_QUEUE from the BLACKHOLE
- write_barrier(); // make the BQ visible
+ write_barrier(); // make the BQ visible, see Note [Heap memory barriers].
((StgInd*)bh)->indirectee = (StgClosure *)bq;
recordClosureMutated(cap,bh); // bh was mutated
@@ -286,10 +292,14 @@ loop:
msg->link = bq->queue;
bq->queue = msg;
+ // No barrier is necessary here: we are only exposing the
+ // closure to the GC. See Note [Heap memory barriers] in SMP.h.
recordClosureMutated(cap,(StgClosure*)msg);
if (info == &stg_BLOCKING_QUEUE_CLEAN_info) {
bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+ // No barrier is necessary here: we are only exposing the
+ // closure to the GC. See Note [Heap memory barriers] in SMP.h.
recordClosureMutated(cap,(StgClosure*)bq);
}
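Note the inverted order relative to the old code: the BLOCKING_QUEUE is now fully initialised, header included, before the write_barrier, and only then linked into owner->bq. Condensed into a sketch of the sequence above (not a verbatim excerpt):

```c
/* Condensed sketch of the publication sequence in messageBlackHole. */
static void publish_bq(Capability *cap, StgClosure *bh,
                       MessageBlackHole *msg, StgTSO *owner)
{
    StgBlockingQueue *bq =
        (StgBlockingQueue *)allocate(cap, sizeofW(StgBlockingQueue));
    bq->bh    = bh;                    // initialise every field...
    bq->queue = msg;
    bq->owner = owner;
    bq->link  = owner->bq;
    SET_HDR(bq, &stg_BLOCKING_QUEUE_DIRTY_info, CCS_SYSTEM); // ...header last
    write_barrier();                   // commit all writes before publishing
    owner->bq = bq;                    // other cores may now find it
}
```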
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index d89f0a952b..afb990dda5 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -102,6 +102,7 @@ stg_newPinnedByteArrayzh ( W_ n )
to BA_ALIGN bytes: */
p = p + ((-p - SIZEOF_StgArrBytes) & BA_MASK);
+ /* No write barrier needed since this is a new allocation. */
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
return (p);
@@ -144,6 +145,7 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
<alignment> is a power of 2, which is technically not guaranteed */
p = p + ((-p - SIZEOF_StgArrBytes) & (alignment - 1));
+ /* No write barrier needed since this is a new allocation. */
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
return (p);
@@ -254,6 +256,7 @@ stg_newArrayzh ( W_ n /* words */, gcptr init )
}
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);
+ /* No write barrier needed since this is a new allocation. */
SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgMutArrPtrs_ptrs(arr) = n;
StgMutArrPtrs_size(arr) = size;
@@ -405,6 +408,7 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
}
TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);
+ /* No write barrier needed since this is a new allocation. */
SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgSmallMutArrPtrs_ptrs(arr) = n;
@@ -522,6 +526,7 @@ stg_newMutVarzh ( gcptr init )
ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init);
mv = Hp - SIZEOF_StgMutVar + WDS(1);
+ /* No write barrier needed since this is a new allocation. */
SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
StgMutVar_var(mv) = init;
@@ -700,6 +705,7 @@ stg_mkWeakzh ( gcptr key,
ALLOC_PRIM (SIZEOF_StgWeak)
w = Hp - SIZEOF_StgWeak + WDS(1);
+ // No memory barrier needed as this is a new allocation.
SET_HDR(w, stg_WEAK_info, CCCS);
StgWeak_key(w) = key;
@@ -815,6 +821,7 @@ stg_deRefWeakzh ( gcptr w )
gcptr val;
info = GET_INFO(w);
+ prim_read_barrier;
if (info == stg_WHITEHOLE_info) {
// w is locked by another thread. Now it's not immediately clear if w is
@@ -1385,11 +1392,13 @@ stg_readTVarzh (P_ tvar)
stg_readTVarIOzh ( P_ tvar /* :: TVar a */ )
{
- W_ result;
+ W_ result, resultinfo;
again:
result = StgTVar_current_value(tvar);
- if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
+ resultinfo = %INFO_PTR(result);
+ prim_read_barrier;
+ if (resultinfo == stg_TREC_HEADER_info) {
goto again;
}
return (result);
@@ -1458,6 +1467,7 @@ stg_newMVarzh ()
ALLOC_PRIM_ (SIZEOF_StgMVar, stg_newMVarzh);
mvar = Hp - SIZEOF_StgMVar + WDS(1);
+ // No memory barrier needed as this is a new allocation.
SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS);
// MVARs start dirty: generation 0 has no mutable list
StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure;
@@ -1482,7 +1492,7 @@ stg_newMVarzh ()
stg_takeMVarzh ( P_ mvar /* :: MVar a */ )
{
- W_ val, info, tso, q;
+ W_ val, info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
@@ -1504,9 +1514,12 @@ stg_takeMVarzh ( P_ mvar /* :: MVar a */ )
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
- SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
StgMVarTSOQueue_tso(q) = CurrentTSO;
+ SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
+ // Write barrier before we make the new MVAR_TSO_QUEUE
+ // visible to other cores.
+ prim_write_barrier;
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_head(mvar) = q;
@@ -1536,8 +1549,10 @@ loop:
unlockClosure(mvar, info);
return (val);
}
- if (StgHeader_info(q) == stg_IND_info ||
- StgHeader_info(q) == stg_MSG_NULL_info) {
+ qinfo = StgHeader_info(q);
+ prim_read_barrier;
+ if (qinfo == stg_IND_info ||
+ qinfo == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
@@ -1575,7 +1590,7 @@ loop:
stg_tryTakeMVarzh ( P_ mvar /* :: MVar a */ )
{
- W_ val, info, tso, q;
+ W_ val, info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
@@ -1602,8 +1617,11 @@ loop:
return (1, val);
}
- if (StgHeader_info(q) == stg_IND_info ||
- StgHeader_info(q) == stg_MSG_NULL_info) {
+ qinfo = StgHeader_info(q);
+ prim_read_barrier;
+
+ if (qinfo == stg_IND_info ||
+ qinfo == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
@@ -1642,7 +1660,7 @@ loop:
stg_putMVarzh ( P_ mvar, /* :: MVar a */
P_ val, /* :: a */ )
{
- W_ info, tso, q;
+ W_ info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
@@ -1662,10 +1680,12 @@ stg_putMVarzh ( P_ mvar, /* :: MVar a */
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
- SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
StgMVarTSOQueue_tso(q) = CurrentTSO;
+ SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
+ prim_write_barrier;
+
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_head(mvar) = q;
} else {
@@ -1692,8 +1712,12 @@ loop:
unlockClosure(mvar, stg_MVAR_DIRTY_info);
return ();
}
- if (StgHeader_info(q) == stg_IND_info ||
- StgHeader_info(q) == stg_MSG_NULL_info) {
+
+ qinfo = StgHeader_info(q);
+ prim_read_barrier;
+
+ if (qinfo == stg_IND_info ||
+ qinfo == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
@@ -1750,7 +1774,7 @@ loop:
stg_tryPutMVarzh ( P_ mvar, /* :: MVar a */
P_ val, /* :: a */ )
{
- W_ info, tso, q;
+ W_ info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
@@ -1773,8 +1797,12 @@ loop:
unlockClosure(mvar, stg_MVAR_DIRTY_info);
return (1);
}
- if (StgHeader_info(q) == stg_IND_info ||
- StgHeader_info(q) == stg_MSG_NULL_info) {
+
+ qinfo = StgHeader_info(q);
+ prim_read_barrier;
+
+ if (qinfo == stg_IND_info ||
+ qinfo == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
@@ -1845,10 +1873,12 @@ stg_readMVarzh ( P_ mvar, /* :: MVar a */ )
// readMVars are pushed to the front of the queue, so
// they get handled immediately
- SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
StgMVarTSOQueue_link(q) = StgMVar_head(mvar);
StgMVarTSOQueue_tso(q) = CurrentTSO;
+ SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
+ prim_write_barrier;
+
StgTSO__link(CurrentTSO) = q;
StgTSO_block_info(CurrentTSO) = mvar;
StgTSO_why_blocked(CurrentTSO) = BlockedOnMVarRead::I16;
@@ -1913,6 +1943,10 @@ stg_makeStableNamezh ( P_ obj )
BYTES_TO_WDS(SIZEOF_StgStableName));
SET_HDR(sn_obj, stg_STABLE_NAME_info, CCCS);
StgStableName_sn(sn_obj) = index;
+ // This will make the StableName# object visible to other threads;
+    // make sure that it is completely visible to other cores.
+ // See Note [Heap memory barriers] in SMP.h.
+ prim_write_barrier;
snEntry_sn_obj(W_[stable_name_table] + index*SIZEOF_snEntry) = sn_obj;
} else {
sn_obj = snEntry_sn_obj(W_[stable_name_table] + index*SIZEOF_snEntry);
@@ -1954,6 +1988,7 @@ stg_newBCOzh ( P_ instrs,
ALLOC_PRIM (bytes);
bco = Hp - bytes + WDS(1);
+ // No memory barrier necessary as this is a new allocation.
SET_HDR(bco, stg_BCO_info, CCS_MAIN);
StgBCO_instrs(bco) = instrs;
@@ -1990,6 +2025,7 @@ stg_mkApUpd0zh ( P_ bco )
CCCS_ALLOC(SIZEOF_StgAP);
ap = Hp - SIZEOF_StgAP + WDS(1);
+ // No memory barrier necessary as this is a new allocation.
SET_HDR(ap, stg_AP_info, CCS_MAIN);
StgAP_n_args(ap) = HALF_W_(0);
@@ -2002,6 +2038,7 @@ stg_unpackClosurezh ( P_ closure )
{
W_ info, ptrs, nptrs, p, ptrs_arr, dat_arr;
info = %GET_STD_INFO(UNTAG(closure));
+ prim_read_barrier;
ptrs = TO_W_(%INFO_PTRS(info));
nptrs = TO_W_(%INFO_NPTRS(info));
@@ -2330,7 +2367,10 @@ stg_noDuplicatezh /* no arg list: explicit stack layout */
stg_getApStackValzh ( P_ ap_stack, W_ offset )
{
- if (%INFO_PTR(UNTAG(ap_stack)) == stg_AP_STACK_info) {
+ W_ ap_stackinfo;
+ ap_stackinfo = %INFO_PTR(UNTAG(ap_stack));
+ prim_read_barrier;
+ if (ap_stackinfo == stg_AP_STACK_info) {
return (1,StgAP_STACK_payload(ap_stack,offset));
} else {
return (0,ap_stack);
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index f58f9177c8..807c3e3d30 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -870,6 +870,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
ap->payload[i] = (StgClosure *)*sp++;
}
+ write_barrier(); // XXX: Necessary?
SET_HDR(ap,&stg_AP_STACK_info,
((StgClosure *)frame)->header.prof.ccs /* ToDo */);
TICK_ALLOC_UP_THK(WDS(words+1),0);
diff --git a/rts/Sparks.c b/rts/Sparks.c
index bd5e120863..4022691da2 100644
--- a/rts/Sparks.c
+++ b/rts/Sparks.c
@@ -182,6 +182,7 @@ pruneSparkQueue (Capability *cap)
traceEventSparkFizzle(cap);
} else {
info = spark->header.info;
+ load_load_barrier();
if (IS_FORWARDING_PTR(info)) {
tmp = (StgClosure*)UN_FORWARDING_PTR(info);
/* if valuable work: shift inside the pool */
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index fdd9f1565e..e80ce45172 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -292,12 +292,14 @@ INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE")
TICK_ENT_DYN_IND(); /* tick */
retry:
+ prim_read_barrier;
p = StgInd_indirectee(node);
if (GETTAG(p) != 0) {
return (p);
}
info = StgHeader_info(p);
+ prim_read_barrier;
if (info == stg_IND_info) {
// This could happen, if e.g. we got a BLOCKING_QUEUE that has
// just been replaced with an IND by another thread in
@@ -313,9 +315,11 @@ retry:
("ptr" msg) = ccall allocate(MyCapability() "ptr",
BYTES_TO_WDS(SIZEOF_MessageBlackHole));
- SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM);
MessageBlackHole_tso(msg) = CurrentTSO;
MessageBlackHole_bh(msg) = node;
+ SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM);
+ // messageBlackHole has appropriate memory barriers when this object is exposed.
+ // See Note [Heap memory barriers].
(r) = ccall messageBlackHole(MyCapability() "ptr", msg "ptr");
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index a916891aa8..cccc7ad0b0 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -195,6 +195,7 @@ threadPaused(Capability *cap, StgTSO *tso)
const StgRetInfoTable *info;
const StgInfoTable *bh_info;
const StgInfoTable *cur_bh_info USED_IF_THREADS;
+ const StgInfoTable *frame_info;
StgClosure *bh;
StgPtr stack_end;
uint32_t words_to_squeeze = 0;
@@ -218,6 +219,8 @@ threadPaused(Capability *cap, StgTSO *tso)
frame = (StgClosure *)tso->stackobj->sp;
+ // N.B. We know that the TSO is owned by the current capability so no
+ // memory barriers are needed here.
while ((P_)frame < stack_end) {
info = get_ret_itbl(frame);
@@ -226,7 +229,8 @@ threadPaused(Capability *cap, StgTSO *tso)
case UPDATE_FRAME:
// If we've already marked this frame, then stop here.
- if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) {
+ frame_info = frame->header.info;
+ if (frame_info == (StgInfoTable *)&stg_marked_upd_frame_info) {
if (prev_was_update_frame) {
words_to_squeeze += sizeofW(StgUpdateFrame);
weight += weight_pending;
diff --git a/rts/Threads.c b/rts/Threads.c
index 977635322d..2bdcea1c00 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -126,6 +126,8 @@ createThread(Capability *cap, W_ size)
ACQUIRE_LOCK(&sched_mutex);
tso->id = next_thread_id++; // while we have the mutex
tso->global_link = g0->threads;
+ /* Mutations above need no memory barrier since this lock will provide
+ * a release barrier */
g0->threads = tso;
RELEASE_LOCK(&sched_mutex);
@@ -257,8 +259,10 @@ tryWakeupThread (Capability *cap, StgTSO *tso)
{
MessageWakeup *msg;
msg = (MessageWakeup *)allocate(cap,sizeofW(MessageWakeup));
- SET_HDR(msg, &stg_MSG_TRY_WAKEUP_info, CCS_SYSTEM);
msg->tso = tso;
+ SET_HDR(msg, &stg_MSG_TRY_WAKEUP_info, CCS_SYSTEM);
+    // Ensure that the writes constructing the Message are committed before sending it.
+ write_barrier();
sendMessage(cap, tso->cap, (Message*)msg);
debugTraceCap(DEBUG_sched, cap, "message: try wakeup thread %ld on cap %d",
(W_)tso->id, tso->cap->no);
@@ -363,6 +367,7 @@ wakeBlockingQueue(Capability *cap, StgBlockingQueue *bq)
for (msg = bq->queue; msg != (MessageBlackHole*)END_TSO_QUEUE;
msg = msg->link) {
i = msg->header.info;
+ load_load_barrier();
if (i != &stg_IND_info) {
ASSERT(i == &stg_MSG_BLACKHOLE_info);
tryWakeupThread(cap,msg->tso);
@@ -392,15 +397,18 @@ checkBlockingQueues (Capability *cap, StgTSO *tso)
for (bq = tso->bq; bq != (StgBlockingQueue*)END_TSO_QUEUE; bq = next) {
next = bq->link;
- if (bq->header.info == &stg_IND_info) {
+ const StgInfoTable *bqinfo = bq->header.info;
+ load_load_barrier(); // XXX: Is this needed?
+ if (bqinfo == &stg_IND_info) {
// ToDo: could short it out right here, to avoid
// traversing this IND multiple times.
continue;
}
p = bq->bh;
-
- if (p->header.info != &stg_BLACKHOLE_info ||
+ const StgInfoTable *pinfo = p->header.info;
+ load_load_barrier();
+ if (pinfo != &stg_BLACKHOLE_info ||
((StgInd *)p)->indirectee != (StgClosure*)bq)
{
wakeBlockingQueue(cap,bq);
@@ -424,6 +432,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val)
const StgInfoTable *i;
i = thunk->header.info;
+ load_load_barrier();
if (i != &stg_BLACKHOLE_info &&
i != &stg_CAF_BLACKHOLE_info &&
i != &__stg_EAGER_BLACKHOLE_info &&
@@ -444,6 +453,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val)
}
i = v->header.info;
+ load_load_barrier();
if (i == &stg_TSO_info) {
checkBlockingQueues(cap, tso);
return;
@@ -667,6 +677,8 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
new_stack->sp -= chunk_words;
}
+    // No write barriers needed; all of the writes above are to structures
+    // owned by our capability.
tso->stackobj = new_stack;
// we're about to run it, better mark it dirty
@@ -738,6 +750,7 @@ threadStackUnderflow (Capability *cap, StgTSO *tso)
bool performTryPutMVar(Capability *cap, StgMVar *mvar, StgClosure *value)
{
const StgInfoTable *info;
+ const StgInfoTable *qinfo;
StgMVarTSOQueue *q;
StgTSO *tso;
@@ -762,8 +775,11 @@ loop:
unlockClosure((StgClosure*)mvar, &stg_MVAR_DIRTY_info);
return true;
}
- if (q->header.info == &stg_IND_info ||
- q->header.info == &stg_MSG_NULL_info) {
+
+ qinfo = q->header.info;
+ load_load_barrier();
+ if (qinfo == &stg_IND_info ||
+ qinfo == &stg_MSG_NULL_info) {
q = (StgMVarTSOQueue*)((StgInd*)q)->indirectee;
goto loop;
}
diff --git a/rts/TopHandler.c b/rts/TopHandler.c
index c0ac936b85..d5175015e7 100644
--- a/rts/TopHandler.c
+++ b/rts/TopHandler.c
@@ -29,6 +29,7 @@ StgTSO *getTopHandlerThread(void) {
StgWeak *weak = (StgWeak*)deRefStablePtr(topHandlerPtr);
RELEASE_LOCK(&m);
const StgInfoTable *info = weak->header.info;
+ load_load_barrier();
if (info == &stg_WEAK_info) {
StgClosure *key = ((StgWeak*)weak)->key;
diff --git a/rts/Updates.h b/rts/Updates.h
index 1ba398bd35..1bd3e065af 100644
--- a/rts/Updates.h
+++ b/rts/Updates.h
@@ -39,10 +39,16 @@
PROF_HDR_FIELDS(w_,ccs,p2) \
p_ updatee
-
+/*
+ * Getting the memory barriers correct here is quite tricky. Essentially the
+ * write barriers ensure that writes to the new indirectee are visible before
+ * we introduce the indirection, and that the indirectee field is visible
+ * before the info pointer is updated. See Note [Heap memory barriers] in SMP.h.
+ */
#define updateWithIndirection(p1, p2, and_then) \
W_ bd; \
\
+ prim_write_barrier; \
OVERWRITING_CLOSURE(p1); \
StgInd_indirectee(p1) = p2; \
prim_write_barrier; \
@@ -69,6 +75,8 @@ INLINE_HEADER void updateWithIndirection (Capability *cap,
ASSERT( (P_)p1 != (P_)p2 );
/* not necessarily true: ASSERT( !closure_IND(p1) ); */
/* occurs in RaiseAsync.c:raiseAsync() */
+ /* See Note [Heap memory barriers] in SMP.h */
+ write_barrier();
OVERWRITING_CLOSURE(p1);
((StgInd *)p1)->indirectee = p2;
write_barrier();
diff --git a/rts/Weak.c b/rts/Weak.c
index a322d822af..ec998c214f 100644
--- a/rts/Weak.c
+++ b/rts/Weak.c
@@ -57,7 +57,9 @@ runAllCFinalizers(StgWeak *list)
// If there's no major GC between the time that the finalizer for the
// object from the oldest generation is manually called and shutdown
// we end up running the same finalizer twice. See #7170.
- if (w->header.info != &stg_DEAD_WEAK_info) {
+ const StgInfoTable *winfo = w->header.info;
+ load_load_barrier();
+ if (winfo != &stg_DEAD_WEAK_info) {
runCFinalizers((StgCFinalizerList *)w->cfinalizers);
}
}
@@ -138,6 +140,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
size = n + mutArrPtrsCardTableSize(n);
arr = (StgMutArrPtrs *)allocate(cap, sizeofW(StgMutArrPtrs) + size);
TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
+    // No write barrier needed here; this array is only going to be referred to by this core.
SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, CCS_SYSTEM);
arr->ptrs = n;
arr->size = size;
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index d0447f867c..0432505cd2 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -542,8 +542,9 @@ insertCompactHash (Capability *cap,
StgClosure *p, StgClosure *to)
{
insertHashTable(str->hash, (StgWord)p, (const void*)to);
- if (str->header.info == &stg_COMPACT_NFDATA_CLEAN_info) {
- str->header.info = &stg_COMPACT_NFDATA_DIRTY_info;
+    const StgInfoTable **strinfo = &str->header.info;
+    if (*strinfo == &stg_COMPACT_NFDATA_CLEAN_info) {
+        *strinfo = &stg_COMPACT_NFDATA_DIRTY_info;
recordClosureMutated(cap, (StgClosure*)str);
}
}
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 2a2a26ec09..7c82caa185 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -131,7 +131,7 @@ copy_tag(StgClosure **p, const StgInfoTable *info,
#else
src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);
*p = TAG_CLOSURE(tag,(StgClosure*)to);
-#endif
+#endif /* defined(PARALLEL_GC) */
#if defined(PROFILING)
// We store the size of the just evacuated object in the LDV word so that
@@ -194,7 +194,7 @@ spin:
if (info == (W_)&stg_WHITEHOLE_info) {
#if defined(PROF_SPIN)
whitehole_gc_spin++;
-#endif
+#endif /* PROF_SPIN */
busy_wait_nop();
goto spin;
}
@@ -205,7 +205,7 @@ spin:
}
#else
info = (W_)src->header.info;
-#endif
+#endif /* PARALLEL_GC */
to = alloc_for_copy(size_to_reserve, gen_no);
@@ -216,8 +216,8 @@ spin:
}
write_barrier();
- src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to);
*p = (StgClosure *)to;
+ src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to);
#if defined(PROFILING)
// We store the size of the just evacuated object in the LDV word so that
@@ -1099,6 +1099,7 @@ selector_chain:
// need the write-barrier stuff.
// - undo the chain we've built to point to p.
SET_INFO((StgClosure *)p, (const StgInfoTable *)info_ptr);
+ write_barrier();
*q = (StgClosure *)p;
if (evac) evacuate(q);
unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);
@@ -1109,7 +1110,7 @@ selector_chain:
// Save the real info pointer (NOTE: not the same as get_itbl()).
info_ptr = (StgWord)p->header.info;
SET_INFO((StgClosure *)p,&stg_WHITEHOLE_info);
-#endif
+#endif /* THREADED_RTS */
field = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr)->layout.selector_offset;
@@ -1165,6 +1166,7 @@ selector_loop:
SET_INFO((StgClosure*)p, (StgInfoTable *)info_ptr);
OVERWRITING_CLOSURE((StgClosure*)p);
SET_INFO((StgClosure*)p, &stg_WHITEHOLE_info);
+ write_barrier();
}
#endif
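The reordering in Evac.c follows the same publish discipline during parallel GC: updating our own root pointer is thread-local, but the forwarding pointer makes the to-space copy visible to every other GC thread and so must come after the barrier. A condensed sketch, assuming the to-space copy has already been constructed:

```c
/* Condensed sketch of the forwarding-pointer publication in Evac.c;
 * assumes the copy of src at `to` is already complete. */
static void publish_forwarding(StgClosure **p, StgClosure *src,
                               StgPtr to, StgWord tag)
{
    write_barrier();                          // commit the to-space copy
    *p = TAG_CLOSURE(tag, (StgClosure *)to);  // thread-local root update
    // Publish: other GC threads may now follow the forwarding pointer.
    src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);
}
```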
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 4bf540a4a7..92a5e229a1 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -1104,6 +1104,8 @@ loop:
// scavenge_loop() only exits when there's no work to do
+ // This atomic decrement also serves as a full barrier to ensure that any
+ // writes we made during scavenging are visible to other threads.
#if defined(DEBUG)
r = dec_running();
#else
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index e8ca0c4002..650dc2c1df 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -83,6 +83,7 @@ isAlive(StgClosure *p)
}
info = INFO_PTR_TO_STRUCT(info);
+ load_load_barrier();
switch (info->type) {
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index d5982e2f64..7475b5e625 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -234,16 +234,22 @@ static bool tidyWeakList(generation *gen)
last_w = &gen->old_weak_ptr_list;
for (w = gen->old_weak_ptr_list; w != NULL; w = next_w) {
+ info = w->header.info;
+ /* N.B. This function is executed only during the serial part of GC
+ * so consequently there is no potential for data races and therefore
+ * no need for memory barriers.
+ */
+
/* There might be a DEAD_WEAK on the list if finalizeWeak# was
* called on a live weak pointer object. Just remove it.
*/
- if (w->header.info == &stg_DEAD_WEAK_info) {
+ if (info == &stg_DEAD_WEAK_info) {
next_w = w->link;
*last_w = next_w;
continue;
}
- info = get_itbl((StgClosure *)w);
+ info = INFO_PTR_TO_STRUCT(info);
switch (info->type) {
case WEAK:
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 8082b7e6d0..ff76f747c9 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -233,6 +233,7 @@ checkClosure( const StgClosure* p )
p = UNTAG_CONST_CLOSURE(p);
info = p->header.info;
+ load_load_barrier();
if (IS_FORWARDING_PTR(info)) {
barf("checkClosure: found EVACUATED closure %d", info->type);
@@ -243,6 +244,7 @@ checkClosure( const StgClosure* p )
#endif
info = INFO_PTR_TO_STRUCT(info);
+ load_load_barrier();
switch (info->type) {
@@ -564,6 +566,7 @@ checkTSO(StgTSO *tso)
next = tso->_link;
info = (const StgInfoTable*) tso->_link->header.info;
+ load_load_barrier();
ASSERT(next == END_TSO_QUEUE ||
info == &stg_MVAR_TSO_QUEUE_info ||
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 73a790e9ed..c486cd96c5 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -431,7 +431,7 @@ scavenge_block (bdescr *bd)
// time around the loop.
while (p < bd->free || (bd == ws->todo_bd && p < ws->todo_free)) {
- ASSERT(bd->link == NULL);
+ ASSERT(bd->link == NULL);
ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
info = get_itbl((StgClosure *)p);
@@ -1580,6 +1580,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
#if defined(DEBUG)
+ const StgInfoTable *pinfo;
switch (get_itbl((StgClosure *)p)->type) {
case MUT_VAR_CLEAN:
// can happen due to concurrent writeMutVars
@@ -1599,9 +1600,10 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
case TREC_CHUNK:
mutlist_TREC_CHUNK++; break;
case MUT_PRIM:
- if (((StgClosure*)p)->header.info == &stg_TVAR_WATCH_QUEUE_info)
+ pinfo = ((StgClosure*)p)->header.info;
+ if (pinfo == &stg_TVAR_WATCH_QUEUE_info)
mutlist_TVAR_WATCH_QUEUE++;
- else if (((StgClosure*)p)->header.info == &stg_TREC_HEADER_info)
+ else if (pinfo == &stg_TREC_HEADER_info)
mutlist_TREC_HEADER++;
else
mutlist_OTHERS++;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 2e03b77695..3f91905f3c 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -407,8 +407,10 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
// Allocate the blackhole indirection closure
bh = (StgInd *)allocate(cap, sizeofW(*bh));
- SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
+ SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
+    // Ensure that the above writes are visible before we expose the blackhole as the CAF's indirectee.
+ write_barrier();
caf->indirectee = (StgClosure *)bh;
write_barrier();
@@ -1081,6 +1083,8 @@ void
dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
{
Capability *cap = regTableToCapability(reg);
+ // No barrier required here as no other heap object fields are read. See
+    // Note [Heap memory barriers] in SMP.h.
if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
p->header.info = &stg_MUT_VAR_DIRTY_info;
recordClosureMutated(cap,p);
@@ -1090,6 +1094,8 @@ dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
void
dirty_TVAR(Capability *cap, StgTVar *p)
{
+ // No barrier required here as no other heap object fields are read. See
+    // Note [Heap memory barriers] in SMP.h.
if (p->header.info == &stg_TVAR_CLEAN_info) {
p->header.info = &stg_TVAR_DIRTY_info;
recordClosureMutated(cap,(StgClosure*)p);