author     Travis Whitaker <pi.boy.travis@gmail.com>    2019-04-03 15:26:16 -0700
committer  Ben Gamari <ben@smart-cactus.org>            2020-05-19 09:55:49 -0400
commit     62e19a6d0889187dfbce0ba2f404849b90b1ef02 (patch)
tree       7a0aac77e7e6a53455fa4252ea0d50baeeba0989
parent     568d7279a80cf945271f0659f11a94eea3f1433d (diff)
Correct closure observation, construction, and mutation on weak memory machines.
Here the following changes are introduced:

- A read barrier machine op is added to Cmm.
- The order in which a closure's fields are read and written is changed.
- Memory barriers are added to RTS code to ensure correctness on
  out-of-order machines with weak memory ordering.

Cmm has a new CallishMachOp called MO_ReadBarrier. On weak memory machines,
this is lowered to an instruction that ensures memory reads that occur after
said instruction in program order are not performed before reads coming
before said instruction in program order. On machines with strong memory
ordering properties (e.g. X86, SPARC in TSO mode) no such instruction is
necessary, so MO_ReadBarrier is simply erased. However, such an instruction
is necessary on weakly ordered machines, e.g. ARM and PowerPC.

Weak memory ordering has consequences for how closures are observed and
mutated. For example, consider a closure that needs to be updated to an
indirection. In order for the indirection to be safe for concurrent
observers to enter, said observers must read the indirection's info table
before they read the indirectee. Furthermore, the entering observer makes
assumptions about the closure based on its info table contents, e.g. an
INFO_TYPE of IND implies the closure has an indirectee pointer that is
safe to follow.

When a closure is updated with an indirection, both its info table and its
indirectee must be written. With weak memory ordering, these two writes can
be arbitrarily reordered, and perhaps even interleaved with other threads'
reads and writes (in the absence of memory barrier instructions). Consider
this example of a bad reordering:

- An updater writes to a closure's info table (INFO_TYPE is now IND).
- A concurrent observer branches upon reading the closure's INFO_TYPE as IND.
- A concurrent observer reads the closure's indirectee and enters it. (!!!)
- An updater writes the closure's indirectee.

Here the update to the indirectee comes too late and the concurrent
observer has jumped off into the abyss. Speculative execution can also
cause us issues, consider:

- An observer is about to case on a value in closure's info table.
- The observer speculatively reads one or more of closure's fields.
- An updater writes to closure's info table.
- The observer takes a branch based on the new info table value, but
  with the old closure fields!
- The updater writes to the closure's other fields, but it's too late.

Because of these effects, reads and writes to a closure's info table must
be ordered carefully with respect to reads and writes to the closure's
other fields, and memory barriers must be placed to ensure that reads and
writes occur in program order. Specifically, updates to a closure must
follow the following pattern:

- Update the closure's (non-info table) fields.
- Write barrier.
- Update the closure's info table.

Observing a closure's fields must follow the following pattern:

- Read the closure's info pointer.
- Read barrier.
- Read the closure's (non-info table) fields.

This patch updates RTS code to obey this pattern. This should fix
long-standing SMP bugs on ARM (specifically newer aarch64
microarchitectures supporting out-of-order execution) and PowerPC. This
fixes issue #15449.
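To make the two patterns concrete, here is a minimal C sketch using the
barrier primitives this patch relies on (write_barrier() and
load_load_barrier() from SMP.h, plus SET_INFO and stg_IND_info, all of
which appear in the diff below). The function names update_closure and
observe_closure are hypothetical illustrations, not code from the patch;
the real updater in the RTS does more work (e.g. updateWithIndirection):

    /* Hedged sketch of the update/observe patterns described above. */
    #include "Rts.h"  /* StgInd, SET_INFO, barriers via stg/SMP.h */

    /* Updater: make the indirectee visible *before* the IND info
     * pointer, so any observer that reads an IND info pointer is
     * guaranteed to find a valid indirectee behind it. */
    static void update_closure(StgInd *p, StgClosure *indirectee)
    {
        p->indirectee = indirectee;               /* 1. non-info field  */
        write_barrier();                          /* 2. order 1 before 3 */
        SET_INFO((StgClosure *)p, &stg_IND_info); /* 3. publish info    */
    }

    /* Observer: read the info pointer first, fence, then the fields,
     * so a stale or speculative field read cannot be paired with a
     * newer info pointer. */
    static StgClosure *observe_closure(StgInd *p)
    {
        const StgInfoTable *info = p->header.info; /* 1. info pointer   */
        load_load_barrier();                       /* 2. read barrier   */
        if (info == &stg_IND_info) {
            return p->indirectee;                  /* 3. now safe       */
        }
        return (StgClosure *)p;
    }

On strongly ordered machines (e.g. X86) both barriers compile to nothing,
since TSO already provides these orderings; on ARM and PowerPC they emit
the required fence instructions.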
-rw-r--r--  rts/Apply.cmm            7
-rw-r--r--  rts/Compact.cmm          7
-rw-r--r--  rts/Interpreter.c       38
-rw-r--r--  rts/Messages.c           5
-rw-r--r--  rts/PrimOps.cmm         85
-rw-r--r--  rts/RaiseAsync.c         5
-rw-r--r--  rts/RtsAPI.c            51
-rw-r--r--  rts/StgMiscClosures.cmm  4
-rw-r--r--  rts/ThreadPaused.c       6
-rw-r--r--  rts/Threads.c           21
-rw-r--r--  rts/Weak.c               7
-rw-r--r--  rts/sm/CNF.c             7
-rw-r--r--  rts/sm/Compact.c         6
-rw-r--r--  rts/sm/Evac.c           18
-rw-r--r--  rts/sm/GCAux.c           2
-rw-r--r--  rts/sm/Scav.c           28
-rw-r--r--  rts/sm/Storage.c         3
17 files changed, 211 insertions, 89 deletions
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index a706c68194..55fcda7fdf 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -66,6 +66,7 @@ again:
// Note [Heap memory barriers] in SMP.h.
untaggedfun = UNTAG(fun);
info = %INFO_PTR(untaggedfun);
+ prim_read_barrier;
switch [INVALID_OBJECT .. N_CLOSURE_TYPES]
(TO_W_( %INFO_TYPE(%STD_INFO(info)) )) {
case
@@ -106,6 +107,7 @@ again:
CCS_ALLOC(BYTES_TO_WDS(SIZEOF_StgPAP), CCS_OVERHEAD);
P_ pap;
pap = Hp - SIZEOF_StgPAP + WDS(1);
+ prim_write_barrier;
SET_HDR(pap, stg_PAP_info, CCCS);
StgPAP_arity(pap) = arity;
if (arity <= TAG_MASK) {
@@ -134,6 +136,7 @@ again:
pap = Hp - size + WDS(1);
// We'll lose the original PAP, so we should enter its CCS
ccall enterFunCCS(BaseReg "ptr", StgHeader_ccs(untaggedfun) "ptr");
+ prim_write_barrier;
SET_HDR(pap, stg_PAP_info, CCCS);
StgPAP_arity(pap) = StgPAP_arity(untaggedfun);
StgPAP_n_args(pap) = StgPAP_n_args(untaggedfun);
@@ -284,6 +287,7 @@ for:
info = %GET_FUN_INFO(UNTAG(R1));
W_ type;
type = TO_W_(StgFunInfoExtra_fun_type(info));
+ prim_read_barrier;
if (type == ARG_GEN) {
jump StgFunInfoExtra_slow_apply(info) [R1];
}
@@ -362,6 +366,7 @@ for:
info = %GET_FUN_INFO(UNTAG(R1));
W_ type;
type = TO_W_(StgFunInfoExtra_fun_type(info));
+ prim_read_barrier;
if (type == ARG_GEN) {
jump StgFunInfoExtra_slow_apply(info) [R1];
}
@@ -426,12 +431,14 @@ for:
TICK_ENT_VIA_NODE();
#if defined(NO_ARG_REGS)
+ prim_read_barrier;
jump %GET_ENTRY(UNTAG(R1)) [R1];
#else
W_ info;
info = %GET_FUN_INFO(UNTAG(R1));
W_ type;
type = TO_W_(StgFunInfoExtra_fun_type(info));
+ prim_read_barrier;
if (type == ARG_GEN) {
jump StgFunInfoExtra_slow_apply(info) [R1];
}
diff --git a/rts/Compact.cmm b/rts/Compact.cmm
index bae94a03cd..6b48956e61 100644
--- a/rts/Compact.cmm
+++ b/rts/Compact.cmm
@@ -72,6 +72,7 @@ eval:
tag = GETTAG(p);
p = UNTAG(p);
info = %INFO_PTR(p);
+ prim_read_barrier;
type = TO_W_(%INFO_TYPE(%STD_INFO(info)));
switch [0 .. N_CLOSURE_TYPES] type {
@@ -171,7 +172,6 @@ eval:
cards = SIZEOF_StgMutArrPtrs + WDS(ptrs);
ALLOCATE(compact, BYTES_TO_WDS(size), p, to, tag);
P_[pp] = tag | to;
- SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p));
StgMutArrPtrs_ptrs(to) = ptrs;
StgMutArrPtrs_size(to) = StgMutArrPtrs_size(p);
prim %memcpy(to + cards, p + cards , size - cards, 1);
@@ -185,6 +185,7 @@ eval:
i = i + 1;
goto loop0;
}
+ SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p));
return();
}
@@ -201,7 +202,6 @@ eval:
ptrs = StgSmallMutArrPtrs_ptrs(p);
ALLOCATE(compact, BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + ptrs, p, to, tag);
P_[pp] = tag | to;
- SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p));
StgSmallMutArrPtrs_ptrs(to) = ptrs;
i = 0;
loop1:
@@ -213,6 +213,7 @@ eval:
i = i + 1;
goto loop1;
}
+ SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p));
return();
}
@@ -238,7 +239,6 @@ eval:
ALLOCATE(compact, size, p, to, tag);
P_[pp] = tag | to;
- SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p));
// First, copy the non-pointers
if (nptrs > 0) {
@@ -248,6 +248,7 @@ eval:
i = i + 1;
if (i < ptrs + nptrs) ( likely: True ) goto loop2;
}
+ SET_HDR(to, StgHeader_info(p), StgHeader_ccs(p));
// Next, recursively compact and copy the pointers
if (ptrs == 0) { return(); }
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 463ddae18b..fcd667dabd 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -249,6 +249,7 @@ StgClosure * newEmptyPAP (Capability *cap,
uint32_t arity)
{
StgPAP *pap = (StgPAP *)allocate(cap, sizeofW(StgPAP));
+ write_barrier();
SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
pap->arity = arity;
pap->n_args = 0;
@@ -273,7 +274,7 @@ StgClosure * copyPAP (Capability *cap, StgPAP *oldpap)
for (i = 0; i < ((StgPAP *)pap)->n_args; i++) {
pap->payload[i] = oldpap->payload[i];
}
- // No write barrier is needed here as this is a new allocation
+ write_barrier();
SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
return (StgClosure *)pap;
}
@@ -482,8 +483,9 @@ eval_obj:
{
StgUpdateFrame *__frame;
__frame = (StgUpdateFrame *)Sp;
- SET_INFO((StgClosure *)__frame, (StgInfoTable *)&stg_upd_frame_info);
__frame->updatee = (StgClosure *)(ap);
+ write_barrier();
+ SET_INFO((StgClosure *)__frame, (StgInfoTable *)&stg_upd_frame_info);
}
ENTER_CCS_THUNK(cap,ap);
@@ -809,7 +811,7 @@ do_apply:
for (i = 0; i < m; i++) {
new_pap->payload[pap->n_args + i] = (StgClosure *)SpW(i);
}
- // No write barrier is needed here as this is a new allocation
+ write_barrier();
SET_HDR(new_pap,&stg_PAP_info,cap->r.rCCCS);
tagged_obj = (StgClosure *)new_pap;
Sp_addW(m);
@@ -852,7 +854,7 @@ do_apply:
for (i = 0; i < m; i++) {
pap->payload[i] = (StgClosure *)SpW(i);
}
- // No write barrier is needed here as this is a new allocation
+ write_barrier();
SET_HDR(pap, &stg_PAP_info,cap->r.rCCCS);
tagged_obj = (StgClosure *)pap;
Sp_addW(m);
@@ -1097,7 +1099,7 @@ run_BCO:
new_aps->payload[i] = (StgClosure *)SpW(i-2);
}
- // No write barrier is needed here as this is a new allocation
+ write_barrier();
SET_HDR(new_aps,&stg_AP_STACK_info,cap->r.rCCCS);
// Arrange the stack to call the breakpoint IO action, and
@@ -1424,41 +1426,37 @@ run_BCO:
case bci_ALLOC_AP: {
int n_payload = BCO_NEXT;
- StgAP *ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
- SpW(-1) = (W_)ap;
+ StgAP* ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
ap->n_args = n_payload;
ap->arity = 0;
- // No write barrier is needed here as this is a new allocation
- // visible only from our stack
+ write_barrier();
SET_HDR(ap, &stg_AP_info, cap->r.rCCCS)
+ SpW(-1) = (W_)ap;
Sp_subW(1);
goto nextInsn;
}
case bci_ALLOC_AP_NOUPD: {
int n_payload = BCO_NEXT;
- StgAP *ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
- SpW(-1) = (W_)ap;
+ StgAP* ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
ap->n_args = n_payload;
ap->arity = 0;
- // No write barrier is needed here as this is a new allocation
- // visible only from our stack
+ write_barrier();
SET_HDR(ap, &stg_AP_NOUPD_info, cap->r.rCCCS)
+ SpW(-1) = (W_)ap;
Sp_subW(1);
goto nextInsn;
}
case bci_ALLOC_PAP: {
- StgPAP* pap;
int arity = BCO_NEXT;
int n_payload = BCO_NEXT;
- pap = (StgPAP*)allocate(cap, PAP_sizeW(n_payload));
- SpW(-1) = (W_)pap;
+ StgPAP* pap = (StgPAP*)allocate(cap, PAP_sizeW(n_payload));
pap->n_args = n_payload;
pap->arity = arity;
- // No write barrier is needed here as this is a new allocation
- // visible only from our stack
+ write_barrier();
SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS)
+ SpW(-1) = (W_)pap;
Sp_subW(1);
goto nextInsn;
}
@@ -1529,6 +1527,7 @@ run_BCO:
int o_itbl = BCO_GET_LARGE_ARG;
int n_words = BCO_NEXT;
StgInfoTable* itbl = INFO_PTR_TO_STRUCT((StgInfoTable *)BCO_LIT(o_itbl));
+ load_load_barrier();
int request = CONSTR_sizeW( itbl->layout.payload.ptrs,
itbl->layout.payload.nptrs );
StgClosure* con = (StgClosure*)allocate_NONUPD(cap,request);
@@ -1538,8 +1537,7 @@ run_BCO:
}
Sp_addW(n_words);
Sp_subW(1);
- // No write barrier is needed here as this is a new allocation
- // visible only from our stack
+ write_barrier();
SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), cap->r.rCCCS);
SpW(0) = (W_)con;
IF_DEBUG(interpreter,
diff --git a/rts/Messages.c b/rts/Messages.c
index 2f80370845..dcd4d16338 100644
--- a/rts/Messages.c
+++ b/rts/Messages.c
@@ -28,6 +28,7 @@ void sendMessage(Capability *from_cap, Capability *to_cap, Message *msg)
#if defined(DEBUG)
{
const StgInfoTable *i = msg->header.info;
+ load_load_barrier();
if (i != &stg_MSG_THROWTO_info &&
i != &stg_MSG_BLACKHOLE_info &&
i != &stg_MSG_TRY_WAKEUP_info &&
@@ -70,6 +71,7 @@ executeMessage (Capability *cap, Message *m)
loop:
write_barrier(); // allow m->header to be modified by another thread
i = m->header.info;
+ load_load_barrier();
if (i == &stg_MSG_TRY_WAKEUP_info)
{
StgTSO *tso = ((MessageWakeup *)m)->tso;
@@ -302,6 +304,7 @@ loop:
recordClosureMutated(cap,(StgClosure*)msg);
if (info == &stg_BLOCKING_QUEUE_CLEAN_info) {
+ write_barrier();
bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
// No barrier is necessary here: we are only exposing the
// closure to the GC. See Note [Heap memory barriers] in SMP.h.
@@ -334,6 +337,7 @@ StgTSO * blackHoleOwner (StgClosure *bh)
StgClosure *p;
info = bh->header.info;
+ load_load_barrier();
if (info != &stg_BLACKHOLE_info &&
info != &stg_CAF_BLACKHOLE_info &&
@@ -349,6 +353,7 @@ loop:
// and turns this into an infinite loop.
p = UNTAG_CLOSURE((StgClosure*)VOLATILE_LOAD(&((StgInd*)bh)->indirectee));
info = p->header.info;
+ load_load_barrier();
if (info == &stg_IND_info) goto loop;
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 048cde8065..1ada3d519c 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -68,8 +68,9 @@ stg_newByteArrayzh ( W_ n )
jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
- SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
+ prim_write_barrier;
+ SET_HDR(p, stg_ARR_WORDS_info, CCCS);
return (p);
}
@@ -98,9 +99,9 @@ stg_newPinnedByteArrayzh ( W_ n )
}
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
- /* No write barrier needed since this is a new allocation. */
- SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
+ prim_write_barrier;
+ SET_HDR(p, stg_ARR_WORDS_info, CCCS);
return (p);
}
@@ -133,9 +134,9 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
}
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
- /* No write barrier needed since this is a new allocation. */
- SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
+ prim_write_barrier;
+ SET_HDR(p, stg_ARR_WORDS_info, CCCS);
return (p);
}
@@ -268,8 +269,6 @@ stg_newArrayzh ( W_ n /* words */, gcptr init )
}
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);
- /* No write barrier needed since this is a new allocation. */
- SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgMutArrPtrs_ptrs(arr) = n;
StgMutArrPtrs_size(arr) = size;
@@ -282,6 +281,9 @@ stg_newArrayzh ( W_ n /* words */, gcptr init )
goto for;
}
+ prim_write_barrier;
+ SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
+
return (arr);
}
@@ -293,11 +295,13 @@ stg_unsafeThawArrayzh ( gcptr arr )
// mut_list so no need to add it again. MUT_ARR_PTRS_FROZEN_CLEAN means it's
// not and we should add it to a mut_list.
if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN_DIRTY_info) {
+ prim_write_barrier; // see below:
SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
// must be done after SET_INFO, because it ASSERTs closure_MUTABLE():
recordMutable(arr);
return (arr);
} else {
+ prim_write_barrier;
SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
return (arr);
}
@@ -390,7 +394,6 @@ stg_newArrayArrayzh ( W_ n /* words */ )
}
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);
- SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
StgMutArrPtrs_ptrs(arr) = n;
StgMutArrPtrs_size(arr) = size;
@@ -403,6 +406,9 @@ stg_newArrayArrayzh ( W_ n /* words */ )
goto for;
}
+ prim_write_barrier;
+ SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
+
return (arr);
}
@@ -425,8 +431,6 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
}
TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);
- /* No write barrier needed since this is a new allocation. */
- SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgSmallMutArrPtrs_ptrs(arr) = n;
// Initialise all elements of the array with the value in R2
@@ -441,6 +445,9 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
goto for;
}
+ prim_write_barrier;
+ SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
+
return (arr);
}
@@ -449,11 +456,13 @@ stg_unsafeThawSmallArrayzh ( gcptr arr )
// See stg_unsafeThawArrayzh
if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info) {
SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
+ prim_write_barrier;
recordMutable(arr);
// must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
return (arr);
} else {
SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
+ prim_write_barrier;
return (arr);
}
}
@@ -511,12 +520,13 @@ stg_copySmallArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n)
dst, dst_off, n);
}
- SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
-
dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
bytes = WDS(n);
prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
+
+ prim_write_barrier;
+ SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
}
return ();
@@ -532,8 +542,6 @@ stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n
dst, dst_off, n);
}
- SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
-
dst_p = dst + SIZEOF_StgSmallMutArrPtrs + WDS(dst_off);
src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(src_off);
bytes = WDS(n);
@@ -542,6 +550,9 @@ stg_copySmallMutableArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n
} else {
prim %memcpy(dst_p, src_p, bytes, SIZEOF_W);
}
+
+ prim_write_barrier;
+ SET_INFO(dst, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
}
return ();
@@ -583,9 +594,9 @@ stg_newMutVarzh ( gcptr init )
ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init);
mv = Hp - SIZEOF_StgMutVar + WDS(1);
- /* No write barrier needed since this is a new allocation. */
- SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
StgMutVar_var(mv) = init;
+ prim_write_barrier;
+ SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
return (mv);
}
@@ -668,16 +679,18 @@ stg_atomicModifyMutVar2zh ( gcptr mv, gcptr f )
TICK_ALLOC_THUNK_2();
CCCS_ALLOC(THUNK_2_SIZE);
z = Hp - THUNK_2_SIZE + WDS(1);
- SET_HDR(z, stg_ap_2_upd_info, CCCS);
LDV_RECORD_CREATE(z);
StgThunk_payload(z,0) = f;
+ prim_write_barrier;
+ SET_HDR(z, stg_ap_2_upd_info, CCCS);
TICK_ALLOC_THUNK_1();
CCCS_ALLOC(THUNK_1_SIZE);
y = z - THUNK_1_SIZE;
- SET_HDR(y, stg_sel_0_upd_info, CCCS);
LDV_RECORD_CREATE(y);
StgThunk_payload(y,0) = z;
+ prim_write_barrier;
+ SET_HDR(y, stg_sel_0_upd_info, CCCS);
retry:
x = StgMutVar_var(mv);
@@ -728,9 +741,10 @@ stg_atomicModifyMutVarzuzh ( gcptr mv, gcptr f )
TICK_ALLOC_THUNK();
CCCS_ALLOC(THUNK_SIZE);
z = Hp - THUNK_SIZE + WDS(1);
- SET_HDR(z, stg_ap_2_upd_info, CCCS);
LDV_RECORD_CREATE(z);
StgThunk_payload(z,0) = f;
+ prim_write_barrier;
+ SET_HDR(z, stg_ap_2_upd_info, CCCS);
retry:
x = StgMutVar_var(mv);
@@ -763,8 +777,6 @@ stg_mkWeakzh ( gcptr key,
ALLOC_PRIM (SIZEOF_StgWeak)
w = Hp - SIZEOF_StgWeak + WDS(1);
- // No memory barrier needed as this is a new allocation.
- SET_HDR(w, stg_WEAK_info, CCCS);
StgWeak_key(w) = key;
StgWeak_value(w) = value;
@@ -772,6 +784,10 @@ stg_mkWeakzh ( gcptr key,
StgWeak_cfinalizers(w) = stg_NO_FINALIZER_closure;
StgWeak_link(w) = Capability_weak_ptr_list_hd(MyCapability());
+
+ prim_write_barrier;
+ SET_HDR(w, stg_WEAK_info, CCCS);
+
Capability_weak_ptr_list_hd(MyCapability()) = w;
if (Capability_weak_ptr_list_tl(MyCapability()) == NULL) {
Capability_weak_ptr_list_tl(MyCapability()) = w;
@@ -798,13 +814,15 @@ stg_addCFinalizzerToWeakzh ( W_ fptr, // finalizer
ALLOC_PRIM (SIZEOF_StgCFinalizerList)
c = Hp - SIZEOF_StgCFinalizerList + WDS(1);
- SET_HDR(c, stg_C_FINALIZER_LIST_info, CCCS);
StgCFinalizerList_fptr(c) = fptr;
StgCFinalizerList_ptr(c) = ptr;
StgCFinalizerList_eptr(c) = eptr;
StgCFinalizerList_flag(c) = flag;
+ prim_write_barrier;
+ SET_HDR(c, stg_C_FINALIZER_LIST_info, CCCS);
+
LOCK_CLOSURE(w, info);
if (info == stg_DEAD_WEAK_info) {
@@ -1544,12 +1562,12 @@ stg_newMVarzh ()
ALLOC_PRIM_ (SIZEOF_StgMVar, stg_newMVarzh);
mvar = Hp - SIZEOF_StgMVar + WDS(1);
- // No memory barrier needed as this is a new allocation.
- SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS);
- // MVARs start dirty: generation 0 has no mutable list
StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure;
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
+ prim_write_barrier;
+ SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS);
+ // MVARs start dirty: generation 0 has no mutable list
return (mvar);
}
@@ -1962,12 +1980,13 @@ stg_readMVarzh ( P_ mvar, /* :: MVar a */ )
StgMVarTSOQueue_link(q) = StgMVar_head(mvar);
StgMVarTSOQueue_tso(q) = CurrentTSO;
- SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
prim_write_barrier;
+ SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
StgTSO__link(CurrentTSO) = q;
StgTSO_block_info(CurrentTSO) = mvar;
StgTSO_why_blocked(CurrentTSO) = BlockedOnMVarRead::I16;
+ // TODO: Barrier needed here?
StgMVar_head(mvar) = q;
if (StgMVar_tail(mvar) == stg_END_TSO_QUEUE_closure) {
@@ -2074,8 +2093,6 @@ stg_newBCOzh ( P_ instrs,
ALLOC_PRIM (bytes);
bco = Hp - bytes + WDS(1);
- // No memory barrier necessary as this is a new allocation.
- SET_HDR(bco, stg_BCO_info, CCS_MAIN);
StgBCO_instrs(bco) = instrs;
StgBCO_literals(bco) = literals;
@@ -2093,6 +2110,9 @@ for:
goto for;
}
+ prim_write_barrier;
+ SET_HDR(bco, stg_BCO_info, CCS_MAIN);
+
return (bco);
}
@@ -2111,12 +2131,13 @@ stg_mkApUpd0zh ( P_ bco )
CCCS_ALLOC(SIZEOF_StgAP);
ap = Hp - SIZEOF_StgAP + WDS(1);
- // No memory barrier necessary as this is a new allocation.
- SET_HDR(ap, stg_AP_info, CCS_MAIN);
StgAP_n_args(ap) = HALF_W_(0);
StgAP_fun(ap) = bco;
+ prim_write_barrier;
+ SET_HDR(ap, stg_AP_info, CCS_MAIN);
+
return (ap);
}
@@ -2145,7 +2166,6 @@ stg_unpackClosurezh ( P_ closure )
dat_arr = Hp - dat_arr_sz + WDS(1);
- SET_HDR(dat_arr, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(dat_arr) = WDS(len);
p = 0;
for:
@@ -2160,6 +2180,9 @@ for:
// Follow the pointers
("ptr" ptrArray) = foreign "C" heap_view_closurePtrs(MyCapability() "ptr", clos "ptr");
+ prim_write_barrier;
+ SET_HDR(dat_arr, stg_ARR_WORDS_info, CCCS);
+
return (info, dat_arr, ptrArray);
}
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index e8a6a81747..b3b84496d7 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -922,6 +922,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
ap->payload[i] = (StgClosure *)*sp++;
}
+ write_barrier();
SET_HDR(ap,&stg_AP_STACK_NOUPD_info,stack->header.prof.ccs);
TICK_ALLOC_SE_THK(WDS(words+1),0);
@@ -960,6 +961,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
//
raise = (StgThunk *)allocate(cap,sizeofW(StgThunk)+1);
TICK_ALLOC_SE_THK(WDS(1),0);
+ write_barrier();
SET_HDR(raise,&stg_raise_info,cf->header.prof.ccs);
raise->payload[0] = exception;
@@ -1040,8 +1042,9 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
atomically = (StgThunk*)allocate(cap,sizeofW(StgThunk)+1);
TICK_ALLOC_SE_THK(1,0);
- SET_HDR(atomically,&stg_atomically_info,af->header.prof.ccs);
atomically->payload[0] = af->code;
+ write_barrier();
+ SET_HDR(atomically,&stg_atomically_info,af->header.prof.ccs);
// discard stack up to and including the ATOMICALLY_FRAME
frame += sizeofW(StgAtomicallyFrame);
diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c
index 51a1f2b7cf..310fe26c1d 100644
--- a/rts/RtsAPI.c
+++ b/rts/RtsAPI.c
@@ -30,8 +30,9 @@ HaskellObj
rts_mkChar (Capability *cap, HsChar c)
{
StgClosure *p = (StgClosure *)allocate(cap, CONSTR_sizeW(0,1));
- SET_HDR(p, Czh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(StgChar)c;
+ write_barrier();
+ SET_HDR(p, Czh_con_info, CCS_SYSTEM);
return p;
}
@@ -39,8 +40,9 @@ HaskellObj
rts_mkInt (Capability *cap, HsInt i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, Izh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgInt)i;
+ write_barrier();
+ SET_HDR(p, Izh_con_info, CCS_SYSTEM);
return p;
}
@@ -48,9 +50,10 @@ HaskellObj
rts_mkInt8 (Capability *cap, HsInt8 i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, I8zh_con_info, CCS_SYSTEM);
/* Make sure we mask out the bits above the lowest 8 */
p->payload[0] = (StgClosure *)(StgInt)i;
+ write_barrier();
+ SET_HDR(p, I8zh_con_info, CCS_SYSTEM);
return p;
}
@@ -58,9 +61,10 @@ HaskellObj
rts_mkInt16 (Capability *cap, HsInt16 i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, I16zh_con_info, CCS_SYSTEM);
/* Make sure we mask out the relevant bits */
p->payload[0] = (StgClosure *)(StgInt)i;
+ write_barrier();
+ SET_HDR(p, I16zh_con_info, CCS_SYSTEM);
return p;
}
@@ -68,8 +72,9 @@ HaskellObj
rts_mkInt32 (Capability *cap, HsInt32 i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, I32zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgInt)i;
+ write_barrier();
+ SET_HDR(p, I32zh_con_info, CCS_SYSTEM);
return p;
}
@@ -77,8 +82,9 @@ HaskellObj
rts_mkInt64 (Capability *cap, HsInt64 i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2));
- SET_HDR(p, I64zh_con_info, CCS_SYSTEM);
ASSIGN_Int64((P_)&(p->payload[0]), i);
+ write_barrier();
+ SET_HDR(p, I64zh_con_info, CCS_SYSTEM);
return p;
}
@@ -86,8 +92,9 @@ HaskellObj
rts_mkWord (Capability *cap, HsWord i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, Wzh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)i;
+ write_barrier();
+ SET_HDR(p, Wzh_con_info, CCS_SYSTEM);
return p;
}
@@ -96,8 +103,9 @@ rts_mkWord8 (Capability *cap, HsWord8 w)
{
/* see rts_mkInt* comments */
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, W8zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(w & 0xff);
+ write_barrier();
+ SET_HDR(p, W8zh_con_info, CCS_SYSTEM);
return p;
}
@@ -106,8 +114,9 @@ rts_mkWord16 (Capability *cap, HsWord16 w)
{
/* see rts_mkInt* comments */
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, W16zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(w & 0xffff);
+ write_barrier();
+ SET_HDR(p, W16zh_con_info, CCS_SYSTEM);
return p;
}
@@ -116,8 +125,9 @@ rts_mkWord32 (Capability *cap, HsWord32 w)
{
/* see rts_mkInt* comments */
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, W32zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(w & 0xffffffff);
+ write_barrier();
+ SET_HDR(p, W32zh_con_info, CCS_SYSTEM);
return p;
}
@@ -126,8 +136,9 @@ rts_mkWord64 (Capability *cap, HsWord64 w)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2));
/* see mk_Int8 comment */
- SET_HDR(p, W64zh_con_info, CCS_SYSTEM);
ASSIGN_Word64((P_)&(p->payload[0]), w);
+ write_barrier();
+ SET_HDR(p, W64zh_con_info, CCS_SYSTEM);
return p;
}
@@ -136,8 +147,9 @@ HaskellObj
rts_mkFloat (Capability *cap, HsFloat f)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, Fzh_con_info, CCS_SYSTEM);
ASSIGN_FLT((P_)p->payload, (StgFloat)f);
+ write_barrier();
+ SET_HDR(p, Fzh_con_info, CCS_SYSTEM);
return p;
}
@@ -145,8 +157,9 @@ HaskellObj
rts_mkDouble (Capability *cap, HsDouble d)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,sizeofW(StgDouble)));
- SET_HDR(p, Dzh_con_info, CCS_SYSTEM);
ASSIGN_DBL((P_)p->payload, (StgDouble)d);
+ write_barrier();
+ SET_HDR(p, Dzh_con_info, CCS_SYSTEM);
return p;
}
@@ -154,8 +167,9 @@ HaskellObj
rts_mkStablePtr (Capability *cap, HsStablePtr s)
{
StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
- SET_HDR(p, StablePtr_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)s;
+ write_barrier();
+ SET_HDR(p, StablePtr_con_info, CCS_SYSTEM);
return p;
}
@@ -163,8 +177,9 @@ HaskellObj
rts_mkPtr (Capability *cap, HsPtr a)
{
StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
- SET_HDR(p, Ptr_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)a;
+ write_barrier();
+ SET_HDR(p, Ptr_con_info, CCS_SYSTEM);
return p;
}
@@ -172,8 +187,9 @@ HaskellObj
rts_mkFunPtr (Capability *cap, HsFunPtr a)
{
StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
- SET_HDR(p, FunPtr_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)a;
+ write_barrier();
+ SET_HDR(p, FunPtr_con_info, CCS_SYSTEM);
return p;
}
@@ -202,9 +218,10 @@ rts_apply (Capability *cap, HaskellObj f, HaskellObj arg)
// Here we don't want to use CCS_SYSTEM, because it's a hidden cost centre,
// and evaluating Haskell code under a hidden cost centre leads to
// confusing profiling output. (#7753)
- SET_HDR(ap, (StgInfoTable *)&stg_ap_2_upd_info, CCS_MAIN);
ap->payload[0] = f;
ap->payload[1] = arg;
+ write_barrier();
+ SET_HDR(ap, (StgInfoTable *)&stg_ap_2_upd_info, CCS_MAIN);
return (StgClosure *)ap;
}
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 5af3a06b89..d06d0cce4d 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -308,9 +308,8 @@ retry:
MessageBlackHole_tso(msg) = CurrentTSO;
MessageBlackHole_bh(msg) = node;
+ prim_write_barrier;
SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM);
- // messageBlackHole has appropriate memory barriers when this object is exposed.
- // See Note [Heap memory barriers].
(r) = ccall messageBlackHole(MyCapability() "ptr", msg "ptr");
@@ -370,6 +369,7 @@ INFO_TABLE(stg_WHITEHOLE, 0,0, WHITEHOLE, "WHITEHOLE", "WHITEHOLE")
loop:
// spin until the WHITEHOLE is updated
info = StgHeader_info(node);
+ prim_read_barrier;
if (info == stg_WHITEHOLE_info) {
#if defined(PROF_SPIN)
W_[whitehole_lockClosure_spin] =
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index 83c621e386..26d07a5d2e 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -220,10 +220,9 @@ threadPaused(Capability *cap, StgTSO *tso)
frame = (StgClosure *)tso->stackobj->sp;
- // N.B. We know that the TSO is owned by the current capability so no
- // memory barriers are needed here.
while ((P_)frame < stack_end) {
info = get_ret_itbl(frame);
+ load_load_barrier();
switch (info->i.type) {
@@ -231,6 +230,7 @@ threadPaused(Capability *cap, StgTSO *tso)
// If we've already marked this frame, then stop here.
frame_info = frame->header.info;
+ load_load_barrier();
if (frame_info == (StgInfoTable *)&stg_marked_upd_frame_info) {
if (prev_was_update_frame) {
words_to_squeeze += sizeofW(StgUpdateFrame);
@@ -240,10 +240,12 @@ threadPaused(Capability *cap, StgTSO *tso)
goto end;
}
+ write_barrier();
SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info);
bh = ((StgUpdateFrame *)frame)->updatee;
bh_info = bh->header.info;
+ load_load_barrier();
IF_NONMOVING_WRITE_BARRIER_ENABLED {
updateRemembSetPushClosure(cap, (StgClosure *) bh);
}
diff --git a/rts/Threads.c b/rts/Threads.c
index 22d58bb48b..24ed0cb54c 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -82,11 +82,12 @@ createThread(Capability *cap, W_ size)
stack_size = round_to_mblocks(size - sizeofW(StgTSO));
stack = (StgStack *)allocate(cap, stack_size);
TICK_ALLOC_STACK(stack_size);
- SET_HDR(stack, &stg_STACK_info, cap->r.rCCCS);
stack->stack_size = stack_size - sizeofW(StgStack);
stack->sp = stack->stack + stack->stack_size;
stack->dirty = STACK_DIRTY;
stack->marking = 0;
+ write_barrier();
+ SET_HDR(stack, &stg_STACK_info, cap->r.rCCCS);
tso = (StgTSO *)allocate(cap, sizeofW(StgTSO));
TICK_ALLOC_TSO();
@@ -117,6 +118,9 @@ createThread(Capability *cap, W_ size)
tso->prof.cccs = CCS_MAIN;
#endif
+ write_barrier();
+ SET_HDR(tso, &stg_TSO_info, CCS_SYSTEM);
+
// put a stop frame on the stack
stack->sp -= sizeofW(StgStopFrame);
SET_HDR((StgClosure*)stack->sp,
@@ -276,9 +280,8 @@ tryWakeupThread (Capability *cap, StgTSO *tso)
MessageWakeup *msg;
msg = (MessageWakeup *)allocate(cap,sizeofW(MessageWakeup));
msg->tso = tso;
- SET_HDR(msg, &stg_MSG_TRY_WAKEUP_info, CCS_SYSTEM);
- // Ensure that writes constructing Message are committed before sending.
write_barrier();
+ SET_HDR(msg, &stg_MSG_TRY_WAKEUP_info, CCS_SYSTEM);
sendMessage(cap, tso->cap, (Message*)msg);
debugTraceCap(DEBUG_sched, cap, "message: try wakeup thread %ld on cap %d",
(W_)tso->id, tso->cap->no);
@@ -405,6 +408,8 @@ checkBlockingQueues (Capability *cap, StgTSO *tso)
{
StgBlockingQueue *bq, *next;
StgClosure *p;
+ const StgInfoTable *bqinfo;
+ const StgInfoTable *pinfo;
debugTraceCap(DEBUG_sched, cap,
"collision occurred; checking blocking queues for thread %ld",
@@ -623,13 +628,14 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
new_stack = (StgStack*) allocate(cap, chunk_size);
cap->r.rCurrentTSO = NULL;
- SET_HDR(new_stack, &stg_STACK_info, old_stack->header.prof.ccs);
TICK_ALLOC_STACK(chunk_size);
new_stack->dirty = 0; // begin clean, we'll mark it dirty below
new_stack->marking = 0;
new_stack->stack_size = chunk_size - sizeofW(StgStack);
new_stack->sp = new_stack->stack + new_stack->stack_size;
+ write_barrier();
+ SET_HDR(new_stack, &stg_STACK_info, old_stack->header.prof.ccs);
tso->tot_stack_size += new_stack->stack_size;
@@ -678,8 +684,9 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
} else {
new_stack->sp -= sizeofW(StgUnderflowFrame);
frame = (StgUnderflowFrame*)new_stack->sp;
- frame->info = &stg_stack_underflow_frame_info;
frame->next_chunk = old_stack;
+ write_barrier();
+ frame->info = &stg_stack_underflow_frame_info;
}
// copy the stack chunk between tso->sp and sp to
@@ -694,8 +701,6 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
new_stack->sp -= chunk_words;
}
- // No write barriers needed; all of the writes above are to structured
- // owned by our capability.
tso->stackobj = new_stack;
// we're about to run it, better mark it dirty
@@ -784,6 +789,8 @@ bool performTryPutMVar(Capability *cap, StgMVar *mvar, StgClosure *value)
q = mvar->head;
loop:
+ qinfo = q->header.info;
+ load_load_barrier();
if (q == (StgMVarTSOQueue*)&stg_END_TSO_QUEUE_closure) {
/* No further takes, the MVar is now full. */
if (info == &stg_MVAR_CLEAN_info) {
diff --git a/rts/Weak.c b/rts/Weak.c
index fe4516794a..d45c8d160c 100644
--- a/rts/Weak.c
+++ b/rts/Weak.c
@@ -42,6 +42,7 @@ void
runAllCFinalizers(StgWeak *list)
{
StgWeak *w;
+ const StgInfoTable *winfo;
Task *task;
task = myTask();
@@ -138,6 +139,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
// there's a later call to finalizeWeak# on this weak pointer,
// we don't run the finalizer again.
SET_HDR(w, &stg_DEAD_WEAK_info, w->header.prof.ccs);
+ write_barrier();
}
n_finalizers += i;
@@ -150,8 +152,6 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
size = n + mutArrPtrsCardTableSize(n);
arr = (StgMutArrPtrs *)allocate(cap, sizeofW(StgMutArrPtrs) + size);
TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
- // No write barrier needed here; this array is only going to referred to by this core.
- SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, CCS_SYSTEM);
arr->ptrs = n;
arr->size = size;
@@ -167,6 +167,9 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
arr->payload[i] = (StgClosure *)(W_)(-1);
}
+ write_barrier();
+ SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, CCS_SYSTEM);
+
t = createIOThread(cap,
RtsFlags.GcFlags.initialStkSize,
rts_apply(cap,
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index 2c701c2c29..9988fba986 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -376,7 +376,6 @@ compactNew (Capability *cap, StgWord size)
ALLOCATE_NEW);
self = firstBlockGetCompact(block);
- SET_HDR((StgClosure*)self, &stg_COMPACT_NFDATA_CLEAN_info, CCS_SYSTEM);
self->autoBlockW = aligned_size / sizeof(StgWord);
self->nursery = block;
self->last = block;
@@ -394,6 +393,9 @@ compactNew (Capability *cap, StgWord size)
debugTrace(DEBUG_compact, "compactNew: size %" FMT_Word, size);
+ write_barrier();
+ SET_HDR((StgClosure*)self, &stg_COMPACT_NFDATA_CLEAN_info, CCS_SYSTEM);
+
return self;
}
@@ -546,6 +548,7 @@ insertCompactHash (Capability *cap,
{
insertHashTable(str->hash, (StgWord)p, (const void*)to);
const StgInfoTable **strinfo = &str->header.info;
+ load_load_barrier();
if (*strinfo == &stg_COMPACT_NFDATA_CLEAN_info) {
*strinfo = &stg_COMPACT_NFDATA_DIRTY_info;
recordClosureMutated(cap, (StgClosure*)str);
@@ -690,6 +693,7 @@ verify_consistency_block (StgCompactNFData *str, StgCompactNFDataBlock *block)
ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
info = get_itbl(q);
+ load_load_barrier();
switch (info->type) {
case CONSTR_1_0:
check_object_in_compact(str, UNTAG_CLOSURE(q->payload[0]));
@@ -929,6 +933,7 @@ fixup_block(StgCompactNFDataBlock *block, StgWord *fixup_table, uint32_t count)
while (p < bd->free) {
ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
info = get_itbl((StgClosure*)p);
+ load_load_barrier();
switch (info->type) {
case CONSTR_1_0:
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index 5031c535a1..e2b5ca70e8 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -197,6 +197,7 @@ STATIC_INLINE StgInfoTable*
get_threaded_info( P_ p )
{
W_ q = (W_)GET_INFO(UNTAG_CLOSURE((StgClosure *)p));
+ load_load_barrier();
loop:
switch (GET_PTR_TAG(q))
@@ -382,6 +383,7 @@ thread_stack(P_ p, P_ stack_end)
StgRetFun *ret_fun = (StgRetFun *)p;
StgFunInfoTable *fun_info =
FUN_INFO_PTR_TO_STRUCT(get_threaded_info((P_)ret_fun->fun));
+ load_load_barrier();
// *before* threading it!
thread(&ret_fun->fun);
p = thread_arg_block(fun_info, ret_fun->payload);
@@ -400,6 +402,7 @@ thread_PAP_payload (StgClosure *fun, StgClosure **payload, W_ size)
{
StgFunInfoTable *fun_info =
FUN_INFO_PTR_TO_STRUCT(get_threaded_info((P_)fun));
+ load_load_barrier();
ASSERT(fun_info->i.type != PAP);
P_ p = (P_)payload;
@@ -620,6 +623,8 @@ update_fwd_large( bdescr *bd )
static /* STATIC_INLINE */ P_
thread_obj (const StgInfoTable *info, P_ p)
{
+ load_load_barrier();
+
switch (info->type) {
case THUNK_0_1:
return p + sizeofW(StgThunk) + 1;
@@ -851,6 +856,7 @@ update_fwd_compact( bdescr *blocks )
// definitely have enough room. Also see bug #1147.
StgInfoTable *iptr = get_threaded_info(p);
StgInfoTable *info = INFO_PTR_TO_STRUCT(iptr);
+ load_load_barrier();
P_ q = p;
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 0ece06016a..57d9e0c919 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -157,6 +157,7 @@ copy_tag(StgClosure **p, const StgInfoTable *info,
{
const StgInfoTable *new_info;
new_info = (const StgInfoTable *)cas((StgPtr)&src->header.info, (W_)info, MK_FORWARDING_PTR(to));
+ load_load_barrier();
if (new_info != info) {
#if defined(PROFILING)
// We copied this object at the same time as another
@@ -175,8 +176,11 @@ copy_tag(StgClosure **p, const StgInfoTable *info,
}
}
#else
- src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);
+ // if somebody else reads the forwarding pointer, we better make
+ // sure there's a closure at the end of it.
+ write_barrier();
*p = TAG_CLOSURE(tag,(StgClosure*)to);
+ src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);
#endif /* defined(PARALLEL_GC) */
#if defined(PROFILING)
@@ -251,6 +255,7 @@ spin:
}
#else
info = (W_)src->header.info;
+ load_load_barrier();
#endif /* PARALLEL_GC */
to = alloc_for_copy(size_to_reserve, gen_no);
@@ -703,6 +708,7 @@ loop:
gen_no = bd->dest_no;
info = q->header.info;
+ load_load_barrier();
if (IS_FORWARDING_PTR(info))
{
/* Already evacuated, just return the forwarding address.
@@ -813,11 +819,14 @@ loop:
StgClosure *r;
const StgInfoTable *i;
r = ((StgInd*)q)->indirectee;
+ load_load_barrier();
if (GET_CLOSURE_TAG(r) == 0) {
i = r->header.info;
+ load_load_barrier();
if (IS_FORWARDING_PTR(i)) {
r = (StgClosure *)UN_FORWARDING_PTR(i);
i = r->header.info;
+ load_load_barrier();
}
if (i == &stg_TSO_info
|| i == &stg_WHITEHOLE_info
@@ -1016,6 +1025,7 @@ evacuate_BLACKHOLE(StgClosure **p)
}
gen_no = bd->dest_no;
info = q->header.info;
+ load_load_barrier();
if (IS_FORWARDING_PTR(info))
{
StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);
@@ -1208,6 +1218,7 @@ selector_chain:
#else
// Save the real info pointer (NOTE: not the same as get_itbl()).
info_ptr = (StgWord)p->header.info;
+ load_load_barrier();
SET_INFO((StgClosure *)p,&stg_WHITEHOLE_info);
#endif /* THREADED_RTS */
@@ -1226,6 +1237,7 @@ selector_loop:
// that evacuate() doesn't mind if it gets passed a to-space pointer.
info = (StgInfoTable*)selectee->header.info;
+ load_load_barrier();
if (IS_FORWARDING_PTR(info)) {
// We don't follow pointers into to-space; the constructor
@@ -1235,6 +1247,7 @@ selector_loop:
}
info = INFO_PTR_TO_STRUCT(info);
+ load_load_barrier();
switch (info->type) {
case WHITEHOLE:
goto bale_out; // about to be evacuated by another thread (or a loop).
@@ -1282,6 +1295,7 @@ selector_loop:
if (!IS_FORWARDING_PTR(info_ptr))
{
info = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr);
+ load_load_barrier();
switch (info->type) {
case IND:
case IND_STATIC:
@@ -1333,9 +1347,11 @@ selector_loop:
// indirection, as in evacuate().
if (GET_CLOSURE_TAG(r) == 0) {
i = r->header.info;
+ load_load_barrier();
if (IS_FORWARDING_PTR(i)) {
r = (StgClosure *)UN_FORWARDING_PTR(i);
i = r->header.info;
+ load_load_barrier();
}
if (i == &stg_TSO_info
|| i == &stg_WHITEHOLE_info
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index 11080c1f22..210fbaa1b9 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -84,6 +84,7 @@ isAlive(StgClosure *p)
}
info = q->header.info;
+ load_load_barrier();
if (IS_FORWARDING_PTR(info)) {
// alive!
@@ -131,6 +132,7 @@ revertCAFs( void )
SET_INFO((StgClosure *)c, c->saved_info);
c->saved_info = NULL;
+ write_barrier();
// We must reset static_link lest the major GC finds that
// static_flag==3 and will consequently ignore references
// into code that we are trying to unload. This would result
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 501d958aae..95bb99c304 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -386,6 +386,7 @@ scavenge_thunk_srt(const StgInfoTable *info)
if (!major_gc) return;
thunk_info = itbl_to_thunk_itbl(info);
+ load_load_barrier();
if (thunk_info->i.srt) {
StgClosure *srt = (StgClosure*)GET_SRT(thunk_info);
evacuate(&srt);
@@ -400,6 +401,7 @@ scavenge_fun_srt(const StgInfoTable *info)
if (!major_gc) return;
fun_info = itbl_to_fun_itbl(info);
+ load_load_barrier();
if (fun_info->i.srt) {
StgClosure *srt = (StgClosure*)GET_FUN_SRT(fun_info);
evacuate(&srt);
@@ -462,6 +464,7 @@ scavenge_block (bdescr *bd)
evacuate((StgClosure **)&mvar->value);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
mvar->header.info = &stg_MVAR_DIRTY_info;
} else {
@@ -479,6 +482,7 @@ scavenge_block (bdescr *bd)
evacuate((StgClosure **)&tvar->first_watch_queue_entry);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
tvar->header.info = &stg_TVAR_DIRTY_info;
} else {
@@ -613,6 +617,7 @@ scavenge_block (bdescr *bd)
evacuate(&((StgMutVar *)p)->var);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
} else {
@@ -632,6 +637,7 @@ scavenge_block (bdescr *bd)
evacuate((StgClosure**)&bq->link);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
} else {
@@ -684,6 +690,7 @@ scavenge_block (bdescr *bd)
p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
} else {
@@ -701,6 +708,7 @@ scavenge_block (bdescr *bd)
{
p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
@@ -726,6 +734,7 @@ scavenge_block (bdescr *bd)
}
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
} else {
@@ -747,6 +756,7 @@ scavenge_block (bdescr *bd)
evacuate((StgClosure **)p);
}
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
@@ -887,6 +897,7 @@ scavenge_mark_stack(void)
evacuate((StgClosure **)&mvar->value);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
mvar->header.info = &stg_MVAR_DIRTY_info;
} else {
@@ -903,6 +914,7 @@ scavenge_mark_stack(void)
evacuate((StgClosure **)&tvar->first_watch_queue_entry);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
tvar->header.info = &stg_TVAR_DIRTY_info;
} else {
@@ -1009,6 +1021,7 @@ scavenge_mark_stack(void)
evacuate(&((StgMutVar *)p)->var);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
} else {
@@ -1028,6 +1041,7 @@ scavenge_mark_stack(void)
evacuate((StgClosure**)&bq->link);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
} else {
@@ -1076,6 +1090,7 @@ scavenge_mark_stack(void)
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
} else {
@@ -1095,6 +1110,7 @@ scavenge_mark_stack(void)
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
@@ -1122,6 +1138,7 @@ scavenge_mark_stack(void)
}
gct->eager_promotion = saved_eager;
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
} else {
@@ -1143,6 +1160,7 @@ scavenge_mark_stack(void)
evacuate((StgClosure **)p);
}
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
@@ -1249,6 +1267,7 @@ scavenge_one(StgPtr p)
evacuate((StgClosure **)&mvar->value);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
mvar->header.info = &stg_MVAR_DIRTY_info;
} else {
@@ -1265,6 +1284,7 @@ scavenge_one(StgPtr p)
evacuate((StgClosure **)&tvar->first_watch_queue_entry);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
tvar->header.info = &stg_TVAR_DIRTY_info;
} else {
@@ -1329,6 +1349,7 @@ scavenge_one(StgPtr p)
evacuate(&((StgMutVar *)p)->var);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
} else {
@@ -1348,6 +1369,7 @@ scavenge_one(StgPtr p)
evacuate((StgClosure**)&bq->link);
gct->eager_promotion = saved_eager_promotion;
+ write_barrier();
if (gct->failed_to_evac) {
bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
} else {
@@ -1396,6 +1418,7 @@ scavenge_one(StgPtr p)
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
} else {
@@ -1413,6 +1436,7 @@ scavenge_one(StgPtr p)
// follow everything
scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
@@ -1440,6 +1464,7 @@ scavenge_one(StgPtr p)
}
gct->eager_promotion = saved_eager;
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
} else {
@@ -1461,6 +1486,7 @@ scavenge_one(StgPtr p)
evacuate((StgClosure **)p);
}
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
} else {
@@ -1613,6 +1639,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
mutlist_TREC_CHUNK++; break;
case MUT_PRIM:
pinfo = ((StgClosure*)p)->header.info;
+ load_load_barrier();
if (pinfo == &stg_TVAR_WATCH_QUEUE_info)
mutlist_TVAR_WATCH_QUEUE++;
else if (pinfo == &stg_TREC_HEADER_info)
@@ -1645,6 +1672,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
scavenge_mut_arr_ptrs_marked((StgMutArrPtrs *)p);
+ write_barrier();
if (gct->failed_to_evac) {
((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
} else {
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index a73228dce6..ce67a19840 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -500,9 +500,8 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
bh = (StgInd *)allocate(cap, sizeofW(*bh));
}
bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
- SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
- // Ensure that above writes are visible before we introduce reference as CAF indirectee.
write_barrier();
+ SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
caf->indirectee = (StgClosure *)bh;
write_barrier();