Diffstat (limited to 'rts')
 rts/Apply.cmm           |  2
 rts/Compact.cmm         |  3
 rts/Interpreter.c       | 23
 rts/Messages.c          | 14
 rts/PrimOps.cmm         | 76
 rts/RaiseAsync.c        |  1
 rts/Sparks.c            |  1
 rts/StgMiscClosures.cmm |  6
 rts/ThreadPaused.c      |  6
 rts/Threads.c           | 28
 rts/TopHandler.c        |  1
 rts/Updates.h           | 10
 rts/Weak.c              |  5
 rts/sm/CNF.c            |  5
 rts/sm/Evac.c           | 12
 rts/sm/GC.c             |  2
 rts/sm/GCAux.c          |  1
 rts/sm/MarkWeak.c       | 10
 rts/sm/Sanity.c         |  3
 rts/sm/Scav.c           |  8
 rts/sm/Storage.c        |  8
 21 files changed, 177 insertions(+), 48 deletions(-)
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index 13eb135412..8d7fc3c012 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -62,6 +62,8 @@ again:
W_ info;
P_ untaggedfun;
W_ arity;
+ // We must obey the correct heap object observation pattern in
+ // Note [Heap memory barriers] in SMP.h.
untaggedfun = UNTAG(fun);
info = %INFO_PTR(untaggedfun);
switch [INVALID_OBJECT .. N_CLOSURE_TYPES]
diff --git a/rts/Compact.cmm b/rts/Compact.cmm
index 061646846d..bae94a03cd 100644
--- a/rts/Compact.cmm
+++ b/rts/Compact.cmm
@@ -53,6 +53,9 @@ import CLOSURE base_GHCziIOziException_cannotCompactPinned_closure;
// data structure. It takes the location to store the address of the
// compacted object as an argument, so that it can be tail-recursive.
//
+// N.B. No memory barrier (see Note [Heap memory barriers] in SMP.h) is needed
+// here since this is essentially an allocation of a new object which won't
+// be visible to other cores until after we return.
stg_compactAddWorkerzh (
P_ compact, // The Compact# object
P_ p, // The object to compact
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index e4b9d5696e..2a886ff8a4 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -266,7 +266,6 @@ StgClosure * copyPAP (Capability *cap, StgPAP *oldpap)
uint32_t size = PAP_sizeW(oldpap->n_args);
StgPAP *pap = (StgPAP *)allocate(cap, size);
enterFunCCS(&cap->r, oldpap->header.prof.ccs);
- SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
pap->arity = oldpap->arity;
pap->n_args = oldpap->n_args;
pap->fun = oldpap->fun;
@@ -274,6 +273,8 @@ StgClosure * copyPAP (Capability *cap, StgPAP *oldpap)
for (i = 0; i < ((StgPAP *)pap)->n_args; i++) {
pap->payload[i] = oldpap->payload[i];
}
+ // No write barrier is needed here as this is a new allocation
+ SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
return (StgClosure *)pap;
}
@@ -799,7 +800,6 @@ do_apply:
// build a new PAP and return it.
StgPAP *new_pap;
new_pap = (StgPAP *)allocate(cap, PAP_sizeW(pap->n_args + m));
- SET_HDR(new_pap,&stg_PAP_info,cap->r.rCCCS);
new_pap->arity = pap->arity - n;
new_pap->n_args = pap->n_args + m;
new_pap->fun = pap->fun;
@@ -809,6 +809,8 @@ do_apply:
for (i = 0; i < m; i++) {
new_pap->payload[pap->n_args + i] = (StgClosure *)SpW(i);
}
+ // No write barrier is needed here as this is a new allocation
+ SET_HDR(new_pap,&stg_PAP_info,cap->r.rCCCS);
tagged_obj = (StgClosure *)new_pap;
Sp_addW(m);
goto do_return;
@@ -844,13 +846,14 @@ do_apply:
StgPAP *pap;
uint32_t i;
pap = (StgPAP *)allocate(cap, PAP_sizeW(m));
- SET_HDR(pap, &stg_PAP_info,cap->r.rCCCS);
pap->arity = arity - n;
pap->fun = obj;
pap->n_args = m;
for (i = 0; i < m; i++) {
pap->payload[i] = (StgClosure *)SpW(i);
}
+ // No write barrier is needed here as this is a new allocation
+ SET_HDR(pap, &stg_PAP_info,cap->r.rCCCS);
tagged_obj = (StgClosure *)pap;
Sp_addW(m);
goto do_return;
@@ -1081,7 +1084,6 @@ run_BCO:
// the BCO
size_words = BCO_BITMAP_SIZE(obj) + 2;
new_aps = (StgAP_STACK *) allocate(cap, AP_STACK_sizeW(size_words));
- SET_HDR(new_aps,&stg_AP_STACK_info,cap->r.rCCCS);
new_aps->size = size_words;
new_aps->fun = &stg_dummy_ret_closure;
@@ -1095,6 +1097,9 @@ run_BCO:
new_aps->payload[i] = (StgClosure *)SpW(i-2);
}
+ // No write barrier is needed here as this is a new allocation
+ SET_HDR(new_aps,&stg_AP_STACK_info,cap->r.rCCCS);
+
// Arrange the stack to call the breakpoint IO action, and
// continue execution of this BCO when the IO action returns.
//
@@ -1423,6 +1428,8 @@ run_BCO:
ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
SpW(-1) = (W_)ap;
ap->n_args = n_payload;
+ // No write barrier is needed here as this is a new allocation
+ // visible only from our stack
SET_HDR(ap, &stg_AP_info, cap->r.rCCCS)
Sp_subW(1);
goto nextInsn;
@@ -1434,6 +1441,8 @@ run_BCO:
ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
SpW(-1) = (W_)ap;
ap->n_args = n_payload;
+ // No write barrier is needed here as this is a new allocation
+ // visible only from our stack
SET_HDR(ap, &stg_AP_NOUPD_info, cap->r.rCCCS)
Sp_subW(1);
goto nextInsn;
@@ -1447,6 +1456,8 @@ run_BCO:
SpW(-1) = (W_)pap;
pap->n_args = n_payload;
pap->arity = arity;
+ // No write barrier is needed here as this is a new allocation
+ // visible only from our stack
SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS)
Sp_subW(1);
goto nextInsn;
@@ -1522,12 +1533,14 @@ run_BCO:
itbl->layout.payload.nptrs );
StgClosure* con = (StgClosure*)allocate_NONUPD(cap,request);
ASSERT( itbl->layout.payload.ptrs + itbl->layout.payload.nptrs > 0);
- SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), cap->r.rCCCS);
for (i = 0; i < n_words; i++) {
con->payload[i] = (StgClosure*)SpW(i);
}
Sp_addW(n_words);
Sp_subW(1);
+ // No write barrier is needed here as this is a new allocation
+ // visible only from our stack
+ SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), cap->r.rCCCS);
SpW(0) = (W_)con;
IF_DEBUG(interpreter,
debugBelch("\tBuilt ");
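
The Interpreter.c hunks above all make the same move: every SET_HDR is pushed after the payload stores, so a closure only gains a valid header once its fields are filled in. Below is a minimal C sketch of that discipline, using illustrative names (Closure, build) and a plain malloc allocator rather than the RTS's real types:

    #include <stdlib.h>

    /* Illustrative closure layout; not the RTS's StgClosure. */
    typedef struct {
        const void *info;      /* info-table pointer: the "header" */
        int         n_args;
        void       *payload[];
    } Closure;

    Closure *build(const void *info_tbl, void **args, int n)
    {
        Closure *c = malloc(sizeof(Closure) + (size_t)n * sizeof(void *));
        if (c == NULL) return NULL;
        c->n_args = n;
        for (int i = 0; i < n; i++)
            c->payload[i] = args[i];   /* initialize the payload first... */
        c->info = info_tbl;            /* ...and write the header last */
        /* No barrier is needed yet: the object cannot be observed by other
         * cores until a pointer to it is stored into shared memory, and
         * that publishing store is where a write barrier comes in. */
        return c;
    }
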
diff --git a/rts/Messages.c b/rts/Messages.c
index 2b13b6306c..d878db5eda 100644
--- a/rts/Messages.c
+++ b/rts/Messages.c
@@ -173,6 +173,7 @@ uint32_t messageBlackHole(Capability *cap, MessageBlackHole *msg)
"blackhole %p", (W_)msg->tso->id, msg->bh);
info = bh->header.info;
+ load_load_barrier(); // See Note [Heap memory barriers] in SMP.h
// If we got this message in our inbox, it might be that the
// BLACKHOLE has already been updated, and GC has shorted out the
@@ -196,6 +197,7 @@ loop:
// and turns this into an infinite loop.
p = UNTAG_CLOSURE((StgClosure*)VOLATILE_LOAD(&((StgInd*)bh)->indirectee));
info = p->header.info;
+ load_load_barrier(); // See Note [Heap memory barriers] in SMP.h
if (info == &stg_IND_info)
{
@@ -226,7 +228,6 @@ loop:
bq = (StgBlockingQueue*)allocate(cap, sizeofW(StgBlockingQueue));
// initialise the BLOCKING_QUEUE object
- SET_HDR(bq, &stg_BLOCKING_QUEUE_DIRTY_info, CCS_SYSTEM);
bq->bh = bh;
bq->queue = msg;
bq->owner = owner;
@@ -238,6 +239,11 @@ loop:
// a collision to update a BLACKHOLE and a BLOCKING_QUEUE
// becomes orphaned (see updateThunk()).
bq->link = owner->bq;
+ SET_HDR(bq, &stg_BLOCKING_QUEUE_DIRTY_info, CCS_SYSTEM);
+ // We are about to make the newly-constructed BLOCKING_QUEUE visible to other cores;
+ // a barrier is necessary to ensure that all writes are visible.
+ // See Note [Heap memory barriers] in SMP.h.
+ write_barrier();
owner->bq = bq;
dirty_TSO(cap, owner); // we modified owner->bq
@@ -255,7 +261,7 @@ loop:
}
// point to the BLOCKING_QUEUE from the BLACKHOLE
- write_barrier(); // make the BQ visible
+ write_barrier(); // make the BQ visible, see Note [Heap memory barriers].
((StgInd*)bh)->indirectee = (StgClosure *)bq;
recordClosureMutated(cap,bh); // bh was mutated
@@ -286,10 +292,14 @@ loop:
msg->link = bq->queue;
bq->queue = msg;
+ // No barrier is necessary here: we are only exposing the
+ // closure to the GC. See Note [Heap memory barriers] in SMP.h.
recordClosureMutated(cap,(StgClosure*)msg);
if (info == &stg_BLOCKING_QUEUE_CLEAN_info) {
bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+ // No barrier is necessary here: we are only exposing the
+ // closure to the GC. See Note [Heap memory barriers] in SMP.h.
recordClosureMutated(cap,(StgClosure*)bq);
}
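
Where the object does escape to other cores, as with owner->bq = bq above, the diff pairs SET_HDR with a write_barrier() before the publishing store. Here is a sketch of the same publication step in portable C11, where a single release store expresses the barrier-then-store pair (BQ and publish are illustrative names, not the RTS API):

    #include <stdatomic.h>

    typedef struct BQ {
        struct BQ *link;
        void      *bh, *queue, *owner;
    } BQ;

    void publish(BQ *bq, _Atomic(BQ *) *shared_slot,
                 void *bh, void *queue, void *owner, BQ *head)
    {
        /* construct the object completely... */
        bq->bh    = bh;
        bq->queue = queue;
        bq->owner = owner;
        bq->link  = head;
        /* ...then publish with release ordering, so none of the
         * construction writes can be reordered after this store */
        atomic_store_explicit(shared_slot, bq, memory_order_release);
    }
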
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index d89f0a952b..afb990dda5 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -102,6 +102,7 @@ stg_newPinnedByteArrayzh ( W_ n )
to BA_ALIGN bytes: */
p = p + ((-p - SIZEOF_StgArrBytes) & BA_MASK);
+ /* No write barrier needed since this is a new allocation. */
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
return (p);
@@ -144,6 +145,7 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
<alignment> is a power of 2, which is technically not guaranteed */
p = p + ((-p - SIZEOF_StgArrBytes) & (alignment - 1));
+ /* No write barrier needed since this is a new allocation. */
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
return (p);
@@ -254,6 +256,7 @@ stg_newArrayzh ( W_ n /* words */, gcptr init )
}
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);
+ /* No write barrier needed since this is a new allocation. */
SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgMutArrPtrs_ptrs(arr) = n;
StgMutArrPtrs_size(arr) = size;
@@ -405,6 +408,7 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
}
TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);
+ /* No write barrier needed since this is a new allocation. */
SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgSmallMutArrPtrs_ptrs(arr) = n;
@@ -522,6 +526,7 @@ stg_newMutVarzh ( gcptr init )
ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init);
mv = Hp - SIZEOF_StgMutVar + WDS(1);
+ /* No write barrier needed since this is a new allocation. */
SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
StgMutVar_var(mv) = init;
@@ -700,6 +705,7 @@ stg_mkWeakzh ( gcptr key,
ALLOC_PRIM (SIZEOF_StgWeak)
w = Hp - SIZEOF_StgWeak + WDS(1);
+ // No memory barrier needed as this is a new allocation.
SET_HDR(w, stg_WEAK_info, CCCS);
StgWeak_key(w) = key;
@@ -815,6 +821,7 @@ stg_deRefWeakzh ( gcptr w )
gcptr val;
info = GET_INFO(w);
+ prim_read_barrier;
if (info == stg_WHITEHOLE_info) {
// w is locked by another thread. Now it's not immediately clear if w is
@@ -1385,11 +1392,13 @@ stg_readTVarzh (P_ tvar)
stg_readTVarIOzh ( P_ tvar /* :: TVar a */ )
{
- W_ result;
+ W_ result, resultinfo;
again:
result = StgTVar_current_value(tvar);
- if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
+ resultinfo = %INFO_PTR(result);
+ prim_read_barrier;
+ if (resultinfo == stg_TREC_HEADER_info) {
goto again;
}
return (result);
@@ -1458,6 +1467,7 @@ stg_newMVarzh ()
ALLOC_PRIM_ (SIZEOF_StgMVar, stg_newMVarzh);
mvar = Hp - SIZEOF_StgMVar + WDS(1);
+ // No memory barrier needed as this is a new allocation.
SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS);
// MVARs start dirty: generation 0 has no mutable list
StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure;
@@ -1482,7 +1492,7 @@ stg_newMVarzh ()
stg_takeMVarzh ( P_ mvar /* :: MVar a */ )
{
- W_ val, info, tso, q;
+ W_ val, info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
@@ -1504,9 +1514,12 @@ stg_takeMVarzh ( P_ mvar /* :: MVar a */ )
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
- SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
StgMVarTSOQueue_tso(q) = CurrentTSO;
+ SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
+ // Write barrier before we make the new MVAR_TSO_QUEUE
+ // visible to other cores.
+ prim_write_barrier;
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_head(mvar) = q;
@@ -1536,8 +1549,10 @@ loop:
unlockClosure(mvar, info);
return (val);
}
- if (StgHeader_info(q) == stg_IND_info ||
- StgHeader_info(q) == stg_MSG_NULL_info) {
+ qinfo = StgHeader_info(q);
+ prim_read_barrier;
+ if (qinfo == stg_IND_info ||
+ qinfo == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
@@ -1575,7 +1590,7 @@ loop:
stg_tryTakeMVarzh ( P_ mvar /* :: MVar a */ )
{
- W_ val, info, tso, q;
+ W_ val, info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
@@ -1602,8 +1617,11 @@ loop:
return (1, val);
}
- if (StgHeader_info(q) == stg_IND_info ||
- StgHeader_info(q) == stg_MSG_NULL_info) {
+ qinfo = StgHeader_info(q);
+ prim_read_barrier;
+
+ if (qinfo == stg_IND_info ||
+ qinfo == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
@@ -1642,7 +1660,7 @@ loop:
stg_putMVarzh ( P_ mvar, /* :: MVar a */
P_ val, /* :: a */ )
{
- W_ info, tso, q;
+ W_ info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
@@ -1662,10 +1680,12 @@ stg_putMVarzh ( P_ mvar, /* :: MVar a */
q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
- SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
StgMVarTSOQueue_tso(q) = CurrentTSO;
+ SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
+ prim_write_barrier;
+
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_head(mvar) = q;
} else {
@@ -1692,8 +1712,12 @@ loop:
unlockClosure(mvar, stg_MVAR_DIRTY_info);
return ();
}
- if (StgHeader_info(q) == stg_IND_info ||
- StgHeader_info(q) == stg_MSG_NULL_info) {
+
+ qinfo = StgHeader_info(q);
+ prim_read_barrier;
+
+ if (qinfo == stg_IND_info ||
+ qinfo == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
@@ -1750,7 +1774,7 @@ loop:
stg_tryPutMVarzh ( P_ mvar, /* :: MVar a */
P_ val, /* :: a */ )
{
- W_ info, tso, q;
+ W_ info, tso, q, qinfo;
LOCK_CLOSURE(mvar, info);
@@ -1773,8 +1797,12 @@ loop:
unlockClosure(mvar, stg_MVAR_DIRTY_info);
return (1);
}
- if (StgHeader_info(q) == stg_IND_info ||
- StgHeader_info(q) == stg_MSG_NULL_info) {
+
+ qinfo = StgHeader_info(q);
+ prim_read_barrier;
+
+ if (qinfo == stg_IND_info ||
+ qinfo == stg_MSG_NULL_info) {
q = StgInd_indirectee(q);
goto loop;
}
@@ -1845,10 +1873,12 @@ stg_readMVarzh ( P_ mvar, /* :: MVar a */ )
// readMVars are pushed to the front of the queue, so
// they get handled immediately
- SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
StgMVarTSOQueue_link(q) = StgMVar_head(mvar);
StgMVarTSOQueue_tso(q) = CurrentTSO;
+ SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
+ prim_write_barrier;
+
StgTSO__link(CurrentTSO) = q;
StgTSO_block_info(CurrentTSO) = mvar;
StgTSO_why_blocked(CurrentTSO) = BlockedOnMVarRead::I16;
@@ -1913,6 +1943,10 @@ stg_makeStableNamezh ( P_ obj )
BYTES_TO_WDS(SIZEOF_StgStableName));
SET_HDR(sn_obj, stg_STABLE_NAME_info, CCCS);
StgStableName_sn(sn_obj) = index;
+ // This will make the StableName# object visible to other threads;
+ // be sure that it is completely visible to other cores.
+ // See Note [Heap memory barriers] in SMP.h.
+ prim_write_barrier;
snEntry_sn_obj(W_[stable_name_table] + index*SIZEOF_snEntry) = sn_obj;
} else {
sn_obj = snEntry_sn_obj(W_[stable_name_table] + index*SIZEOF_snEntry);
@@ -1954,6 +1988,7 @@ stg_newBCOzh ( P_ instrs,
ALLOC_PRIM (bytes);
bco = Hp - bytes + WDS(1);
+ // No memory barrier necessary as this is a new allocation.
SET_HDR(bco, stg_BCO_info, CCS_MAIN);
StgBCO_instrs(bco) = instrs;
@@ -1990,6 +2025,7 @@ stg_mkApUpd0zh ( P_ bco )
CCCS_ALLOC(SIZEOF_StgAP);
ap = Hp - SIZEOF_StgAP + WDS(1);
+ // No memory barrier necessary as this is a new allocation.
SET_HDR(ap, stg_AP_info, CCS_MAIN);
StgAP_n_args(ap) = HALF_W_(0);
@@ -2002,6 +2038,7 @@ stg_unpackClosurezh ( P_ closure )
{
W_ info, ptrs, nptrs, p, ptrs_arr, dat_arr;
info = %GET_STD_INFO(UNTAG(closure));
+ prim_read_barrier;
ptrs = TO_W_(%INFO_PTRS(info));
nptrs = TO_W_(%INFO_NPTRS(info));
@@ -2330,7 +2367,10 @@ stg_noDuplicatezh /* no arg list: explicit stack layout */
stg_getApStackValzh ( P_ ap_stack, W_ offset )
{
- if (%INFO_PTR(UNTAG(ap_stack)) == stg_AP_STACK_info) {
+ W_ ap_stackinfo;
+ ap_stackinfo = %INFO_PTR(UNTAG(ap_stack));
+ prim_read_barrier;
+ if (ap_stackinfo == stg_AP_STACK_info) {
return (1,StgAP_STACK_payload(ap_stack,offset));
} else {
return (0,ap_stack);
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index f58f9177c8..807c3e3d30 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -870,6 +870,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
ap->payload[i] = (StgClosure *)*sp++;
}
+ write_barrier(); // XXX: Necessary?
SET_HDR(ap,&stg_AP_STACK_info,
((StgClosure *)frame)->header.prof.ccs /* ToDo */);
TICK_ALLOC_UP_THK(WDS(words+1),0);
diff --git a/rts/Sparks.c b/rts/Sparks.c
index bd5e120863..4022691da2 100644
--- a/rts/Sparks.c
+++ b/rts/Sparks.c
@@ -182,6 +182,7 @@ pruneSparkQueue (Capability *cap)
traceEventSparkFizzle(cap);
} else {
info = spark->header.info;
+ load_load_barrier();
if (IS_FORWARDING_PTR(info)) {
tmp = (StgClosure*)UN_FORWARDING_PTR(info);
/* if valuable work: shift inside the pool */
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index fdd9f1565e..e80ce45172 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -292,12 +292,14 @@ INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE")
TICK_ENT_DYN_IND(); /* tick */
retry:
+ prim_read_barrier;
p = StgInd_indirectee(node);
if (GETTAG(p) != 0) {
return (p);
}
info = StgHeader_info(p);
+ prim_read_barrier;
if (info == stg_IND_info) {
// This could happen, if e.g. we got a BLOCKING_QUEUE that has
// just been replaced with an IND by another thread in
@@ -313,9 +315,11 @@ retry:
("ptr" msg) = ccall allocate(MyCapability() "ptr",
BYTES_TO_WDS(SIZEOF_MessageBlackHole));
- SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM);
MessageBlackHole_tso(msg) = CurrentTSO;
MessageBlackHole_bh(msg) = node;
+ SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM);
+ // messageBlackHole has appropriate memory barriers when this object is exposed.
+ // See Note [Heap memory barriers].
(r) = ccall messageBlackHole(MyCapability() "ptr", msg "ptr");
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index a916891aa8..cccc7ad0b0 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -195,6 +195,7 @@ threadPaused(Capability *cap, StgTSO *tso)
const StgRetInfoTable *info;
const StgInfoTable *bh_info;
const StgInfoTable *cur_bh_info USED_IF_THREADS;
+ const StgInfoTable *frame_info;
StgClosure *bh;
StgPtr stack_end;
uint32_t words_to_squeeze = 0;
@@ -218,6 +219,8 @@ threadPaused(Capability *cap, StgTSO *tso)
frame = (StgClosure *)tso->stackobj->sp;
+ // N.B. We know that the TSO is owned by the current capability so no
+ // memory barriers are needed here.
while ((P_)frame < stack_end) {
info = get_ret_itbl(frame);
@@ -226,7 +229,8 @@ threadPaused(Capability *cap, StgTSO *tso)
case UPDATE_FRAME:
// If we've already marked this frame, then stop here.
- if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) {
+ frame_info = frame->header.info;
+ if (frame_info == (StgInfoTable *)&stg_marked_upd_frame_info) {
if (prev_was_update_frame) {
words_to_squeeze += sizeofW(StgUpdateFrame);
weight += weight_pending;
diff --git a/rts/Threads.c b/rts/Threads.c
index 977635322d..2bdcea1c00 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -126,6 +126,8 @@ createThread(Capability *cap, W_ size)
ACQUIRE_LOCK(&sched_mutex);
tso->id = next_thread_id++; // while we have the mutex
tso->global_link = g0->threads;
+ /* Mutations above need no memory barrier since releasing this lock
+ * provides a release barrier */
g0->threads = tso;
RELEASE_LOCK(&sched_mutex);
@@ -257,8 +259,10 @@ tryWakeupThread (Capability *cap, StgTSO *tso)
{
MessageWakeup *msg;
msg = (MessageWakeup *)allocate(cap,sizeofW(MessageWakeup));
- SET_HDR(msg, &stg_MSG_TRY_WAKEUP_info, CCS_SYSTEM);
msg->tso = tso;
+ SET_HDR(msg, &stg_MSG_TRY_WAKEUP_info, CCS_SYSTEM);
+ // Ensure that the writes constructing the Message are committed before sending it.
+ write_barrier();
sendMessage(cap, tso->cap, (Message*)msg);
debugTraceCap(DEBUG_sched, cap, "message: try wakeup thread %ld on cap %d",
(W_)tso->id, tso->cap->no);
@@ -363,6 +367,7 @@ wakeBlockingQueue(Capability *cap, StgBlockingQueue *bq)
for (msg = bq->queue; msg != (MessageBlackHole*)END_TSO_QUEUE;
msg = msg->link) {
i = msg->header.info;
+ load_load_barrier();
if (i != &stg_IND_info) {
ASSERT(i == &stg_MSG_BLACKHOLE_info);
tryWakeupThread(cap,msg->tso);
@@ -392,15 +397,18 @@ checkBlockingQueues (Capability *cap, StgTSO *tso)
for (bq = tso->bq; bq != (StgBlockingQueue*)END_TSO_QUEUE; bq = next) {
next = bq->link;
- if (bq->header.info == &stg_IND_info) {
+ const StgInfoTable *bqinfo = bq->header.info;
+ load_load_barrier(); // XXX: Is this needed?
+ if (bqinfo == &stg_IND_info) {
// ToDo: could short it out right here, to avoid
// traversing this IND multiple times.
continue;
}
p = bq->bh;
-
- if (p->header.info != &stg_BLACKHOLE_info ||
+ const StgInfoTable *pinfo = p->header.info;
+ load_load_barrier();
+ if (pinfo != &stg_BLACKHOLE_info ||
((StgInd *)p)->indirectee != (StgClosure*)bq)
{
wakeBlockingQueue(cap,bq);
@@ -424,6 +432,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val)
const StgInfoTable *i;
i = thunk->header.info;
+ load_load_barrier();
if (i != &stg_BLACKHOLE_info &&
i != &stg_CAF_BLACKHOLE_info &&
i != &__stg_EAGER_BLACKHOLE_info &&
@@ -444,6 +453,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val)
}
i = v->header.info;
+ load_load_barrier();
if (i == &stg_TSO_info) {
checkBlockingQueues(cap, tso);
return;
@@ -667,6 +677,8 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
new_stack->sp -= chunk_words;
}
+ // No write barriers needed; all of the writes above are to structures
+ // owned by our capability.
tso->stackobj = new_stack;
// we're about to run it, better mark it dirty
@@ -738,6 +750,7 @@ threadStackUnderflow (Capability *cap, StgTSO *tso)
bool performTryPutMVar(Capability *cap, StgMVar *mvar, StgClosure *value)
{
const StgInfoTable *info;
+ const StgInfoTable *qinfo;
StgMVarTSOQueue *q;
StgTSO *tso;
@@ -762,8 +775,11 @@ loop:
unlockClosure((StgClosure*)mvar, &stg_MVAR_DIRTY_info);
return true;
}
- if (q->header.info == &stg_IND_info ||
- q->header.info == &stg_MSG_NULL_info) {
+
+ qinfo = q->header.info;
+ load_load_barrier();
+ if (qinfo == &stg_IND_info ||
+ qinfo == &stg_MSG_NULL_info) {
q = (StgMVarTSOQueue*)((StgInd*)q)->indirectee;
goto loop;
}
diff --git a/rts/TopHandler.c b/rts/TopHandler.c
index c0ac936b85..d5175015e7 100644
--- a/rts/TopHandler.c
+++ b/rts/TopHandler.c
@@ -29,6 +29,7 @@ StgTSO *getTopHandlerThread(void) {
StgWeak *weak = (StgWeak*)deRefStablePtr(topHandlerPtr);
RELEASE_LOCK(&m);
const StgInfoTable *info = weak->header.info;
+ load_load_barrier();
if (info == &stg_WEAK_info) {
StgClosure *key = ((StgWeak*)weak)->key;
diff --git a/rts/Updates.h b/rts/Updates.h
index 1ba398bd35..1bd3e065af 100644
--- a/rts/Updates.h
+++ b/rts/Updates.h
@@ -39,10 +39,16 @@
PROF_HDR_FIELDS(w_,ccs,p2) \
p_ updatee
-
+/*
+ * Getting the memory barriers correct here is quite tricky. Essentially
+ * the write barrier ensures that any writes to the new indirectee are visible
+ * before we introduce the indirection.
+ * See Note [Heap memory barriers] in SMP.h.
+ */
#define updateWithIndirection(p1, p2, and_then) \
W_ bd; \
\
+ prim_write_barrier; \
OVERWRITING_CLOSURE(p1); \
StgInd_indirectee(p1) = p2; \
prim_write_barrier; \
@@ -69,6 +75,8 @@ INLINE_HEADER void updateWithIndirection (Capability *cap,
ASSERT( (P_)p1 != (P_)p2 );
/* not necessarily true: ASSERT( !closure_IND(p1) ); */
/* occurs in RaiseAsync.c:raiseAsync() */
+ /* See Note [Heap memory barriers] in SMP.h */
+ write_barrier();
OVERWRITING_CLOSURE(p1);
((StgInd *)p1)->indirectee = p2;
write_barrier();
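
A sketch of the ordering the two write barriers in updateWithIndirection are after, again in illustrative C11 rather than Cmm: the value must be fully built before the indirectee store, and the indirectee must be visible before the header flip that readers dispatch on (ind_info is a stand-in for a real indirection info table):

    #include <stdatomic.h>

    typedef struct {
        _Atomic(const void *) info;
        void *indirectee;
    } Ind;

    static const char ind_info = 0;   /* stand-in info table */

    void update_with_indirection(Ind *thunk, void *value)
    {
        /* first barrier: writes building `value` happen-before
         * the indirectee store */
        atomic_thread_fence(memory_order_release);
        thunk->indirectee = value;
        /* second barrier: the indirectee is visible before the
         * header becomes an indirection */
        atomic_store_explicit(&thunk->info, &ind_info,
                              memory_order_release);
    }
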
diff --git a/rts/Weak.c b/rts/Weak.c
index a322d822af..ec998c214f 100644
--- a/rts/Weak.c
+++ b/rts/Weak.c
@@ -57,7 +57,9 @@ runAllCFinalizers(StgWeak *list)
// If there's no major GC between the time that the finalizer for the
// object from the oldest generation is manually called and shutdown
// we end up running the same finalizer twice. See #7170.
- if (w->header.info != &stg_DEAD_WEAK_info) {
+ const StgInfoTable *winfo = w->header.info;
+ load_load_barrier();
+ if (winfo != &stg_DEAD_WEAK_info) {
runCFinalizers((StgCFinalizerList *)w->cfinalizers);
}
}
@@ -138,6 +140,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
size = n + mutArrPtrsCardTableSize(n);
arr = (StgMutArrPtrs *)allocate(cap, sizeofW(StgMutArrPtrs) + size);
TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
+ // No write barrier needed here; this array is only going to be referred to by this core.
SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, CCS_SYSTEM);
arr->ptrs = n;
arr->size = size;
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index d0447f867c..0432505cd2 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -542,8 +542,9 @@ insertCompactHash (Capability *cap,
StgClosure *p, StgClosure *to)
{
insertHashTable(str->hash, (StgWord)p, (const void*)to);
- if (str->header.info == &stg_COMPACT_NFDATA_CLEAN_info) {
- str->header.info = &stg_COMPACT_NFDATA_DIRTY_info;
+ const StgInfoTable *strinfo = str->header.info;
+ if (strinfo == &stg_COMPACT_NFDATA_CLEAN_info) {
+ str->header.info = &stg_COMPACT_NFDATA_DIRTY_info;
recordClosureMutated(cap, (StgClosure*)str);
}
}
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 2a2a26ec09..7c82caa185 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -131,7 +131,7 @@ copy_tag(StgClosure **p, const StgInfoTable *info,
#else
src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);
*p = TAG_CLOSURE(tag,(StgClosure*)to);
-#endif
+#endif /* defined(PARALLEL_GC) */
#if defined(PROFILING)
// We store the size of the just evacuated object in the LDV word so that
@@ -194,7 +194,7 @@ spin:
if (info == (W_)&stg_WHITEHOLE_info) {
#if defined(PROF_SPIN)
whitehole_gc_spin++;
-#endif
+#endif /* PROF_SPIN */
busy_wait_nop();
goto spin;
}
@@ -205,7 +205,7 @@ spin:
}
#else
info = (W_)src->header.info;
-#endif
+#endif /* PARALLEL_GC */
to = alloc_for_copy(size_to_reserve, gen_no);
@@ -216,8 +216,8 @@ spin:
}
write_barrier();
- src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to);
*p = (StgClosure *)to;
+ src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to);
#if defined(PROFILING)
// We store the size of the just evacuated object in the LDV word so that
@@ -1099,6 +1099,7 @@ selector_chain:
// need the write-barrier stuff.
// - undo the chain we've built to point to p.
SET_INFO((StgClosure *)p, (const StgInfoTable *)info_ptr);
+ write_barrier();
*q = (StgClosure *)p;
if (evac) evacuate(q);
unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);
@@ -1109,7 +1110,7 @@ selector_chain:
// Save the real info pointer (NOTE: not the same as get_itbl()).
info_ptr = (StgWord)p->header.info;
SET_INFO((StgClosure *)p,&stg_WHITEHOLE_info);
-#endif
+#endif /* THREADED_RTS */
field = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr)->layout.selector_offset;
@@ -1165,6 +1166,7 @@ selector_loop:
SET_INFO((StgClosure*)p, (StgInfoTable *)info_ptr);
OVERWRITING_CLOSURE((StgClosure*)p);
SET_INFO((StgClosure*)p, &stg_WHITEHOLE_info);
+ write_barrier();
}
#endif
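
The reordering in copy_tag()/copy() above follows the same shape from the GC's side: finish the copy, then install the forwarding pointer. A sketch with an illustrative layout, where a release store plays the role of write_barrier() before the MK_FORWARDING_PTR store, so another GC thread that sees the forwarding pointer also sees the finished copy:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <string.h>

    #define FORWARDING_TAG ((uintptr_t)1)

    typedef struct {
        _Atomic(uintptr_t) info;  /* info pointer or forwarding pointer */
        char body[];
    } Obj;

    void forward(Obj *src, Obj *to, size_t bytes)
    {
        memcpy(to, src, bytes);                      /* copy first... */
        atomic_store_explicit(&src->info,
                              (uintptr_t)to | FORWARDING_TAG,
                              memory_order_release); /* ...then forward */
    }
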
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 4bf540a4a7..92a5e229a1 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -1104,6 +1104,8 @@ loop:
// scavenge_loop() only exits when there's no work to do
+ // This atomic decrement also serves as a full barrier to ensure that any
+ // writes we made during scavenging are visible to other threads.
#if defined(DEBUG)
r = dec_running();
#else
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index e8ca0c4002..650dc2c1df 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -83,6 +83,7 @@ isAlive(StgClosure *p)
}
info = INFO_PTR_TO_STRUCT(info);
+ load_load_barrier();
switch (info->type) {
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index d5982e2f64..7475b5e625 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -234,16 +234,22 @@ static bool tidyWeakList(generation *gen)
last_w = &gen->old_weak_ptr_list;
for (w = gen->old_weak_ptr_list; w != NULL; w = next_w) {
+ info = w->header.info;
+ /* N.B. This function is executed only during the serial part of GC
+ * so there is no potential for data races and therefore
+ * no need for memory barriers.
+ */
+
/* There might be a DEAD_WEAK on the list if finalizeWeak# was
* called on a live weak pointer object. Just remove it.
*/
- if (w->header.info == &stg_DEAD_WEAK_info) {
+ if (info == &stg_DEAD_WEAK_info) {
next_w = w->link;
*last_w = next_w;
continue;
}
- info = get_itbl((StgClosure *)w);
+ info = INFO_PTR_TO_STRUCT(info);
switch (info->type) {
case WEAK:
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 8082b7e6d0..ff76f747c9 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -233,6 +233,7 @@ checkClosure( const StgClosure* p )
p = UNTAG_CONST_CLOSURE(p);
info = p->header.info;
+ load_load_barrier();
if (IS_FORWARDING_PTR(info)) {
barf("checkClosure: found EVACUATED closure %d", info->type);
@@ -243,6 +244,7 @@ checkClosure( const StgClosure* p )
#endif
info = INFO_PTR_TO_STRUCT(info);
+ load_load_barrier();
switch (info->type) {
@@ -564,6 +566,7 @@ checkTSO(StgTSO *tso)
next = tso->_link;
info = (const StgInfoTable*) tso->_link->header.info;
+ load_load_barrier();
ASSERT(next == END_TSO_QUEUE ||
info == &stg_MVAR_TSO_QUEUE_info ||
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 73a790e9ed..c486cd96c5 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -431,7 +431,7 @@ scavenge_block (bdescr *bd)
// time around the loop.
while (p < bd->free || (bd == ws->todo_bd && p < ws->todo_free)) {
- ASSERT(bd->link == NULL);
+ ASSERT(bd->link == NULL);
ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
info = get_itbl((StgClosure *)p);
@@ -1580,6 +1580,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
#if defined(DEBUG)
+ const StgInfoTable *pinfo;
switch (get_itbl((StgClosure *)p)->type) {
case MUT_VAR_CLEAN:
// can happen due to concurrent writeMutVars
@@ -1599,9 +1600,10 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
case TREC_CHUNK:
mutlist_TREC_CHUNK++; break;
case MUT_PRIM:
- if (((StgClosure*)p)->header.info == &stg_TVAR_WATCH_QUEUE_info)
+ pinfo = ((StgClosure*)p)->header.info;
+ if (pinfo == &stg_TVAR_WATCH_QUEUE_info)
mutlist_TVAR_WATCH_QUEUE++;
- else if (((StgClosure*)p)->header.info == &stg_TREC_HEADER_info)
+ else if (pinfo == &stg_TREC_HEADER_info)
mutlist_TREC_HEADER++;
else
mutlist_OTHERS++;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 2e03b77695..3f91905f3c 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -407,8 +407,10 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
// Allocate the blackhole indirection closure
bh = (StgInd *)allocate(cap, sizeofW(*bh));
- SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
+ SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
+ // Ensure that the above writes are visible before we introduce the reference as the CAF's indirectee.
+ write_barrier();
caf->indirectee = (StgClosure *)bh;
write_barrier();
@@ -1081,6 +1083,8 @@ void
dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
{
Capability *cap = regTableToCapability(reg);
+ // No barrier required here as no other heap object fields are read. See
+ // Note [Heap memory barriers] in SMP.h.
if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
p->header.info = &stg_MUT_VAR_DIRTY_info;
recordClosureMutated(cap,p);
@@ -1090,6 +1094,8 @@ dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
void
dirty_TVAR(Capability *cap, StgTVar *p)
{
+ // No barrier required here as no other heap object fields are read. See
+ // Note [Heap memory barriers] in SMP.h.
if (p->header.info == &stg_TVAR_CLEAN_info) {
p->header.info = &stg_TVAR_DIRTY_info;
recordClosureMutated(cap,(StgClosure*)p);
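
Finally, a sketch of why dirty_MUT_VAR and dirty_TVAR get away without any barrier, with illustrative names standing in for the CLEAN/DIRTY info tables: the header flip only routes the closure onto a capability-local mutable list for the stop-the-world GC, and no other heap object fields are read concurrently:

    static const char clean_info = 0, dirty_info = 0;  /* stand-in tags */

    typedef struct { const void *info; void *var; } MutVar;

    static void record_closure_mutated(MutVar *p)
    {
        (void)p;  /* would append p to this capability's mutable list */
    }

    void dirty_mut_var(MutVar *p)
    {
        /* no barrier: nothing here reads other heap object fields,
         * and the mutable list is only consumed by the GC */
        if (p->info == &clean_info) {
            p->info = &dirty_info;
            record_closure_mutated(p);
        }
    }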