Diffstat (limited to 'rts')
-rw-r--r--  rts/Apply.cmm           |  2
-rw-r--r--  rts/Compact.cmm         |  3
-rw-r--r--  rts/Interpreter.c       | 23
-rw-r--r--  rts/Messages.c          | 14
-rw-r--r--  rts/PrimOps.cmm         | 76
-rw-r--r--  rts/RaiseAsync.c        |  1
-rw-r--r--  rts/Sparks.c            |  1
-rw-r--r--  rts/StgMiscClosures.cmm |  6
-rw-r--r--  rts/ThreadPaused.c      |  6
-rw-r--r--  rts/Threads.c           | 28
-rw-r--r--  rts/TopHandler.c        |  1
-rw-r--r--  rts/Updates.h           | 10
-rw-r--r--  rts/Weak.c              |  5
-rw-r--r--  rts/sm/CNF.c            |  5
-rw-r--r--  rts/sm/Evac.c           | 12
-rw-r--r--  rts/sm/GC.c             |  2
-rw-r--r--  rts/sm/GCAux.c          |  1
-rw-r--r--  rts/sm/MarkWeak.c       | 10
-rw-r--r--  rts/sm/Sanity.c         |  3
-rw-r--r--  rts/sm/Scav.c           |  8
-rw-r--r--  rts/sm/Storage.c        |  8
21 files changed, 177 insertions, 48 deletions
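Every change below is an instance of the ordering discipline described in Note [Heap memory barriers] in SMP.h: a closure's payload must be written before its header, a write barrier must separate construction from publication, and an observer must read the header before it trusts the payload. As orientation, here is a minimal C11 sketch of that publish/observe pattern. This is not RTS code: the struct layout and names are hypothetical, and the fences stand in for the RTS's write_barrier()/load_load_barrier().

#include <stdatomic.h>
#include <stddef.h>

struct closure {
    const void *info;       /* stands in for the StgHeader info pointer */
    void *payload[2];
};

static _Atomic(struct closure *) shared;    /* reachable by other threads */

void publish(struct closure *c, const void *info, void *a, void *b)
{
    c->payload[0] = a;                          /* 1. write the body...     */
    c->payload[1] = b;
    c->info = info;                             /* 2. ...then the header    */
    atomic_thread_fence(memory_order_release);  /* write_barrier()          */
    atomic_store_explicit(&shared, c,           /* 3. only now make it      */
                          memory_order_relaxed);/*    reachable             */
}

void *observe(void)
{
    struct closure *c = atomic_load_explicit(&shared, memory_order_relaxed);
    if (c == NULL)
        return NULL;
    const void *info = c->info;                 /* read the header first... */
    atomic_thread_fence(memory_order_acquire);  /* load_load_barrier()      */
    (void)info;                                 /* ...only now is it safe   */
    return c->payload[0];                       /*    to touch the payload  */
}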
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index 13eb135412..8d7fc3c012 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -62,6 +62,8 @@ again:
     W_ info;
     P_ untaggedfun;
     W_ arity;
+    // We must obey the correct heap object observation pattern in
+    // Note [Heap memory barriers] in SMP.h.
     untaggedfun = UNTAG(fun);
     info = %INFO_PTR(untaggedfun);
     switch [INVALID_OBJECT .. N_CLOSURE_TYPES]
diff --git a/rts/Compact.cmm b/rts/Compact.cmm
index 061646846d..bae94a03cd 100644
--- a/rts/Compact.cmm
+++ b/rts/Compact.cmm
@@ -53,6 +53,9 @@ import CLOSURE base_GHCziIOziException_cannotCompactPinned_closure;
 // data structure. It takes the location to store the address of the
 // compacted object as an argument, so that it can be tail-recursive.
 //
+// N.B. No memory barrier (see Note [Heap memory barriers] in SMP.h) is needed
+// here since this is essentially an allocation of a new object which won't
+// be visible to other cores until after we return.
 stg_compactAddWorkerzh (
     P_ compact,  // The Compact# object
     P_ p,        // The object to compact
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index e4b9d5696e..2a886ff8a4 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -266,7 +266,6 @@ StgClosure * copyPAP (Capability *cap, StgPAP *oldpap)
     uint32_t size = PAP_sizeW(oldpap->n_args);
     StgPAP *pap = (StgPAP *)allocate(cap, size);
     enterFunCCS(&cap->r, oldpap->header.prof.ccs);
-    SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
     pap->arity = oldpap->arity;
     pap->n_args = oldpap->n_args;
     pap->fun = oldpap->fun;
@@ -274,6 +273,8 @@ StgClosure * copyPAP (Capability *cap, StgPAP *oldpap)
     for (i = 0; i < ((StgPAP *)pap)->n_args; i++) {
         pap->payload[i] = oldpap->payload[i];
     }
+    // No write barrier is needed here as this is a new allocation
+    SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
     return (StgClosure *)pap;
 }
 
@@ -799,7 +800,6 @@ do_apply:
                 // build a new PAP and return it.
                 StgPAP *new_pap;
                 new_pap = (StgPAP *)allocate(cap, PAP_sizeW(pap->n_args + m));
-                SET_HDR(new_pap,&stg_PAP_info,cap->r.rCCCS);
                 new_pap->arity = pap->arity - n;
                 new_pap->n_args = pap->n_args + m;
                 new_pap->fun = pap->fun;
@@ -809,6 +809,8 @@ do_apply:
                 for (i = 0; i < m; i++) {
                     new_pap->payload[pap->n_args + i] = (StgClosure *)SpW(i);
                 }
+                // No write barrier is needed here as this is a new allocation
+                SET_HDR(new_pap,&stg_PAP_info,cap->r.rCCCS);
                 tagged_obj = (StgClosure *)new_pap;
                 Sp_addW(m);
                 goto do_return;
@@ -844,13 +846,14 @@ do_apply:
                 StgPAP *pap;
                 uint32_t i;
                 pap = (StgPAP *)allocate(cap, PAP_sizeW(m));
-                SET_HDR(pap, &stg_PAP_info,cap->r.rCCCS);
                 pap->arity = arity - n;
                 pap->fun = obj;
                 pap->n_args = m;
                 for (i = 0; i < m; i++) {
                     pap->payload[i] = (StgClosure *)SpW(i);
                 }
+                // No write barrier is needed here as this is a new allocation
+                SET_HDR(pap, &stg_PAP_info,cap->r.rCCCS);
                 tagged_obj = (StgClosure *)pap;
                 Sp_addW(m);
                 goto do_return;
@@ -1081,7 +1084,6 @@ run_BCO:
             // the BCO
             size_words = BCO_BITMAP_SIZE(obj) + 2;
             new_aps = (StgAP_STACK *) allocate(cap, AP_STACK_sizeW(size_words));
-            SET_HDR(new_aps,&stg_AP_STACK_info,cap->r.rCCCS);
             new_aps->size = size_words;
             new_aps->fun = &stg_dummy_ret_closure;
 
@@ -1095,6 +1097,9 @@ run_BCO:
                new_aps->payload[i] = (StgClosure *)SpW(i-2);
             }
 
+            // No write barrier is needed here as this is a new allocation
+            SET_HDR(new_aps,&stg_AP_STACK_info,cap->r.rCCCS);
+
             // Arrange the stack to call the breakpoint IO action, and
             // continue execution of this BCO when the IO action returns.
             //
@@ -1423,6 +1428,8 @@ run_BCO:
             ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
             SpW(-1) = (W_)ap;
             ap->n_args = n_payload;
+            // No write barrier is needed here as this is a new allocation
+            // visible only from our stack
             SET_HDR(ap, &stg_AP_info, cap->r.rCCCS)
             Sp_subW(1);
             goto nextInsn;
@@ -1434,6 +1441,8 @@ run_BCO:
             ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
             SpW(-1) = (W_)ap;
             ap->n_args = n_payload;
+            // No write barrier is needed here as this is a new allocation
+            // visible only from our stack
             SET_HDR(ap, &stg_AP_NOUPD_info, cap->r.rCCCS)
             Sp_subW(1);
             goto nextInsn;
@@ -1447,6 +1456,8 @@ run_BCO:
             SpW(-1) = (W_)pap;
             pap->n_args = n_payload;
             pap->arity = arity;
+            // No write barrier is needed here as this is a new allocation
+            // visible only from our stack
             SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS)
             Sp_subW(1);
             goto nextInsn;
@@ -1522,12 +1533,14 @@ run_BCO:
                              itbl->layout.payload.nptrs );
             StgClosure* con = (StgClosure*)allocate_NONUPD(cap,request);
             ASSERT( itbl->layout.payload.ptrs + itbl->layout.payload.nptrs > 0);
-            SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), cap->r.rCCCS);
             for (i = 0; i < n_words; i++) {
                 con->payload[i] = (StgClosure*)SpW(i);
             }
             Sp_addW(n_words);
             Sp_subW(1);
+            // No write barrier is needed here as this is a new allocation
+            // visible only from our stack
+            SET_HDR(con, (StgInfoTable*)BCO_LIT(o_itbl), cap->r.rCCCS);
             SpW(0) = (W_)con;
             IF_DEBUG(interpreter,
                      debugBelch("\tBuilt ");
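A recurring shape in the Interpreter.c hunks above: SET_HDR moves after the payload writes, with a comment but no barrier. That is sound because the object is still thread-local; nothing else can reach it until a pointer to it is written somewhere shared. A sketch of the reasoning, with hypothetical names (this is not the RTS allocator or the real PAP layout):

#include <stdlib.h>

struct pap {
    const void *info;
    unsigned arity, n_args;
    void *payload[];
};

/* Freshly allocated and still private to this thread: no other core can
 * observe a half-built object, so no barrier is needed. Writing the
 * header last keeps the invariant that a well-formed header implies a
 * well-formed body, which matters once the object is published. */
static struct pap *make_pap(const void *pap_info, unsigned arity,
                            unsigned n_args, void **args)
{
    struct pap *p = malloc(sizeof *p + n_args * sizeof(void *));
    if (p == NULL) return NULL;
    p->arity  = arity;                  /* body first...                 */
    p->n_args = n_args;
    for (unsigned i = 0; i < n_args; i++)
        p->payload[i] = args[i];
    p->info = pap_info;                 /* ...header last, no fence      */
    return p;                           /* caller publishes with a fence */
}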
diff --git a/rts/Messages.c b/rts/Messages.c
index 2b13b6306c..d878db5eda 100644
--- a/rts/Messages.c
+++ b/rts/Messages.c
@@ -173,6 +173,7 @@ uint32_t messageBlackHole(Capability *cap, MessageBlackHole *msg)
                   "blackhole %p", (W_)msg->tso->id, msg->bh);
 
     info = bh->header.info;
+    load_load_barrier();  // See Note [Heap memory barriers] in SMP.h
 
     // If we got this message in our inbox, it might be that the
     // BLACKHOLE has already been updated, and GC has shorted out the
@@ -196,6 +197,7 @@ loop:
     // and turns this into an infinite loop.
     p = UNTAG_CLOSURE((StgClosure*)VOLATILE_LOAD(&((StgInd*)bh)->indirectee));
     info = p->header.info;
+    load_load_barrier();  // See Note [Heap memory barriers] in SMP.h
 
     if (info == &stg_IND_info)
     {
@@ -226,7 +228,6 @@ loop:
 
         bq = (StgBlockingQueue*)allocate(cap, sizeofW(StgBlockingQueue));
 
         // initialise the BLOCKING_QUEUE object
-        SET_HDR(bq, &stg_BLOCKING_QUEUE_DIRTY_info, CCS_SYSTEM);
         bq->bh = bh;
         bq->queue = msg;
         bq->owner = owner;
@@ -238,6 +239,11 @@ loop:
         // a collision to update a BLACKHOLE and a BLOCKING_QUEUE
         // becomes orphaned (see updateThunk()).
         bq->link = owner->bq;
+        SET_HDR(bq, &stg_BLOCKING_QUEUE_DIRTY_info, CCS_SYSTEM);
+        // We are about to make the newly-constructed message visible to other cores;
+        // a barrier is necessary to ensure that all writes are visible.
+        // See Note [Heap memory barriers] in SMP.h.
+        write_barrier();
         owner->bq = bq;
         dirty_TSO(cap, owner); // we modified owner->bq
 
@@ -255,7 +261,7 @@ loop:
         }
 
         // point to the BLOCKING_QUEUE from the BLACKHOLE
-        write_barrier(); // make the BQ visible
+        write_barrier(); // make the BQ visible, see Note [Heap memory barriers].
         ((StgInd*)bh)->indirectee = (StgClosure *)bq;
         recordClosureMutated(cap,bh); // bh was mutated
 
@@ -286,10 +292,14 @@ loop:
         msg->link = bq->queue;
         bq->queue = msg;
+        // No barrier is necessary here: we are only exposing the
+        // closure to the GC. See Note [Heap memory barriers] in SMP.h.
         recordClosureMutated(cap,(StgClosure*)msg);
 
         if (info == &stg_BLOCKING_QUEUE_CLEAN_info) {
             bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+            // No barrier is necessary here: we are only exposing the
+            // closure to the GC. See Note [Heap memory barriers] in SMP.h.
             recordClosureMutated(cap,(StgClosure*)bq);
         }
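The observer side of the same contract, as messageBlackHole now does it: load the info pointer into a local, fence, and only then inspect the object's fields. A C11 sketch with a dummy stand-in for the info table (hypothetical; the RTS compares against real info tables such as stg_IND_info):

#include <stdatomic.h>

struct ind { const void *info; void *indirectee; };

static const char IND_info;     /* dummy tag standing in for stg_IND_info */

static void *follow_if_indirection(struct ind *p)
{
    const void *info = p->info;                 /* read the header first...  */
    atomic_thread_fence(memory_order_acquire);  /* load_load_barrier()       */
    if (info == &IND_info)                      /* ...then trust the fields  */
        return p->indirectee;
    return p;
}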
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index d89f0a952b..afb990dda5 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -102,6 +102,7 @@ stg_newPinnedByteArrayzh ( W_ n )
        to BA_ALIGN bytes: */
     p = p + ((-p - SIZEOF_StgArrBytes) & BA_MASK);
 
+    /* No write barrier needed since this is a new allocation. */
     SET_HDR(p, stg_ARR_WORDS_info, CCCS);
     StgArrBytes_bytes(p) = n;
     return (p);
@@ -144,6 +145,7 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
        <alignment> is a power of 2, which is technically not guaranteed */
     p = p + ((-p - SIZEOF_StgArrBytes) & (alignment - 1));
 
+    /* No write barrier needed since this is a new allocation. */
     SET_HDR(p, stg_ARR_WORDS_info, CCCS);
     StgArrBytes_bytes(p) = n;
     return (p);
@@ -254,6 +256,7 @@ stg_newArrayzh ( W_ n /* words */, gcptr init )
     }
 
     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);
+    /* No write barrier needed since this is a new allocation. */
     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
     StgMutArrPtrs_ptrs(arr) = n;
     StgMutArrPtrs_size(arr) = size;
@@ -405,6 +408,7 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
     }
 
     TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);
+    /* No write barrier needed since this is a new allocation. */
     SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
     StgSmallMutArrPtrs_ptrs(arr) = n;
 
@@ -522,6 +526,7 @@ stg_newMutVarzh ( gcptr init )
     ALLOC_PRIM_P (SIZEOF_StgMutVar, stg_newMutVarzh, init);
 
     mv = Hp - SIZEOF_StgMutVar + WDS(1);
+    /* No write barrier needed since this is a new allocation. */
     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
     StgMutVar_var(mv) = init;
 
@@ -700,6 +705,7 @@ stg_mkWeakzh ( gcptr key,
     ALLOC_PRIM (SIZEOF_StgWeak)
 
     w = Hp - SIZEOF_StgWeak + WDS(1);
+    // No memory barrier needed as this is a new allocation.
     SET_HDR(w, stg_WEAK_info, CCCS);
 
     StgWeak_key(w) = key;
@@ -815,6 +821,7 @@ stg_deRefWeakzh ( gcptr w )
     gcptr val;
 
     info = GET_INFO(w);
+    prim_read_barrier;
 
     if (info == stg_WHITEHOLE_info) {
         // w is locked by another thread. Now it's not immediately clear if w is
@@ -1385,11 +1392,13 @@ stg_readTVarzh (P_ tvar)
 
 stg_readTVarIOzh ( P_ tvar /* :: TVar a */ )
 {
-    W_ result;
+    W_ result, resultinfo;
 
 again:
     result = StgTVar_current_value(tvar);
-    if (%INFO_PTR(result) == stg_TREC_HEADER_info) {
+    resultinfo = %INFO_PTR(result);
+    prim_read_barrier;
+    if (resultinfo == stg_TREC_HEADER_info) {
         goto again;
     }
     return (result);
@@ -1458,6 +1467,7 @@ stg_newMVarzh ()
     ALLOC_PRIM_ (SIZEOF_StgMVar, stg_newMVarzh);
 
     mvar = Hp - SIZEOF_StgMVar + WDS(1);
+    // No memory barrier needed as this is a new allocation.
     SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS);
         // MVARs start dirty: generation 0 has no mutable list
     StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure;
@@ -1482,7 +1492,7 @@ stg_newMVarzh ()
 
 stg_takeMVarzh ( P_ mvar /* :: MVar a */ )
 {
-    W_ val, info, tso, q;
+    W_ val, info, tso, q, qinfo;
 
     LOCK_CLOSURE(mvar, info);
 
@@ -1504,9 +1514,12 @@ stg_takeMVarzh ( P_ mvar /* :: MVar a */ )
 
         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
 
-        SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
         StgMVarTSOQueue_tso(q)  = CurrentTSO;
+        SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
+        // Write barrier before we make the new MVAR_TSO_QUEUE
+        // visible to other cores.
+        prim_write_barrier;
 
         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
             StgMVar_head(mvar) = q;
@@ -1536,8 +1549,10 @@ loop:
         unlockClosure(mvar, info);
         return (val);
     }
-    if (StgHeader_info(q) == stg_IND_info ||
-        StgHeader_info(q) == stg_MSG_NULL_info) {
+    qinfo = StgHeader_info(q);
+    prim_read_barrier;
+    if (qinfo == stg_IND_info ||
+        qinfo == stg_MSG_NULL_info) {
         q = StgInd_indirectee(q);
         goto loop;
     }
@@ -1575,7 +1590,7 @@ loop:
 
 stg_tryTakeMVarzh ( P_ mvar /* :: MVar a */ )
 {
-    W_ val, info, tso, q;
+    W_ val, info, tso, q, qinfo;
 
     LOCK_CLOSURE(mvar, info);
 
@@ -1602,8 +1617,11 @@ loop:
         return (1, val);
     }
 
-    if (StgHeader_info(q) == stg_IND_info ||
-        StgHeader_info(q) == stg_MSG_NULL_info) {
+    qinfo = StgHeader_info(q);
+    prim_read_barrier;
+
+    if (qinfo == stg_IND_info ||
+        qinfo == stg_MSG_NULL_info) {
         q = StgInd_indirectee(q);
         goto loop;
     }
@@ -1642,7 +1660,7 @@ loop:
 stg_putMVarzh ( P_ mvar, /* :: MVar a */
                 P_ val,  /* :: a */ )
 {
-    W_ info, tso, q;
+    W_ info, tso, q, qinfo;
 
     LOCK_CLOSURE(mvar, info);
 
@@ -1662,10 +1680,12 @@ stg_putMVarzh ( P_ mvar, /* :: MVar a */
 
         q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1);
 
-        SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
         StgMVarTSOQueue_link(q) = END_TSO_QUEUE;
         StgMVarTSOQueue_tso(q)  = CurrentTSO;
 
+        SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
+        prim_write_barrier;
+
         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
             StgMVar_head(mvar) = q;
         } else {
@@ -1692,8 +1712,12 @@ loop:
         unlockClosure(mvar, stg_MVAR_DIRTY_info);
         return ();
     }
-    if (StgHeader_info(q) == stg_IND_info ||
-        StgHeader_info(q) == stg_MSG_NULL_info) {
+
+    qinfo = StgHeader_info(q);
+    prim_read_barrier;
+
+    if (qinfo == stg_IND_info ||
+        qinfo == stg_MSG_NULL_info) {
         q = StgInd_indirectee(q);
         goto loop;
     }
@@ -1750,7 +1774,7 @@ loop:
 stg_tryPutMVarzh ( P_ mvar, /* :: MVar a */
                    P_ val,  /* :: a */ )
 {
-    W_ info, tso, q;
+    W_ info, tso, q, qinfo;
 
     LOCK_CLOSURE(mvar, info);
 
@@ -1773,8 +1797,12 @@ loop:
         unlockClosure(mvar, stg_MVAR_DIRTY_info);
         return (1);
     }
-    if (StgHeader_info(q) == stg_IND_info ||
-        StgHeader_info(q) == stg_MSG_NULL_info) {
+
+    qinfo = StgHeader_info(q);
+    prim_read_barrier;
+
+    if (qinfo == stg_IND_info ||
+        qinfo == stg_MSG_NULL_info) {
         q = StgInd_indirectee(q);
         goto loop;
     }
@@ -1845,10 +1873,12 @@ stg_readMVarzh ( P_ mvar, /* :: MVar a */ )
 
         // readMVars are pushed to the front of the queue, so
         // they get handled immediately
-        SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
         StgMVarTSOQueue_link(q) = StgMVar_head(mvar);
         StgMVarTSOQueue_tso(q)  = CurrentTSO;
 
+        SET_HDR(q, stg_MVAR_TSO_QUEUE_info, CCS_SYSTEM);
+        prim_write_barrier;
+
         StgTSO__link(CurrentTSO)       = q;
         StgTSO_block_info(CurrentTSO)  = mvar;
         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVarRead::I16;
@@ -1913,6 +1943,10 @@ stg_makeStableNamezh ( P_ obj )
                              BYTES_TO_WDS(SIZEOF_StgStableName));
         SET_HDR(sn_obj, stg_STABLE_NAME_info, CCCS);
         StgStableName_sn(sn_obj) = index;
+        // This will make the StableName# object visible to other threads;
+        // be sure that it's completely visible to other cores.
+        // See Note [Heap memory barriers] in SMP.h.
+        prim_write_barrier;
         snEntry_sn_obj(W_[stable_name_table] + index*SIZEOF_snEntry) = sn_obj;
     } else {
         sn_obj = snEntry_sn_obj(W_[stable_name_table] + index*SIZEOF_snEntry);
@@ -1954,6 +1988,7 @@ stg_newBCOzh ( P_ instrs,
     ALLOC_PRIM (bytes);
 
     bco = Hp - bytes + WDS(1);
+    // No memory barrier necessary as this is a new allocation.
     SET_HDR(bco, stg_BCO_info, CCS_MAIN);
 
     StgBCO_instrs(bco) = instrs;
@@ -1990,6 +2025,7 @@ stg_mkApUpd0zh ( P_ bco )
     CCCS_ALLOC(SIZEOF_StgAP);
 
     ap = Hp - SIZEOF_StgAP + WDS(1);
+    // No memory barrier necessary as this is a new allocation.
     SET_HDR(ap, stg_AP_info, CCS_MAIN);
 
     StgAP_n_args(ap) = HALF_W_(0);
@@ -2002,6 +2038,7 @@ stg_unpackClosurezh ( P_ closure )
 {
     W_ info, ptrs, nptrs, p, ptrs_arr, dat_arr;
     info = %GET_STD_INFO(UNTAG(closure));
+    prim_read_barrier;
 
     ptrs  = TO_W_(%INFO_PTRS(info));
     nptrs = TO_W_(%INFO_NPTRS(info));
@@ -2330,7 +2367,10 @@ stg_noDuplicatezh /* no arg list: explicit stack layout */
 
 stg_getApStackValzh ( P_ ap_stack, W_ offset )
 {
-    if (%INFO_PTR(UNTAG(ap_stack)) == stg_AP_STACK_info) {
+    W_ ap_stackinfo;
+    ap_stackinfo = %INFO_PTR(UNTAG(ap_stack));
+    prim_read_barrier;
+    if (ap_stackinfo == stg_AP_STACK_info) {
         return (1,StgAP_STACK_payload(ap_stack,offset));
     } else {
         return (0,ap_stack);
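All of the MVar paths above share one shape: fill in the MVAR_TSO_QUEUE node, write its header, then issue prim_write_barrier before linking the node where another capability can find it. Roughly, in C11 (hypothetical names; the real code links onto StgMVar_head/StgMVar_tail rather than a single atomic head):

#include <stdatomic.h>
#include <stddef.h>

struct qnode {
    const void *info;
    struct qnode *link;
    void *tso;
};

static const char QUEUE_info;               /* dummy info-table tag */
static _Atomic(struct qnode *) head;

static void enqueue(struct qnode *q, void *tso)
{
    q->link = NULL;                             /* 1. body               */
    q->tso  = tso;
    q->info = &QUEUE_info;                      /* 2. header             */
    atomic_thread_fence(memory_order_release);  /* prim_write_barrier    */
    atomic_store_explicit(&head, q,             /* 3. link it in; now    */
                          memory_order_relaxed);/*    others may see q   */
}

The header is written before the fence so the fence covers it as well: a consumer that reaches the node through the queue is then guaranteed to see a well-formed header and body together.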
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index f58f9177c8..807c3e3d30 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -870,6 +870,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
                 ap->payload[i] = (StgClosure *)*sp++;
             }
 
+            write_barrier(); // XXX: Necessary?
             SET_HDR(ap,&stg_AP_STACK_info,
                     ((StgClosure *)frame)->header.prof.ccs /* ToDo */);
             TICK_ALLOC_UP_THK(WDS(words+1),0);
diff --git a/rts/Sparks.c b/rts/Sparks.c
index bd5e120863..4022691da2 100644
--- a/rts/Sparks.c
+++ b/rts/Sparks.c
@@ -182,6 +182,7 @@ pruneSparkQueue (Capability *cap)
                   traceEventSparkFizzle(cap);
               } else {
                   info = spark->header.info;
+                  load_load_barrier();
                   if (IS_FORWARDING_PTR(info)) {
                       tmp = (StgClosure*)UN_FORWARDING_PTR(info);
                       /* if valuable work: shift inside the pool */
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index fdd9f1565e..e80ce45172 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -292,12 +292,14 @@ INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE")
     TICK_ENT_DYN_IND(); /* tick */
 
 retry:
+    prim_read_barrier;
     p = StgInd_indirectee(node);
     if (GETTAG(p) != 0) {
         return (p);
     }
 
     info = StgHeader_info(p);
+    prim_read_barrier;
     if (info == stg_IND_info) {
         // This could happen, if e.g. we got a BLOCKING_QUEUE that has
         // just been replaced with an IND by another thread in
@@ -313,9 +315,11 @@ retry:
             ("ptr" msg) = ccall allocate(MyCapability() "ptr",
                                          BYTES_TO_WDS(SIZEOF_MessageBlackHole));
 
-            SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM);
             MessageBlackHole_tso(msg) = CurrentTSO;
             MessageBlackHole_bh(msg) = node;
+            SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM);
+            // messageBlackHole has appropriate memory barriers when this object is exposed.
+            // See Note [Heap memory barriers].
 
             (r) = ccall messageBlackHole(MyCapability() "ptr", msg "ptr");
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index a916891aa8..cccc7ad0b0 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -195,6 +195,7 @@ threadPaused(Capability *cap, StgTSO *tso)
     const StgRetInfoTable *info;
     const StgInfoTable *bh_info;
     const StgInfoTable *cur_bh_info USED_IF_THREADS;
+    const StgInfoTable *frame_info;
     StgClosure *bh;
     StgPtr stack_end;
     uint32_t words_to_squeeze = 0;
@@ -218,6 +219,8 @@ threadPaused(Capability *cap, StgTSO *tso)
 
     frame = (StgClosure *)tso->stackobj->sp;
 
+    // N.B. We know that the TSO is owned by the current capability so no
+    // memory barriers are needed here.
     while ((P_)frame < stack_end) {
 
         info = get_ret_itbl(frame);
@@ -226,7 +229,8 @@ threadPaused(Capability *cap, StgTSO *tso)
         case UPDATE_FRAME:
 
             // If we've already marked this frame, then stop here.
-            if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) {
+            frame_info = frame->header.info;
+            if (frame_info == (StgInfoTable *)&stg_marked_upd_frame_info) {
                 if (prev_was_update_frame) {
                     words_to_squeeze += sizeofW(StgUpdateFrame);
                     weight += weight_pending;
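The frame_info local in ThreadPaused.c is worth a comment: even where barriers are unnecessary (the TSO is owned by this capability), reading a header once into a local and comparing the snapshot is the pattern this patch applies uniformly, because in the racy cases two separate reads of the same header can observe two different values. A sketch of the hazard, with hypothetical names (real racing reads would also need to be atomic or volatile):

struct frame { const void *info; };

static const char MARKED_FRAME_info, UPD_FRAME_info;   /* dummy tags */

/* BAD in a racy context: two reads of a concurrently-mutable header
 * may see two different values, so neither branch may be taken even
 * though the header was always one of the two. */
static int classify_racy(struct frame *f)
{
    if (f->info == &MARKED_FRAME_info) return 1;
    if (f->info == &UPD_FRAME_info)    return 2;
    return 0;
}

/* GOOD: one read, then compare the local snapshot. */
static int classify(struct frame *f)
{
    const void *info = f->info;
    if (info == &MARKED_FRAME_info) return 1;
    if (info == &UPD_FRAME_info)    return 2;
    return 0;
}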
diff --git a/rts/Threads.c b/rts/Threads.c
index 977635322d..2bdcea1c00 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -126,6 +126,8 @@ createThread(Capability *cap, W_ size)
     ACQUIRE_LOCK(&sched_mutex);
     tso->id = next_thread_id++;  // while we have the mutex
     tso->global_link = g0->threads;
+    /* Mutations above need no memory barrier since this lock will provide
+     * a release barrier */
     g0->threads = tso;
     RELEASE_LOCK(&sched_mutex);
 
@@ -257,8 +259,10 @@ tryWakeupThread (Capability *cap, StgTSO *tso)
     {
         MessageWakeup *msg;
         msg = (MessageWakeup *)allocate(cap,sizeofW(MessageWakeup));
-        SET_HDR(msg, &stg_MSG_TRY_WAKEUP_info, CCS_SYSTEM);
         msg->tso = tso;
+        SET_HDR(msg, &stg_MSG_TRY_WAKEUP_info, CCS_SYSTEM);
+        // Ensure that writes constructing Message are committed before sending.
+        write_barrier();
         sendMessage(cap, tso->cap, (Message*)msg);
         debugTraceCap(DEBUG_sched, cap, "message: try wakeup thread %ld on cap %d",
                       (W_)tso->id, tso->cap->no);
@@ -363,6 +367,7 @@ wakeBlockingQueue(Capability *cap, StgBlockingQueue *bq)
     for (msg = bq->queue; msg != (MessageBlackHole*)END_TSO_QUEUE;
          msg = msg->link) {
         i = msg->header.info;
+        load_load_barrier();
         if (i != &stg_IND_info) {
             ASSERT(i == &stg_MSG_BLACKHOLE_info);
             tryWakeupThread(cap,msg->tso);
@@ -392,15 +397,18 @@ checkBlockingQueues (Capability *cap, StgTSO *tso)
 
     for (bq = tso->bq; bq != (StgBlockingQueue*)END_TSO_QUEUE; bq = next) {
         next = bq->link;
 
-        if (bq->header.info == &stg_IND_info) {
+        const StgInfoTable *bqinfo = bq->header.info;
+        load_load_barrier(); // XXX: Is this needed?
+        if (bqinfo == &stg_IND_info) {
             // ToDo: could short it out right here, to avoid
             // traversing this IND multiple times.
             continue;
         }
 
         p = bq->bh;
-
-        if (p->header.info != &stg_BLACKHOLE_info ||
+        const StgInfoTable *pinfo = p->header.info;
+        load_load_barrier();
+        if (pinfo != &stg_BLACKHOLE_info ||
             ((StgInd *)p)->indirectee != (StgClosure*)bq)
         {
             wakeBlockingQueue(cap,bq);
@@ -424,6 +432,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val)
     const StgInfoTable *i;
 
     i = thunk->header.info;
+    load_load_barrier();
     if (i != &stg_BLACKHOLE_info &&
         i != &stg_CAF_BLACKHOLE_info &&
         i != &__stg_EAGER_BLACKHOLE_info &&
@@ -444,6 +453,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val)
     }
 
     i = v->header.info;
+    load_load_barrier();
     if (i == &stg_TSO_info) {
         checkBlockingQueues(cap, tso);
         return;
@@ -667,6 +677,8 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
         new_stack->sp -= chunk_words;
     }
 
+    // No write barriers needed; all of the writes above are to structures
+    // owned by our capability.
     tso->stackobj = new_stack;
 
     // we're about to run it, better mark it dirty
@@ -738,6 +750,7 @@ threadStackUnderflow (Capability *cap, StgTSO *tso)
 bool performTryPutMVar(Capability *cap, StgMVar *mvar, StgClosure *value)
 {
     const StgInfoTable *info;
+    const StgInfoTable *qinfo;
     StgMVarTSOQueue *q;
     StgTSO *tso;
 
@@ -762,8 +775,11 @@ loop:
         unlockClosure((StgClosure*)mvar, &stg_MVAR_DIRTY_info);
         return true;
     }
-    if (q->header.info == &stg_IND_info ||
-        q->header.info == &stg_MSG_NULL_info) {
+
+    qinfo = q->header.info;
+    load_load_barrier();
+    if (qinfo == &stg_IND_info ||
+        qinfo == &stg_MSG_NULL_info) {
        q = (StgMVarTSOQueue*)((StgInd*)q)->indirectee;
        goto loop;
     }
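performTryPutMVar's loop shows the chain-following variant of the observer pattern: each step re-reads a header, fences, and only then follows the indirectee. A C11 sketch (hypothetical names and layout, not the RTS's actual types):

#include <stdatomic.h>

struct node { const void *info; void *indirectee; };

static const char IND_info, MSG_NULL_info;      /* dummy info-table tags */

/* Another core may be rewriting these nodes as we walk, so re-read the
 * header with an acquire fence on every step before trusting the fields. */
static struct node *skip_indirections(struct node *q)
{
    for (;;) {
        const void *qinfo = q->info;                /* one read per step  */
        atomic_thread_fence(memory_order_acquire);  /* load_load_barrier  */
        if (qinfo == &IND_info || qinfo == &MSG_NULL_info)
            q = q->indirectee;
        else
            return q;
    }
}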
diff --git a/rts/TopHandler.c b/rts/TopHandler.c
index c0ac936b85..d5175015e7 100644
--- a/rts/TopHandler.c
+++ b/rts/TopHandler.c
@@ -29,6 +29,7 @@ StgTSO *getTopHandlerThread(void) {
     StgWeak *weak = (StgWeak*)deRefStablePtr(topHandlerPtr);
     RELEASE_LOCK(&m);
     const StgInfoTable *info = weak->header.info;
+    load_load_barrier();
     if (info == &stg_WEAK_info) {
         StgClosure *key = ((StgWeak*)weak)->key;
 
diff --git a/rts/Updates.h b/rts/Updates.h
index 1ba398bd35..1bd3e065af 100644
--- a/rts/Updates.h
+++ b/rts/Updates.h
@@ -39,10 +39,16 @@
                  PROF_HDR_FIELDS(w_,ccs,p2)             \
                  p_ updatee
 
-
+/*
+ * Getting the memory barriers correct here is quite tricky. Essentially
+ * the write barrier ensures that any writes to the new indirectee are visible
+ * before we introduce the indirection.
+ * See Note [Heap memory barriers] in SMP.h.
+ */
 #define updateWithIndirection(p1, p2, and_then) \
     W_ bd;                                      \
                                                 \
+    prim_write_barrier;                         \
     OVERWRITING_CLOSURE(p1);                    \
     StgInd_indirectee(p1) = p2;                 \
     prim_write_barrier;                         \
@@ -69,6 +75,8 @@ INLINE_HEADER void updateWithIndirection (Capability *cap,
     ASSERT( (P_)p1 != (P_)p2 );
     /* not necessarily true: ASSERT( !closure_IND(p1) ); */
     /* occurs in RaiseAsync.c:raiseAsync() */
+    /* See Note [Heap memory barriers] in SMP.h */
+    write_barrier();
     OVERWRITING_CLOSURE(p1);
     ((StgInd *)p1)->indirectee = p2;
     write_barrier();
 
diff --git a/rts/Weak.c b/rts/Weak.c
index a322d822af..ec998c214f 100644
--- a/rts/Weak.c
+++ b/rts/Weak.c
@@ -57,7 +57,9 @@ runAllCFinalizers(StgWeak *list)
         // If there's no major GC between the time that the finalizer for the
         // object from the oldest generation is manually called and shutdown
         // we end up running the same finalizer twice. See #7170.
-        if (w->header.info != &stg_DEAD_WEAK_info) {
+        const StgInfoTable *winfo = w->header.info;
+        load_load_barrier();
+        if (winfo != &stg_DEAD_WEAK_info) {
             runCFinalizers((StgCFinalizerList *)w->cfinalizers);
         }
     }
@@ -138,6 +140,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
     size = n + mutArrPtrsCardTableSize(n);
     arr = (StgMutArrPtrs *)allocate(cap, sizeofW(StgMutArrPtrs) + size);
     TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
+    // No write barrier needed here; this array is only going to be referred
+    // to by this core.
     SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, CCS_SYSTEM);
     arr->ptrs = n;
     arr->size = size;
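The Updates.h change is the subtlest of the set, so a concrete reading helps: the first barrier orders construction of the new indirectee before the indirectee field is written, and the second orders that write before the header flip that makes the closure an indirection. In C11, roughly (hypothetical names; the real macro also performs block-descriptor and generation bookkeeping):

#include <stdatomic.h>

struct thunk { const void *info; void *indirectee; };

static const char BLACKHOLE_info;   /* dummy stand-in for an info table */

/* `result` must be fully constructed before this is called. */
static void update_with_indirection(struct thunk *p, void *result)
{
    atomic_thread_fence(memory_order_release);  /* result's fields ...       */
    p->indirectee = result;                     /* ... before this pointer   */
    atomic_thread_fence(memory_order_release);  /* this pointer ...          */
    p->info = &BLACKHOLE_info;                  /* ... before the new header */
}

Paired with the read barriers added to the stg_BLACKHOLE entry code earlier in this patch, an observer that sees the BLACKHOLE header is then guaranteed to see a fully built indirectee.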
diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c
index d0447f867c..0432505cd2 100644
--- a/rts/sm/CNF.c
+++ b/rts/sm/CNF.c
@@ -542,8 +542,9 @@ insertCompactHash (Capability *cap,
                    StgClosure *p, StgClosure *to)
 {
     insertHashTable(str->hash, (StgWord)p, (const void*)to);
-    if (str->header.info == &stg_COMPACT_NFDATA_CLEAN_info) {
-        str->header.info = &stg_COMPACT_NFDATA_DIRTY_info;
+    const StgInfoTable *strinfo = str->header.info;
+    if (strinfo == &stg_COMPACT_NFDATA_CLEAN_info) {
+        str->header.info = &stg_COMPACT_NFDATA_DIRTY_info;
         recordClosureMutated(cap, (StgClosure*)str);
     }
 }
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 2a2a26ec09..7c82caa185 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -131,7 +131,7 @@ copy_tag(StgClosure **p, const StgInfoTable *info,
 #else
     src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);
     *p = TAG_CLOSURE(tag,(StgClosure*)to);
-#endif
+#endif /* defined(PARALLEL_GC) */
 
 #if defined(PROFILING)
     // We store the size of the just evacuated object in the LDV word so that
@@ -194,7 +194,7 @@ spin:
         if (info == (W_)&stg_WHITEHOLE_info) {
 #if defined(PROF_SPIN)
             whitehole_gc_spin++;
-#endif
+#endif /* PROF_SPIN */
             busy_wait_nop();
             goto spin;
         }
@@ -205,7 +205,7 @@ spin:
     }
 #else
     info = (W_)src->header.info;
-#endif
+#endif /* PARALLEL_GC */
 
     to = alloc_for_copy(size_to_reserve, gen_no);
 
@@ -216,8 +216,8 @@ spin:
     }
 
     write_barrier();
-    src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to);
     *p = (StgClosure *)to;
+    src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to);
 
 #if defined(PROFILING)
     // We store the size of the just evacuated object in the LDV word so that
@@ -1099,6 +1099,7 @@ selector_chain:
         //     need the write-barrier stuff.
         // - undo the chain we've built to point to p.
         SET_INFO((StgClosure *)p, (const StgInfoTable *)info_ptr);
+        write_barrier();
         *q = (StgClosure *)p;
         if (evac) evacuate(q);
         unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);
@@ -1109,7 +1110,7 @@ selector_chain:
     // Save the real info pointer (NOTE: not the same as get_itbl()).
     info_ptr = (StgWord)p->header.info;
     SET_INFO((StgClosure *)p,&stg_WHITEHOLE_info);
-#endif
+#endif /* THREADED_RTS */
 
     field = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr)->layout.selector_offset;
 
@@ -1165,6 +1166,7 @@ selector_loop:
               SET_INFO((StgClosure*)p, (StgInfoTable *)info_ptr);
               OVERWRITING_CLOSURE((StgClosure*)p);
               SET_INFO((StgClosure*)p, &stg_WHITEHOLE_info);
+              write_barrier();
           }
 #endif
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 4bf540a4a7..92a5e229a1 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -1104,6 +1104,8 @@ loop:
 
     // scavenge_loop() only exits when there's no work to do
 
+    // This atomic decrement also serves as a full barrier to ensure that any
+    // writes we made during scavenging are visible to other threads.
 #if defined(DEBUG)
     r = dec_running();
 #else
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index e8ca0c4002..650dc2c1df 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -83,6 +83,7 @@ isAlive(StgClosure *p)
     }
 
     info = INFO_PTR_TO_STRUCT(info);
+    load_load_barrier();
 
     switch (info->type) {
 
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index d5982e2f64..7475b5e625 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -234,16 +234,22 @@ static bool tidyWeakList(generation *gen)
     last_w = &gen->old_weak_ptr_list;
     for (w = gen->old_weak_ptr_list; w != NULL; w = next_w) {
 
+        info = w->header.info;
+        /* N.B. This function is executed only during the serial part of GC
+         * so consequently there is no potential for data races and therefore
+         * no need for memory barriers.
+         */
+
         /* There might be a DEAD_WEAK on the list if finalizeWeak# was
          * called on a live weak pointer object. Just remove it.
          */
-        if (w->header.info == &stg_DEAD_WEAK_info) {
+        if (info == &stg_DEAD_WEAK_info) {
             next_w = w->link;
             *last_w = next_w;
             continue;
         }
 
-        info = get_itbl((StgClosure *)w);
+        info = INFO_PTR_TO_STRUCT(info);
         switch (info->type) {
 
         case WEAK:
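The Evac.c reordering is the trickiest part of this patch: a GC thread claims a closure by swinging its header to WHITEHOLE, copies it, publishes the destination pointer, and only then installs the forwarding pointer, so that no thread can see a forwarding header before the copy is visible. A C11 sketch of the claim/forward shape (hypothetical layout; the real code uses cas(), MK_FORWARDING_PTR and per-closure payload copying):

#include <stdatomic.h>

struct hdr { _Atomic(const void *) info; };

static const char WHITEHOLE_info;   /* dummy lock tag */

/* Claim src by swinging its header to WHITEHOLE; spin while another
 * thread holds it. Returns the header we displaced. */
static const void *claim(struct hdr *src)
{
    for (;;) {
        const void *info =
            atomic_load_explicit(&src->info, memory_order_relaxed);
        if (info == &WHITEHOLE_info)
            continue;                   /* busy-wait, as in copy_tag    */
        if (atomic_compare_exchange_weak_explicit(
                &src->info, &info, &WHITEHOLE_info,
                memory_order_acquire, memory_order_relaxed))
            return info;                /* we own src now               */
    }
}

static void forward(struct hdr *src, const void *fwd_ptr)
{
    /* ...the payload has been copied to the destination by this point... */
    atomic_thread_fence(memory_order_release);  /* copy before publication */
    atomic_store_explicit(&src->info, fwd_ptr, memory_order_relaxed);
}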
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 8082b7e6d0..ff76f747c9 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -233,6 +233,7 @@ checkClosure( const StgClosure* p )
 
     p = UNTAG_CONST_CLOSURE(p);
     info = p->header.info;
+    load_load_barrier();
 
     if (IS_FORWARDING_PTR(info)) {
         barf("checkClosure: found EVACUATED closure %d", info->type);
@@ -243,6 +244,7 @@ checkClosure( const StgClosure* p )
 #endif
 
     info = INFO_PTR_TO_STRUCT(info);
+    load_load_barrier();
 
     switch (info->type) {
 
@@ -564,6 +566,7 @@ checkTSO(StgTSO *tso)
 
     next = tso->_link;
     info = (const StgInfoTable*) tso->_link->header.info;
+    load_load_barrier();
 
     ASSERT(next == END_TSO_QUEUE ||
            info == &stg_MVAR_TSO_QUEUE_info ||
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 73a790e9ed..c486cd96c5 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -431,7 +431,7 @@ scavenge_block (bdescr *bd)
   // time around the loop.
   while (p < bd->free || (bd == ws->todo_bd && p < ws->todo_free)) {
 
-      ASSERT(bd->link == NULL);
+    ASSERT(bd->link == NULL);
       ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
       info = get_itbl((StgClosure *)p);
 
@@ -1580,6 +1580,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
             ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
 
 #if defined(DEBUG)
+            const StgInfoTable *pinfo;
             switch (get_itbl((StgClosure *)p)->type) {
             case MUT_VAR_CLEAN:
                 // can happen due to concurrent writeMutVars
@@ -1599,9 +1600,10 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
             case TREC_CHUNK:
                 mutlist_TREC_CHUNK++; break;
             case MUT_PRIM:
-                if (((StgClosure*)p)->header.info == &stg_TVAR_WATCH_QUEUE_info)
+                pinfo = ((StgClosure*)p)->header.info;
+                if (pinfo == &stg_TVAR_WATCH_QUEUE_info)
                     mutlist_TVAR_WATCH_QUEUE++;
-                else if (((StgClosure*)p)->header.info == &stg_TREC_HEADER_info)
+                else if (pinfo == &stg_TREC_HEADER_info)
                     mutlist_TREC_HEADER++;
                 else
                     mutlist_OTHERS++;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 2e03b77695..3f91905f3c 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -407,8 +407,10 @@ lockCAF (StgRegTable *reg, StgIndStatic *caf)
 
     // Allocate the blackhole indirection closure
     bh = (StgInd *)allocate(cap, sizeofW(*bh));
-    SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
     bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
+    SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
+    // Ensure that the above writes are visible before we introduce the
+    // reference as the CAF indirectee.
+    write_barrier();
 
     caf->indirectee = (StgClosure *)bh;
     write_barrier();
@@ -1081,6 +1083,8 @@ void
 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 {
     Capability *cap = regTableToCapability(reg);
+    // No barrier required here as no other heap object fields are read. See
+    // Note [Heap memory barriers] in SMP.h.
     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
         p->header.info = &stg_MUT_VAR_DIRTY_info;
         recordClosureMutated(cap,p);
@@ -1090,6 +1094,8 @@ dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 void
 dirty_TVAR(Capability *cap, StgTVar *p)
 {
+    // No barrier required here as no other heap object fields are read. See
+    // Note [Heap memory barriers] in SMP.h.
     if (p->header.info == &stg_TVAR_CLEAN_info) {
         p->header.info = &stg_TVAR_DIRTY_info;
         recordClosureMutated(cap,(StgClosure*)p);
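dirty_MUT_VAR and dirty_TVAR get away without a barrier because the info-pointer flip publishes no new object: the closure is already fully visible, and the only consumer of the CLEAN to DIRTY transition is the GC, which synchronizes with mutators by other means. A C sketch of that shape, with hypothetical names:

struct mutvar { const void *info; void *var; };

static const char CLEAN_info, DIRTY_info;   /* dummy info-table tags */

/* Flipping a clean/dirty tag on an already-published object. No fence is
 * needed: no observer reads other fields on the strength of this header,
 * unlike the publication cases above. */
static void dirty_mut_var(struct mutvar *p,
                          void (*record_mutated)(struct mutvar *))
{
    if (p->info == &CLEAN_info) {
        p->info = &DIRTY_info;   /* same object, new tag; nothing to order */
        record_mutated(p);       /* only exposes p to the GC's mutable list */
    }
}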