summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2023-03-27 15:13:01 -0400
committerBen Gamari <ben@smart-cactus.org>2023-04-24 06:03:49 -0400
commitcd8e6a32f658337858292538e9ae32354a31675a (patch)
tree16c7bbe5b92d66920c6cc0c769603ca9729dcbd0
parent6bfb2ac0e2b5557a7bbb27ad72a77e40e8aabb41 (diff)
downloadhaskell-cd8e6a32f658337858292538e9ae32354a31675a.tar.gz
Fix thunk update ordering
Previously we attempted to ensure soundness of concurrent thunk update by synchronizing on the access of the thunk's info table pointer field. This was believed to be sufficient since the indirectee (which may expose a closure allocated by another core) would not be examined until the info table pointer update is complete. However, it turns out that this can result in data races in the presence of multiple threads racing a update a single thunk. For instance, consider this interleaving under the old scheme: Thread A Thread B --------- --------- t=0 Enter t 1 Push update frame 2 Begin evaluation 4 Pause thread 5 t.indirectee=tso 6 Release t.info=BLACKHOLE 7 ... (e.g. GC) 8 Resume thread 9 Finish evaluation 10 Relaxed t.indirectee=x 11 Load t.info 12 Acquire fence 13 Inspect t.indirectee 14 Release t.info=BLACKHOLE Here Thread A enters thunk `t` but is soon paused, resulting in `t` being lazily blackholed at t=6. Then, at t=10 Thread A finishes evaluation and updates `t.indirectee` with a relaxed store. Meanwhile, Thread B enters the blackhole. Under the old scheme this would introduce an acquire-fence but this would only synchronize with Thread A at t=6. Consequently, the result of the evaluation, `x`, is not visible to Thread B, introducing a data race. We fix this by treating the `indirectee` field as we do all other mutable fields. This means we must always access this field with acquire-loads and release-stores. See #23185.
-rw-r--r--compiler/GHC/StgToCmm/Bind.hs16
-rw-r--r--rts/Apply.cmm4
-rw-r--r--rts/Compact.cmm2
-rw-r--r--rts/Heap.c2
-rw-r--r--rts/Interpreter.c2
-rw-r--r--rts/PrimOps.cmm10
-rw-r--r--rts/StableName.c4
-rw-r--r--rts/StgMiscClosures.cmm17
-rw-r--r--rts/ThreadPaused.c2
-rw-r--r--rts/Threads.c2
-rw-r--r--rts/Updates.cmm2
-rw-r--r--rts/Updates.h4
-rw-r--r--rts/include/Cmm.h4
-rw-r--r--rts/include/stg/SMP.h14
-rw-r--r--rts/sm/NonMovingMark.c5
-rw-r--r--utils/genapply/Main.hs6
16 files changed, 54 insertions, 42 deletions
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs
index 57cdb1d3f9..2433a0d93f 100644
--- a/compiler/GHC/StgToCmm/Bind.hs
+++ b/compiler/GHC/StgToCmm/Bind.hs
@@ -702,11 +702,19 @@ emitBlackHoleCode node = do
when eager_blackholing $ do
whenUpdRemSetEnabled $ emitUpdRemSetPushThunk node
- emitStore (cmmOffsetW platform node (fixedHdrSizeW profile)) (currentTSOExpr platform)
+ emitAtomicStore platform MemOrderSeqCst
+ (cmmOffsetW platform node (fixedHdrSizeW profile))
+ (currentTSOExpr platform)
-- See Note [Heap memory barriers] in SMP.h.
- let w = wordWidth platform
- emitPrimCall [] (MO_AtomicWrite w MemOrderRelease)
- [node, CmmReg (CmmGlobal $ GlobalRegUse EagerBlackholeInfo $ bWord platform)]
+ emitAtomicStore platform MemOrderRelaxed
+ node
+ (CmmReg (CmmGlobal $ GlobalRegUse EagerBlackholeInfo $ bWord platform))
+
+emitAtomicStore :: Platform -> MemoryOrdering -> CmmExpr -> CmmExpr -> FCode ()
+emitAtomicStore platform mord addr val =
+ emitPrimCall [] (MO_AtomicWrite w mord) [addr, val]
+ where
+ w = typeWidth $ cmmExprType platform val
setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode ()
-- Nota Bene: this function does not change Node (even if it's a CAF),
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index b338b4c387..e561f4409f 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -108,7 +108,7 @@ again:
IND,
IND_STATIC:
{
- fun = StgInd_indirectee(fun);
+ fun = %acquire StgInd_indirectee(fun);
goto again;
}
case BCO:
@@ -693,7 +693,7 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK")
}
// Can't add StgInd_indirectee(ap) to UpdRemSet here because the old value is
// not reachable.
- StgInd_indirectee(ap) = CurrentTSO;
+ %release StgInd_indirectee(ap) = CurrentTSO;
SET_INFO_RELEASE(ap, __stg_EAGER_BLACKHOLE_info);
/* ensure there is at least AP_STACK_SPLIM words of headroom available
diff --git a/rts/Compact.cmm b/rts/Compact.cmm
index ecb694cf5c..3a6444a710 100644
--- a/rts/Compact.cmm
+++ b/rts/Compact.cmm
@@ -100,7 +100,7 @@ eval:
// Follow indirections:
case IND, IND_STATIC: {
- p = StgInd_indirectee(p);
+ p = %acquire StgInd_indirectee(p);
goto eval;
}
diff --git a/rts/Heap.c b/rts/Heap.c
index a3be4da749..3e665b60f2 100644
--- a/rts/Heap.c
+++ b/rts/Heap.c
@@ -173,7 +173,7 @@ StgWord collect_pointers(StgClosure *closure, StgClosure *ptrs[]) {
case IND:
case IND_STATIC:
case BLACKHOLE:
- ptrs[nptrs++] = (StgClosure *)(((StgInd *)closure)->indirectee);
+ ptrs[nptrs++] = (StgClosure *) ACQUIRE_LOAD(&((StgInd *)closure)->indirectee);
break;
case MUT_ARR_PTRS_CLEAN:
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index b263f749a3..2939f64b36 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -401,7 +401,7 @@ eval_obj:
case IND:
case IND_STATIC:
{
- tagged_obj = ((StgInd*)obj)->indirectee;
+ tagged_obj = ACQUIRE_LOAD(&((StgInd*)obj)->indirectee);
goto eval_obj;
}
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index e2024d32f4..2638fbce7c 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -1770,7 +1770,7 @@ loop:
qinfo = GET_INFO_ACQUIRE(q);
if (qinfo == stg_IND_info ||
qinfo == stg_MSG_NULL_info) {
- q = StgInd_indirectee(q);
+ q = %acquire StgInd_indirectee(q);
goto loop;
}
@@ -1838,7 +1838,7 @@ loop:
if (qinfo == stg_IND_info ||
qinfo == stg_MSG_NULL_info) {
- q = StgInd_indirectee(q);
+ q = %acquire StgInd_indirectee(q);
goto loop;
}
@@ -1940,7 +1940,7 @@ loop:
if (qinfo == stg_IND_info ||
qinfo == stg_MSG_NULL_info) {
- q = StgInd_indirectee(q);
+ q = %acquire StgInd_indirectee(q);
goto loop;
}
@@ -2029,7 +2029,7 @@ loop:
if (qinfo == stg_IND_info ||
qinfo == stg_MSG_NULL_info) {
- q = StgInd_indirectee(q);
+ q = %acquire StgInd_indirectee(q);
goto loop;
}
@@ -2309,7 +2309,7 @@ loop:
//Possibly IND added by removeFromMVarBlockedQueue
if (StgHeader_info(q) == stg_IND_info ||
StgHeader_info(q) == stg_MSG_NULL_info) {
- q = StgInd_indirectee(q);
+ q = %acquire StgInd_indirectee(q);
goto loop;
}
diff --git a/rts/StableName.c b/rts/StableName.c
index 5d4f2002ad..15ecb57f65 100644
--- a/rts/StableName.c
+++ b/rts/StableName.c
@@ -156,11 +156,11 @@ removeIndirections (StgClosure* p)
switch (get_itbl(q)->type) {
case IND:
case IND_STATIC:
- p = ((StgInd *)q)->indirectee;
+ p = ACQUIRE_LOAD(&((StgInd *)q)->indirectee);
continue;
case BLACKHOLE:
- p = ((StgInd *)q)->indirectee;
+ p = ACQUIRE_LOAD(&((StgInd *)q)->indirectee);
if (GET_CLOSURE_TAG(p) != 0) {
continue;
} else {
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index f5d95e7a00..60af791d92 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -521,8 +521,8 @@ INFO_TABLE(stg_IND,1,0,IND,"IND","IND")
(P_ node)
{
TICK_ENT_DYN_IND(); /* tick */
- ACQUIRE_FENCE_ON(R1 + OFFSET_StgHeader_info);
- node = UNTAG(StgInd_indirectee(node));
+ node = %acquire StgInd_indirectee(node);
+ node = UNTAG(node);
TICK_ENT_VIA_NODE();
jump %GET_ENTRY(node) (node);
}
@@ -530,8 +530,9 @@ INFO_TABLE(stg_IND,1,0,IND,"IND","IND")
/* explicit stack */
{
TICK_ENT_DYN_IND(); /* tick */
- ACQUIRE_FENCE_ON(R1 + OFFSET_StgHeader_info);
- R1 = UNTAG(StgInd_indirectee(R1));
+ P_ p;
+ p = %acquire StgInd_indirectee(R1);
+ R1 = UNTAG(p);
TICK_ENT_VIA_NODE();
jump %GET_ENTRY(R1) [R1];
}
@@ -541,8 +542,9 @@ INFO_TABLE(stg_IND_STATIC,1,0,IND_STATIC,"IND_STATIC","IND_STATIC")
/* explicit stack */
{
TICK_ENT_STATIC_IND(); /* tick */
- ACQUIRE_FENCE_ON(R1 + OFFSET_StgHeader_info);
- R1 = UNTAG(StgInd_indirectee(R1));
+ P_ p;
+ p = %acquire StgInd_indirectee(R1);
+ R1 = UNTAG(p);
TICK_ENT_VIA_NODE();
jump %GET_ENTRY(R1) [R1];
}
@@ -567,8 +569,7 @@ INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE")
retry:
// Synchronizes with the release-store in updateWithIndirection.
// See Note [Heap memory barriers] in SMP.h.
- ACQUIRE_FENCE_ON(node + OFFSET_StgHeader_info);
- p = %relaxed StgInd_indirectee(node);
+ p = %acquire StgInd_indirectee(node);
if (GETTAG(p) != 0) {
return (p);
}
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index f36602b01b..fd480601d3 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -352,7 +352,7 @@ threadPaused(Capability *cap, StgTSO *tso)
OVERWRITING_CLOSURE_SIZE(bh, closure_sizeW_(bh, INFO_PTR_TO_STRUCT(bh_info)));
// The payload of the BLACKHOLE points to the TSO
- ((StgInd *)bh)->indirectee = (StgClosure *)tso;
+ RELEASE_STORE(&((StgInd *)bh)->indirectee, (StgClosure *)tso);
SET_INFO_RELEASE(bh,&stg_BLACKHOLE_info);
// .. and we need a write barrier, since we just mutated the closure:
diff --git a/rts/Threads.c b/rts/Threads.c
index 29a9525574..d03e9c4cbc 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -437,7 +437,7 @@ checkBlockingQueues (Capability *cap, StgTSO *tso)
p = UNTAG_CLOSURE(bq->bh);
const StgInfoTable *pinfo = ACQUIRE_LOAD(&p->header.info);
if (pinfo != &stg_BLACKHOLE_info ||
- ((StgInd *)p)->indirectee != (StgClosure*)bq)
+ (RELAXED_LOAD(&((StgInd *)p)->indirectee) != (StgClosure*)bq))
{
wakeBlockingQueue(cap,bq);
}
diff --git a/rts/Updates.cmm b/rts/Updates.cmm
index c1de06c5e4..d8c6d625b4 100644
--- a/rts/Updates.cmm
+++ b/rts/Updates.cmm
@@ -59,7 +59,7 @@ INFO_TABLE_RET ( stg_marked_upd_frame, UPDATE_FRAME,
ASSERT(HpAlloc == 0); // Note [HpAlloc]
// we know the closure is a BLACKHOLE
- v = StgInd_indirectee(updatee);
+ v = %acquire StgInd_indirectee(updatee);
if (GETTAG(v) != 0) (likely: False) {
// updated by someone else: discard our value and use the
diff --git a/rts/Updates.h b/rts/Updates.h
index b35790b536..2de4fdf523 100644
--- a/rts/Updates.h
+++ b/rts/Updates.h
@@ -59,8 +59,8 @@
} \
\
OVERWRITING_CLOSURE(p1); \
- %relaxed StgInd_indirectee(p1) = p2; \
- SET_INFO_RELEASE(p1, stg_BLACKHOLE_info); \
+ %release StgInd_indirectee(p1) = p2; \
+ %relaxed SET_INFO(p1, stg_BLACKHOLE_info); \
LDV_RECORD_CREATE(p1); \
and_then;
diff --git a/rts/include/Cmm.h b/rts/include/Cmm.h
index 0d5fd3ece1..26f74c8cf6 100644
--- a/rts/include/Cmm.h
+++ b/rts/include/Cmm.h
@@ -309,7 +309,7 @@
#define ENTER(x) ENTER_(return,x)
#endif
-#define ENTER_R1() ENTER_(RET_R1,R1)
+#define ENTER_R1() P_ _r1; _r1 = R1; ENTER_(RET_R1, _r1)
#define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1]
@@ -324,7 +324,7 @@
IND, \
IND_STATIC: \
{ \
- x = StgInd_indirectee(x); \
+ x = %acquire StgInd_indirectee(x); \
goto again; \
} \
case \
diff --git a/rts/include/stg/SMP.h b/rts/include/stg/SMP.h
index 834f69854b..8f2d68b77b 100644
--- a/rts/include/stg/SMP.h
+++ b/rts/include/stg/SMP.h
@@ -178,6 +178,7 @@ EXTERN_INLINE void load_load_barrier(void);
* - StgSmallMutArrPtrs: payload
* - StgThunk although this is a somewhat special case; see below
* - StgTSO: block_info
+ * - StgInd: indirectee
*
* Writing to a mutable pointer field must be done via a release-store.
* Reading from such a field is done via an acquire-load.
@@ -222,9 +223,9 @@ EXTERN_INLINE void load_load_barrier(void);
* can see the indirectee. Consequently, a thunk update (see rts/Updates.h)
* does the following:
*
- * 1. Use a relaxed-store to place the new indirectee into the thunk's
+ * 1. Use a release-store to place the new indirectee into the thunk's
* indirectee field
- * 2. use a release-store to set the info table to stg_BLACKHOLE (which
+ * 2. use a relaxed-store to set the info table to stg_BLACKHOLE (which
* represents an indirection)
*
* Blackholing a thunk (either eagerly, by GHC.StgToCmm.Bind.emitBlackHoleCode,
@@ -237,13 +238,10 @@ EXTERN_INLINE void load_load_barrier(void);
* 1. We jump into the entry code of the indirection (e.g. stg_BLACKHOLE);
* this of course implies that we have already read the thunk's info table
* pointer, which is done with a relaxed load.
- * 2. use an acquire-fence to ensure that our view on the thunk is
- * up-to-date. This synchronizes with step (2) in the update
- * procedure.
- * 3. relaxed-load the indirectee. Since thunks are updated at most
+ * 2. acquire-load the indirectee. Since thunks are updated at most
* once we know that the fence in the last step has given us
* an up-to-date view of the indirectee closure.
- * 4. enter the indirectee (or block if the indirectee is a TSO)
+ * 3. enter the indirectee (or block if the indirectee is a TSO)
*
* Other closures
* --------------
@@ -270,7 +268,7 @@ EXTERN_INLINE void load_load_barrier(void);
* in this primops.
*
* - Sending a Message to another capability:
- * This is protected by the acquition and release of the target capability's
+ * This is protected by the acquision and release of the target capability's
* lock in Messages.c:sendMessage.
*
* N.B. recordClosureMutated places a reference to the mutated object on
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 89a12362ba..0b2abe0dee 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -688,8 +688,9 @@ void updateRemembSetPushThunkEager(Capability *cap,
case IND:
{
StgInd *ind = (StgInd *) thunk;
- if (check_in_nonmoving_heap(ind->indirectee)) {
- push_closure(queue, ind->indirectee, NULL);
+ StgClosure *indirectee = ACQUIRE_LOAD(&ind->indirectee);
+ if (check_in_nonmoving_heap(indirectee)) {
+ push_closure(queue, indirectee, NULL);
}
break;
}
diff --git a/utils/genapply/Main.hs b/utils/genapply/Main.hs
index fd5557cfed..7575bb7ce9 100644
--- a/utils/genapply/Main.hs
+++ b/utils/genapply/Main.hs
@@ -783,7 +783,11 @@ genApply regstatus args =
text "case IND,",
text " IND_STATIC: {",
nest 4 (vcat [
- text "R1 = StgInd_indirectee(R1);",
+ -- N.B. annoyingly the %acquire syntax must place its result in a local register
+ -- as it is a Cmm prim call node.
+ text "P_ p;",
+ text "p = %acquire StgInd_indirectee(R1);",
+ text "R1 = p;",
-- An indirection node might contain a tagged pointer
text "goto again;"
]),