diff options
author | Ben Gamari <ben@smart-cactus.org> | 2023-03-27 15:13:01 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2023-04-24 06:03:49 -0400 |
commit | cd8e6a32f658337858292538e9ae32354a31675a (patch) | |
tree | 16c7bbe5b92d66920c6cc0c769603ca9729dcbd0 | |
parent | 6bfb2ac0e2b5557a7bbb27ad72a77e40e8aabb41 (diff) | |
download | haskell-cd8e6a32f658337858292538e9ae32354a31675a.tar.gz |
Fix thunk update ordering
Previously we attempted to ensure soundness of concurrent thunk update
by synchronizing on the access of the thunk's info table pointer field.
This was believed to be sufficient since the indirectee (which may
expose a closure allocated by another core) would not be examined
until the info table pointer update is complete.
However, it turns out that this can result in data races in the presence
of multiple threads racing a update a single thunk. For instance,
consider this interleaving under the old scheme:
Thread A Thread B
--------- ---------
t=0 Enter t
1 Push update frame
2 Begin evaluation
4 Pause thread
5 t.indirectee=tso
6 Release t.info=BLACKHOLE
7 ... (e.g. GC)
8 Resume thread
9 Finish evaluation
10 Relaxed t.indirectee=x
11 Load t.info
12 Acquire fence
13 Inspect t.indirectee
14 Release t.info=BLACKHOLE
Here Thread A enters thunk `t` but is soon paused, resulting in `t`
being lazily blackholed at t=6. Then, at t=10 Thread A finishes
evaluation and updates `t.indirectee` with a relaxed store.
Meanwhile, Thread B enters the blackhole. Under the old scheme this
would introduce an acquire-fence but this would only synchronize with
Thread A at t=6. Consequently, the result of the evaluation, `x`, is not
visible to Thread B, introducing a data race.
We fix this by treating the `indirectee` field as we do all other
mutable fields. This means we must always access this field with
acquire-loads and release-stores.
See #23185.
-rw-r--r-- | compiler/GHC/StgToCmm/Bind.hs | 16 | ||||
-rw-r--r-- | rts/Apply.cmm | 4 | ||||
-rw-r--r-- | rts/Compact.cmm | 2 | ||||
-rw-r--r-- | rts/Heap.c | 2 | ||||
-rw-r--r-- | rts/Interpreter.c | 2 | ||||
-rw-r--r-- | rts/PrimOps.cmm | 10 | ||||
-rw-r--r-- | rts/StableName.c | 4 | ||||
-rw-r--r-- | rts/StgMiscClosures.cmm | 17 | ||||
-rw-r--r-- | rts/ThreadPaused.c | 2 | ||||
-rw-r--r-- | rts/Threads.c | 2 | ||||
-rw-r--r-- | rts/Updates.cmm | 2 | ||||
-rw-r--r-- | rts/Updates.h | 4 | ||||
-rw-r--r-- | rts/include/Cmm.h | 4 | ||||
-rw-r--r-- | rts/include/stg/SMP.h | 14 | ||||
-rw-r--r-- | rts/sm/NonMovingMark.c | 5 | ||||
-rw-r--r-- | utils/genapply/Main.hs | 6 |
16 files changed, 54 insertions, 42 deletions
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs index 57cdb1d3f9..2433a0d93f 100644 --- a/compiler/GHC/StgToCmm/Bind.hs +++ b/compiler/GHC/StgToCmm/Bind.hs @@ -702,11 +702,19 @@ emitBlackHoleCode node = do when eager_blackholing $ do whenUpdRemSetEnabled $ emitUpdRemSetPushThunk node - emitStore (cmmOffsetW platform node (fixedHdrSizeW profile)) (currentTSOExpr platform) + emitAtomicStore platform MemOrderSeqCst + (cmmOffsetW platform node (fixedHdrSizeW profile)) + (currentTSOExpr platform) -- See Note [Heap memory barriers] in SMP.h. - let w = wordWidth platform - emitPrimCall [] (MO_AtomicWrite w MemOrderRelease) - [node, CmmReg (CmmGlobal $ GlobalRegUse EagerBlackholeInfo $ bWord platform)] + emitAtomicStore platform MemOrderRelaxed + node + (CmmReg (CmmGlobal $ GlobalRegUse EagerBlackholeInfo $ bWord platform)) + +emitAtomicStore :: Platform -> MemoryOrdering -> CmmExpr -> CmmExpr -> FCode () +emitAtomicStore platform mord addr val = + emitPrimCall [] (MO_AtomicWrite w mord) [addr, val] + where + w = typeWidth $ cmmExprType platform val setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode () -- Nota Bene: this function does not change Node (even if it's a CAF), diff --git a/rts/Apply.cmm b/rts/Apply.cmm index b338b4c387..e561f4409f 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -108,7 +108,7 @@ again: IND, IND_STATIC: { - fun = StgInd_indirectee(fun); + fun = %acquire StgInd_indirectee(fun); goto again; } case BCO: @@ -693,7 +693,7 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK") } // Can't add StgInd_indirectee(ap) to UpdRemSet here because the old value is // not reachable. - StgInd_indirectee(ap) = CurrentTSO; + %release StgInd_indirectee(ap) = CurrentTSO; SET_INFO_RELEASE(ap, __stg_EAGER_BLACKHOLE_info); /* ensure there is at least AP_STACK_SPLIM words of headroom available diff --git a/rts/Compact.cmm b/rts/Compact.cmm index ecb694cf5c..3a6444a710 100644 --- a/rts/Compact.cmm +++ b/rts/Compact.cmm @@ -100,7 +100,7 @@ eval: // Follow indirections: case IND, IND_STATIC: { - p = StgInd_indirectee(p); + p = %acquire StgInd_indirectee(p); goto eval; } diff --git a/rts/Heap.c b/rts/Heap.c index a3be4da749..3e665b60f2 100644 --- a/rts/Heap.c +++ b/rts/Heap.c @@ -173,7 +173,7 @@ StgWord collect_pointers(StgClosure *closure, StgClosure *ptrs[]) { case IND: case IND_STATIC: case BLACKHOLE: - ptrs[nptrs++] = (StgClosure *)(((StgInd *)closure)->indirectee); + ptrs[nptrs++] = (StgClosure *) ACQUIRE_LOAD(&((StgInd *)closure)->indirectee); break; case MUT_ARR_PTRS_CLEAN: diff --git a/rts/Interpreter.c b/rts/Interpreter.c index b263f749a3..2939f64b36 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -401,7 +401,7 @@ eval_obj: case IND: case IND_STATIC: { - tagged_obj = ((StgInd*)obj)->indirectee; + tagged_obj = ACQUIRE_LOAD(&((StgInd*)obj)->indirectee); goto eval_obj; } diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index e2024d32f4..2638fbce7c 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1770,7 +1770,7 @@ loop: qinfo = GET_INFO_ACQUIRE(q); if (qinfo == stg_IND_info || qinfo == stg_MSG_NULL_info) { - q = StgInd_indirectee(q); + q = %acquire StgInd_indirectee(q); goto loop; } @@ -1838,7 +1838,7 @@ loop: if (qinfo == stg_IND_info || qinfo == stg_MSG_NULL_info) { - q = StgInd_indirectee(q); + q = %acquire StgInd_indirectee(q); goto loop; } @@ -1940,7 +1940,7 @@ loop: if (qinfo == stg_IND_info || qinfo == stg_MSG_NULL_info) { - q = StgInd_indirectee(q); + q = %acquire StgInd_indirectee(q); goto loop; } @@ -2029,7 +2029,7 @@ loop: if (qinfo == stg_IND_info || qinfo == stg_MSG_NULL_info) { - q = StgInd_indirectee(q); + q = %acquire StgInd_indirectee(q); goto loop; } @@ -2309,7 +2309,7 @@ loop: //Possibly IND added by removeFromMVarBlockedQueue if (StgHeader_info(q) == stg_IND_info || StgHeader_info(q) == stg_MSG_NULL_info) { - q = StgInd_indirectee(q); + q = %acquire StgInd_indirectee(q); goto loop; } diff --git a/rts/StableName.c b/rts/StableName.c index 5d4f2002ad..15ecb57f65 100644 --- a/rts/StableName.c +++ b/rts/StableName.c @@ -156,11 +156,11 @@ removeIndirections (StgClosure* p) switch (get_itbl(q)->type) { case IND: case IND_STATIC: - p = ((StgInd *)q)->indirectee; + p = ACQUIRE_LOAD(&((StgInd *)q)->indirectee); continue; case BLACKHOLE: - p = ((StgInd *)q)->indirectee; + p = ACQUIRE_LOAD(&((StgInd *)q)->indirectee); if (GET_CLOSURE_TAG(p) != 0) { continue; } else { diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index f5d95e7a00..60af791d92 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -521,8 +521,8 @@ INFO_TABLE(stg_IND,1,0,IND,"IND","IND") (P_ node) { TICK_ENT_DYN_IND(); /* tick */ - ACQUIRE_FENCE_ON(R1 + OFFSET_StgHeader_info); - node = UNTAG(StgInd_indirectee(node)); + node = %acquire StgInd_indirectee(node); + node = UNTAG(node); TICK_ENT_VIA_NODE(); jump %GET_ENTRY(node) (node); } @@ -530,8 +530,9 @@ INFO_TABLE(stg_IND,1,0,IND,"IND","IND") /* explicit stack */ { TICK_ENT_DYN_IND(); /* tick */ - ACQUIRE_FENCE_ON(R1 + OFFSET_StgHeader_info); - R1 = UNTAG(StgInd_indirectee(R1)); + P_ p; + p = %acquire StgInd_indirectee(R1); + R1 = UNTAG(p); TICK_ENT_VIA_NODE(); jump %GET_ENTRY(R1) [R1]; } @@ -541,8 +542,9 @@ INFO_TABLE(stg_IND_STATIC,1,0,IND_STATIC,"IND_STATIC","IND_STATIC") /* explicit stack */ { TICK_ENT_STATIC_IND(); /* tick */ - ACQUIRE_FENCE_ON(R1 + OFFSET_StgHeader_info); - R1 = UNTAG(StgInd_indirectee(R1)); + P_ p; + p = %acquire StgInd_indirectee(R1); + R1 = UNTAG(p); TICK_ENT_VIA_NODE(); jump %GET_ENTRY(R1) [R1]; } @@ -567,8 +569,7 @@ INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE") retry: // Synchronizes with the release-store in updateWithIndirection. // See Note [Heap memory barriers] in SMP.h. - ACQUIRE_FENCE_ON(node + OFFSET_StgHeader_info); - p = %relaxed StgInd_indirectee(node); + p = %acquire StgInd_indirectee(node); if (GETTAG(p) != 0) { return (p); } diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c index f36602b01b..fd480601d3 100644 --- a/rts/ThreadPaused.c +++ b/rts/ThreadPaused.c @@ -352,7 +352,7 @@ threadPaused(Capability *cap, StgTSO *tso) OVERWRITING_CLOSURE_SIZE(bh, closure_sizeW_(bh, INFO_PTR_TO_STRUCT(bh_info))); // The payload of the BLACKHOLE points to the TSO - ((StgInd *)bh)->indirectee = (StgClosure *)tso; + RELEASE_STORE(&((StgInd *)bh)->indirectee, (StgClosure *)tso); SET_INFO_RELEASE(bh,&stg_BLACKHOLE_info); // .. and we need a write barrier, since we just mutated the closure: diff --git a/rts/Threads.c b/rts/Threads.c index 29a9525574..d03e9c4cbc 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -437,7 +437,7 @@ checkBlockingQueues (Capability *cap, StgTSO *tso) p = UNTAG_CLOSURE(bq->bh); const StgInfoTable *pinfo = ACQUIRE_LOAD(&p->header.info); if (pinfo != &stg_BLACKHOLE_info || - ((StgInd *)p)->indirectee != (StgClosure*)bq) + (RELAXED_LOAD(&((StgInd *)p)->indirectee) != (StgClosure*)bq)) { wakeBlockingQueue(cap,bq); } diff --git a/rts/Updates.cmm b/rts/Updates.cmm index c1de06c5e4..d8c6d625b4 100644 --- a/rts/Updates.cmm +++ b/rts/Updates.cmm @@ -59,7 +59,7 @@ INFO_TABLE_RET ( stg_marked_upd_frame, UPDATE_FRAME, ASSERT(HpAlloc == 0); // Note [HpAlloc] // we know the closure is a BLACKHOLE - v = StgInd_indirectee(updatee); + v = %acquire StgInd_indirectee(updatee); if (GETTAG(v) != 0) (likely: False) { // updated by someone else: discard our value and use the diff --git a/rts/Updates.h b/rts/Updates.h index b35790b536..2de4fdf523 100644 --- a/rts/Updates.h +++ b/rts/Updates.h @@ -59,8 +59,8 @@ } \ \ OVERWRITING_CLOSURE(p1); \ - %relaxed StgInd_indirectee(p1) = p2; \ - SET_INFO_RELEASE(p1, stg_BLACKHOLE_info); \ + %release StgInd_indirectee(p1) = p2; \ + %relaxed SET_INFO(p1, stg_BLACKHOLE_info); \ LDV_RECORD_CREATE(p1); \ and_then; diff --git a/rts/include/Cmm.h b/rts/include/Cmm.h index 0d5fd3ece1..26f74c8cf6 100644 --- a/rts/include/Cmm.h +++ b/rts/include/Cmm.h @@ -309,7 +309,7 @@ #define ENTER(x) ENTER_(return,x) #endif -#define ENTER_R1() ENTER_(RET_R1,R1) +#define ENTER_R1() P_ _r1; _r1 = R1; ENTER_(RET_R1, _r1) #define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1] @@ -324,7 +324,7 @@ IND, \ IND_STATIC: \ { \ - x = StgInd_indirectee(x); \ + x = %acquire StgInd_indirectee(x); \ goto again; \ } \ case \ diff --git a/rts/include/stg/SMP.h b/rts/include/stg/SMP.h index 834f69854b..8f2d68b77b 100644 --- a/rts/include/stg/SMP.h +++ b/rts/include/stg/SMP.h @@ -178,6 +178,7 @@ EXTERN_INLINE void load_load_barrier(void); * - StgSmallMutArrPtrs: payload * - StgThunk although this is a somewhat special case; see below * - StgTSO: block_info + * - StgInd: indirectee * * Writing to a mutable pointer field must be done via a release-store. * Reading from such a field is done via an acquire-load. @@ -222,9 +223,9 @@ EXTERN_INLINE void load_load_barrier(void); * can see the indirectee. Consequently, a thunk update (see rts/Updates.h) * does the following: * - * 1. Use a relaxed-store to place the new indirectee into the thunk's + * 1. Use a release-store to place the new indirectee into the thunk's * indirectee field - * 2. use a release-store to set the info table to stg_BLACKHOLE (which + * 2. use a relaxed-store to set the info table to stg_BLACKHOLE (which * represents an indirection) * * Blackholing a thunk (either eagerly, by GHC.StgToCmm.Bind.emitBlackHoleCode, @@ -237,13 +238,10 @@ EXTERN_INLINE void load_load_barrier(void); * 1. We jump into the entry code of the indirection (e.g. stg_BLACKHOLE); * this of course implies that we have already read the thunk's info table * pointer, which is done with a relaxed load. - * 2. use an acquire-fence to ensure that our view on the thunk is - * up-to-date. This synchronizes with step (2) in the update - * procedure. - * 3. relaxed-load the indirectee. Since thunks are updated at most + * 2. acquire-load the indirectee. Since thunks are updated at most * once we know that the fence in the last step has given us * an up-to-date view of the indirectee closure. - * 4. enter the indirectee (or block if the indirectee is a TSO) + * 3. enter the indirectee (or block if the indirectee is a TSO) * * Other closures * -------------- @@ -270,7 +268,7 @@ EXTERN_INLINE void load_load_barrier(void); * in this primops. * * - Sending a Message to another capability: - * This is protected by the acquition and release of the target capability's + * This is protected by the acquision and release of the target capability's * lock in Messages.c:sendMessage. * * N.B. recordClosureMutated places a reference to the mutated object on diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c index 89a12362ba..0b2abe0dee 100644 --- a/rts/sm/NonMovingMark.c +++ b/rts/sm/NonMovingMark.c @@ -688,8 +688,9 @@ void updateRemembSetPushThunkEager(Capability *cap, case IND: { StgInd *ind = (StgInd *) thunk; - if (check_in_nonmoving_heap(ind->indirectee)) { - push_closure(queue, ind->indirectee, NULL); + StgClosure *indirectee = ACQUIRE_LOAD(&ind->indirectee); + if (check_in_nonmoving_heap(indirectee)) { + push_closure(queue, indirectee, NULL); } break; } diff --git a/utils/genapply/Main.hs b/utils/genapply/Main.hs index fd5557cfed..7575bb7ce9 100644 --- a/utils/genapply/Main.hs +++ b/utils/genapply/Main.hs @@ -783,7 +783,11 @@ genApply regstatus args = text "case IND,", text " IND_STATIC: {", nest 4 (vcat [ - text "R1 = StgInd_indirectee(R1);", + -- N.B. annoyingly the %acquire syntax must place its result in a local register + -- as it is a Cmm prim call node. + text "P_ p;", + text "p = %acquire StgInd_indirectee(R1);", + text "R1 = p;", -- An indirection node might contain a tagged pointer text "goto again;" ]), |