diff options
author | Ubuntu <ubuntu@ip-172-31-25-209.ec2.internal> | 2023-04-18 09:31:38 +0000 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2023-04-24 06:04:18 -0400 |
commit | 59793d312cc0a8d4cc2bc9544eb14db49b20d3a7 (patch) | |
tree | 9eb5bbfedfd5d01f2677f133a4473cf6ac6a91f0 | |
parent | 96adc99cef86f9ce0347c8b98a2f731b4c46b0ee (diff) | |
download | haskell-59793d312cc0a8d4cc2bc9544eb14db49b20d3a7.tar.gz |
Fix thunk update further
-rw-r--r-- | rts/StgMiscClosures.cmm | 7 | ||||
-rw-r--r-- | rts/Updates.h | 4 | ||||
-rw-r--r-- | rts/include/stg/SMP.h | 57 |
3 files changed, 54 insertions, 14 deletions
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index f5d95e7a00..201e76d58a 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -565,7 +565,12 @@ INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE") TICK_ENT_DYN_IND(); /* tick */ retry: - // Synchronizes with the release-store in updateWithIndirection. + // Synchronizes with the SET_INFO_RELEASE in + // updateWithIndirection + ACQUIRE_FENCE; + + // Synchronizes with the release-store in + // updateWithIndirection. // See Note [Heap memory barriers] in SMP.h. ACQUIRE_FENCE_ON(node + OFFSET_StgHeader_info); p = %relaxed StgInd_indirectee(node); diff --git a/rts/Updates.h b/rts/Updates.h index 086b1e0283..121eebf40b 100644 --- a/rts/Updates.h +++ b/rts/Updates.h @@ -60,7 +60,7 @@ \ OVERWRITING_CLOSURE(p1); \ %release StgInd_indirectee(p1) = p2; \ - %relaxed SET_INFO(p1, stg_BLACKHOLE_info); \ + %release SET_INFO(p1, stg_BLACKHOLE_info); \ LDV_RECORD_CREATE(p1); \ and_then; @@ -86,7 +86,7 @@ INLINE_HEADER void updateWithIndirection (Capability *cap, } OVERWRITING_CLOSURE(p1); RELEASE_STORE(&((StgInd *)p1)->indirectee, p2); - SET_INFO_RELAXED(p1, &stg_BLACKHOLE_info); + SET_INFO_RELEASE(p1, &stg_BLACKHOLE_info); LDV_RECORD_CREATE(p1); } diff --git a/rts/include/stg/SMP.h b/rts/include/stg/SMP.h index e078e65546..53f5666e68 100644 --- a/rts/include/stg/SMP.h +++ b/rts/include/stg/SMP.h @@ -274,10 +274,11 @@ EXTERN_INLINE void load_load_barrier(void); * can see the indirectee. Consequently, a thunk update (see rts/Updates.h) * does the following: * - * 1. Use a release-store to place the new indirectee into the thunk's - * indirectee field - * 2. use a relaxed-store to set the info table to stg_BLACKHOLE (which - * represents an indirection) + * U1. use a release-store to place the new indirectee into the thunk's + * indirectee field + * + * U2. use a relaxed-store to set the info table to stg_BLACKHOLE (which + * represents an indirection) * * Blackholing a thunk (either eagerly, by GHC.StgToCmm.Bind.emitBlackHoleCode, * or lazily, by ThreadPaused.c:threadPaused) is done similarly. @@ -286,13 +287,47 @@ EXTERN_INLINE void load_load_barrier(void); * stg_IND, and stg_IND_STATIC in rts/StgMiscClosure.cmm) does the * following: * - * 1. We jump into the entry code of the indirection (e.g. stg_BLACKHOLE); - * this of course implies that we have already read the thunk's info table - * pointer, which is done with a relaxed load. - * 2. acquire-load the indirectee. Since thunks are updated at most - * once we know that the fence in the last step has given us - * an up-to-date view of the indirectee closure. - * 3. enter the indirectee (or block if the indirectee is a TSO) + * E1. jump into the entry code of the indirection (e.g. stg_BLACKHOLE); + * this of course implies that we have already read the thunk's info table + * pointer, which is done with a relaxed load. + * + * E2. acquire-fence + * + * E3. acquire-load the indirectee. Since thunks are updated at most + * once we know that the fence in the last step has given us + * an up-to-date view of the indirectee closure. + * + * E4. enter the indirectee (or block if the indirectee is a TSO) + * + * The acquire-fence in step (E2) is somewhat surprising but is necessary as + * the C11 memory model does not guarantee that the store (U1) is visible to + * (E3) despite (U1) preceding (U2) in program-order (due to the relaxed + * ordering of (E3)). This is demonstrated by the following CppMem model: + * + * int main() { + * atomic_int x = 0; // info table pointer + * atomic_int y = 0; // indirectee + * {{{ + * { // blackhole update + * y.store(1, memory_order_release); // U1 + * x.store(2, memory_order_release); // U2 + * } + * ||| + * { // blackhole entry + * r1=x.load(memory_order_relaxed).readsvalue(2); // E1 + * //fence(memory_order_acquire); // E2 + * r2=y.load(memory_order_acquire); // E3 + * } + * }}}; + * return 0; + * } + * + * Under the C11 memory model this program admits an execution where the + * indirectee `r2=0`. + * + * Of course, this could also be addressed by strengthing the ordering of E1 + * to acquire, but this would incur a significant cost on every closure entry + * (including non-blackholes). * * Other closures * -------------- |