summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUbuntu <ubuntu@ip-172-31-25-209.ec2.internal>2023-04-18 09:31:38 +0000
committerBen Gamari <ben@smart-cactus.org>2023-04-24 06:04:18 -0400
commit59793d312cc0a8d4cc2bc9544eb14db49b20d3a7 (patch)
tree9eb5bbfedfd5d01f2677f133a4473cf6ac6a91f0
parent96adc99cef86f9ce0347c8b98a2f731b4c46b0ee (diff)
downloadhaskell-59793d312cc0a8d4cc2bc9544eb14db49b20d3a7.tar.gz
Fix thunk update further
-rw-r--r--rts/StgMiscClosures.cmm7
-rw-r--r--rts/Updates.h4
-rw-r--r--rts/include/stg/SMP.h57
3 files changed, 54 insertions, 14 deletions
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index f5d95e7a00..201e76d58a 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -565,7 +565,12 @@ INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE")
TICK_ENT_DYN_IND(); /* tick */
retry:
- // Synchronizes with the release-store in updateWithIndirection.
+ // Synchronizes with the SET_INFO_RELEASE in
+ // updateWithIndirection
+ ACQUIRE_FENCE;
+
+ // Synchronizes with the release-store in
+ // updateWithIndirection.
// See Note [Heap memory barriers] in SMP.h.
ACQUIRE_FENCE_ON(node + OFFSET_StgHeader_info);
p = %relaxed StgInd_indirectee(node);
diff --git a/rts/Updates.h b/rts/Updates.h
index 086b1e0283..121eebf40b 100644
--- a/rts/Updates.h
+++ b/rts/Updates.h
@@ -60,7 +60,7 @@
\
OVERWRITING_CLOSURE(p1); \
%release StgInd_indirectee(p1) = p2; \
- %relaxed SET_INFO(p1, stg_BLACKHOLE_info); \
+ %release SET_INFO(p1, stg_BLACKHOLE_info); \
LDV_RECORD_CREATE(p1); \
and_then;
@@ -86,7 +86,7 @@ INLINE_HEADER void updateWithIndirection (Capability *cap,
}
OVERWRITING_CLOSURE(p1);
RELEASE_STORE(&((StgInd *)p1)->indirectee, p2);
- SET_INFO_RELAXED(p1, &stg_BLACKHOLE_info);
+ SET_INFO_RELEASE(p1, &stg_BLACKHOLE_info);
LDV_RECORD_CREATE(p1);
}
diff --git a/rts/include/stg/SMP.h b/rts/include/stg/SMP.h
index e078e65546..53f5666e68 100644
--- a/rts/include/stg/SMP.h
+++ b/rts/include/stg/SMP.h
@@ -274,10 +274,11 @@ EXTERN_INLINE void load_load_barrier(void);
* can see the indirectee. Consequently, a thunk update (see rts/Updates.h)
* does the following:
*
- * 1. Use a release-store to place the new indirectee into the thunk's
- * indirectee field
- * 2. use a relaxed-store to set the info table to stg_BLACKHOLE (which
- * represents an indirection)
+ * U1. use a release-store to place the new indirectee into the thunk's
+ * indirectee field
+ *
+ * U2. use a relaxed-store to set the info table to stg_BLACKHOLE (which
+ * represents an indirection)
*
* Blackholing a thunk (either eagerly, by GHC.StgToCmm.Bind.emitBlackHoleCode,
* or lazily, by ThreadPaused.c:threadPaused) is done similarly.
@@ -286,13 +287,47 @@ EXTERN_INLINE void load_load_barrier(void);
* stg_IND, and stg_IND_STATIC in rts/StgMiscClosure.cmm) does the
* following:
*
- * 1. We jump into the entry code of the indirection (e.g. stg_BLACKHOLE);
- * this of course implies that we have already read the thunk's info table
- * pointer, which is done with a relaxed load.
- * 2. acquire-load the indirectee. Since thunks are updated at most
- * once we know that the fence in the last step has given us
- * an up-to-date view of the indirectee closure.
- * 3. enter the indirectee (or block if the indirectee is a TSO)
+ * E1. jump into the entry code of the indirection (e.g. stg_BLACKHOLE);
+ * this of course implies that we have already read the thunk's info table
+ * pointer, which is done with a relaxed load.
+ *
+ * E2. acquire-fence
+ *
+ * E3. acquire-load the indirectee. Since thunks are updated at most
+ * once we know that the fence in the last step has given us
+ * an up-to-date view of the indirectee closure.
+ *
+ * E4. enter the indirectee (or block if the indirectee is a TSO)
+ *
+ * The acquire-fence in step (E2) is somewhat surprising but is necessary as
+ * the C11 memory model does not guarantee that the store (U1) is visible to
+ * (E3) despite (U1) preceding (U2) in program-order (due to the relaxed
+ * ordering of (E3)). This is demonstrated by the following CppMem model:
+ *
+ * int main() {
+ * atomic_int x = 0; // info table pointer
+ * atomic_int y = 0; // indirectee
+ * {{{
+ * { // blackhole update
+ * y.store(1, memory_order_release); // U1
+ * x.store(2, memory_order_release); // U2
+ * }
+ * |||
+ * { // blackhole entry
+ * r1=x.load(memory_order_relaxed).readsvalue(2); // E1
+ * //fence(memory_order_acquire); // E2
+ * r2=y.load(memory_order_acquire); // E3
+ * }
+ * }}};
+ * return 0;
+ * }
+ *
+ * Under the C11 memory model this program admits an execution where the
+ * indirectee `r2=0`.
+ *
+ * Of course, this could also be addressed by strengthing the ordering of E1
+ * to acquire, but this would incur a significant cost on every closure entry
+ * (including non-blackholes).
*
* Other closures
* --------------