summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2023-04-28 12:33:31 -0400
committerMarge Bot <ben+marge-bot@smart-cactus.org>2023-05-15 18:02:20 -0400
commit451d65a6913d85088a350be8e9b7a6d834453326 (patch)
tree5d56e932a0b44d90f20da2645dd2370caa921f52
parent37fb61d89d36719cbadde28ab2514e9eb804fa2e (diff)
downloadhaskell-451d65a6913d85088a350be8e9b7a6d834453326.tar.gz
rts: Don't sanity-check StgTSO.global_link
See Note [Avoid dangling global_link pointers]. Fixes #19146.
-rw-r--r--rts/sm/Sanity.c41
1 files changed, 38 insertions, 3 deletions
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 7fcc601bbd..1de1a7f0d9 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -737,6 +737,38 @@ checkSTACK (StgStack *stack)
checkStackChunk(sp, stack_end);
}
+/*
+ * Note [Sanity-checking global_link]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * TSOs are a bit odd in that they have a global_link pointer field
+ * which is not scavenged by the GC. This field is used to track the
+ * generations[_].[old_]threads lists and is ultimately updated by
+ * MarkWeak.c:tidyThreadList, which walks the thread lists and updates
+ * the global_link references of all TSOs that it finds.
+ *
+ * Typically the fact that this field is not scavenged is fine as all reachable
+ * TSOs on the heap are guaranteed to be on some generation's thread list and
+ * therefore will be scavenged by tidyThreadList. However, the sanity checker
+ * poses a bit of a challenge here as it walks heap blocks directly and
+ * therefore may encounter TSOs which aren't reachable via the the global
+ * thread lists.
+ *
+ * How might such orphan TSOs arise? One such way is via racing evacuation.
+ * Specifically, if two GC threads attempt to simultaneously evacuate a
+ * TSO, both threads will produce a copy of the TSO in their respective
+ * to-space. However, only one will succeed in turning the from-space TSO into
+ * a forwarding pointer. Consequently, tidyThreadList will find and update the
+ * copy which "won". Meanwhile, the "losing" copy will contain a dangling
+ * global_link pointer into from-space.
+ *
+ * For this reason, checkTSO does not check global_link. Instead, we only do
+ * so in checkGlobalTSOList, which by definition will only look at
+ * threads which are reachable via a thread list (and therefore must have won
+ * the forwarding-pointer race).
+ *
+ * See #19146.
+ */
+
void
checkTSO(StgTSO *tso)
{
@@ -761,9 +793,12 @@ checkTSO(StgTSO *tso)
ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->bq));
ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->blocked_exceptions));
ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->stackobj));
- ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->global_link) &&
- (tso->global_link == END_TSO_QUEUE ||
- get_itbl((StgClosure*)tso->global_link)->type == TSO));
+
+ // This assertion sadly does not always hold.
+ // See Note [Sanity-checking global_link] for why.
+ //ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->global_link) &&
+ // (tso->global_link == END_TSO_QUEUE ||
+ // get_itbl((StgClosure*)tso->global_link)->type == TSO));
if (tso->label) {
ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->label));