From 451d65a6913d85088a350be8e9b7a6d834453326 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Fri, 28 Apr 2023 12:33:31 -0400 Subject: rts: Don't sanity-check StgTSO.global_link See Note [Avoid dangling global_link pointers]. Fixes #19146. --- rts/sm/Sanity.c | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index 7fcc601bbd..1de1a7f0d9 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -737,6 +737,38 @@ checkSTACK (StgStack *stack) checkStackChunk(sp, stack_end); } +/* + * Note [Sanity-checking global_link] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * TSOs are a bit odd in that they have a global_link pointer field + * which is not scavenged by the GC. This field is used to track the + * generations[_].[old_]threads lists and is ultimately updated by + * MarkWeak.c:tidyThreadList, which walks the thread lists and updates + * the global_link references of all TSOs that it finds. + * + * Typically the fact that this field is not scavenged is fine as all reachable + * TSOs on the heap are guaranteed to be on some generation's thread list and + * therefore will be scavenged by tidyThreadList. However, the sanity checker + * poses a bit of a challenge here as it walks heap blocks directly and + * therefore may encounter TSOs which aren't reachable via the the global + * thread lists. + * + * How might such orphan TSOs arise? One such way is via racing evacuation. + * Specifically, if two GC threads attempt to simultaneously evacuate a + * TSO, both threads will produce a copy of the TSO in their respective + * to-space. However, only one will succeed in turning the from-space TSO into + * a forwarding pointer. Consequently, tidyThreadList will find and update the + * copy which "won". Meanwhile, the "losing" copy will contain a dangling + * global_link pointer into from-space. + * + * For this reason, checkTSO does not check global_link. Instead, we only do + * so in checkGlobalTSOList, which by definition will only look at + * threads which are reachable via a thread list (and therefore must have won + * the forwarding-pointer race). + * + * See #19146. + */ + void checkTSO(StgTSO *tso) { @@ -761,9 +793,12 @@ checkTSO(StgTSO *tso) ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->bq)); ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->blocked_exceptions)); ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->stackobj)); - ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->global_link) && - (tso->global_link == END_TSO_QUEUE || - get_itbl((StgClosure*)tso->global_link)->type == TSO)); + + // This assertion sadly does not always hold. + // See Note [Sanity-checking global_link] for why. + //ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->global_link) && + // (tso->global_link == END_TSO_QUEUE || + // get_itbl((StgClosure*)tso->global_link)->type == TSO)); if (tso->label) { ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->label)); -- cgit v1.2.1