summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2022-08-18 12:48:27 -0400
committerBen Gamari <ben@smart-cactus.org>2022-08-19 19:10:26 -0400
commitc895c44911ec44b9506cbe97555753ac402d6acf (patch)
tree453c8d8346707d5c27d424a7fbf0bb6971ca1e89
parente293029db0d60852908feaf2312794849194b08c (diff)
downloadhaskell-wip/T22038.tar.gz
compiler: Rework handling of mutator abortingwip/T22038
Previously `-dtag-inference-checks`, `-dcheck-prim-bounds`, and `-falignment-sanitization` all aborted by calling `barf` from the mutator. However, this can lead to deadlocks in the threaded RTS. For instance, in the case of `-dcheck-prim-bounds` the following can happen 1. the mutator takes a capability and begins execution 2. the bounds check fails, calling `barf` 3. `barf` calls `rtsFatalInternalErrorFn`, which in turn calls `endEventLogging` 4. `endEventLogging` calls `flushEventLog`, which it turn initiates a sync to request that all capabilities flush their local event logs 5. we deadlock as the the capability held by the crashing mutator can never join the sync To avoid this we now have a more principled means of aborting: we return to the scheduler setting the thread's return value to ThreadAborting. The scheduler will see this and call `barf`. Fixes #22038.
-rw-r--r--compiler/GHC/Cmm/CLabel.hs13
-rw-r--r--compiler/GHC/StgToCmm/Prim.hs2
-rw-r--r--compiler/GHC/StgToCmm/TagCheck.hs7
-rw-r--r--compiler/GHC/StgToCmm/Utils.hs6
-rw-r--r--rts/PrimOps.cmm8
-rw-r--r--rts/RtsMessages.c18
-rw-r--r--rts/Schedule.c8
-rw-r--r--rts/StgMiscClosures.cmm46
-rw-r--r--rts/include/rts/Constants.h1
9 files changed, 72 insertions, 37 deletions
diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs
index c12ecff5eb..5b9c10fc38 100644
--- a/compiler/GHC/Cmm/CLabel.hs
+++ b/compiler/GHC/Cmm/CLabel.hs
@@ -64,6 +64,7 @@ module GHC.Cmm.CLabel (
mkSMAP_FROZEN_DIRTY_infoLabel,
mkSMAP_DIRTY_infoLabel,
mkBadAlignmentLabel,
+ mkTagInferenceCheckFailureLabel,
mkOutOfBoundsAccessLabel,
mkArrWords_infoLabel,
mkSRTInfoLabel,
@@ -637,8 +638,9 @@ mkDirty_MUT_VAR_Label,
mkTopTickyCtrLabel,
mkCAFBlackHoleInfoTableLabel,
mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel,
- mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel,
- mkOutOfBoundsAccessLabel, mkMUT_VAR_CLEAN_infoLabel :: CLabel
+ mkSMAP_DIRTY_infoLabel, mkMUT_VAR_CLEAN_infoLabel,
+ mkBadAlignmentLabel, mkTagInferenceCheckFailureLabel, mkOutOfBoundsAccessLabel
+ :: CLabel
mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction
mkNonmovingWriteBarrierEnabledLabel
= CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "nonmoving_write_barrier_enabled") CmmData
@@ -655,9 +657,10 @@ mkArrWords_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsL
mkSMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo
mkSMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo
mkSMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_SMALL_MUT_ARR_PTRS_DIRTY") CmmInfo
-mkBadAlignmentLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_badAlignment") CmmEntry
-mkOutOfBoundsAccessLabel = mkForeignLabel (fsLit "rtsOutOfBoundsAccess") Nothing ForeignLabelInExternalPackage IsFunction
-mkMUT_VAR_CLEAN_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_MUT_VAR_CLEAN") CmmInfo
+mkMUT_VAR_CLEAN_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_MUT_VAR_CLEAN") CmmInfo
+mkBadAlignmentLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_badAlignment") CmmPrimCall
+mkOutOfBoundsAccessLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_outOfBoundsAccess") CmmPrimCall
+mkTagInferenceCheckFailureLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_tagInferenceCheckFailure") CmmPrimCall
mkSRTInfoLabel :: Int -> CLabel
mkSRTInfoLabel n = CmmLabel rtsUnitId (NeedExternDecl False) lbl CmmInfo
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index 1d482b0143..a044b7076f 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -3211,7 +3211,7 @@ doBoundsCheck idx sz = do
when do_bounds_check (doCheck platform)
where
doCheck platform = do
- boundsCheckFailed <- getCode $ emitCCall [] (mkLblExpr mkOutOfBoundsAccessLabel) []
+ boundsCheckFailed <- getCode $ emitCall (NativeNodeCall, NativeReturn) (mkLblExpr mkOutOfBoundsAccessLabel) [idx, sz]
emit =<< mkCmmIfThen' isOutOfBounds boundsCheckFailed (Just False)
where
uGE = cmmUGeWord platform
diff --git a/compiler/GHC/StgToCmm/TagCheck.hs b/compiler/GHC/StgToCmm/TagCheck.hs
index afa3fef426..d41bc356fb 100644
--- a/compiler/GHC/StgToCmm/TagCheck.hs
+++ b/compiler/GHC/StgToCmm/TagCheck.hs
@@ -19,9 +19,13 @@ import GHC.Prelude
import GHC.StgToCmm.Env
import GHC.StgToCmm.Monad
import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Layout (emitCall)
+import GHC.StgToCmm.Lit (newStringCLit)
import GHC.Cmm
import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel (mkTagInferenceCheckFailureLabel)
import GHC.Cmm.Graph as CmmGraph
+import GHC.Cmm.Utils
import GHC.Core.Type
import GHC.Types.Id
@@ -95,7 +99,8 @@ emitTagAssertion onWhat fun = do
; needsArgTag fun lbarf lret
; emitLabel lbarf
- ; emitBarf ("Tag inference failed on:" ++ onWhat)
+ ; onWhat_str <- newStringCLit onWhat
+ ; _ <- emitCall (NativeNodeCall, NativeReturn) (mkLblExpr mkTagInferenceCheckFailureLabel) [CmmLit onWhat_str]
; emitLabel lret
}
diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs
index ddda97ab2a..0656d6f971 100644
--- a/compiler/GHC/StgToCmm/Utils.hs
+++ b/compiler/GHC/StgToCmm/Utils.hs
@@ -12,7 +12,6 @@ module GHC.StgToCmm.Utils (
emitDataLits, emitRODataLits,
emitDataCon,
emitRtsCall, emitRtsCallWithResult, emitRtsCallGen,
- emitBarf,
assignTemp, newTemp,
newUnboxedTupleRegs,
@@ -158,11 +157,6 @@ tagToClosure platform tycon tag
--
-------------------------------------------------------------------------
-emitBarf :: String -> FCode ()
-emitBarf msg = do
- strLbl <- newStringCLit msg
- emitRtsCall rtsUnitId (fsLit "barf") [(CmmLit strLbl,AddrHint)] False
-
emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
emitRtsCall pkg fun = emitRtsCallGen [] (mkCmmCodeLabel pkg fun)
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 7bf403086d..77f9efe826 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -42,7 +42,7 @@ import CLOSURE CCS_MAIN;
#if defined(DEBUG)
#define ASSERT_IN_BOUNDS(ind, sz) \
- if (ind >= sz) { ccall rtsOutOfBoundsAccess(); }
+ if (ind >= sz) { ccall stg_outOfBoundsAccess(ind, sz); }
#else
#define ASSERT_IN_BOUNDS(ind, sz)
#endif
@@ -1150,7 +1150,7 @@ stg_threadStatuszh ( gcptr tso )
* TVar primitives
* -------------------------------------------------------------------------- */
-stg_abort /* no arg list: explicit stack layout */
+stg_abort_tx /* no arg list: explicit stack layout */
{
W_ frame_type;
W_ frame;
@@ -1159,7 +1159,7 @@ stg_abort /* no arg list: explicit stack layout */
W_ r;
// STM operations may allocate
- MAYBE_GC_ (stg_abort); // NB. not MAYBE_GC(), we cannot make a
+ MAYBE_GC_ (stg_abort_tx); // NB. not MAYBE_GC(), we cannot make a
// function call in an explicit-stack proc
// Find the enclosing ATOMICALLY_FRAME
@@ -1217,7 +1217,7 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
} else {
// Did not commit: abort and restart.
StgTSO_trec(CurrentTSO) = outer;
- jump stg_abort();
+ jump stg_abort_tx();
}
}
diff --git a/rts/RtsMessages.c b/rts/RtsMessages.c
index 8ece485854..382dbe78ee 100644
--- a/rts/RtsMessages.c
+++ b/rts/RtsMessages.c
@@ -320,21 +320,3 @@ rtsDebugMsgFn(const char *s, va_list ap)
return r;
}
-
-// Used in stg_badAlignment_entry defined in StgStartup.cmm.
-void rtsBadAlignmentBarf(void) GNUC3_ATTRIBUTE(__noreturn__);
-
-void
-rtsBadAlignmentBarf()
-{
- barf("Encountered incorrectly aligned pointer. This can't be good.");
-}
-
-// Used by code generator
-void rtsOutOfBoundsAccess(void) GNUC3_ATTRIBUTE(__noreturn__);
-
-void
-rtsOutOfBoundsAccess()
-{
- barf("Encountered out of bounds array access.");
-}
diff --git a/rts/Schedule.c b/rts/Schedule.c
index bc0e7d3acf..ff74645392 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -571,8 +571,12 @@ run_thread:
ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
break;
+ case ThreadAborted:
+ interruptStgRts();
+ break;
+
default:
- barf("schedule: invalid thread return code %d", (int)ret);
+ barf("schedule: invalid thread return code %d", (int)ret);
}
if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) {
@@ -3090,7 +3094,7 @@ findRetryFrameHelper (Capability *cap, StgTSO *tso)
/* -----------------------------------------------------------------------------
findAtomicallyFrameHelper
- This function is called by stg_abort via catch_retry_frame primitive. It is
+ This function is called by stg_abort_tx via catch_retry_frame primitive. It is
like findRetryFrameHelper but it will only stop at ATOMICALLY_FRAME.
-------------------------------------------------------------------------- */
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 10ae67562e..7b2c85d8ab 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -1486,3 +1486,49 @@ section "data" {
}
#endif
+
+/* Note [Aborting from the mutator]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * GHC supports a number of runtime checking modes (largely for debugging
+ * purposes) which may need to abort execution at runtime. This include
+ * -dtag-inference-check, -dcheck-prim-bounds, and -falignment-sanitisation.
+ * To abort execution one might think that we could just call `barf`; however
+ * this is not ideal since it doesn't allow the RTS to gracefully shutdown.
+ *
+ * In #22038 we saw this manifest as a deadlock when -dcheck-prim-bounds
+ * failed. In particular, we saw the following:
+ *
+ * 1. the mutator takes a capability and begins execution
+ * 2. the bounds check fails, calling `barf`
+ * 3. `barf` calls `rtsFatalInternalErrorFn`, which in turn calls `endEventLogging`
+ * 4. `endEventLogging` calls `flushEventLog`, which it turn initiates a
+ * sync to request that all capabilities flush their local event logs
+ * 5. we deadlock as the the capability held by the crashing mutator can
+ * never yields to the sync
+ *
+ * Consequently, we instead crash in a more principled manner by yielding back
+ * to the scheduler, indicating that we should abort by setting the thread's
+ * return value to ThreadAborted. This is done by stg_abort().
+ */
+
+stg_tagInferenceCheckFailure(W_ what) {
+ ccall debugBelch("Tag inference failed on: %s\n", what);
+ jump stg_abort();
+}
+
+stg_outOfBoundsAccess(W_ ind, W_ sz) {
+ ccall debugBelch("Encountered out of bounds array access (index=%d, size=%d)", ind, sz);
+ jump stg_abort();
+}
+
+stg_badAlignment() {
+ ccall debugBelch("Encountered incorrectly aligned pointer. This can't be good.");
+ jump stg_abort();
+}
+
+stg_abort() {
+ StgTSO_what_next(CurrentTSO) = ThreadKilled :: I16;
+ StgRegTable_rRet(BaseReg) = ThreadAborted :: W_;
+ R1 = BaseReg;
+ jump stg_returnToSched [R1];
+}
diff --git a/rts/include/rts/Constants.h b/rts/include/rts/Constants.h
index 3bf5a7a2d5..220598c186 100644
--- a/rts/include/rts/Constants.h
+++ b/rts/include/rts/Constants.h
@@ -268,6 +268,7 @@
#define ThreadYielding 3
#define ThreadBlocked 4
#define ThreadFinished 5
+#define ThreadAborted 6 /* See Note [Aborting from the mutator] */
/*
* Flags for the tso->flags field.