diff options
author | Ben Gamari <ben@smart-cactus.org> | 2022-08-18 12:48:27 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2022-08-19 19:10:26 -0400 |
commit | c895c44911ec44b9506cbe97555753ac402d6acf (patch) | |
tree | 453c8d8346707d5c27d424a7fbf0bb6971ca1e89 | |
parent | e293029db0d60852908feaf2312794849194b08c (diff) | |
download | haskell-wip/T22038.tar.gz |
compiler: Rework handling of mutator abortingwip/T22038
Previously `-dtag-inference-checks`, `-dcheck-prim-bounds`, and
`-falignment-sanitization` all aborted by calling `barf` from the
mutator. However, this can lead to deadlocks in the threaded RTS. For
instance, in the case of `-dcheck-prim-bounds` the following can happen
1. the mutator takes a capability and begins execution
2. the bounds check fails, calling `barf`
3. `barf` calls `rtsFatalInternalErrorFn`, which in turn calls `endEventLogging`
4. `endEventLogging` calls `flushEventLog`, which it turn initiates a
sync to request that all capabilities flush their local event logs
5. we deadlock as the the capability held by the crashing mutator can
never join the sync
To avoid this we now have a more principled means of aborting: we return
to the scheduler setting the thread's return value to ThreadAborting.
The scheduler will see this and call `barf`.
Fixes #22038.
-rw-r--r-- | compiler/GHC/Cmm/CLabel.hs | 13 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Prim.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/TagCheck.hs | 7 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Utils.hs | 6 | ||||
-rw-r--r-- | rts/PrimOps.cmm | 8 | ||||
-rw-r--r-- | rts/RtsMessages.c | 18 | ||||
-rw-r--r-- | rts/Schedule.c | 8 | ||||
-rw-r--r-- | rts/StgMiscClosures.cmm | 46 | ||||
-rw-r--r-- | rts/include/rts/Constants.h | 1 |
9 files changed, 72 insertions, 37 deletions
diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs index c12ecff5eb..5b9c10fc38 100644 --- a/compiler/GHC/Cmm/CLabel.hs +++ b/compiler/GHC/Cmm/CLabel.hs @@ -64,6 +64,7 @@ module GHC.Cmm.CLabel ( mkSMAP_FROZEN_DIRTY_infoLabel, mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel, + mkTagInferenceCheckFailureLabel, mkOutOfBoundsAccessLabel, mkArrWords_infoLabel, mkSRTInfoLabel, @@ -637,8 +638,9 @@ mkDirty_MUT_VAR_Label, mkTopTickyCtrLabel, mkCAFBlackHoleInfoTableLabel, mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel, - mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel, - mkOutOfBoundsAccessLabel, mkMUT_VAR_CLEAN_infoLabel :: CLabel + mkSMAP_DIRTY_infoLabel, mkMUT_VAR_CLEAN_infoLabel, + mkBadAlignmentLabel, mkTagInferenceCheckFailureLabel, mkOutOfBoundsAccessLabel + :: CLabel mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction mkNonmovingWriteBarrierEnabledLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "nonmoving_write_barrier_enabled") CmmData @@ -655,9 +657,10 @@ mkArrWords_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsL mkSMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo mkSMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo mkSMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_SMALL_MUT_ARR_PTRS_DIRTY") CmmInfo -mkBadAlignmentLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_badAlignment") CmmEntry -mkOutOfBoundsAccessLabel = mkForeignLabel (fsLit "rtsOutOfBoundsAccess") Nothing ForeignLabelInExternalPackage IsFunction -mkMUT_VAR_CLEAN_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_MUT_VAR_CLEAN") CmmInfo +mkMUT_VAR_CLEAN_infoLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_MUT_VAR_CLEAN") CmmInfo +mkBadAlignmentLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_badAlignment") CmmPrimCall +mkOutOfBoundsAccessLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_outOfBoundsAccess") CmmPrimCall +mkTagInferenceCheckFailureLabel = CmmLabel rtsUnitId (NeedExternDecl False) (fsLit "stg_tagInferenceCheckFailure") CmmPrimCall mkSRTInfoLabel :: Int -> CLabel mkSRTInfoLabel n = CmmLabel rtsUnitId (NeedExternDecl False) lbl CmmInfo diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs index 1d482b0143..a044b7076f 100644 --- a/compiler/GHC/StgToCmm/Prim.hs +++ b/compiler/GHC/StgToCmm/Prim.hs @@ -3211,7 +3211,7 @@ doBoundsCheck idx sz = do when do_bounds_check (doCheck platform) where doCheck platform = do - boundsCheckFailed <- getCode $ emitCCall [] (mkLblExpr mkOutOfBoundsAccessLabel) [] + boundsCheckFailed <- getCode $ emitCall (NativeNodeCall, NativeReturn) (mkLblExpr mkOutOfBoundsAccessLabel) [idx, sz] emit =<< mkCmmIfThen' isOutOfBounds boundsCheckFailed (Just False) where uGE = cmmUGeWord platform diff --git a/compiler/GHC/StgToCmm/TagCheck.hs b/compiler/GHC/StgToCmm/TagCheck.hs index afa3fef426..d41bc356fb 100644 --- a/compiler/GHC/StgToCmm/TagCheck.hs +++ b/compiler/GHC/StgToCmm/TagCheck.hs @@ -19,9 +19,13 @@ import GHC.Prelude import GHC.StgToCmm.Env import GHC.StgToCmm.Monad import GHC.StgToCmm.Utils +import GHC.StgToCmm.Layout (emitCall) +import GHC.StgToCmm.Lit (newStringCLit) import GHC.Cmm import GHC.Cmm.BlockId +import GHC.Cmm.CLabel (mkTagInferenceCheckFailureLabel) import GHC.Cmm.Graph as CmmGraph +import GHC.Cmm.Utils import GHC.Core.Type import GHC.Types.Id @@ -95,7 +99,8 @@ emitTagAssertion onWhat fun = do ; needsArgTag fun lbarf lret ; emitLabel lbarf - ; emitBarf ("Tag inference failed on:" ++ onWhat) + ; onWhat_str <- newStringCLit onWhat + ; _ <- emitCall (NativeNodeCall, NativeReturn) (mkLblExpr mkTagInferenceCheckFailureLabel) [CmmLit onWhat_str] ; emitLabel lret } diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs index ddda97ab2a..0656d6f971 100644 --- a/compiler/GHC/StgToCmm/Utils.hs +++ b/compiler/GHC/StgToCmm/Utils.hs @@ -12,7 +12,6 @@ module GHC.StgToCmm.Utils ( emitDataLits, emitRODataLits, emitDataCon, emitRtsCall, emitRtsCallWithResult, emitRtsCallGen, - emitBarf, assignTemp, newTemp, newUnboxedTupleRegs, @@ -158,11 +157,6 @@ tagToClosure platform tycon tag -- ------------------------------------------------------------------------- -emitBarf :: String -> FCode () -emitBarf msg = do - strLbl <- newStringCLit msg - emitRtsCall rtsUnitId (fsLit "barf") [(CmmLit strLbl,AddrHint)] False - emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () emitRtsCall pkg fun = emitRtsCallGen [] (mkCmmCodeLabel pkg fun) diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 7bf403086d..77f9efe826 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -42,7 +42,7 @@ import CLOSURE CCS_MAIN; #if defined(DEBUG) #define ASSERT_IN_BOUNDS(ind, sz) \ - if (ind >= sz) { ccall rtsOutOfBoundsAccess(); } + if (ind >= sz) { ccall stg_outOfBoundsAccess(ind, sz); } #else #define ASSERT_IN_BOUNDS(ind, sz) #endif @@ -1150,7 +1150,7 @@ stg_threadStatuszh ( gcptr tso ) * TVar primitives * -------------------------------------------------------------------------- */ -stg_abort /* no arg list: explicit stack layout */ +stg_abort_tx /* no arg list: explicit stack layout */ { W_ frame_type; W_ frame; @@ -1159,7 +1159,7 @@ stg_abort /* no arg list: explicit stack layout */ W_ r; // STM operations may allocate - MAYBE_GC_ (stg_abort); // NB. not MAYBE_GC(), we cannot make a + MAYBE_GC_ (stg_abort_tx); // NB. not MAYBE_GC(), we cannot make a // function call in an explicit-stack proc // Find the enclosing ATOMICALLY_FRAME @@ -1217,7 +1217,7 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME, } else { // Did not commit: abort and restart. StgTSO_trec(CurrentTSO) = outer; - jump stg_abort(); + jump stg_abort_tx(); } } diff --git a/rts/RtsMessages.c b/rts/RtsMessages.c index 8ece485854..382dbe78ee 100644 --- a/rts/RtsMessages.c +++ b/rts/RtsMessages.c @@ -320,21 +320,3 @@ rtsDebugMsgFn(const char *s, va_list ap) return r; } - -// Used in stg_badAlignment_entry defined in StgStartup.cmm. -void rtsBadAlignmentBarf(void) GNUC3_ATTRIBUTE(__noreturn__); - -void -rtsBadAlignmentBarf() -{ - barf("Encountered incorrectly aligned pointer. This can't be good."); -} - -// Used by code generator -void rtsOutOfBoundsAccess(void) GNUC3_ATTRIBUTE(__noreturn__); - -void -rtsOutOfBoundsAccess() -{ - barf("Encountered out of bounds array access."); -} diff --git a/rts/Schedule.c b/rts/Schedule.c index bc0e7d3acf..ff74645392 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -571,8 +571,12 @@ run_thread: ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task); break; + case ThreadAborted: + interruptStgRts(); + break; + default: - barf("schedule: invalid thread return code %d", (int)ret); + barf("schedule: invalid thread return code %d", (int)ret); } if (ready_to_gc || scheduleNeedHeapProfile(ready_to_gc)) { @@ -3090,7 +3094,7 @@ findRetryFrameHelper (Capability *cap, StgTSO *tso) /* ----------------------------------------------------------------------------- findAtomicallyFrameHelper - This function is called by stg_abort via catch_retry_frame primitive. It is + This function is called by stg_abort_tx via catch_retry_frame primitive. It is like findRetryFrameHelper but it will only stop at ATOMICALLY_FRAME. -------------------------------------------------------------------------- */ diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index 10ae67562e..7b2c85d8ab 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -1486,3 +1486,49 @@ section "data" { } #endif + +/* Note [Aborting from the mutator] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * GHC supports a number of runtime checking modes (largely for debugging + * purposes) which may need to abort execution at runtime. This include + * -dtag-inference-check, -dcheck-prim-bounds, and -falignment-sanitisation. + * To abort execution one might think that we could just call `barf`; however + * this is not ideal since it doesn't allow the RTS to gracefully shutdown. + * + * In #22038 we saw this manifest as a deadlock when -dcheck-prim-bounds + * failed. In particular, we saw the following: + * + * 1. the mutator takes a capability and begins execution + * 2. the bounds check fails, calling `barf` + * 3. `barf` calls `rtsFatalInternalErrorFn`, which in turn calls `endEventLogging` + * 4. `endEventLogging` calls `flushEventLog`, which it turn initiates a + * sync to request that all capabilities flush their local event logs + * 5. we deadlock as the the capability held by the crashing mutator can + * never yields to the sync + * + * Consequently, we instead crash in a more principled manner by yielding back + * to the scheduler, indicating that we should abort by setting the thread's + * return value to ThreadAborted. This is done by stg_abort(). + */ + +stg_tagInferenceCheckFailure(W_ what) { + ccall debugBelch("Tag inference failed on: %s\n", what); + jump stg_abort(); +} + +stg_outOfBoundsAccess(W_ ind, W_ sz) { + ccall debugBelch("Encountered out of bounds array access (index=%d, size=%d)", ind, sz); + jump stg_abort(); +} + +stg_badAlignment() { + ccall debugBelch("Encountered incorrectly aligned pointer. This can't be good."); + jump stg_abort(); +} + +stg_abort() { + StgTSO_what_next(CurrentTSO) = ThreadKilled :: I16; + StgRegTable_rRet(BaseReg) = ThreadAborted :: W_; + R1 = BaseReg; + jump stg_returnToSched [R1]; +} diff --git a/rts/include/rts/Constants.h b/rts/include/rts/Constants.h index 3bf5a7a2d5..220598c186 100644 --- a/rts/include/rts/Constants.h +++ b/rts/include/rts/Constants.h @@ -268,6 +268,7 @@ #define ThreadYielding 3 #define ThreadBlocked 4 #define ThreadFinished 5 +#define ThreadAborted 6 /* See Note [Aborting from the mutator] */ /* * Flags for the tso->flags field. |