56 files changed, 1344 insertions, 55 deletions
diff --git a/compiler/GHC/Builtin/Names.hs b/compiler/GHC/Builtin/Names.hs
index f97103a90f..e714036cd4 100644
--- a/compiler/GHC/Builtin/Names.hs
+++ b/compiler/GHC/Builtin/Names.hs
@@ -1849,7 +1849,8 @@ statePrimTyConKey, stableNamePrimTyConKey, stableNameTyConKey,
     typeConKey, threadIdPrimTyConKey, bcoPrimTyConKey, ptrTyConKey,
     funPtrTyConKey, tVarPrimTyConKey, eqPrimTyConKey,
     eqReprPrimTyConKey, eqPhantPrimTyConKey,
-    compactPrimTyConKey, stackSnapshotPrimTyConKey :: Unique
+    compactPrimTyConKey, stackSnapshotPrimTyConKey,
+    promptTagPrimTyConKey :: Unique
 statePrimTyConKey                       = mkPreludeTyConUnique 50
 stableNamePrimTyConKey                  = mkPreludeTyConUnique 51
 stableNameTyConKey                      = mkPreludeTyConUnique 52
@@ -1878,6 +1879,7 @@ funPtrTyConKey                          = mkPreludeTyConUnique 78
 tVarPrimTyConKey                        = mkPreludeTyConUnique 79
 compactPrimTyConKey                     = mkPreludeTyConUnique 80
 stackSnapshotPrimTyConKey               = mkPreludeTyConUnique 81
+promptTagPrimTyConKey                   = mkPreludeTyConUnique 82
 
 eitherTyConKey :: Unique
 eitherTyConKey                          = mkPreludeTyConUnique 84
diff --git a/compiler/GHC/Builtin/Types/Prim.hs b/compiler/GHC/Builtin/Types/Prim.hs
index 2eec67613d..83d36af673 100644
--- a/compiler/GHC/Builtin/Types/Prim.hs
+++ b/compiler/GHC/Builtin/Types/Prim.hs
@@ -80,6 +80,7 @@ module GHC.Builtin.Types.Prim(
         weakPrimTyCon,                  mkWeakPrimTy,
         threadIdPrimTyCon,              threadIdPrimTy,
         stackSnapshotPrimTyCon,         stackSnapshotPrimTy,
+        promptTagPrimTyCon,             mkPromptTagPrimTy,
 
         int8PrimTyCon,          int8PrimTy, int8PrimTyConName,
         word8PrimTyCon,         word8PrimTy, word8PrimTyConName,
@@ -198,6 +199,7 @@ exposedPrimTyCons
     , word32PrimTyCon
     , word64PrimTyCon
     , stackSnapshotPrimTyCon
+    , promptTagPrimTyCon
 
     , tYPETyCon
     , funTyCon
@@ -231,7 +233,7 @@ charPrimTyConName, intPrimTyConName, int8PrimTyConName, int16PrimTyConName, int3
   stableNamePrimTyConName, compactPrimTyConName, bcoPrimTyConName,
   weakPrimTyConName, threadIdPrimTyConName,
   eqPrimTyConName, eqReprPrimTyConName, eqPhantPrimTyConName,
-  stackSnapshotPrimTyConName :: Name
+  stackSnapshotPrimTyConName, promptTagPrimTyConName :: Name
 charPrimTyConName             = mkPrimTc (fsLit "Char#") charPrimTyConKey charPrimTyCon
 intPrimTyConName              = mkPrimTc (fsLit "Int#") intPrimTyConKey  intPrimTyCon
 int8PrimTyConName             = mkPrimTc (fsLit "Int8#") int8PrimTyConKey int8PrimTyCon
@@ -269,6 +271,7 @@ stackSnapshotPrimTyConName    = mkPrimTc (fsLit "StackSnapshot#") stackSnapshotP
 bcoPrimTyConName              = mkPrimTc (fsLit "BCO") bcoPrimTyConKey bcoPrimTyCon
 weakPrimTyConName             = mkPrimTc (fsLit "Weak#") weakPrimTyConKey weakPrimTyCon
 threadIdPrimTyConName         = mkPrimTc (fsLit "ThreadId#") threadIdPrimTyConKey threadIdPrimTyCon
+promptTagPrimTyConName        = mkPrimTc (fsLit "PromptTag#") promptTagPrimTyConKey promptTagPrimTyCon
 
 {-
 ************************************************************************
@@ -1171,6 +1174,20 @@ threadIdPrimTyCon = pcPrimTyCon0 threadIdPrimTyConName unliftedRepTy
 {-
 ************************************************************************
 *                                                                      *
+   The ``prompt tag'' type
+*                                                                      *
+************************************************************************
+-}
+
+promptTagPrimTyCon :: TyCon
+promptTagPrimTyCon = pcPrimTyCon promptTagPrimTyConName [Representational] unliftedRepTy
+
+mkPromptTagPrimTy :: Type -> Type
+mkPromptTagPrimTy v = TyConApp promptTagPrimTyCon [v]
+
+{-
+************************************************************************
+*                                                                      *
 \subsection{SIMD vector types}
 *                                                                      *
 ************************************************************************
diff --git a/compiler/GHC/Builtin/primops.txt.pp b/compiler/GHC/Builtin/primops.txt.pp
index d46e5401fb..acd84c0926 100644
--- a/compiler/GHC/Builtin/primops.txt.pp
+++ b/compiler/GHC/Builtin/primops.txt.pp
@@ -2633,6 +2633,192 @@ primop  MaskStatus "getMaskingState#" GenPrimOp
    has_side_effects = True
 
 ------------------------------------------------------------------------
+section "Continuations"
+  { These operations provide access to first-class delimited continuations,
+    which allow a computation to access and manipulate portions of its
+    /current continuation/. Operationally, they are implemented by direct
+    manipulation of the RTS call stack, which may provide significant
+    performance gains relative to manual continuation-passing style (CPS) for
+    some programs.
+
+    Intuitively, the delimited control operators 'prompt#' and
+    'control0#' can be understood by analogy to 'catch#' and 'raiseIO#',
+    respectively:
+
+      * Like 'catch#', 'prompt#' does not do anything on its own, it
+        just /delimits/ a subcomputation (the source of the name "delimited
+        continuations").
+
+      * Like 'raiseIO#', 'control0#' aborts to the nearest enclosing
+        'prompt#' before resuming execution.
+
+    However, /unlike/ 'raiseIO#', 'control0#' does /not/ discard
+    the aborted computation: instead, it /captures/ it in a form that allows
+    it to be resumed later. In other words, 'control0#' does not
+    irreversibly abort the local computation before returning to the enclosing
+    'prompt#', it merely suspends it. All local context of the suspended
+    computation is packaged up and returned as an ordinary function that can be
+    invoked at a later point in time to /continue/ execution, which is why
+    the suspended computation is known as a /first-class continuation/.
+
+    In GHC, every continuation prompt is associated with exactly one
+    'PromptTag#'. Prompt tags are unique, opaque values created by
+    'newPromptTag#' that may only be compared for equality. Both 'prompt#'
+    and 'control0#' accept a 'PromptTag#' argument, and 'control0#'
+    captures the continuation up to the nearest enclosing use of 'prompt#'
+    /with the same tag/. This allows a program to control exactly which
+    prompt it will abort to by using different tags, similar to how a program
+    can control which 'catch' it will abort to by throwing different types
+    of exceptions. Additionally, 'PromptTag#' accepts a single type parameter,
+    which is used to relate the expected result type at the point of the
+    'prompt#' to the type of the continuation produced by 'control0#'.
+
+    == The gory details
+
+    The high-level explanation provided above should hopefully provide some
+    intuition for what these operations do, but it is not very precise; this
+    section provides a more thorough explanation.
+
+    The 'prompt#' operation morally has the following type:
+
+@
+'prompt#' :: 'PromptTag#' a -> IO a -> IO a
+@
+
+    If a computation @/m/@ never calls 'control0#', then
+    @'prompt#' /tag/ /m/@ is equivalent to just @/m/@, i.e. the 'prompt#' is
+    a no-op. This implies the following law:
+
+    \[
+    \mathtt{prompt\#}\ \mathit{tag}\ (\mathtt{pure}\ x) \equiv \mathtt{pure}\ x
+    \]
+
+    The 'control0#' operation morally has the following type:
+
+@
+'control0#' :: 'PromptTag#' a -> ((IO b -> IO a) -> IO a) -> IO b
+@
+
+    @'control0#' /tag/ /f/@ captures the current continuation up to the nearest
+    enclosing @'prompt#' /tag/@ and resumes execution from the point of the call
+    to 'prompt#', passing the captured continuation to @/f/@. To make that
+    somewhat more precise, we can say 'control0#' obeys the following law:
+
+    \[
+    \mathtt{prompt\#}\ \mathit{tag}\ (\mathtt{control0\#}\ tag\ f \mathbin{\mathtt{>>=}} k)
+      \equiv f\ (\lambda\ m \rightarrow m \mathbin{\mathtt{>>=}} k)
+    \]
+
+    However, this law does not fully describe the behavior of 'control0#',
+    as it does not account for situations where 'control0#' does not appear
+    immediately inside 'prompt#'. Capturing the semantics more precisely
+    requires some additional notational machinery; a common approach is to
+    use [reduction semantics](https://en.wikipedia.org/wiki/Operational_semantics#Reduction_semantics).
+    Assuming an appropriate definition of evaluation contexts \(E\), the
+    semantics of 'prompt#' and 'control0#' can be given as follows:
+
+    \[
+    \begin{aligned}
+    E[\mathtt{prompt\#}\ \mathit{tag}\ (\mathtt{pure}\ v)]
+      &\longrightarrow E[\mathtt{pure}\ v] \\[8pt]
+    E_1[\mathtt{prompt\#}\ \mathit{tag}\ E_2[\mathtt{control0\#}\ tag\ f]]
+      &\longrightarrow E_1[f\ (\lambda\ m \rightarrow E_2[m])] \\[-2pt]
+      \mathrm{where}\;\: \mathtt{prompt\#}\ \mathit{tag} &\not\in E_2
+    \end{aligned}
+    \]
+
+    A full treatment of the semantics and metatheory of delimited control is
+    well outside the scope of this documentation, but a good, thorough
+    overview (in Haskell) is provided in [A Monadic Framework for Delimited
+    Continuations](https://legacy.cs.indiana.edu/~dyb/pubs/monadicDC.pdf) by
+    Dybvig et al.
+
+    == Safety and invariants
+
+    Correct uses of 'control0#' must obey the following restrictions:
+
+    1. The behavior of 'control0#' is only well-defined within a /strict
+       'State#' thread/, such as those associated with @IO@ and strict @ST@
+       computations.
+
+    2. Furthermore, 'control0#' may only be called within the dynamic extent
+       of a 'prompt#' with a matching tag somewhere in the /current/ strict
+       'State#' thread. Effectively, this means that a matching prompt must
+       exist somewhere, and the captured continuation must /not/ contain any
+       uses of @unsafePerformIO@, @runST@, @unsafeInterleaveIO@, etc. For
+       example, the following program is ill-defined:
+
+        @
+        'prompt#' /tag/ $
+          evaluate (unsafePerformIO $ 'control0#' /tag/ /f/)
+        @
+
+        In this example, the use of 'prompt#' appears in a different 'State#'
+        thread from the use of 'control0#', so there is no valid prompt in
+        scope to capture up to.
+
+    3. Finally, 'control0#' may not be used within 'State#' threads associated
+       with an STM transaction (i.e. those introduced by 'atomically#').
+
+    If the runtime is able to detect that any of these invariants have been
+    violated in a way that would compromise internal invariants of the runtime,
+    'control0#' will fail by raising an exception. However, such violations
+    are only detected on a best-effort basis, as the bookkeeping necessary for
+    detecting /all/ illegal uses of 'control0#' would have significant overhead.
+    Therefore, although the operations are “safe” from the runtime’s point of
+    view (e.g. they will not compromise memory safety or clobber internal runtime
+    state), it is still ultimately the programmer’s responsibility to ensure
+    these invariants hold to guarantee predictable program behavior.
+
+    In a similar vein, since each captured continuation includes the full local
+    context of the suspended computation, it can safely be resumed arbitrarily
+    many times without violating any invariants of the runtime system. However,
+    use of these operations in an arbitrary 'IO' computation may be unsafe for
+    other reasons, as most 'IO' code is not written with reentrancy in mind. For
+    example, a computation suspended in the middle of reading a file will likely
+    finish reading it when it is resumed; further attempts to resume from the
+    same place would then fail because the file handle was already closed.
+
+    In other words, although the RTS ensures that a computation’s control state
+    and local variables are properly restored for each distinct resumption of
+    a continuation, it makes no attempt to duplicate any local state the
+    computation may have been using (and could not possibly do so in general).
+    Furthermore, it provides no mechanism for an arbitrary computation to
+    protect itself against unwanted reentrancy (i.e. there is no analogue to
+    Scheme’s @dynamic-wind@). For those reasons, manipulating the continuation
+    is only safe if the caller can be certain that doing so will not violate any
+    expectations or invariants of the enclosing computation. }
+------------------------------------------------------------------------
+
+primtype PromptTag# a
+
+primop  NewPromptTagOp "newPromptTag#" GenPrimOp
+        State# RealWorld -> (# State# RealWorld, PromptTag# a #)
+   with
+   out_of_line = True
+   has_side_effects = True
+
+primop  PromptOp "prompt#" GenPrimOp
+        PromptTag# a
+     -> (State# RealWorld -> (# State# RealWorld, a #))
+     -> State# RealWorld -> (# State# RealWorld, a #)
+   with
+   strictness = { \ _arity -> mkClosedDmdSig [topDmd, strictOnceApply1Dmd, topDmd] topDiv }
+   out_of_line = True
+   has_side_effects = True
+
+primop  Control0Op "control0#" GenPrimOp
+        PromptTag# a
+     -> (((State# RealWorld -> (# State# RealWorld, p #))
+          -> State# RealWorld -> (# State# RealWorld, a #))
+         -> State# RealWorld -> (# State# RealWorld, a #))
+     -> State# RealWorld -> (# State# RealWorld, p #)
+   with
+   strictness = { \ _arity -> mkClosedDmdSig [topDmd, lazyApply2Dmd, topDmd] topDiv }
+   out_of_line = True
+   has_side_effects = True
+
+------------------------------------------------------------------------
 section "STM-accessible Mutable Variables"
 ------------------------------------------------------------------------
 
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
index 67ce361a02..ed47fa7a7f 100644
--- a/compiler/GHC/Cmm/Parser.y
+++ b/compiler/GHC/Cmm/Parser.y
@@ -492,15 +492,14 @@ info    :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
                                              , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
                                 []) }
 
-        | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')'
-                -- ptrs, nptrs, closure type, description, type, fun type
+        | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ',' INT ')'
+                -- ptrs, nptrs, closure type, description, type, arity, fun type
                 {% do
                       home_unit_id <- getHomeUnitId
                       liftP $ pure $ do
                         profile <- getProfile
                         let prof = profilingInfo profile $11 $13
-                            ty   = Fun 0 (ArgSpec (fromIntegral $15))
-                                  -- Arity zero, arg_type $15
+                            ty   = Fun (fromIntegral $15) (ArgSpec (fromIntegral $17))
                             rep = mkRTSRep (fromIntegral $9) $
                                       mkHeapRep profile False (fromIntegral $5)
                                                       (fromIntegral $7) ty
@@ -510,7 +509,7 @@ info    :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) }
                                              , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
                                 []) }
                 -- we leave most of the fields zero here.  This is only used
-                -- to generate the BCO info table in the RTS at the moment.
+                -- to generate the BCO and CONTINUATION info tables in the RTS at the moment.
 
         | 'INFO_TABLE_CONSTR' '(' NAME ',' INT ',' INT ',' INT ',' INT ',' STRING ',' STRING ')'
                 -- ptrs, nptrs, tag, closure type, description, type
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index 1d482b0143..eb26b4a503 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -1568,6 +1568,9 @@ emitPrimOp cfg primop =
   MaskUninterruptibleOp -> alwaysExternal
   UnmaskAsyncExceptionsOp -> alwaysExternal
   MaskStatus -> alwaysExternal
+  NewPromptTagOp -> alwaysExternal
+  PromptOp -> alwaysExternal
+  Control0Op -> alwaysExternal
   AtomicallyOp -> alwaysExternal
   RetryOp -> alwaysExternal
   CatchRetryOp -> alwaysExternal
diff --git a/docs/users_guide/9.6.1-notes.rst b/docs/users_guide/9.6.1-notes.rst
index e58b56a6d8..2e8302ad22 100644
--- a/docs/users_guide/9.6.1-notes.rst
+++ b/docs/users_guide/9.6.1-notes.rst
@@ -63,14 +63,13 @@ Language
 Compiler
 ~~~~~~~~
 
-- The :extension:`TypeInType` is now marked as deprecated. Its meaning has been included 
+- The :extension:`TypeInType` is now marked as deprecated. Its meaning has been included
   in :extension:`PolyKinds` and :extension:`DataKinds`.
 
-
 GHCi
 ~~~~
 
-- GHCi will now accept any file-header pragmas it finds, such as 
+- GHCi will now accept any file-header pragmas it finds, such as
   ``{-# OPTIONS_GHC ... #-}`` and ``{-# LANGUAGE ... #-}`` (see :ref:`pragmas`).  For example,
   instead of using :ghci-cmd:`:set` to enable :ghc-flag:`-Wmissing-signatures`,
   you could instead write:
@@ -81,6 +80,17 @@ GHCi
 
 This can be convenient when pasting large multi-line blocks of code into GHCi.
 
+Runtime system
+~~~~~~~~~~~~~~
+
+- The `Delimited continuation primops <https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0313-delimited-continuation-primops.rst>`_
+  proposal has been implemented, adding native support for first-class,
+  delimited continuations to the RTS. For the reasons given in the proposal,
+  no safe API to access this functionality is provided anywhere in ``base``.
+  Instead, the ``prompt#`` and ``control0#`` primops are intended to be consumed
+  by library authors directly, who may wrap them a safe API that maintains the
+  necessary invariants. See the documentation in ``GHC.Exts`` for more details.
+
 ``base`` library
 ~~~~~~~~~~~~~~~~
 
@@ -152,4 +162,3 @@ for further change information.
     libraries/unix/unix.cabal:               Dependency of ``ghc`` library
     libraries/Win32/Win32.cabal:             Dependency of ``ghc`` library
     libraries/xhtml/xhtml.cabal:             Dependency of ``haddock`` executable
-
diff --git a/libraries/base/Control/Exception/Base.hs b/libraries/base/Control/Exception/Base.hs
index 3cc6a24433..c2c675c65d 100644
--- a/libraries/base/Control/Exception/Base.hs
+++ b/libraries/base/Control/Exception/Base.hs
@@ -42,6 +42,7 @@ module Control.Exception.Base (
         RecUpdError(..),
         ErrorCall(..),
         TypeError(..), -- #10284, custom error type for deferred type errors
+        NoMatchingContinuationPrompt(..),
 
         -- * Throwing exceptions
         throwIO,
@@ -96,7 +97,7 @@ module Control.Exception.Base (
         recSelError, recConError, runtimeError,
         nonExhaustiveGuardsError, patError, noMethodBindingError,
         typeError,
-        nonTermination, nestedAtomically,
+        nonTermination, nestedAtomically, noMatchingContinuationPrompt,
   ) where
 
 import           GHC.Base
@@ -391,6 +392,22 @@ instance Exception NestedAtomically
 
 -----
 
+-- | Thrown when the program attempts a continuation capture, but no prompt with
+-- the given prompt tag exists in the current continuation.
+--
+-- @since 4.18
+data NoMatchingContinuationPrompt = NoMatchingContinuationPrompt
+
+-- | @since 4.18
+instance Show NoMatchingContinuationPrompt where
+  showsPrec _ NoMatchingContinuationPrompt =
+    showString "GHC.Exts.control0#: no matching prompt in the current continuation"
+
+-- | @since 4.18
+instance Exception NoMatchingContinuationPrompt
+
+-----
+
 -- See Note [Compiler error functions] in ghc-prim:GHC.Prim.Panic
 recSelError, recConError, runtimeError,
   nonExhaustiveGuardsError, patError, noMethodBindingError,
@@ -414,3 +431,7 @@ nonTermination = toException NonTermination
 -- GHC's RTS calls this
 nestedAtomically :: SomeException
 nestedAtomically = toException NestedAtomically
+
+-- GHC's RTS calls this
+noMatchingContinuationPrompt :: SomeException
+noMatchingContinuationPrompt = toException NoMatchingContinuationPrompt
diff --git a/libraries/base/GHC/Exts.hs b/libraries/base/GHC/Exts.hs
index f5b2498542..90a37be35b 100755
--- a/libraries/base/GHC/Exts.hs
+++ b/libraries/base/GHC/Exts.hs
@@ -59,6 +59,7 @@ module GHC.Exts
         sameMutVar#,
         sameTVar#,
         sameIOPort#,
+        samePromptTag#,
 
         -- ** Compat wrapper
         atomicModifyMutVar#,
diff --git a/libraries/ghc-heap/GHC/Exts/Heap/ClosureTypes.hs b/libraries/ghc-heap/GHC/Exts/Heap/ClosureTypes.hs
index 5eefe02d0d..70da80d66b 100644
--- a/libraries/ghc-heap/GHC/Exts/Heap/ClosureTypes.hs
+++ b/libraries/ghc-heap/GHC/Exts/Heap/ClosureTypes.hs
@@ -80,6 +80,7 @@ data ClosureType
     | SMALL_MUT_ARR_PTRS_FROZEN_DIRTY
     | SMALL_MUT_ARR_PTRS_FROZEN_CLEAN
     | COMPACT_NFDATA
+    | CONTINUATION
     | N_CLOSURE_TYPES
  deriving (Enum, Eq, Ord, Show, Generic)
 
diff --git a/libraries/ghc-prim/GHC/Prim/PtrEq.hs b/libraries/ghc-prim/GHC/Prim/PtrEq.hs
index 34285a879a..49e78b1713 100644
--- a/libraries/ghc-prim/GHC/Prim/PtrEq.hs
+++ b/libraries/ghc-prim/GHC/Prim/PtrEq.hs
@@ -30,6 +30,7 @@ module GHC.Prim.PtrEq
     sameTVar#,
     sameMVar#,
     sameIOPort#,
+    samePromptTag#,
     eqStableName#
   ) where
 
@@ -113,6 +114,10 @@ sameMVar# = reallyUnsafePtrEquality#
 sameIOPort# :: IOPort# s a -> IOPort# s a -> Int#
 sameIOPort# = reallyUnsafePtrEquality#
 
+-- | Compare the underlying pointers of two 'PromptTag#'s.
+samePromptTag# :: PromptTag# a -> PromptTag# a -> Int#
+samePromptTag# = reallyUnsafePtrEquality#
+
 -- Note [Comparing stable names]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 -- A StableName# is actually a pointer to a stable name object (SNO)
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index 4c3177ae2f..78a0256d3a 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -87,7 +87,8 @@ again:
             FUN_2_0,
             FUN_1_1,
             FUN_0_2,
-            FUN_STATIC:
+            FUN_STATIC,
+            CONTINUATION:
         {
             arity = TO_W_(StgFunInfoExtra_arity(%FUN_INFO(info)));
         dofun:
diff --git a/rts/ClosureFlags.c b/rts/ClosureFlags.c
index 9caef370dd..3eff82f3ff 100644
--- a/rts/ClosureFlags.c
+++ b/rts/ClosureFlags.c
@@ -87,8 +87,9 @@ const StgWord16 closure_flags[] = {
  [SMALL_MUT_ARR_PTRS_FROZEN_DIRTY] =  (_HNF|     _NS|     _MUT|_UPT     ),
  [SMALL_MUT_ARR_PTRS_FROZEN_CLEAN] =  (_HNF|     _NS|          _UPT     ),
  [COMPACT_NFDATA]       =  (_HNF|     _NS                               ),
+ [CONTINUATION]         =  (_HNF|     _NS|         _UPT                 ),
 };
 
-#if N_CLOSURE_TYPES != 64
+#if N_CLOSURE_TYPES != 65
 #error Closure types changed: update ClosureFlags.c!
 #endif
diff --git a/rts/ClosureSize.c b/rts/ClosureSize.c
index 8fa99c6e6e..23d6eb4b6d 100644
--- a/rts/ClosureSize.c
+++ b/rts/ClosureSize.c
@@ -63,6 +63,8 @@ closure_sizeW_ (const StgClosure *p, const StgInfoTable *info)
         return bco_sizeW((StgBCO *)p);
     case TREC_CHUNK:
         return sizeofW(StgTRecChunk);
+    case CONTINUATION:
+        return continuation_sizeW((StgContinuation *)p);
     default:
         return sizeW_fromITBL(info);
     }
diff --git a/rts/Compact.cmm b/rts/Compact.cmm
index 101c002834..cfa8bba0f1 100644
--- a/rts/Compact.cmm
+++ b/rts/Compact.cmm
@@ -133,7 +133,8 @@ eval:
         FUN_0_2,
         FUN_STATIC,
         BCO,
-        PAP: {
+        PAP,
+        CONTINUATION: {
         jump stg_raisezh(base_GHCziIOziException_cannotCompactFunction_closure);
     }
 
diff --git a/rts/Continuation.c b/rts/Continuation.c
new file mode 100644
index 0000000000..09be4d368e
--- /dev/null
+++ b/rts/Continuation.c
@@ -0,0 +1,492 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2020
+ *
+ * Continuations
+ *
+ * --------------------------------------------------------------------------*/
+
+#include "rts/PosixSource.h"
+#include "Rts.h"
+
+#include "sm/Storage.h"
+#include "sm/Sanity.h"
+#include "Continuation.h"
+#include "Threads.h"
+
+#include <string.h>
+
+/* Note [Continuations overview]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A first-class continuation is represented in the RTS as a closure with type
+CONTINUATION (which corresponds to the StgContinuation struct type).
+Continuation closures are similar to AP_STACK closures in that they store a
+chunk of stack, but while AP_STACK closures are a special type of thunk,
+continuation closures are a special type of *function*. More specifically, every
+continuation is a function of arity 2, accepting one pointer and one RealWorld
+token.
+
+Continuation capture is performed through the use of two cooperating primops,
+`prompt#` and `control0#`, which morally have the following types:
+
+    prompt# :: PromptTag a -> IO a -> IO a
+    control0# :: PromptTag a -> ((IO b -> IO a) -> IO a) -> IO b
+
+(In reality, their types use `State# RealWorld` rather than `IO` in the usual
+way, but the type of control0# is nearly incomprehensible when presented in
+those terms, so thinking in terms of `IO` is a helpful abbreviation.)
+
+GHC implements *delimited* continuations: `prompt#` introduces a delimiter that
+`control0#` looks for to determine how much of the local continuation should be
+captured. Operationally, each use of `prompt#` pushes a *prompt frame* onto the
+stack (annotated with a user-provided *prompt tag*), and each use of `control0#`
+copies the portion of the stack up to the nearest prompt frame (with a matching
+tag) into the heap to form a new continuation closure. `control0#` then aborts
+to the prompt frame and resumes execution by applying the argument to
+`control0#` to the continuation. This process is mostly handled in C, via
+`captureContinuationAndAbort`.
+
+When a continuation closure is applied, the process occurs in reverse: the chunk
+of stack frames stored in the closure are pushed onto the current stack, and
+execution resumes by applying the argument to the continuation to a RealWorld
+token. This is a non-destructive operation---the caller is free to apply the
+continuation arbitrarily many times. This process is handled in Cmm, via
+`stg_CONTINUATION_apply` in ContinuationOps.cmm.
+
+For the most part, capture and restoration of continuations is surprisingly
+straightforward: the bulk of the work on each side of the process is just doing
+the necessary copying. However, there are a few additional subtleties:
+
+  * It is possible for continuation capture to *fail* if no matching prompt
+    frame is on the stack or if the continuation would include thunk update or
+    STM frames; see Note [When capturing the continuation fails] for details.
+
+  * Special care must be taken to ensure the async exception masking state is
+    properly updated across continuation captures and restores, see
+    Note [Continuations and async exception masking] for details.
+
+Note [When capturing the continuation fails]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+How can continuation capture fail? There are three possible scenarios:
+
+  1. There’s no matching prompt frame *anywhere* on the stack.
+  2. The captured continuation would include a thunk update frame.
+  3. The captured continuation would include part of an STM transaction.
+
+The first case is fairly self-explanatory: if there’s no matching prompt frame,
+we don’t know where to capture up to. The other two cases are important to
+protect RTS invariants, as continuations can be applied arbitrarily many times,
+but both thunk updates and STM transactions are non-reentrant.
+
+Moreover, any attempt to capture across a thunk update frame is necessarily
+ill-defined. Such frames indicate the start of a given `State#` thread (i.e.
+they likely correspond to `unsafePerformIO` or `runST`), but the “current
+continuation” is only predictable in code with a well-defined evaluation order.
+Any attempt to capture across such a boundary would be correspondingly
+unpredictable, so we want to be sure to reject it as programmer error. However,
+note that we cannot detect and reject *all* such errors, see Note [Detecting
+illegal captures is not guaranteed] for why.
+
+To identify these error cases while searching for a matching prompt frame, we
+also look for any stack frames that would indicate we’ve gone astray:
+
+  1. If we see a STOP_FRAME, we’ve just plain run out of stack frames.
+  2. To identify thunk updates, we can just look for UPDATE_FRAMEs.
+  3. To identify STM transactions, we look for STM-related frames, namely
+     ATOMICALLY_FRAME, CATCH_RETRY_FRAME, or CATCH_STM_FRAME.
+
+If it finds any of these frames before a matching prompt frame,
+`captureContinuationAndAbort` returns NULL, which `stg_control0zh` treats as a
+signal that it should raise an exception.
+
+Note [Detecting illegal captures is not guaranteed]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As alluded to in Note [When capturing the continuation fails], the “current
+continuation” is only well-defined within a given `State#` thread. For a
+concrete example illustrating why, consider the following program:
+
+    do tag <- newPromptTag
+       let v = unsafePerformIO (control0 tag (\k -> k ()))
+       prompt tag (pure $! v)
+
+If we were to allow this program, what would its result be? The answer depends
+on how and when we evaluate `v`. If we allocate a thunk for `v` and force it
+once the prompt has been installed, the program would successfully return `()`.
+But since GHC can tell that `v` is used strictly, it may very well choose to
+evaluate it immediately, before the prompt has been installed, in which case
+there would be no matching prompt in scope at the time the call to `control0` is
+evaluated, and the program would raise an error.
+
+Without uses of `pseq`, GHC makes no guarantees about the order in which it will
+evaluate pure expressions, so the optimizer may rearrange them significantly.
+Therefore, any code that attempts such a capture is ill-defined, and we want to
+do our best to detect and reject such mistakes. However, we cannot guarantee
+that we will catch all such misuses! For example, given the program
+
+    prompt tag (pure (1 + unsafePerformIO (control0 tag f)))
+
+it is extremely unlikely that we will signal an error, despite the erroneous
+capture. The reason is Note [Simplification of runRW#] in GHC.CoreToStg.Prep:
+when `runRW#` appears in a strict context, there is no reason to allocate a
+thunk, so GHC takes care to ensure it will not do so. With no thunk to update,
+there is naturally no thunk update frame, so we cannot possibly detect at
+runtime that anything was amiss.
+
+Preserving the information necessary to reliably detect all of these sorts of
+misuses at runtime in all situations would be disastrous for performance, so it
+is the programmer’s responsibility to ensure this does not happen. Any program
+for which continuation capture fails is a buggy program---there is NO WAY to
+write a safe program that relies upon catching exceptions raised by continuation
+capture failure. In other words, such programs invoke undefined behavior.
+
+Given the behavior of such programs is already undefined, one might ask why we
+bother detecting and reporting such failure conditions at all. In theory, we
+could ignore thunk update frames completely and let the program behave
+unpredictably. But detecting the failures we *can* detect is still worthwhile:
+
+  * From the programmer’s point of view, best-effort detection and reporting of
+    such misuses is still helpful, and the performance overhead of checking for
+    them is minimal.
+
+  * From the runtime’s point of view, detecting and eagerly rejecting such uses
+    gives us much more confidence they will not violate internal invariants, so
+    even if a buggy program does the wrong thing, it won’t corrupt the runtime.
+
+In summary, the runtime does the best it can, but if it fails to detect and
+report a misuse of `control0#`, the bug is in the program, not GHC.
+
+Note [Continuations and async exception masking]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It’s possible for a captured continuation to include a frame that alters the
+async exception masking state. For example, consider the following program:
+
+    prompt tag $ maskAsyncExceptions $
+      control0 tag (\k -> ...) >>= do_something
+
+The captured continuation will look like this:
+
+    \m -> maskAsyncExceptions (m >>= do_something)
+
+This situation requires some additional care:
+
+  1. When aborting to the prompt as part of continuation capture, we need to
+     restore the async exception masking state to whatever it was when the
+     prompt frame was initially pushed.
+
+  2. When restoring the continuation, we need to update the async exception
+     masking state to whatever it was when the continuation was captured.
+
+  3. When restoring the continuation, we need to update the pushed stack frames
+     themselves to restore the new context’s async exception masking state when
+     they return.
+
+The third point is by far the most subtle, and it stems from the way primops
+like `maskAsyncExceptions#` arrange to restore the async exception masking state
+when their subcomputation returns. Specifically, when a primop like
+`unmaskAsyncExceptions#`, `maskAsyncExceptions#`, or `maskUninterruptible#` is
+called, it pushes one of three different frames onto the stack, depending on the
+enclosing context’s masking state:
+
+  * If exceptions were unmasked, it pushes `stg_unmaskAsyncExceptionszh_ret`.
+  * If exceptions were interruptibly masked, it pushes `stg_maskAsyncExceptionszh_ret`.
+  * If exceptions were uninterruptibly masked, it pushes `stg_maskUninterruptiblezh_ret`.
+
+Note that, somewhat confusingly, which frame is pushed depends only on the
+*enclosing* context’s masking state, *not* the new masking state installed for
+the subcomputation. This works out, since the frame only exists to restore the
+previous masking state, but it means the frames on the stack do not themselves
+determine how the masking state was modified.
+
+To cooperate with this strategy, we look for the aforementioned return
+frames while walking the stack during continuation capture. If we find any of
+them, we record two pieces of information:
+
+  1. The captured continuation is necessarily responsible for whatever the
+     masking state happens to be currently, so the *current* masking state must
+     be restored upon continuation resumption. We set the `apply_mask_frame`
+     field to a stack frame info pointer that will update the masking state
+     accordingly if returned to.
+
+  2. We set the `mask_frame_offset` field to the word offset of the *outermost*
+     stack frame that restores the masking state. This serves a dual purpose:
+
+       a. When we return to Cmm, `stg_control0zh` returns to this frame to
+          restore the async exception masking state.
+
+       b. When the continuation is restored, this frame is substituted with one
+          that restores the masking state of the new context (i.e. the one in
+          which the continuation is restored).
+
+This is all quite subtle, so to illustrate with an example, suppose we have the
+following state at the start of a continuation capture:
+
+    ┌───────┐                                       ┌───────────────────┐
+    │ STACK │                                       │    tso->flags     │
+    ╞═══════╡                                       ╞═══════════════╤═══╡
+    │  ...  │                                       │ BLOCKEX       │ 1 │
+    ├───────┤                                       ├───────────────┼───┤
+    │  RET  │──→ stg_maskAsyncExceptionszh_ret      │ INTERRUPTIBLE │ 0 │
+    ├───────┤                                       └───────────────┴───┘
+    │  ...  │
+    ├───────┤
+    │  RET  │──→ stg_unmaskAsyncExceptionszh_ret
+    ├───────┤
+    │  ...  │
+    ├───────┤
+    │  RET  │──→ stg_prompt_frame
+    ├───────┤
+
+We’ll copy the relevant stack frames into the heap, and we’ll set the
+`apply_mask_frame` and `mask_frame_offset` fields accordingly:
+
+    ┌───────────────────┐
+    │   CONTINUATION    │
+    ╞═══════════════════╡  ╭──→ stg_maskUninterruptiblezh_ret
+    │ apply_mask_frame  │──╯
+    ├───────────────────┤      ┌───────┐
+    │      stack        │─────→│ STACK │
+    ├───────────────────┤      ╞═══════╡
+    │ mask_frame_offset │──╮   │  ...  │
+    └───────────────────┘  │   ├───────┤
+                           │   │  RET  │──→ stg_maskAsyncExceptionszh_ret
+                           │   ├───────┤
+                           │   │  ...  │
+                           │   ├───────┤
+                           ╰──→│  RET  │──→ stg_unmaskAsyncExceptionszh_ret
+                               ├───────┤
+                               │  ...  │
+                               └───────┘
+
+Next, `captureContinuationAndAbort` returns to Cmm, which sets up the stack so
+that it can return to the frame indicated by `mask_frame_offset`, which in this
+case is `stg_unmaskAsyncExceptionszh_ret`:
+
+    ┌───────┐                                        ┌───────────────────┐
+    │ STACK │                                        │    tso->flags     │
+    ╞═══════╡                                        ╞═══════════════╤═══╡
+    │  RET  │──→ stg_unmaskAsyncExceptionszh_ret     │ BLOCKEX       │ 1 │
+    ├───────┤                                        ├───────────────┼───┤
+    │  RET  │──→ stg_ap_pv                           │ INTERRUPTIBLE │ 0 │
+    ├───────┤                                        └───────────────┴───┘
+    │ cont  │
+    ├───────┤
+    │  ...  │
+    ├───────┤
+
+`stg_control0zh` then returns to `stg_unmaskAsyncExceptionszh`, which restores
+the exception masking state appropriately. It then returns to `stg_ap_pv`, which
+applies the handler to the captured continuation, and execution continues with
+exceptions properly unmasked.
+
+Next, let’s consider what happens when the continuation is restored. Suppose we
+start in the following state:
+
+    ┌───────┐                                        ┌───────────────────┐
+    │ STACK │                                        │    tso->flags     │
+    ╞═══════╡                                        ╞═══════════════╤═══╡
+    │  ...  │                                        │ BLOCKEX       │ 1 │
+    ├───────┤                                        ├───────────────┼───┤
+                                                     │ INTERRUPTIBLE │ 1 │
+                                                     └───────────────┴───┘
+
+`stg_CONTINUATION_apply` will start by copying the frames from the continuation
+back onto the stack, plus `apply_mask_frame` on top, which in this case is
+`stg_maskUninterruptiblezh_ret`:
+
+                         ┌───────┐
+                         │ STACK │
+                         ╞═══════╡
+                         │  RET  │──→ stg_maskUninterruptiblezh_ret
+                         ├───────┤
+                         │  RET  │──→ stg_ap_v
+                         ├───────┤
+                         │  ...  │
+                         ├───────┤
+                         │  RET  │──→ stg_maskAsyncExceptionszh_ret
+                         ├───────┤
+                         │  ...  │
+                         ├───────┤
+    mask_frame_offset──→ │  RET  │──→ stg_unmaskAsyncExceptionszh_ret
+                         ├───────┤
+                         │  ...  │
+                         ├───────┤
+
+Next, it will update the frame at `mask_frame_offset` based on the current async
+exception masking state. In this case, exceptions are interruptibly masked, so
+the frame will be replaced with `stg_maskAsyncExceptionszh_ret`.
+`stg_CONTINUATION_apply` will then return to the top of the stack, and
+`stg_maskUninterruptiblezh_ret` will update the async exception masking state:
+
+    ┌───────┐                                        ┌───────────────────┐
+    │ STACK │                                        │    tso->flags     │
+    ╞═══════╡                                        ╞═══════════════╤═══╡
+    │  RET  │──→ stg_ap_v                            │ BLOCKEX       │ 1 │
+    ├───────┤                                        ├───────────────┼───┤
+    │  ...  │                                        │ INTERRUPTIBLE │ 0 │
+    ├───────┤                                        └───────────────┴───┘
+    │  RET  │──→ stg_maskAsyncExceptionszh_ret
+    ├───────┤
+    │  ...  │
+    ├───────┤
+    │  RET  │──→ stg_unmaskAsyncExceptionszh_ret
+    ├───────┤
+    │  ...  │
+    ├───────┤
+
+Control then returns to `stg_ap_v`, which applies the argument in R1 to resume
+execution with exceptions re-masked, and we’re done. Phew.
+
+One might naturally wonder why we bother with all this complicated indirection
+involving returning to mask/unmask frames rather than just adjusting
+`tso->flags` directly ourselves. That would indeed be significantly simpler,
+but returning to `stg_unmaskAsyncExceptionszh_ret` has the important side-effect
+of checking eagerly for a pending async exception and raising it if one is
+available. So we do some tricky trampolining, and that frees us from having to
+worry about that in the continuation capture/restore logic as well. */
+
+static bool is_mask_frame_info(const StgInfoTable *info)
+{
+  return info == &stg_unmaskAsyncExceptionszh_ret_info
+      || info == &stg_maskAsyncExceptionszh_ret_info
+      || info == &stg_maskUninterruptiblezh_ret_info;
+}
+
+static StgStack *pop_stack_chunk(Capability *cap, StgTSO *tso)
+{
+  StgStack *stack = tso->stackobj;
+  stack->sp = stack->stack + stack->stack_size - sizeofW(StgUnderflowFrame);
+  threadStackUnderflow(cap, tso);
+  return tso->stackobj;
+}
+
+// see Note [Continuations overview]
+StgClosure *captureContinuationAndAbort(Capability *cap, StgTSO *tso, StgPromptTag prompt_tag)
+{
+  // We’d better own this thread if we’re doing this!
+  ASSERT(tso->cap == cap);
+
+  StgStack *stack = tso->stackobj;
+  StgPtr frame = stack->sp;
+
+  // We perform the capture in two phases:
+  //
+  //   1. We walk the stack to find the prompt frame to capture up to (if any).
+  //
+  //   2. If we successfully find a matching prompt, we proceed with the actual
+  //      by allocating space for the continuation, performing the necessary
+  //      copying, and unwinding the stack.
+  //
+  // These variables are modified in Phase 1 to keep track of how far we had to
+  // walk before finding the prompt frame. Afterwards, Phase 2 consults them to
+  // determine how to proceed with the actual capture.
+
+  StgWord total_words = 0;
+  bool in_first_chunk = true;
+  StgWord first_chunk_words = 0;
+  StgWord last_chunk_words = 0;
+  StgWord full_chunks = 0;
+
+  // see Note [Continuations and async exception masking]
+  const StgInfoTable *apply_mask_frame = NULL;
+  StgWord mask_frame_offset = 0;
+
+  /* --- Phase 1: Find the matching prompt frame ---------------------------- */
+
+  while (true) {
+    const StgInfoTable *info_ptr = ((StgClosure *)frame)->header.info;
+    const StgRetInfoTable *info = get_ret_itbl((StgClosure *)frame);
+    StgWord chunk_words = frame - stack->sp;
+
+    if (info_ptr == &stg_prompt_frame_info
+        && ((StgPromptFrame *)frame)->tag == prompt_tag) {
+      total_words += chunk_words;
+      if (in_first_chunk) {
+        first_chunk_words = chunk_words;
+      } else {
+        last_chunk_words = chunk_words;
+      }
+      break;
+    }
+
+    if (info->i.type == UNDERFLOW_FRAME) {
+      total_words += chunk_words;
+      if (in_first_chunk) {
+        first_chunk_words = chunk_words;
+      } else {
+        full_chunks++;
+      }
+
+      stack = ((StgUnderflowFrame *)frame)->next_chunk;
+      frame = stack->sp;
+      in_first_chunk = false;
+      continue;
+    }
+
+    // Finding any of these mean we failed to find the prompt frame;
+    // see Note [When capturing the continuation fails] for details
+    if (RTS_UNLIKELY(info->i.type == STOP_FRAME
+                  || info->i.type == UPDATE_FRAME
+                  || info->i.type == ATOMICALLY_FRAME
+                  || info->i.type == CATCH_RETRY_FRAME
+                  || info->i.type == CATCH_STM_FRAME)) {
+      return NULL; // Bail out
+    }
+
+    // see Note [Continuations and async exception masking]
+    if (is_mask_frame_info(info_ptr)) {
+      mask_frame_offset = total_words + chunk_words;
+      if (apply_mask_frame == NULL) {
+        if ((tso->flags & TSO_BLOCKEX) == 0) {
+          apply_mask_frame = &stg_unmaskAsyncExceptionszh_ret_info;
+        } else if ((tso->flags & TSO_INTERRUPTIBLE) == 0) {
+          apply_mask_frame = &stg_maskUninterruptiblezh_ret_info;
+        } else {
+          apply_mask_frame = &stg_maskAsyncExceptionszh_ret_info;
+        }
+      }
+    }
+
+    // Advance to the next frame.
+    frame += stack_frame_sizeW((StgClosure *)frame);
+  }
+
+  /* --- Phase 2: Perform the capture --------------------------------------- */
+
+  dirty_TSO(cap, tso);
+  dirty_STACK(cap, stack);
+
+  StgContinuation *cont = (StgContinuation *)allocate(cap, CONTINUATION_sizeW(total_words));
+  SET_HDR(cont, &stg_CONTINUATION_info, stack->header.prof.ccs);
+  cont->apply_mask_frame = apply_mask_frame;
+  cont->mask_frame_offset = mask_frame_offset;
+  cont->stack_size = total_words;
+
+  stack = tso->stackobj;
+  StgPtr cont_stack = cont->stack;
+  memcpy(cont_stack, stack->sp, first_chunk_words * sizeof(StgWord));
+  cont_stack += first_chunk_words;
+
+  if (in_first_chunk) {
+    stack->sp += first_chunk_words;
+  } else {
+    stack = pop_stack_chunk(cap, tso);
+
+    for (StgWord i = 0; i < full_chunks; i++) {
+      memcpy(cont_stack, stack->sp, stack->stack_size * sizeof(StgWord));
+      cont_stack += stack->stack_size;
+      stack = pop_stack_chunk(cap, tso);
+    }
+
+    memcpy(cont_stack, stack->sp, last_chunk_words * sizeof(StgWord));
+    stack->sp += last_chunk_words;
+  }
+
+  ASSERT(cont->stack + total_words == cont_stack);
+  ASSERT(((StgClosure *)stack->sp)->header.info == &stg_prompt_frame_info);
+  stack->sp += stack_frame_sizeW((StgClosure *)frame);
+  IF_DEBUG(sanity,
+    checkClosure((StgClosure *)cont);
+    checkTSO(tso));
+
+  return TAG_CLOSURE(2, (StgClosure *)cont);
+}
diff --git a/rts/Continuation.h b/rts/Continuation.h
new file mode 100644
index 0000000000..d4fe0092da
--- /dev/null
+++ b/rts/Continuation.h
@@ -0,0 +1,15 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2020
+ *
+ * Continuations
+ *
+ * --------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "BeginPrivate.h"
+
+StgClosure *captureContinuationAndAbort(Capability *cap, StgTSO *tso, StgPromptTag prompt_tag);
+
+#include "EndPrivate.h"
diff --git a/rts/ContinuationOps.cmm b/rts/ContinuationOps.cmm
new file mode 100644
index 0000000000..f7437a6f73
--- /dev/null
+++ b/rts/ContinuationOps.cmm
@@ -0,0 +1,209 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2022
+ *
+ * Continuation support
+ *
+ * This file is written in a subset of C--, extended with various
+ * features specific to GHC.  It is compiled by GHC directly.  For the
+ * syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Cmm.h"
+
+import CLOSURE base_ControlziExceptionziBase_noMatchingContinuationPrompt_closure;
+
+/* --------------------------------------------------------------------------
+                         Prompts and prompt tags
+   -------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_PROMPT_TAG,0,0,PRIM,"PROMPT_TAG","PROMPT_TAG")
+{ foreign "C" barf("PROMPT_TAG object (%p) entered!", R1) never returns; }
+
+stg_newPromptTagzh()
+{
+  W_ tag;
+
+  ALLOC_PRIM_(SIZEOF_StgHeader, stg_newPromptTagzh);
+  tag = Hp - SIZEOF_StgHeader + WDS(1);
+  SET_HDR(tag,stg_PROMPT_TAG_info,CCCS);
+
+  return (tag);
+}
+
+INFO_TABLE_RET(stg_prompt_frame, RET_SMALL, W_ info_ptr, P_ tag /* :: PromptTag# a */)
+  return (P_ ret /* :: a */)
+{
+  return (ret);
+}
+
+// see Note [Continuations overview] in Continuation.c
+stg_promptzh(P_ tag /* :: PromptTag# a */, P_ io /* :: IO a */)
+{
+  STK_CHK_GEN();
+  TICK_UNKNOWN_CALL();
+  TICK_SLOW_CALL_fast_v();
+  jump stg_ap_v_fast (stg_prompt_frame_info, tag) (io);
+}
+
+/* --------------------------------------------------------------------------
+                          Continuation capture
+   -------------------------------------------------------------------------- */
+
+// see Note [Continuations overview] in Continuation.c
+stg_control0zh(P_ tag /* :: PromptTag# a */, P_ f /* :: (IO b -> IO a) -> IO a */)
+{
+  // We receive two arguments, so we need to use a high-level Cmm entrypoint to
+  // receive them with the platform-specific calling convention, but we just
+  // jump to `stg_control0zh_ll` immediately, since we need to be in low-level
+  // Cmm to manipulate the stack.
+  R1 = tag;
+  R2 = f;
+  jump stg_control0zh_ll [R1, R2];
+}
+
+// see Note [Continuations overview] in Continuation.c
+stg_control0zh_ll // explicit stack
+{
+  P_ tag  /* :: PromptTag# a */,
+     f    /* :: (IO b -> IO a) -> IO a */,
+     cont /* :: IO b -> IO a */;
+  tag = R1;
+  f = R2;
+
+  SAVE_THREAD_STATE();
+  (cont) = ccall captureContinuationAndAbort(MyCapability() "ptr",
+                                             CurrentTSO "ptr",
+                                             tag);
+  LOAD_THREAD_STATE();
+
+  // see Note [When capturing the continuation fails] in Continuation.c
+  if (cont == NULL) (likely: False) {
+    jump stg_raisezh(base_ControlziExceptionziBase_noMatchingContinuationPrompt_closure);
+  }
+
+  W_ apply_mask_frame;
+  apply_mask_frame = StgContinuation_apply_mask_frame(cont);
+
+  // The stack has been updated, so it’s time to apply the input function,
+  // passing the captured continuation and a RealWorld token as arguments.
+  TICK_UNKNOWN_CALL();
+  TICK_SLOW_CALL_fast_pv();
+
+  // If `apply_mask_frame` is NULL, that means the captured continuation doesn’t
+  // make any adjustments to the async exception masking state, which means we
+  // don’t have any adjustments to undo, either. Therefore, we can just apply
+  // the function directly.
+  if (apply_mask_frame == NULL) {
+    Sp_adj(-2);
+    Sp(1) = cont;
+    R1 = f;
+    jump RET_LBL(stg_ap_pv) [R1];
+  }
+
+  // Otherwise, the continuation did adjust the masking state, so we have to
+  // undo it before resuming execution.
+  //
+  // Rather than deal with updating the state ourselves, we return to the
+  // relevant unmask frame (defined in Exception.cmm) that happens to be at the
+  // bottom of the captured continuation (see Note [Continuations and async
+  // exception masking] in Continuation.c for all the details).
+  //
+  // We start by extracting the unmask frame’s info table pointer from the chunk
+  // of captured stack.
+  P_ untagged_cont;
+  W_ cont_stack, mask_frame_offset, mask_frame;
+  untagged_cont = UNTAG(cont);
+  cont_stack = untagged_cont + SIZEOF_StgHeader + OFFSET_StgContinuation_stack;
+  mask_frame_offset = StgContinuation_mask_frame_offset(untagged_cont);
+  mask_frame = W_[cont_stack + WDS(mask_frame_offset)];
+
+  // Now we have the relevant info table to return to in `mask_frame`, so we
+  // just set up the stack to apply the function when the unmask frame returns
+  // and jump to the frame’s entry code.
+  Sp_adj(-3); // Note -3, not -2, because `mask_frame` will
+              // try to pop itself off the stack when it returns!
+  Sp(1) = stg_ap_pv_info;
+  Sp(2) = cont;
+  R1 = f;
+  jump %ENTRY_CODE(mask_frame) [R1];
+}
+
+/* --------------------------------------------------------------------------
+                           Continuation restore
+   -------------------------------------------------------------------------- */
+
+INFO_TABLE_FUN(stg_CONTINUATION,0,0,CONTINUATION,"CONTINUATION","CONTINUATION",2,ARG_P)
+  (P_ cont /* :: IO b -> IO a */, P_ io /* :: IO b */)
+{
+  // We receive two arguments, so we need to use a high-level Cmm entrypoint to
+  // receive them with the platform-specific calling convention, but we just
+  // jump to `stg_CONTINUATION_apply` immediately, since we need to be in
+  // low-level Cmm to manipulate the stack.
+  R1 = UNTAG(cont);
+  R2 = io;
+  jump stg_CONTINUATION_apply [R1, R2];
+}
+
+// see Note [Continuations overview] in Continuation.c
+stg_CONTINUATION_apply // explicit stack
+{
+  P_ cont, io;
+  cont = R1;
+  io = R2;
+
+  IF_DEBUG(sanity, ccall checkClosure(cont "ptr"));
+
+  W_ new_stack_words, apply_mask_frame, mask_frame_offset;
+  new_stack_words = StgContinuation_stack_size(cont);
+  apply_mask_frame = StgContinuation_apply_mask_frame(cont);
+  mask_frame_offset = StgContinuation_mask_frame_offset(cont);
+
+  // Make sure we have enough space to restore the stack.
+  STK_CHK_PP_LL(WDS(new_stack_words), stg_CONTINUATION_apply, cont, io);
+
+  TICK_ENT_CONTINUATION();
+  LDV_ENTER(cont);
+#if defined(PROFILING)
+  ccall enterFunCCS(BaseReg "ptr", StgHeader_ccs(cont) "ptr");
+#endif
+
+  // Restore the stack.
+  W_ p;
+  p = cont + SIZEOF_StgHeader + OFFSET_StgContinuation_stack;
+  Sp_adj(-new_stack_words);
+  prim %memcpy(Sp, p, WDS(new_stack_words), SIZEOF_W);
+
+  TICK_UNKNOWN_CALL();
+  TICK_SLOW_CALL_fast_v();
+
+  // If `apply_mask_frame` is NULL, there’s no need to adjust the async
+  // exception masking state, so just apply the argument directly.
+  if (apply_mask_frame == NULL) {
+    R1 = io;
+    jump stg_ap_v_fast [R1];
+  }
+
+  // Otherwise, we need to update the masking state, but before we do, we also
+  // need to update the unmask frame at the bottom of the restored chunk of
+  // stack so that it returns the masking state to whatever it was before the
+  // continuation was applied (see also Note [Continuations and async exception
+  // masking] in Continuation.c).
+  if ((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_BLOCKEX) == 0) {
+    Sp(mask_frame_offset) = stg_unmaskAsyncExceptionszh_ret_info;
+  } else {
+    if ((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_INTERRUPTIBLE) == 0) {
+      Sp(mask_frame_offset) = stg_maskUninterruptiblezh_ret_info;
+    } else {
+      Sp(mask_frame_offset) = stg_maskAsyncExceptionszh_ret_info;
+    }
+  }
+
+  // Now we just set up the stack so that `apply_mask_frame` will apply `io`
+  // when it returns and jump to it.
+  Sp_adj(-2);
+  Sp(1) = stg_ap_v_info;
+  R1 = io;
+  jump %ENTRY_CODE(apply_mask_frame) [R1];
+}
diff --git a/rts/Exception.cmm b/rts/Exception.cmm
index 0ef74a8c57..f246d60a1e 100644
--- a/rts/Exception.cmm
+++ b/rts/Exception.cmm
@@ -57,7 +57,6 @@ import CLOSURE base_GHCziExceptionziType_overflowException_closure;
 
    -------------------------------------------------------------------------- */
 
-
 INFO_TABLE_RET(stg_unmaskAsyncExceptionszh_ret, RET_SMALL, W_ info_ptr)
     /* explicit stack */
 {
@@ -151,9 +150,11 @@ stg_maskAsyncExceptionszh /* explicit stack */
             Sp(0) = stg_unmaskAsyncExceptionszh_ret_info;
         }
     } else {
+        Sp_adj(-1);
         if ((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_INTERRUPTIBLE) == 0) {
-            Sp_adj(-1);
             Sp(0) = stg_maskUninterruptiblezh_ret_info;
+        } else {
+            Sp(0) = stg_maskAsyncExceptionszh_ret_info;
         }
     }
 
@@ -179,10 +180,12 @@ stg_maskUninterruptiblezh /* explicit stack */
             Sp(0) = stg_unmaskAsyncExceptionszh_ret_info;
         }
     } else {
-        if ((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_INTERRUPTIBLE) != 0) {
-            Sp_adj(-1);
-            Sp(0) = stg_maskAsyncExceptionszh_ret_info;
-        }
+      Sp_adj(-1);
+      if ((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_INTERRUPTIBLE) == 0) {
+          Sp(0) = stg_maskUninterruptiblezh_ret_info;
+      } else {
+          Sp(0) = stg_maskAsyncExceptionszh_ret_info;
+      }
     }
 
     StgTSO_flags(CurrentTSO) = %lobits32(
@@ -203,12 +206,15 @@ stg_unmaskAsyncExceptionszh /* explicit stack */
     io = R1;
 
     STK_CHK_P_LL (WDS(4), stg_unmaskAsyncExceptionszh, io);
-    /* 4 words: one for the unmask frame, 3 for setting up the
+    /* 4 words: one for the mask frame, 3 for setting up the
      * stack to call maybePerformBlockedException() below.
      */
 
-    /* If exceptions are already unmasked, there's nothing to do */
-    if ((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_BLOCKEX) != 0) {
+    /* If exceptions are already unmasked, no need to check for a masked exception */
+    if ((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_BLOCKEX) == 0) {
+        Sp_adj(-1);
+        Sp(0) = stg_unmaskAsyncExceptionszh_ret_info;
+    } else {
 
         /* avoid growing the stack unnecessarily */
         if (Sp(0) == stg_unmaskAsyncExceptionszh_ret_info) {
@@ -377,22 +383,6 @@ INFO_TABLE_RET(stg_catch_frame, CATCH_FRAME,
     return (ret);
 }
 
-/* -----------------------------------------------------------------------------
- * The catch infotable
- *
- * This should be exactly the same as would be generated by this STG code
- *
- * catch = {x,h} \n {} -> catch#{x,h}
- *
- * It is used in deleteThread when reverting blackholes.
- * -------------------------------------------------------------------------- */
-
-INFO_TABLE(stg_catch,2,0,FUN,"catch","catch")
-    (P_ node)
-{
-    jump stg_catchzh(StgClosure_payload(node,0),StgClosure_payload(node,1));
-}
-
 stg_catchzh ( P_ io,      /* :: IO a */
               P_ handler  /* :: Exception -> IO a */ )
 {
diff --git a/rts/Heap.c b/rts/Heap.c
index 516f27ba6d..491324e706 100644
--- a/rts/Heap.c
+++ b/rts/Heap.c
@@ -241,6 +241,10 @@ StgWord collect_pointers(StgClosure *closure, StgClosure *ptrs[]) {
             break;
         }
 
+        case CONTINUATION:
+            // See the note in AP_STACK about the stack chunk.
+            break;
+
         default:
             fprintf(stderr,"closurePtrs: Cannot handle type %s yet\n",
                            closure_type_names[info->type]);
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
index ba89f75522..8ab8356550 100644
--- a/rts/HeapStackCheck.cmm
+++ b/rts/HeapStackCheck.cmm
@@ -229,6 +229,35 @@ stg_gc_prim_p_ll
     jump stg_gc_noregs [];
 }
 
+INFO_TABLE_RET(stg_gc_prim_pp_ll, RET_SMALL, W_ info, P_ arg1, P_ arg2, W_ fun)
+    /* explicit stack */
+{
+    W_ fun;
+    P_ arg1, arg2;
+    fun = Sp(3);
+    arg2 = Sp(2);
+    arg1 = Sp(1);
+    Sp_adj(4);
+    R1 = arg1;
+    R2 = arg2;
+    jump fun [R1, R2];
+}
+
+stg_gc_prim_pp_ll
+{
+    W_ fun;
+    P_ arg1, arg2;
+    fun = R3;
+    arg1 = R1;
+    arg2 = R2;
+    Sp_adj(-4);
+    Sp(3) = fun;
+    Sp(2) = arg2;
+    Sp(1) = arg1;
+    Sp(0) = stg_gc_prim_pp_ll_info;
+    jump stg_gc_noregs [];
+}
+
 /* -----------------------------------------------------------------------------
    Info tables for returning values of various types.  These are used
    when we want to push a frame on the stack that will return a value
diff --git a/rts/LdvProfile.c b/rts/LdvProfile.c
index b56c4d07bb..2cdeb36ce2 100644
--- a/rts/LdvProfile.c
+++ b/rts/LdvProfile.c
@@ -119,6 +119,7 @@ processHeapClosureForDead( const StgClosure *c )
     case FUN_2_0:
     case FUN_1_1:
     case FUN_0_2:
+    case CONTINUATION:
     case BLACKHOLE:
     case BLOCKING_QUEUE:
         /*
diff --git a/rts/Printer.c b/rts/Printer.c
index ef9394f24e..45b4a606cf 100644
--- a/rts/Printer.c
+++ b/rts/Printer.c
@@ -423,6 +423,15 @@ printClosure( const StgClosure *obj )
         debugBelch("TREC_CHUNK\n");
         break;
 
+    case CONTINUATION:
+    {
+        StgContinuation *u = (StgContinuation *)obj;
+        debugBelch("CONTINUATION(apply_mask_frame=");
+        printPtr((StgPtr)u->apply_mask_frame);
+        debugBelch(",stack_size=%" FMT_Word ")\n", u->stack_size);
+        break;
+    }
+
     default:
             //barf("printClosure %d",get_itbl(obj)->type);
             debugBelch("*** printClosure: unknown type %d ****\n",
@@ -563,6 +572,8 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
                 debugBelch("stg_ret_d_info\n" );
             } else if (c == (StgWord)&stg_ret_l_info) {
                 debugBelch("stg_ret_l_info\n" );
+            } else if (c == (StgWord)&stg_prompt_frame_info) {
+                debugBelch("stg_prompt_frame_info\n");
 #if defined(PROFILING)
             } else if (c == (StgWord)&stg_restore_cccs_info) {
                 debugBelch("stg_restore_cccs_info\n" );
@@ -1063,10 +1074,11 @@ const char *closure_type_names[] = {
  [SMALL_MUT_ARR_PTRS_DIRTY] = "SMALL_MUT_ARR_PTRS_DIRTY",
  [SMALL_MUT_ARR_PTRS_FROZEN_DIRTY] = "SMALL_MUT_ARR_PTRS_FROZEN_DIRTY",
  [SMALL_MUT_ARR_PTRS_FROZEN_CLEAN] = "SMALL_MUT_ARR_PTRS_FROZEN_CLEAN",
- [COMPACT_NFDATA]        = "COMPACT_NFDATA"
+ [COMPACT_NFDATA]        = "COMPACT_NFDATA",
+ [CONTINUATION]          = "CONTINUATION",
 };
 
-#if N_CLOSURE_TYPES != 64
+#if N_CLOSURE_TYPES != 65
 #error Closure types changed: update Printer.c!
 #endif
 
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index d9e66d76d1..41c8ab2508 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -1219,6 +1219,10 @@ heapCensusBlock(Census *census, bdescr *bd)
             size = sizeofW(StgTRecChunk);
             break;
 
+        case CONTINUATION:
+            size = continuation_sizeW((StgContinuation *)p);
+            break;
+
         case COMPACT_NFDATA:
             barf("heapCensus, found compact object in the wrong list");
             break;
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c
index 90d3c8c0f4..6e2692f8cd 100644
--- a/rts/RetainerProfile.c
+++ b/rts/RetainerProfile.c
@@ -177,6 +177,7 @@ isRetainer( const StgClosure *c )
     case FUN_0_2:
         // partial applications
     case PAP:
+    case CONTINUATION:
         // indirection
     // IND_STATIC used to be an error, but at the moment it can happen
     // as isAlive doesn't look through IND_STATIC as it ignores static
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index f979c324ba..317b284158 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -442,6 +442,7 @@ extern char **environ;
       SymI_HasProto(ENT_DYN_IND_ctr)                    \
       SymI_HasProto(ENT_PERM_IND_ctr)                   \
       SymI_HasProto(ENT_PAP_ctr)                        \
+      SymI_HasProto(ENT_CONTINUATION_ctr)                  \
       SymI_HasProto(ENT_AP_ctr)                         \
       SymI_HasProto(ENT_AP_STACK_ctr)                   \
       SymI_HasProto(ENT_BH_ctr)                         \
@@ -1047,6 +1048,9 @@ extern char **environ;
       SymI_HasProto(sendCloneStackMessage)                              \
       SymI_HasProto(cloneStack)                                         \
       SymI_HasProto(decodeClonedStack)                                  \
+      SymI_HasProto(stg_newPromptTagzh)                                 \
+      SymI_HasProto(stg_promptzh)                                       \
+      SymI_HasProto(stg_control0zh)                                     \
       RTS_USER_SIGNALS_SYMBOLS                                          \
       RTS_INTCHAR_SYMBOLS
 
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 10ae67562e..ff0ec062f8 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -432,7 +432,7 @@ INFO_TABLE_RET( stg_dead_thread, RET_SMALL,
    Entry code for a BCO
    ------------------------------------------------------------------------- */
 
-INFO_TABLE_FUN( stg_BCO, 3, 0, BCO, "BCO", "BCO", ARG_BCO )
+INFO_TABLE_FUN( stg_BCO, 3, 0, BCO, "BCO", "BCO", 0, ARG_BCO )
     /* explicit stack */
 {
   /* entering a BCO means "apply it", same as a function */
diff --git a/rts/Ticky.c b/rts/Ticky.c
index a216fcb00f..2cd5e34142 100644
--- a/rts/Ticky.c
+++ b/rts/Ticky.c
@@ -287,6 +287,7 @@ PrintTickyInfo(void)
   PR_CTR(ENT_AP_ctr);
   PR_CTR(ENT_PAP_ctr);
   PR_CTR(ENT_AP_STACK_ctr);
+  PR_CTR(ENT_CONTINUATION_ctr);
   PR_CTR(ENT_BH_ctr);
   PR_CTR(ENT_STATIC_THK_SINGLE_ctr);
   PR_CTR(ENT_STATIC_THK_MANY_ctr);
diff --git a/rts/TraverseHeap.c b/rts/TraverseHeap.c
index 60c88a278d..e40e380765 100644
--- a/rts/TraverseHeap.c
+++ b/rts/TraverseHeap.c
@@ -336,8 +336,8 @@ traversePushReturn(traverseState *ts, StgClosure *c, stackAccum acc, stackElemen
  *
  * Invariants:
  *
- *  - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot
- *       be any stack objects.
+ *  - 'c' is not any of TSO, AP, PAP, AP_STACK, or CONTINUATION, which means
+ *    that there cannot be any stack objects.
  *
  * Note: SRTs are considered to be children as well.
  */
@@ -517,6 +517,7 @@ traverseGetChildren(StgClosure *c, StgClosure **first_child, bool *other_childre
     case PAP:
     case AP:
     case AP_STACK:
+    case CONTINUATION:
     case TSO:
     case STACK:
     case IND_STATIC:
@@ -818,6 +819,7 @@ traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data,
         case PAP:
         case AP:
         case AP_STACK:
+        case CONTINUATION:
         case TSO:
         case STACK:
         case IND_STATIC:
@@ -1288,6 +1290,14 @@ inner_loop:
                     (StgPtr)((StgAP_STACK *)c)->payload +
                              ((StgAP_STACK *)c)->size);
         goto loop;
+
+    case CONTINUATION:
+    {
+        StgContinuation *cont = (StgContinuation *)c;
+        traversePushStack(ts, c, sep, child_data,
+                          cont->stack, cont->stack + cont->stack_size);
+        goto loop;
+    }
     }
 
     stackElement se;
diff --git a/rts/include/Cmm.h b/rts/include/Cmm.h
index 1470b2e0e2..07301bf602 100644
--- a/rts/include/Cmm.h
+++ b/rts/include/Cmm.h
@@ -335,7 +335,8 @@
     FUN_0_2,                                            \
     FUN_STATIC,                                         \
     BCO,                                                \
-    PAP:                                                \
+    PAP,                                                \
+    CONTINUATION:                                       \
    {                                                    \
        ret(x);                                          \
    }                                                    \
@@ -498,6 +499,12 @@
 #define GC_PRIM_PP(fun,arg1,arg2)               \
         jump stg_gc_prim_pp(arg1,arg2,fun);
 
+#define GC_PRIM_PP_LL(fun,arg1,arg2)            \
+        R1 = arg1;                              \
+        R2 = arg2;                              \
+        R3 = fun;                               \
+        jump stg_gc_prim_pp_ll [R1,R2,R3];
+
 #define MAYBE_GC_(fun)                          \
     if (CHECK_GC()) {                           \
         HpAlloc = 0;                            \
@@ -540,6 +547,12 @@
         GC_PRIM_PP(fun,arg1,arg2)               \
     }
 
+#define STK_CHK_PP_LL(n, fun, arg1, arg2)       \
+    TICK_BUMP(STK_CHK_ctr);                     \
+    if (Sp - (n) < SpLim) {                     \
+        GC_PRIM_PP_LL(fun,arg1,arg2)            \
+    }
+
 #define STK_CHK_ENTER(n, closure)               \
     TICK_BUMP(STK_CHK_ctr);                     \
     if (Sp - (n) < SpLim) {                     \
@@ -685,6 +698,7 @@
 #define TICK_ENT_PAP()                  TICK_BUMP(ENT_PAP_ctr)
 #define TICK_ENT_AP()                   TICK_BUMP(ENT_AP_ctr)
 #define TICK_ENT_AP_STACK()             TICK_BUMP(ENT_AP_STACK_ctr)
+#define TICK_ENT_CONTINUATION()         TICK_BUMP(ENT_CONTINUATION_ctr)
 #define TICK_ENT_BH()                   TICK_BUMP(ENT_BH_ctr)
 #define TICK_ENT_LNE()                  TICK_BUMP(ENT_LNE_ctr)
 #define TICK_UNKNOWN_CALL()             TICK_BUMP(UNKNOWN_CALL_ctr)
diff --git a/rts/include/rts/storage/ClosureMacros.h b/rts/include/rts/storage/ClosureMacros.h
index 1e3589fec2..ba57df1699 100644
--- a/rts/include/rts/storage/ClosureMacros.h
+++ b/rts/include/rts/storage/ClosureMacros.h
@@ -300,6 +300,10 @@ EXTERN_INLINE StgOffset AP_STACK_sizeW ( uint32_t size );
 EXTERN_INLINE StgOffset AP_STACK_sizeW ( uint32_t size )
 { return sizeofW(StgAP_STACK) + size; }
 
+EXTERN_INLINE StgWord CONTINUATION_sizeW(StgWord stack_size);
+EXTERN_INLINE StgWord CONTINUATION_sizeW(StgWord stack_size)
+{ return sizeofW(StgContinuation) + stack_size; }
+
 EXTERN_INLINE StgOffset CONSTR_sizeW( uint32_t p, uint32_t np );
 EXTERN_INLINE StgOffset CONSTR_sizeW( uint32_t p, uint32_t np )
 { return sizeofW(StgHeader) + p + np; }
@@ -340,6 +344,10 @@ EXTERN_INLINE StgOffset pap_sizeW( StgPAP* x );
 EXTERN_INLINE StgOffset pap_sizeW( StgPAP* x )
 { return PAP_sizeW(x->n_args); }
 
+EXTERN_INLINE StgWord continuation_sizeW(StgContinuation *x);
+EXTERN_INLINE StgWord continuation_sizeW(StgContinuation *x)
+{ return CONTINUATION_sizeW(x->stack_size); }
+
 EXTERN_INLINE StgWord arr_words_words( StgArrBytes* x);
 EXTERN_INLINE StgWord arr_words_words( StgArrBytes* x)
 { return ROUNDUP_BYTES_TO_WDS(x->bytes); }
diff --git a/rts/include/rts/storage/ClosureTypes.h b/rts/include/rts/storage/ClosureTypes.h
index 31e75cc617..a24a3235df 100644
--- a/rts/include/rts/storage/ClosureTypes.h
+++ b/rts/include/rts/storage/ClosureTypes.h
@@ -87,4 +87,5 @@
 #define SMALL_MUT_ARR_PTRS_FROZEN_DIRTY 61
 #define SMALL_MUT_ARR_PTRS_FROZEN_CLEAN 62
 #define COMPACT_NFDATA                63
-#define N_CLOSURE_TYPES               64
+#define CONTINUATION                  64
+#define N_CLOSURE_TYPES               65
diff --git a/rts/include/rts/storage/Closures.h b/rts/include/rts/storage/Closures.h
index c1de80aa4d..a2b6eb079f 100644
--- a/rts/include/rts/storage/Closures.h
+++ b/rts/include/rts/storage/Closures.h
@@ -658,3 +658,31 @@ typedef struct StgCompactNFData_ {
       // Used by compacting GC for linking CNFs with threaded hash tables.
       // See Note [CNFs in compacting GC] in Compact.c for details.
 } StgCompactNFData;
+
+/* ----------------------------------------------------------------------------
+   Continuations (see Note [Continuations overview] in Continuation.c)
+   ------------------------------------------------------------------------- */
+
+typedef StgClosure *StgPromptTag;
+
+typedef struct {
+    StgHeader header;
+    StgPromptTag tag;
+} StgPromptFrame;
+
+// Closure types: CONTINUATION
+typedef struct {
+    StgHeader header;
+    const StgInfoTable *apply_mask_frame;
+      // A pointer to a stack frame info table that should be returned to after
+      // applying this continuation to update the async exception masking state,
+      // or NULL if the masking state of the calling context should be preserved;
+      // see Note [Continuations and async exception masking] in Continuation.c
+    StgWord mask_frame_offset;
+      // Word offset into `stack` for the outermost mask/unmask frame, or 0 if
+      // `apply_mask_frame` is NULL;
+      // see Note [Continuations and async exception masking] in Continuation.c
+    StgWord stack_size;
+      // Number of words of captured stack
+    StgWord stack[];
+} StgContinuation;
diff --git a/rts/include/rts/storage/TSO.h b/rts/include/rts/storage/TSO.h
index 93c077d36a..4ca19853d7 100644
--- a/rts/include/rts/storage/TSO.h
+++ b/rts/include/rts/storage/TSO.h
@@ -266,11 +266,16 @@ typedef struct StgStack_ {
     StgWord    stack[];
 } StgStack;
 
+INLINE_HEADER StgPtr stack_SpLim(StgStack *stack)
+{
+    return stack->stack + RESERVED_STACK_WORDS;
+}
+
 // Calculate SpLim from a TSO (reads tso->stackobj, but no fields from
 // the stackobj itself).
 INLINE_HEADER StgPtr tso_SpLim (StgTSO* tso)
 {
-    return tso->stackobj->stack + RESERVED_STACK_WORDS;
+    return stack_SpLim(tso->stackobj);
 }
 
 /* -----------------------------------------------------------------------------
diff --git a/rts/include/stg/MiscClosures.h b/rts/include/stg/MiscClosures.h
index 1d6cc4584f..9d043257a6 100644
--- a/rts/include/stg/MiscClosures.h
+++ b/rts/include/stg/MiscClosures.h
@@ -64,6 +64,7 @@ RTS_RET(stg_stack_underflow_frame);
 RTS_RET(stg_keepAlive_frame);
 RTS_RET(stg_restore_cccs);
 RTS_RET(stg_restore_cccs_eval);
+RTS_RET(stg_prompt_frame);
 
 // RTS_FUN(stg_interp_constr1_entry);
 // RTS_FUN(stg_interp_constr2_entry);
@@ -211,6 +212,8 @@ RTS_ENTRY(stg_AP);
 RTS_ENTRY(stg_AP_NOUPD);
 RTS_ENTRY(stg_AP_STACK);
 RTS_ENTRY(stg_AP_STACK_NOUPD);
+RTS_ENTRY(stg_CONTINUATION);
+RTS_ENTRY(stg_PROMPT_TAG);
 RTS_ENTRY(stg_dummy_ret);
 RTS_ENTRY(stg_raise);
 RTS_ENTRY(stg_raise_ret);
@@ -355,6 +358,7 @@ RTS_FUN_DECL(stg_ap_pppp_fast);
 RTS_FUN_DECL(stg_ap_ppppp_fast);
 RTS_FUN_DECL(stg_ap_pppppp_fast);
 RTS_FUN_DECL(stg_PAP_apply);
+RTS_FUN_DECL(stg_CONTINUATION_apply);
 
 /* standard GC & stack check entry points, all defined in HeapStackCheck.cmm */
 
@@ -374,7 +378,9 @@ RTS_FUN_DECL(stg_gc_prim_pp);
 RTS_FUN_DECL(stg_gc_prim_n);
 
 RTS_RET(stg_gc_prim_p_ll_ret);
+RTS_RET(stg_gc_prim_pp_ll_ret);
 RTS_FUN_DECL(stg_gc_prim_p_ll);
+RTS_FUN_DECL(stg_gc_prim_pp_ll);
 
 RTS_RET(stg_enter);
 RTS_FUN_DECL(__stg_gc_enter_1);
@@ -502,6 +508,11 @@ RTS_FUN_DECL(stg_paniczh);
 RTS_FUN_DECL(stg_keepAlivezh);
 RTS_FUN_DECL(stg_absentErrorzh);
 
+RTS_FUN_DECL(stg_newPromptTagzh);
+RTS_FUN_DECL(stg_promptzh);
+RTS_FUN_DECL(stg_control0zh);
+RTS_FUN_DECL(stg_control0zh_ll);
+
 RTS_FUN_DECL(stg_makeStableNamezh);
 RTS_FUN_DECL(stg_makeStablePtrzh);
 RTS_FUN_DECL(stg_deRefStablePtrzh);
diff --git a/rts/include/stg/Ticky.h b/rts/include/stg/Ticky.h
index 3353bb2f1a..6116457eea 100644
--- a/rts/include/stg/Ticky.h
+++ b/rts/include/stg/Ticky.h
@@ -56,6 +56,7 @@ EXTERN StgInt ENT_STATIC_IND_ctr INIT(0);
 EXTERN StgInt ENT_DYN_IND_ctr INIT(0);
 EXTERN StgInt ENT_PERM_IND_ctr INIT(0);
 EXTERN StgInt ENT_PAP_ctr INIT(0);
+EXTERN StgInt ENT_CONTINUATION_ctr INIT(0);
 EXTERN StgInt ENT_AP_ctr INIT(0);
 EXTERN StgInt ENT_AP_STACK_ctr INIT(0);
 EXTERN StgInt ENT_BH_ctr INIT(0);
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index d0c1a588c5..88e10fe4ce 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -431,6 +431,7 @@ library
 
     cmm-sources: Apply.cmm
                  Compact.cmm
+                 ContinuationOps.cmm
                  Exception.cmm
                  HeapStackCheck.cmm
                  PrimOps.cmm
@@ -475,6 +476,7 @@ library
                CloneStack.c
                ClosureFlags.c
                ClosureSize.c
+               Continuation.c
                Disassembler.c
                FileLock.c
                ForeignExports.c
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index 53d27a8e68..718650456b 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -451,6 +451,13 @@ thread_AP_STACK (StgAP_STACK *ap)
     return (P_)ap + sizeofW(StgAP_STACK) + ap->size;
 }
 
+STATIC_INLINE P_
+thread_continuation(StgContinuation *cont)
+{
+    thread_stack(cont->stack, cont->stack + cont->stack_size);
+    return (P_)cont + continuation_sizeW(cont);
+}
+
 static P_
 thread_TSO (StgTSO *tso)
 {
@@ -614,6 +621,10 @@ update_fwd_large( bdescr *bd )
         continue;
     }
 
+    case CONTINUATION:
+        thread_continuation((StgContinuation *)p);
+        continue;
+
     default:
       barf("update_fwd_large: unknown/strange object  %d", (int)(info->type));
     }
@@ -800,6 +811,9 @@ thread_obj (const StgInfoTable *info, P_ p)
         return p + sizeofW(StgTRecChunk);
     }
 
+    case CONTINUATION:
+        return thread_continuation((StgContinuation *)p);
+
     default:
         barf("update_fwd: unknown/strange object  %d", (int)(info->type));
         return NULL;
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index a1dcd03c4e..161ccb2e40 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -1062,6 +1062,10 @@ loop:
       copy(p,info,q,sizeofW(StgTRecChunk),gen_no);
       return;
 
+  case CONTINUATION:
+      copy(p,info,q,continuation_sizeW((StgContinuation*)q),gen_no);
+      return;
+
   default:
     barf("evacuate: strange closure type %d", (int)(INFO_PTR_TO_STRUCT(info)->type));
   }
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index 75fe88b03b..91708c84f9 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -1627,6 +1627,12 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
     case COMPACT_NFDATA:
         break;
 
+    case CONTINUATION: {
+        StgContinuation *cont = (StgContinuation *)p;
+        trace_stack_(queue, cont->stack, cont->stack + cont->stack_size);
+        break;
+    }
+
     default:
         barf("mark_closure: unimplemented/strange closure type %d @ %p",
              info->type, p);
diff --git a/rts/sm/NonMovingScav.c b/rts/sm/NonMovingScav.c
index b9e04ae527..9f92563032 100644
--- a/rts/sm/NonMovingScav.c
+++ b/rts/sm/NonMovingScav.c
@@ -428,6 +428,10 @@ nonmovingScavengeOne (StgClosure *q)
         scavenge_compact((StgCompactNFData*)p);
         break;
 
+    case CONTINUATION:
+        scavenge_continuation((StgContinuation *)p);
+        break;
+
     default:
         barf("nonmoving scavenge: unimplemented/strange closure type %d @ %p",
              info->type, p);
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index e5dabaa54d..06a1ddcc91 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -560,6 +560,19 @@ checkClosure( const StgClosure* p )
         return sizeofW(StgTRecChunk);
       }
 
+    case CONTINUATION:
+    {
+        StgContinuation *cont = (StgContinuation *)p;
+        if (cont->apply_mask_frame) {
+          ASSERT(cont->apply_mask_frame == &stg_unmaskAsyncExceptionszh_ret_info
+              || cont->apply_mask_frame == &stg_maskAsyncExceptionszh_ret_info
+              || cont->apply_mask_frame == &stg_maskUninterruptiblezh_ret_info);
+          ASSERT(LOOKS_LIKE_CLOSURE_PTR(cont->stack + cont->mask_frame_offset));
+        }
+        checkStackChunk(cont->stack, cont->stack + cont->stack_size);
+        return continuation_sizeW(cont);
+    }
+
     default:
         barf("checkClosure (closure type %d)", info->type);
     }
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 2b9ace36a4..be30e75b8f 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -90,6 +90,7 @@ static void scavenge_large_bitmap (StgPtr p,
 # define scavenge_mut_arr_ptrs(info) scavenge_mut_arr_ptrs1(info)
 # define scavenge_PAP(pap) scavenge_PAP1(pap)
 # define scavenge_AP(ap) scavenge_AP1(ap)
+# define scavenge_continuation(pap) scavenge_continuation1(pap)
 # define scavenge_compact(str) scavenge_compact1(str)
 #endif
 
@@ -386,6 +387,13 @@ scavenge_AP (StgAP *ap)
     return scavenge_PAP_payload (ap->fun, ap->payload, ap->n_args);
 }
 
+StgPtr
+scavenge_continuation(StgContinuation *cont)
+{
+    scavenge_stack(cont->stack, cont->stack + cont->stack_size);
+    return (StgPtr)cont + continuation_sizeW(cont);
+}
+
 /* -----------------------------------------------------------------------------
    Scavenge SRTs
    -------------------------------------------------------------------------- */
@@ -831,6 +839,10 @@ scavenge_block (bdescr *bd)
         break;
       }
 
+    case CONTINUATION:
+        p = scavenge_continuation((StgContinuation *)p);
+        break;
+
     default:
         barf("scavenge: unimplemented/strange closure type %d @ %p",
              info->type, p);
@@ -1223,6 +1235,10 @@ scavenge_mark_stack(void)
             break;
           }
 
+        case CONTINUATION:
+            scavenge_continuation((StgContinuation *)p);
+            break;
+
         default:
             barf("scavenge_mark_stack: unimplemented/strange closure type %d @ %p",
                  info->type, p);
@@ -1581,6 +1597,10 @@ scavenge_one(StgPtr p)
         scavenge_compact((StgCompactNFData*)p);
         break;
 
+    case CONTINUATION:
+        scavenge_continuation((StgContinuation *)p);
+        break;
+
     default:
         barf("scavenge_one: strange object %d", (int)(info->type));
     }
diff --git a/rts/sm/Scav.h b/rts/sm/Scav.h
index 94250bcf7a..9c5f0679a0 100644
--- a/rts/sm/Scav.h
+++ b/rts/sm/Scav.h
@@ -6,7 +6,7 @@
  *
  * Documentation on the architecture of the Garbage Collector can be
  * found in the online commentary:
- * 
+ *
  *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/storage/gc
  *
  * ---------------------------------------------------------------------------*/
@@ -24,6 +24,7 @@ void    scavenge_thunk_srt (const StgInfoTable *info);
 StgPtr  scavenge_mut_arr_ptrs (StgMutArrPtrs *a);
 StgPtr  scavenge_PAP (StgPAP *pap);
 StgPtr  scavenge_AP (StgAP *ap);
+StgPtr  scavenge_continuation(StgContinuation *pap);
 void    scavenge_compact (StgCompactNFData *str);
 
 #if defined(THREADED_RTS)
@@ -36,6 +37,7 @@ void    scavenge_thunk_srt1 (const StgInfoTable *info);
 StgPtr  scavenge_mut_arr_ptrs1 (StgMutArrPtrs *a);
 StgPtr  scavenge_PAP1 (StgPAP *pap);
 StgPtr  scavenge_AP1 (StgAP *ap);
+StgPtr  scavenge_continuation1(StgContinuation *pap);
 void    scavenge_compact1 (StgCompactNFData *str);
 #endif
 
diff --git a/testsuite/tests/rts/continuations/ContIO.hs b/testsuite/tests/rts/continuations/ContIO.hs
new file mode 100644
index 0000000000..5e55cc26d7
--- /dev/null
+++ b/testsuite/tests/rts/continuations/ContIO.hs
@@ -0,0 +1,28 @@
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+
+-- | This module just wraps the continuation primops so they can be used in
+-- 'IO'. This isn't provided anywhere in @base@ because it's still very unsafe!
+module ContIO where
+
+import GHC.Prim
+import GHC.Types
+
+data PromptTag a = PromptTag (PromptTag# a)
+
+newPromptTag :: IO (PromptTag a)
+newPromptTag = IO (\s -> case newPromptTag# s of
+  (# s', tag #) -> (# s, PromptTag tag #))
+
+prompt :: PromptTag a -> IO a -> IO a
+prompt (PromptTag tag) (IO m) = IO (prompt# tag m)
+
+control0 :: PromptTag a -> ((IO b -> IO a) -> IO a) -> IO b
+control0 (PromptTag tag) f =
+  IO (control0# tag (\k -> case f (\(IO a) -> IO (k a)) of IO b -> b))
+
+reset :: PromptTag a -> IO a -> IO a
+reset = prompt
+
+shift :: PromptTag a -> ((IO b -> IO a) -> IO a) -> IO b
+shift tag f = control0 tag (\k -> reset tag (f (\m -> reset tag (k m))))
diff --git a/testsuite/tests/rts/continuations/all.T b/testsuite/tests/rts/continuations/all.T
new file mode 100644
index 0000000000..fb6b6f2ce1
--- /dev/null
+++ b/testsuite/tests/rts/continuations/all.T
@@ -0,0 +1,4 @@
+test('cont_simple_shift', [extra_files(['ContIO.hs'])], multimod_compile_and_run, ['cont_simple_shift', ''])
+test('cont_exn_masking', [extra_files(['ContIO.hs'])], multimod_compile_and_run, ['cont_exn_masking', ''])
+test('cont_missing_prompt_err', [extra_files(['ContIO.hs']), exit_code(1)], multimod_compile_and_run, ['cont_missing_prompt_err', ''])
+test('cont_nondet_handler', [extra_files(['ContIO.hs'])], multimod_compile_and_run, ['cont_nondet_handler', ''])
diff --git a/testsuite/tests/rts/continuations/cont_exn_masking.hs b/testsuite/tests/rts/continuations/cont_exn_masking.hs
new file mode 100644
index 0000000000..d3c9790692
--- /dev/null
+++ b/testsuite/tests/rts/continuations/cont_exn_masking.hs
@@ -0,0 +1,17 @@
+-- This test verifies that the async exception masking state is captured and
+-- restored appropriately during continuation capture and restore.
+
+import Control.Exception
+import ContIO
+
+main :: IO ()
+main = do
+  tag <- newPromptTag
+  uninterruptibleMask $ \unmaskUninterruptible ->
+    prompt tag $ unmaskUninterruptible $
+    mask $ \unmaskInterruptible ->
+    control0 tag $ \k -> do
+      print =<< getMaskingState -- should be MaskedUninterruptible
+      unmaskInterruptible $ do
+        k (print =<< getMaskingState) -- should be MaskedInterruptible
+        print =<< getMaskingState -- should be Unmasked
diff --git a/testsuite/tests/rts/continuations/cont_exn_masking.stdout b/testsuite/tests/rts/continuations/cont_exn_masking.stdout
new file mode 100644
index 0000000000..04b17f772d
--- /dev/null
+++ b/testsuite/tests/rts/continuations/cont_exn_masking.stdout
@@ -0,0 +1,3 @@
+MaskedUninterruptible
+MaskedInterruptible
+Unmasked
diff --git a/testsuite/tests/rts/continuations/cont_missing_prompt_err.hs b/testsuite/tests/rts/continuations/cont_missing_prompt_err.hs
new file mode 100644
index 0000000000..b48510417d
--- /dev/null
+++ b/testsuite/tests/rts/continuations/cont_missing_prompt_err.hs
@@ -0,0 +1,9 @@
+import ContIO
+
+main :: IO ()
+main = do
+  tag1 <- newPromptTag
+  tag2 <- newPromptTag
+  prompt tag1 $
+    control0 tag2 $ \k -> -- should error: no such prompt on the stack!
+      k (pure ())
diff --git a/testsuite/tests/rts/continuations/cont_missing_prompt_err.stderr b/testsuite/tests/rts/continuations/cont_missing_prompt_err.stderr
new file mode 100644
index 0000000000..0138fedd5c
--- /dev/null
+++ b/testsuite/tests/rts/continuations/cont_missing_prompt_err.stderr
@@ -0,0 +1 @@
+cont_missing_prompt_err: GHC.Exts.control0#: no matching prompt in the current continuation
diff --git a/testsuite/tests/rts/continuations/cont_nondet_handler.hs b/testsuite/tests/rts/continuations/cont_nondet_handler.hs
new file mode 100644
index 0000000000..8c4e75754a
--- /dev/null
+++ b/testsuite/tests/rts/continuations/cont_nondet_handler.hs
@@ -0,0 +1,47 @@
+{-# LANGUAGE LambdaCase #-}
+
+-- This implements a (very) simple API along the lines of those used by
+-- algebraic effect systems, and it uses a distinct prompt tag to identify each
+-- handler. This is not the approach taken by real effect systems for various
+-- reasons, but it's a decent, minimal exercise of the continuations API.
+
+import Control.Applicative
+import ContIO
+
+data HandlerTag f where
+  HandlerTag :: PromptTag a
+             -> (forall b. f b -> (b -> IO a) -> IO a)
+             -> HandlerTag f
+
+send :: HandlerTag f -> f b -> IO b
+send (HandlerTag tag f) v = control0 tag $ \k -> f v (prompt tag . k . pure)
+
+handle :: (HandlerTag f -> IO a)
+       -> (forall b. f b -> (b -> IO a) -> IO a)
+       -> IO a
+handle f g = do
+  tag <- newPromptTag
+  prompt tag $ f (HandlerTag tag g)
+
+data NonDet a where
+  Choice :: NonDet Bool
+
+handleNonDet :: (HandlerTag NonDet -> IO a) -> IO [a]
+handleNonDet f = handle (fmap (:[]) . f) $ \Choice k ->
+  liftA2 (++) (k True) (k False)
+
+amb :: HandlerTag NonDet -> a -> a -> IO a
+amb tag a b = send tag Choice >>= \case
+  True  -> pure a
+  False -> pure b
+
+example :: IO [[(Integer, Char)]]
+example =
+  handleNonDet $ \tag1 ->
+  handleNonDet $ \tag2 -> do
+    x <- amb tag2  1   2
+    y <- amb tag1 'a' 'b'
+    pure (x, y)
+
+main :: IO ()
+main = print =<< example
diff --git a/testsuite/tests/rts/continuations/cont_nondet_handler.stdout b/testsuite/tests/rts/continuations/cont_nondet_handler.stdout
new file mode 100644
index 0000000000..6c6aaa387c
--- /dev/null
+++ b/testsuite/tests/rts/continuations/cont_nondet_handler.stdout
@@ -0,0 +1 @@
+[[(1,'a'),(2,'a')],[(1,'a'),(2,'b')],[(1,'b'),(2,'a')],[(1,'b'),(2,'b')]]
diff --git a/testsuite/tests/rts/continuations/cont_simple_shift.hs b/testsuite/tests/rts/continuations/cont_simple_shift.hs
new file mode 100644
index 0000000000..72280b5cf6
--- /dev/null
+++ b/testsuite/tests/rts/continuations/cont_simple_shift.hs
@@ -0,0 +1,16 @@
+-- This test is a very simple exercise of continuation capture and restore.
+
+import ContIO
+
+example :: IO [Integer]
+example = do
+  tag <- newPromptTag
+  reset tag $ do
+    n <- shift tag $ \k -> do
+      a <- k (pure 2)
+      b <- k (pure 3)
+      pure (a ++ b)
+    pure [n]
+
+main :: IO ()
+main = print =<< example
diff --git a/testsuite/tests/rts/continuations/cont_simple_shift.stdout b/testsuite/tests/rts/continuations/cont_simple_shift.stdout
new file mode 100644
index 0000000000..057d00e585
--- /dev/null
+++ b/testsuite/tests/rts/continuations/cont_simple_shift.stdout
@@ -0,0 +1 @@
+[2,3]
diff --git a/utils/deriveConstants/Main.hs b/utils/deriveConstants/Main.hs
index 4dd187cd52..65c3deb3e5 100644
--- a/utils/deriveConstants/Main.hs
+++ b/utils/deriveConstants/Main.hs
@@ -481,6 +481,12 @@ wanteds os = concat
           ,closureFieldGcptr C "StgAP_STACK" "fun"
           ,closurePayload    C "StgAP_STACK" "payload"
 
+          ,closureSize       C "StgContinuation"
+          ,closureField      C "StgContinuation" "apply_mask_frame"
+          ,closureField      C "StgContinuation" "mask_frame_offset"
+          ,closureField      C "StgContinuation" "stack_size"
+          ,closurePayload    C "StgContinuation" "stack"
+
           ,thunkSize C "StgSelector"
 
           ,closureFieldGcptr C "StgInd" "indirectee"
@@ -1005,4 +1011,3 @@ execute verbose prog args
       ec <- rawSystem prog args
       unless (ec == ExitSuccess) $
           die ("Executing " ++ show prog ++ " failed")
-
diff --git a/utils/genapply/Main.hs b/utils/genapply/Main.hs
index 7166968ddd..34d793236f 100644
--- a/utils/genapply/Main.hs
+++ b/utils/genapply/Main.hs
@@ -726,7 +726,8 @@ genApply regstatus args =
         text "     FUN_2_0,",
         text "     FUN_1_1,",
         text "     FUN_0_2,",
-        text "     FUN_STATIC: {",
+        text "     FUN_STATIC,",
+        text "     CONTINUATION: {",
         nest 4 (vcat [
           text "arity = TO_W_(StgFunInfoExtra_arity(%FUN_INFO(info)));",
           text "ASSERT(arity > 0);",
@@ -851,7 +852,8 @@ genApplyFast regstatus args =
           text "     FUN_2_0,",
           text "     FUN_1_1,",
           text "     FUN_0_2,",
-          text "     FUN_STATIC: {",
+          text "     FUN_STATIC,",
+          text "     CONTINUATION: {",
           nest 4 (vcat [
             text "arity = TO_W_(StgFunInfoExtra_arity(%GET_FUN_INFO(R1)));",
             text "ASSERT(arity > 0);",
diff --git a/utils/genprimopcode/Main.hs b/utils/genprimopcode/Main.hs
index 7fff343188..288ba325fb 100644
--- a/utils/genprimopcode/Main.hs
+++ b/utils/genprimopcode/Main.hs
@@ -298,8 +298,7 @@ gen_hs_source (Info defaults entries) =
            hdr (PrimVecTypeSpec { ty = TyApp (VecTyCon n _) _ }) = wrapOp n ++ ","
            hdr (PrimVecTypeSpec {})                              = error $ "Illegal type spec"
 
-           sec s = "\n-- * " ++ title s ++ "\n"
-                    ++ (unlines $ map ("-- " ++ ) $ lines $ "|" ++ desc s)
+           sec s = "\n{- * " ++ title s ++ "-}\n{-|" ++ desc s ++ "-}"
 
 
            ent   (Section {})         = []
@@ -313,7 +312,7 @@ gen_hs_source (Info defaults entries) =
              -- Doc comments
              [ case desc o ++ extra (opts o) of
                  "" -> []
-                 cmmt -> map ("-- " ++) $ lines $ "|" ++ cmmt
+                 cmmt -> lines ("{-|" ++ cmmt ++ "-}")
 
              -- Deprecations
              , [ d | Just n <- [getName o], d <- prim_deprecated (opts o) n ]
@@ -804,6 +803,8 @@ ppType (TyApp (TyCon "IOPort#") [x,y])   = "mkIOPortPrimTy " ++ ppType x
                                            ++ " " ++ ppType y
 ppType (TyApp (TyCon "TVar#") [x,y])     = "mkTVarPrimTy " ++ ppType x
                                            ++ " " ++ ppType y
+
+ppType (TyApp (TyCon "PromptTag#") [x])  = "mkPromptTagPrimTy " ++ ppType x
 ppType (TyApp (VecTyCon _ pptc) [])      = pptc
 
 ppType (TyUTup ts) = "(mkTupleTy Unboxed "