summaryrefslogtreecommitdiff
path: root/rts/Continuation.c
blob: f2ec62fe6023b042eae4fa9b78fd299b69c012a2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
/* ---------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2020
 *
 * Continuations
 *
 * --------------------------------------------------------------------------*/

#include "rts/PosixSource.h"
#include "Rts.h"

#include "sm/Storage.h"
#include "sm/Sanity.h"
#include "Continuation.h"
#include "Printer.h"
#include "Threads.h"

#include <string.h>

/* Note [Continuations overview]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A first-class continuation is represented in the RTS as a closure with type
CONTINUATION (which corresponds to the StgContinuation struct type).
Continuation closures are similar to AP_STACK closures in that they store a
chunk of stack, but while AP_STACK closures are a special type of thunk,
continuation closures are a special type of *function*. More specifically, every
continuation is a function of arity 2, accepting one pointer and one RealWorld
token.

Continuation capture is performed through the use of two cooperating primops,
`prompt#` and `control0#`, which morally have the following types:

    prompt# :: PromptTag a -> IO a -> IO a
    control0# :: PromptTag a -> ((IO b -> IO a) -> IO a) -> IO b

(In reality, their types use `State# RealWorld` rather than `IO` in the usual
way, but the type of control0# is nearly incomprehensible when presented in
those terms, so thinking in terms of `IO` is a helpful abbreviation.)

GHC implements *delimited* continuations: `prompt#` introduces a delimiter that
`control0#` looks for to determine how much of the local continuation should be
captured. Operationally, each use of `prompt#` pushes a *prompt frame* onto the
stack (annotated with a user-provided *prompt tag*), and each use of `control0#`
copies the portion of the stack up to the nearest prompt frame (with a matching
tag) into the heap to form a new continuation closure. `control0#` then aborts
to the prompt frame and resumes execution by applying the argument to
`control0#` to the continuation. This process is mostly handled in C, via
`captureContinuationAndAbort`.

When a continuation closure is applied, the process occurs in reverse: the chunk
of stack frames stored in the closure are pushed onto the current stack, and
execution resumes by applying the argument to the continuation to a RealWorld
token. This is a non-destructive operation---the caller is free to apply the
continuation arbitrarily many times. This process is handled in Cmm, via
`stg_CONTINUATION_apply` in ContinuationOps.cmm.

For the most part, capture and restoration of continuations is surprisingly
straightforward: the bulk of the work on each side of the process is just doing
the necessary copying. However, there are a few additional subtleties:

  * It is possible for continuation capture to *fail* if no matching prompt
    frame is on the stack or if the continuation would include thunk update or
    STM frames; see Note [When capturing the continuation fails] for details.

  * Special care must be taken to ensure the async exception masking state is
    properly updated across continuation captures and restores, see
    Note [Continuations and async exception masking] for details.

Note [When capturing the continuation fails]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
How can continuation capture fail? There are three possible scenarios:

  1. There’s no matching prompt frame *anywhere* on the stack.
  2. The captured continuation would include a thunk update frame.
  3. The captured continuation would include part of an STM transaction.

The first case is fairly self-explanatory: if there’s no matching prompt frame,
we don’t know where to capture up to. The other two cases are important to
protect RTS invariants, as continuations can be applied arbitrarily many times,
but both thunk updates and STM transactions are non-reentrant.

Moreover, any attempt to capture across a thunk update frame is necessarily
ill-defined. Such frames indicate the start of a given `State#` thread (i.e.
they likely correspond to `unsafePerformIO` or `runST`), but the “current
continuation” is only predictable in code with a well-defined evaluation order.
Any attempt to capture across such a boundary would be correspondingly
unpredictable, so we want to be sure to reject it as programmer error. However,
note that we cannot detect and reject *all* such errors, see Note [Detecting
illegal captures is not guaranteed] for why.

To identify these error cases while searching for a matching prompt frame, we
also look for any stack frames that would indicate we’ve gone astray:

  1. If we see a STOP_FRAME, we’ve just plain run out of stack frames.
  2. To identify thunk updates, we can just look for UPDATE_FRAMEs.
  3. To identify STM transactions, we look for STM-related frames, namely
     ATOMICALLY_FRAME, CATCH_RETRY_FRAME, or CATCH_STM_FRAME.

If it finds any of these frames before a matching prompt frame,
`captureContinuationAndAbort` returns NULL, which `stg_control0zh` treats as a
signal that it should raise an exception.

Note [Detecting illegal captures is not guaranteed]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
As alluded to in Note [When capturing the continuation fails], the “current
continuation” is only well-defined within a given `State#` thread. For a
concrete example illustrating why, consider the following program:

    do tag <- newPromptTag
       let v = unsafePerformIO (control0 tag (\k -> k ()))
       prompt tag (pure $! v)

If we were to allow this program, what would its result be? The answer depends
on how and when we evaluate `v`. If we allocate a thunk for `v` and force it
once the prompt has been installed, the program would successfully return `()`.
But since GHC can tell that `v` is used strictly, it may very well choose to
evaluate it immediately, before the prompt has been installed, in which case
there would be no matching prompt in scope at the time the call to `control0` is
evaluated, and the program would raise an error.

Without uses of `pseq`, GHC makes no guarantees about the order in which it will
evaluate pure expressions, so the optimizer may rearrange them significantly.
Therefore, any code that attempts such a capture is ill-defined, and we want to
do our best to detect and reject such mistakes. However, we cannot guarantee
that we will catch all such misuses! For example, given the program

    prompt tag (pure (1 + unsafePerformIO (control0 tag f)))

it is extremely unlikely that we will signal an error, despite the erroneous
capture. The reason is Note [Simplification of runRW#] in GHC.CoreToStg.Prep:
when `runRW#` appears in a strict context, there is no reason to allocate a
thunk, so GHC takes care to ensure it will not do so. With no thunk to update,
there is naturally no thunk update frame, so we cannot possibly detect at
runtime that anything was amiss.

Preserving the information necessary to reliably detect all of these sorts of
misuses at runtime in all situations would be disastrous for performance, so it
is the programmer’s responsibility to ensure this does not happen. Any program
for which continuation capture fails is a buggy program---there is NO WAY to
write a safe program that relies upon catching exceptions raised by continuation
capture failure. In other words, such programs invoke undefined behavior.

Given the behavior of such programs is already undefined, one might ask why we
bother detecting and reporting such failure conditions at all. In theory, we
could ignore thunk update frames completely and let the program behave
unpredictably. But detecting the failures we *can* detect is still worthwhile:

  * From the programmer’s point of view, best-effort detection and reporting of
    such misuses is still helpful, and the performance overhead of checking for
    them is minimal.

  * From the runtime’s point of view, detecting and eagerly rejecting such uses
    gives us much more confidence they will not violate internal invariants, so
    even if a buggy program does the wrong thing, it won’t corrupt the runtime.

In summary, the runtime does the best it can, but if it fails to detect and
report a misuse of `control0#`, the bug is in the program, not GHC.

Note [Continuations and async exception masking]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It’s possible for a captured continuation to include a frame that alters the
async exception masking state. For example, consider the following program:

    prompt tag $ maskAsyncExceptions $
      control0 tag (\k -> ...) >>= do_something

The captured continuation will look like this:

    \m -> maskAsyncExceptions (m >>= do_something)

This situation requires some additional care:

  1. When aborting to the prompt as part of continuation capture, we need to
     restore the async exception masking state to whatever it was when the
     prompt frame was initially pushed.

  2. When restoring the continuation, we need to update the async exception
     masking state to whatever it was when the continuation was captured.

  3. When restoring the continuation, we need to update the pushed stack frames
     themselves to restore the new context’s async exception masking state when
     they return.

The third point is by far the most subtle, and it stems from the way primops
like `maskAsyncExceptions#` arrange to restore the async exception masking state
when their subcomputation returns. Specifically, when a primop like
`unmaskAsyncExceptions#`, `maskAsyncExceptions#`, or `maskUninterruptible#` is
called, it pushes one of three different frames onto the stack, depending on the
enclosing context’s masking state:

  * If exceptions were unmasked, it pushes `stg_unmaskAsyncExceptionszh_ret`.
  * If exceptions were interruptibly masked, it pushes `stg_maskAsyncExceptionszh_ret`.
  * If exceptions were uninterruptibly masked, it pushes `stg_maskUninterruptiblezh_ret`.

Note that, somewhat confusingly, which frame is pushed depends only on the
*enclosing* context’s masking state, *not* the new masking state installed for
the subcomputation. This works out, since the frame only exists to restore the
previous masking state, but it means the frames on the stack do not themselves
determine how the masking state was modified.

To cooperate with this strategy, we look for the aforementioned return
frames while walking the stack during continuation capture. If we find any of
them, we record two pieces of information:

  1. The captured continuation is necessarily responsible for whatever the
     masking state happens to be currently, so the *current* masking state must
     be restored upon continuation resumption. We set the `apply_mask_frame`
     field to a stack frame info pointer that will update the masking state
     accordingly if returned to.

  2. We set the `mask_frame_offset` field to the word offset of the *outermost*
     stack frame that restores the masking state. This serves a dual purpose:

       a. When we return to Cmm, `stg_control0zh` returns to this frame to
          restore the async exception masking state.

       b. When the continuation is restored, this frame is substituted with one
          that restores the masking state of the new context (i.e. the one in
          which the continuation is restored).

This is all quite subtle, so to illustrate with an example, suppose we have the
following state at the start of a continuation capture:

    ┌───────┐                                       ┌───────────────────┐
    │ STACK │                                       │    tso->flags     │
    ╞═══════╡                                       ╞═══════════════╤═══╡
    │  ...  │                                       │ BLOCKEX       │ 1 │
    ├───────┤                                       ├───────────────┼───┤
    │  RET  │──→ stg_maskAsyncExceptionszh_ret      │ INTERRUPTIBLE │ 0 │
    ├───────┤                                       └───────────────┴───┘
    │  ...  │
    ├───────┤
    │  RET  │──→ stg_unmaskAsyncExceptionszh_ret
    ├───────┤
    │  ...  │
    ├───────┤
    │  RET  │──→ stg_prompt_frame
    ├───────┤

We’ll copy the relevant stack frames into the heap, and we’ll set the
`apply_mask_frame` and `mask_frame_offset` fields accordingly:

    ┌───────────────────┐
    │   CONTINUATION    │
    ╞═══════════════════╡  ╭──→ stg_maskUninterruptiblezh_ret
    │ apply_mask_frame  │──╯
    ├───────────────────┤      ┌───────┐
    │      stack        │─────→│ STACK │
    ├───────────────────┤      ╞═══════╡
    │ mask_frame_offset │──╮   │  ...  │
    └───────────────────┘  │   ├───────┤
                           │   │  RET  │──→ stg_maskAsyncExceptionszh_ret
                           │   ├───────┤
                           │   │  ...  │
                           │   ├───────┤
                           ╰──→│  RET  │──→ stg_unmaskAsyncExceptionszh_ret
                               ├───────┤
                               │  ...  │
                               └───────┘

Next, `captureContinuationAndAbort` returns to Cmm, which sets up the stack so
that it can return to the frame indicated by `mask_frame_offset`, which in this
case is `stg_unmaskAsyncExceptionszh_ret`:

    ┌───────┐                                        ┌───────────────────┐
    │ STACK │                                        │    tso->flags     │
    ╞═══════╡                                        ╞═══════════════╤═══╡
    │  RET  │──→ stg_unmaskAsyncExceptionszh_ret     │ BLOCKEX       │ 1 │
    ├───────┤                                        ├───────────────┼───┤
    │  RET  │──→ stg_ap_pv                           │ INTERRUPTIBLE │ 0 │
    ├───────┤                                        └───────────────┴───┘
    │ cont  │
    ├───────┤
    │  ...  │
    ├───────┤

`stg_control0zh` then returns to `stg_unmaskAsyncExceptionszh`, which restores
the exception masking state appropriately. It then returns to `stg_ap_pv`, which
applies the handler to the captured continuation, and execution continues with
exceptions properly unmasked.

Next, let’s consider what happens when the continuation is restored. Suppose we
start in the following state:

    ┌───────┐                                        ┌───────────────────┐
    │ STACK │                                        │    tso->flags     │
    ╞═══════╡                                        ╞═══════════════╤═══╡
    │  ...  │                                        │ BLOCKEX       │ 1 │
    ├───────┤                                        ├───────────────┼───┤
                                                     │ INTERRUPTIBLE │ 1 │
                                                     └───────────────┴───┘

`stg_CONTINUATION_apply` will start by copying the frames from the continuation
back onto the stack, plus `apply_mask_frame` on top, which in this case is
`stg_maskUninterruptiblezh_ret`:

                         ┌───────┐
                         │ STACK │
                         ╞═══════╡
                         │  RET  │──→ stg_maskUninterruptiblezh_ret
                         ├───────┤
                         │  RET  │──→ stg_ap_v
                         ├───────┤
                         │  ...  │
                         ├───────┤
                         │  RET  │──→ stg_maskAsyncExceptionszh_ret
                         ├───────┤
                         │  ...  │
                         ├───────┤
    mask_frame_offset──→ │  RET  │──→ stg_unmaskAsyncExceptionszh_ret
                         ├───────┤
                         │  ...  │
                         ├───────┤

Next, it will update the frame at `mask_frame_offset` based on the current async
exception masking state. In this case, exceptions are interruptibly masked, so
the frame will be replaced with `stg_maskAsyncExceptionszh_ret`.
`stg_CONTINUATION_apply` will then return to the top of the stack, and
`stg_maskUninterruptiblezh_ret` will update the async exception masking state:

    ┌───────┐                                        ┌───────────────────┐
    │ STACK │                                        │    tso->flags     │
    ╞═══════╡                                        ╞═══════════════╤═══╡
    │  RET  │──→ stg_ap_v                            │ BLOCKEX       │ 1 │
    ├───────┤                                        ├───────────────┼───┤
    │  ...  │                                        │ INTERRUPTIBLE │ 0 │
    ├───────┤                                        └───────────────┴───┘
    │  RET  │──→ stg_maskAsyncExceptionszh_ret
    ├───────┤
    │  ...  │
    ├───────┤
    │  RET  │──→ stg_unmaskAsyncExceptionszh_ret
    ├───────┤
    │  ...  │
    ├───────┤

Control then returns to `stg_ap_v`, which applies the argument in R1 to resume
execution with exceptions re-masked, and we’re done. Phew.

One might naturally wonder why we bother with all this complicated indirection
involving returning to mask/unmask frames rather than just adjusting
`tso->flags` directly ourselves. That would indeed be significantly simpler,
but returning to `stg_unmaskAsyncExceptionszh_ret` has the important side-effect
of checking eagerly for a pending async exception and raising it if one is
available. So we do some tricky trampolining, and that frees us from having to
worry about that in the continuation capture/restore logic as well. */

static bool is_mask_frame_info(const StgInfoTable *info)
{
  return info == &stg_unmaskAsyncExceptionszh_ret_info
      || info == &stg_maskAsyncExceptionszh_ret_info
      || info == &stg_maskUninterruptiblezh_ret_info;
}

static StgStack *pop_stack_chunk(Capability *cap, StgTSO *tso)
{
  StgStack *stack = tso->stackobj;
  stack->sp = stack->stack + stack->stack_size - sizeofW(StgUnderflowFrame);
  threadStackUnderflow(cap, tso);
  return tso->stackobj;
}

// see Note [Continuations overview]
StgClosure *captureContinuationAndAbort(Capability *cap, StgTSO *tso, StgPromptTag prompt_tag)
{
  // We’d better own this thread if we’re doing this!
  ASSERT(tso->cap == cap);

  StgStack *stack = tso->stackobj;
  StgPtr frame = stack->sp;

  // We perform the capture in two phases:
  //
  //   1. We walk the stack to find the prompt frame to capture up to (if any).
  //
  //   2. If we successfully find a matching prompt, we proceed with the actual
  //      by allocating space for the continuation, performing the necessary
  //      copying, and unwinding the stack.
  //
  // These variables are modified in Phase 1 to keep track of how far we had to
  // walk before finding the prompt frame. Afterwards, Phase 2 consults them to
  // determine how to proceed with the actual capture.

  StgWord total_words = 0;
  bool in_first_chunk = true;
  StgWord first_chunk_words = 0;
  StgWord last_chunk_words = 0;
  StgWord full_chunks = 0;

  // see Note [Continuations and async exception masking]
  const StgInfoTable *apply_mask_frame = NULL;
  StgWord mask_frame_offset = 0;

  /* --- Phase 1: Find the matching prompt frame ---------------------------- */

  IF_DEBUG(continuation,
    debugBelch("captureContinuationAndAbort: searching for prompt\n");
    debugBelch("  prompt_tag = "); printClosure(prompt_tag));

  while (true) {
    IF_DEBUG(continuation,
      printStackChunk(frame, frame + stack_frame_sizeW((StgClosure *)frame)));

    const StgInfoTable *info_ptr = ((StgClosure *)frame)->header.info;
    const StgRetInfoTable *info = get_ret_itbl((StgClosure *)frame);
    StgWord chunk_words = frame - stack->sp;

    if (info_ptr == &stg_prompt_frame_info
        && ((StgPromptFrame *)frame)->tag == prompt_tag) {
      total_words += chunk_words;
      if (in_first_chunk) {
        first_chunk_words = chunk_words;
      } else {
        last_chunk_words = chunk_words;
      }
      break;
    }

    if (info->i.type == UNDERFLOW_FRAME) {
      total_words += chunk_words;
      if (in_first_chunk) {
        first_chunk_words = chunk_words;
      } else {
        full_chunks++;
      }

      stack = ((StgUnderflowFrame *)frame)->next_chunk;
      frame = stack->sp;
      in_first_chunk = false;
      continue;
    }

    // Finding any of these mean we failed to find the prompt frame;
    // see Note [When capturing the continuation fails] for details
    if (RTS_UNLIKELY(info->i.type == STOP_FRAME
                  || info->i.type == UPDATE_FRAME
                  || info->i.type == ATOMICALLY_FRAME
                  || info->i.type == CATCH_RETRY_FRAME
                  || info->i.type == CATCH_STM_FRAME)) {
      IF_DEBUG(continuation,
        debugBelch("captureContinuationAndAbort: could not find prompt, bailing out\n"));
      return NULL; // Bail out
    }

    // see Note [Continuations and async exception masking]
    if (is_mask_frame_info(info_ptr)) {
      mask_frame_offset = total_words + chunk_words;
      if (apply_mask_frame == NULL) {
        if ((tso->flags & TSO_BLOCKEX) == 0) {
          apply_mask_frame = &stg_unmaskAsyncExceptionszh_ret_info;
        } else if ((tso->flags & TSO_INTERRUPTIBLE) == 0) {
          apply_mask_frame = &stg_maskUninterruptiblezh_ret_info;
        } else {
          apply_mask_frame = &stg_maskAsyncExceptionszh_ret_info;
        }
      }
    }

    // Advance to the next frame.
    frame += stack_frame_sizeW((StgClosure *)frame);
  }

  /* --- Phase 2: Perform the capture --------------------------------------- */

  IF_DEBUG(continuation,
    debugBelch("captureContinuationAndAbort: found prompt, "
               "capturing %" FMT_Word " words of stack\n", total_words));

  dirty_TSO(cap, tso);
  dirty_STACK(cap, stack);

  StgContinuation *cont = (StgContinuation *)allocate(cap, CONTINUATION_sizeW(total_words));
  SET_HDR(cont, &stg_CONTINUATION_info, stack->header.prof.ccs);
  cont->apply_mask_frame = apply_mask_frame;
  cont->mask_frame_offset = mask_frame_offset;
  cont->stack_size = total_words;

  stack = tso->stackobj;
  StgPtr cont_stack = cont->stack;
  memcpy(cont_stack, stack->sp, first_chunk_words * sizeof(StgWord));
  cont_stack += first_chunk_words;

  if (in_first_chunk) {
    stack->sp += first_chunk_words;
  } else {
    stack = pop_stack_chunk(cap, tso);

    for (StgWord i = 0; i < full_chunks; i++) {
      const size_t chunk_words = stack->stack + stack->stack_size - stack->sp - sizeofW(StgUnderflowFrame);
      memcpy(cont_stack, stack->sp, chunk_words * sizeof(StgWord));
      cont_stack += chunk_words;
      stack = pop_stack_chunk(cap, tso);
    }

    memcpy(cont_stack, stack->sp, last_chunk_words * sizeof(StgWord));
    cont_stack += last_chunk_words;
    stack->sp += last_chunk_words;
  }

  ASSERT(cont->stack + total_words == cont_stack);
  ASSERT(((StgClosure *)stack->sp)->header.info == &stg_prompt_frame_info);
  stack->sp += stack_frame_sizeW((StgClosure *)frame);
  IF_DEBUG(sanity,
    checkClosure((StgClosure *)cont);
    checkTSO(tso));

  return TAG_CLOSURE(2, (StgClosure *)cont);
}