summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2017-06-03 20:26:13 +0100
committerSimon Marlow <marlowsd@gmail.com>2017-06-08 08:38:09 +0100
commit598472908ebb08f6811b892f285490554c290ae3 (patch)
tree84079aceefe1a7eacda507f104c0f0d4d8c12417
parentbca56bd040de64315564cdac4b7e943fc8a75ea0 (diff)
downloadhaskell-598472908ebb08f6811b892f285490554c290ae3.tar.gz
Fix a lost-wakeup bug in BLACKHOLE handling (#13751)
Summary: The problem occurred when * Threads A & B evaluate the same thunk * Thread A context-switches, so the thunk gets blackholed * Thread C enters the blackhole, creates a BLOCKING_QUEUE attached to the blackhole and thread A's `tso->bq` queue * Thread B updates the blackhole with a value, overwriting the BLOCKING_QUEUE * We GC, replacing A's update frame with stg_enter_checkbh * Throw an exception in A, which ignores the stg_enter_checkbh frame Now we have C blocked on A's tso->bq queue, but we forgot to check the queue because the stg_enter_checkbh frame has been thrown away by the exception. The solution and alternative designs are discussed in Note [upd-black-hole]. This also exposed a bug in the interpreter, whereby we were sometimes context-switching without calling `threadPaused()`. I've fixed this and added some Notes. Test Plan: * `cd testsuite/tests/concurrent && make slow` * validate Reviewers: niteria, bgamari, austin, erikd Reviewed By: erikd Subscribers: rwbarton, thomie GHC Trac Issues: #13751 Differential Revision: https://phabricator.haskell.org/D3630
-rw-r--r--includes/stg/MiscClosures.h2
-rw-r--r--rts/HeapStackCheck.cmm24
-rw-r--r--rts/Interpreter.c10
-rw-r--r--rts/Messages.c4
-rw-r--r--rts/Schedule.c7
-rw-r--r--rts/StgStartup.cmm3
-rw-r--r--rts/sm/Evac.c64
-rw-r--r--rts/sm/Evac.h3
-rw-r--r--rts/sm/Scav.c70
-rw-r--r--testsuite/tests/concurrent/should_run/all.T1
10 files changed, 133 insertions, 55 deletions
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index 4e8fe68bd6..76cfbd6c8c 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -275,8 +275,6 @@ RTS_FUN_DECL(stg_PAP_apply);
RTS_FUN_DECL(stg_gc_noregs);
-RTS_RET(stg_enter_checkbh);
-
RTS_RET(stg_ret_v);
RTS_RET(stg_ret_p);
RTS_RET(stg_ret_n);
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
index 001e5f6299..85fb1cbef6 100644
--- a/rts/HeapStackCheck.cmm
+++ b/rts/HeapStackCheck.cmm
@@ -230,30 +230,6 @@ stg_gc_prim_p_ll
}
/* -----------------------------------------------------------------------------
- stg_enter_checkbh is just like stg_enter, except that we also call
- checkBlockingQueues(). The point of this is that the GC can
- replace an stg_marked_upd_frame with an stg_enter_checkbh if it
- finds that the BLACKHOLE has already been updated by another
- thread. It would be unsafe to use stg_enter, because there might
- be an orphaned BLOCKING_QUEUE now.
- -------------------------------------------------------------------------- */
-
-/* The stg_enter_checkbh frame has the same shape as an update frame: */
-
-INFO_TABLE_RET ( stg_enter_checkbh, RET_SMALL,
- UPDATE_FRAME_FIELDS(W_,P_,info_ptr,ccs,p2,updatee))
- return (P_ ret)
-{
- foreign "C" checkBlockingQueues(MyCapability() "ptr",
- CurrentTSO);
-
- // we need to return updatee now. Note that it might be a pointer
- // to an indirection or a tagged value, we don't know which, so we
- // need to ENTER() rather than return().
- ENTER(updatee);
-}
-
-/* -----------------------------------------------------------------------------
Info tables for returning values of various types. These are used
when we want to push a frame on the stack that will return a value
to the frame underneath it.
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 4926d1dab5..1a883a5b4b 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -115,6 +115,16 @@
cap->r.rRet = (retcode); \
return cap;
+// Note [avoiding threadPaused]
+//
+// Switching between the interpreter to compiled code can happen very
+// frequently, so we don't want to call threadPaused(), which is
+// expensive. BUT we must be careful not to violate the invariant
+// that threadPaused() has been called on all threads before we GC
+// (see Note [upd-black-hole]. So the scheduler must ensure that when
+// we return in this way that we definitely immediately run the thread
+// again and don't GC or do something else.
+//
#define RETURN_TO_SCHEDULER_NO_PAUSE(todo,retcode) \
SAVE_THREAD_STATE(); \
cap->r.rCurrentTSO->what_next = (todo); \
diff --git a/rts/Messages.c b/rts/Messages.c
index 0508a20d20..8fab314bc4 100644
--- a/rts/Messages.c
+++ b/rts/Messages.c
@@ -289,7 +289,9 @@ loop:
recordClosureMutated(cap,(StgClosure*)bq);
}
- debugTraceCap(DEBUG_sched, cap, "thread %d blocked on thread %d",
+ debugTraceCap(DEBUG_sched, cap,
+ "thread %d blocked on existing BLOCKING_QUEUE "
+ "owned by thread %d",
(W_)msg->tso->id, (W_)owner->id);
// See above, #3838
diff --git a/rts/Schedule.c b/rts/Schedule.c
index f82d924183..b4f60f8d3a 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -1215,10 +1215,9 @@ scheduleHandleYield( Capability *cap, StgTSO *t, uint32_t prev_what_next )
ASSERT(t->_link == END_TSO_QUEUE);
- // Shortcut if we're just switching evaluators: don't bother
- // doing stack squeezing (which can be expensive), just run the
- // thread.
- if (cap->context_switch == 0 && t->what_next != prev_what_next) {
+ // Shortcut if we're just switching evaluators: just run the thread. See
+ // Note [avoiding threadPaused] in Interpreter.c.
+ if (t->what_next != prev_what_next) {
debugTrace(DEBUG_sched,
"--<< thread %ld (%s) stopped to switch evaluators",
(long)t->id, what_next_strs[t->what_next]);
diff --git a/rts/StgStartup.cmm b/rts/StgStartup.cmm
index a650e5c367..0cd18628e2 100644
--- a/rts/StgStartup.cmm
+++ b/rts/StgStartup.cmm
@@ -127,6 +127,9 @@ stg_returnToSched /* no args: explicit stack layout */
// current thread. This is used for switching from compiled execution to the
// interpreter, where calling threadPaused() on every switch would be too
// expensive.
+//
+// See Note [avoiding threadPaused] in Interpreter.c
+//
stg_returnToSchedNotPaused /* no args: explicit stack layout */
{
SAVE_THREAD_STATE();
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 78adf62f6c..fb1af0f692 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -34,6 +34,7 @@ StgWord64 whitehole_spin = 0;
#if defined(THREADED_RTS) && !defined(PARALLEL_GC)
#define evacuate(p) evacuate1(p)
+#define evacuate_BLACKHOLE(p) evacuate_BLACKHOLE1(p)
#define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p)
#endif
@@ -532,7 +533,6 @@ loop:
ASSERTM(LOOKS_LIKE_CLOSURE_PTR(q), "invalid closure, info=%p", q->header.info);
if (!HEAP_ALLOCED_GC(q)) {
-
if (!major_gc) return;
info = get_itbl(q);
@@ -873,6 +873,68 @@ loop:
}
/* -----------------------------------------------------------------------------
+ Evacuate a pointer that is guaranteed to point to a BLACKHOLE.
+
+ This is used for evacuating the updatee of an update frame on the stack. We
+ want to copy the blackhole even if it has been updated by another thread and
+ is now an indirection, because the original update frame still needs to
+ update it.
+
+ See also Note [upd-black-hole] in sm/Scav.c.
+ -------------------------------------------------------------------------- */
+
+void
+evacuate_BLACKHOLE(StgClosure **p)
+{
+ bdescr *bd;
+ uint32_t gen_no;
+ StgClosure *q;
+ const StgInfoTable *info;
+ q = *p;
+
+ // closure is required to be a heap-allocated BLACKHOLE
+ ASSERT(HEAP_ALLOCED_GC(q));
+ ASSERT(GET_CLOSURE_TAG(q) == 0);
+
+ bd = Bdescr((P_)q);
+
+ // blackholes can't be in a compact, or large
+ ASSERT((bd->flags & (BF_COMPACT | BF_LARGE)) == 0);
+
+ if (bd->flags & BF_EVACUATED) {
+ if (bd->gen_no < gct->evac_gen_no) {
+ gct->failed_to_evac = true;
+ TICK_GC_FAILED_PROMOTION();
+ }
+ return;
+ }
+ if (bd->flags & BF_MARKED) {
+ if (!is_marked((P_)q,bd)) {
+ mark((P_)q,bd);
+ push_mark_stack((P_)q);
+ }
+ return;
+ }
+ gen_no = bd->dest_no;
+ info = q->header.info;
+ if (IS_FORWARDING_PTR(info))
+ {
+ StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);
+ *p = e;
+ if (gen_no < gct->evac_gen_no) { // optimisation
+ if (Bdescr((P_)e)->gen_no < gct->evac_gen_no) {
+ gct->failed_to_evac = true;
+ TICK_GC_FAILED_PROMOTION();
+ }
+ }
+ return;
+ }
+
+ ASSERT(INFO_PTR_TO_STRUCT(info)->type == BLACKHOLE);
+ copy(p,info,q,sizeofW(StgInd),gen_no);
+}
+
+/* -----------------------------------------------------------------------------
Evaluate a THUNK_SELECTOR if possible.
p points to a THUNK_SELECTOR that we want to evaluate. The
diff --git a/rts/sm/Evac.h b/rts/sm/Evac.h
index 65a15a2493..11f505c526 100644
--- a/rts/sm/Evac.h
+++ b/rts/sm/Evac.h
@@ -34,6 +34,9 @@
REGPARM1 void evacuate (StgClosure **p);
REGPARM1 void evacuate1 (StgClosure **p);
+void evacuate_BLACKHOLE(StgClosure **p);
+void evacuate_BLACKHOLE1(StgClosure **p);
+
extern W_ thunk_selector_depth;
#include "EndPrivate.h"
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index d26a893af8..1ae8a4c19b 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -39,6 +39,7 @@ static void scavenge_large_bitmap (StgPtr p,
#if defined(THREADED_RTS) && !defined(PARALLEL_GC)
# define evacuate(a) evacuate1(a)
+# define evacuate_BLACKHOLE(a) evacuate_BLACKHOLE1(a)
# define scavenge_loop(a) scavenge_loop1(a)
# define scavenge_block(a) scavenge_block1(a)
# define scavenge_mutable_list(bd,g) scavenge_mutable_list1(bd,g)
@@ -1891,6 +1892,7 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
case UPDATE_FRAME:
// Note [upd-black-hole]
+ //
// In SMP, we can get update frames that point to indirections
// when two threads evaluate the same thunk. We do attempt to
// discover this situation in threadPaused(), but it's
@@ -1906,35 +1908,57 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
// Now T is an indirection, and the update frame is already
// marked on A's stack, so we won't traverse it again in
// threadPaused(). We could traverse the whole stack again
- // before GC, but that seems like overkill.
+ // before GC, but that would be too expensive.
//
// Scavenging this update frame as normal would be disastrous;
- // the updatee would end up pointing to the value. So we
- // check whether the value after evacuation is a BLACKHOLE,
- // and if not, we change the update frame to an stg_enter
- // frame that simply returns the value. Hence, blackholing is
- // compulsory (otherwise we would have to check for thunks
- // too).
+ // the indirection will be shorted out, and the updatee would
+ // end up pointing to the value. The update code will then
+ // overwrite the value, instead of the BLACKHOLE it is
+ // expecting to write to.
+ //
+ // One way we could try to fix this is to detect when the
+ // BLACKHOLE has been updated by another thread, and then
+ // replace this update frame with a special frame that just
+ // enters the value. But this introduces some other
+ // complexities:
+ //
+ // - we must be careful to call checkBlockingQueues() in this
+ // special frame, because we might otherwise miss wakeups
+ // for threads that blocked on the original BLACKHOLE,
+ // - we must spot this frame when we're stripping the stack in
+ // raiseAsync() and raiseExceptionHelper(), and arrange to call
+ // checkBlockingQueues() there too.
+ //
+ // This is hard to get right, indeed we previously got it
+ // wrong (see #13751). So we now take a different approach:
+ // always copy the BLACKHOLE, even if it is actually an
+ // indirection. This way we keep the update frame, we're
+ // guaranteed to still perform the update, and check for
+ // missed wakeups even when stripping the stack in
+ // raiseAsync() and raiseExceptionHelper(). This is also a
+ // little more efficient, because evacuating a known BLACKHOLE
+ // is faster than evacuating an unknown closure.
+ //
+ // NOTE: for the reasons above, blackholing (either lazy or
+ // eager) is NOT optional. See also Note [avoiding
+ // threadPaused] in Interpreter.c.
//
- // One slight hiccup is that the THUNK_SELECTOR machinery can
- // overwrite the updatee with an IND. In parallel GC, this
- // could even be happening concurrently, so we can't check for
- // the IND. Fortunately if we assume that blackholing is
- // happening (either lazy or eager), then we can be sure that
- // the updatee is never a THUNK_SELECTOR and we're ok.
- // NB. this is a new invariant: blackholing is not optional.
+ // There are a couple of alternative solutions:
+ // - if we see an update frame that points to an indirection,
+ // arrange to call checkBlockingQueues() on that thread
+ // after GC.
+ // - spot a BLOCKING_QUEUE that points to a value and
+ // arrange to wake it up after the GC.
+ //
+ // These are more difficult to implement, requiring an extra
+ // list to be maintained during GC. They also rely on more
+ // subtle invariants than the solution implemented here.
+ //
+
{
StgUpdateFrame *frame = (StgUpdateFrame *)p;
- StgClosure *v;
- evacuate(&frame->updatee);
- v = frame->updatee;
- if (GET_CLOSURE_TAG(v) != 0 ||
- (get_itbl(v)->type != BLACKHOLE)) {
- // blackholing is compulsory, see above.
- frame->header.info = (const StgInfoTable*)&stg_enter_checkbh_info;
- }
- ASSERT(v->header.info != &stg_TSO_info);
+ evacuate_BLACKHOLE(&frame->updatee);
p += sizeofW(StgUpdateFrame);
continue;
}
diff --git a/testsuite/tests/concurrent/should_run/all.T b/testsuite/tests/concurrent/should_run/all.T
index 16363ed0b8..69b8ad7a1e 100644
--- a/testsuite/tests/concurrent/should_run/all.T
+++ b/testsuite/tests/concurrent/should_run/all.T
@@ -266,6 +266,7 @@ test('hs_try_putmvar001',
# This one should work for both threaded and non-threaded RTS
test('hs_try_putmvar002',
[pre_cmd('$MAKE -s --no-print-directory hs_try_putmvar002_setup'),
+ omit_ways(['ghci']),
extra_run_opts('1 8 10000')],
compile_and_run, ['hs_try_putmvar002_c.c'])