diff options
author | Sergei Trofimovich <slyfox@gentoo.org> | 2015-07-07 17:00:23 +0200 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2015-07-07 19:17:43 +0200 |
commit | 5857e0afb5823987e84e6d3dd8d0b269b7546166 (patch) | |
tree | 41c3b179e139a19a09dbf605e3a1d4aac6d8b0d7 | |
parent | 9a3e1657db4c0292fc06d6183a802af631c3666a (diff) | |
download | haskell-5857e0afb5823987e84e6d3dd8d0b269b7546166.tar.gz |
fix EBADF unqueueing in select backend (Trac #10590)
Alexander found a interesting case:
1. We have a queue of two waiters in a blocked_queue
2. first file descriptor changes state to RUNNABLE,
second changes to INVALID
3. awaitEvent function dequeued RUNNABLE thread to a
run queue and attempted to dequeue INVALID descriptor
to a run queue.
Unqueueing INVALID fails thusly:
#3 0x000000000045cf1c in barf (s=0x4c1cb0 "removeThreadFromDeQueue: not found")
at rts/RtsMessages.c:42
#4 0x000000000046848b in removeThreadFromDeQueue (...) at rts/Threads.c:249
#5 0x000000000049a120 in removeFromQueues (...) at rts/RaiseAsync.c:719
#6 0x0000000000499502 in throwToSingleThreaded__ (...) at rts/RaiseAsync.c:67
#7 0x0000000000499555 in throwToSingleThreaded (..) at rts/RaiseAsync.c:75
#8 0x000000000047c27d in awaitEvent (wait=rtsFalse) at rts/posix/Select.c:415
The problem here is a throwToSingleThreaded function that tries
to unqueue a TSO from blocked_queue, but awaitEvent function
leaves blocked_queue in a inconsistent state while traverses
over blocked_queue:
case RTS_FD_IS_READY:
IF_DEBUG(scheduler,
debugBelch("Waking up blocked thread %lu\n",
(unsigned long)tso->id));
tso->why_blocked = NotBlocked;
tso->_link = END_TSO_QUEUE; // Here we break the queue head
pushOnRunQueue(&MainCapability,tso);
break;
Signed-off-by: Sergei Trofimovich <siarheit@google.com>
Test Plan: tested on a sample from T10590
Reviewers: austin, bgamari, simonmar
Reviewed By: bgamari, simonmar
Subscribers: qnikst, thomie, bgamari
Differential Revision: https://phabricator.haskell.org/D1024
GHC Trac Issues: #10590, #4934
-rw-r--r-- | rts/RaiseAsync.c | 8 | ||||
-rw-r--r-- | rts/RaiseAsync.h | 6 | ||||
-rw-r--r-- | rts/posix/Select.c | 10 | ||||
-rw-r--r-- | testsuite/tests/rts/T10590.hs | 37 | ||||
-rw-r--r-- | testsuite/tests/rts/all.T | 5 |
5 files changed, 57 insertions, 9 deletions
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index 3b206ffa7e..267707cefb 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -23,12 +23,6 @@ #include "win32/IOManager.h" #endif -static StgTSO* raiseAsync (Capability *cap, - StgTSO *tso, - StgClosure *exception, - rtsBool stop_at_atomically, - StgUpdateFrame *stop_here); - static void removeFromQueues(Capability *cap, StgTSO *tso); static void removeFromMVarBlockedQueue (StgTSO *tso); @@ -777,7 +771,7 @@ removeFromQueues(Capability *cap, StgTSO *tso) * * -------------------------------------------------------------------------- */ -static StgTSO * +StgTSO * raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, rtsBool stop_at_atomically, StgUpdateFrame *stop_here) { diff --git a/rts/RaiseAsync.h b/rts/RaiseAsync.h index 6bfed8d6ca..1f939d4000 100644 --- a/rts/RaiseAsync.h +++ b/rts/RaiseAsync.h @@ -19,6 +19,12 @@ void blockedThrowTo (Capability *cap, StgTSO *target, MessageThrowTo *msg); +StgTSO* raiseAsync (Capability *cap, + StgTSO *tso, + StgClosure *exception, + rtsBool stop_at_atomically, + StgUpdateFrame *stop_here); + void throwToSingleThreaded (Capability *cap, StgTSO *tso, StgClosure *exception); diff --git a/rts/posix/Select.c b/rts/posix/Select.c index 4b1923504b..d5c9b553cb 100644 --- a/rts/posix/Select.c +++ b/rts/posix/Select.c @@ -375,6 +375,12 @@ awaitEvent(rtsBool wait) prev = NULL; { + /* + * The queue is being rebuilt in this loop: + * 'blocked_queue_hd' will contain already + * traversed blocked TSOs. As a result you + * can't use functions accessing 'blocked_queue_hd'. + */ for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) { next = tso->_link; int fd; @@ -412,8 +418,8 @@ awaitEvent(rtsBool wait) IF_DEBUG(scheduler, debugBelch("Killing blocked thread %lu on bad fd=%i\n", (unsigned long)tso->id, fd)); - throwToSingleThreaded(&MainCapability, tso, - (StgClosure *)blockedOnBadFD_closure); + raiseAsync(&MainCapability, tso, + (StgClosure *)blockedOnBadFD_closure, rtsFalse, NULL); break; case RTS_FD_IS_READY: IF_DEBUG(scheduler, diff --git a/testsuite/tests/rts/T10590.hs b/testsuite/tests/rts/T10590.hs new file mode 100644 index 0000000000..24198abb30 --- /dev/null +++ b/testsuite/tests/rts/T10590.hs @@ -0,0 +1,37 @@ +import Foreign.C +import Foreign.Marshal.Array +import Foreign.Storable +import Control.Concurrent + +-- The test works only on UNIX like. +-- unportable bits: +import qualified System.Posix.Internals as SPI +import qualified System.Posix.Types as SPT + +pipe :: IO (CInt, CInt) +pipe = allocaArray 2 $ \fds -> do + throwErrnoIfMinus1_ "pipe" $ SPI.c_pipe fds + rd <- peekElemOff fds 0 + wr <- peekElemOff fds 1 + return (rd, wr) + +main :: IO () +main = do + (r1, w1) <- pipe + (r2, _w2) <- pipe + _ <- forkIO $ do -- thread A + threadWaitRead (SPT.Fd r1) + _ <- forkIO $ do -- thread B + threadWaitRead (SPT.Fd r2) + yield -- switch to A, then B + -- now both are blocked + _ <- SPI.c_close w1 -- unblocking thread A fd + _ <- SPI.c_close r2 -- breaking thread B fd + yield -- kick RTS IO manager + +{- + Trac #10590 exposed a bug as: + T10590: internal error: removeThreadFromDeQueue: not found + (GHC version 7.11.20150702 for x86_64_unknown_linux) + Please report this as a GHC bug: http://www.haskell.org/ghc/reportabug + -} diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T index 8f6137f9d4..0e892499e0 100644 --- a/testsuite/tests/rts/all.T +++ b/testsuite/tests/rts/all.T @@ -318,3 +318,8 @@ test('T9839_05', [ only_ways(prof_ways), ignore_output, exit_code(1), extra_run_ test('T9839_06', [ only_ways(prof_ways), ignore_output, exit_code(1), extra_run_opts('+RTS -xtx')], compile_and_run, ['']) + +# ignore_output as RTS reports slightly different error messages +# in 'epoll' and 'select' backends on reading from EBADF +# mingw32 skip as UNIX pipe and close(fd) is used to exercise the problem +test('T10590', [ignore_output, when(opsys('mingw32'),skip)], compile_and_run, ['']) |