diff options
author | Max Bolingbroke <batterseapower@hotmail.com> | 2013-04-10 21:13:28 +0100 |
---|---|---|
committer | Max Bolingbroke <batterseapower@hotmail.com> | 2013-04-10 21:13:28 +0100 |
commit | 69a8c2d23a75f9f1bda59f14c61fbd057e200126 (patch) | |
tree | 6280f1c1eb9d24cc9de24c960011028d0e1732f4 /libraries/base/GHC/IO/Handle | |
parent | 90d1d92ce6ac7d34111f4b6c583a1dbc77fb90a7 (diff) | |
download | haskell-69a8c2d23a75f9f1bda59f14c61fbd057e200126.tar.gz |
Fix #7522 by checking for empty byte buffers a little more
Quite a few lines have changed but that is mostly comments.
Diffstat (limited to 'libraries/base/GHC/IO/Handle')
-rw-r--r-- | libraries/base/GHC/IO/Handle/Internals.hs | 32 |
1 files changed, 29 insertions, 3 deletions
diff --git a/libraries/base/GHC/IO/Handle/Internals.hs b/libraries/base/GHC/IO/Handle/Internals.hs
index 79228d3500..855e31cb44 100644
--- a/libraries/base/GHC/IO/Handle/Internals.hs
+++ b/libraries/base/GHC/IO/Handle/Internals.hs
@@ -378,7 +378,15 @@ streamEncode codec from to = go (from, to)
             -- underflow particularly, and we want to delay errors about invalid
             -- sequences as far as possible.
             case why of
-              Encoding.InvalidSequence | bufL from == bufL from' -> recover codec from' to' >>= go
+              Encoding.InvalidSequence | bufL from == bufL from' -> do
+                -- NB: it is OK to call recover here. Because we saw InvalidSequence, by the invariants
+                -- on "encode" it must be the case that there is at least one elements available in the output
+                -- buffer. Furthermore, clearly there is at least one element in the input buffer since we found
+                -- something invalid there!
+                --debugIO $ "Before streamEncode recovery: from=" ++ summaryBuffer from' ++ ", to=" ++ summaryBuffer to'
+                (from', to') <- recover codec from' to'
+                --debugIO $ "After streamEncode recovery: from=" ++ summaryBuffer from' ++ ", to=" ++ summaryBuffer to'
+                go (from', to')
               _ -> return (from', to')
 
 -- -----------------------------------------------------------------------------
@@ -802,6 +810,16 @@ debugIO s
 -- Read characters into the provided buffer. Return when any
 -- characters are available; raise an exception if the end of
 -- file is reached.
+--
+-- In uses of readTextDevice within base, the input buffer is either:
+--   * empty
+--   * or contains a single \r (when doing newline translation)
+--
+-- The input character buffer must have a capacity at least 1 greater
+-- than the number of elements it currently contains.
+--
+-- Users of this function expect that the buffer returned contains
+-- at least 1 more character than the input buffer.
 readTextDevice :: Handle__ -> CharBuffer -> IO CharBuffer
 readTextDevice h_@Handle__{..} cbuf = do
   --
@@ -832,9 +850,13 @@ readTextDevice h_@Handle__{..} cbuf = do
   debugIO ("readTextDevice after decoding: cbuf=" ++ summaryBuffer cbuf' ++
         " bbuf=" ++ summaryBuffer bbuf2)
 
+  -- We can't return from readTextDevice without reading at least a single extra character,
+  -- so check that we have managed to achieve that
   writeIORef haByteBuffer bbuf2
-  if bufR cbuf' == bufR cbuf -- no new characters
-     then readTextDevice' h_ bbuf2 cbuf -- we need more bytes to make a Char
+  if bufR cbuf' == bufR cbuf
+     -- we need more bytes to make a Char. NB: bbuf2 may be empty (even though bbuf1 wasn't) when we
+     -- are using an encoding that can skip bytes without outputting characters, such as UTF8//IGNORE
+     then readTextDevice' h_ bbuf2 cbuf
      else return cbuf'
 
 -- we have an incomplete byte sequence at the end of the buffer: try to
@@ -853,7 +875,11 @@ readTextDevice' h_@Handle__{..} bbuf0 cbuf0 = do
   (r,bbuf2) <- Buffered.fillReadBuffer haDevice bbuf1
   if r == 0
    then do
+     -- bbuf2 can be empty here when we encounter an invalid byte sequence at the end of the input
+     -- with a //IGNORE codec which consumes bytes without outputting characters
+     if isEmptyBuffer bbuf2 then ioe_EOF else do
      (bbuf3, cbuf1) <- recover decoder bbuf2 cbuf0
+     debugIO ("readTextDevice' after recovery: bbuf=" ++ summaryBuffer bbuf3 ++ ", cbuf=" ++ summaryBuffer cbuf1)
      writeIORef haByteBuffer bbuf3
      -- We should recursively invoke readTextDevice after recovery,
      -- if recovery did not add at least one new character to the buffer: