diff options
author | Ben.Lippmeier@anu.edu.au <unknown> | 2009-09-30 08:42:29 +0000 |
---|---|---|
committer | Ben.Lippmeier@anu.edu.au <unknown> | 2009-09-30 08:42:29 +0000 |
commit | 7f5e4736b3fdf5071a68d8b05d14e947bc45995a (patch) | |
tree | 02cb2ac71a593ea0d527077579e7f0c522b50fa9 /libraries/base/GHC/IO | |
parent | ad96a2248d351f0344c13413e34596a60fef066b (diff) | |
download | haskell-7f5e4736b3fdf5071a68d8b05d14e947bc45995a.tar.gz |
Strip any Byte Order Mark (BOM) from the front of decoded streams.
When decoding to UTF-32, Solaris iconv inserts a BOM at the front
of the stream, but Linux iconv doesn't.
Diffstat (limited to 'libraries/base/GHC/IO')
-rw-r--r-- | libraries/base/GHC/IO/Handle/Internals.hs | 33 |
1 files changed, 6 insertions, 27 deletions
diff --git a/libraries/base/GHC/IO/Handle/Internals.hs b/libraries/base/GHC/IO/Handle/Internals.hs
index b8dc82ab70..cc9e3d3aa5 100644
--- a/libraries/base/GHC/IO/Handle/Internals.hs
+++ b/libraries/base/GHC/IO/Handle/Internals.hs
@@ -727,7 +727,7 @@ readTextDevice h_@Handle__{..} cbuf = do
 
   debugIO ("readTextDevice after reading: bbuf=" ++ summaryBuffer bbuf1)
 
-  (bbuf2,cbuf2) <-
+  (bbuf2,cbuf') <-
       case haDecoder of
           Nothing -> do
                writeIORef haLastDecode (error "codec_state", bbuf1)
@@ -737,16 +737,13 @@ readTextDevice h_@Handle__{..} cbuf = do
                writeIORef haLastDecode (state, bbuf1)
                (encode decoder) bbuf1 cbuf
 
-  debugIO ("readTextDevice after decoding: cbuf=" ++ summaryBuffer cbuf2 ++
+  debugIO ("readTextDevice after decoding: cbuf=" ++ summaryBuffer cbuf' ++
         " bbuf=" ++ summaryBuffer bbuf2)
 
-  cbuf3 <- stripByteOrderMark cbuf2
-
   writeIORef haByteBuffer bbuf2
-  if bufR cbuf3 == bufR cbuf -- no new characters
+  if bufR cbuf' == bufR cbuf -- no new characters
      then readTextDevice' h_ bbuf2 cbuf -- we need more bytes to make a Char
-     else return cbuf3
-
+     else return cbuf'
 
 -- we have an incomplete byte sequence at the end of the buffer: try to
 -- read more bytes.
@@ -795,7 +792,7 @@ readTextDeviceNonBlocking h_@Handle__{..} cbuf = do
                    if isNothing r then ioe_EOF else do -- raise EOF
                    return bbuf1
 
-  (bbuf2,cbuf2) <-
+  (bbuf2,cbuf') <-
       case haDecoder of
           Nothing -> do
                writeIORef haLastDecode (error "codec_state", bbuf1)
@@ -805,23 +802,5 @@ readTextDeviceNonBlocking h_@Handle__{..} cbuf = do
                writeIORef haLastDecode (state, bbuf1)
                (encode decoder) bbuf1 cbuf
 
-  cbuf3 <- stripByteOrderMark cbuf2
-
   writeIORef haByteBuffer bbuf2
-  return cbuf3
-
-
--- | When converting from UTF-8 to UCS-4, Solaris iconv adds a Byte Order Mark (BOM)
--- of value 0xfeff to the start of the stream. We don't want to return this to
--- the caller, so strip it here. This is a safe operation for other platforms,
--- so always do it.
-stripByteOrderMark :: CharBuffer -> IO CharBuffer
-stripByteOrderMark cbuf
- | isEmptyBuffer cbuf
- = return cbuf
-
- | otherwise
- = do firstChar <- peekCharBuf (bufRaw cbuf) 0
-      if firstChar == chr 0xfeff
-         then return (bufferRemove 1 cbuf)
-         else return cbuf
+  return cbuf'