summary | refs | log | tree | commit | diff
path: root/libraries/base/GHC/IO/Handle/Internals.hs
diff options
context:
space:
mode:
author Ben.Lippmeier@anu.edu.au <unknown> 2009-09-30 08:42:29 +0000
committer Ben.Lippmeier@anu.edu.au <unknown> 2009-09-30 08:42:29 +0000
commit 7f5e4736b3fdf5071a68d8b05d14e947bc45995a (patch)
tree 02cb2ac71a593ea0d527077579e7f0c522b50fa9 /libraries/base/GHC/IO/Handle/Internals.hs
parent ad96a2248d351f0344c13413e34596a60fef066b (diff)
download haskell-7f5e4736b3fdf5071a68d8b05d14e947bc45995a.tar.gz
Strip any Byte Order Mark (BOM) from the front of decoded streams.
When decoding to UTF-32, Solaris iconv inserts a BOM at the front of the stream, but Linux iconv doesn't.
Diffstat (limited to 'libraries/base/GHC/IO/Handle/Internals.hs')
-rw-r--r-- libraries/base/GHC/IO/Handle/Internals.hs 33
1 file changed, 6 insertions, 27 deletions
diff --git a/libraries/base/GHC/IO/Handle/Internals.hs b/libraries/base/GHC/IO/Handle/Internals.hs
index b8dc82ab70..cc9e3d3aa5 100644
--- a/libraries/base/GHC/IO/Handle/Internals.hs
+++ b/libraries/base/GHC/IO/Handle/Internals.hs
@@ -727,7 +727,7 @@ readTextDevice h_@Handle__{..} cbuf = do
debugIO ("readTextDevice after reading: bbuf=" ++ summaryBuffer bbuf1)
- (bbuf2,cbuf2) <-
+ (bbuf2,cbuf') <-
case haDecoder of
Nothing -> do
writeIORef haLastDecode (error "codec_state", bbuf1)
@@ -737,16 +737,13 @@ readTextDevice h_@Handle__{..} cbuf = do
writeIORef haLastDecode (state, bbuf1)
(encode decoder) bbuf1 cbuf
- debugIO ("readTextDevice after decoding: cbuf=" ++ summaryBuffer cbuf2 ++
+ debugIO ("readTextDevice after decoding: cbuf=" ++ summaryBuffer cbuf' ++
" bbuf=" ++ summaryBuffer bbuf2)
- cbuf3 <- stripByteOrderMark cbuf2
-
writeIORef haByteBuffer bbuf2
- if bufR cbuf3 == bufR cbuf -- no new characters
+ if bufR cbuf' == bufR cbuf -- no new characters
then readTextDevice' h_ bbuf2 cbuf -- we need more bytes to make a Char
- else return cbuf3
-
+ else return cbuf'
-- we have an incomplete byte sequence at the end of the buffer: try to
-- read more bytes.
@@ -795,7 +792,7 @@ readTextDeviceNonBlocking h_@Handle__{..} cbuf = do
if isNothing r then ioe_EOF else do -- raise EOF
return bbuf1
- (bbuf2,cbuf2) <-
+ (bbuf2,cbuf') <-
case haDecoder of
Nothing -> do
writeIORef haLastDecode (error "codec_state", bbuf1)
@@ -805,23 +802,5 @@ readTextDeviceNonBlocking h_@Handle__{..} cbuf = do
writeIORef haLastDecode (state, bbuf1)
(encode decoder) bbuf1 cbuf
- cbuf3 <- stripByteOrderMark cbuf2
-
writeIORef haByteBuffer bbuf2
- return cbuf3
-
-
--- | When converting from UTF-8 to UCS-4, Solaris iconv adds a Byte Order Mark (BOM)
--- of value 0xfeff to the start of the stream. We don't want to return this to
--- the caller, so strip it here. This is a safe operation for other platforms,
--- so always do it.
-stripByteOrderMark :: CharBuffer -> IO CharBuffer
-stripByteOrderMark cbuf
- | isEmptyBuffer cbuf
- = return cbuf
-
- | otherwise
- = do firstChar <- peekCharBuf (bufRaw cbuf) 0
- if firstChar == chr 0xfeff
- then return (bufferRemove 1 cbuf)
- else return cbuf
+ return cbuf'