summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoachim Breitner <mail@joachim-breitner.de>2015-09-23 10:10:03 +0200
committerJoachim Breitner <mail@joachim-breitner.de>2015-09-25 13:09:53 +0200
commitf7fd864ce6d41cf22d25f18a0cdc5e2e9db71304 (patch)
tree3c3fbb5e34f88daa53997bd2454b70f5452fe666
parenta0b1f414a459e102f5c3d93bfbf53ebe0d81c467 (diff)
downloadhaskell-f7fd864ce6d41cf22d25f18a0cdc5e2e9db71304.tar.gz
Skip a possible BOM using the UTF-8 encoding
Decode the start of the file as UTF-8 rather than with the system locale, which might be something else. This fixes bug #10907. A test is added, but it is less useful than it could be until task #10909 is done. Differential Revision: D1274
-rw-r--r--compiler/utils/StringBuffer.hs10
-rw-r--r--testsuite/tests/parser/unicode/T10907.hs1
-rw-r--r--testsuite/tests/parser/unicode/all.T3
3 files changed, 11 insertions, 3 deletions
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index 2e339d8d75..6b39fc8608 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -53,6 +53,8 @@ import Data.Maybe
import Control.Exception
import System.IO
import System.IO.Unsafe ( unsafePerformIO )
+import GHC.IO.Encoding.UTF8 ( mkUTF8 )
+import GHC.IO.Encoding.Failure ( CodingFailureMode(IgnoreCodingFailure) )
import GHC.Exts
@@ -131,14 +133,16 @@ skipBOM h size offset =
then do
-- Validate assumption that handle is in binary mode.
ASSERTM( hGetEncoding h >>= return . isNothing )
- -- Temporarily select text mode to make `hLookAhead` and
- -- `hGetChar` return full Unicode characters.
- bracket_ (hSetBinaryMode h False) (hSetBinaryMode h True) $ do
+ -- Temporarily select utf8 encoding with error ignoring,
+ -- to make `hLookAhead` and `hGetChar` return full Unicode characters.
+ bracket_ (hSetEncoding h safeEncoding) (hSetBinaryMode h True) $ do
c <- hLookAhead h
if c == '\xfeff'
then hGetChar h >> hTell h
else return offset
else return offset
+ where
+ safeEncoding = mkUTF8 IgnoreCodingFailure
newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
newUTF8StringBuffer buf ptr size = do
diff --git a/testsuite/tests/parser/unicode/T10907.hs b/testsuite/tests/parser/unicode/T10907.hs
new file mode 100644
index 0000000000..60aa3e7394
--- /dev/null
+++ b/testsuite/tests/parser/unicode/T10907.hs
@@ -0,0 +1 @@
+module ByteOrderMark () where
diff --git a/testsuite/tests/parser/unicode/all.T b/testsuite/tests/parser/unicode/all.T
index ec08ae552c..6972a0d602 100644
--- a/testsuite/tests/parser/unicode/all.T
+++ b/testsuite/tests/parser/unicode/all.T
@@ -22,3 +22,6 @@ test('T2302', only_ways(['normal']), compile_fail, [''])
test('T4373', normal, compile, [''])
test('T6016', extra_clean(['T6016-twoBOMs']), compile_and_run, ['-package ghc'])
test('T7671', normal, compile, [''])
+# TODO: This test ought to be run in a non-UTF8 locale, but this is not yet
+# supported by the test suite (see #10907 and #10909)
+test('T10907', normal, compile, [''])