summaryrefslogtreecommitdiff
path: root/testsuite
diff options
context:
space:
mode:
authorThomas Miedema <thomasmiedema@gmail.com>2014-09-01 15:11:50 -0500
committerAustin Seipp <austin@well-typed.com>2014-09-01 15:11:50 -0500
commit9e939403241b758a685834c9ff62edcd3172a2cf (patch)
treed59a43de3ff5440685c3961fb3abd145ff7fafc9 /testsuite
parente81e02807c7a0e723ed7b0e83418c95f99140449 (diff)
downloadhaskell-9e939403241b758a685834c9ff62edcd3172a2cf.tar.gz
StringBuffer should not contain initial byte-order mark (BOM)
Summary: Just skipping over a BOM, but leaving it in the StringBuffer, is not sufficient. The Lexer calls prevChar when a regular expression starts with '^' (which is a shorthand for '\n^'). It would never match on the first line, since instead of '\n', prevChar would still return '\xfeff'. Test Plan: validate Reviewers: austin, ezyang Reviewed By: austin, ezyang Subscribers: simonmar, ezyang, carter Differential Revision: https://phabricator.haskell.org/D176 GHC Trac Issues: #6016
Diffstat (limited to 'testsuite')
-rw-r--r--testsuite/.gitignore2
-rw-r--r--testsuite/tests/parser/unicode/T6016.hs34
-rw-r--r--testsuite/tests/parser/unicode/all.T1
3 files changed, 37 insertions, 0 deletions
diff --git a/testsuite/.gitignore b/testsuite/.gitignore
index 591545cdc3..4f8ac870e6 100644
--- a/testsuite/.gitignore
+++ b/testsuite/.gitignore
@@ -1074,6 +1074,8 @@ mk/ghcconfig_*_inplace_bin_ghc-stage2.exe.mk
/tests/parser/should_run/readRun004
/tests/parser/unicode/1744
/tests/parser/unicode/T1744
+/tests/parser/unicode/T6016
+/tests/parser/unicode/T6016-twoBOMs
/tests/parser/unicode/utf8_024
/tests/patsyn/should_run/bidir-explicit
/tests/patsyn/should_run/bidir-explicit-scope
diff --git a/testsuite/tests/parser/unicode/T6016.hs b/testsuite/tests/parser/unicode/T6016.hs
new file mode 100644
index 0000000000..5783a72843
--- /dev/null
+++ b/testsuite/tests/parser/unicode/T6016.hs
@@ -0,0 +1,34 @@
+module Main where
+
+import Control.Exception
+import Data.Char
+import System.IO
+
+import StringBuffer
+
+twoBOMs = "T6016-twoBOMs"
+
+ignoreFirstBOM = do
+ -- StringBuffer should not contain initial byte-order mark.
+ --
+ -- Just skipping over it, but leaving it in the StringBuffer, is not
+ -- sufficient. The Lexer calls prevChar when a regular expression
+ -- starts with '^' (which is a shorthand for '\n^'). It would never
+ -- match on the first line, since instead of '\n', prevChar would
+ -- still return '\xfeff'.
+ s <- hGetStringBuffer twoBOMs
+ assert (prevChar s '\n' == '\n') return ()
+
+dontIgnoreSecondBOM = do
+ -- U+FEFF is considered a BOM only if it appears as the first
+ -- character of a file.
+ h <- openBinaryFile twoBOMs ReadMode
+ hSeek h AbsoluteSeek 3
+ s <- hGetStringBufferBlock h 3
+ hClose h
+ assert (currentChar s == '\xfeff') return ()
+
+main = do
+ writeFile twoBOMs "\xfeff\xfeff"
+ ignoreFirstBOM
+ dontIgnoreSecondBOM
diff --git a/testsuite/tests/parser/unicode/all.T b/testsuite/tests/parser/unicode/all.T
index a8e19ebec9..2ff7edf927 100644
--- a/testsuite/tests/parser/unicode/all.T
+++ b/testsuite/tests/parser/unicode/all.T
@@ -20,4 +20,5 @@ test('T1744', normal, compile_and_run, [''])
test('T1103', normal, compile, [''])
test('T2302', only_ways(['normal']), compile_fail, [''])
test('T4373', normal, compile, [''])
+test('T6016', extra_clean('T6016-twoBOMs'), compile_and_run, ['-package ghc'])
test('T7671', normal, compile, [''])