summaryrefslogtreecommitdiff
path: root/libraries/base/tests
diff options
context:
space:
mode:
authorReid Barton <rwbarton@gmail.com>2015-07-21 19:13:20 +0200
committerBen Gamari <ben@smart-cactus.org>2015-07-21 21:42:54 +0200
commitdbe6dac96543f426297a59d8d16c3f5afacf42d4 (patch)
treeece8ced882e19add696a4dfc99e298a93ad1a247 /libraries/base/tests
parent97a50d5096e134a1b713a085a7ba859bdd7143d1 (diff)
downloadhaskell-dbe6dac96543f426297a59d8d16c3f5afacf42d4.tar.gz
When iconv is unavailable, use an ASCII encoding to encode ASCII
D898 and D1059 implemented a fallback behavior to handle the case that the end user's iconv installation is broken (typically due to running inside a chroot in which the necessary locale files and/or gconv modules have not been installed). In this case, if the program requests an ASCII locale, GHC's char8 encoding is used rather than the program failing. However, silently mangling data like char8 does when the programmer did not ask for it is poor behavior, for reasons described in D1059. This commit implements an ASCII encoding and uses it in the fallback case when iconv is unavailable and the user has requested ASCII. Test Plan: Added tests for the encodings defined in Latin1. Also, manually ran a statically-linked executable of that test in a chroot and the tests passed (up to the ones that call mkTextEncoding "LATIN1", since there is no fallback from iconv for that case yet). Reviewers: austin, hvr, hsyl20, bgamari Reviewed By: hsyl20, bgamari Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D1085 GHC Trac Issues: #7695, #10623
Diffstat (limited to 'libraries/base/tests')
-rw-r--r--libraries/base/tests/.gitignore1
-rw-r--r--libraries/base/tests/IO/all.T1
-rw-r--r--libraries/base/tests/IO/encoding005.hs115
-rw-r--r--libraries/base/tests/IO/encoding005.stdout5
4 files changed, 122 insertions, 0 deletions
diff --git a/libraries/base/tests/.gitignore b/libraries/base/tests/.gitignore
index af90b5e47c..a430bd700a 100644
--- a/libraries/base/tests/.gitignore
+++ b/libraries/base/tests/.gitignore
@@ -105,6 +105,7 @@
/IO/encoding002
/IO/encoding003
/IO/encoding004
+/IO/encoding005
/IO/encodingerror001
/IO/environment001
/IO/finalization001
diff --git a/libraries/base/tests/IO/all.T b/libraries/base/tests/IO/all.T
index 43d94da452..29779457e4 100644
--- a/libraries/base/tests/IO/all.T
+++ b/libraries/base/tests/IO/all.T
@@ -138,6 +138,7 @@ test('encoding001',
test('encoding002', normal, compile_and_run, [''])
test('encoding003', normal, compile_and_run, [''])
test('encoding004', normal, compile_and_run, [''])
+test('encoding005', normal, compile_and_run, [''])
test('environment001',
[extra_clean(['environment001'])],
diff --git a/libraries/base/tests/IO/encoding005.hs b/libraries/base/tests/IO/encoding005.hs
new file mode 100644
index 0000000000..99db84af59
--- /dev/null
+++ b/libraries/base/tests/IO/encoding005.hs
@@ -0,0 +1,115 @@
+import Control.Monad
+import Data.Word (Word8)
+import Foreign.Ptr
+import Foreign.Marshal.Array
+import GHC.Foreign (peekCStringLen, withCStringLen)
+import GHC.IO.Encoding.Failure (CodingFailureMode(..))
+import qualified GHC.IO.Encoding.Latin1 as Latin1
+import System.IO
+import System.IO.Error
+
+-- Tests for single-byte encodings that map directly to Unicode
+-- (module GHC.IO.Encoding.Latin1)
+
+eitherToMaybe :: Either a b -> Maybe b
+eitherToMaybe (Left _) = Nothing
+eitherToMaybe (Right b) = Just b
+
+decode :: TextEncoding -> [Word8] -> IO (Maybe String)
+decode enc xs = fmap eitherToMaybe . tryIOError $ withArrayLen xs (\sz p -> peekCStringLen enc (castPtr p, sz))
+
+encode :: TextEncoding -> String -> IO (Maybe [Word8])
+encode enc cs = fmap eitherToMaybe . tryIOError $ withCStringLen enc cs (\(p, sz) -> peekArray sz (castPtr p))
+
+testIO :: (Eq a, Show a) => IO a -> a -> IO ()
+testIO action expected = do
+ result <- action
+ when (result /= expected) $
+ putStrLn $ "Test failed: expected " ++ show expected ++ ", but got " ++ show result
+
+-- Test char8-like encodings
+test_char8 :: TextEncoding -> IO ()
+test_char8 enc = do
+ testIO (decode enc [0..0xff]) $ Just ['\0'..'\xff']
+
+ testIO (encode enc ['\0'..'\x200']) $ Just ([0..0xff] ++ [0..0xff] ++ [0])
+
+-- Test latin1-like encodings
+test_latin1 :: CodingFailureMode -> TextEncoding -> IO ()
+test_latin1 cfm enc = do
+ testIO (decode enc [0..0xff]) $ Just ['\0'..'\xff']
+
+ testIO (encode enc ['\0'..'\xff']) $ Just [0..0xff]
+ testIO (encode enc "\xfe\xff\x100\x101\x100\xff\xfe") $ case cfm of
+ ErrorOnCodingFailure -> Nothing
+ IgnoreCodingFailure -> Just [0xfe,0xff,0xff,0xfe]
+ TransliterateCodingFailure -> Just [0xfe,0xff,0x3f,0x3f,0x3f,0xff,0xfe]
+ -- N.B. The argument "LATIN1//TRANSLIT" to mkTextEncoding does not
+ -- correspond to "LATIN1//TRANSLIT" in iconv! Instead GHC asks iconv
+ -- to encode to "LATIN1" and uses its own "evil hack" to insert '?'
+ -- (ASCII 0x3f) in place of failures. See GHC.IO.Encoding.recoverEncode.
+ --
+ -- U+0100 is LATIN CAPITAL LETTER A WITH MACRON, which iconv would
+ -- transliterate to 'A' (ASCII 0x41). Similarly iconv would
+ -- transliterate U+0101 LATIN SMALL LETTER A WITH MACRON to 'a'
+ -- (ASCII 0x61).
+ RoundtripFailure -> Nothing
+
+test_ascii :: CodingFailureMode -> TextEncoding -> IO ()
+test_ascii cfm enc = do
+ testIO (decode enc [0..0x7f]) $ Just ['\0'..'\x7f']
+ testIO (decode enc [0x7e,0x7f,0x80,0x81,0x80,0x7f,0x7e]) $ case cfm of
+ ErrorOnCodingFailure -> Nothing
+ IgnoreCodingFailure -> Just "\x7e\x7f\x7f\x7e"
+ TransliterateCodingFailure -> Just "\x7e\x7f\xfffd\xfffd\xfffd\x7f\x7e"
+ -- Another GHC special: decode invalid input to the Char U+FFFD
+ -- REPLACEMENT CHARACTER.
+ RoundtripFailure -> Just "\x7e\x7f\xdc80\xdc81\xdc80\x7f\x7e"
+ -- GHC's PEP383-style String-encoding of invalid input,
+ -- see Note [Roundtripping]
+
+ testIO (encode enc ['\0'..'\x7f']) $ Just [0..0x7f]
+ testIO (encode enc "\x7e\x7f\x80\x81\x80\x7f\xe9") $ case cfm of
+ ErrorOnCodingFailure -> Nothing
+ IgnoreCodingFailure -> Just [0x7e,0x7f,0x7f]
+ TransliterateCodingFailure -> Just [0x7e,0x7f,0x3f,0x3f,0x3f,0x7f,0x3f]
+ -- See comment in test_latin1. iconv -t ASCII//TRANSLIT would encode
+ -- U+00E9 LATIN SMALL LETTER E WITH ACUTE as 'e' (ASCII 0x65).
+ RoundtripFailure -> Nothing
+
+ -- Test roundtripping for good measure
+ case cfm of
+ RoundtripFailure -> do
+ Just s <- decode enc [0..0xff]
+ testIO (encode enc s) $ Just [0..0xff]
+ _ -> return ()
+
+main = do
+ putStrLn "char8 tests"
+ test_char8 char8 -- char8 never fails in either direction
+
+ -- These use GHC's own implementation
+ putStrLn "Latin1.ascii tests"
+ test_ascii ErrorOnCodingFailure (Latin1.ascii)
+ test_ascii IgnoreCodingFailure (Latin1.mkAscii IgnoreCodingFailure)
+ test_ascii TransliterateCodingFailure (Latin1.mkAscii TransliterateCodingFailure)
+ test_ascii RoundtripFailure (Latin1.mkAscii RoundtripFailure)
+
+ putStrLn "Latin1.latin1_checked tests"
+ test_latin1 ErrorOnCodingFailure (Latin1.latin1_checked)
+ test_latin1 IgnoreCodingFailure (Latin1.mkLatin1_checked IgnoreCodingFailure)
+ test_latin1 TransliterateCodingFailure (Latin1.mkLatin1_checked TransliterateCodingFailure)
+ test_latin1 RoundtripFailure (Latin1.mkLatin1_checked RoundtripFailure)
+
+ -- These use iconv (normally, unless it is broken)
+ putStrLn "mkTextEncoding ASCII tests"
+ test_ascii ErrorOnCodingFailure =<< mkTextEncoding "ASCII"
+ test_ascii IgnoreCodingFailure =<< mkTextEncoding "ASCII//IGNORE"
+ test_ascii TransliterateCodingFailure =<< mkTextEncoding "ASCII//TRANSLIT"
+ test_ascii RoundtripFailure =<< mkTextEncoding "ASCII//ROUNDTRIP"
+
+ putStrLn "mkTextEncoding LATIN1 tests"
+ test_latin1 ErrorOnCodingFailure =<< mkTextEncoding "LATIN1"
+ test_latin1 IgnoreCodingFailure =<< mkTextEncoding "LATIN1//IGNORE"
+ test_latin1 TransliterateCodingFailure =<< mkTextEncoding "LATIN1//TRANSLIT"
+ test_latin1 RoundtripFailure =<< mkTextEncoding "LATIN1//ROUNDTRIP"
diff --git a/libraries/base/tests/IO/encoding005.stdout b/libraries/base/tests/IO/encoding005.stdout
new file mode 100644
index 0000000000..664a193592
--- /dev/null
+++ b/libraries/base/tests/IO/encoding005.stdout
@@ -0,0 +1,5 @@
+char8 tests
+Latin1.ascii tests
+Latin1.latin1_checked tests
+mkTextEncoding ASCII tests
+mkTextEncoding LATIN1 tests