diff options
Diffstat (limited to 'testsuite/tests/lib/IO/encoding002.hs')
-rw-r--r-- | testsuite/tests/lib/IO/encoding002.hs | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/testsuite/tests/lib/IO/encoding002.hs b/testsuite/tests/lib/IO/encoding002.hs new file mode 100644 index 0000000000..65d60a3993 --- /dev/null +++ b/testsuite/tests/lib/IO/encoding002.hs @@ -0,0 +1,67 @@ +import Control.Monad + +import System.IO +import Control.Exception + +import Foreign.Marshal.Array +import Foreign.Ptr + +import GHC.Foreign +import GHC.IO.Encoding (TextEncoding, mkTextEncoding) + +import Data.Char +import Data.Word + +import Prelude hiding (catch) + + +decode :: TextEncoding -> [Word8] -> IO String +decode enc xs = withArrayLen xs (\sz p -> peekCStringLen enc (castPtr p, sz)) `catch` \e -> return (show (e :: IOException)) + +encode :: TextEncoding -> String -> IO [Word8] +encode enc cs = withCStringLen enc cs (\(p, sz) -> peekArray sz (castPtr p)) `catch` \e -> return (const [] (e :: IOException)) + +asc :: Char -> Word8 +asc = fromIntegral . ord + +families = [ ([asc 'H', asc 'i', 0xED, 0xB2, 0x80, asc '!'], + ["UTF-8", "UTF-8//IGNORE", "UTF-8//TRANSLIT", "UTF-8//ROUNDTRIP"]) + , ([asc 'H', 0, asc 'i', 0, 0xFF, 0xDF, 0xFF, 0xDF, asc '!', 0], + ["UTF-16LE", "UTF-16LE//IGNORE", "UTF-16LE//TRANSLIT", "UTF-16LE//ROUNDTRIP"]) + , ([0, asc 'H', 0, asc 'i', 0xDF, 0xFF, 0xDF, 0xFF, 0, asc '!'], + ["UTF-16BE", "UTF-16BE//IGNORE", "UTF-16BE//TRANSLIT", "UTF-16BE//ROUNDTRIP"]) + , ([asc 'H', 0, 0, 0, asc 'i', 0, 0, 0, 0xED, 0xB2, 0x80, 0, asc '!', 0, 0, 0], + ["UTF-32LE", "UTF-32LE//IGNORE", "UTF-32LE//TRANSLIT", "UTF-32LE//ROUNDTRIP"]) + , ([0, 0, 0, asc 'H', 0, 0, 0, asc 'i', 0, 0x80, 0xB2, 0xED, 0, 0, 0, asc '!'], + ["UTF-32BE", "UTF-32BE//IGNORE", "UTF-32BE//TRANSLIT", "UTF-32BE//ROUNDTRIP"]) + ] + +main = do + surrogate_enc <- mkTextEncoding "UTF-8//ROUNDTRIP" + + -- Test that invalid input is correctly roundtripped as surrogates + -- This only works for the UTF-8 UTF since it is the only UTF which + -- is an ASCII superset. + putStrLn $ "== UTF-8: roundtripping" + let invalid_bytes = [asc 'H', asc 'i', 0xED, 0xB2, 0x80, asc '!'] + surrogates <- decode surrogate_enc invalid_bytes + invalid_bytes' <- encode surrogate_enc surrogates + print invalid_bytes + print surrogates + print invalid_bytes' + print (invalid_bytes == invalid_bytes') + putStrLn "" + + forM families $ \(invalid_bytes, enc_names) -> do + encs <- mapM mkTextEncoding enc_names + let name = head enc_names + + -- How we deal with decoding errors in the various modes: + putStrLn $ "== " ++ name ++ ": decoding" + forM encs $ \enc -> decode enc invalid_bytes >>= print + + -- How about encoding errors, particularly those from embedded surrogates? + putStrLn $ "== " ++ name ++ ": encoding" + forM encs $ \enc -> encode enc "Hi\xDC80!" >>= print + + putStrLn "" |