1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
{-# OPTIONS_GHC -fno-implicit-prelude -funbox-strict-fields #-}
-----------------------------------------------------------------------------
-- |
-- Module : GHC.IO.Encoding
-- Copyright : (c) The University of Glasgow, 2008-2009
-- License : see libraries/base/LICENSE
--
-- Maintainer : libraries@haskell.org
-- Stability : internal
-- Portability : non-portable
--
-- Text codecs for I/O
--
-----------------------------------------------------------------------------
module GHC.IO.Encoding (
BufferCodec(..), TextEncoding(..), TextEncoder, TextDecoder,
latin1, latin1_encode, latin1_decode,
utf8,
utf16, utf16le, utf16be,
utf32, utf32le, utf32be,
localeEncoding,
mkTextEncoding,
) where
import GHC.Base
--import GHC.IO
import GHC.IO.Buffer
import GHC.IO.Encoding.Types
import GHC.Word
#if !defined(mingw32_HOST_OS)
import qualified GHC.IO.Encoding.Iconv as Iconv
#endif
import qualified GHC.IO.Encoding.Latin1 as Latin1
import qualified GHC.IO.Encoding.UTF8 as UTF8
import qualified GHC.IO.Encoding.UTF16 as UTF16
import qualified GHC.IO.Encoding.UTF32 as UTF32
#if defined(mingw32_HOST_OS)
import Data.Maybe
import GHC.IO.Exception
#endif
-- -----------------------------------------------------------------------------
-- | The Latin1 (ISO8859-1) encoding. This encoding maps bytes
-- directly to the first 256 Unicode code points, and is thus not a
-- complete Unicode encoding. An attempt to write a character greater than
-- '\255' to a 'Handle' using the 'latin1' encoding will result in an error.
latin1 :: TextEncoding
latin1 = Latin1.latin1_checked
-- | The UTF-8 Unicode encoding
utf8 :: TextEncoding
utf8 = UTF8.utf8
-- | The UTF-16 Unicode encoding (a byte-order-mark should be used to
-- indicate endianness).
utf16 :: TextEncoding
utf16 = UTF16.utf16
-- | The UTF-16 Unicode encoding (litte-endian)
utf16le :: TextEncoding
utf16le = UTF16.utf16le
-- | The UTF-16 Unicode encoding (big-endian)
utf16be :: TextEncoding
utf16be = UTF16.utf16be
-- | The UTF-32 Unicode encoding (a byte-order-mark should be used to
-- indicate endianness).
utf32 :: TextEncoding
utf32 = UTF32.utf32
-- | The UTF-32 Unicode encoding (litte-endian)
utf32le :: TextEncoding
utf32le = UTF32.utf32le
-- | The UTF-32 Unicode encoding (big-endian)
utf32be :: TextEncoding
utf32be = UTF32.utf32be
-- | The Unicode encoding of the current locale
localeEncoding :: TextEncoding
#if !defined(mingw32_HOST_OS)
localeEncoding = Iconv.localeEncoding
#else
localeEncoding = Latin1.latin1
#endif
-- | Look up the named Unicode encoding. May fail with
--
-- * 'isDoesNotExistError' if the encoding is unknown
--
-- The set of known encodings is system-dependent.
--
mkTextEncoding :: String -> IO TextEncoding
#if !defined(mingw32_HOST_OS)
mkTextEncoding = Iconv.mkTextEncoding
#else
mkTextEncoding "UTF-8" = return utf8
mkTextEncoding "UTF-16" = return utf16
mkTextEncoding "UTF-16LE" = return utf16le
mkTextEncoding "UTF-16BE" = return utf16be
mkTextEncoding "UTF-32" = return utf32
mkTextEncoding "UTF-32LE" = return utf32le
mkTextEncoding "UTF-32BE" = return utf32be
mkTextEncoding e = ioException
(IOError Nothing NoSuchThing "mkTextEncoding"
("unknown encoding:" ++ e) Nothing Nothing)
#endif
latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
latin1_encode = Latin1.latin1_encode -- unchecked, used for binary
--latin1_encode = unsafePerformIO $ do mkTextEncoder Iconv.latin1 >>= return.encode
latin1_decode :: Buffer Word8 -> CharBuffer -> IO (Buffer Word8, CharBuffer)
latin1_decode = Latin1.latin1_decode
--latin1_decode = unsafePerformIO $ do mkTextDecoder Iconv.latin1 >>= return.encode
|