summary | refs | log | tree | commit | diff
path: root/libraries/base/Foreign
diff options
context:
space:
mode:
author: Max Bolingbroke <batterseapower@hotmail.com> 2011-05-14 22:50:46 +0100
committer: Max Bolingbroke <batterseapower@hotmail.com> 2011-05-14 22:50:46 +0100
commit: dc58b7398910a433259a6c0f58a0d05a48555191 (patch)
tree: a01062281a0cf1dd42329110ff0d0326be407f2b /libraries/base/Foreign
parent: cdbce1218d9f9fb4152bdabffe8bbdee09f5ce60 (diff)
download: haskell-dc58b7398910a433259a6c0f58a0d05a48555191.tar.gz
Big patch to improve Unicode support in GHC. Validated on OS X and Windows, this
patch series fixes #5061, #1414, #3309, #3308, #3307, #4006 and #4855.

The major changes are:

1) Make Foreign.C.String.*CString use the locale encoding

   This change follows the FFI specification in Haskell 98, which has never
   actually been implemented before.

   The functions exported from Foreign.C.String are partially-applied versions
   of those from GHC.Foreign, which allows the user to supply their own
   TextEncoding.

   We also introduce foreignEncoding as the name of the text encoding that
   follows the FFI appendix in that it transliterates encoding errors.

2) I also changed the code so that mkTextEncoding always tries the
   native-Haskell decoders in preference to those from iconv, even on
   non-Windows. The motivation here is simply that it is better for
   compatibility if we do this, and those are the ones you get for the utf*
   and latin1* predefined TextEncodings anyway.

3) Implement surrogate-byte error handling mode for TextEncoding

   This implements PEP383-like behaviour so that we are able to roundtrip byte
   strings through Strings without loss of information.

   The withFilePath function now uses this encoding to get to/from CStrings,
   so any code that uses that will get the right PEP383 behaviour
   automatically.

4) Implement three other coding failure modes: ignore, throw error,
   transliterate

   These mimic the behaviour of the GNU Iconv extensions.
Diffstat (limited to 'libraries/base/Foreign')
-rw-r--r-- libraries/base/Foreign/C/String.hs | 44
1 files changed, 43 insertions, 1 deletions
diff --git a/libraries/base/Foreign/C/String.hs b/libraries/base/Foreign/C/String.hs
index becfa4ea68..fdefdc67c8 100644
--- a/libraries/base/Foreign/C/String.hs
+++ b/libraries/base/Foreign/C/String.hs
@@ -23,7 +23,6 @@
-----------------------------------------------------------------------------
module Foreign.C.String ( -- representation of strings in C
-
-- * C strings
CString, -- = Ptr CChar
@@ -31,8 +30,14 @@ module Foreign.C.String ( -- representation of strings in C
-- ** Using a locale-dependent encoding
+#ifndef __GLASGOW_HASKELL__
-- | Currently these functions are identical to their @CAString@ counterparts;
-- eventually they will use an encoding determined by the current locale.
+#else
+ -- | These functions are different from their @CAString@ counterparts
+ -- in that they will use an encoding determined by the current locale,
+ -- rather than always assuming ASCII.
+#endif
-- conversion of C strings into Haskell strings
--
@@ -102,10 +107,15 @@ import Foreign.Storable
import Data.Word
#ifdef __GLASGOW_HASKELL__
+import Control.Monad
+
import GHC.List
import GHC.Real
import GHC.Num
import GHC.Base
+
+import {-# SOURCE #-} GHC.IO.Encoding
+import qualified GHC.Foreign as GHC
#else
import Data.Char ( chr, ord )
#define unsafeChr chr
@@ -133,12 +143,20 @@ type CStringLen = (Ptr CChar, Int)
-- | Marshal a NUL terminated C string into a Haskell string.
--
peekCString :: CString -> IO String
+#ifndef __GLASGOW_HASKELL__
peekCString = peekCAString
+#else
+peekCString = GHC.peekCString foreignEncoding
+#endif
-- | Marshal a C string with explicit length into a Haskell string.
--
peekCStringLen :: CStringLen -> IO String
+#ifndef __GLASGOW_HASKELL__
peekCStringLen = peekCAStringLen
+#else
+peekCStringLen = GHC.peekCStringLen foreignEncoding
+#endif
-- | Marshal a Haskell string into a NUL terminated C string.
--
@@ -149,7 +167,11 @@ peekCStringLen = peekCAStringLen
-- 'Foreign.Marshal.Alloc.finalizerFree'.
--
newCString :: String -> IO CString
+#ifndef __GLASGOW_HASKELL__
newCString = newCAString
+#else
+newCString = GHC.newCString foreignEncoding
+#endif
-- | Marshal a Haskell string into a C string (ie, character array) with
-- explicit length information.
@@ -159,7 +181,11 @@ newCString = newCAString
-- 'Foreign.Marshal.Alloc.finalizerFree'.
--
newCStringLen :: String -> IO CStringLen
+#ifndef __GLASGOW_HASKELL__
newCStringLen = newCAStringLen
+#else
+newCStringLen = GHC.newCStringLen foreignEncoding
+#endif
-- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.
@@ -171,7 +197,11 @@ newCStringLen = newCAStringLen
-- storage must /not/ be used after this.
--
withCString :: String -> (CString -> IO a) -> IO a
+#ifndef __GLASGOW_HASKELL__
withCString = withCAString
+#else
+withCString = GHC.withCString foreignEncoding
+#endif
-- | Marshal a Haskell string into a C string (ie, character array)
-- in temporary storage, with explicit length information.
@@ -181,14 +211,26 @@ withCString = withCAString
-- storage must /not/ be used after this.
--
withCStringLen :: String -> (CStringLen -> IO a) -> IO a
+#ifndef __GLASGOW_HASKELL__
withCStringLen = withCAStringLen
+#else
+withCStringLen = GHC.withCStringLen foreignEncoding
+#endif
+
+#ifndef __GLASGOW_HASKELL__
-- | Determines whether a character can be accurately encoded in a 'CString'.
-- Unrepresentable characters are converted to @\'?\'@.
--
-- Currently only Latin-1 characters are representable.
charIsRepresentable :: Char -> IO Bool
charIsRepresentable c = return (ord c < 256)
+#else
+-- -- | Determines whether a character can be accurately encoded in a 'CString'.
+-- -- Unrepresentable characters are converted to '?' or their nearest visual equivalent.
+charIsRepresentable :: Char -> IO Bool
+charIsRepresentable = GHC.charIsRepresentable foreignEncoding
+#endif
-- single byte characters
-- ----------------------