diff options
Diffstat (limited to 'libraries/base/GHC/Foreign.hs')
-rw-r--r-- | libraries/base/GHC/Foreign.hs | 256 |
1 files changed, 256 insertions, 0 deletions
diff --git a/libraries/base/GHC/Foreign.hs b/libraries/base/GHC/Foreign.hs new file mode 100644 index 0000000000..ef64d48572 --- /dev/null +++ b/libraries/base/GHC/Foreign.hs @@ -0,0 +1,256 @@ +{-# LANGUAGE Trustworthy #-} +{-# LANGUAGE NoImplicitPrelude #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.Foreign +-- Copyright : (c) The University of Glasgow, 2008-2011 +-- License : see libraries/base/LICENSE +-- +-- Maintainer : libraries@haskell.org +-- Stability : internal +-- Portability : non-portable +-- +-- Foreign marshalling support for CStrings with configurable encodings +-- +----------------------------------------------------------------------------- + +module GHC.Foreign ( + -- * C strings with a configurable encoding + + -- conversion of C strings into Haskell strings + -- + peekCString, + peekCStringLen, + + -- conversion of Haskell strings into C strings + -- + newCString, + newCStringLen, + + -- conversion of Haskell strings into C strings using temporary storage + -- + withCString, + withCStringLen, + + charIsRepresentable, + ) where + +import Foreign.Marshal.Array +import Foreign.C.Types +import Foreign.Ptr +import Foreign.Storable + +import Data.Word + +-- Imports for the locale-encoding version of marshallers +import Control.Monad + +import Data.Tuple (fst) +import Data.Maybe + +import GHC.Show ( show ) + +import Foreign.Marshal.Alloc +import Foreign.ForeignPtr + +import GHC.Debug +import GHC.List +import GHC.Num +import GHC.Base + +import GHC.IO +import GHC.IO.Exception +import GHC.IO.Buffer +import GHC.IO.Encoding.Types + + +c_DEBUG_DUMP :: Bool +c_DEBUG_DUMP = False + +putDebugMsg :: String -> IO () +putDebugMsg | c_DEBUG_DUMP = debugLn + | otherwise = const (return ()) + + +-- These definitions are identical to those in Foreign.C.String, but copied in here to avoid a cycle: +type CString = Ptr CChar +type CStringLen = (Ptr CChar, Int) + +-- exported functions +-- ------------------ + +-- | Marshal a NUL terminated C string into a Haskell string. +-- +peekCString :: TextEncoding -> CString -> IO String +peekCString enc cp = do + sz <- lengthArray0 nUL cp + peekEncodedCString enc (cp, sz * cCharSize) + +-- | Marshal a C string with explicit length into a Haskell string. +-- +peekCStringLen :: TextEncoding -> CStringLen -> IO String +peekCStringLen = peekEncodedCString + +-- | Marshal a Haskell string into a NUL terminated C string. +-- +-- * the Haskell string may /not/ contain any NUL characters +-- +-- * new storage is allocated for the C string and must be +-- explicitly freed using 'Foreign.Marshal.Alloc.free' or +-- 'Foreign.Marshal.Alloc.finalizerFree'. +-- +newCString :: TextEncoding -> String -> IO CString +newCString enc = liftM fst . newEncodedCString enc True + +-- | Marshal a Haskell string into a C string (ie, character array) with +-- explicit length information. +-- +-- * new storage is allocated for the C string and must be +-- explicitly freed using 'Foreign.Marshal.Alloc.free' or +-- 'Foreign.Marshal.Alloc.finalizerFree'. +-- +newCStringLen :: TextEncoding -> String -> IO CStringLen +newCStringLen enc = newEncodedCString enc False + +-- | Marshal a Haskell string into a NUL terminated C string using temporary +-- storage. +-- +-- * the Haskell string may /not/ contain any NUL characters +-- +-- * the memory is freed when the subcomputation terminates (either +-- normally or via an exception), so the pointer to the temporary +-- storage must /not/ be used after this. +-- +withCString :: TextEncoding -> String -> (CString -> IO a) -> IO a +withCString enc s act = withEncodedCString enc True s $ \(cp, _sz) -> act cp + +-- | Marshal a Haskell string into a C string (ie, character array) +-- in temporary storage, with explicit length information. +-- +-- * the memory is freed when the subcomputation terminates (either +-- normally or via an exception), so the pointer to the temporary +-- storage must /not/ be used after this. +-- +withCStringLen :: TextEncoding -> String -> (CStringLen -> IO a) -> IO a +withCStringLen enc = withEncodedCString enc False + + +-- | Determines whether a character can be accurately encoded in a 'CString'. +-- +-- Pretty much anyone who uses this function is in a state of sin because +-- whether or not a character is encodable will, in general, depend on the +-- context in which it occurs. +charIsRepresentable :: TextEncoding -> Char -> IO Bool +charIsRepresentable enc c = withCString enc [c] (fmap (== [c]) . peekCString enc) `catchException` (\e -> let _ = e :: IOException in return False) + +-- auxiliary definitions +-- ---------------------- + +-- C's end of string character +nUL :: CChar +nUL = 0 + +-- Size of a CChar in bytes +cCharSize :: Int +cCharSize = sizeOf (undefined :: CChar) + + +{-# INLINE peekEncodedCString #-} +peekEncodedCString :: TextEncoding -- ^ Encoding of CString + -> CStringLen + -> IO String -- ^ String in Haskell terms +peekEncodedCString (TextEncoding { mkTextDecoder = mk_decoder }) (p, sz_bytes) + = bracket mk_decoder close $ \decoder -> do + let chunk_size = sz_bytes `max` 1 -- Decode buffer chunk size in characters: one iteration only for ASCII + from0 <- fmap (\fp -> bufferAdd sz_bytes (emptyBuffer fp sz_bytes ReadBuffer)) $ newForeignPtr_ (castPtr p) + to <- newCharBuffer chunk_size WriteBuffer + + let go iteration from = do + (why, from', to') <- encode decoder from to + if isEmptyBuffer from' + then + -- No input remaining: @why@ will be InputUnderflow, but we don't care + withBuffer to' $ peekArray (bufferElems to') + else do + -- Input remaining: what went wrong? + putDebugMsg ("peekEncodedCString: " ++ show iteration ++ " " ++ show why) + (from'', to'') <- case why of InvalidSequence -> recover decoder from' to' -- These conditions are equally bad because + InputUnderflow -> recover decoder from' to' -- they indicate malformed/truncated input + OutputUnderflow -> return (from', to') -- We will have more space next time round + putDebugMsg ("peekEncodedCString: from " ++ summaryBuffer from ++ " " ++ summaryBuffer from' ++ " " ++ summaryBuffer from'') + putDebugMsg ("peekEncodedCString: to " ++ summaryBuffer to ++ " " ++ summaryBuffer to' ++ " " ++ summaryBuffer to'') + to_chars <- withBuffer to'' $ peekArray (bufferElems to'') + fmap (to_chars++) $ go (iteration + 1) from'' + + go (0 :: Int) from0 + +{-# INLINE withEncodedCString #-} +withEncodedCString :: TextEncoding -- ^ Encoding of CString to create + -> Bool -- ^ Null-terminate? + -> String -- ^ String to encode + -> (CStringLen -> IO a) -- ^ Worker that can safely use the allocated memory + -> IO a +withEncodedCString (TextEncoding { mkTextEncoder = mk_encoder }) null_terminate s act + = bracket mk_encoder close $ \encoder -> withArrayLen s $ \sz p -> do + from <- fmap (\fp -> bufferAdd sz (emptyBuffer fp sz ReadBuffer)) $ newForeignPtr_ p + + let go iteration to_sz_bytes = do + putDebugMsg ("withEncodedCString: " ++ show iteration) + allocaBytes to_sz_bytes $ \to_p -> do + mb_res <- tryFillBufferAndCall encoder null_terminate from to_p to_sz_bytes act + case mb_res of + Nothing -> go (iteration + 1) (to_sz_bytes * 2) + Just res -> return res + + -- If the input string is ASCII, this value will ensure we only allocate once + go (0 :: Int) (cCharSize * (sz + 1)) + +{-# INLINE newEncodedCString #-} +newEncodedCString :: TextEncoding -- ^ Encoding of CString to create + -> Bool -- ^ Null-terminate? + -> String -- ^ String to encode + -> IO CStringLen +newEncodedCString (TextEncoding { mkTextEncoder = mk_encoder }) null_terminate s + = bracket mk_encoder close $ \encoder -> withArrayLen s $ \sz p -> do + from <- fmap (\fp -> bufferAdd sz (emptyBuffer fp sz ReadBuffer)) $ newForeignPtr_ p + + let go iteration to_p to_sz_bytes = do + putDebugMsg ("newEncodedCString: " ++ show iteration) + mb_res <- tryFillBufferAndCall encoder null_terminate from to_p to_sz_bytes return + case mb_res of + Nothing -> do + let to_sz_bytes' = to_sz_bytes * 2 + to_p' <- reallocBytes to_p to_sz_bytes' + go (iteration + 1) to_p' to_sz_bytes' + Just res -> return res + + -- If the input string is ASCII, this value will ensure we only allocate once + let to_sz_bytes = cCharSize * (sz + 1) + to_p <- mallocBytes to_sz_bytes + go (0 :: Int) to_p to_sz_bytes + + +tryFillBufferAndCall :: TextEncoder dstate -> Bool -> Buffer Char -> Ptr Word8 -> Int + -> (CStringLen -> IO a) -> IO (Maybe a) +tryFillBufferAndCall encoder null_terminate from0 to_p to_sz_bytes act = do + to_fp <- newForeignPtr_ to_p + go (0 :: Int) (from0, emptyBuffer to_fp to_sz_bytes WriteBuffer) + where + go iteration (from, to) = do + (why, from', to') <- encode encoder from to + putDebugMsg ("tryFillBufferAndCall: " ++ show iteration ++ " " ++ show why ++ " " ++ summaryBuffer from ++ " " ++ summaryBuffer from') + if isEmptyBuffer from' + then if null_terminate && bufferAvailable to' == 0 + then return Nothing -- We had enough for the string but not the terminator: ask the caller for more buffer + else do + -- Awesome, we had enough buffer + let bytes = bufferElems to' + withBuffer to' $ \to_ptr -> do + when null_terminate $ pokeElemOff to_ptr (bufR to') 0 + fmap Just $ act (castPtr to_ptr, bytes) -- NB: the length information is specified as being in *bytes* + else case why of -- We didn't consume all of the input + InputUnderflow -> recover encoder from' to' >>= go (iteration + 1) -- These conditions are equally bad + InvalidSequence -> recover encoder from' to' >>= go (iteration + 1) -- since the input was truncated/invalid + OutputUnderflow -> return Nothing -- Oops, out of buffer during decoding: ask the caller for more + |