summaryrefslogtreecommitdiff
path: root/compiler/utils/StringBuffer.lhs
diff options
context:
space:
mode:
authorSimon Marlow <simonmar@microsoft.com>2006-04-07 02:05:11 +0000
committerSimon Marlow <simonmar@microsoft.com>2006-04-07 02:05:11 +0000
commit0065d5ab628975892cea1ec7303f968c3338cbe1 (patch)
tree8e2afe0ab48ee33cf95009809d67c9649573ef92 /compiler/utils/StringBuffer.lhs
parent28a464a75e14cece5db40f2765a29348273ff2d2 (diff)
downloadhaskell-0065d5ab628975892cea1ec7303f968c3338cbe1.tar.gz
Reorganisation of the source tree
Most of the other users of the fptools build system have migrated to Cabal, and with the move to darcs we can now flatten the source tree without losing history, so here goes. The main change is that the ghc/ subdir is gone, and most of what it contained is now at the top level. The build system now makes no pretense at being multi-project, it is just the GHC build system. No doubt this will break many things, and there will be a period of instability while we fix the dependencies. A straightforward build should work, but I haven't yet fixed binary/source distributions. Changes to the Building Guide will follow, too.
Diffstat (limited to 'compiler/utils/StringBuffer.lhs')
-rw-r--r--compiler/utils/StringBuffer.lhs240
1 files changed, 240 insertions, 0 deletions
diff --git a/compiler/utils/StringBuffer.lhs b/compiler/utils/StringBuffer.lhs
new file mode 100644
index 0000000000..e52e7e78da
--- /dev/null
+++ b/compiler/utils/StringBuffer.lhs
@@ -0,0 +1,240 @@
+%
+% (c) The University of Glasgow, 1997-2006
+%
+\section{String buffers}
+
+Buffers for scanning string input stored in external arrays.
+
+\begin{code}
+module StringBuffer
+ (
+ StringBuffer(..),
+ -- non-abstract for vs\/HaskellService
+
+ -- * Creation\/destruction
+ hGetStringBuffer,
+ hGetStringBufferBlock,
+ appendStringBuffers,
+ stringToStringBuffer,
+
+ -- * Inspection
+ nextChar,
+ currentChar,
+ prevChar,
+ atEnd,
+
+ -- * Moving and comparison
+ stepOn,
+ offsetBytes,
+ byteDiff,
+
+ -- * Conversion
+ lexemeToString,
+ lexemeToFastString,
+
+ -- * Parsing integers
+ parseInteger,
+ ) where
+
+#include "HsVersions.h"
+
+import Encoding
+import FastString ( FastString,mkFastString,mkFastStringBytes )
+
+import Foreign
+import System.IO ( hGetBuf, hFileSize,IOMode(ReadMode), hClose
+ , Handle, hTell )
+
+import GHC.Ptr ( Ptr(..) )
+import GHC.Exts
+import GHC.IOBase ( IO(..) )
+import GHC.Base ( unsafeChr )
+
+#if __GLASGOW_HASKELL__ >= 601
+import System.IO ( openBinaryFile )
+#else
+import IOExts ( openFileEx, IOModeEx(..) )
+#endif
+
+#if __GLASGOW_HASKELL__ < 601
+openBinaryFile fp mode = openFileEx fp (BinaryMode mode)
+#endif
+
+-- -----------------------------------------------------------------------------
+-- The StringBuffer type
+
+-- |A StringBuffer is an internal pointer to a sized chunk of bytes.
+-- The bytes are intended to be *immutable*. There are pure
+-- operations to read the contents of a StringBuffer.
+--
+-- A StringBuffer may have a finalizer, depending on how it was
+-- obtained.
+--
+data StringBuffer
+ = StringBuffer {
+ buf :: {-# UNPACK #-} !(ForeignPtr Word8),
+ len :: {-# UNPACK #-} !Int, -- length
+ cur :: {-# UNPACK #-} !Int -- current pos
+ }
+ -- The buffer is assumed to be UTF-8 encoded, and furthermore
+ -- we add three '\0' bytes to the end as sentinels so that the
+ -- decoder doesn't have to check for overflow at every single byte
+ -- of a multibyte sequence.
+
+instance Show StringBuffer where
+ showsPrec _ s = showString "<stringbuffer("
+ . shows (len s) . showString "," . shows (cur s)
+ . showString ">"
+
+-- -----------------------------------------------------------------------------
+-- Creation / Destruction
+
+hGetStringBuffer :: FilePath -> IO StringBuffer
+hGetStringBuffer fname = do
+ h <- openBinaryFile fname ReadMode
+ size_i <- hFileSize h
+ let size = fromIntegral size_i
+ buf <- mallocForeignPtrArray (size+3)
+ withForeignPtr buf $ \ptr -> do
+ r <- if size == 0 then return 0 else hGetBuf h ptr size
+ hClose h
+ if (r /= size)
+ then ioError (userError "short read of file")
+ else do
+ pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+ -- sentinels for UTF-8 decoding
+ return (StringBuffer buf size 0)
+
+hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
+hGetStringBufferBlock handle wanted
+ = do size_i <- hFileSize handle
+ offset_i <- hTell handle
+ let size = min wanted (fromIntegral $ size_i-offset_i)
+ buf <- mallocForeignPtrArray (size+3)
+ withForeignPtr buf $ \ptr ->
+ do r <- if size == 0 then return 0 else hGetBuf handle ptr size
+ if r /= size
+ then ioError (userError $ "short read of file: "++show(r,size,fromIntegral size_i,handle))
+ else do pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+ return (StringBuffer buf size 0)
+
+appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
+appendStringBuffers sb1 sb2
+ = do newBuf <- mallocForeignPtrArray (size+3)
+ withForeignPtr newBuf $ \ptr ->
+ withForeignPtr (buf sb1) $ \sb1Ptr ->
+ withForeignPtr (buf sb2) $ \sb2Ptr ->
+ do copyArray (sb1Ptr `advancePtr` cur sb1) ptr (calcLen sb1)
+ copyArray (sb2Ptr `advancePtr` cur sb2) (ptr `advancePtr` cur sb1) (calcLen sb2)
+ pokeArray (ptr `advancePtr` size) [0,0,0]
+ return (StringBuffer newBuf size 0)
+ where calcLen sb = len sb - cur sb
+ size = calcLen sb1 + calcLen sb2
+
+stringToStringBuffer :: String -> IO StringBuffer
+stringToStringBuffer str = do
+ let size = utf8EncodedLength str
+ buf <- mallocForeignPtrArray (size+3)
+ withForeignPtr buf $ \ptr -> do
+ utf8EncodeString ptr str
+ pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+ -- sentinels for UTF-8 decoding
+ return (StringBuffer buf size 0)
+
+-- -----------------------------------------------------------------------------
+-- Grab a character
+
+-- Getting our fingers dirty a little here, but this is performance-critical
+{-# INLINE nextChar #-}
+nextChar :: StringBuffer -> (Char,StringBuffer)
+nextChar (StringBuffer buf len (I# cur#)) =
+ inlinePerformIO $ do
+ withForeignPtr buf $ \(Ptr a#) -> do
+ case utf8DecodeChar# (a# `plusAddr#` cur#) of
+ (# c#, b# #) ->
+ let cur' = I# (b# `minusAddr#` a#) in
+ return (C# c#, StringBuffer buf len cur')
+
+currentChar :: StringBuffer -> Char
+currentChar = fst . nextChar
+
+prevChar :: StringBuffer -> Char -> Char
+prevChar (StringBuffer buf len 0) deflt = deflt
+prevChar (StringBuffer buf len cur) deflt =
+ inlinePerformIO $ do
+ withForeignPtr buf $ \p -> do
+ p' <- utf8PrevChar (p `plusPtr` cur)
+ return (fst (utf8DecodeChar p'))
+
+-- -----------------------------------------------------------------------------
+-- Moving
+
+stepOn :: StringBuffer -> StringBuffer
+stepOn s = snd (nextChar s)
+
+offsetBytes :: Int -> StringBuffer -> StringBuffer
+offsetBytes i s = s { cur = cur s + i }
+
+byteDiff :: StringBuffer -> StringBuffer -> Int
+byteDiff s1 s2 = cur s2 - cur s1
+
+atEnd :: StringBuffer -> Bool
+atEnd (StringBuffer _ l c) = l == c
+
+-- -----------------------------------------------------------------------------
+-- Conversion
+
+lexemeToString :: StringBuffer -> Int {-bytes-} -> String
+lexemeToString _ 0 = ""
+lexemeToString (StringBuffer buf _ cur) bytes =
+ inlinePerformIO $
+ withForeignPtr buf $ \ptr ->
+ utf8DecodeString (ptr `plusPtr` cur) bytes
+
+lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
+lexemeToFastString _ 0 = mkFastString ""
+lexemeToFastString (StringBuffer buf _ cur) len =
+ inlinePerformIO $
+ withForeignPtr buf $ \ptr ->
+ return $! mkFastStringBytes (ptr `plusPtr` cur) len
+
+-- -----------------------------------------------------------------------------
+-- Parsing integer strings in various bases
+
+byteOff :: StringBuffer -> Int -> Char
+byteOff (StringBuffer buf _ cur) i =
+ inlinePerformIO $ withForeignPtr buf $ \ptr -> do
+ w <- peek (ptr `plusPtr` (cur+i))
+ return (unsafeChr (fromIntegral (w::Word8)))
+
+-- | XXX assumes ASCII digits only
+parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
+parseInteger buf len radix to_int
+ = go 0 0
+ where go i x | i == len = x
+ | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i)))
+
+-- -----------------------------------------------------------------------------
+-- under the carpet
+
+-- Just like unsafePerformIO, but we inline it.
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+
+#if __GLASGOW_HASKELL__ < 600
+mallocForeignPtrArray :: Storable a => Int -> IO (ForeignPtr a)
+mallocForeignPtrArray = doMalloc undefined
+ where
+ doMalloc :: Storable b => b -> Int -> IO (ForeignPtr b)
+ doMalloc dummy size = mallocForeignPtrBytes (size * sizeOf dummy)
+
+mallocForeignPtrBytes :: Int -> IO (ForeignPtr a)
+mallocForeignPtrBytes n = do
+ r <- mallocBytes n
+ newForeignPtr r (finalizerFree r)
+
+foreign import ccall unsafe "stdlib.h free"
+ finalizerFree :: Ptr a -> IO ()
+#endif
+\end{code}