diff options
author | Phil Ruffwind <rf@rufflewind.com> | 2016-12-17 18:09:06 -0500 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2016-12-17 18:09:41 -0500 |
commit | 21dde8126d615a082648c916a3e20d9878f22517 (patch) | |
tree | 5ed1653b1d26c1fda64851f33ea800d511f65a66 /compiler/utils | |
parent | 90cfa84981ee1f9fb756a3af1bd707674c18c034 (diff) | |
download | haskell-21dde8126d615a082648c916a3e20d9878f22517.tar.gz |
Improve StringBuffer and FastString docs
This area of code contains a lot of unsafe functionality, so it might be
worth documenting to reduce the risk of misuse.
Test Plan: inspection
Reviewers: austin, bgamari
Reviewed By: bgamari
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D2872
Diffstat (limited to 'compiler/utils')
-rw-r--r-- | compiler/utils/FastString.hs | 13 | ||||
-rw-r--r-- | compiler/utils/StringBuffer.hs | 42 |
2 files changed, 51 insertions, 4 deletions
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs index 8f76584875..8d1bbb5c67 100644 --- a/compiler/utils/FastString.hs +++ b/compiler/utils/FastString.hs @@ -23,6 +23,10 @@ -- * Outputing them is fast. -- * Generated by 'sLit'. -- * Turn into 'Outputable.SDoc' with 'Outputable.ptext' +-- * Requires manual memory management. +-- Improper use may lead to memory leaks or dangling pointers. +-- * It assumes Latin-1 as the encoding, therefore it cannot represent +-- arbitrary Unicode strings. -- -- Use 'LitString' unless you want the facilities of 'FastString'. module FastString @@ -560,14 +564,19 @@ hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs -- ----------------------------------------------------------------------------- -- LitStrings, here for convenience only. +-- | A 'LitString' is a pointer to some null-terminated array of bytes. type LitString = Ptr Word8 --Why do we recalculate length every time it's requested? --If it's commonly needed, we should perhaps have --data LitString = LitString {-#UNPACK#-}!Addr# {-#UNPACK#-}!Int# +-- | Wrap an unboxed address into a 'LitString'. mkLitString# :: Addr# -> LitString mkLitString# a# = Ptr a# +-- | Encode a 'String' into a newly allocated 'LitString' using Latin-1 +-- encoding. The original string must not contain non-Latin-1 characters +-- (above codepoint @0xff@). {-# INLINE mkLitString #-} mkLitString :: String -> LitString mkLitString s = @@ -583,9 +592,13 @@ mkLitString s = return p ) +-- | Decode a 'LitString' back into a 'String' using Latin-1 encoding. +-- This does not free the memory associated with 'LitString'. unpackLitString :: LitString -> String unpackLitString (Ptr p) = unpackCString# p +-- | Compute the length of a 'LitString', which must necessarily be +-- null-terminated. lengthLS :: LitString -> Int lengthLS = ptrStrLength diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs index 7da9f6c22d..bac752ac28 100644 --- a/compiler/utils/StringBuffer.hs +++ b/compiler/utils/StringBuffer.hs @@ -89,6 +89,8 @@ instance Show StringBuffer where -- ----------------------------------------------------------------------------- -- Creation / Destruction +-- | Read a file into a 'StringBuffer'. The resulting buffer is automatically +-- managed by the garbage collector. hGetStringBuffer :: FilePath -> IO StringBuffer hGetStringBuffer fname = do h <- openBinaryFile fname ReadMode @@ -161,6 +163,8 @@ appendStringBuffers sb1 sb2 calcLen sb = len sb - cur sb size = sb1_len + sb2_len +-- | Encode a 'String' into a 'StringBuffer' as UTF-8. The resulting buffer +-- is automatically managed by the garbage collector. stringToStringBuffer :: String -> StringBuffer stringToStringBuffer str = unsafePerformIO $ do @@ -175,10 +179,15 @@ stringToStringBuffer str = -- ----------------------------------------------------------------------------- -- Grab a character --- Getting our fingers dirty a little here, but this is performance-critical +-- | Return the first UTF-8 character of a nonempty 'StringBuffer' and as well +-- the remaining portion (analogous to 'Data.List.uncons'). __Warning:__ The +-- behavior is undefined if the 'StringBuffer' is empty. The result shares +-- the same buffer as the original. Similar to 'utf8DecodeChar', if the +-- character cannot be decoded as UTF-8, '\0' is returned. {-# INLINE nextChar #-} nextChar :: StringBuffer -> (Char,StringBuffer) nextChar (StringBuffer buf len (I# cur#)) = + -- Getting our fingers dirty a little here, but this is performance-critical inlinePerformIO $ do withForeignPtr buf $ \(Ptr a#) -> do case utf8DecodeChar# (a# `plusAddr#` cur#) of @@ -186,6 +195,10 @@ nextChar (StringBuffer buf len (I# cur#)) = let cur' = I# (cur# +# nBytes#) in return (C# c#, StringBuffer buf len cur') +-- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous +-- to 'Data.List.head'). __Warning:__ The behavior is undefined if the +-- 'StringBuffer' is empty. Similar to 'utf8DecodeChar', if the character +-- cannot be decoded as UTF-8, '\0' is returned. currentChar :: StringBuffer -> Char currentChar = fst . nextChar @@ -200,29 +213,50 @@ prevChar (StringBuffer buf _ cur) _ = -- ----------------------------------------------------------------------------- -- Moving +-- | Return a 'StringBuffer' with the first UTF-8 character removed (analogous +-- to 'Data.List.tail'). __Warning:__ The behavior is undefined if the +-- 'StringBuffer' is empty. The result shares the same buffer as the +-- original. stepOn :: StringBuffer -> StringBuffer stepOn s = snd (nextChar s) -offsetBytes :: Int -> StringBuffer -> StringBuffer +-- | Return a 'StringBuffer' with the first @n@ bytes removed. __Warning:__ +-- If there aren't enough characters, the returned 'StringBuffer' will be +-- invalid and any use of it may lead to undefined behavior. The result +-- shares the same buffer as the original. +offsetBytes :: Int -- ^ @n@, the number of bytes + -> StringBuffer + -> StringBuffer offsetBytes i s = s { cur = cur s + i } +-- | Compute the difference in offset between two 'StringBuffer's that share +-- the same buffer. __Warning:__ The behavior is undefined if the +-- 'StringBuffer's use separate buffers. byteDiff :: StringBuffer -> StringBuffer -> Int byteDiff s1 s2 = cur s2 - cur s1 +-- | Check whether a 'StringBuffer' is empty (analogous to 'Data.List.null'). atEnd :: StringBuffer -> Bool atEnd (StringBuffer _ l c) = l == c -- ----------------------------------------------------------------------------- -- Conversion -lexemeToString :: StringBuffer -> Int {-bytes-} -> String +-- | Decode the first @n@ bytes of a 'StringBuffer' as UTF-8 into a 'String'. +-- Similar to 'utf8DecodeChar', if the character cannot be decoded as UTF-8, +-- they will be replaced with '\0'. +lexemeToString :: StringBuffer + -> Int -- ^ @n@, the number of bytes + -> String lexemeToString _ 0 = "" lexemeToString (StringBuffer buf _ cur) bytes = inlinePerformIO $ withForeignPtr buf $ \ptr -> utf8DecodeString (ptr `plusPtr` cur) bytes -lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString +lexemeToFastString :: StringBuffer + -> Int -- ^ @n@, the number of bytes + -> FastString lexemeToFastString _ 0 = nilFS lexemeToFastString (StringBuffer buf _ cur) len = inlinePerformIO $ |