Improve StringBuffer and FastString docs

This area of code contains a lot of unsafe functionality, so it might be worth documenting to reduce the risk of misuse. Test Plan: inspection Reviewers: austin, bgamari Reviewed By: bgamari Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D2872
author: Phil Ruffwind <rf@rufflewind.com> 2016-12-17 18:09:06 -0500
committer: Ben Gamari <ben@smart-cactus.org> 2016-12-17 18:09:41 -0500
commit: 21dde8126d615a082648c916a3e20d9878f22517 (patch)
tree: 5ed1653b1d26c1fda64851f33ea800d511f65a66 /compiler/utils
parent: 90cfa84981ee1f9fb756a3af1bd707674c18c034 (diff)
download: haskell-21dde8126d615a082648c916a3e20d9878f22517.tar.gz
2 files changed, 51 insertions, 4 deletions
diff --git a/compiler/utils/FastString.hs b/compiler/utils/FastString.hs
index 8f76584875..8d1bbb5c67 100644
--- a/compiler/utils/FastString.hs
+++ b/compiler/utils/FastString.hs
@@ -23,6 +23,10 @@
 --   * Outputing them is fast.
 --   * Generated by 'sLit'.
 --   * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
+--   * Requires manual memory management.
+--     Improper use may lead to memory leaks or dangling pointers.
+--   * It assumes Latin-1 as the encoding, therefore it cannot represent
+--     arbitrary Unicode strings.
 --
 -- Use 'LitString' unless you want the facilities of 'FastString'.
 module FastString
@@ -560,14 +564,19 @@ hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs
 -- -----------------------------------------------------------------------------
 -- LitStrings, here for convenience only.
 
+-- | A 'LitString' is a pointer to some null-terminated array of bytes.
 type LitString = Ptr Word8
 --Why do we recalculate length every time it's requested?
 --If it's commonly needed, we should perhaps have
 --data LitString = LitString {-#UNPACK#-}!Addr# {-#UNPACK#-}!Int#
 
+-- | Wrap an unboxed address into a 'LitString'.
 mkLitString# :: Addr# -> LitString
 mkLitString# a# = Ptr a#
 
+-- | Encode a 'String' into a newly allocated 'LitString' using Latin-1
+-- encoding.  The original string must not contain non-Latin-1 characters
+-- (above codepoint @0xff@).
 {-# INLINE mkLitString #-}
 mkLitString :: String -> LitString
 mkLitString s =
@@ -583,9 +592,13 @@ mkLitString s =
    return p
  )
 
+-- | Decode a 'LitString' back into a 'String' using Latin-1 encoding.
+-- This does not free the memory associated with 'LitString'.
 unpackLitString :: LitString -> String
 unpackLitString (Ptr p) = unpackCString# p
 
+-- | Compute the length of a 'LitString', which must necessarily be
+-- null-terminated.
 lengthLS :: LitString -> Int
 lengthLS = ptrStrLength
 
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index 7da9f6c22d..bac752ac28 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -89,6 +89,8 @@ instance Show StringBuffer where
 -- -----------------------------------------------------------------------------
 -- Creation / Destruction
 
+-- | Read a file into a 'StringBuffer'.  The resulting buffer is automatically
+-- managed by the garbage collector.
 hGetStringBuffer :: FilePath -> IO StringBuffer
 hGetStringBuffer fname = do
    h <- openBinaryFile fname ReadMode
@@ -161,6 +163,8 @@ appendStringBuffers sb1 sb2
           calcLen sb = len sb - cur sb
           size =  sb1_len + sb2_len
 
+-- | Encode a 'String' into a 'StringBuffer' as UTF-8.  The resulting buffer
+-- is automatically managed by the garbage collector.
 stringToStringBuffer :: String -> StringBuffer
 stringToStringBuffer str =
  unsafePerformIO $ do
@@ -175,10 +179,15 @@ stringToStringBuffer str =
 -- -----------------------------------------------------------------------------
 -- Grab a character
 
--- Getting our fingers dirty a little here, but this is performance-critical
+-- | Return the first UTF-8 character of a nonempty 'StringBuffer' and as well
+-- the remaining portion (analogous to 'Data.List.uncons').  __Warning:__ The
+-- behavior is undefined if the 'StringBuffer' is empty.  The result shares
+-- the same buffer as the original.  Similar to 'utf8DecodeChar', if the
+-- character cannot be decoded as UTF-8, '\0' is returned.
 {-# INLINE nextChar #-}
 nextChar :: StringBuffer -> (Char,StringBuffer)
 nextChar (StringBuffer buf len (I# cur#)) =
+  -- Getting our fingers dirty a little here, but this is performance-critical
   inlinePerformIO $ do
     withForeignPtr buf $ \(Ptr a#) -> do
         case utf8DecodeChar# (a# `plusAddr#` cur#) of
@@ -186,6 +195,10 @@ nextChar (StringBuffer buf len (I# cur#)) =
              let cur' = I# (cur# +# nBytes#) in
              return (C# c#, StringBuffer buf len cur')
 
+-- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous
+-- to 'Data.List.head').  __Warning:__ The behavior is undefined if the
+-- 'StringBuffer' is empty.  Similar to 'utf8DecodeChar', if the character
+-- cannot be decoded as UTF-8, '\0' is returned.
 currentChar :: StringBuffer -> Char
 currentChar = fst . nextChar
 
@@ -200,29 +213,50 @@ prevChar (StringBuffer buf _   cur) _     =
 -- -----------------------------------------------------------------------------
 -- Moving
 
+-- | Return a 'StringBuffer' with the first UTF-8 character removed (analogous
+-- to 'Data.List.tail').  __Warning:__ The behavior is undefined if the
+-- 'StringBuffer' is empty.  The result shares the same buffer as the
+-- original.
 stepOn :: StringBuffer -> StringBuffer
 stepOn s = snd (nextChar s)
 
-offsetBytes :: Int -> StringBuffer -> StringBuffer
+-- | Return a 'StringBuffer' with the first @n@ bytes removed.  __Warning:__
+-- If there aren't enough characters, the returned 'StringBuffer' will be
+-- invalid and any use of it may lead to undefined behavior.  The result
+-- shares the same buffer as the original.
+offsetBytes :: Int                      -- ^ @n@, the number of bytes
+            -> StringBuffer
+            -> StringBuffer
 offsetBytes i s = s { cur = cur s + i }
 
+-- | Compute the difference in offset between two 'StringBuffer's that share
+-- the same buffer.  __Warning:__ The behavior is undefined if the
+-- 'StringBuffer's use separate buffers.
 byteDiff :: StringBuffer -> StringBuffer -> Int
 byteDiff s1 s2 = cur s2 - cur s1
 
+-- | Check whether a 'StringBuffer' is empty (analogous to 'Data.List.null').
 atEnd :: StringBuffer -> Bool
 atEnd (StringBuffer _ l c) = l == c
 
 -- -----------------------------------------------------------------------------
 -- Conversion
 
-lexemeToString :: StringBuffer -> Int {-bytes-} -> String
+-- | Decode the first @n@ bytes of a 'StringBuffer' as UTF-8 into a 'String'.
+-- Similar to 'utf8DecodeChar', if the character cannot be decoded as UTF-8,
+-- they will be replaced with '\0'.
+lexemeToString :: StringBuffer
+               -> Int                   -- ^ @n@, the number of bytes
+               -> String
 lexemeToString _ 0 = ""
 lexemeToString (StringBuffer buf _ cur) bytes =
   inlinePerformIO $
     withForeignPtr buf $ \ptr ->
       utf8DecodeString (ptr `plusPtr` cur) bytes
 
-lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
+lexemeToFastString :: StringBuffer
+                   -> Int               -- ^ @n@, the number of bytes
+                   -> FastString
 lexemeToFastString _ 0 = nilFS
 lexemeToFastString (StringBuffer buf _ cur) len =
    inlinePerformIO $
author	Phil Ruffwind <rf@rufflewind.com>	2016-12-17 18:09:06 -0500
committer	Ben Gamari <ben@smart-cactus.org>	2016-12-17 18:09:41 -0500
commit	21dde8126d615a082648c916a3e20d9878f22517 (patch)
tree	5ed1653b1d26c1fda64851f33ea800d511f65a66 /compiler/utils
parent	90cfa84981ee1f9fb756a3af1bd707674c18c034 (diff)
download	haskell-21dde8126d615a082648c916a3e20d9878f22517.tar.gz