summaryrefslogtreecommitdiff
path: root/libraries/compact/Data/Compact.hs
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/compact/Data/Compact.hs')
-rw-r--r--libraries/compact/Data/Compact.hs89
1 files changed, 75 insertions, 14 deletions
diff --git a/libraries/compact/Data/Compact.hs b/libraries/compact/Data/Compact.hs
index 42245c54f5..ce6bf2bb83 100644
--- a/libraries/compact/Data/Compact.hs
+++ b/libraries/compact/Data/Compact.hs
@@ -15,10 +15,48 @@
-- Stability : unstable
-- Portability : non-portable (GHC Extensions)
--
--- This module provides a data structure, called a Compact, for
--- holding fully evaluated data in a consecutive block of memory.
+-- This module provides a data structure, called a 'Compact', for
+-- holding immutable, fully evaluated data in a consecutive block of memory.
+-- Compact regions are good for two things:
--
--- /Since: 1.0.0/
+-- 1. Data in a compact region is not traversed during GC; any
+-- incoming pointer to a compact region keeps the entire region
+-- live. Thus, if you put a long-lived data structure in a compact
+-- region, you may save a lot of cycles during major collections,
+-- since you will no longer be (uselessly) retraversing this
+-- data structure.
+--
+-- 2. Because the data is stored contiguously, you can easily
+-- dump the memory to disk and/or send it over the network.
+-- For applications that are not bandwidth bound (GHC's heap
+-- representation can be as much of a x4 expansion over a
+-- binary serialization), this can lead to substantial speed ups.
+--
+-- For example, suppose you have a function @loadBigStruct :: IO BigStruct@,
+-- which loads a large data structure from the file system. First,
+-- ensure that @BigStruct@ is immutable by defining an 'NFData' instance
+-- for it. Then, you can "compact" the structure with the following code:
+--
+-- @
+-- do r <- 'compact' =<< loadBigStruct
+-- let x = 'getCompact' r :: BigStruct
+-- -- Do things with x
+-- @
+--
+-- Note that 'compact' will not preserve internal sharing; use
+-- 'compactWithSharing' (which is 10x slower) if you have cycles and/or
+-- must preserve sharing. The 'Compact' pointer @r@ can be used
+-- to add more data to a compact region; see 'compactAdd' or
+-- 'compactAddWithSharing'.
+--
+-- The implementation of compact regions is described by:
+--
+-- * Edward Z. Yang, Giovanni Campagna, Ömer Ağacan, Ahmed El-Hassany, Abhishek
+-- Kulkarni, Ryan Newton. \"/Efficient communication and Collection with Compact
+-- Normal Forms/\". In Proceedings of the 20th ACM SIGPLAN International
+-- Conference on Functional Programming. September 2015. <http://ezyang.com/compact.html>
+--
+-- This library is supported by GHC 8.2 and later.
module Data.Compact (
-- * The Compact type
@@ -47,15 +85,21 @@ import GHC.Types
import Data.Compact.Internal as Internal
--- | Retrieve the object that was stored in a 'Compact'
+-- | Retrieve a direct pointer to the value pointed at by a 'Compact' reference.
+-- If you have used 'compactAdd', there may be multiple 'Compact' references
+-- into the same compact region. Upholds the property:
+--
+-- > inCompact c (getCompact c) == True
+--
getCompact :: Compact a -> a
getCompact (Compact _ obj _) = obj
-- | Compact a value. /O(size of unshared data)/
--
-- If the structure contains any internal sharing, the shared data
--- will be duplicated during the compaction process. Loops if the
--- structure contains cycles.
+-- will be duplicated during the compaction process. This will
+-- not terminate if the structure contains cycles (use 'compactWithSharing'
+-- instead).
--
-- The NFData constraint is just to ensure that the object contains no
-- functions, 'compact' does not actually use it. If your object
@@ -80,39 +124,56 @@ compact = Internal.compactSized 31268 False
compactWithSharing :: NFData a => a -> IO (Compact a)
compactWithSharing = Internal.compactSized 31268 True
--- | Add a value to an existing 'Compact'. Behaves exactly like
--- 'compact' with respect to sharing and the 'NFData' constraint.
+-- | Add a value to an existing 'Compact'. This will help you avoid
+-- copying when the value contains pointers into the compact region,
+-- but remember that after compaction this value will only be deallocated
+-- with the entire compact region.
+--
+-- Behaves exactly like 'compact' with respect to sharing and the 'NFData'
+-- constraint.
+--
compactAdd :: NFData a => Compact b -> a -> IO (Compact a)
compactAdd (Compact compact# _ lock) a = withMVar lock $ \_ -> IO $ \s ->
case compactAdd# compact# a s of { (# s1, pk #) ->
(# s1, Compact compact# pk lock #) }
--- | Add a value to an existing 'Compact'. Behaves exactly like
--- 'compactWithSharing' with respect to sharing and the 'NFData'
--- constraint.
+-- | Add a value to an existing 'Compact', like 'compactAdd', but
+-- behaving exactly like 'compactWithSharing' with respect to
+-- sharing and the 'NFData' constraint.
+--
compactAddWithSharing :: NFData a => Compact b -> a -> IO (Compact a)
compactAddWithSharing (Compact compact# _ lock) a =
withMVar lock $ \_ -> IO $ \s ->
case compactAddWithSharing# compact# a s of { (# s1, pk #) ->
(# s1, Compact compact# pk lock #) }
-
--- | Check if the second argument is inside the 'Compact'
+-- | Check if the second argument is inside the passed 'Compact'.
+--
inCompact :: Compact b -> a -> IO Bool
inCompact (Compact buffer _ _) !val =
IO (\s -> case compactContains# buffer val s of
(# s', v #) -> (# s', isTrue# v #) )
--- | Check if the argument is in any 'Compact'
+-- | Check if the argument is in any 'Compact'. If true, the value in question
+-- is also fully evaluated, since any value in a compact region must
+-- be fully evaluated.
+--
isCompact :: a -> IO Bool
isCompact !val =
IO (\s -> case compactContainsAny# val s of
(# s', v #) -> (# s', isTrue# v #) )
+-- | Returns the size in bytes of the compact region.
+--
compactSize :: Compact a -> IO Word
compactSize (Compact buffer _ lock) = withMVar lock $ \_ -> IO $ \s0 ->
case compactSize# buffer s0 of (# s1, sz #) -> (# s1, W# sz #)
+-- | *Experimental.* This function doesn't actually resize a compact
+-- region; rather, it changes the default block size which we allocate
+-- when the current block runs out of space, and also appends a block
+-- to the compact region.
+--
compactResize :: Compact a -> Word -> IO ()
compactResize (Compact oldBuffer _ lock) (W# new_size) =
withMVar lock $ \_ -> IO $ \s ->