diff options
-rw-r--r-- | libraries/compact/Data/Compact.hs | 89 | ||||
-rw-r--r-- | libraries/compact/Data/Compact/Internal.hs | 42 | ||||
-rw-r--r-- | libraries/compact/Data/Compact/Serialized.hs | 14 |
3 files changed, 110 insertions, 35 deletions
diff --git a/libraries/compact/Data/Compact.hs b/libraries/compact/Data/Compact.hs index 42245c54f5..ce6bf2bb83 100644 --- a/libraries/compact/Data/Compact.hs +++ b/libraries/compact/Data/Compact.hs @@ -15,10 +15,48 @@ -- Stability : unstable -- Portability : non-portable (GHC Extensions) -- --- This module provides a data structure, called a Compact, for --- holding fully evaluated data in a consecutive block of memory. +-- This module provides a data structure, called a 'Compact', for +-- holding immutable, fully evaluated data in a consecutive block of memory. +-- Compact regions are good for two things: -- --- /Since: 1.0.0/ +-- 1. Data in a compact region is not traversed during GC; any +-- incoming pointer to a compact region keeps the entire region +-- live. Thus, if you put a long-lived data structure in a compact +-- region, you may save a lot of cycles during major collections, +-- since you will no longer be (uselessly) retraversing this +-- data structure. +-- +-- 2. Because the data is stored contiguously, you can easily +-- dump the memory to disk and/or send it over the network. +-- For applications that are not bandwidth bound (GHC's heap +-- representation can be as much of a x4 expansion over a +-- binary serialization), this can lead to substantial speed ups. +-- +-- For example, suppose you have a function @loadBigStruct :: IO BigStruct@, +-- which loads a large data structure from the file system. First, +-- ensure that @BigStruct@ is immutable by defining an 'NFData' instance +-- for it. Then, you can "compact" the structure with the following code: +-- +-- @ +-- do r <- 'compact' =<< loadBigStruct +-- let x = 'getCompact' r :: BigStruct +-- -- Do things with x +-- @ +-- +-- Note that 'compact' will not preserve internal sharing; use +-- 'compactWithSharing' (which is 10x slower) if you have cycles and/or +-- must preserve sharing. The 'Compact' pointer @r@ can be used +-- to add more data to a compact region; see 'compactAdd' or +-- 'compactAddWithSharing'. +-- +-- The implementation of compact regions is described by: +-- +-- * Edward Z. Yang, Giovanni Campagna, Ömer Ağacan, Ahmed El-Hassany, Abhishek +-- Kulkarni, Ryan Newton. \"/Efficient communication and Collection with Compact +-- Normal Forms/\". In Proceedings of the 20th ACM SIGPLAN International +-- Conference on Functional Programming. September 2015. <http://ezyang.com/compact.html> +-- +-- This library is supported by GHC 8.2 and later. module Data.Compact ( -- * The Compact type @@ -47,15 +85,21 @@ import GHC.Types import Data.Compact.Internal as Internal --- | Retrieve the object that was stored in a 'Compact' +-- | Retrieve a direct pointer to the value pointed at by a 'Compact' reference. +-- If you have used 'compactAdd', there may be multiple 'Compact' references +-- into the same compact region. Upholds the property: +-- +-- > inCompact c (getCompact c) == True +-- getCompact :: Compact a -> a getCompact (Compact _ obj _) = obj -- | Compact a value. /O(size of unshared data)/ -- -- If the structure contains any internal sharing, the shared data --- will be duplicated during the compaction process. Loops if the --- structure contains cycles. +-- will be duplicated during the compaction process. This will +-- not terminate if the structure contains cycles (use 'compactWithSharing' +-- instead). -- -- The NFData constraint is just to ensure that the object contains no -- functions, 'compact' does not actually use it. If your object @@ -80,39 +124,56 @@ compact = Internal.compactSized 31268 False compactWithSharing :: NFData a => a -> IO (Compact a) compactWithSharing = Internal.compactSized 31268 True --- | Add a value to an existing 'Compact'. Behaves exactly like --- 'compact' with respect to sharing and the 'NFData' constraint. +-- | Add a value to an existing 'Compact'. This will help you avoid +-- copying when the value contains pointers into the compact region, +-- but remember that after compaction this value will only be deallocated +-- with the entire compact region. +-- +-- Behaves exactly like 'compact' with respect to sharing and the 'NFData' +-- constraint. +-- compactAdd :: NFData a => Compact b -> a -> IO (Compact a) compactAdd (Compact compact# _ lock) a = withMVar lock $ \_ -> IO $ \s -> case compactAdd# compact# a s of { (# s1, pk #) -> (# s1, Compact compact# pk lock #) } --- | Add a value to an existing 'Compact'. Behaves exactly like --- 'compactWithSharing' with respect to sharing and the 'NFData' --- constraint. +-- | Add a value to an existing 'Compact', like 'compactAdd', but +-- behaving exactly like 'compactWithSharing' with respect to +-- sharing and the 'NFData' constraint. +-- compactAddWithSharing :: NFData a => Compact b -> a -> IO (Compact a) compactAddWithSharing (Compact compact# _ lock) a = withMVar lock $ \_ -> IO $ \s -> case compactAddWithSharing# compact# a s of { (# s1, pk #) -> (# s1, Compact compact# pk lock #) } - --- | Check if the second argument is inside the 'Compact' +-- | Check if the second argument is inside the passed 'Compact'. +-- inCompact :: Compact b -> a -> IO Bool inCompact (Compact buffer _ _) !val = IO (\s -> case compactContains# buffer val s of (# s', v #) -> (# s', isTrue# v #) ) --- | Check if the argument is in any 'Compact' +-- | Check if the argument is in any 'Compact'. If true, the value in question +-- is also fully evaluated, since any value in a compact region must +-- be fully evaluated. +-- isCompact :: a -> IO Bool isCompact !val = IO (\s -> case compactContainsAny# val s of (# s', v #) -> (# s', isTrue# v #) ) +-- | Returns the size in bytes of the compact region. +-- compactSize :: Compact a -> IO Word compactSize (Compact buffer _ lock) = withMVar lock $ \_ -> IO $ \s0 -> case compactSize# buffer s0 of (# s1, sz #) -> (# s1, W# sz #) +-- | *Experimental.* This function doesn't actually resize a compact +-- region; rather, it changes the default block size which we allocate +-- when the current block runs out of space, and also appends a block +-- to the compact region. +-- compactResize :: Compact a -> Word -> IO () compactResize (Compact oldBuffer _ lock) (W# new_size) = withMVar lock $ \_ -> IO $ \s -> diff --git a/libraries/compact/Data/Compact/Internal.hs b/libraries/compact/Data/Compact/Internal.hs index 2780d19b1a..2857a9d615 100644 --- a/libraries/compact/Data/Compact/Internal.hs +++ b/libraries/compact/Data/Compact/Internal.hs @@ -13,11 +13,9 @@ -- Stability : unstable -- Portability : non-portable (GHC Extensions) -- --- This module provides a data structure, called a Compact, for --- holding fully evaluated data in a consecutive block of memory. --- --- This is a private implementation detail of the package and should --- not be imported directly. +-- This module provides some internal functions and representation for dealing +-- with compact regions, which may be of interest if you need higher +-- performance. -- -- /Since: 1.0.0/ @@ -50,13 +48,20 @@ import GHC.Types -- separate copy of the data. -- -- The cost of compaction is similar to the cost of GC for the same --- data, but it is perfomed only once. However, retainining internal --- sharing during the compaction process is very costly, so it is --- optional; there are two ways to create a 'Compact': 'compact' and --- 'compactWithSharing'. --- --- Data can be added to an existing 'Compact' with 'compactAdd' or --- 'compactAddWithSharing'. +-- data, but it is perfomed only once. However, because +-- "Data.Compact.compact" does not stop-the-world, retaining internal +-- sharing during the compaction process is very costly. The user +-- can choose wether to 'compact' or 'compactWithSharing'. +-- +-- When you have a @'Compact' a@, you can get a pointer to the actual object +-- in the region using "Data.Compact.getCompact". The 'Compact' type +-- serves as handle on the region itself; you can use this handle +-- to add data to a specific 'Compact' with 'compactAdd' or +-- 'compactAddWithSharing' (giving you a new handle which corresponds +-- to the same compact region, but points to the newly added object +-- in the region). At the moment, due to technical reasons, +-- it's not possible to get the @'Compact' a@ if you only have an @a@, +-- so make sure you hold on to the handle as necessary. -- -- Data in a compact doesn't ever move, so compacting data is also a -- way to pin arbitrary data structures in memory. @@ -70,8 +75,8 @@ import GHC.Types -- address (the address might be stored in a C data structure, for -- example), so we can't make a copy of it to store in the 'Compact'. -- --- * Mutable objects also cannot be compacted, because subsequent --- mutation would destroy the property that a compact is +-- * Objects with mutable pointer fields also cannot be compacted, +-- because subsequent mutation would destroy the property that a compact is -- self-contained. -- -- If compaction encounters any of the above, a 'CompactionFailed' @@ -83,6 +88,10 @@ data Compact a = Compact Compact# a (MVar ()) -- The MVar here is to enforce mutual exclusion among writers. -- Note: the MVar protects the Compact# only, not the pure value 'a' +-- | Make a new 'Compact' object, given a pointer to the true +-- underlying region. You must uphold the invariant that @a@ lives +-- in the compact region. +-- mkCompact :: Compact# -> a -> State# RealWorld -> (# State# RealWorld, Compact a #) mkCompact compact# a s = @@ -91,6 +100,11 @@ mkCompact compact# a s = where unIO (IO a) = a +-- | Transfer @a@ into a new compact region, with a preallocated size, +-- possibly preserving sharing or not. If you know how big the data +-- structure in question is, you can save time by picking an appropriate +-- block size for the compact region. +-- compactSized :: NFData a => Int -> Bool -> a -> IO (Compact a) compactSized (I# size) share a = IO $ \s0 -> case compactNew# (int2Word# size) s0 of { (# s1, compact# #) -> diff --git a/libraries/compact/Data/Compact/Serialized.hs b/libraries/compact/Data/Compact/Serialized.hs index bdc2aff733..bf2b4f7918 100644 --- a/libraries/compact/Data/Compact/Serialized.hs +++ b/libraries/compact/Data/Compact/Serialized.hs @@ -14,9 +14,6 @@ -- Stability : unstable -- Portability : non-portable (GHC Extensions) -- --- This module provides a data structure, called a Compact, for --- holding fully evaluated data in a consecutive block of memory. --- -- This module contains support for serializing a Compact for network -- transmission and on-disk storage. -- @@ -45,7 +42,7 @@ import Control.DeepSeq(NFData, force) import Data.Compact.Internal --- |A serialized version of the 'Compact' metadata (each block with +-- | A serialized version of the 'Compact' metadata (each block with -- address and size and the address of the root). This structure is -- meant to be sent alongside the actual 'Compact' data. It can be -- sent out of band in advance if the data is to be sent over RDMA @@ -58,7 +55,6 @@ data SerializedCompact a = SerializedCompact addrIsNull :: Addr# -> Bool addrIsNull addr = isTrue# (nullAddr# `eqAddr#` addr) - compactGetFirstBlock :: Compact# -> IO (Ptr a, Word) compactGetFirstBlock buffer = IO (\s -> case compactGetFirstBlock# buffer s of @@ -85,10 +81,11 @@ mkBlockList buffer = compactGetFirstBlock buffer >>= go -- before func had a chance to copy everything into its own -- buffers/sockets/whatever --- |Serialize the 'Compact', and call the provided function with +-- | Serialize the 'Compact', and call the provided function with -- with the 'Compact' serialized representation. The resulting -- action will be executed synchronously before this function -- completes. +-- {-# NOINLINE withSerializedCompact #-} withSerializedCompact :: NFData c => Compact a -> (SerializedCompact a -> IO c) -> IO c @@ -115,7 +112,7 @@ fixupPointers firstBlock rootAddr s = (# root #) -> case mkCompact buffer root s' of (# s'', c #) -> (# s'', Just c #) --- |Deserialize a 'SerializedCompact' into a in-memory 'Compact'. The +-- | Deserialize a 'SerializedCompact' into a in-memory 'Compact'. The -- provided function will be called with the address and size of each -- newly allocated block in succession, and should fill the memory -- from the external source (eg. by reading from a socket or from disk) @@ -191,6 +188,9 @@ sanityCheckByteStrings (SerializedCompact scl _) bsl = go scl bsl go ((_, size):scs) (bs:bss) = fromIntegral size == ByteString.length bs && go scs bss +-- | Convenience function for importing a compact region that is represented +-- by a list of strict 'ByteString's. +-- importCompactByteStrings :: SerializedCompact a -> [ByteString.ByteString] -> IO (Maybe (Compact a)) importCompactByteStrings serialized stringList = |