diff options
Diffstat (limited to 'libraries/ghc-compact/GHC/Compact.hs')
-rw-r--r-- | libraries/ghc-compact/GHC/Compact.hs | 264 |
1 files changed, 264 insertions, 0 deletions
diff --git a/libraries/ghc-compact/GHC/Compact.hs b/libraries/ghc-compact/GHC/Compact.hs new file mode 100644 index 0000000000..e3efaf24bc --- /dev/null +++ b/libraries/ghc-compact/GHC/Compact.hs @@ -0,0 +1,264 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# OPTIONS_GHC -Wno-redundant-constraints -Wno-name-shadowing #-} + +----------------------------------------------------------------------------- +-- | +-- Module : GHC.Compact +-- Copyright : (c) The University of Glasgow 2001-2009 +-- (c) Giovanni Campagna <gcampagn@cs.stanford.edu> 2014 +-- License : BSD-style (see the file LICENSE) +-- +-- Maintainer : libraries@haskell.org +-- Stability : unstable +-- Portability : non-portable (GHC Extensions) +-- +-- This module provides a data structure, called a 'Compact', for +-- holding immutable, fully evaluated data in a consecutive block of memory. +-- Compact regions are good for two things: +-- +-- 1. Data in a compact region is not traversed during GC; any +-- incoming pointer to a compact region keeps the entire region +-- live. Thus, if you put a long-lived data structure in a compact +-- region, you may save a lot of cycles during major collections, +-- since you will no longer be (uselessly) retraversing this +-- data structure. +-- +-- 2. Because the data is stored contiguously, you can easily +-- dump the memory to disk and/or send it over the network. +-- For applications that are not bandwidth bound (GHC's heap +-- representation can be as much of a x4 expansion over a +-- binary serialization), this can lead to substantial speed ups. +-- +-- For example, suppose you have a function @loadBigStruct :: IO BigStruct@, +-- which loads a large data structure from the file system. You can "compact" +-- the structure with the following code: +-- +-- @ +-- do r <- 'compact' =<< loadBigStruct +-- let x = 'getCompact' r :: BigStruct +-- -- Do things with x +-- @ +-- +-- Note that 'compact' will not preserve internal sharing; use +-- 'compactWithSharing' (which is 10x slower) if you have cycles and/or +-- must preserve sharing. The 'Compact' pointer @r@ can be used +-- to add more data to a compact region; see 'compactAdd' or +-- 'compactAddWithSharing'. +-- +-- The implementation of compact regions is described by: +-- +-- * Edward Z. Yang, Giovanni Campagna, Ömer Ağacan, Ahmed El-Hassany, Abhishek +-- Kulkarni, Ryan Newton. \"/Efficient communication and Collection with Compact +-- Normal Forms/\". In Proceedings of the 20th ACM SIGPLAN International +-- Conference on Functional Programming. September 2015. <http://ezyang.com/compact.html> +-- +-- This library is supported by GHC 8.2 and later. + +module GHC.Compact ( + -- * The Compact type + Compact(..), + + -- * Compacting data + compact, + compactWithSharing, + compactAdd, + compactAddWithSharing, + + -- * Inspecting a Compact + getCompact, + inCompact, + isCompact, + compactSize, + + -- * Other utilities + compactResize, + + -- * Internal operations + mkCompact, + compactSized, + ) where + +import Control.Concurrent.MVar +import GHC.Prim +import GHC.Types + +-- | A 'Compact' contains fully evaluated, pure, immutable data. +-- +-- 'Compact' serves two purposes: +-- +-- * Data stored in a 'Compact' has no garbage collection overhead. +-- The garbage collector considers the whole 'Compact' to be alive +-- if there is a reference to any object within it. +-- +-- * A 'Compact' can be serialized, stored, and deserialized again. +-- The serialized data can only be deserialized by the exact binary +-- that created it, but it can be stored indefinitely before +-- deserialization. +-- +-- Compacts are self-contained, so compacting data involves copying +-- it; if you have data that lives in two 'Compact's, each will have a +-- separate copy of the data. +-- +-- The cost of compaction is similar to the cost of GC for the same +-- data, but it is perfomed only once. However, because +-- "Data.Compact.compact" does not stop-the-world, retaining internal +-- sharing during the compaction process is very costly. The user +-- can choose wether to 'compact' or 'compactWithSharing'. +-- +-- When you have a @'Compact' a@, you can get a pointer to the actual object +-- in the region using "Data.Compact.getCompact". The 'Compact' type +-- serves as handle on the region itself; you can use this handle +-- to add data to a specific 'Compact' with 'compactAdd' or +-- 'compactAddWithSharing' (giving you a new handle which corresponds +-- to the same compact region, but points to the newly added object +-- in the region). At the moment, due to technical reasons, +-- it's not possible to get the @'Compact' a@ if you only have an @a@, +-- so make sure you hold on to the handle as necessary. +-- +-- Data in a compact doesn't ever move, so compacting data is also a +-- way to pin arbitrary data structures in memory. +-- +-- There are some limitations on what can be compacted: +-- +-- * Functions. Compaction only applies to data. +-- +-- * Pinned 'ByteArray#' objects cannot be compacted. This is for a +-- good reason: the memory is pinned so that it can be referenced by +-- address (the address might be stored in a C data structure, for +-- example), so we can't make a copy of it to store in the 'Compact'. +-- +-- * Objects with mutable pointer fields also cannot be compacted, +-- because subsequent mutation would destroy the property that a compact is +-- self-contained. +-- +-- If compaction encounters any of the above, a 'CompactionFailed' +-- exception will be thrown by the compaction operation. +-- +data Compact a = Compact Compact# a (MVar ()) + -- we can *read* from a Compact without taking a lock, but only + -- one thread can be writing to the compact at any given time. + -- The MVar here is to enforce mutual exclusion among writers. + -- Note: the MVar protects the Compact# only, not the pure value 'a' + +-- | Make a new 'Compact' object, given a pointer to the true +-- underlying region. You must uphold the invariant that @a@ lives +-- in the compact region. +-- +mkCompact + :: Compact# -> a -> State# RealWorld -> (# State# RealWorld, Compact a #) +mkCompact compact# a s = + case unIO (newMVar ()) s of { (# s1, lock #) -> + (# s1, Compact compact# a lock #) } + where + unIO (IO a) = a + +-- | Transfer @a@ into a new compact region, with a preallocated size, +-- possibly preserving sharing or not. If you know how big the data +-- structure in question is, you can save time by picking an appropriate +-- block size for the compact region. +-- +compactSized :: Int -> Bool -> a -> IO (Compact a) +compactSized (I# size) share a = IO $ \s0 -> + case compactNew# (int2Word# size) s0 of { (# s1, compact# #) -> + case compactAddPrim compact# a s1 of { (# s2, pk #) -> + mkCompact compact# pk s2 }} + where + compactAddPrim + | share = compactAddWithSharing# + | otherwise = compactAdd# + +-- | Retrieve a direct pointer to the value pointed at by a 'Compact' reference. +-- If you have used 'compactAdd', there may be multiple 'Compact' references +-- into the same compact region. Upholds the property: +-- +-- > inCompact c (getCompact c) == True +-- +getCompact :: Compact a -> a +getCompact (Compact _ obj _) = obj + +-- | Compact a value. /O(size of unshared data)/ +-- +-- If the structure contains any internal sharing, the shared data +-- will be duplicated during the compaction process. This will +-- not terminate if the structure contains cycles (use 'compactWithSharing' +-- instead). +-- +-- The object in question must not contain any functions or mutable data; if it +-- does, 'compact' will raise an exception. In the future, we may add a type +-- class which will help statically check if this is the case or not. +-- +compact :: a -> IO (Compact a) +compact = compactSized 31268 False + +-- | Compact a value, retaining any internal sharing and +-- cycles. /O(size of data)/ +-- +-- This is typically about 10x slower than 'compact', because it works +-- by maintaining a hash table mapping uncompacted objects to +-- compacted objects. +-- +-- The object in question must not contain any functions or mutable data; if it +-- does, 'compact' will raise an exception. In the future, we may add a type +-- class which will help statically check if this is the case or not. +-- +compactWithSharing :: a -> IO (Compact a) +compactWithSharing = compactSized 31268 True + +-- | Add a value to an existing 'Compact'. This will help you avoid +-- copying when the value contains pointers into the compact region, +-- but remember that after compaction this value will only be deallocated +-- with the entire compact region. +-- +-- Behaves exactly like 'compact' with respect to sharing and what data +-- it accepts. +-- +compactAdd :: Compact b -> a -> IO (Compact a) +compactAdd (Compact compact# _ lock) a = withMVar lock $ \_ -> IO $ \s -> + case compactAdd# compact# a s of { (# s1, pk #) -> + (# s1, Compact compact# pk lock #) } + +-- | Add a value to an existing 'Compact', like 'compactAdd', +-- but behaving exactly like 'compactWithSharing' with respect to sharing and +-- what data it accepts. +-- +compactAddWithSharing :: Compact b -> a -> IO (Compact a) +compactAddWithSharing (Compact compact# _ lock) a = + withMVar lock $ \_ -> IO $ \s -> + case compactAddWithSharing# compact# a s of { (# s1, pk #) -> + (# s1, Compact compact# pk lock #) } + +-- | Check if the second argument is inside the passed 'Compact'. +-- +inCompact :: Compact b -> a -> IO Bool +inCompact (Compact buffer _ _) !val = + IO (\s -> case compactContains# buffer val s of + (# s', v #) -> (# s', isTrue# v #) ) + +-- | Check if the argument is in any 'Compact'. If true, the value in question +-- is also fully evaluated, since any value in a compact region must +-- be fully evaluated. +-- +isCompact :: a -> IO Bool +isCompact !val = + IO (\s -> case compactContainsAny# val s of + (# s', v #) -> (# s', isTrue# v #) ) + +-- | Returns the size in bytes of the compact region. +-- +compactSize :: Compact a -> IO Word +compactSize (Compact buffer _ lock) = withMVar lock $ \_ -> IO $ \s0 -> + case compactSize# buffer s0 of (# s1, sz #) -> (# s1, W# sz #) + +-- | *Experimental.* This function doesn't actually resize a compact +-- region; rather, it changes the default block size which we allocate +-- when the current block runs out of space, and also appends a block +-- to the compact region. +-- +compactResize :: Compact a -> Word -> IO () +compactResize (Compact oldBuffer _ lock) (W# new_size) = + withMVar lock $ \_ -> IO $ \s -> + case compactResize# oldBuffer new_size s of + s' -> (# s', () #) |