summaryrefslogtreecommitdiff
path: root/compiler/utils
diff options
context:
space:
mode:
authorAustin Seipp <austin@well-typed.com>2015-08-23 20:08:04 +0200
committerThomas Miedema <thomasmiedema@gmail.com>2015-08-23 20:08:04 +0200
commit4e9a41e1ba29afc231f61cee53fce1b04ed52370 (patch)
tree194b3a7555d4036afaf9a70880ee26073c8e2113 /compiler/utils
parent98f8c9e597bc54b16f588a4641d8fe3bad36c7bb (diff)
downloadhaskell-wip/D1159.tar.gz
compiler/iface: compress .hi fileswip/D1159
Summary: Compress all interface files generated by the compiler with LZ4. While having an extremely small amount of code, LZ4 is both very fast at compression and decompression while having quite good space saving properties. Non-scientific size test: size of stage2 compiler .hi files: `find ./compiler/stage2 -type f -iname '*.hi' -exec du -ch {} + | grep total$` Without this patch: 22MB of .hi files for stage2. With this patch: 9.2MB of .hi files for stage2. Signed-off-by: Austin Seipp <austin@well-typed.com> Test Plan: I ran `./validate` Reviewers: hvr, bgamari, thomie Subscribers: duncan Differential Revision: https://phabricator.haskell.org/D1159
Diffstat (limited to 'compiler/utils')
-rw-r--r--compiler/utils/Binary.hs37
-rw-r--r--compiler/utils/LZ4.hs141
2 files changed, 159 insertions, 19 deletions
diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs
index 8f0d8e50dc..e06891da11 100644
--- a/compiler/utils/Binary.hs
+++ b/compiler/utils/Binary.hs
@@ -70,10 +70,12 @@ import FastMutInt
import Fingerprint
import BasicTypes
import SrcLoc
+import qualified LZ4 as LZ4
import Foreign
import Data.Array
import Data.ByteString (ByteString)
+import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Unsafe as BS
import Data.IORef
@@ -81,8 +83,7 @@ import Data.Char ( ord, chr )
import Data.Time
import Data.Typeable
import Data.Typeable.Internal
-import Control.Monad ( when )
-import System.IO as IO
+import Control.Monad ( when, liftM )
import System.IO.Unsafe ( unsafeInterleaveIO )
import System.IO.Error ( mkIOError, eofErrorType )
import GHC.Real ( Ratio(..) )
@@ -184,29 +185,27 @@ isEOFBin (BinMem _ ix_r sz_r _) = do
writeBinMem :: BinHandle -> FilePath -> IO ()
writeBinMem (BinMem _ ix_r _ arr_r) fn = do
- h <- openBinaryFile fn WriteMode
arr <- readIORef arr_r
ix <- readFastMutInt ix_r
- withForeignPtr arr $ \p -> hPutBuf h p ix
- hClose h
+ let bs = LZ4.compress $ BS.fromForeignPtr arr 0 ix
+ case bs of
+ Nothing -> error "Binary.writeBinMem: compression failed"
+ Just x -> B.writeFile fn x
readBinMem :: FilePath -> IO BinHandle
-- Return a BinHandle with a totally undefined State
readBinMem filename = do
- h <- openBinaryFile filename ReadMode
- filesize' <- hFileSize h
- let filesize = fromIntegral filesize'
- arr <- mallocForeignPtrBytes (filesize*2)
- count <- withForeignPtr arr $ \p -> hGetBuf h p filesize
- when (count /= filesize) $
- error ("Binary.readBinMem: only read " ++ show count ++ " bytes")
- hClose h
- arr_r <- newIORef arr
- ix_r <- newFastMutInt
- writeFastMutInt ix_r 0
- sz_r <- newFastMutInt
- writeFastMutInt sz_r filesize
- return (BinMem noUserData ix_r sz_r arr_r)
+ bs <- liftM LZ4.decompress (B.readFile filename)
+ case bs of
+ Nothing -> error "Binary.readBinMem: decompression failed"
+ Just x -> do
+ let (arr, ix, size) = BS.toForeignPtr x
+ arr_r <- newIORef arr
+ ix_r <- newFastMutInt
+ writeFastMutInt ix_r ix
+ sz_r <- newFastMutInt
+ writeFastMutInt sz_r size
+ return (BinMem noUserData ix_r sz_r arr_r)
fingerprintBinMem :: BinHandle -> IO Fingerprint
fingerprintBinMem (BinMem _ ix_r _ arr_r) = do
diff --git a/compiler/utils/LZ4.hs b/compiler/utils/LZ4.hs
new file mode 100644
index 0000000000..337b415227
--- /dev/null
+++ b/compiler/utils/LZ4.hs
@@ -0,0 +1,141 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+-- |
+-- Module : LZ4
+-- Copyright : (c) Mark Wotton, Austin Seipp 2012-2015
+-- License : BSD3
+--
+-- Compression utilities (currently utilizing @LZ4 r127@).
+--
+module LZ4
+ ( compress -- :: S.ByteString -> S.ByteString
+ , decompress -- :: S.ByteString -> Maybe S.ByteString
+ ) where
+
+import Prelude hiding (max)
+import Data.Word
+import Data.Bits
+import Foreign.Ptr
+import Foreign.C
+--import Foreign.Marshal.Alloc
+import System.IO.Unsafe (unsafePerformIO)
+#if __GLASGOW_HASKELL__ < 709
+import Control.Applicative
+#endif
+--import Control.Monad
+import Data.Monoid ((<>))
+
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Internal as SI
+import qualified Data.ByteString.Lazy.Builder as B
+import qualified Data.ByteString.Lazy as L
+import qualified Data.ByteString.Unsafe as U
+
+--------------------------------------------------------------------------------
+-- Compression
+
+-- | Compresses the input 'ByteString'.
+--
+-- Will return 'Nothing' if the compression fails. Otherwise, returns
+-- @Just xs@ with the compressed string.
+compress :: S.ByteString -> Maybe S.ByteString
+compress xs | S.null xs = Just S.empty
+compress xs = unsafePerformIO $
+ U.unsafeUseAsCStringLen xs $ \(cstr,len) -> do
+ let len' = fromIntegral len :: CInt
+ let max = c_LZ4_compressBound len'
+ bs <- SI.createAndTrim (fromIntegral max) $ \output ->
+ fromIntegral <$> c_LZ4_compress cstr output len'
+ case (S.null bs) of
+ True -> return Nothing
+ -- Prefix the compressed string with the uncompressed length
+ False -> return $ Just (format (fromIntegral len) bs)
+{-# INLINEABLE compress #-}
+
+-- | Compresses a block of allocated memory and passes the compressed
+-- block on to a continuation.
+{-
+compress :: Ptr a -- ^ Memory block
+ -> Int -- ^ Size of memory block (in bytes)
+ -> (Ptr a -> Int -> IO a) -- ^ Continuation given compressed memory
+ -> IO ()
+compress ptr count k = do
+ let len' = fromIntegral count :: CInt
+ let max = fromIntegral (c_LZ4_compressBound len')
+ allocaBytes max $ \output -> do
+ sz <- c_LZ4_compress ptr output len'
+ when (sz == 0) $ error "LZ4: compression failed"
+ k output (fromIntegral sz)
+ return ()
+-}
+
+--------------------------------------------------------------------------------
+-- Decompression
+
+-- | Decompress the input 'ByteString'.
+decompress :: S.ByteString -> Maybe S.ByteString
+decompress xs | S.null xs = Just S.empty
+-- Get the length of the uncompressed buffer and do our thing
+decompress xs = maybe Nothing (unsafePerformIO . go) (unformat xs)
+ where
+ go (l, str) =
+ U.unsafeUseAsCString str $ \cstr -> do
+ out <- SI.createAndTrim l $ \p -> do
+ r :: Int <- fromIntegral <$> c_LZ4_uncompress cstr p (fromIntegral l)
+ --- NOTE: r is the count of bytes c_LZ4_uncompress read from
+ --- input buffer, and NOT the count of bytes used in result
+ --- buffer
+ return $! if (r <= 0) then 0 else l
+ return $! if (S.null out) then Nothing else (Just out)
+{-# INLINEABLE decompress #-}
+
+--------------------------------------------------------------------------------
+-- Utilities
+
+-- | Pushes a Word32 and a ByteString into the format we use to correctly
+-- encode/decode.
+format :: Word32 -> S.ByteString -> S.ByteString
+format l xs = L.toStrict $ B.toLazyByteString $
+ B.word32LE l
+ <> B.word32LE (fromIntegral $ S.length xs)
+ <> B.byteString xs
+
+-- | Gets a ByteString and it's length from the compressed format.
+unformat :: S.ByteString -> Maybe (Int, S.ByteString)
+unformat xs
+ | S.length xs < 8 = Nothing -- Need at least 8 bytes
+ | bsLen /= S.length rest = Nothing -- Header doesn't match real size
+ | otherwise = Just (fromIntegral origLen, rest)
+ where
+ origLen = fromIntegral (read32LE l0 l1 l2 l3) :: Int
+ bsLen = fromIntegral (read32LE s0 s1 s2 s3) :: Int
+
+ [l0,l1,l2,l3] = S.unpack (S.take 4 xs)
+ [s0,s1,s2,s3] = S.unpack (S.take 4 $ S.drop 4 xs)
+ rest = S.drop 8 xs
+
+read32LE :: Word8 -> Word8 -> Word8 -> Word8 -> Word32
+read32LE x0 x1 x2 x3
+ = fi x0 + (fi x1 `shiftL` 8) + (fi x2 `shiftL` 16) + (fi x3 `shiftL` 24)
+ where fi = fromIntegral :: Word8 -> Word32
+
+--------------------------------------------------------------------------------
+-- FFI Bindings
+
+-- | Worst case compression bounds on an input string.
+foreign import ccall unsafe "LZ4_compressBound"
+ c_LZ4_compressBound :: CInt -> CInt
+
+-- | Compresses a string.
+foreign import ccall unsafe "LZ4_compress"
+ c_LZ4_compress :: Ptr a -- ^ Source
+ -> Ptr b -- ^ Dest
+ -> CInt -- ^ Input size
+ -> IO CInt -- ^ Result
+
+-- | Decompresses a string.
+foreign import ccall unsafe "LZ4_decompress_fast"
+ c_LZ4_uncompress :: Ptr a -- ^ Source
+ -> Ptr b -- ^ Dest
+ -> CInt -- ^ Size of ORIGINAL INPUT
+ -> IO CInt -- ^ Result