diff options
author | Austin Seipp <austin@well-typed.com> | 2015-08-23 20:08:04 +0200 |
---|---|---|
committer | Thomas Miedema <thomasmiedema@gmail.com> | 2015-08-23 20:08:04 +0200 |
commit | 4e9a41e1ba29afc231f61cee53fce1b04ed52370 (patch) | |
tree | 194b3a7555d4036afaf9a70880ee26073c8e2113 /compiler/utils | |
parent | 98f8c9e597bc54b16f588a4641d8fe3bad36c7bb (diff) | |
download | haskell-wip/D1159.tar.gz |
compiler/iface: compress .hi fileswip/D1159
Summary:
Compress all interface files generated by the compiler with LZ4. While
having an extremely small amount of code, LZ4 is both very fast at
compression and decompression while having quite good space saving
properties.
Non-scientific size test: size of stage2 compiler .hi files:
`find ./compiler/stage2 -type f -iname '*.hi' -exec du -ch {} + | grep total$`
Without this patch: 22MB of .hi files for stage2.
With this patch: 9.2MB of .hi files for stage2.
Signed-off-by: Austin Seipp <austin@well-typed.com>
Test Plan: I ran `./validate`
Reviewers: hvr, bgamari, thomie
Subscribers: duncan
Differential Revision: https://phabricator.haskell.org/D1159
Diffstat (limited to 'compiler/utils')
-rw-r--r-- | compiler/utils/Binary.hs | 37 | ||||
-rw-r--r-- | compiler/utils/LZ4.hs | 141 |
2 files changed, 159 insertions, 19 deletions
diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs index 8f0d8e50dc..e06891da11 100644 --- a/compiler/utils/Binary.hs +++ b/compiler/utils/Binary.hs @@ -70,10 +70,12 @@ import FastMutInt import Fingerprint import BasicTypes import SrcLoc +import qualified LZ4 as LZ4 import Foreign import Data.Array import Data.ByteString (ByteString) +import qualified Data.ByteString as B import qualified Data.ByteString.Internal as BS import qualified Data.ByteString.Unsafe as BS import Data.IORef @@ -81,8 +83,7 @@ import Data.Char ( ord, chr ) import Data.Time import Data.Typeable import Data.Typeable.Internal -import Control.Monad ( when ) -import System.IO as IO +import Control.Monad ( when, liftM ) import System.IO.Unsafe ( unsafeInterleaveIO ) import System.IO.Error ( mkIOError, eofErrorType ) import GHC.Real ( Ratio(..) ) @@ -184,29 +185,27 @@ isEOFBin (BinMem _ ix_r sz_r _) = do writeBinMem :: BinHandle -> FilePath -> IO () writeBinMem (BinMem _ ix_r _ arr_r) fn = do - h <- openBinaryFile fn WriteMode arr <- readIORef arr_r ix <- readFastMutInt ix_r - withForeignPtr arr $ \p -> hPutBuf h p ix - hClose h + let bs = LZ4.compress $ BS.fromForeignPtr arr 0 ix + case bs of + Nothing -> error "Binary.writeBinMem: compression failed" + Just x -> B.writeFile fn x readBinMem :: FilePath -> IO BinHandle -- Return a BinHandle with a totally undefined State readBinMem filename = do - h <- openBinaryFile filename ReadMode - filesize' <- hFileSize h - let filesize = fromIntegral filesize' - arr <- mallocForeignPtrBytes (filesize*2) - count <- withForeignPtr arr $ \p -> hGetBuf h p filesize - when (count /= filesize) $ - error ("Binary.readBinMem: only read " ++ show count ++ " bytes") - hClose h - arr_r <- newIORef arr - ix_r <- newFastMutInt - writeFastMutInt ix_r 0 - sz_r <- newFastMutInt - writeFastMutInt sz_r filesize - return (BinMem noUserData ix_r sz_r arr_r) + bs <- liftM LZ4.decompress (B.readFile filename) + case bs of + Nothing -> error "Binary.readBinMem: decompression failed" + Just x -> do + let (arr, ix, size) = BS.toForeignPtr x + arr_r <- newIORef arr + ix_r <- newFastMutInt + writeFastMutInt ix_r ix + sz_r <- newFastMutInt + writeFastMutInt sz_r size + return (BinMem noUserData ix_r sz_r arr_r) fingerprintBinMem :: BinHandle -> IO Fingerprint fingerprintBinMem (BinMem _ ix_r _ arr_r) = do diff --git a/compiler/utils/LZ4.hs b/compiler/utils/LZ4.hs new file mode 100644 index 0000000000..337b415227 --- /dev/null +++ b/compiler/utils/LZ4.hs @@ -0,0 +1,141 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} +-- | +-- Module : LZ4 +-- Copyright : (c) Mark Wotton, Austin Seipp 2012-2015 +-- License : BSD3 +-- +-- Compression utilities (currently utilizing @LZ4 r127@). +-- +module LZ4 + ( compress -- :: S.ByteString -> S.ByteString + , decompress -- :: S.ByteString -> Maybe S.ByteString + ) where + +import Prelude hiding (max) +import Data.Word +import Data.Bits +import Foreign.Ptr +import Foreign.C +--import Foreign.Marshal.Alloc +import System.IO.Unsafe (unsafePerformIO) +#if __GLASGOW_HASKELL__ < 709 +import Control.Applicative +#endif +--import Control.Monad +import Data.Monoid ((<>)) + +import qualified Data.ByteString as S +import qualified Data.ByteString.Internal as SI +import qualified Data.ByteString.Lazy.Builder as B +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString.Unsafe as U + +-------------------------------------------------------------------------------- +-- Compression + +-- | Compresses the input 'ByteString'. +-- +-- Will return 'Nothing' if the compression fails. Otherwise, returns +-- @Just xs@ with the compressed string. +compress :: S.ByteString -> Maybe S.ByteString +compress xs | S.null xs = Just S.empty +compress xs = unsafePerformIO $ + U.unsafeUseAsCStringLen xs $ \(cstr,len) -> do + let len' = fromIntegral len :: CInt + let max = c_LZ4_compressBound len' + bs <- SI.createAndTrim (fromIntegral max) $ \output -> + fromIntegral <$> c_LZ4_compress cstr output len' + case (S.null bs) of + True -> return Nothing + -- Prefix the compressed string with the uncompressed length + False -> return $ Just (format (fromIntegral len) bs) +{-# INLINEABLE compress #-} + +-- | Compresses a block of allocated memory and passes the compressed +-- block on to a continuation. +{- +compress :: Ptr a -- ^ Memory block + -> Int -- ^ Size of memory block (in bytes) + -> (Ptr a -> Int -> IO a) -- ^ Continuation given compressed memory + -> IO () +compress ptr count k = do + let len' = fromIntegral count :: CInt + let max = fromIntegral (c_LZ4_compressBound len') + allocaBytes max $ \output -> do + sz <- c_LZ4_compress ptr output len' + when (sz == 0) $ error "LZ4: compression failed" + k output (fromIntegral sz) + return () +-} + +-------------------------------------------------------------------------------- +-- Decompression + +-- | Decompress the input 'ByteString'. +decompress :: S.ByteString -> Maybe S.ByteString +decompress xs | S.null xs = Just S.empty +-- Get the length of the uncompressed buffer and do our thing +decompress xs = maybe Nothing (unsafePerformIO . go) (unformat xs) + where + go (l, str) = + U.unsafeUseAsCString str $ \cstr -> do + out <- SI.createAndTrim l $ \p -> do + r :: Int <- fromIntegral <$> c_LZ4_uncompress cstr p (fromIntegral l) + --- NOTE: r is the count of bytes c_LZ4_uncompress read from + --- input buffer, and NOT the count of bytes used in result + --- buffer + return $! if (r <= 0) then 0 else l + return $! if (S.null out) then Nothing else (Just out) +{-# INLINEABLE decompress #-} + +-------------------------------------------------------------------------------- +-- Utilities + +-- | Pushes a Word32 and a ByteString into the format we use to correctly +-- encode/decode. +format :: Word32 -> S.ByteString -> S.ByteString +format l xs = L.toStrict $ B.toLazyByteString $ + B.word32LE l + <> B.word32LE (fromIntegral $ S.length xs) + <> B.byteString xs + +-- | Gets a ByteString and it's length from the compressed format. +unformat :: S.ByteString -> Maybe (Int, S.ByteString) +unformat xs + | S.length xs < 8 = Nothing -- Need at least 8 bytes + | bsLen /= S.length rest = Nothing -- Header doesn't match real size + | otherwise = Just (fromIntegral origLen, rest) + where + origLen = fromIntegral (read32LE l0 l1 l2 l3) :: Int + bsLen = fromIntegral (read32LE s0 s1 s2 s3) :: Int + + [l0,l1,l2,l3] = S.unpack (S.take 4 xs) + [s0,s1,s2,s3] = S.unpack (S.take 4 $ S.drop 4 xs) + rest = S.drop 8 xs + +read32LE :: Word8 -> Word8 -> Word8 -> Word8 -> Word32 +read32LE x0 x1 x2 x3 + = fi x0 + (fi x1 `shiftL` 8) + (fi x2 `shiftL` 16) + (fi x3 `shiftL` 24) + where fi = fromIntegral :: Word8 -> Word32 + +-------------------------------------------------------------------------------- +-- FFI Bindings + +-- | Worst case compression bounds on an input string. +foreign import ccall unsafe "LZ4_compressBound" + c_LZ4_compressBound :: CInt -> CInt + +-- | Compresses a string. +foreign import ccall unsafe "LZ4_compress" + c_LZ4_compress :: Ptr a -- ^ Source + -> Ptr b -- ^ Dest + -> CInt -- ^ Input size + -> IO CInt -- ^ Result + +-- | Decompresses a string. +foreign import ccall unsafe "LZ4_decompress_fast" + c_LZ4_uncompress :: Ptr a -- ^ Source + -> Ptr b -- ^ Dest + -> CInt -- ^ Size of ORIGINAL INPUT + -> IO CInt -- ^ Result |