diff options
-rw-r--r-- | compiler/utils/Binary.hs | 96 |
1 files changed, 74 insertions, 22 deletions
diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs index 5734528458..baca4be929 100644 --- a/compiler/utils/Binary.hs +++ b/compiler/utils/Binary.hs @@ -4,6 +4,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE BangPatterns #-} {-# OPTIONS_GHC -O2 -funbox-strict-fields #-} -- We always optimise this, otherwise performance of a non-optimised @@ -79,11 +80,12 @@ import qualified Data.ByteString.Unsafe as BS import Data.IORef import Data.Char ( ord, chr ) import Data.Time +import Data.List (unfoldr) import Type.Reflection import Type.Reflection.Unsafe import Data.Kind (Type) import GHC.Exts (TYPE, RuntimeRep(..), VecCount(..), VecElem(..)) -import Control.Monad ( when ) +import Control.Monad ( when, (<$!>) ) import System.IO as IO import System.IO.Unsafe ( unsafeInterleaveIO ) import System.IO.Error ( mkIOError, eofErrorType ) @@ -502,40 +504,90 @@ instance Binary DiffTime where get bh = do r <- get bh return $ fromRational r ---to quote binary-0.3 on this code idea, --- --- TODO This instance is not architecture portable. GMP stores numbers as --- arrays of machine sized words, so the byte format is not portable across --- architectures with different endianness and word size. --- --- This makes it hard (impossible) to make an equivalent instance --- with code that is compilable with non-GHC. Do we need any instance --- Binary Integer, and if so, does it have to be blazing fast? Or can --- we just change this instance to be portable like the rest of the --- instances? (binary package has code to steal for that) --- --- yes, we need Binary Integer and Binary Rational in basicTypes/Literal.hs +{- +Finally - a reasonable portable Integer instance. + +We used to encode values in the Int32 range as such, +falling back to a string of all things. In either case +we stored a tag byte to discriminate between the two cases. + +This made some sense as it's highly portable but also not very +efficient. + +However GHC stores a surprisingly large number off large Integer +values. In the examples looked at between 25% and 50% of Integers +serialized were outside of the Int32 range. + +Consider a valie like `2724268014499746065`, some sort of hash +actually generated by GHC. +In the old scheme this was encoded as a list of 19 chars. This +gave a size of 77 Bytes, one for the length of the list and 76 +since we encod chars as Word32 as well. + +We can easily do better. The new plan is: + +* Start with a tag byte + * 0 => Int32 value + * 1 => Int64 + * 2 => Negative large interger + * 3 => Positive large integer +* Followed by the value: + * Int32/64 is encoded as usual + * Large integers are encoded as a list of bytes (Word8). + We use Data.Bits which defines a bit order independent of the representation. + Values are stored LSB first. + +This means our example value `2724268014499746065` is now only 10 bytes large. +* One byte tag +* One byte for the length of the [Word8] list. +* 8 bytes for the actual date. + +The new scheme also does not depend in any way on +architecture specific details. + +The instance is used for in Binary Integer and Binary Rational in basicTypes/Literal.hs +-} instance Binary Integer where put_ bh i | i >= lo32 && i <= hi32 = do putWord8 bh 0 put_ bh (fromIntegral i :: Int32) - | otherwise = do + | i >= lo64 && i <= hi64 = do putWord8 bh 1 - put_ bh (show i) + put_ bh (fromIntegral i :: Int64) + | otherwise = do + if i < 0 + then putWord8 bh 2 + else putWord8 bh 3 + put_ bh (unroll $ abs i) where lo32 = fromIntegral (minBound :: Int32) hi32 = fromIntegral (maxBound :: Int32) - + lo64 = fromIntegral (minBound :: Int64) + hi64 = fromIntegral (maxBound :: Int64) get bh = do int_kind <- getWord8 bh case int_kind of - 0 -> fromIntegral <$> (get bh :: IO Int32) - _ -> do str <- get bh - case reads str of - [(i, "")] -> return i - _ -> fail ("Binary integer: got " ++ show str) + 0 -> fromIntegral <$!> (get bh :: IO Int32) + 1 -> fromIntegral <$!> (get bh :: IO Int64) + -- Large integer + _ -> do + !i <- roll <$!> (get bh :: IO [Word8]) :: IO Integer + if int_kind == 2 then return $! negate i -- Negative + else return $! i -- Positive + +unroll :: (Integral a, Bits a) => a -> [Word8] +unroll = unfoldr step + where + step 0 = Nothing + step i = Just (fromIntegral i, i `shiftR` 8) + +roll :: (Integral a, Bits a) => [Word8] -> a +roll = foldl' unstep 0 . reverse + where + unstep a b = a `shiftL` 8 .|. fromIntegral b + {- -- This code is currently commented out. |