summaryrefslogtreecommitdiff
path: root/compiler/utils
diff options
context:
space:
mode:
authorBen Gamari <bgamari.foss@gmail.com>2016-10-13 21:53:13 -0400
committerBen Gamari <ben@smart-cactus.org>2016-10-13 22:57:13 -0400
commit34d933d6a821edf5abfcbee76d9325362fc28a13 (patch)
treec371fe1f7d1b6ea6d8b3fb9185d10bf4115fb2e6 /compiler/utils
parent1cccb646e2e4bcf3bbb1f2ad01737f7e745b5f1b (diff)
downloadhaskell-34d933d6a821edf5abfcbee76d9325362fc28a13.tar.gz
Clean up handling of known-key Names in interface files
Previously BinIface had some dedicated logic for handling tuple names in the symbol table. As it turns out, this logic was essentially dead code as it was superceded by the special handling of known-key things. Here we cull the tuple code-path and use the known-key codepath for all tuple-ish things. This had a surprising number of knock-on effects, * constraint tuple datacons had to be made known-key (previously they were not) * IfaceTopBndr was changed from being a synonym of OccName to a synonym of Name (since we now need to be able to deserialize Names directly from interface files) * the change to IfaceTopBndr complicated fingerprinting, since we need to ensure that we don't go looking for the fingerprint of the thing we are currently fingerprinting in the fingerprint environment (see notes in MkIface). Handling this required distinguishing between binding and non-binding Name occurrences in the Binary serializers. * the original name cache logic which previously lived in IfaceEnv has been moved to a new NameCache module * I ripped tuples and sums out of knownKeyNames since they introduce a very large number of entries. During interface file deserialization we use static functions (defined in the new KnownUniques module) to map from a Unique to a known-key Name (the Unique better correspond to a known-key name!) When we need to do an original name cache lookup we rely on the parser implemented in isBuiltInOcc_maybe. * HscMain.allKnownKeyNames was folded into PrelInfo.knownKeyNames. * Lots of comments were sprinkled about describing the new scheme. Updates haddock submodule. Test Plan: Validate Reviewers: niteria, simonpj, austin, hvr Reviewed By: simonpj Subscribers: simonmar, niteria, thomie Differential Revision: https://phabricator.haskell.org/D2467 GHC Trac Issues: #12532, #12415
Diffstat (limited to 'compiler/utils')
-rw-r--r--compiler/utils/Binary.hs80
-rw-r--r--compiler/utils/Fingerprint.hsc15
2 files changed, 63 insertions, 32 deletions
diff --git a/compiler/utils/Binary.hs b/compiler/utils/Binary.hs
index c3814cd908..61e1ee8cd1 100644
--- a/compiler/utils/Binary.hs
+++ b/compiler/utils/Binary.hs
@@ -29,25 +29,23 @@ module Binary
seekBy,
tellBin,
castBin,
+ isEOFBin,
+ withBinBuffer,
writeBinMem,
readBinMem,
- fingerprintBinMem,
- computeFingerprint,
-
- isEOFBin,
-
putAt, getAt,
- -- for writing instances:
+ -- * For writing instances
putByte,
getByte,
- -- lazy Bin I/O
+ -- * Lazy Binary I/O
lazyGet,
lazyPut,
+ -- * User data
UserData(..), getUserData, setUserData,
newReadState, newWriteState,
putDictionary, getDictionary, putFS,
@@ -105,6 +103,17 @@ getUserData bh = bh_usr bh
setUserData :: BinHandle -> UserData -> BinHandle
setUserData bh us = bh { bh_usr = us }
+-- | Get access to the underlying buffer.
+--
+-- It is quite important that no references to the 'ByteString' leak out of the
+-- continuation lest terrible things happen.
+withBinBuffer :: BinHandle -> (ByteString -> IO a) -> IO a
+withBinBuffer (BinMem _ ix_r _ arr_r) action = do
+ arr <- readIORef arr_r
+ ix <- readFastMutInt ix_r
+ withForeignPtr arr $ \ptr ->
+ BS.unsafePackCStringLen (castPtr ptr, ix) >>= action
+
---------------------------------------------------------------
-- Bin
@@ -200,23 +209,6 @@ readBinMem filename = do
writeFastMutInt sz_r filesize
return (BinMem noUserData ix_r sz_r arr_r)
-fingerprintBinMem :: BinHandle -> IO Fingerprint
-fingerprintBinMem (BinMem _ ix_r _ arr_r) = do
- arr <- readIORef arr_r
- ix <- readFastMutInt ix_r
- withForeignPtr arr $ \p -> fingerprintData p ix
-
-computeFingerprint :: Binary a
- => (BinHandle -> Name -> IO ())
- -> a
- -> IO Fingerprint
-
-computeFingerprint put_name a = do
- bh <- openBinMem (3*1024) -- just less than a block
- bh <- return $ setUserData bh $ newWriteState put_name putFS
- put_ bh a
- fingerprintBinMem bh
-
-- expand the size of the array to include a specified offset
expandBin :: BinHandle -> Int -> IO ()
expandBin (BinMem _ _ sz_r arr_r) off = do
@@ -614,6 +606,25 @@ lazyGet bh = do
-- UserData
-- -----------------------------------------------------------------------------
+-- | Information we keep around during interface file
+-- serialization/deserialization. Namely we keep the functions for serializing
+-- and deserializing 'Name's and 'FastString's. We do this because we actually
+-- use serialization in two distinct settings,
+--
+-- * When serializing interface files themselves
+--
+-- * When computing the fingerprint of an IfaceDecl (which we computing by
+-- hashing its Binary serialization)
+--
+-- These two settings have different needs while serializing Names:
+--
+-- * Names in interface files are serialized via a symbol table (see Note
+-- [Symbol table representation of names] in BinIface).
+--
+-- * During fingerprinting a binding Name is serialized as the OccName and a
+-- non-binding Name is serialized as the fingerprint of the thing they
+-- represent. See Note [Fingerprinting IfaceDecls] for further discussion.
+--
data UserData =
UserData {
-- for *deserialising* only:
@@ -621,27 +632,36 @@ data UserData =
ud_get_fs :: BinHandle -> IO FastString,
-- for *serialising* only:
- ud_put_name :: BinHandle -> Name -> IO (),
+ ud_put_nonbinding_name :: BinHandle -> Name -> IO (),
+ -- ^ serialize a non-binding 'Name' (e.g. a reference to another
+ -- binding).
+ ud_put_binding_name :: BinHandle -> Name -> IO (),
+ -- ^ serialize a binding 'Name' (e.g. the name of an IfaceDecl)
ud_put_fs :: BinHandle -> FastString -> IO ()
}
-newReadState :: (BinHandle -> IO Name)
+newReadState :: (BinHandle -> IO Name) -- ^ how to deserialize 'Name's
-> (BinHandle -> IO FastString)
-> UserData
newReadState get_name get_fs
= UserData { ud_get_name = get_name,
ud_get_fs = get_fs,
- ud_put_name = undef "put_name",
+ ud_put_nonbinding_name = undef "put_nonbinding_name",
+ ud_put_binding_name = undef "put_binding_name",
ud_put_fs = undef "put_fs"
}
-newWriteState :: (BinHandle -> Name -> IO ())
+newWriteState :: (BinHandle -> Name -> IO ())
+ -- ^ how to serialize non-binding 'Name's
+ -> (BinHandle -> Name -> IO ())
+ -- ^ how to serialize binding 'Name's
-> (BinHandle -> FastString -> IO ())
-> UserData
-newWriteState put_name put_fs
+newWriteState put_nonbinding_name put_binding_name put_fs
= UserData { ud_get_name = undef "get_name",
ud_get_fs = undef "get_fs",
- ud_put_name = put_name,
+ ud_put_nonbinding_name = put_nonbinding_name,
+ ud_put_binding_name = put_binding_name,
ud_put_fs = put_fs
}
diff --git a/compiler/utils/Fingerprint.hsc b/compiler/utils/Fingerprint.hsc
index ed4cd6fff7..f797654e0c 100644
--- a/compiler/utils/Fingerprint.hsc
+++ b/compiler/utils/Fingerprint.hsc
@@ -11,19 +11,25 @@
-- ----------------------------------------------------------------------------
module Fingerprint (
- Fingerprint(..), fingerprint0,
readHexFingerprint,
+ fingerprintByteString,
+ -- * Re-exported from GHC.Fingerprint
+ Fingerprint(..), fingerprint0,
fingerprintData,
fingerprintString,
- -- Re-exported from GHC.Fingerprint
getFileHash
) where
#include "md5.h"
##include "HsVersions.h"
+import Foreign
+import GHC.IO
import Numeric ( readHex )
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Unsafe as BS
+
import GHC.Fingerprint
-- useful for parsing the output of 'md5sum', should we want to do that.
@@ -32,3 +38,8 @@ readHexFingerprint s = Fingerprint w1 w2
where (s1,s2) = splitAt 16 s
[(w1,"")] = readHex s1
[(w2,"")] = readHex (take 16 s2)
+
+-- this can move to GHC.Fingerprint in GHC 8.6
+fingerprintByteString :: BS.ByteString -> Fingerprint
+fingerprintByteString bs = unsafeDupablePerformIO $
+ BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> fingerprintData (castPtr ptr) len