summaryrefslogtreecommitdiff
path: root/compiler/backpack
diff options
context:
space:
mode:
authorEdward Z. Yang <ezyang@cs.stanford.edu>2015-06-23 13:15:17 -0700
committerEdward Z. Yang <ezyang@cs.stanford.edu>2015-07-23 13:35:45 -0700
commitf9687caf337d409e4735d5bb4cf73a7dc629a58c (patch)
tree3f4d0bc7fcd74b66ad750eed4d134c4afdcb7803 /compiler/backpack
parent5ff4daddd9bc8f424d8f71fb01ebbbae9d608cdf (diff)
downloadhaskell-f9687caf337d409e4735d5bb4cf73a7dc629a58c.tar.gz
Library names, with Cabal submodule update
A library name is a package name, package version, and hash of the version names of all textual dependencies (i.e. packages which were included.) A library name is a coarse approximation of installed package IDs, which are suitable for inclusion in package keys (you don't want to put an IPID in a package key, since it means the key will change any time the source changes.) - We define ShPackageKey, which is the semantic object which is hashed into a PackageKey. You can use 'newPackageKey' to hash a ShPackageKey to a PackageKey - Given a PackageKey, we can lookup its ShPackageKey with 'lookupPackageKey'. The way we can do this is by consulting the 'pkgKeyCache', which records a reverse mapping from every hash to the ShPackageKey. This means that if you load in PackageKeys from external sources (e.g. interface files), you also need to load in a mapping of PackageKeys to their ShPackageKeys so we can populate the cache. - We define a 'LibraryName' which encapsulates the full depenency resolution that Cabal may have selected; this is opaque to GHC but can be used to distinguish different versions of a package. - Definite packages don't have an interesting PackageKey, so we rely on Cabal to pass them to us. - We can pretty-print package keys while displaying the instantiation, but it's not wired up to anything (e.g. the Outputable instance of PackageKey). Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu> Test Plan: validate Reviewers: austin, bgamari Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D1056 GHC Trac Issues: #10566
Diffstat (limited to 'compiler/backpack')
-rw-r--r--compiler/backpack/ShPackageKey.hs280
1 files changed, 280 insertions, 0 deletions
diff --git a/compiler/backpack/ShPackageKey.hs b/compiler/backpack/ShPackageKey.hs
new file mode 100644
index 0000000000..9fc44ae5cb
--- /dev/null
+++ b/compiler/backpack/ShPackageKey.hs
@@ -0,0 +1,280 @@
+{-# LANGUAGE CPP #-}
+module ShPackageKey(
+ ShFreeHoles,
+ calcModuleFreeHoles,
+
+ newPackageKey,
+ newPackageKeyWithScope,
+ lookupPackageKey,
+
+ generalizeHoleModule,
+ canonicalizeModule,
+
+ pprPackageKey
+) where
+
+#include "HsVersions.h"
+
+import Module
+import Packages
+import FastString
+import UniqFM
+import UniqSet
+import Outputable
+import Util
+import DynFlags
+
+import System.IO.Unsafe ( unsafePerformIO )
+import Control.Monad
+import Numeric
+import Data.IORef
+import GHC.Fingerprint
+import Data.Word
+import qualified Data.Char as Char
+import Data.List
+import Data.Function
+
+-- NB: didn't put this in Module, that seems a bit too low in the
+-- hierarchy, need to refer to DynFlags
+
+{-
+************************************************************************
+* *
+ Package Keys
+* *
+************************************************************************
+-}
+
+-- Note: [PackageKey cache]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~
+-- The built-in PackageKey type (used by Module, Name, etc)
+-- records the instantiation of the package as an MD5 hash
+-- which is not reversible without some extra information.
+-- However, the shape merging process requires us to be able
+-- to substitute Module occurrences /inside/ the package key.
+--
+-- Thus, we maintain the invariant: for every PackageKey
+-- in our system, either:
+--
+-- 1. It is in the installed package database (lookupPackage)
+-- so we can lookup the recorded instantiatedWith
+-- 2. We've recorded the associated mapping in the
+-- PackageKeyCache.
+--
+-- A PackageKey can be expanded into a ShPackageKey which has
+-- the instance mapping. In the mapping, we don't bother
+-- expanding a 'Module'; depending on 'shPackageKeyFreeHoles',
+-- it may not be necessary to do a substitution (you only
+-- need to drill down when substituing HOLE:H if H is in scope.
+
+-- Note: [Module name in scope set]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Similar to InScopeSet, ShFreeHoles is an optimization that
+-- allows us to avoid expanding a PackageKey into an ShPackageKey
+-- if there isn't actually anything in the module expression that
+-- we can substitute.
+
+-- | Given a Name or Module, the 'ShFreeHoles' contains the set
+-- of free variables, i.e. HOLE:A modules, which may be substituted.
+-- If this set is empty no substitutions are possible.
+type ShFreeHoles = UniqSet ModuleName
+
+-- | Calculate the free holes of a 'Module'.
+calcModuleFreeHoles :: DynFlags -> Module -> IO ShFreeHoles
+calcModuleFreeHoles dflags m
+ | modulePackageKey m == holePackageKey = return (unitUniqSet (moduleName m))
+ | otherwise = do
+ shpk <- lookupPackageKey dflags (modulePackageKey m)
+ return $ case shpk of
+ ShDefinitePackageKey{} -> emptyUniqSet
+ ShPackageKey{ shPackageKeyFreeHoles = in_scope } -> in_scope
+
+-- | Calculate the free holes of the hole map @[('ModuleName', 'Module')]@.
+calcInstsFreeHoles :: DynFlags -> [(ModuleName, Module)] -> IO ShFreeHoles
+calcInstsFreeHoles dflags insts =
+ fmap unionManyUniqSets (mapM (calcModuleFreeHoles dflags . snd) insts)
+
+-- | Given a 'UnitName', a 'LibraryName', and sorted mapping of holes to
+-- their implementations, compute the 'PackageKey' associated with it, as well
+-- as the recursively computed 'ShFreeHoles' of holes that may be substituted.
+newPackageKeyWithScope :: DynFlags
+ -> UnitName
+ -> LibraryName
+ -> [(ModuleName, Module)]
+ -> IO (PackageKey, ShFreeHoles)
+newPackageKeyWithScope dflags pn vh insts = do
+ fhs <- calcInstsFreeHoles dflags insts
+ pk <- newPackageKey' dflags (ShPackageKey pn vh insts fhs)
+ return (pk, fhs)
+
+-- | Given a 'UnitName' and sorted mapping of holes to
+-- their implementations, compute the 'PackageKey' associated with it.
+-- (Analogous to 'newGlobalBinder').
+newPackageKey :: DynFlags
+ -> UnitName
+ -> LibraryName
+ -> [(ModuleName, Module)]
+ -> IO PackageKey
+newPackageKey dflags pn vh insts = do
+ (pk, _) <- newPackageKeyWithScope dflags pn vh insts
+ return pk
+
+-- | Given a 'ShPackageKey', compute the 'PackageKey' associated with it.
+-- This function doesn't calculate the 'ShFreeHoles', because it is
+-- provided with 'ShPackageKey'.
+newPackageKey' :: DynFlags -> ShPackageKey -> IO PackageKey
+newPackageKey' _ (ShDefinitePackageKey pk) = return pk
+newPackageKey' dflags
+ shpk@(ShPackageKey pn vh insts fhs) = do
+ ASSERTM( fmap (==fhs) (calcInstsFreeHoles dflags insts) )
+ let pk = mkPackageKey pn vh insts
+ pkt_var = pkgKeyCache dflags
+ pk_cache <- readIORef pkt_var
+ let consistent pk_cache = maybe True (==shpk) (lookupUFM pk_cache pk)
+ MASSERT( consistent pk_cache )
+ when (not (elemUFM pk pk_cache)) $
+ atomicModifyIORef' pkt_var (\pk_cache ->
+ -- Could race, but it's guaranteed to be the same
+ ASSERT( consistent pk_cache ) (addToUFM pk_cache pk shpk, ()))
+ return pk
+
+-- | Given a 'PackageKey', reverse lookup the 'ShPackageKey' associated
+-- with it. This only gives useful information for keys which are
+-- created using 'newPackageKey' or the associated functions, or that are
+-- already in the installed package database, since we generally cannot reverse
+-- MD5 hashes.
+lookupPackageKey :: DynFlags
+ -> PackageKey
+ -> IO ShPackageKey
+lookupPackageKey dflags pk
+ | pk `elem` wiredInPackageKeys
+ || pk == mainPackageKey
+ || pk == holePackageKey
+ = return (ShDefinitePackageKey pk)
+ | otherwise = do
+ let pkt_var = pkgKeyCache dflags
+ pk_cache <- readIORef pkt_var
+ case lookupUFM pk_cache pk of
+ Just r -> return r
+ _ -> return (ShDefinitePackageKey pk)
+
+pprPackageKey :: PackageKey -> SDoc
+pprPackageKey pk = sdocWithDynFlags $ \dflags ->
+ -- name cache is a memotable
+ let shpk = unsafePerformIO (lookupPackageKey dflags pk)
+ in case shpk of
+ shpk@ShPackageKey{} ->
+ ppr (shPackageKeyUnitName shpk) <>
+ parens (hsep
+ (punctuate comma [ ppUnless (moduleName m == modname)
+ (ppr modname <+> text "->")
+ <+> ppr m
+ | (modname, m) <- shPackageKeyInsts shpk]))
+ <> ifPprDebug (braces (ftext (packageKeyFS pk)))
+ ShDefinitePackageKey pk -> ftext (packageKeyFS pk)
+
+-- NB: newPackageKey and lookupPackageKey are mutually recursive; this
+-- recursion is guaranteed to bottom out because you can't set up cycles
+-- of PackageKeys.
+
+
+{-
+************************************************************************
+* *
+ Package key hashing
+* *
+************************************************************************
+-}
+
+-- | Generates a 'PackageKey'. Don't call this directly; you probably
+-- want to cache the result.
+mkPackageKey :: UnitName
+ -> LibraryName
+ -> [(ModuleName, Module)] -- hole instantiations
+ -> PackageKey
+mkPackageKey (UnitName fsUnitName)
+ (LibraryName fsLibraryName) unsorted_holes =
+ -- NB: don't use concatFS here, it's not much of an improvement
+ fingerprintPackageKey . fingerprintString $
+ unpackFS fsUnitName ++ "\n" ++
+ unpackFS fsLibraryName ++ "\n" ++
+ concat [ moduleNameString m
+ ++ " " ++ packageKeyString (modulePackageKey b)
+ ++ ":" ++ moduleNameString (moduleName b) ++ "\n"
+ | (m, b) <- sortBy (stableModuleNameCmp `on` fst) unsorted_holes]
+
+-- | Generalize a 'Module' into one where all the holes are indefinite.
+-- @p(A -> ...):C@ generalizes to @p(A -> HOLE:A):C@. Useful when
+-- you need to figure out if you've already type-checked the generalized
+-- version of this module, so you don't have to do the whole rigamarole.
+generalizeHoleModule :: DynFlags -> Module -> IO Module
+generalizeHoleModule dflags m = do
+ pk <- generalizeHolePackageKey dflags (modulePackageKey m)
+ return (mkModule pk (moduleName m))
+
+-- | Generalize a 'PackageKey' into one where all the holes are indefinite.
+-- @p(A -> q():A) generalizes to p(A -> HOLE:A)@.
+generalizeHolePackageKey :: DynFlags -> PackageKey -> IO PackageKey
+generalizeHolePackageKey dflags pk = do
+ shpk <- lookupPackageKey dflags pk
+ case shpk of
+ ShDefinitePackageKey _ -> return pk
+ ShPackageKey { shPackageKeyUnitName = pn,
+ shPackageKeyLibraryName = vh,
+ shPackageKeyInsts = insts0 }
+ -> let insts = map (\(x, _) -> (x, mkModule holePackageKey x)) insts0
+ in newPackageKey dflags pn vh insts
+
+-- | Canonicalize a 'Module' so that it uniquely identifies a module.
+-- For example, @p(A -> M):A@ canonicalizes to @M@. Useful for making
+-- sure the interface you've loaded as the right @mi_module@.
+canonicalizeModule :: DynFlags -> Module -> IO Module
+canonicalizeModule dflags m = do
+ let pk = modulePackageKey m
+ shpk <- lookupPackageKey dflags pk
+ return $ case shpk of
+ ShPackageKey { shPackageKeyInsts = insts }
+ | Just m' <- lookup (moduleName m) insts -> m'
+ _ -> m
+
+{-
+************************************************************************
+* *
+ Base 62
+* *
+************************************************************************
+-}
+
+--------------------------------------------------------------------------
+-- Base 62
+
+-- The base-62 code is based off of 'locators'
+-- ((c) Operational Dynamics Consulting, BSD3 licensed)
+
+-- Note: Instead of base-62 encoding a single 128-bit integer
+-- (ceil(21.49) characters), we'll base-62 a pair of 64-bit integers
+-- (2 * ceil(10.75) characters). Luckily for us, it's the same number of
+-- characters! In the long term, this should go in GHC.Fingerprint,
+-- but not now...
+
+-- | Size of a 64-bit word when written as a base-62 string
+word64Base62Len :: Int
+word64Base62Len = 11
+
+-- | Converts a 64-bit word into a base-62 string
+toBase62 :: Word64 -> String
+toBase62 w = pad ++ str
+ where
+ pad = replicate len '0'
+ len = word64Base62Len - length str -- 11 == ceil(64 / lg 62)
+ str = showIntAtBase 62 represent w ""
+ represent :: Int -> Char
+ represent x
+ | x < 10 = Char.chr (48 + x)
+ | x < 36 = Char.chr (65 + x - 10)
+ | x < 62 = Char.chr (97 + x - 36)
+ | otherwise = error ("represent (base 62): impossible!")
+
+fingerprintPackageKey :: Fingerprint -> PackageKey
+fingerprintPackageKey (Fingerprint a b)
+ = stringToPackageKey (toBase62 a ++ toBase62 b)