From f9687caf337d409e4735d5bb4cf73a7dc629a58c Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 23 Jun 2015 13:15:17 -0700 Subject: Library names, with Cabal submodule update A library name is a package name, package version, and hash of the version names of all textual dependencies (i.e. packages which were included.) A library name is a coarse approximation of installed package IDs, which are suitable for inclusion in package keys (you don't want to put an IPID in a package key, since it means the key will change any time the source changes.) - We define ShPackageKey, which is the semantic object which is hashed into a PackageKey. You can use 'newPackageKey' to hash a ShPackageKey to a PackageKey - Given a PackageKey, we can lookup its ShPackageKey with 'lookupPackageKey'. The way we can do this is by consulting the 'pkgKeyCache', which records a reverse mapping from every hash to the ShPackageKey. This means that if you load in PackageKeys from external sources (e.g. interface files), you also need to load in a mapping of PackageKeys to their ShPackageKeys so we can populate the cache. - We define a 'LibraryName' which encapsulates the full depenency resolution that Cabal may have selected; this is opaque to GHC but can be used to distinguish different versions of a package. - Definite packages don't have an interesting PackageKey, so we rely on Cabal to pass them to us. - We can pretty-print package keys while displaying the instantiation, but it's not wired up to anything (e.g. the Outputable instance of PackageKey). Signed-off-by: Edward Z. Yang Test Plan: validate Reviewers: austin, bgamari Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D1056 GHC Trac Issues: #10566 --- compiler/backpack/ShPackageKey.hs | 280 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 280 insertions(+) create mode 100644 compiler/backpack/ShPackageKey.hs (limited to 'compiler/backpack') diff --git a/compiler/backpack/ShPackageKey.hs b/compiler/backpack/ShPackageKey.hs new file mode 100644 index 0000000000..9fc44ae5cb --- /dev/null +++ b/compiler/backpack/ShPackageKey.hs @@ -0,0 +1,280 @@ +{-# LANGUAGE CPP #-} +module ShPackageKey( + ShFreeHoles, + calcModuleFreeHoles, + + newPackageKey, + newPackageKeyWithScope, + lookupPackageKey, + + generalizeHoleModule, + canonicalizeModule, + + pprPackageKey +) where + +#include "HsVersions.h" + +import Module +import Packages +import FastString +import UniqFM +import UniqSet +import Outputable +import Util +import DynFlags + +import System.IO.Unsafe ( unsafePerformIO ) +import Control.Monad +import Numeric +import Data.IORef +import GHC.Fingerprint +import Data.Word +import qualified Data.Char as Char +import Data.List +import Data.Function + +-- NB: didn't put this in Module, that seems a bit too low in the +-- hierarchy, need to refer to DynFlags + +{- +************************************************************************ +* * + Package Keys +* * +************************************************************************ +-} + +-- Note: [PackageKey cache] +-- ~~~~~~~~~~~~~~~~~~~~~~~~ +-- The built-in PackageKey type (used by Module, Name, etc) +-- records the instantiation of the package as an MD5 hash +-- which is not reversible without some extra information. +-- However, the shape merging process requires us to be able +-- to substitute Module occurrences /inside/ the package key. +-- +-- Thus, we maintain the invariant: for every PackageKey +-- in our system, either: +-- +-- 1. It is in the installed package database (lookupPackage) +-- so we can lookup the recorded instantiatedWith +-- 2. We've recorded the associated mapping in the +-- PackageKeyCache. +-- +-- A PackageKey can be expanded into a ShPackageKey which has +-- the instance mapping. In the mapping, we don't bother +-- expanding a 'Module'; depending on 'shPackageKeyFreeHoles', +-- it may not be necessary to do a substitution (you only +-- need to drill down when substituing HOLE:H if H is in scope. + +-- Note: [Module name in scope set] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Similar to InScopeSet, ShFreeHoles is an optimization that +-- allows us to avoid expanding a PackageKey into an ShPackageKey +-- if there isn't actually anything in the module expression that +-- we can substitute. + +-- | Given a Name or Module, the 'ShFreeHoles' contains the set +-- of free variables, i.e. HOLE:A modules, which may be substituted. +-- If this set is empty no substitutions are possible. +type ShFreeHoles = UniqSet ModuleName + +-- | Calculate the free holes of a 'Module'. +calcModuleFreeHoles :: DynFlags -> Module -> IO ShFreeHoles +calcModuleFreeHoles dflags m + | modulePackageKey m == holePackageKey = return (unitUniqSet (moduleName m)) + | otherwise = do + shpk <- lookupPackageKey dflags (modulePackageKey m) + return $ case shpk of + ShDefinitePackageKey{} -> emptyUniqSet + ShPackageKey{ shPackageKeyFreeHoles = in_scope } -> in_scope + +-- | Calculate the free holes of the hole map @[('ModuleName', 'Module')]@. +calcInstsFreeHoles :: DynFlags -> [(ModuleName, Module)] -> IO ShFreeHoles +calcInstsFreeHoles dflags insts = + fmap unionManyUniqSets (mapM (calcModuleFreeHoles dflags . snd) insts) + +-- | Given a 'UnitName', a 'LibraryName', and sorted mapping of holes to +-- their implementations, compute the 'PackageKey' associated with it, as well +-- as the recursively computed 'ShFreeHoles' of holes that may be substituted. +newPackageKeyWithScope :: DynFlags + -> UnitName + -> LibraryName + -> [(ModuleName, Module)] + -> IO (PackageKey, ShFreeHoles) +newPackageKeyWithScope dflags pn vh insts = do + fhs <- calcInstsFreeHoles dflags insts + pk <- newPackageKey' dflags (ShPackageKey pn vh insts fhs) + return (pk, fhs) + +-- | Given a 'UnitName' and sorted mapping of holes to +-- their implementations, compute the 'PackageKey' associated with it. +-- (Analogous to 'newGlobalBinder'). +newPackageKey :: DynFlags + -> UnitName + -> LibraryName + -> [(ModuleName, Module)] + -> IO PackageKey +newPackageKey dflags pn vh insts = do + (pk, _) <- newPackageKeyWithScope dflags pn vh insts + return pk + +-- | Given a 'ShPackageKey', compute the 'PackageKey' associated with it. +-- This function doesn't calculate the 'ShFreeHoles', because it is +-- provided with 'ShPackageKey'. +newPackageKey' :: DynFlags -> ShPackageKey -> IO PackageKey +newPackageKey' _ (ShDefinitePackageKey pk) = return pk +newPackageKey' dflags + shpk@(ShPackageKey pn vh insts fhs) = do + ASSERTM( fmap (==fhs) (calcInstsFreeHoles dflags insts) ) + let pk = mkPackageKey pn vh insts + pkt_var = pkgKeyCache dflags + pk_cache <- readIORef pkt_var + let consistent pk_cache = maybe True (==shpk) (lookupUFM pk_cache pk) + MASSERT( consistent pk_cache ) + when (not (elemUFM pk pk_cache)) $ + atomicModifyIORef' pkt_var (\pk_cache -> + -- Could race, but it's guaranteed to be the same + ASSERT( consistent pk_cache ) (addToUFM pk_cache pk shpk, ())) + return pk + +-- | Given a 'PackageKey', reverse lookup the 'ShPackageKey' associated +-- with it. This only gives useful information for keys which are +-- created using 'newPackageKey' or the associated functions, or that are +-- already in the installed package database, since we generally cannot reverse +-- MD5 hashes. +lookupPackageKey :: DynFlags + -> PackageKey + -> IO ShPackageKey +lookupPackageKey dflags pk + | pk `elem` wiredInPackageKeys + || pk == mainPackageKey + || pk == holePackageKey + = return (ShDefinitePackageKey pk) + | otherwise = do + let pkt_var = pkgKeyCache dflags + pk_cache <- readIORef pkt_var + case lookupUFM pk_cache pk of + Just r -> return r + _ -> return (ShDefinitePackageKey pk) + +pprPackageKey :: PackageKey -> SDoc +pprPackageKey pk = sdocWithDynFlags $ \dflags -> + -- name cache is a memotable + let shpk = unsafePerformIO (lookupPackageKey dflags pk) + in case shpk of + shpk@ShPackageKey{} -> + ppr (shPackageKeyUnitName shpk) <> + parens (hsep + (punctuate comma [ ppUnless (moduleName m == modname) + (ppr modname <+> text "->") + <+> ppr m + | (modname, m) <- shPackageKeyInsts shpk])) + <> ifPprDebug (braces (ftext (packageKeyFS pk))) + ShDefinitePackageKey pk -> ftext (packageKeyFS pk) + +-- NB: newPackageKey and lookupPackageKey are mutually recursive; this +-- recursion is guaranteed to bottom out because you can't set up cycles +-- of PackageKeys. + + +{- +************************************************************************ +* * + Package key hashing +* * +************************************************************************ +-} + +-- | Generates a 'PackageKey'. Don't call this directly; you probably +-- want to cache the result. +mkPackageKey :: UnitName + -> LibraryName + -> [(ModuleName, Module)] -- hole instantiations + -> PackageKey +mkPackageKey (UnitName fsUnitName) + (LibraryName fsLibraryName) unsorted_holes = + -- NB: don't use concatFS here, it's not much of an improvement + fingerprintPackageKey . fingerprintString $ + unpackFS fsUnitName ++ "\n" ++ + unpackFS fsLibraryName ++ "\n" ++ + concat [ moduleNameString m + ++ " " ++ packageKeyString (modulePackageKey b) + ++ ":" ++ moduleNameString (moduleName b) ++ "\n" + | (m, b) <- sortBy (stableModuleNameCmp `on` fst) unsorted_holes] + +-- | Generalize a 'Module' into one where all the holes are indefinite. +-- @p(A -> ...):C@ generalizes to @p(A -> HOLE:A):C@. Useful when +-- you need to figure out if you've already type-checked the generalized +-- version of this module, so you don't have to do the whole rigamarole. +generalizeHoleModule :: DynFlags -> Module -> IO Module +generalizeHoleModule dflags m = do + pk <- generalizeHolePackageKey dflags (modulePackageKey m) + return (mkModule pk (moduleName m)) + +-- | Generalize a 'PackageKey' into one where all the holes are indefinite. +-- @p(A -> q():A) generalizes to p(A -> HOLE:A)@. +generalizeHolePackageKey :: DynFlags -> PackageKey -> IO PackageKey +generalizeHolePackageKey dflags pk = do + shpk <- lookupPackageKey dflags pk + case shpk of + ShDefinitePackageKey _ -> return pk + ShPackageKey { shPackageKeyUnitName = pn, + shPackageKeyLibraryName = vh, + shPackageKeyInsts = insts0 } + -> let insts = map (\(x, _) -> (x, mkModule holePackageKey x)) insts0 + in newPackageKey dflags pn vh insts + +-- | Canonicalize a 'Module' so that it uniquely identifies a module. +-- For example, @p(A -> M):A@ canonicalizes to @M@. Useful for making +-- sure the interface you've loaded as the right @mi_module@. +canonicalizeModule :: DynFlags -> Module -> IO Module +canonicalizeModule dflags m = do + let pk = modulePackageKey m + shpk <- lookupPackageKey dflags pk + return $ case shpk of + ShPackageKey { shPackageKeyInsts = insts } + | Just m' <- lookup (moduleName m) insts -> m' + _ -> m + +{- +************************************************************************ +* * + Base 62 +* * +************************************************************************ +-} + +-------------------------------------------------------------------------- +-- Base 62 + +-- The base-62 code is based off of 'locators' +-- ((c) Operational Dynamics Consulting, BSD3 licensed) + +-- Note: Instead of base-62 encoding a single 128-bit integer +-- (ceil(21.49) characters), we'll base-62 a pair of 64-bit integers +-- (2 * ceil(10.75) characters). Luckily for us, it's the same number of +-- characters! In the long term, this should go in GHC.Fingerprint, +-- but not now... + +-- | Size of a 64-bit word when written as a base-62 string +word64Base62Len :: Int +word64Base62Len = 11 + +-- | Converts a 64-bit word into a base-62 string +toBase62 :: Word64 -> String +toBase62 w = pad ++ str + where + pad = replicate len '0' + len = word64Base62Len - length str -- 11 == ceil(64 / lg 62) + str = showIntAtBase 62 represent w "" + represent :: Int -> Char + represent x + | x < 10 = Char.chr (48 + x) + | x < 36 = Char.chr (65 + x - 10) + | x < 62 = Char.chr (97 + x - 36) + | otherwise = error ("represent (base 62): impossible!") + +fingerprintPackageKey :: Fingerprint -> PackageKey +fingerprintPackageKey (Fingerprint a b) + = stringToPackageKey (toBase62 a ++ toBase62 b) -- cgit v1.2.1