diff options
author | Duncan Coutts <duncan@well-typed.com> | 2014-08-19 20:33:10 +0100 |
---|---|---|
committer | Edward Z. Yang <ezyang@cs.stanford.edu> | 2014-08-29 12:39:04 +0100 |
commit | 8d7a1dcdbee47a980d0ecc8fa8e9336866a75d1b (patch) | |
tree | c9f7a599c9d96dfd49ab96ff081ff6e76a204870 | |
parent | ce29a2609cdd2c1941fcd184d7c76a73cdd050f9 (diff) | |
download | haskell-8d7a1dcdbee47a980d0ecc8fa8e9336866a75d1b.tar.gz |
Introduce new file format for the package database binary cache
The purpose of the new format is to make it possible for the compiler
to not depend on the Cabal library. The new cache file format contains
more or less the same information duplicated in two different sections
using different representations.
One section is basically the same as what the package db contains now,
a list of packages using the types defined in the Cabal library. This
section is read back by ghc-pkg, and used for things like ghc-pkg dump
which have to produce output using the Cabal InstalledPackageInfo text
representation.
The other section is a ghc-local type which contains a subset of the
information from the Cabal InstalledPackageInfo -- just the bits that
the compiler cares about.
The trick is that the compiler can read this second section without
needing to know the representation (or types) of the first part. The
ghc-pkg tool knows about both representations and writes both.
This patch introduces the new cache file format but does not yet use it
properly. More patches to follow. (As of this patch, the compiler reads
the part intended for ghc-pkg so it still depends on Cabal and the
ghc-local package type is not yet fully defined.)
-rw-r--r-- | compiler/main/Packages.lhs | 6 | ||||
-rw-r--r-- | libraries/bin-package-db/Distribution/InstalledPackageInfo/Binary.hs | 19 | ||||
-rw-r--r-- | libraries/bin-package-db/GHC/PackageDb.hs | 206 | ||||
-rw-r--r-- | libraries/bin-package-db/bin-package-db.cabal | 18 | ||||
-rw-r--r-- | utils/ghc-pkg/Main.hs | 33 |
5 files changed, 238 insertions, 44 deletions
diff --git a/compiler/main/Packages.lhs b/compiler/main/Packages.lhs index 8bb56fdab7..ae2669edcd 100644 --- a/compiler/main/Packages.lhs +++ b/compiler/main/Packages.lhs @@ -61,8 +61,9 @@ import Outputable import Maybes import System.Environment ( getEnv ) +import GHC.PackageDb (readPackageDbForGhcPkg) import Distribution.InstalledPackageInfo -import Distribution.InstalledPackageInfo.Binary +import Distribution.InstalledPackageInfo.Binary () import Distribution.Package hiding (depends, PackageKey, mkPackageKey) import Distribution.ModuleExport import FastString @@ -385,7 +386,8 @@ readPackageConfig dflags conf_file = do if isdir then do let filename = conf_file </> "package.cache" debugTraceMsg dflags 2 (text "Using binary package database:" <+> text filename) - conf <- readBinPackageDB filename + conf <- readPackageDbForGhcPkg filename + -- TODO readPackageDbForGhc ^^ instead return (map installedPackageInfoToPackageConfig conf) else do diff --git a/libraries/bin-package-db/Distribution/InstalledPackageInfo/Binary.hs b/libraries/bin-package-db/Distribution/InstalledPackageInfo/Binary.hs index 9fd27f64df..571424f410 100644 --- a/libraries/bin-package-db/Distribution/InstalledPackageInfo/Binary.hs +++ b/libraries/bin-package-db/Distribution/InstalledPackageInfo/Binary.hs @@ -14,10 +14,7 @@ -- Portability : portable -- -module Distribution.InstalledPackageInfo.Binary ( - readBinPackageDB, - writeBinPackageDB - ) where +module Distribution.InstalledPackageInfo.Binary () where import Distribution.Version import Distribution.Package hiding (depends) @@ -29,20 +26,6 @@ import Distribution.Text (display) import Data.Binary as Bin import Control.Exception as Exception -readBinPackageDB :: Binary m => FilePath -> IO [InstalledPackageInfo_ m] -readBinPackageDB file - = do xs <- Bin.decodeFile file - _ <- Exception.evaluate $ length xs - return xs - `catchUserError` - (\err -> error ("While parsing " ++ show file ++ ": " ++ err)) - -catchUserError :: IO a -> (String -> IO a) -> IO a -catchUserError io f = io `Exception.catch` \(ErrorCall err) -> f err - -writeBinPackageDB :: Binary m => FilePath -> [InstalledPackageInfo_ m] -> IO () -writeBinPackageDB file ipis = Bin.encodeFile file ipis - instance Binary m => Binary (InstalledPackageInfo_ m) where put = putInstalledPackageInfo get = getInstalledPackageInfo diff --git a/libraries/bin-package-db/GHC/PackageDb.hs b/libraries/bin-package-db/GHC/PackageDb.hs new file mode 100644 index 0000000000..0ed508524b --- /dev/null +++ b/libraries/bin-package-db/GHC/PackageDb.hs @@ -0,0 +1,206 @@ +{-# LANGUAGE CPP, RecordWildCards, NamedFieldPuns #-} +{-# OPTIONS_GHC -fno-warn-name-shadowing #-} +#if __GLASGOW_HASKELL__ >= 701 +{-# LANGUAGE Trustworthy #-} +#endif +----------------------------------------------------------------------------- +-- | +-- Module : GHC.PackageDb +-- Copyright : (c) The University of Glasgow 2009, Duncan Coutts 2014 +-- +-- Maintainer : ghc-devs@haskell.org +-- Portability : portable +-- +-- This module provides the view of GHC's database of registered packages that +-- is shared between GHC the compiler\/library, and the ghc-pkg program. It +-- defines the database format that is shared between GHC and ghc-pkg. +-- +-- The database format, and this library are constructed so that GHC does not +-- have to depend on the Cabal library. The ghc-pkg program acts as the +-- gateway between the external package format (which is defined by Cabal) and +-- the internal package format which is specialised just for GHC. +-- +-- GHC the compiler only needs some of the information which is kept about +-- registerd packages, such as module names, various paths etc. On the other +-- hand ghc-pkg has to keep all the information from Cabal packages and be able +-- to regurgitate it for users and other tools. +-- +-- The first trick is that we duplicate some of the information in the package +-- database. We essentially keep two versions of the datbase in one file, one +-- version used only by ghc-pkg which keeps the full information (using the +-- serialised form of the 'InstalledPackageInfo' type defined by the Cabal +-- library); and a second version written by ghc-pkg and read by GHC which has +-- just the subset of information that GHC needs. +-- +-- The second trick is that this module only defines in detail the format of +-- the second version -- the bit GHC uses -- and the part managed by ghc-pkg +-- is kept in the file but here we treat it as an opaque blob of data. That way +-- this library avoids depending on Cabal. +-- +module GHC.PackageDb ( + GhcPackageInfo(..), + readPackageDbForGhc, + readPackageDbForGhcPkg, + writePackageDb + ) where + +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BS.Char8 +import qualified Data.ByteString.Lazy as BS.Lazy +import qualified Data.ByteString.Lazy.Internal as BS.Lazy (defaultChunkSize) +import Data.Binary as Bin +import Data.Binary.Put as Bin +import Data.Binary.Get as Bin +import Control.Exception as Exception +import Control.Monad (when) +import System.FilePath +import System.IO +import System.IO.Error +import GHC.IO.Exception (IOErrorType(InappropriateType)) +import System.Directory + + +-- | This is a subset of Cabal's 'InstalledPackageInfo', with just the bits +-- that GHC is interested in. +-- +data GhcPackageInfo = GhcPackageInfo { + --TODO + } + deriving (Eq, Show) + + +-- | Read the part of the package DB that GHC is interested in. +-- +readPackageDbForGhc :: FilePath -> IO [GhcPackageInfo] +readPackageDbForGhc file = + decodeFromFile file getDbForGhc + where + getDbForGhc = do + _version <- getHeader + _ghcPartLen <- get :: Get Word32 + ghcPart <- get :: Get [GhcPackageInfo] + -- the next part is for ghc-pkg, but we stop here. + return ghcPart + +-- | Read the part of the package DB that ghc-pkg is interested in +-- +readPackageDbForGhcPkg :: Binary pkgs => FilePath -> IO pkgs +readPackageDbForGhcPkg file = + decodeFromFile file getDbForGhcPkg + where + getDbForGhcPkg = do + _version <- getHeader + -- skip over the ghc part + ghcPartLen <- get :: Get Word32 + _ghcPart <- skip (fromIntegral ghcPartLen) + -- the next part is for ghc-pkg + ghcPkgPart <- get + return ghcPkgPart + +-- | Write the whole of the package DB, both parts. +-- +writePackageDb :: Binary pkgs => FilePath -> [GhcPackageInfo] -> pkgs -> IO () +writePackageDb file ghcPkgs ghcPkgPart = + writeFileAtomic file (runPut putDbForGhcPkg) + where + putDbForGhcPkg = do + putHeader + put ghcPartLen + putLazyByteString ghcPart + put ghcPkgPart + where + ghcPartLen :: Word32 + ghcPartLen = fromIntegral (BS.Lazy.length ghcPart) + ghcPart = encode ghcPkgs + +getHeader :: Get (Word32, Word32) +getHeader = do + magic <- getByteString (BS.length headerMagic) + when (magic /= headerMagic) $ + fail "not a ghc-pkg db file, wrong file magic number" + + majorVersion <- get :: Get Word32 + -- The major version is for incompatible changes + + minorVersion <- get :: Get Word32 + -- The minor version is for compatible extensions + + when (majorVersion /= 1) $ + fail "unsupported ghc-pkg db format version" + -- If we ever support multiple major versions then we'll have to change + -- this code + + -- The header can be extended without incrementing the major version, + -- we ignore fields we don't know about (currently all). + headerExtraLen <- get :: Get Word32 + skip (fromIntegral headerExtraLen) + + return (majorVersion, minorVersion) + +putHeader :: Put +putHeader = do + putByteString headerMagic + put majorVersion + put minorVersion + put headerExtraLen + where + majorVersion = 1 :: Word32 + minorVersion = 0 :: Word32 + headerExtraLen = 0 :: Word32 + +headerMagic :: BS.ByteString +headerMagic = BS.Char8.pack "\0ghcpkg\0" + + +-- | Feed a 'Get' decoder with data chunks from a file. +-- +decodeFromFile :: FilePath -> Get a -> IO a +decodeFromFile file decoder = + withBinaryFile file ReadMode $ \hnd -> + feed hnd (runGetIncremental decoder) + where + feed hnd (Partial k) = do chunk <- BS.hGet hnd BS.Lazy.defaultChunkSize + if BS.null chunk + then feed hnd (k Nothing) + else feed hnd (k (Just chunk)) + feed _ (Done _ _ result) = return result + feed _ (Fail _ _ msg) = ioError err + where + err = mkIOError InappropriateType loc Nothing (Just file) + `ioeSetErrorString` msg + loc = "GHC.PackageDb.readPackageDb" + +writeFileAtomic :: FilePath -> BS.Lazy.ByteString -> IO () +writeFileAtomic targetPath content = do + let (targetDir, targetName) = splitFileName targetPath + Exception.bracketOnError + (openBinaryTempFileWithDefaultPermissions targetDir $ targetName <.> "tmp") + (\(tmpPath, handle) -> hClose handle >> removeFile tmpPath) + (\(tmpPath, handle) -> do + BS.Lazy.hPut handle content + hClose handle +#if mingw32_HOST_OS || mingw32_TARGET_OS + renameFile tmpPath targetPath + -- If the targetPath exists then renameFile will fail + `catchIO` \err -> do + exists <- doesFileExist targetPath + if exists + then do removeFile targetPath + -- Big fat hairy race condition + renameFile newFile targetPath + -- If the removeFile succeeds and the renameFile fails + -- then we've lost the atomic property. + else throwIOIO err +#else + renameFile tmpPath targetPath +#endif + ) + + +instance Binary GhcPackageInfo where + put (GhcPackageInfo {-TODO-}) = do + return () + + get = do + return (GhcPackageInfo {-TODO-}) + diff --git a/libraries/bin-package-db/bin-package-db.cabal b/libraries/bin-package-db/bin-package-db.cabal index e8b4fd45ee..0fcff0f1f4 100644 --- a/libraries/bin-package-db/bin-package-db.cabal +++ b/libraries/bin-package-db/bin-package-db.cabal @@ -3,7 +3,19 @@ version: 0.0.0.0 license: BSD3 maintainer: ghc-devs@haskell.org bug-reports: glasgow-haskell-bugs@haskell.org -synopsis: A binary format for the package database +synopsis: The GHC compiler's view of the GHC package database format +description: This library is shared between GHC and ghc-pkg and is used by + GHC to read package databases. + . + It only deals with the subset of the package database that the + compiler cares about: modules paths etc and not package + metadata like description, authors etc. It is thus not a + library interface to ghc-pkg and is *not* suitable for + modifying GHC package databases. + . + The package database format and this library are constructed in + such a way that while ghc-pkg depends on Cabal, the GHC library + and program do not have to depend on Cabal. cabal-version: >=1.10 build-type: Simple @@ -23,8 +35,10 @@ Library exposed-modules: Distribution.InstalledPackageInfo.Binary + GHC.PackageDb build-depends: base >= 4 && < 5, - binary >= 0.5 && < 0.8, + binary >= 0.7 && < 0.8, + bytestring, directory, filepath, Cabal >= 1.20 && < 1.22 diff --git a/utils/ghc-pkg/Main.hs b/utils/ghc-pkg/Main.hs index f270fe98b6..06205e3349 100644 --- a/utils/ghc-pkg/Main.hs +++ b/utils/ghc-pkg/Main.hs @@ -10,10 +10,11 @@ module Main (main) where import Version ( version, targetOS, targetARCH ) +import qualified GHC.PackageDb as GhcPkg import Distribution.InstalledPackageInfo.Binary() import qualified Distribution.Simple.PackageIndex as PackageIndex import Distribution.ModuleName hiding (main) -import Distribution.InstalledPackageInfo +import Distribution.InstalledPackageInfo as Cabal import Distribution.Compat.ReadP import Distribution.ParseUtils import Distribution.ModuleExport @@ -50,7 +51,6 @@ import GHC.IO.Exception (IOErrorType(InappropriateType)) import Data.List import Control.Concurrent -import qualified Data.ByteString.Lazy as B import qualified Data.Binary as Bin import qualified Data.Binary.Get as Bin @@ -715,7 +715,7 @@ readParseDatabase verbosity mb_user_conf modify use_cache path then do when (verbosity > Normal) $ infoLn ("using cache: " ++ cache) - pkgs <- myReadBinPackageDB cache + pkgs <- GhcPkg.readPackageDbForGhcPkg cache mkPackageDB pkgs else do when (verbosity >= Normal && not modify || verbosity > Normal) $ do @@ -740,18 +740,6 @@ readParseDatabase verbosity mb_user_conf modify use_cache path packages = pkgs } --- read the package.cache file strictly, to work around a problem with --- bytestring 0.9.0.x (fixed in 0.9.1.x) where the file wasn't closed --- after it has been completely read, leading to a sharing violation --- later. -myReadBinPackageDB :: FilePath -> IO [InstalledPackageInfo] -myReadBinPackageDB filepath = do - h <- openBinaryFile filepath ReadMode - sz <- hFileSize h - b <- B.hGet h (fromIntegral sz) - hClose h - return $ Bin.runGet Bin.get b - parseSingletonPackageConf :: Verbosity -> FilePath -> IO InstalledPackageInfo parseSingletonPackageConf verbosity file = do when (verbosity > Normal) $ infoLn ("reading package config: " ++ file) @@ -1016,9 +1004,16 @@ changeDBDir verbosity cmds db = do updateDBCache :: Verbosity -> PackageDB -> IO () updateDBCache verbosity db = do let filename = location db </> cachefilename + + pkgsCabalFormat :: [InstalledPackageInfo] + pkgsCabalFormat = packages db + + pkgsGhcCacheFormat :: [GhcPkg.GhcPackageInfo] + pkgsGhcCacheFormat = [] -- TODO: for the moment + when (verbosity > Normal) $ infoLn ("writing cache " ++ filename) - writeBinaryFileAtomic filename (packages db) + GhcPkg.writePackageDb filename pkgsGhcCacheFormat pkgsCabalFormat `catchIO` \e -> if isPermissionError e then die (filename ++ ": you don't have permission to modify this file") @@ -1862,12 +1857,6 @@ catchError io handler = io `Exception.catch` handler' tryIO :: IO a -> IO (Either Exception.IOException a) tryIO = Exception.try -writeBinaryFileAtomic :: Bin.Binary a => FilePath -> a -> IO () -writeBinaryFileAtomic targetFile obj = - withFileAtomic targetFile $ \h -> do - hSetBinaryMode h True - B.hPutStr h (Bin.encode obj) - writeFileUtf8Atomic :: FilePath -> String -> IO () writeFileUtf8Atomic targetFile content = withFileAtomic targetFile $ \h -> do |