summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorromes <rodrigo.m.mesquita@gmail.com>2023-03-13 15:04:28 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2023-05-04 14:58:14 -0400
commit3fdb18f8df209ebfee51f16288c46acd1ca024b2 (patch)
tree8b035a10a8c03031ad11fc74080bcfe582ea465c
parent8cc9a534951d8352c31c9a21f5f91bbf188722b2 (diff)
downloadhaskell-3fdb18f8df209ebfee51f16288c46acd1ca024b2.tar.gz
Hardwire a better unit-id for ghc
Previously, the unit-id of ghc-the-library was fixed as `ghc`. This was done primarily because the compiler must know the unit-id of some packages (including ghc) a-priori to define wired-in names. However, as seen in #20742, a reinstallable `ghc` whose unit-id is fixed to `ghc` might result in subtle bugs when different ghc's interact. A good example of this is having GHC_A load a plugin compiled by GHC_B, where GHC_A and GHC_B are linked to ghc-libraries that are ABI incompatible. Without a distinction between the unit-id of the ghc library GHC_A is linked against and the ghc library the plugin it is loading was compiled against, we can't check compatibility. This patch gives a slightly better unit-id to ghc (ghc-version) by (1) Not setting -this-unit-id to ghc, but rather to the new unit-id (modulo stage0) (2) Adding a definition to `GHC.Settings.Config` whose value is the new unit-id. (2.1) `GHC.Settings.Config` is generated by Hadrian (2.2) and also by cabal through `compiler/Setup.hs` This unit-id definition is imported by `GHC.Unit.Types` and used to set the wired-in unit-id of "ghc", which was previously fixed to "ghc" The commits following this one will improve the unit-id with a cabal-style package hash and check compatibility when loading plugins. Note that we also ensure that ghc's unit key matches unit id both when hadrian or cabal builds ghc, and in this way we no longer need to add `ghc` to the WiringMap.
-rw-r--r--compiler/GHC/Driver/Session.hs1
-rw-r--r--compiler/GHC/Unit/Types.hs46
-rw-r--r--compiler/Setup.hs21
-rw-r--r--compiler/ghc.cabal.in15
-rw-r--r--hadrian/src/Rules/Generate.hs15
-rw-r--r--hadrian/src/Settings/Builders/Ghc.hs10
-rw-r--r--hadrian/src/Settings/Packages.hs5
-rw-r--r--testsuite/tests/driver/j-space/jspace.hs6
-rw-r--r--utils/count-deps/Main.hs23
9 files changed, 121 insertions, 21 deletions
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index da5cf29506..027b97d226 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -4822,6 +4822,7 @@ compilerInfo dflags
("Project Patch Level", cProjectPatchLevel),
("Project Patch Level1", cProjectPatchLevel1),
("Project Patch Level2", cProjectPatchLevel2),
+ ("Project Unit Id", cProjectUnitId),
("Booter version", cBooterVersion),
("Stage", cStage),
("Build platform", cBuildPlatformString),
diff --git a/compiler/GHC/Unit/Types.hs b/compiler/GHC/Unit/Types.hs
index 7439ab7dde..48db9bcdde 100644
--- a/compiler/GHC/Unit/Types.hs
+++ b/compiler/GHC/Unit/Types.hs
@@ -99,6 +99,7 @@ import GHC.Data.FastString
import GHC.Utils.Encoding
import GHC.Utils.Fingerprint
import GHC.Utils.Misc
+import GHC.Settings.Config (cProjectUnitId)
import Control.DeepSeq
import Data.Data
@@ -597,7 +598,7 @@ primUnitId = UnitId (fsLit "ghc-prim")
bignumUnitId = UnitId (fsLit "ghc-bignum")
baseUnitId = UnitId (fsLit "base")
rtsUnitId = UnitId (fsLit "rts")
-thisGhcUnitId = UnitId (fsLit "ghc")
+thisGhcUnitId = UnitId (fsLit cProjectUnitId) -- See Note [GHC's Unit Id]
interactiveUnitId = UnitId (fsLit "interactive")
thUnitId = UnitId (fsLit "template-haskell")
@@ -625,8 +626,49 @@ wiredInUnitIds =
, baseUnitId
, rtsUnitId
, thUnitId
- , thisGhcUnitId
]
+ -- NB: ghc is no longer part of the wired-in units since its unit-id, given
+ -- by hadrian or cabal, is no longer overwritten and now matches both the
+ -- cProjectUnitId defined in build-time-generated module GHC.Version, and
+ -- the unit key.
+ --
+ -- See also Note [About units], taking into consideration ghc is still a
+ -- wired-in unit but whose unit-id no longer needs special handling because
+ -- we take care that it matches the unit key.
+
+{-
+Note [GHC's Unit Id]
+~~~~~~~~~~~~~~~~~~~~
+Previously, the unit-id of ghc-the-library was fixed as `ghc`.
+This was done primarily because the compiler must know the unit-id of
+some packages (including ghc) a-priori to define wired-in names.
+
+However, as seen in #20742, a reinstallable `ghc` whose unit-id is fixed
+to `ghc` might result in subtle bugs when different ghc's interact.
+
+A good example of this is having GHC_A load a plugin compiled by GHC_B,
+where GHC_A and GHC_B are linked to ghc-libraries that are ABI
+incompatible. Without a distinction between the unit-id of the ghc library
+GHC_A is linked against and the ghc library the plugin it is loading was
+compiled against, we can't check compatibility.
+
+Now, we give a better unit-id to ghc (`ghc-version-hash`) by
+
+(1) Not setting -this-unit-id fixed to `ghc` in `ghc.cabal`, but rather by having
+ (1.1) Hadrian pass the new unit-id with -this-unit-id for stage0-1
+ (1.2) Cabal pass the unit-id it computes to ghc, which it already does by default
+
+(2) Adding a definition to `GHC.Settings.Config` whose value is the new
+unit-id. This is crucial to define the wired-in name of the GHC unit
+(`thisGhcUnitId`) which *must* match the value of the -this-unit-id flag.
+(Where `GHC.Settings.Config` is a module generated by the build system which,
+be it either hadrian or cabal, knows exactly the unit-id it passed with -this-unit-id)
+
+Note that we also ensure the ghc's unit key matches its unit id, both when
+hadrian or cabal is building ghc. This way, we no longer need to add `ghc` to
+the WiringMap, and that's why 'wiredInUnitIds' no longer includes
+'thisGhcUnitId'.
+-}
---------------------------------------------------------------------
-- Boot Modules
diff --git a/compiler/Setup.hs b/compiler/Setup.hs
index 97662a7775..f3f7d522d2 100644
--- a/compiler/Setup.hs
+++ b/compiler/Setup.hs
@@ -3,7 +3,10 @@ module Main where
import Distribution.Simple
import Distribution.Simple.BuildPaths
+import Distribution.Types.ComponentLocalBuildInfo
+import Distribution.Types.ComponentName (ComponentName(CLibName))
import Distribution.Types.LocalBuildInfo
+import Distribution.Types.LibraryName (LibraryName(LMainLibName))
import Distribution.Verbosity
import Distribution.Simple.Program
import Distribution.Simple.Utils
@@ -15,6 +18,7 @@ import System.Directory
import System.FilePath
import Control.Monad
import Data.Char
+import qualified Data.Map as Map
import GHC.ResponseFile
import System.Environment
@@ -85,9 +89,13 @@ ghcAutogen verbosity lbi@LocalBuildInfo{..} = do
callProcess "deriveConstants" ["--gen-haskell-type","-o",tmp,"--target-os",targetOS]
renameFile tmp platformConstantsPath
+ let cProjectUnitId = case Map.lookup (CLibName LMainLibName) componentNameMap of
+ Just [LibComponentLocalBuildInfo{componentUnitId}] -> unUnitId componentUnitId
+ _ -> error "Couldn't find unique cabal library when building ghc"
+
-- Write GHC.Settings.Config
- let configHsPath = autogenPackageModulesDir lbi </> "GHC/Settings/Config.hs"
- configHs = generateConfigHs settings
+ configHsPath = autogenPackageModulesDir lbi </> "GHC/Settings/Config.hs"
+ configHs = generateConfigHs cProjectUnitId settings
createDirectoryIfMissingVerbose verbosity True (takeDirectory configHsPath)
rewriteFileEx verbosity configHsPath configHs
@@ -98,8 +106,9 @@ getSetting settings kh kr = go settings kr
Nothing -> Left (show k ++ " not found in settings: " ++ show settings)
Just v -> Right v
-generateConfigHs :: [(String,String)] -> String
-generateConfigHs settings = either error id $ do
+generateConfigHs :: String -- ^ ghc's cabal-generated unit-id, which matches its package-id/key
+ -> [(String,String)] -> String
+generateConfigHs cProjectUnitId settings = either error id $ do
let getSetting' = getSetting $ (("cStage","2"):) settings
buildPlatform <- getSetting' "cBuildPlatformString" "Host platform"
hostPlatform <- getSetting' "cHostPlatformString" "Target platform"
@@ -114,6 +123,7 @@ generateConfigHs settings = either error id $ do
, " , cProjectName"
, " , cBooterVersion"
, " , cStage"
+ , " , cProjectUnitId"
, " ) where"
, ""
, "import GHC.Prelude.Basic"
@@ -134,4 +144,7 @@ generateConfigHs settings = either error id $ do
, ""
, "cStage :: String"
, "cStage = show ("++ cStage ++ " :: Int)"
+ , ""
+ , "cProjectUnitId :: String"
+ , "cProjectUnitId = " ++ show cProjectUnitId
]
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 4194dd7b05..a0a5f23133 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -39,7 +39,7 @@ extra-source-files:
custom-setup
- setup-depends: base >= 3 && < 5, Cabal >= 1.6 && <3.10, directory, process, filepath
+ setup-depends: base >= 3 && < 5, Cabal >= 1.6 && <3.10, directory, process, filepath, containers
Flag internal-interpreter
Description: Build with internal interpreter support.
@@ -57,6 +57,12 @@ Flag build-tool-depends
Description: Use build-tool-depends
Default: True
+-- While the boot compiler fixes ghc's unit-id to `ghc`, the stage0 compiler must still be compiled with `-this-unit-id ghc`
+Flag hadrian-stage0
+ Description: Enable if compiling the stage0 compiler with hadrian
+ Default: False
+ Manual: True
+
Library
Default-Language: Haskell2010
Exposed: False
@@ -137,9 +143,10 @@ Library
Include-Dirs: .
- -- We need to set the unit id to ghc (without a version number)
- -- as it's magic.
- GHC-Options: -this-unit-id ghc
+ if flag(hadrian-stage0)
+ -- We need to set the unit id to ghc (without a version number)
+ -- as it's magic.
+ GHC-Options: -this-unit-id ghc
c-sources:
cbits/cutils.c
diff --git a/hadrian/src/Rules/Generate.hs b/hadrian/src/Rules/Generate.hs
index de4369d4f8..c38d454617 100644
--- a/hadrian/src/Rules/Generate.hs
+++ b/hadrian/src/Rules/Generate.hs
@@ -483,6 +483,16 @@ generateConfigHs = do
trackGenerateHs
cProjectName <- getSetting ProjectName
cBooterVersion <- getSetting GhcVersion
+ cProjectVersionMunged <- getSetting ProjectVersionMunged
+ -- ROMES:TODO:HASH First we attempt a fixed unit-id with version but without hash.
+ --
+ -- We now use a more informative unit-id for ghc. See Note [GHC's Unit Id]
+ -- in GHC.Unit.Types
+ --
+ -- It's crucial that the unit-id matches the unit-key -- ghc is no longer
+ -- part of the WiringMap, so we don't to go back and forth between the
+ -- unit-id and the unit-key -- we take care here that they are the same.
+ let cProjectUnitId = "ghc-" ++ cProjectVersionMunged -- ROMES:TODO:HASH
return $ unlines
[ "module GHC.Settings.Config"
, " ( module GHC.Version"
@@ -491,6 +501,7 @@ generateConfigHs = do
, " , cProjectName"
, " , cBooterVersion"
, " , cStage"
+ , " , cProjectUnitId"
, " ) where"
, ""
, "import GHC.Prelude.Basic"
@@ -511,6 +522,9 @@ generateConfigHs = do
, ""
, "cStage :: String"
, "cStage = show (" ++ stageString stage ++ " :: Int)"
+ , ""
+ , "cProjectUnitId :: String"
+ , "cProjectUnitId = " ++ show cProjectUnitId
]
where
stageString (Stage0 InTreeLibs) = "1"
@@ -530,6 +544,7 @@ generateVersionHs = do
cProjectPatchLevel <- getSetting ProjectPatchLevel
cProjectPatchLevel1 <- getSetting ProjectPatchLevel1
cProjectPatchLevel2 <- getSetting ProjectPatchLevel2
+
return $ unlines
[ "module GHC.Version where"
, ""
diff --git a/hadrian/src/Settings/Builders/Ghc.hs b/hadrian/src/Settings/Builders/Ghc.hs
index 947172ecb8..b211657b9e 100644
--- a/hadrian/src/Settings/Builders/Ghc.hs
+++ b/hadrian/src/Settings/Builders/Ghc.hs
@@ -250,6 +250,16 @@ packageGhcArgs :: Args
packageGhcArgs = do
package <- getPackage
ghc_ver <- readVersion <$> (expr . ghcVersionStage =<< getStage)
+ -- ROMES: Until the boot compiler no longer needs ghc's
+ -- unit-id to be "ghc", the stage0 compiler must be built
+ -- with `-this-unit-id ghc`, while the wired-in unit-id of
+ -- ghc is correctly set to the unit-id we'll generate for
+ -- stage1 (set in generateVersionHs in Rules.Generate).
+ --
+ -- However, we don't need to set the unit-id of "ghc" to "ghc" when
+ -- building stage0 because we have a flag in compiler/ghc.cabal.in that is
+ -- sets `-this-unit-id ghc` when hadrian is building stage0, which will
+ -- overwrite this one.
pkgId <- expr $ pkgIdentifier package
mconcat [ arg "-hide-all-packages"
, arg "-no-user-package-db"
diff --git a/hadrian/src/Settings/Packages.hs b/hadrian/src/Settings/Packages.hs
index 05f7b6fb1e..354ad68b3f 100644
--- a/hadrian/src/Settings/Packages.hs
+++ b/hadrian/src/Settings/Packages.hs
@@ -74,6 +74,11 @@ packageArgs = do
[ andM [expr ghcWithInterpreter, notStage0] `cabalFlag` "internal-interpreter"
, notM cross `cabalFlag` "terminfo"
, arg "-build-tool-depends"
+ -- ROMES: While the boot compiler is not updated wrt -this-unit-id
+ -- not being fixed to `ghc`, when building stage0, we must set
+ -- -this-unit-id to `ghc` because the boot compiler expects that.
+ -- We do it through a cabal flag in ghc.cabal
+ , stage0 ? arg "+hadrian-stage0"
]
, builder (Haddock BuildPackage) ? arg ("--optghc=-I" ++ path) ]
diff --git a/testsuite/tests/driver/j-space/jspace.hs b/testsuite/tests/driver/j-space/jspace.hs
index b0c83eb4d1..7f1ef697e8 100644
--- a/testsuite/tests/driver/j-space/jspace.hs
+++ b/testsuite/tests/driver/j-space/jspace.hs
@@ -2,6 +2,7 @@ module Main where
import GHC
import GHC.Driver.Monad
+import GHC.Driver.Session
import System.Environment
import GHC.Driver.Env.Types
import GHC.Profiling
@@ -25,6 +26,9 @@ initGhcM xs = do
let cmdOpts = ["-fforce-recomp"] ++ xs
(df2, leftovers, _) <- parseDynamicFlags (hsc_logger session) df1 (map noLoc cmdOpts)
setSessionDynFlags df2
+ ghcUnitId <- case lookup "Project Unit Id" (compilerInfo df2) of
+ Nothing -> fail "failed to find ghc's unit-id in the compiler info"
+ Just ghcUnitId -> pure ghcUnitId
ts <- mapM (\s -> guessTarget s Nothing Nothing) $ map unLoc leftovers
setTargets ts
_ <- load LoadAllTargets
@@ -36,7 +40,7 @@ initGhcM xs = do
liftIO $ do
requestHeapCensus
performGC
- [ys] <- filter (isPrefixOf "ghc:GHC.Unit.Module.ModDetails.ModDetails") . lines <$> readFile "jspace.hp"
+ [ys] <- filter (isPrefixOf (ghcUnitId <> ":GHC.Unit.Module.ModDetails.ModDetails")) . lines <$> readFile "jspace.hp"
let (n :: Int) = read (last (words ys))
-- The output should be 50 * 8 * word_size (i.e. 3200, or 1600 on 32-bit architectures):
-- the test contains DEPTH + WIDTH + 2 = 50 modules J, H_0, .., H_DEPTH, W_1, .., W_WIDTH,
diff --git a/utils/count-deps/Main.hs b/utils/count-deps/Main.hs
index d431f00dda..1b249047d5 100644
--- a/utils/count-deps/Main.hs
+++ b/utils/count-deps/Main.hs
@@ -56,25 +56,28 @@ calcDeps modName libdir =
logger <- getLogger
(df, _, _) <- parseDynamicFlags logger df [noLoc "-package=ghc"]
setSessionDynFlags df
- env <- getSession
- loop env Map.empty [mkModuleName modName]
+ case lookup "Project Unit Id" (compilerInfo df) of
+ Nothing -> fail "failed to find ghc's unit-id in the compiler info"
+ Just ghcUnitId -> do
+ env <- getSession
+ loop ghcUnitId env Map.empty [mkModuleName modName]
where
-- Source imports are only guaranteed to show up in the 'mi_deps'
-- of modules that import them directly and don’t propagate
-- transitively so we loop.
- loop :: HscEnv -> Map.Map ModuleName [ModuleName] -> [ModuleName] -> Ghc (Map.Map ModuleName [ModuleName])
- loop env modules (m : ms) =
+ loop :: String -> HscEnv -> Map.Map ModuleName [ModuleName] -> [ModuleName] -> Ghc (Map.Map ModuleName [ModuleName])
+ loop ghcUnitId env modules (m : ms) =
if m `Map.member` modules
- then loop env modules ms
+ then loop ghcUnitId env modules ms
else do
- mi <- liftIO $ hscGetModuleInterface env (mkModule m)
+ mi <- liftIO $ hscGetModuleInterface env (mkModule ghcUnitId m)
let deps = modDeps mi
modules <- return $ Map.insert m [] modules
- loop env (Map.insert m deps modules) $ ms ++ filter (not . (`Map.member` modules)) deps
- loop _ modules [] = return modules
+ loop ghcUnitId env (Map.insert m deps modules) $ ms ++ filter (not . (`Map.member` modules)) deps
+ loop _ _ modules [] = return modules
- mkModule :: ModuleName -> Module
- mkModule = Module (stringToUnit "ghc")
+ mkModule :: String -> ModuleName -> Module
+ mkModule ghcUnitId = Module (stringToUnit ghcUnitId)
modDeps :: ModIface -> [ModuleName]
modDeps mi = map (gwib_mod . snd) $ Set.toList $ dep_direct_mods (mi_deps mi)