summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexis King <lexi.lambda@gmail.com>2023-01-18 17:00:54 -0600
committerMarge Bot <ben+marge-bot@smart-cactus.org>2023-02-20 15:27:17 -0500
commit26243de1e3716886161d79918af9359f7639314b (patch)
treec106f6bf05cbd460d23a7bdc00ba7f42636cf7a2
parent2592ab6924ee34ed0f0d82a7cb0aed393d93bb14 (diff)
downloadhaskell-26243de1e3716886161d79918af9359f7639314b.tar.gz
Handle top-level Addr# literals in the bytecode compiler
Fixes #22376.
-rw-r--r--compiler/GHC/ByteCode/Asm.hs39
-rw-r--r--compiler/GHC/ByteCode/Instr.hs6
-rw-r--r--compiler/GHC/ByteCode/Linker.hs61
-rw-r--r--compiler/GHC/ByteCode/Types.hs14
-rw-r--r--compiler/GHC/Linker/Loader.hs76
-rw-r--r--compiler/GHC/Linker/Types.hs58
-rw-r--r--compiler/GHC/StgToByteCode.hs111
-rw-r--r--testsuite/tests/bytecode/T22376/A.hs6
-rw-r--r--testsuite/tests/bytecode/T22376/B.hs4
-rw-r--r--testsuite/tests/bytecode/T22376/T22376.hs6
-rw-r--r--testsuite/tests/bytecode/T22376/T22376.stdout1
-rw-r--r--testsuite/tests/bytecode/T22376/all.T2
12 files changed, 241 insertions, 143 deletions
diff --git a/compiler/GHC/ByteCode/Asm.hs b/compiler/GHC/ByteCode/Asm.hs
index 391949d448..f020f0af0a 100644
--- a/compiler/GHC/ByteCode/Asm.hs
+++ b/compiler/GHC/ByteCode/Asm.hs
@@ -97,7 +97,7 @@ assembleBCOs
-> Profile
-> [ProtoBCO Name]
-> [TyCon]
- -> [RemotePtr ()]
+ -> AddrEnv
-> Maybe ModBreaks
-> IO CompiledByteCode
assembleBCOs interp profile proto_bcos tycons top_strs modbreaks = do
@@ -105,27 +105,40 @@ assembleBCOs interp profile proto_bcos tycons top_strs modbreaks = do
-- fixed for an interpreter
itblenv <- mkITbls interp profile tycons
bcos <- mapM (assembleBCO (profilePlatform profile)) proto_bcos
- (bcos',ptrs) <- mallocStrings interp bcos
+ bcos' <- mallocStrings interp bcos
return CompiledByteCode
{ bc_bcos = bcos'
, bc_itbls = itblenv
, bc_ffis = concatMap protoBCOFFIs proto_bcos
- , bc_strs = top_strs ++ ptrs
+ , bc_strs = top_strs
, bc_breaks = modbreaks
}
--- Find all the literal strings and malloc them together. We want to
--- do this because:
+-- Note [Allocating string literals]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Our strategy for handling top-level string literal bindings is described in
+-- Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode,
+-- but not all Addr# literals in a program are guaranteed to be lifted to the
+-- top level. Our strategy for handling local Addr# literals is somewhat simpler:
+-- after assembling, we find all the BCONPtrStr arguments in the program, malloc
+-- memory for them, and bake the resulting addresses into the instruction stream
+-- in the form of BCONPtrWord arguments.
--
--- a) It should be done when we compile the module, not each time we relink it
--- b) For -fexternal-interpreter It's more efficient to malloc the strings
--- as a single batch message, especially when compiling in parallel.
+-- Since we do this when assembling, we only allocate the memory when we compile
+-- the module, not each time we relink it. However, we do want to take care to
+-- malloc the memory all in one go, since that is more efficient with
+-- -fexternal-interpreter, especially when compiling in parallel.
--
-mallocStrings :: Interp -> [UnlinkedBCO] -> IO ([UnlinkedBCO], [RemotePtr ()])
+-- Note that, as with top-level string literal bindings, this memory is never
+-- freed, so it just leaks if the BCO is unloaded. See Note [Generating code for
+-- top-level string literal bindings] in GHC.StgToByteCode for some discussion
+-- about why.
+--
+mallocStrings :: Interp -> [UnlinkedBCO] -> IO [UnlinkedBCO]
mallocStrings interp ulbcos = do
let bytestrings = reverse (execState (mapM_ collect ulbcos) [])
ptrs <- interpCmd interp (MallocStrings bytestrings)
- return (evalState (mapM splice ulbcos) ptrs, ptrs)
+ return (evalState (mapM splice ulbcos) ptrs)
where
splice bco@UnlinkedBCO{..} = do
lits <- mapM spliceLit unlinkedBCOLits
@@ -162,7 +175,7 @@ assembleOneBCO interp profile pbco = do
-- TODO: the profile should be bundled with the interpreter: the rts ways are
-- fixed for an interpreter
ubco <- assembleBCO (profilePlatform profile) pbco
- ([ubco'], _ptrs) <- mallocStrings interp [ubco]
+ [ubco'] <- mallocStrings interp [ubco]
return ubco'
assembleBCO :: Platform -> ProtoBCO Name -> IO UnlinkedBCO
@@ -411,6 +424,10 @@ assembleI platform i = case i of
PUSH_UBX lit nws -> do np <- literal lit
emit bci_PUSH_UBX [Op np, SmallOp nws]
+ -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode
+ PUSH_ADDR nm -> do np <- lit [BCONPtrAddr nm]
+ emit bci_PUSH_UBX [Op np, SmallOp 1]
+
PUSH_APPLY_N -> emit bci_PUSH_APPLY_N []
PUSH_APPLY_V -> emit bci_PUSH_APPLY_V []
PUSH_APPLY_F -> emit bci_PUSH_APPLY_F []
diff --git a/compiler/GHC/ByteCode/Instr.hs b/compiler/GHC/ByteCode/Instr.hs
index 34baa57d40..4f9fd75fc4 100644
--- a/compiler/GHC/ByteCode/Instr.hs
+++ b/compiler/GHC/ByteCode/Instr.hs
@@ -112,6 +112,10 @@ data BCInstr
-- type, and it appears impossible to get hold of the bits of
-- an addr, even though we need to assemble BCOs.
+ -- Push a top-level Addr#. This is a pseudo-instruction assembled to PUSH_UBX,
+ -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode.
+ | PUSH_ADDR Name
+
-- various kinds of application
| PUSH_APPLY_N
| PUSH_APPLY_V
@@ -284,6 +288,7 @@ instance Outputable BCInstr where
ppr (PUSH_UBX16 lit) = text "PUSH_UBX16" <+> ppr lit
ppr (PUSH_UBX32 lit) = text "PUSH_UBX32" <+> ppr lit
ppr (PUSH_UBX lit nw) = text "PUSH_UBX" <+> parens (ppr nw) <+> ppr lit
+ ppr (PUSH_ADDR nm) = text "PUSH_ADDR" <+> ppr nm
ppr PUSH_APPLY_N = text "PUSH_APPLY_N"
ppr PUSH_APPLY_V = text "PUSH_APPLY_V"
ppr PUSH_APPLY_F = text "PUSH_APPLY_F"
@@ -397,6 +402,7 @@ bciStackUse (PUSH_UBX8 _) = 1 -- overapproximation
bciStackUse (PUSH_UBX16 _) = 1 -- overapproximation
bciStackUse (PUSH_UBX32 _) = 1 -- overapproximation on 64bit arch
bciStackUse (PUSH_UBX _ nw) = fromIntegral nw
+bciStackUse PUSH_ADDR{} = 1
bciStackUse PUSH_APPLY_N{} = 1
bciStackUse PUSH_APPLY_V{} = 1
bciStackUse PUSH_APPLY_F{} = 1
diff --git a/compiler/GHC/ByteCode/Linker.hs b/compiler/GHC/ByteCode/Linker.hs
index c3af3d4e85..8a7a24ae1a 100644
--- a/compiler/GHC/ByteCode/Linker.hs
+++ b/compiler/GHC/ByteCode/Linker.hs
@@ -8,10 +8,7 @@
-- | Bytecode assembler and linker
module GHC.ByteCode.Linker
- ( ClosureEnv
- , emptyClosureEnv
- , extendClosureEnv
- , linkBCO
+ ( linkBCO
, lookupStaticPtr
, lookupIE
, nameToCLabel
@@ -35,6 +32,8 @@ import GHC.Unit.Types
import GHC.Data.FastString
import GHC.Data.SizedSeq
+import GHC.Linker.Types
+
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
import GHC.Utils.Outputable
@@ -53,45 +52,34 @@ import GHC.Exts
Linking interpretables into something we can run
-}
-type ClosureEnv = NameEnv (Name, ForeignHValue)
-
-emptyClosureEnv :: ClosureEnv
-emptyClosureEnv = emptyNameEnv
-
-extendClosureEnv :: ClosureEnv -> [(Name,ForeignHValue)] -> ClosureEnv
-extendClosureEnv cl_env pairs
- = extendNameEnvList cl_env [ (n, (n,v)) | (n,v) <- pairs]
-
-{-
- Linking interpretables into something we can run
--}
-
linkBCO
:: Interp
- -> ItblEnv
- -> ClosureEnv
+ -> LinkerEnv
-> NameEnv Int
-> RemoteRef BreakArray
-> UnlinkedBCO
-> IO ResolvedBCO
-linkBCO interp ie ce bco_ix breakarray
+linkBCO interp le bco_ix breakarray
(UnlinkedBCO _ arity insns bitmap lits0 ptrs0) = do
-- fromIntegral Word -> Word64 should be a no op if Word is Word64
-- otherwise it will result in a cast to longlong on 32bit systems.
- lits <- mapM (fmap fromIntegral . lookupLiteral interp ie) (ssElts lits0)
- ptrs <- mapM (resolvePtr interp ie ce bco_ix breakarray) (ssElts ptrs0)
+ lits <- mapM (fmap fromIntegral . lookupLiteral interp le) (ssElts lits0)
+ ptrs <- mapM (resolvePtr interp le bco_ix breakarray) (ssElts ptrs0)
return (ResolvedBCO isLittleEndian arity insns bitmap
(listArray (0, fromIntegral (sizeSS lits0)-1) lits)
(addListToSS emptySS ptrs))
-lookupLiteral :: Interp -> ItblEnv -> BCONPtr -> IO Word
-lookupLiteral interp ie ptr = case ptr of
+lookupLiteral :: Interp -> LinkerEnv -> BCONPtr -> IO Word
+lookupLiteral interp le ptr = case ptr of
BCONPtrWord lit -> return lit
BCONPtrLbl sym -> do
Ptr a# <- lookupStaticPtr interp sym
return (W# (int2Word# (addr2Int# a#)))
BCONPtrItbl nm -> do
- Ptr a# <- lookupIE interp ie nm
+ Ptr a# <- lookupIE interp (itbl_env le) nm
+ return (W# (int2Word# (addr2Int# a#)))
+ BCONPtrAddr nm -> do
+ Ptr a# <- lookupAddr interp (addr_env le) nm
return (W# (int2Word# (addr2Int# a#)))
BCONPtrStr _ ->
-- should be eliminated during assembleBCOs
@@ -124,6 +112,20 @@ lookupIE interp ie con_nm =
(unpackFS sym_to_find1 ++ " or " ++
unpackFS sym_to_find2)
+-- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode
+lookupAddr :: Interp -> AddrEnv -> Name -> IO (Ptr ())
+lookupAddr interp ae addr_nm = do
+ case lookupNameEnv ae addr_nm of
+ Just (_, AddrPtr ptr) -> return (fromRemotePtr ptr)
+ Nothing -> do -- try looking up in the object files.
+ let sym_to_find = nameToCLabel addr_nm "bytes"
+ -- see Note [Bytes label] in GHC.Cmm.CLabel
+ m <- lookupSymbol interp sym_to_find
+ case m of
+ Just ptr -> return ptr
+ Nothing -> linkFail "GHC.ByteCode.Linker.lookupAddr"
+ (unpackFS sym_to_find)
+
lookupPrimOp :: Interp -> PrimOp -> IO (RemotePtr ())
lookupPrimOp interp primop = do
let sym_to_find = primopToCLabel primop "closure"
@@ -134,18 +136,17 @@ lookupPrimOp interp primop = do
resolvePtr
:: Interp
- -> ItblEnv
- -> ClosureEnv
+ -> LinkerEnv
-> NameEnv Int
-> RemoteRef BreakArray
-> BCOPtr
-> IO ResolvedBCOPtr
-resolvePtr interp ie ce bco_ix breakarray ptr = case ptr of
+resolvePtr interp le bco_ix breakarray ptr = case ptr of
BCOPtrName nm
| Just ix <- lookupNameEnv bco_ix nm
-> return (ResolvedBCORef ix) -- ref to another BCO in this group
- | Just (_, rhv) <- lookupNameEnv ce nm
+ | Just (_, rhv) <- lookupNameEnv (closure_env le) nm
-> return (ResolvedBCOPtr (unsafeForeignRefToRemoteRef rhv))
| otherwise
@@ -161,7 +162,7 @@ resolvePtr interp ie ce bco_ix breakarray ptr = case ptr of
-> ResolvedBCOStaticPtr <$> lookupPrimOp interp op
BCOPtrBCO bco
- -> ResolvedBCOPtrBCO <$> linkBCO interp ie ce bco_ix breakarray bco
+ -> ResolvedBCOPtrBCO <$> linkBCO interp le bco_ix breakarray bco
BCOPtrBreakArray
-> return (ResolvedBCOPtrBreakArray breakarray)
diff --git a/compiler/GHC/ByteCode/Types.hs b/compiler/GHC/ByteCode/Types.hs
index a100e72085..e16fcf7f5a 100644
--- a/compiler/GHC/ByteCode/Types.hs
+++ b/compiler/GHC/ByteCode/Types.hs
@@ -14,6 +14,7 @@ module GHC.ByteCode.Types
, ByteOff(..), WordOff(..)
, UnlinkedBCO(..), BCOPtr(..), BCONPtr(..)
, ItblEnv, ItblPtr(..)
+ , AddrEnv, AddrPtr(..)
, CgBreakInfo(..)
, ModBreaks (..), BreakIndex, emptyModBreaks
, CCostCentre
@@ -51,7 +52,7 @@ data CompiledByteCode = CompiledByteCode
{ bc_bcos :: [UnlinkedBCO] -- Bunch of interpretable bindings
, bc_itbls :: ItblEnv -- A mapping from DataCons to their itbls
, bc_ffis :: [FFIInfo] -- ffi blocks we allocated
- , bc_strs :: [RemotePtr ()] -- malloc'd strings
+ , bc_strs :: AddrEnv -- malloc'd top-level strings
, bc_breaks :: Maybe ModBreaks -- breakpoint info (Nothing if we're not
-- creating breakpoints, for some reason)
}
@@ -69,7 +70,7 @@ seqCompiledByteCode CompiledByteCode{..} =
rnf bc_bcos `seq`
seqEltsNameEnv rnf bc_itbls `seq`
rnf bc_ffis `seq`
- rnf bc_strs `seq`
+ seqEltsNameEnv rnf bc_strs `seq`
rnf (fmap seqModBreaks bc_breaks)
newtype ByteOff = ByteOff Int
@@ -131,11 +132,14 @@ voidPrimCallInfo :: NativeCallInfo
voidPrimCallInfo = NativeCallInfo NativePrimCall 0 emptyRegSet 0
type ItblEnv = NameEnv (Name, ItblPtr)
+type AddrEnv = NameEnv (Name, AddrPtr)
-- We need the Name in the range so we know which
-- elements to filter out when unloading a module
newtype ItblPtr = ItblPtr (RemotePtr Heap.StgInfoTable)
deriving (Show, NFData)
+newtype AddrPtr = AddrPtr (RemotePtr ())
+ deriving (NFData)
data UnlinkedBCO
= UnlinkedBCO {
@@ -166,6 +170,12 @@ data BCONPtr
= BCONPtrWord {-# UNPACK #-} !Word
| BCONPtrLbl !FastString
| BCONPtrItbl !Name
+ -- | A reference to a top-level string literal; see
+ -- Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode.
+ | BCONPtrAddr !Name
+ -- | Only used internally in the assembler in an intermediate representation;
+ -- should never appear in a fully-assembled UnlinkedBCO.
+ -- Also see Note [Allocating string literals] in GHC.ByteCode.Asm.
| BCONPtrStr !ByteString
instance NFData BCONPtr where
diff --git a/compiler/GHC/Linker/Loader.hs b/compiler/GHC/Linker/Loader.hs
index 3c9baf45cf..1b3a283d92 100644
--- a/compiler/GHC/Linker/Loader.hs
+++ b/compiler/GHC/Linker/Loader.hs
@@ -141,8 +141,11 @@ getLoaderState interp = readMVar (loader_state (interpLoader interp))
emptyLoaderState :: LoaderState
emptyLoaderState = LoaderState
- { closure_env = emptyNameEnv
- , itbl_env = emptyNameEnv
+ { linker_env = LinkerEnv
+ { closure_env = emptyNameEnv
+ , itbl_env = emptyNameEnv
+ , addr_env = emptyNameEnv
+ }
, pkgs_loaded = init_pkgs
, bcos_loaded = emptyModuleEnv
, objs_loaded = emptyModuleEnv
@@ -157,17 +160,16 @@ emptyLoaderState = LoaderState
extendLoadedEnv :: Interp -> [(Name,ForeignHValue)] -> IO ()
extendLoadedEnv interp new_bindings =
- modifyLoaderState_ interp $ \pls@LoaderState{..} -> do
- let new_ce = extendClosureEnv closure_env new_bindings
- return $! pls{ closure_env = new_ce }
+ modifyLoaderState_ interp $ \pls -> do
+ return $! modifyClosureEnv pls $ \ce ->
+ extendClosureEnv ce new_bindings
-- strictness is important for not retaining old copies of the pls
deleteFromLoadedEnv :: Interp -> [Name] -> IO ()
deleteFromLoadedEnv interp to_remove =
modifyLoaderState_ interp $ \pls -> do
- let ce = closure_env pls
- let new_ce = delListFromNameEnv ce to_remove
- return pls{ closure_env = new_ce }
+ return $ modifyClosureEnv pls $ \ce ->
+ delListFromNameEnv ce to_remove
-- | Load the module containing the given Name and get its associated 'HValue'.
--
@@ -185,7 +187,7 @@ loadName interp hsc_env name = do
then throwGhcExceptionIO (ProgramError "")
else return (pls', links, pkgs)
- case lookupNameEnv (closure_env pls) name of
+ case lookupNameEnv (closure_env (linker_env pls)) name of
Just (_,aa) -> return (pls,(aa, links, pkgs))
Nothing -> assertPpr (isExternalName name) (ppr name) $
do let sym_to_find = nameToCLabel name "closure"
@@ -247,10 +249,7 @@ withExtendedLoadedEnv interp new_env action
-- package), so the reset action only removes the names we
-- added earlier.
reset_old_env = liftIO $
- modifyLoaderState_ interp $ \pls ->
- let cur = closure_env pls
- new = delListFromNameEnv cur (map fst new_env)
- in return pls{ closure_env = new }
+ deleteFromLoadedEnv interp (map fst new_env)
-- | Display the loader state.
@@ -594,13 +593,11 @@ loadExpr interp hsc_env span root_ul_bco = do
then throwGhcExceptionIO (ProgramError "")
else do
-- Load the expression itself
- let ie = itbl_env pls
- ce = closure_env pls
-
-- Load the necessary packages and linkables
- let nobreakarray = error "no break array"
+ let le = linker_env pls
+ nobreakarray = error "no break array"
bco_ix = mkNameEnv [(unlinkedBCOName root_ul_bco, 0)]
- resolved <- linkBCO interp ie ce bco_ix nobreakarray root_ul_bco
+ resolved <- linkBCO interp le bco_ix nobreakarray root_ul_bco
bco_opts <- initBCOOpts (hsc_dflags hsc_env)
[root_hvref] <- createBCOs interp bco_opts [resolved]
fhv <- mkFinalizedHValue interp root_hvref
@@ -944,15 +941,16 @@ loadDecls interp hsc_env span cbc@CompiledByteCode{..} = do
then throwGhcExceptionIO (ProgramError "")
else do
-- Link the expression itself
- let ie = plusNameEnv (itbl_env pls) bc_itbls
- ce = closure_env pls
+ let le = linker_env pls
+ le2 = le { itbl_env = plusNameEnv (itbl_env le) bc_itbls
+ , addr_env = plusNameEnv (addr_env le) bc_strs }
-- Link the necessary packages and linkables
bco_opts <- initBCOOpts (hsc_dflags hsc_env)
- new_bindings <- linkSomeBCOs bco_opts interp ie ce [cbc]
+ new_bindings <- linkSomeBCOs bco_opts interp le2 [cbc]
nms_fhvs <- makeForeignNamedHValueRefs interp new_bindings
- let pls2 = pls { closure_env = extendClosureEnv ce nms_fhvs
- , itbl_env = ie }
+ let ce2 = extendClosureEnv (closure_env le2) nms_fhvs
+ !pls2 = pls { linker_env = le2 { closure_env = ce2 } }
return (pls2, (nms_fhvs, links_needed, units_needed))
where
free_names = uniqDSetToList $
@@ -1170,11 +1168,12 @@ dynLinkBCOs bco_opts interp pls bcos = do
cbcs = concatMap byteCodeOfObject unlinkeds
- ies = map bc_itbls cbcs
- gce = closure_env pls
- final_ie = foldr plusNameEnv (itbl_env pls) ies
+ le1 = linker_env pls
+ ie2 = foldr plusNameEnv (itbl_env le1) (map bc_itbls cbcs)
+ ae2 = foldr plusNameEnv (addr_env le1) (map bc_strs cbcs)
+ le2 = le1 { itbl_env = ie2, addr_env = ae2 }
- names_and_refs <- linkSomeBCOs bco_opts interp final_ie gce cbcs
+ names_and_refs <- linkSomeBCOs bco_opts interp le2 cbcs
-- We only want to add the external ones to the ClosureEnv
let (to_add, to_drop) = partition (isExternalName.fst) names_and_refs
@@ -1184,21 +1183,20 @@ dynLinkBCOs bco_opts interp pls bcos = do
-- Wrap finalizers on the ones we want to keep
new_binds <- makeForeignNamedHValueRefs interp to_add
- return pls1 { closure_env = extendClosureEnv gce new_binds,
- itbl_env = final_ie }
+ let ce2 = extendClosureEnv (closure_env le2) new_binds
+ return $! pls1 { linker_env = le2 { closure_env = ce2 } }
-- Link a bunch of BCOs and return references to their values
linkSomeBCOs :: BCOOpts
-> Interp
- -> ItblEnv
- -> ClosureEnv
+ -> LinkerEnv
-> [CompiledByteCode]
-> IO [(Name,HValueRef)]
-- The returned HValueRefs are associated 1-1 with
-- the incoming unlinked BCOs. Each gives the
-- value of the corresponding unlinked BCO
-linkSomeBCOs bco_opts interp ie ce mods = foldr fun do_link mods []
+linkSomeBCOs bco_opts interp le mods = foldr fun do_link mods []
where
fun CompiledByteCode{..} inner accum =
case bc_breaks of
@@ -1211,7 +1209,7 @@ linkSomeBCOs bco_opts interp ie ce mods = foldr fun do_link mods []
let flat = [ (breakarray, bco) | (breakarray, bcos) <- mods, bco <- bcos ]
names = map (unlinkedBCOName . snd) flat
bco_ix = mkNameEnv (zip names [0..])
- resolved <- sequence [ linkBCO interp ie ce bco_ix breakarray bco
+ resolved <- sequence [ linkBCO interp le bco_ix breakarray bco
| (breakarray, bco) <- flat ]
hvrefs <- createBCOs interp bco_opts resolved
return (zip names hvrefs)
@@ -1301,15 +1299,11 @@ unload_wkr interp keep_linkables pls@LoaderState{..} = do
let -- Note that we want to remove all *local*
-- (i.e. non-isExternal) names too (these are the
-- temporary bindings from the command line).
- keep_name :: (Name, a) -> Bool
- keep_name (n,_) = isExternalName n &&
- nameModule n `elemModuleEnv` remaining_bcos_loaded
-
- itbl_env' = filterNameEnv keep_name itbl_env
- closure_env' = filterNameEnv keep_name closure_env
+ keep_name :: Name -> Bool
+ keep_name n = isExternalName n &&
+ nameModule n `elemModuleEnv` remaining_bcos_loaded
- !new_pls = pls { itbl_env = itbl_env',
- closure_env = closure_env',
+ !new_pls = pls { linker_env = filterLinkerEnv keep_name linker_env,
bcos_loaded = remaining_bcos_loaded,
objs_loaded = remaining_objs_loaded }
diff --git a/compiler/GHC/Linker/Types.hs b/compiler/GHC/Linker/Types.hs
index c26cdb0dad..c343537b08 100644
--- a/compiler/GHC/Linker/Types.hs
+++ b/compiler/GHC/Linker/Types.hs
@@ -10,6 +10,12 @@ module GHC.Linker.Types
( Loader (..)
, LoaderState (..)
, uninitializedLoader
+ , modifyClosureEnv
+ , LinkerEnv(..)
+ , filterLinkerEnv
+ , ClosureEnv
+ , emptyClosureEnv
+ , extendClosureEnv
, Linkable(..)
, LinkableSet
, mkLinkableSet
@@ -32,12 +38,12 @@ where
import GHC.Prelude
import GHC.Unit ( UnitId, Module )
-import GHC.ByteCode.Types ( ItblEnv, CompiledByteCode )
+import GHC.ByteCode.Types ( ItblEnv, AddrEnv, CompiledByteCode )
import GHC.Fingerprint.Type ( Fingerprint )
import GHCi.RemoteTypes ( ForeignHValue )
import GHC.Types.Var ( Id )
-import GHC.Types.Name.Env ( NameEnv )
+import GHC.Types.Name.Env ( NameEnv, emptyNameEnv, extendNameEnvList, filterNameEnv )
import GHC.Types.Name ( Name )
import GHC.Utils.Outputable
@@ -67,23 +73,16 @@ serves to ensure mutual exclusion between multiple loaded copies of the GHC
library. The Maybe may be Nothing to indicate that the linker has not yet been
initialised.
-The LoaderState maps Names to actual closures (for interpreted code only), for
+The LinkerEnv maps Names to actual closures (for interpreted code only), for
use during linking.
-}
newtype Loader = Loader { loader_state :: MVar (Maybe LoaderState) }
data LoaderState = LoaderState
- { closure_env :: ClosureEnv
+ { linker_env :: !LinkerEnv
-- ^ Current global mapping from Names to their true values
- , itbl_env :: !ItblEnv
- -- ^ The current global mapping from RdrNames of DataCons to
- -- info table addresses.
- -- When a new Unlinked is linked into the running image, or an existing
- -- module in the image is replaced, the itbl_env must be updated
- -- appropriately.
-
, bcos_loaded :: !LinkableSet
-- ^ The currently loaded interpreted modules (home package)
@@ -102,7 +101,44 @@ data LoaderState = LoaderState
uninitializedLoader :: IO Loader
uninitializedLoader = Loader <$> newMVar Nothing
+modifyClosureEnv :: LoaderState -> (ClosureEnv -> ClosureEnv) -> LoaderState
+modifyClosureEnv pls f =
+ let le = linker_env pls
+ ce = closure_env le
+ in pls { linker_env = le { closure_env = f ce } }
+
+data LinkerEnv = LinkerEnv
+ { closure_env :: !ClosureEnv
+ -- ^ Current global mapping from closure Names to their true values
+
+ , itbl_env :: !ItblEnv
+ -- ^ The current global mapping from RdrNames of DataCons to
+ -- info table addresses.
+ -- When a new Unlinked is linked into the running image, or an existing
+ -- module in the image is replaced, the itbl_env must be updated
+ -- appropriately.
+
+ , addr_env :: !AddrEnv
+ -- ^ Like 'closure_env' and 'itbl_env', but for top-level 'Addr#' literals,
+ -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode.
+ }
+
+filterLinkerEnv :: (Name -> Bool) -> LinkerEnv -> LinkerEnv
+filterLinkerEnv f le = LinkerEnv
+ { closure_env = filterNameEnv (f . fst) (closure_env le)
+ , itbl_env = filterNameEnv (f . fst) (itbl_env le)
+ , addr_env = filterNameEnv (f . fst) (addr_env le)
+ }
+
type ClosureEnv = NameEnv (Name, ForeignHValue)
+
+emptyClosureEnv :: ClosureEnv
+emptyClosureEnv = emptyNameEnv
+
+extendClosureEnv :: ClosureEnv -> [(Name,ForeignHValue)] -> ClosureEnv
+extendClosureEnv cl_env pairs
+ = extendNameEnvList cl_env [ (n, (n,v)) | (n,v) <- pairs]
+
type PkgsLoaded = UniqDFM UnitId LoadedPkgInfo
data LoadedPkgInfo
diff --git a/compiler/GHC/StgToByteCode.hs b/compiler/GHC/StgToByteCode.hs
index c557bc554f..8c54a04d4f 100644
--- a/compiler/GHC/StgToByteCode.hs
+++ b/compiler/GHC/StgToByteCode.hs
@@ -43,6 +43,7 @@ import GHC.Types.Literal
import GHC.Builtin.PrimOps
import GHC.Builtin.PrimOps.Ids (primOpId)
import GHC.Core.Type
+import GHC.Core.TyCo.Compare (eqType)
import GHC.Types.RepType
import GHC.Core.DataCon
import GHC.Core.TyCon
@@ -64,7 +65,7 @@ import GHC.Runtime.Heap.Layout hiding (WordOff, ByteOff, wordsToBytes)
import GHC.Data.Bitmap
import GHC.Data.OrdList
import GHC.Data.Maybe
-import GHC.Types.Var.Env
+import GHC.Types.Name.Env (mkNameEnv)
import GHC.Types.Tickish
import Data.List ( genericReplicate, genericLength, intersperse
@@ -105,7 +106,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
(text "GHC.StgToByteCode"<+>brackets (ppr this_mod))
(const ()) $ do
-- Split top-level binds into strings and others.
- -- See Note [generating code for top-level string literal bindings].
+ -- See Note [Generating code for top-level string literal bindings].
let (strings, lifted_binds) = partitionEithers $ do -- list monad
bnd <- binds
case bnd of
@@ -116,7 +117,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
stringPtrs <- allocateTopStrings interp strings
(BcM_State{..}, proto_bcos) <-
- runBc hsc_env this_mod mb_modBreaks (mkVarEnv stringPtrs) $ do
+ runBc hsc_env this_mod mb_modBreaks $ do
let flattened_binds = concatMap flattenBind (reverse lifted_binds)
mapM schemeTopBind flattened_binds
@@ -127,7 +128,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
"Proto-BCOs" FormatByteCode
(vcat (intersperse (char ' ') (map ppr proto_bcos)))
- cbc <- assembleBCOs interp profile proto_bcos tycs (map snd stringPtrs)
+ cbc <- assembleBCOs interp profile proto_bcos tycs stringPtrs
(case modBreaks of
Nothing -> Nothing
Just mb -> Just mb{ modBreaks_breakInfo = breakInfo })
@@ -147,28 +148,49 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
interp = hscInterp hsc_env
profile = targetProfile dflags
+-- | see Note [Generating code for top-level string literal bindings]
allocateTopStrings
:: Interp
-> [(Id, ByteString)]
- -> IO [(Var, RemotePtr ())]
+ -> IO AddrEnv
allocateTopStrings interp topStrings = do
let !(bndrs, strings) = unzip topStrings
ptrs <- interpCmd interp $ MallocStrings strings
- return $ zip bndrs ptrs
-
-{-
-Note [generating code for top-level string literal bindings]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Here is a summary on how the byte code generator deals with top-level string
-literals:
-
-1. Top-level string literal bindings are separated from the rest of the module.
-
-2. The strings are allocated via interpCmd, in allocateTopStrings
-
-3. The mapping from binders to allocated strings (topStrings) are maintained in
- BcM and used when generating code for variable references.
--}
+ return $ mkNameEnv (zipWith mk_entry bndrs ptrs)
+ where
+ mk_entry bndr ptr = let nm = getName bndr
+ in (nm, (nm, AddrPtr ptr))
+
+{- Note [Generating code for top-level string literal bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As described in Note [Compilation plan for top-level string literals]
+in GHC.Core, the core-to-core optimizer can introduce top-level Addr#
+bindings to represent string literals. The creates two challenges for
+the bytecode compiler: (1) compiling the bindings themselves, and
+(2) compiling references to such bindings. Here is a summary on how
+we deal with them:
+
+ 1. Top-level string literal bindings are separated from the rest of
+ the module. Memory for them is allocated immediately, via
+ interpCmd, in allocateTopStrings, and the resulting AddrEnv is
+ recorded in the bc_strs field of the CompiledByteCode result.
+
+ 2. When we encounter a reference to a top-level string literal, we
+ generate a PUSH_ADDR pseudo-instruction, which is assembled to
+ a PUSH_UBX instruction with a BCONPtrAddr argument.
+
+ 3. The loader accumulates string literal bindings from loaded
+ bytecode in the addr_env field of the LinkerEnv.
+
+ 4. The BCO linker resolves BCONPtrAddr references by searching both
+ the addr_env (to find literals defined in bytecode) and the native
+ symbol table (to find literals defined in native code).
+
+This strategy works alright, but it does have one significant problem:
+we never free the memory that we allocate for the top-level strings.
+In theory, we could explicitly free it when BCOs are unloaded, but
+this comes with its own complications; see #22400 for why. For now,
+we just accept the leak, but it would nice to find something better. -}
-- -----------------------------------------------------------------------------
-- Compilation schema for the bytecode generator
@@ -1774,26 +1796,25 @@ pushAtom d p (StgVarArg var)
-- slots on to the top of the stack.
| otherwise -- var must be a global variable
- = do topStrings <- getTopStrings
- platform <- targetPlatform <$> getDynFlags
- case lookupVarEnv topStrings var of
- Just ptr -> pushAtom d p $ StgLitArg $ mkLitWord platform $
- fromIntegral $ ptrToWordPtr $ fromRemotePtr ptr
- Nothing
- -- PUSH_G doesn't tag constructors. So we use PACK here
- -- if we are dealing with nullary constructor.
- | Just con <- isDataConWorkId_maybe var
- -> do
- massert (sz == wordSize platform)
- massert (isNullaryRepDataCon con)
- return (unitOL (PACK con 0), sz)
- | otherwise
- -> do
- let
- massert (sz == wordSize platform)
- return (unitOL (PUSH_G (getName var)), sz)
- where
- !sz = idSizeCon platform var
+ = do platform <- targetPlatform <$> getDynFlags
+ let !szb = idSizeCon platform var
+ massert (szb == wordSize platform)
+
+ -- PUSH_G doesn't tag constructors. So we use PACK here
+ -- if we are dealing with nullary constructor.
+ case isDataConWorkId_maybe var of
+ Just con -> do
+ massert (isNullaryRepDataCon con)
+ return (unitOL (PACK con 0), szb)
+
+ Nothing
+ -- see Note [Generating code for top-level string literal bindings]
+ | isUnliftedType (idType var) -> do
+ massert (idType var `eqType` addrPrimTy)
+ return (unitOL (PUSH_ADDR (getName var)), szb)
+
+ | otherwise -> do
+ return (unitOL (PUSH_G (getName var)), szb)
pushAtom _ _ (StgLitArg lit) = pushLiteral True lit
@@ -2162,8 +2183,6 @@ data BcM_State
-- Should be free()d when it is GCd
, modBreaks :: Maybe ModBreaks -- info about breakpoints
, breakInfo :: IntMap CgBreakInfo
- , topStrings :: IdEnv (RemotePtr ()) -- top-level string literals
- -- See Note [generating code for top-level string literal bindings].
}
newtype BcM r = BcM (BcM_State -> IO (BcM_State, r)) deriving (Functor)
@@ -2174,11 +2193,10 @@ ioToBc io = BcM $ \st -> do
return (st, x)
runBc :: HscEnv -> Module -> Maybe ModBreaks
- -> IdEnv (RemotePtr ())
-> BcM r
-> IO (BcM_State, r)
-runBc hsc_env this_mod modBreaks topStrings (BcM m)
- = m (BcM_State hsc_env this_mod 0 [] modBreaks IntMap.empty topStrings)
+runBc hsc_env this_mod modBreaks (BcM m)
+ = m (BcM_State hsc_env this_mod 0 [] modBreaks IntMap.empty)
thenBc :: BcM a -> (a -> BcM b) -> BcM b
thenBc (BcM expr) cont = BcM $ \st0 -> do
@@ -2247,8 +2265,5 @@ newBreakInfo ix info = BcM $ \st ->
getCurrentModule :: BcM Module
getCurrentModule = BcM $ \st -> return (st, thisModule st)
-getTopStrings :: BcM (IdEnv (RemotePtr ()))
-getTopStrings = BcM $ \st -> return (st, topStrings st)
-
tickFS :: FastString
tickFS = fsLit "ticked"
diff --git a/testsuite/tests/bytecode/T22376/A.hs b/testsuite/tests/bytecode/T22376/A.hs
new file mode 100644
index 0000000000..670c3fd6a4
--- /dev/null
+++ b/testsuite/tests/bytecode/T22376/A.hs
@@ -0,0 +1,6 @@
+module A where
+import B
+
+foo :: String
+foo = f "bc"
+{-# NOINLINE foo #-}
diff --git a/testsuite/tests/bytecode/T22376/B.hs b/testsuite/tests/bytecode/T22376/B.hs
new file mode 100644
index 0000000000..8bfb7bfd32
--- /dev/null
+++ b/testsuite/tests/bytecode/T22376/B.hs
@@ -0,0 +1,4 @@
+module B where
+
+f :: String -> String
+f = ("a" ++)
diff --git a/testsuite/tests/bytecode/T22376/T22376.hs b/testsuite/tests/bytecode/T22376/T22376.hs
new file mode 100644
index 0000000000..b97640a00b
--- /dev/null
+++ b/testsuite/tests/bytecode/T22376/T22376.hs
@@ -0,0 +1,6 @@
+{-# LANGUAGE TemplateHaskell #-}
+import Language.Haskell.TH.Syntax (lift)
+import A
+
+main :: IO ()
+main = putStrLn $(lift foo)
diff --git a/testsuite/tests/bytecode/T22376/T22376.stdout b/testsuite/tests/bytecode/T22376/T22376.stdout
new file mode 100644
index 0000000000..8baef1b4ab
--- /dev/null
+++ b/testsuite/tests/bytecode/T22376/T22376.stdout
@@ -0,0 +1 @@
+abc
diff --git a/testsuite/tests/bytecode/T22376/all.T b/testsuite/tests/bytecode/T22376/all.T
new file mode 100644
index 0000000000..0b15e93e6d
--- /dev/null
+++ b/testsuite/tests/bytecode/T22376/all.T
@@ -0,0 +1,2 @@
+test('T22376', [req_interp, extra_files(['A.hs', 'B.hs'])], multimod_compile_and_run,
+ ['T22376', '-O1 -fwrite-if-simplified-core -fbyte-code-and-object-code -fprefer-byte-code'])