diff options
author | Alexis King <lexi.lambda@gmail.com> | 2023-01-18 17:00:54 -0600 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2023-02-20 15:27:17 -0500 |
commit | 26243de1e3716886161d79918af9359f7639314b (patch) | |
tree | c106f6bf05cbd460d23a7bdc00ba7f42636cf7a2 | |
parent | 2592ab6924ee34ed0f0d82a7cb0aed393d93bb14 (diff) | |
download | haskell-26243de1e3716886161d79918af9359f7639314b.tar.gz |
Handle top-level Addr# literals in the bytecode compiler
Fixes #22376.
-rw-r--r-- | compiler/GHC/ByteCode/Asm.hs | 39 | ||||
-rw-r--r-- | compiler/GHC/ByteCode/Instr.hs | 6 | ||||
-rw-r--r-- | compiler/GHC/ByteCode/Linker.hs | 61 | ||||
-rw-r--r-- | compiler/GHC/ByteCode/Types.hs | 14 | ||||
-rw-r--r-- | compiler/GHC/Linker/Loader.hs | 76 | ||||
-rw-r--r-- | compiler/GHC/Linker/Types.hs | 58 | ||||
-rw-r--r-- | compiler/GHC/StgToByteCode.hs | 111 | ||||
-rw-r--r-- | testsuite/tests/bytecode/T22376/A.hs | 6 | ||||
-rw-r--r-- | testsuite/tests/bytecode/T22376/B.hs | 4 | ||||
-rw-r--r-- | testsuite/tests/bytecode/T22376/T22376.hs | 6 | ||||
-rw-r--r-- | testsuite/tests/bytecode/T22376/T22376.stdout | 1 | ||||
-rw-r--r-- | testsuite/tests/bytecode/T22376/all.T | 2 |
12 files changed, 241 insertions, 143 deletions
diff --git a/compiler/GHC/ByteCode/Asm.hs b/compiler/GHC/ByteCode/Asm.hs index 391949d448..f020f0af0a 100644 --- a/compiler/GHC/ByteCode/Asm.hs +++ b/compiler/GHC/ByteCode/Asm.hs @@ -97,7 +97,7 @@ assembleBCOs -> Profile -> [ProtoBCO Name] -> [TyCon] - -> [RemotePtr ()] + -> AddrEnv -> Maybe ModBreaks -> IO CompiledByteCode assembleBCOs interp profile proto_bcos tycons top_strs modbreaks = do @@ -105,27 +105,40 @@ assembleBCOs interp profile proto_bcos tycons top_strs modbreaks = do -- fixed for an interpreter itblenv <- mkITbls interp profile tycons bcos <- mapM (assembleBCO (profilePlatform profile)) proto_bcos - (bcos',ptrs) <- mallocStrings interp bcos + bcos' <- mallocStrings interp bcos return CompiledByteCode { bc_bcos = bcos' , bc_itbls = itblenv , bc_ffis = concatMap protoBCOFFIs proto_bcos - , bc_strs = top_strs ++ ptrs + , bc_strs = top_strs , bc_breaks = modbreaks } --- Find all the literal strings and malloc them together. We want to --- do this because: +-- Note [Allocating string literals] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Our strategy for handling top-level string literal bindings is described in +-- Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode, +-- but not all Addr# literals in a program are guaranteed to be lifted to the +-- top level. Our strategy for handling local Addr# literals is somewhat simpler: +-- after assembling, we find all the BCONPtrStr arguments in the program, malloc +-- memory for them, and bake the resulting addresses into the instruction stream +-- in the form of BCONPtrWord arguments. -- --- a) It should be done when we compile the module, not each time we relink it --- b) For -fexternal-interpreter It's more efficient to malloc the strings --- as a single batch message, especially when compiling in parallel. +-- Since we do this when assembling, we only allocate the memory when we compile +-- the module, not each time we relink it. However, we do want to take care to +-- malloc the memory all in one go, since that is more efficient with +-- -fexternal-interpreter, especially when compiling in parallel. -- -mallocStrings :: Interp -> [UnlinkedBCO] -> IO ([UnlinkedBCO], [RemotePtr ()]) +-- Note that, as with top-level string literal bindings, this memory is never +-- freed, so it just leaks if the BCO is unloaded. See Note [Generating code for +-- top-level string literal bindings] in GHC.StgToByteCode for some discussion +-- about why. +-- +mallocStrings :: Interp -> [UnlinkedBCO] -> IO [UnlinkedBCO] mallocStrings interp ulbcos = do let bytestrings = reverse (execState (mapM_ collect ulbcos) []) ptrs <- interpCmd interp (MallocStrings bytestrings) - return (evalState (mapM splice ulbcos) ptrs, ptrs) + return (evalState (mapM splice ulbcos) ptrs) where splice bco@UnlinkedBCO{..} = do lits <- mapM spliceLit unlinkedBCOLits @@ -162,7 +175,7 @@ assembleOneBCO interp profile pbco = do -- TODO: the profile should be bundled with the interpreter: the rts ways are -- fixed for an interpreter ubco <- assembleBCO (profilePlatform profile) pbco - ([ubco'], _ptrs) <- mallocStrings interp [ubco] + [ubco'] <- mallocStrings interp [ubco] return ubco' assembleBCO :: Platform -> ProtoBCO Name -> IO UnlinkedBCO @@ -411,6 +424,10 @@ assembleI platform i = case i of PUSH_UBX lit nws -> do np <- literal lit emit bci_PUSH_UBX [Op np, SmallOp nws] + -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode + PUSH_ADDR nm -> do np <- lit [BCONPtrAddr nm] + emit bci_PUSH_UBX [Op np, SmallOp 1] + PUSH_APPLY_N -> emit bci_PUSH_APPLY_N [] PUSH_APPLY_V -> emit bci_PUSH_APPLY_V [] PUSH_APPLY_F -> emit bci_PUSH_APPLY_F [] diff --git a/compiler/GHC/ByteCode/Instr.hs b/compiler/GHC/ByteCode/Instr.hs index 34baa57d40..4f9fd75fc4 100644 --- a/compiler/GHC/ByteCode/Instr.hs +++ b/compiler/GHC/ByteCode/Instr.hs @@ -112,6 +112,10 @@ data BCInstr -- type, and it appears impossible to get hold of the bits of -- an addr, even though we need to assemble BCOs. + -- Push a top-level Addr#. This is a pseudo-instruction assembled to PUSH_UBX, + -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode. + | PUSH_ADDR Name + -- various kinds of application | PUSH_APPLY_N | PUSH_APPLY_V @@ -284,6 +288,7 @@ instance Outputable BCInstr where ppr (PUSH_UBX16 lit) = text "PUSH_UBX16" <+> ppr lit ppr (PUSH_UBX32 lit) = text "PUSH_UBX32" <+> ppr lit ppr (PUSH_UBX lit nw) = text "PUSH_UBX" <+> parens (ppr nw) <+> ppr lit + ppr (PUSH_ADDR nm) = text "PUSH_ADDR" <+> ppr nm ppr PUSH_APPLY_N = text "PUSH_APPLY_N" ppr PUSH_APPLY_V = text "PUSH_APPLY_V" ppr PUSH_APPLY_F = text "PUSH_APPLY_F" @@ -397,6 +402,7 @@ bciStackUse (PUSH_UBX8 _) = 1 -- overapproximation bciStackUse (PUSH_UBX16 _) = 1 -- overapproximation bciStackUse (PUSH_UBX32 _) = 1 -- overapproximation on 64bit arch bciStackUse (PUSH_UBX _ nw) = fromIntegral nw +bciStackUse PUSH_ADDR{} = 1 bciStackUse PUSH_APPLY_N{} = 1 bciStackUse PUSH_APPLY_V{} = 1 bciStackUse PUSH_APPLY_F{} = 1 diff --git a/compiler/GHC/ByteCode/Linker.hs b/compiler/GHC/ByteCode/Linker.hs index c3af3d4e85..8a7a24ae1a 100644 --- a/compiler/GHC/ByteCode/Linker.hs +++ b/compiler/GHC/ByteCode/Linker.hs @@ -8,10 +8,7 @@ -- | Bytecode assembler and linker module GHC.ByteCode.Linker - ( ClosureEnv - , emptyClosureEnv - , extendClosureEnv - , linkBCO + ( linkBCO , lookupStaticPtr , lookupIE , nameToCLabel @@ -35,6 +32,8 @@ import GHC.Unit.Types import GHC.Data.FastString import GHC.Data.SizedSeq +import GHC.Linker.Types + import GHC.Utils.Panic import GHC.Utils.Panic.Plain import GHC.Utils.Outputable @@ -53,45 +52,34 @@ import GHC.Exts Linking interpretables into something we can run -} -type ClosureEnv = NameEnv (Name, ForeignHValue) - -emptyClosureEnv :: ClosureEnv -emptyClosureEnv = emptyNameEnv - -extendClosureEnv :: ClosureEnv -> [(Name,ForeignHValue)] -> ClosureEnv -extendClosureEnv cl_env pairs - = extendNameEnvList cl_env [ (n, (n,v)) | (n,v) <- pairs] - -{- - Linking interpretables into something we can run --} - linkBCO :: Interp - -> ItblEnv - -> ClosureEnv + -> LinkerEnv -> NameEnv Int -> RemoteRef BreakArray -> UnlinkedBCO -> IO ResolvedBCO -linkBCO interp ie ce bco_ix breakarray +linkBCO interp le bco_ix breakarray (UnlinkedBCO _ arity insns bitmap lits0 ptrs0) = do -- fromIntegral Word -> Word64 should be a no op if Word is Word64 -- otherwise it will result in a cast to longlong on 32bit systems. - lits <- mapM (fmap fromIntegral . lookupLiteral interp ie) (ssElts lits0) - ptrs <- mapM (resolvePtr interp ie ce bco_ix breakarray) (ssElts ptrs0) + lits <- mapM (fmap fromIntegral . lookupLiteral interp le) (ssElts lits0) + ptrs <- mapM (resolvePtr interp le bco_ix breakarray) (ssElts ptrs0) return (ResolvedBCO isLittleEndian arity insns bitmap (listArray (0, fromIntegral (sizeSS lits0)-1) lits) (addListToSS emptySS ptrs)) -lookupLiteral :: Interp -> ItblEnv -> BCONPtr -> IO Word -lookupLiteral interp ie ptr = case ptr of +lookupLiteral :: Interp -> LinkerEnv -> BCONPtr -> IO Word +lookupLiteral interp le ptr = case ptr of BCONPtrWord lit -> return lit BCONPtrLbl sym -> do Ptr a# <- lookupStaticPtr interp sym return (W# (int2Word# (addr2Int# a#))) BCONPtrItbl nm -> do - Ptr a# <- lookupIE interp ie nm + Ptr a# <- lookupIE interp (itbl_env le) nm + return (W# (int2Word# (addr2Int# a#))) + BCONPtrAddr nm -> do + Ptr a# <- lookupAddr interp (addr_env le) nm return (W# (int2Word# (addr2Int# a#))) BCONPtrStr _ -> -- should be eliminated during assembleBCOs @@ -124,6 +112,20 @@ lookupIE interp ie con_nm = (unpackFS sym_to_find1 ++ " or " ++ unpackFS sym_to_find2) +-- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode +lookupAddr :: Interp -> AddrEnv -> Name -> IO (Ptr ()) +lookupAddr interp ae addr_nm = do + case lookupNameEnv ae addr_nm of + Just (_, AddrPtr ptr) -> return (fromRemotePtr ptr) + Nothing -> do -- try looking up in the object files. + let sym_to_find = nameToCLabel addr_nm "bytes" + -- see Note [Bytes label] in GHC.Cmm.CLabel + m <- lookupSymbol interp sym_to_find + case m of + Just ptr -> return ptr + Nothing -> linkFail "GHC.ByteCode.Linker.lookupAddr" + (unpackFS sym_to_find) + lookupPrimOp :: Interp -> PrimOp -> IO (RemotePtr ()) lookupPrimOp interp primop = do let sym_to_find = primopToCLabel primop "closure" @@ -134,18 +136,17 @@ lookupPrimOp interp primop = do resolvePtr :: Interp - -> ItblEnv - -> ClosureEnv + -> LinkerEnv -> NameEnv Int -> RemoteRef BreakArray -> BCOPtr -> IO ResolvedBCOPtr -resolvePtr interp ie ce bco_ix breakarray ptr = case ptr of +resolvePtr interp le bco_ix breakarray ptr = case ptr of BCOPtrName nm | Just ix <- lookupNameEnv bco_ix nm -> return (ResolvedBCORef ix) -- ref to another BCO in this group - | Just (_, rhv) <- lookupNameEnv ce nm + | Just (_, rhv) <- lookupNameEnv (closure_env le) nm -> return (ResolvedBCOPtr (unsafeForeignRefToRemoteRef rhv)) | otherwise @@ -161,7 +162,7 @@ resolvePtr interp ie ce bco_ix breakarray ptr = case ptr of -> ResolvedBCOStaticPtr <$> lookupPrimOp interp op BCOPtrBCO bco - -> ResolvedBCOPtrBCO <$> linkBCO interp ie ce bco_ix breakarray bco + -> ResolvedBCOPtrBCO <$> linkBCO interp le bco_ix breakarray bco BCOPtrBreakArray -> return (ResolvedBCOPtrBreakArray breakarray) diff --git a/compiler/GHC/ByteCode/Types.hs b/compiler/GHC/ByteCode/Types.hs index a100e72085..e16fcf7f5a 100644 --- a/compiler/GHC/ByteCode/Types.hs +++ b/compiler/GHC/ByteCode/Types.hs @@ -14,6 +14,7 @@ module GHC.ByteCode.Types , ByteOff(..), WordOff(..) , UnlinkedBCO(..), BCOPtr(..), BCONPtr(..) , ItblEnv, ItblPtr(..) + , AddrEnv, AddrPtr(..) , CgBreakInfo(..) , ModBreaks (..), BreakIndex, emptyModBreaks , CCostCentre @@ -51,7 +52,7 @@ data CompiledByteCode = CompiledByteCode { bc_bcos :: [UnlinkedBCO] -- Bunch of interpretable bindings , bc_itbls :: ItblEnv -- A mapping from DataCons to their itbls , bc_ffis :: [FFIInfo] -- ffi blocks we allocated - , bc_strs :: [RemotePtr ()] -- malloc'd strings + , bc_strs :: AddrEnv -- malloc'd top-level strings , bc_breaks :: Maybe ModBreaks -- breakpoint info (Nothing if we're not -- creating breakpoints, for some reason) } @@ -69,7 +70,7 @@ seqCompiledByteCode CompiledByteCode{..} = rnf bc_bcos `seq` seqEltsNameEnv rnf bc_itbls `seq` rnf bc_ffis `seq` - rnf bc_strs `seq` + seqEltsNameEnv rnf bc_strs `seq` rnf (fmap seqModBreaks bc_breaks) newtype ByteOff = ByteOff Int @@ -131,11 +132,14 @@ voidPrimCallInfo :: NativeCallInfo voidPrimCallInfo = NativeCallInfo NativePrimCall 0 emptyRegSet 0 type ItblEnv = NameEnv (Name, ItblPtr) +type AddrEnv = NameEnv (Name, AddrPtr) -- We need the Name in the range so we know which -- elements to filter out when unloading a module newtype ItblPtr = ItblPtr (RemotePtr Heap.StgInfoTable) deriving (Show, NFData) +newtype AddrPtr = AddrPtr (RemotePtr ()) + deriving (NFData) data UnlinkedBCO = UnlinkedBCO { @@ -166,6 +170,12 @@ data BCONPtr = BCONPtrWord {-# UNPACK #-} !Word | BCONPtrLbl !FastString | BCONPtrItbl !Name + -- | A reference to a top-level string literal; see + -- Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode. + | BCONPtrAddr !Name + -- | Only used internally in the assembler in an intermediate representation; + -- should never appear in a fully-assembled UnlinkedBCO. + -- Also see Note [Allocating string literals] in GHC.ByteCode.Asm. | BCONPtrStr !ByteString instance NFData BCONPtr where diff --git a/compiler/GHC/Linker/Loader.hs b/compiler/GHC/Linker/Loader.hs index 3c9baf45cf..1b3a283d92 100644 --- a/compiler/GHC/Linker/Loader.hs +++ b/compiler/GHC/Linker/Loader.hs @@ -141,8 +141,11 @@ getLoaderState interp = readMVar (loader_state (interpLoader interp)) emptyLoaderState :: LoaderState emptyLoaderState = LoaderState - { closure_env = emptyNameEnv - , itbl_env = emptyNameEnv + { linker_env = LinkerEnv + { closure_env = emptyNameEnv + , itbl_env = emptyNameEnv + , addr_env = emptyNameEnv + } , pkgs_loaded = init_pkgs , bcos_loaded = emptyModuleEnv , objs_loaded = emptyModuleEnv @@ -157,17 +160,16 @@ emptyLoaderState = LoaderState extendLoadedEnv :: Interp -> [(Name,ForeignHValue)] -> IO () extendLoadedEnv interp new_bindings = - modifyLoaderState_ interp $ \pls@LoaderState{..} -> do - let new_ce = extendClosureEnv closure_env new_bindings - return $! pls{ closure_env = new_ce } + modifyLoaderState_ interp $ \pls -> do + return $! modifyClosureEnv pls $ \ce -> + extendClosureEnv ce new_bindings -- strictness is important for not retaining old copies of the pls deleteFromLoadedEnv :: Interp -> [Name] -> IO () deleteFromLoadedEnv interp to_remove = modifyLoaderState_ interp $ \pls -> do - let ce = closure_env pls - let new_ce = delListFromNameEnv ce to_remove - return pls{ closure_env = new_ce } + return $ modifyClosureEnv pls $ \ce -> + delListFromNameEnv ce to_remove -- | Load the module containing the given Name and get its associated 'HValue'. -- @@ -185,7 +187,7 @@ loadName interp hsc_env name = do then throwGhcExceptionIO (ProgramError "") else return (pls', links, pkgs) - case lookupNameEnv (closure_env pls) name of + case lookupNameEnv (closure_env (linker_env pls)) name of Just (_,aa) -> return (pls,(aa, links, pkgs)) Nothing -> assertPpr (isExternalName name) (ppr name) $ do let sym_to_find = nameToCLabel name "closure" @@ -247,10 +249,7 @@ withExtendedLoadedEnv interp new_env action -- package), so the reset action only removes the names we -- added earlier. reset_old_env = liftIO $ - modifyLoaderState_ interp $ \pls -> - let cur = closure_env pls - new = delListFromNameEnv cur (map fst new_env) - in return pls{ closure_env = new } + deleteFromLoadedEnv interp (map fst new_env) -- | Display the loader state. @@ -594,13 +593,11 @@ loadExpr interp hsc_env span root_ul_bco = do then throwGhcExceptionIO (ProgramError "") else do -- Load the expression itself - let ie = itbl_env pls - ce = closure_env pls - -- Load the necessary packages and linkables - let nobreakarray = error "no break array" + let le = linker_env pls + nobreakarray = error "no break array" bco_ix = mkNameEnv [(unlinkedBCOName root_ul_bco, 0)] - resolved <- linkBCO interp ie ce bco_ix nobreakarray root_ul_bco + resolved <- linkBCO interp le bco_ix nobreakarray root_ul_bco bco_opts <- initBCOOpts (hsc_dflags hsc_env) [root_hvref] <- createBCOs interp bco_opts [resolved] fhv <- mkFinalizedHValue interp root_hvref @@ -944,15 +941,16 @@ loadDecls interp hsc_env span cbc@CompiledByteCode{..} = do then throwGhcExceptionIO (ProgramError "") else do -- Link the expression itself - let ie = plusNameEnv (itbl_env pls) bc_itbls - ce = closure_env pls + let le = linker_env pls + le2 = le { itbl_env = plusNameEnv (itbl_env le) bc_itbls + , addr_env = plusNameEnv (addr_env le) bc_strs } -- Link the necessary packages and linkables bco_opts <- initBCOOpts (hsc_dflags hsc_env) - new_bindings <- linkSomeBCOs bco_opts interp ie ce [cbc] + new_bindings <- linkSomeBCOs bco_opts interp le2 [cbc] nms_fhvs <- makeForeignNamedHValueRefs interp new_bindings - let pls2 = pls { closure_env = extendClosureEnv ce nms_fhvs - , itbl_env = ie } + let ce2 = extendClosureEnv (closure_env le2) nms_fhvs + !pls2 = pls { linker_env = le2 { closure_env = ce2 } } return (pls2, (nms_fhvs, links_needed, units_needed)) where free_names = uniqDSetToList $ @@ -1170,11 +1168,12 @@ dynLinkBCOs bco_opts interp pls bcos = do cbcs = concatMap byteCodeOfObject unlinkeds - ies = map bc_itbls cbcs - gce = closure_env pls - final_ie = foldr plusNameEnv (itbl_env pls) ies + le1 = linker_env pls + ie2 = foldr plusNameEnv (itbl_env le1) (map bc_itbls cbcs) + ae2 = foldr plusNameEnv (addr_env le1) (map bc_strs cbcs) + le2 = le1 { itbl_env = ie2, addr_env = ae2 } - names_and_refs <- linkSomeBCOs bco_opts interp final_ie gce cbcs + names_and_refs <- linkSomeBCOs bco_opts interp le2 cbcs -- We only want to add the external ones to the ClosureEnv let (to_add, to_drop) = partition (isExternalName.fst) names_and_refs @@ -1184,21 +1183,20 @@ dynLinkBCOs bco_opts interp pls bcos = do -- Wrap finalizers on the ones we want to keep new_binds <- makeForeignNamedHValueRefs interp to_add - return pls1 { closure_env = extendClosureEnv gce new_binds, - itbl_env = final_ie } + let ce2 = extendClosureEnv (closure_env le2) new_binds + return $! pls1 { linker_env = le2 { closure_env = ce2 } } -- Link a bunch of BCOs and return references to their values linkSomeBCOs :: BCOOpts -> Interp - -> ItblEnv - -> ClosureEnv + -> LinkerEnv -> [CompiledByteCode] -> IO [(Name,HValueRef)] -- The returned HValueRefs are associated 1-1 with -- the incoming unlinked BCOs. Each gives the -- value of the corresponding unlinked BCO -linkSomeBCOs bco_opts interp ie ce mods = foldr fun do_link mods [] +linkSomeBCOs bco_opts interp le mods = foldr fun do_link mods [] where fun CompiledByteCode{..} inner accum = case bc_breaks of @@ -1211,7 +1209,7 @@ linkSomeBCOs bco_opts interp ie ce mods = foldr fun do_link mods [] let flat = [ (breakarray, bco) | (breakarray, bcos) <- mods, bco <- bcos ] names = map (unlinkedBCOName . snd) flat bco_ix = mkNameEnv (zip names [0..]) - resolved <- sequence [ linkBCO interp ie ce bco_ix breakarray bco + resolved <- sequence [ linkBCO interp le bco_ix breakarray bco | (breakarray, bco) <- flat ] hvrefs <- createBCOs interp bco_opts resolved return (zip names hvrefs) @@ -1301,15 +1299,11 @@ unload_wkr interp keep_linkables pls@LoaderState{..} = do let -- Note that we want to remove all *local* -- (i.e. non-isExternal) names too (these are the -- temporary bindings from the command line). - keep_name :: (Name, a) -> Bool - keep_name (n,_) = isExternalName n && - nameModule n `elemModuleEnv` remaining_bcos_loaded - - itbl_env' = filterNameEnv keep_name itbl_env - closure_env' = filterNameEnv keep_name closure_env + keep_name :: Name -> Bool + keep_name n = isExternalName n && + nameModule n `elemModuleEnv` remaining_bcos_loaded - !new_pls = pls { itbl_env = itbl_env', - closure_env = closure_env', + !new_pls = pls { linker_env = filterLinkerEnv keep_name linker_env, bcos_loaded = remaining_bcos_loaded, objs_loaded = remaining_objs_loaded } diff --git a/compiler/GHC/Linker/Types.hs b/compiler/GHC/Linker/Types.hs index c26cdb0dad..c343537b08 100644 --- a/compiler/GHC/Linker/Types.hs +++ b/compiler/GHC/Linker/Types.hs @@ -10,6 +10,12 @@ module GHC.Linker.Types ( Loader (..) , LoaderState (..) , uninitializedLoader + , modifyClosureEnv + , LinkerEnv(..) + , filterLinkerEnv + , ClosureEnv + , emptyClosureEnv + , extendClosureEnv , Linkable(..) , LinkableSet , mkLinkableSet @@ -32,12 +38,12 @@ where import GHC.Prelude import GHC.Unit ( UnitId, Module ) -import GHC.ByteCode.Types ( ItblEnv, CompiledByteCode ) +import GHC.ByteCode.Types ( ItblEnv, AddrEnv, CompiledByteCode ) import GHC.Fingerprint.Type ( Fingerprint ) import GHCi.RemoteTypes ( ForeignHValue ) import GHC.Types.Var ( Id ) -import GHC.Types.Name.Env ( NameEnv ) +import GHC.Types.Name.Env ( NameEnv, emptyNameEnv, extendNameEnvList, filterNameEnv ) import GHC.Types.Name ( Name ) import GHC.Utils.Outputable @@ -67,23 +73,16 @@ serves to ensure mutual exclusion between multiple loaded copies of the GHC library. The Maybe may be Nothing to indicate that the linker has not yet been initialised. -The LoaderState maps Names to actual closures (for interpreted code only), for +The LinkerEnv maps Names to actual closures (for interpreted code only), for use during linking. -} newtype Loader = Loader { loader_state :: MVar (Maybe LoaderState) } data LoaderState = LoaderState - { closure_env :: ClosureEnv + { linker_env :: !LinkerEnv -- ^ Current global mapping from Names to their true values - , itbl_env :: !ItblEnv - -- ^ The current global mapping from RdrNames of DataCons to - -- info table addresses. - -- When a new Unlinked is linked into the running image, or an existing - -- module in the image is replaced, the itbl_env must be updated - -- appropriately. - , bcos_loaded :: !LinkableSet -- ^ The currently loaded interpreted modules (home package) @@ -102,7 +101,44 @@ data LoaderState = LoaderState uninitializedLoader :: IO Loader uninitializedLoader = Loader <$> newMVar Nothing +modifyClosureEnv :: LoaderState -> (ClosureEnv -> ClosureEnv) -> LoaderState +modifyClosureEnv pls f = + let le = linker_env pls + ce = closure_env le + in pls { linker_env = le { closure_env = f ce } } + +data LinkerEnv = LinkerEnv + { closure_env :: !ClosureEnv + -- ^ Current global mapping from closure Names to their true values + + , itbl_env :: !ItblEnv + -- ^ The current global mapping from RdrNames of DataCons to + -- info table addresses. + -- When a new Unlinked is linked into the running image, or an existing + -- module in the image is replaced, the itbl_env must be updated + -- appropriately. + + , addr_env :: !AddrEnv + -- ^ Like 'closure_env' and 'itbl_env', but for top-level 'Addr#' literals, + -- see Note [Generating code for top-level string literal bindings] in GHC.StgToByteCode. + } + +filterLinkerEnv :: (Name -> Bool) -> LinkerEnv -> LinkerEnv +filterLinkerEnv f le = LinkerEnv + { closure_env = filterNameEnv (f . fst) (closure_env le) + , itbl_env = filterNameEnv (f . fst) (itbl_env le) + , addr_env = filterNameEnv (f . fst) (addr_env le) + } + type ClosureEnv = NameEnv (Name, ForeignHValue) + +emptyClosureEnv :: ClosureEnv +emptyClosureEnv = emptyNameEnv + +extendClosureEnv :: ClosureEnv -> [(Name,ForeignHValue)] -> ClosureEnv +extendClosureEnv cl_env pairs + = extendNameEnvList cl_env [ (n, (n,v)) | (n,v) <- pairs] + type PkgsLoaded = UniqDFM UnitId LoadedPkgInfo data LoadedPkgInfo diff --git a/compiler/GHC/StgToByteCode.hs b/compiler/GHC/StgToByteCode.hs index c557bc554f..8c54a04d4f 100644 --- a/compiler/GHC/StgToByteCode.hs +++ b/compiler/GHC/StgToByteCode.hs @@ -43,6 +43,7 @@ import GHC.Types.Literal import GHC.Builtin.PrimOps import GHC.Builtin.PrimOps.Ids (primOpId) import GHC.Core.Type +import GHC.Core.TyCo.Compare (eqType) import GHC.Types.RepType import GHC.Core.DataCon import GHC.Core.TyCon @@ -64,7 +65,7 @@ import GHC.Runtime.Heap.Layout hiding (WordOff, ByteOff, wordsToBytes) import GHC.Data.Bitmap import GHC.Data.OrdList import GHC.Data.Maybe -import GHC.Types.Var.Env +import GHC.Types.Name.Env (mkNameEnv) import GHC.Types.Tickish import Data.List ( genericReplicate, genericLength, intersperse @@ -105,7 +106,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks (text "GHC.StgToByteCode"<+>brackets (ppr this_mod)) (const ()) $ do -- Split top-level binds into strings and others. - -- See Note [generating code for top-level string literal bindings]. + -- See Note [Generating code for top-level string literal bindings]. let (strings, lifted_binds) = partitionEithers $ do -- list monad bnd <- binds case bnd of @@ -116,7 +117,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks stringPtrs <- allocateTopStrings interp strings (BcM_State{..}, proto_bcos) <- - runBc hsc_env this_mod mb_modBreaks (mkVarEnv stringPtrs) $ do + runBc hsc_env this_mod mb_modBreaks $ do let flattened_binds = concatMap flattenBind (reverse lifted_binds) mapM schemeTopBind flattened_binds @@ -127,7 +128,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks "Proto-BCOs" FormatByteCode (vcat (intersperse (char ' ') (map ppr proto_bcos))) - cbc <- assembleBCOs interp profile proto_bcos tycs (map snd stringPtrs) + cbc <- assembleBCOs interp profile proto_bcos tycs stringPtrs (case modBreaks of Nothing -> Nothing Just mb -> Just mb{ modBreaks_breakInfo = breakInfo }) @@ -147,28 +148,49 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks interp = hscInterp hsc_env profile = targetProfile dflags +-- | see Note [Generating code for top-level string literal bindings] allocateTopStrings :: Interp -> [(Id, ByteString)] - -> IO [(Var, RemotePtr ())] + -> IO AddrEnv allocateTopStrings interp topStrings = do let !(bndrs, strings) = unzip topStrings ptrs <- interpCmd interp $ MallocStrings strings - return $ zip bndrs ptrs - -{- -Note [generating code for top-level string literal bindings] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Here is a summary on how the byte code generator deals with top-level string -literals: - -1. Top-level string literal bindings are separated from the rest of the module. - -2. The strings are allocated via interpCmd, in allocateTopStrings - -3. The mapping from binders to allocated strings (topStrings) are maintained in - BcM and used when generating code for variable references. --} + return $ mkNameEnv (zipWith mk_entry bndrs ptrs) + where + mk_entry bndr ptr = let nm = getName bndr + in (nm, (nm, AddrPtr ptr)) + +{- Note [Generating code for top-level string literal bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As described in Note [Compilation plan for top-level string literals] +in GHC.Core, the core-to-core optimizer can introduce top-level Addr# +bindings to represent string literals. The creates two challenges for +the bytecode compiler: (1) compiling the bindings themselves, and +(2) compiling references to such bindings. Here is a summary on how +we deal with them: + + 1. Top-level string literal bindings are separated from the rest of + the module. Memory for them is allocated immediately, via + interpCmd, in allocateTopStrings, and the resulting AddrEnv is + recorded in the bc_strs field of the CompiledByteCode result. + + 2. When we encounter a reference to a top-level string literal, we + generate a PUSH_ADDR pseudo-instruction, which is assembled to + a PUSH_UBX instruction with a BCONPtrAddr argument. + + 3. The loader accumulates string literal bindings from loaded + bytecode in the addr_env field of the LinkerEnv. + + 4. The BCO linker resolves BCONPtrAddr references by searching both + the addr_env (to find literals defined in bytecode) and the native + symbol table (to find literals defined in native code). + +This strategy works alright, but it does have one significant problem: +we never free the memory that we allocate for the top-level strings. +In theory, we could explicitly free it when BCOs are unloaded, but +this comes with its own complications; see #22400 for why. For now, +we just accept the leak, but it would nice to find something better. -} -- ----------------------------------------------------------------------------- -- Compilation schema for the bytecode generator @@ -1774,26 +1796,25 @@ pushAtom d p (StgVarArg var) -- slots on to the top of the stack. | otherwise -- var must be a global variable - = do topStrings <- getTopStrings - platform <- targetPlatform <$> getDynFlags - case lookupVarEnv topStrings var of - Just ptr -> pushAtom d p $ StgLitArg $ mkLitWord platform $ - fromIntegral $ ptrToWordPtr $ fromRemotePtr ptr - Nothing - -- PUSH_G doesn't tag constructors. So we use PACK here - -- if we are dealing with nullary constructor. - | Just con <- isDataConWorkId_maybe var - -> do - massert (sz == wordSize platform) - massert (isNullaryRepDataCon con) - return (unitOL (PACK con 0), sz) - | otherwise - -> do - let - massert (sz == wordSize platform) - return (unitOL (PUSH_G (getName var)), sz) - where - !sz = idSizeCon platform var + = do platform <- targetPlatform <$> getDynFlags + let !szb = idSizeCon platform var + massert (szb == wordSize platform) + + -- PUSH_G doesn't tag constructors. So we use PACK here + -- if we are dealing with nullary constructor. + case isDataConWorkId_maybe var of + Just con -> do + massert (isNullaryRepDataCon con) + return (unitOL (PACK con 0), szb) + + Nothing + -- see Note [Generating code for top-level string literal bindings] + | isUnliftedType (idType var) -> do + massert (idType var `eqType` addrPrimTy) + return (unitOL (PUSH_ADDR (getName var)), szb) + + | otherwise -> do + return (unitOL (PUSH_G (getName var)), szb) pushAtom _ _ (StgLitArg lit) = pushLiteral True lit @@ -2162,8 +2183,6 @@ data BcM_State -- Should be free()d when it is GCd , modBreaks :: Maybe ModBreaks -- info about breakpoints , breakInfo :: IntMap CgBreakInfo - , topStrings :: IdEnv (RemotePtr ()) -- top-level string literals - -- See Note [generating code for top-level string literal bindings]. } newtype BcM r = BcM (BcM_State -> IO (BcM_State, r)) deriving (Functor) @@ -2174,11 +2193,10 @@ ioToBc io = BcM $ \st -> do return (st, x) runBc :: HscEnv -> Module -> Maybe ModBreaks - -> IdEnv (RemotePtr ()) -> BcM r -> IO (BcM_State, r) -runBc hsc_env this_mod modBreaks topStrings (BcM m) - = m (BcM_State hsc_env this_mod 0 [] modBreaks IntMap.empty topStrings) +runBc hsc_env this_mod modBreaks (BcM m) + = m (BcM_State hsc_env this_mod 0 [] modBreaks IntMap.empty) thenBc :: BcM a -> (a -> BcM b) -> BcM b thenBc (BcM expr) cont = BcM $ \st0 -> do @@ -2247,8 +2265,5 @@ newBreakInfo ix info = BcM $ \st -> getCurrentModule :: BcM Module getCurrentModule = BcM $ \st -> return (st, thisModule st) -getTopStrings :: BcM (IdEnv (RemotePtr ())) -getTopStrings = BcM $ \st -> return (st, topStrings st) - tickFS :: FastString tickFS = fsLit "ticked" diff --git a/testsuite/tests/bytecode/T22376/A.hs b/testsuite/tests/bytecode/T22376/A.hs new file mode 100644 index 0000000000..670c3fd6a4 --- /dev/null +++ b/testsuite/tests/bytecode/T22376/A.hs @@ -0,0 +1,6 @@ +module A where +import B + +foo :: String +foo = f "bc" +{-# NOINLINE foo #-} diff --git a/testsuite/tests/bytecode/T22376/B.hs b/testsuite/tests/bytecode/T22376/B.hs new file mode 100644 index 0000000000..8bfb7bfd32 --- /dev/null +++ b/testsuite/tests/bytecode/T22376/B.hs @@ -0,0 +1,4 @@ +module B where + +f :: String -> String +f = ("a" ++) diff --git a/testsuite/tests/bytecode/T22376/T22376.hs b/testsuite/tests/bytecode/T22376/T22376.hs new file mode 100644 index 0000000000..b97640a00b --- /dev/null +++ b/testsuite/tests/bytecode/T22376/T22376.hs @@ -0,0 +1,6 @@ +{-# LANGUAGE TemplateHaskell #-} +import Language.Haskell.TH.Syntax (lift) +import A + +main :: IO () +main = putStrLn $(lift foo) diff --git a/testsuite/tests/bytecode/T22376/T22376.stdout b/testsuite/tests/bytecode/T22376/T22376.stdout new file mode 100644 index 0000000000..8baef1b4ab --- /dev/null +++ b/testsuite/tests/bytecode/T22376/T22376.stdout @@ -0,0 +1 @@ +abc diff --git a/testsuite/tests/bytecode/T22376/all.T b/testsuite/tests/bytecode/T22376/all.T new file mode 100644 index 0000000000..0b15e93e6d --- /dev/null +++ b/testsuite/tests/bytecode/T22376/all.T @@ -0,0 +1,2 @@ +test('T22376', [req_interp, extra_files(['A.hs', 'B.hs'])], multimod_compile_and_run, + ['T22376', '-O1 -fwrite-if-simplified-core -fbyte-code-and-object-code -fprefer-byte-code']) |