diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2020-01-17 15:13:04 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-02-12 01:57:27 -0500 |
commit | da7f74797e8c322006eba385c9cbdce346dd1d43 (patch) | |
tree | 79a69eed3aa18414caf76b02a5c8dc7c7e6d5f54 /compiler/GHC/ByteCode | |
parent | f82a2f90ceda5c2bc74088fa7f6a7c8cb9c9756f (diff) | |
download | haskell-da7f74797e8c322006eba385c9cbdce346dd1d43.tar.gz |
Module hierarchy: ByteCode and Runtime (cf #13009)
Update haddock submodule
Diffstat (limited to 'compiler/GHC/ByteCode')
-rw-r--r-- | compiler/GHC/ByteCode/Asm.hs | 566 | ||||
-rw-r--r-- | compiler/GHC/ByteCode/InfoTable.hs | 76 | ||||
-rw-r--r-- | compiler/GHC/ByteCode/Instr.hs | 373 | ||||
-rw-r--r-- | compiler/GHC/ByteCode/Linker.hs | 184 | ||||
-rw-r--r-- | compiler/GHC/ByteCode/Types.hs | 182 |
5 files changed, 1381 insertions, 0 deletions
diff --git a/compiler/GHC/ByteCode/Asm.hs b/compiler/GHC/ByteCode/Asm.hs new file mode 100644 index 0000000000..db5c14b806 --- /dev/null +++ b/compiler/GHC/ByteCode/Asm.hs @@ -0,0 +1,566 @@ +{-# LANGUAGE BangPatterns, CPP, DeriveFunctor, MagicHash, RecordWildCards #-} +{-# OPTIONS_GHC -optc-DNON_POSIX_SOURCE #-} +-- +-- (c) The University of Glasgow 2002-2006 +-- + +-- | Bytecode assembler and linker +module GHC.ByteCode.Asm ( + assembleBCOs, assembleOneBCO, + + bcoFreeNames, + SizedSeq, sizeSS, ssElts, + iNTERP_STACK_CHECK_THRESH + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.ByteCode.Instr +import GHC.ByteCode.InfoTable +import GHC.ByteCode.Types +import GHCi.RemoteTypes +import GHC.Runtime.Interpreter + +import HscTypes +import Name +import NameSet +import Literal +import TyCon +import FastString +import GHC.StgToCmm.Layout ( ArgRep(..) ) +import GHC.Runtime.Heap.Layout +import DynFlags +import Outputable +import GHC.Platform +import Util +import Unique +import UniqDSet + +-- From iserv +import SizedSeq + +import Control.Monad +import Control.Monad.ST ( runST ) +import Control.Monad.Trans.Class +import Control.Monad.Trans.State.Strict + +import Data.Array.MArray + +import qualified Data.Array.Unboxed as Array +import Data.Array.Base ( UArray(..) ) + +import Data.Array.Unsafe( castSTUArray ) + +import Foreign +import Data.Char ( ord ) +import Data.List ( genericLength ) +import Data.Map (Map) +import Data.Maybe (fromMaybe) +import qualified Data.Map as Map + +-- ----------------------------------------------------------------------------- +-- Unlinked BCOs + +-- CompiledByteCode represents the result of byte-code +-- compiling a bunch of functions and data types + +-- | Finds external references. Remember to remove the names +-- defined by this group of BCOs themselves +bcoFreeNames :: UnlinkedBCO -> UniqDSet Name +bcoFreeNames bco + = bco_refs bco `uniqDSetMinusUniqSet` mkNameSet [unlinkedBCOName bco] + where + bco_refs (UnlinkedBCO _ _ _ _ nonptrs ptrs) + = unionManyUniqDSets ( + mkUniqDSet [ n | BCOPtrName n <- ssElts ptrs ] : + mkUniqDSet [ n | BCONPtrItbl n <- ssElts nonptrs ] : + map bco_refs [ bco | BCOPtrBCO bco <- ssElts ptrs ] + ) + +-- ----------------------------------------------------------------------------- +-- The bytecode assembler + +-- The object format for bytecodes is: 16 bits for the opcode, and 16 +-- for each field -- so the code can be considered a sequence of +-- 16-bit ints. Each field denotes either a stack offset or number of +-- items on the stack (eg SLIDE), and index into the pointer table (eg +-- PUSH_G), an index into the literal table (eg PUSH_I/D/L), or a +-- bytecode address in this BCO. + +-- Top level assembler fn. +assembleBCOs + :: HscEnv -> [ProtoBCO Name] -> [TyCon] -> [RemotePtr ()] + -> Maybe ModBreaks + -> IO CompiledByteCode +assembleBCOs hsc_env proto_bcos tycons top_strs modbreaks = do + itblenv <- mkITbls hsc_env tycons + bcos <- mapM (assembleBCO (hsc_dflags hsc_env)) proto_bcos + (bcos',ptrs) <- mallocStrings hsc_env bcos + return CompiledByteCode + { bc_bcos = bcos' + , bc_itbls = itblenv + , bc_ffis = concat (map protoBCOFFIs proto_bcos) + , bc_strs = top_strs ++ ptrs + , bc_breaks = modbreaks + } + +-- Find all the literal strings and malloc them together. We want to +-- do this because: +-- +-- a) It should be done when we compile the module, not each time we relink it +-- b) For -fexternal-interpreter It's more efficient to malloc the strings +-- as a single batch message, especially when compiling in parallel. +-- +mallocStrings :: HscEnv -> [UnlinkedBCO] -> IO ([UnlinkedBCO], [RemotePtr ()]) +mallocStrings hsc_env ulbcos = do + let bytestrings = reverse (execState (mapM_ collect ulbcos) []) + ptrs <- iservCmd hsc_env (MallocStrings bytestrings) + return (evalState (mapM splice ulbcos) ptrs, ptrs) + where + splice bco@UnlinkedBCO{..} = do + lits <- mapM spliceLit unlinkedBCOLits + ptrs <- mapM splicePtr unlinkedBCOPtrs + return bco { unlinkedBCOLits = lits, unlinkedBCOPtrs = ptrs } + + spliceLit (BCONPtrStr _) = do + rptrs <- get + case rptrs of + (RemotePtr p : rest) -> do + put rest + return (BCONPtrWord (fromIntegral p)) + _ -> panic "mallocStrings:spliceLit" + spliceLit other = return other + + splicePtr (BCOPtrBCO bco) = BCOPtrBCO <$> splice bco + splicePtr other = return other + + collect UnlinkedBCO{..} = do + mapM_ collectLit unlinkedBCOLits + mapM_ collectPtr unlinkedBCOPtrs + + collectLit (BCONPtrStr bs) = do + strs <- get + put (bs:strs) + collectLit _ = return () + + collectPtr (BCOPtrBCO bco) = collect bco + collectPtr _ = return () + + +assembleOneBCO :: HscEnv -> ProtoBCO Name -> IO UnlinkedBCO +assembleOneBCO hsc_env pbco = do + ubco <- assembleBCO (hsc_dflags hsc_env) pbco + ([ubco'], _ptrs) <- mallocStrings hsc_env [ubco] + return ubco' + +assembleBCO :: DynFlags -> ProtoBCO Name -> IO UnlinkedBCO +assembleBCO dflags (ProtoBCO { protoBCOName = nm + , protoBCOInstrs = instrs + , protoBCOBitmap = bitmap + , protoBCOBitmapSize = bsize + , protoBCOArity = arity }) = do + -- pass 1: collect up the offsets of the local labels. + let asm = mapM_ (assembleI dflags) instrs + + initial_offset = 0 + + -- Jump instructions are variable-sized, there are long and short variants + -- depending on the magnitude of the offset. However, we can't tell what + -- size instructions we will need until we have calculated the offsets of + -- the labels, which depends on the size of the instructions... So we + -- first create the label environment assuming that all jumps are short, + -- and if the final size is indeed small enough for short jumps, we are + -- done. Otherwise, we repeat the calculation, and we force all jumps in + -- this BCO to be long. + (n_insns0, lbl_map0) = inspectAsm dflags False initial_offset asm + ((n_insns, lbl_map), long_jumps) + | isLarge n_insns0 = (inspectAsm dflags True initial_offset asm, True) + | otherwise = ((n_insns0, lbl_map0), False) + + env :: Word16 -> Word + env lbl = fromMaybe + (pprPanic "assembleBCO.findLabel" (ppr lbl)) + (Map.lookup lbl lbl_map) + + -- pass 2: run assembler and generate instructions, literals and pointers + let initial_state = (emptySS, emptySS, emptySS) + (final_insns, final_lits, final_ptrs) <- flip execStateT initial_state $ runAsm dflags long_jumps env asm + + -- precomputed size should be equal to final size + ASSERT(n_insns == sizeSS final_insns) return () + + let asm_insns = ssElts final_insns + insns_arr = Array.listArray (0, fromIntegral n_insns - 1) asm_insns + bitmap_arr = mkBitmapArray bsize bitmap + ul_bco = UnlinkedBCO nm arity insns_arr bitmap_arr final_lits final_ptrs + + -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive + -- objects, since they might get run too early. Disable this until + -- we figure out what to do. + -- when (notNull malloced) (addFinalizer ul_bco (mapM_ zonk malloced)) + + return ul_bco + +mkBitmapArray :: Word16 -> [StgWord] -> UArray Int Word64 +-- Here the return type must be an array of Words, not StgWords, +-- because the underlying ByteArray# will end up as a component +-- of a BCO object. +mkBitmapArray bsize bitmap + = Array.listArray (0, length bitmap) $ + fromIntegral bsize : map (fromInteger . fromStgWord) bitmap + +-- instrs nonptrs ptrs +type AsmState = (SizedSeq Word16, + SizedSeq BCONPtr, + SizedSeq BCOPtr) + +data Operand + = Op Word + | SmallOp Word16 + | LabelOp Word16 +-- (unused) | LargeOp Word + +data Assembler a + = AllocPtr (IO BCOPtr) (Word -> Assembler a) + | AllocLit [BCONPtr] (Word -> Assembler a) + | AllocLabel Word16 (Assembler a) + | Emit Word16 [Operand] (Assembler a) + | NullAsm a + deriving (Functor) + +instance Applicative Assembler where + pure = NullAsm + (<*>) = ap + +instance Monad Assembler where + NullAsm x >>= f = f x + AllocPtr p k >>= f = AllocPtr p (k >=> f) + AllocLit l k >>= f = AllocLit l (k >=> f) + AllocLabel lbl k >>= f = AllocLabel lbl (k >>= f) + Emit w ops k >>= f = Emit w ops (k >>= f) + +ioptr :: IO BCOPtr -> Assembler Word +ioptr p = AllocPtr p return + +ptr :: BCOPtr -> Assembler Word +ptr = ioptr . return + +lit :: [BCONPtr] -> Assembler Word +lit l = AllocLit l return + +label :: Word16 -> Assembler () +label w = AllocLabel w (return ()) + +emit :: Word16 -> [Operand] -> Assembler () +emit w ops = Emit w ops (return ()) + +type LabelEnv = Word16 -> Word + +largeOp :: Bool -> Operand -> Bool +largeOp long_jumps op = case op of + SmallOp _ -> False + Op w -> isLarge w + LabelOp _ -> long_jumps +-- LargeOp _ -> True + +runAsm :: DynFlags -> Bool -> LabelEnv -> Assembler a -> StateT AsmState IO a +runAsm dflags long_jumps e = go + where + go (NullAsm x) = return x + go (AllocPtr p_io k) = do + p <- lift p_io + w <- state $ \(st_i0,st_l0,st_p0) -> + let st_p1 = addToSS st_p0 p + in (sizeSS st_p0, (st_i0,st_l0,st_p1)) + go $ k w + go (AllocLit lits k) = do + w <- state $ \(st_i0,st_l0,st_p0) -> + let st_l1 = addListToSS st_l0 lits + in (sizeSS st_l0, (st_i0,st_l1,st_p0)) + go $ k w + go (AllocLabel _ k) = go k + go (Emit w ops k) = do + let largeOps = any (largeOp long_jumps) ops + opcode + | largeOps = largeArgInstr w + | otherwise = w + words = concatMap expand ops + expand (SmallOp w) = [w] + expand (LabelOp w) = expand (Op (e w)) + expand (Op w) = if largeOps then largeArg dflags w else [fromIntegral w] +-- expand (LargeOp w) = largeArg dflags w + state $ \(st_i0,st_l0,st_p0) -> + let st_i1 = addListToSS st_i0 (opcode : words) + in ((), (st_i1,st_l0,st_p0)) + go k + +type LabelEnvMap = Map Word16 Word + +data InspectState = InspectState + { instrCount :: !Word + , ptrCount :: !Word + , litCount :: !Word + , lblEnv :: LabelEnvMap + } + +inspectAsm :: DynFlags -> Bool -> Word -> Assembler a -> (Word, LabelEnvMap) +inspectAsm dflags long_jumps initial_offset + = go (InspectState initial_offset 0 0 Map.empty) + where + go s (NullAsm _) = (instrCount s, lblEnv s) + go s (AllocPtr _ k) = go (s { ptrCount = n + 1 }) (k n) + where n = ptrCount s + go s (AllocLit ls k) = go (s { litCount = n + genericLength ls }) (k n) + where n = litCount s + go s (AllocLabel lbl k) = go s' k + where s' = s { lblEnv = Map.insert lbl (instrCount s) (lblEnv s) } + go s (Emit _ ops k) = go s' k + where + s' = s { instrCount = instrCount s + size } + size = sum (map count ops) + 1 + largeOps = any (largeOp long_jumps) ops + count (SmallOp _) = 1 + count (LabelOp _) = count (Op 0) + count (Op _) = if largeOps then largeArg16s dflags else 1 +-- count (LargeOp _) = largeArg16s dflags + +-- Bring in all the bci_ bytecode constants. +#include "rts/Bytecodes.h" + +largeArgInstr :: Word16 -> Word16 +largeArgInstr bci = bci_FLAG_LARGE_ARGS .|. bci + +largeArg :: DynFlags -> Word -> [Word16] +largeArg dflags w + | wORD_SIZE_IN_BITS dflags == 64 + = [fromIntegral (w `shiftR` 48), + fromIntegral (w `shiftR` 32), + fromIntegral (w `shiftR` 16), + fromIntegral w] + | wORD_SIZE_IN_BITS dflags == 32 + = [fromIntegral (w `shiftR` 16), + fromIntegral w] + | otherwise = error "wORD_SIZE_IN_BITS not 32 or 64?" + +largeArg16s :: DynFlags -> Word +largeArg16s dflags | wORD_SIZE_IN_BITS dflags == 64 = 4 + | otherwise = 2 + +assembleI :: DynFlags + -> BCInstr + -> Assembler () +assembleI dflags i = case i of + STKCHECK n -> emit bci_STKCHECK [Op n] + PUSH_L o1 -> emit bci_PUSH_L [SmallOp o1] + PUSH_LL o1 o2 -> emit bci_PUSH_LL [SmallOp o1, SmallOp o2] + PUSH_LLL o1 o2 o3 -> emit bci_PUSH_LLL [SmallOp o1, SmallOp o2, SmallOp o3] + PUSH8 o1 -> emit bci_PUSH8 [SmallOp o1] + PUSH16 o1 -> emit bci_PUSH16 [SmallOp o1] + PUSH32 o1 -> emit bci_PUSH32 [SmallOp o1] + PUSH8_W o1 -> emit bci_PUSH8_W [SmallOp o1] + PUSH16_W o1 -> emit bci_PUSH16_W [SmallOp o1] + PUSH32_W o1 -> emit bci_PUSH32_W [SmallOp o1] + PUSH_G nm -> do p <- ptr (BCOPtrName nm) + emit bci_PUSH_G [Op p] + PUSH_PRIMOP op -> do p <- ptr (BCOPtrPrimOp op) + emit bci_PUSH_G [Op p] + PUSH_BCO proto -> do let ul_bco = assembleBCO dflags proto + p <- ioptr (liftM BCOPtrBCO ul_bco) + emit bci_PUSH_G [Op p] + PUSH_ALTS proto -> do let ul_bco = assembleBCO dflags proto + p <- ioptr (liftM BCOPtrBCO ul_bco) + emit bci_PUSH_ALTS [Op p] + PUSH_ALTS_UNLIFTED proto pk + -> do let ul_bco = assembleBCO dflags proto + p <- ioptr (liftM BCOPtrBCO ul_bco) + emit (push_alts pk) [Op p] + PUSH_PAD8 -> emit bci_PUSH_PAD8 [] + PUSH_PAD16 -> emit bci_PUSH_PAD16 [] + PUSH_PAD32 -> emit bci_PUSH_PAD32 [] + PUSH_UBX8 lit -> do np <- literal lit + emit bci_PUSH_UBX8 [Op np] + PUSH_UBX16 lit -> do np <- literal lit + emit bci_PUSH_UBX16 [Op np] + PUSH_UBX32 lit -> do np <- literal lit + emit bci_PUSH_UBX32 [Op np] + PUSH_UBX lit nws -> do np <- literal lit + emit bci_PUSH_UBX [Op np, SmallOp nws] + + PUSH_APPLY_N -> emit bci_PUSH_APPLY_N [] + PUSH_APPLY_V -> emit bci_PUSH_APPLY_V [] + PUSH_APPLY_F -> emit bci_PUSH_APPLY_F [] + PUSH_APPLY_D -> emit bci_PUSH_APPLY_D [] + PUSH_APPLY_L -> emit bci_PUSH_APPLY_L [] + PUSH_APPLY_P -> emit bci_PUSH_APPLY_P [] + PUSH_APPLY_PP -> emit bci_PUSH_APPLY_PP [] + PUSH_APPLY_PPP -> emit bci_PUSH_APPLY_PPP [] + PUSH_APPLY_PPPP -> emit bci_PUSH_APPLY_PPPP [] + PUSH_APPLY_PPPPP -> emit bci_PUSH_APPLY_PPPPP [] + PUSH_APPLY_PPPPPP -> emit bci_PUSH_APPLY_PPPPPP [] + + SLIDE n by -> emit bci_SLIDE [SmallOp n, SmallOp by] + ALLOC_AP n -> emit bci_ALLOC_AP [SmallOp n] + ALLOC_AP_NOUPD n -> emit bci_ALLOC_AP_NOUPD [SmallOp n] + ALLOC_PAP arity n -> emit bci_ALLOC_PAP [SmallOp arity, SmallOp n] + MKAP off sz -> emit bci_MKAP [SmallOp off, SmallOp sz] + MKPAP off sz -> emit bci_MKPAP [SmallOp off, SmallOp sz] + UNPACK n -> emit bci_UNPACK [SmallOp n] + PACK dcon sz -> do itbl_no <- lit [BCONPtrItbl (getName dcon)] + emit bci_PACK [Op itbl_no, SmallOp sz] + LABEL lbl -> label lbl + TESTLT_I i l -> do np <- int i + emit bci_TESTLT_I [Op np, LabelOp l] + TESTEQ_I i l -> do np <- int i + emit bci_TESTEQ_I [Op np, LabelOp l] + TESTLT_W w l -> do np <- word w + emit bci_TESTLT_W [Op np, LabelOp l] + TESTEQ_W w l -> do np <- word w + emit bci_TESTEQ_W [Op np, LabelOp l] + TESTLT_F f l -> do np <- float f + emit bci_TESTLT_F [Op np, LabelOp l] + TESTEQ_F f l -> do np <- float f + emit bci_TESTEQ_F [Op np, LabelOp l] + TESTLT_D d l -> do np <- double d + emit bci_TESTLT_D [Op np, LabelOp l] + TESTEQ_D d l -> do np <- double d + emit bci_TESTEQ_D [Op np, LabelOp l] + TESTLT_P i l -> emit bci_TESTLT_P [SmallOp i, LabelOp l] + TESTEQ_P i l -> emit bci_TESTEQ_P [SmallOp i, LabelOp l] + CASEFAIL -> emit bci_CASEFAIL [] + SWIZZLE stkoff n -> emit bci_SWIZZLE [SmallOp stkoff, SmallOp n] + JMP l -> emit bci_JMP [LabelOp l] + ENTER -> emit bci_ENTER [] + RETURN -> emit bci_RETURN [] + RETURN_UBX rep -> emit (return_ubx rep) [] + CCALL off m_addr i -> do np <- addr m_addr + emit bci_CCALL [SmallOp off, Op np, SmallOp i] + BRK_FUN index uniq cc -> do p1 <- ptr BCOPtrBreakArray + q <- int (getKey uniq) + np <- addr cc + emit bci_BRK_FUN [Op p1, SmallOp index, + Op q, Op np] + + where + literal (LitLabel fs (Just sz) _) + | platformOS (targetPlatform dflags) == OSMinGW32 + = litlabel (appendFS fs (mkFastString ('@':show sz))) + -- On Windows, stdcall labels have a suffix indicating the no. of + -- arg words, e.g. foo@8. testcase: ffi012(ghci) + literal (LitLabel fs _ _) = litlabel fs + literal LitNullAddr = int 0 + literal (LitFloat r) = float (fromRational r) + literal (LitDouble r) = double (fromRational r) + literal (LitChar c) = int (ord c) + literal (LitString bs) = lit [BCONPtrStr bs] + -- LitString requires a zero-terminator when emitted + literal (LitNumber nt i _) = case nt of + LitNumInt -> int (fromIntegral i) + LitNumWord -> int (fromIntegral i) + LitNumInt64 -> int64 (fromIntegral i) + LitNumWord64 -> int64 (fromIntegral i) + LitNumInteger -> panic "GHC.ByteCode.Asm.literal: LitNumInteger" + LitNumNatural -> panic "GHC.ByteCode.Asm.literal: LitNumNatural" + -- We can lower 'LitRubbish' to an arbitrary constant, but @NULL@ is most + -- likely to elicit a crash (rather than corrupt memory) in case absence + -- analysis messed up. + literal LitRubbish = int 0 + + litlabel fs = lit [BCONPtrLbl fs] + addr (RemotePtr a) = words [fromIntegral a] + float = words . mkLitF + double = words . mkLitD dflags + int = words . mkLitI + int64 = words . mkLitI64 dflags + words ws = lit (map BCONPtrWord ws) + word w = words [w] + +isLarge :: Word -> Bool +isLarge n = n > 65535 + +push_alts :: ArgRep -> Word16 +push_alts V = bci_PUSH_ALTS_V +push_alts P = bci_PUSH_ALTS_P +push_alts N = bci_PUSH_ALTS_N +push_alts L = bci_PUSH_ALTS_L +push_alts F = bci_PUSH_ALTS_F +push_alts D = bci_PUSH_ALTS_D +push_alts V16 = error "push_alts: vector" +push_alts V32 = error "push_alts: vector" +push_alts V64 = error "push_alts: vector" + +return_ubx :: ArgRep -> Word16 +return_ubx V = bci_RETURN_V +return_ubx P = bci_RETURN_P +return_ubx N = bci_RETURN_N +return_ubx L = bci_RETURN_L +return_ubx F = bci_RETURN_F +return_ubx D = bci_RETURN_D +return_ubx V16 = error "return_ubx: vector" +return_ubx V32 = error "return_ubx: vector" +return_ubx V64 = error "return_ubx: vector" + +-- Make lists of host-sized words for literals, so that when the +-- words are placed in memory at increasing addresses, the +-- bit pattern is correct for the host's word size and endianness. +mkLitI :: Int -> [Word] +mkLitF :: Float -> [Word] +mkLitD :: DynFlags -> Double -> [Word] +mkLitI64 :: DynFlags -> Int64 -> [Word] + +mkLitF f + = runST (do + arr <- newArray_ ((0::Int),0) + writeArray arr 0 f + f_arr <- castSTUArray arr + w0 <- readArray f_arr 0 + return [w0 :: Word] + ) + +mkLitD dflags d + | wORD_SIZE dflags == 4 + = runST (do + arr <- newArray_ ((0::Int),1) + writeArray arr 0 d + d_arr <- castSTUArray arr + w0 <- readArray d_arr 0 + w1 <- readArray d_arr 1 + return [w0 :: Word, w1] + ) + | wORD_SIZE dflags == 8 + = runST (do + arr <- newArray_ ((0::Int),0) + writeArray arr 0 d + d_arr <- castSTUArray arr + w0 <- readArray d_arr 0 + return [w0 :: Word] + ) + | otherwise + = panic "mkLitD: Bad wORD_SIZE" + +mkLitI64 dflags ii + | wORD_SIZE dflags == 4 + = runST (do + arr <- newArray_ ((0::Int),1) + writeArray arr 0 ii + d_arr <- castSTUArray arr + w0 <- readArray d_arr 0 + w1 <- readArray d_arr 1 + return [w0 :: Word,w1] + ) + | wORD_SIZE dflags == 8 + = runST (do + arr <- newArray_ ((0::Int),0) + writeArray arr 0 ii + d_arr <- castSTUArray arr + w0 <- readArray d_arr 0 + return [w0 :: Word] + ) + | otherwise + = panic "mkLitI64: Bad wORD_SIZE" + +mkLitI i = [fromIntegral i :: Word] + +iNTERP_STACK_CHECK_THRESH :: Int +iNTERP_STACK_CHECK_THRESH = INTERP_STACK_CHECK_THRESH diff --git a/compiler/GHC/ByteCode/InfoTable.hs b/compiler/GHC/ByteCode/InfoTable.hs new file mode 100644 index 0000000000..40a107756d --- /dev/null +++ b/compiler/GHC/ByteCode/InfoTable.hs @@ -0,0 +1,76 @@ +{-# LANGUAGE CPP, MagicHash, ScopedTypeVariables #-} +{-# OPTIONS_GHC -optc-DNON_POSIX_SOURCE #-} +-- +-- (c) The University of Glasgow 2002-2006 +-- + +-- | Generate infotables for interpreter-made bytecodes +module GHC.ByteCode.InfoTable ( mkITbls ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.ByteCode.Types +import GHC.Runtime.Interpreter +import DynFlags +import HscTypes +import Name ( Name, getName ) +import NameEnv +import DataCon ( DataCon, dataConRepArgTys, dataConIdentity ) +import TyCon ( TyCon, tyConFamilySize, isDataTyCon, tyConDataCons ) +import GHC.Types.RepType +import GHC.StgToCmm.Layout ( mkVirtConstrSizes ) +import GHC.StgToCmm.Closure ( tagForCon, NonVoid (..) ) +import Util +import Panic + +{- + Manufacturing of info tables for DataCons +-} + +-- Make info tables for the data decls in this module +mkITbls :: HscEnv -> [TyCon] -> IO ItblEnv +mkITbls hsc_env tcs = + foldr plusNameEnv emptyNameEnv <$> + mapM (mkITbl hsc_env) (filter isDataTyCon tcs) + where + mkITbl :: HscEnv -> TyCon -> IO ItblEnv + mkITbl hsc_env tc + | dcs `lengthIs` n -- paranoia; this is an assertion. + = make_constr_itbls hsc_env dcs + where + dcs = tyConDataCons tc + n = tyConFamilySize tc + mkITbl _ _ = panic "mkITbl" + +mkItblEnv :: [(Name,ItblPtr)] -> ItblEnv +mkItblEnv pairs = mkNameEnv [(n, (n,p)) | (n,p) <- pairs] + +-- Assumes constructors are numbered from zero, not one +make_constr_itbls :: HscEnv -> [DataCon] -> IO ItblEnv +make_constr_itbls hsc_env cons = + mkItblEnv <$> mapM (uncurry mk_itbl) (zip cons [0..]) + where + dflags = hsc_dflags hsc_env + + mk_itbl :: DataCon -> Int -> IO (Name,ItblPtr) + mk_itbl dcon conNo = do + let rep_args = [ NonVoid prim_rep + | arg <- dataConRepArgTys dcon + , prim_rep <- typePrimRep arg ] + + (tot_wds, ptr_wds) = + mkVirtConstrSizes dflags rep_args + + ptrs' = ptr_wds + nptrs' = tot_wds - ptr_wds + nptrs_really + | ptrs' + nptrs' >= mIN_PAYLOAD_SIZE dflags = nptrs' + | otherwise = mIN_PAYLOAD_SIZE dflags - ptrs' + + descr = dataConIdentity dcon + + r <- iservCmd hsc_env (MkConInfoTable ptrs' nptrs_really + conNo (tagForCon dflags dcon) descr) + return (getName dcon, ItblPtr r) diff --git a/compiler/GHC/ByteCode/Instr.hs b/compiler/GHC/ByteCode/Instr.hs new file mode 100644 index 0000000000..d6c9cd5391 --- /dev/null +++ b/compiler/GHC/ByteCode/Instr.hs @@ -0,0 +1,373 @@ +{-# LANGUAGE CPP, MagicHash #-} +{-# OPTIONS_GHC -funbox-strict-fields #-} +-- +-- (c) The University of Glasgow 2002-2006 +-- + +-- | Bytecode instruction definitions +module GHC.ByteCode.Instr ( + BCInstr(..), ProtoBCO(..), bciStackUse, + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.ByteCode.Types +import GHCi.RemoteTypes +import GHCi.FFI (C_ffi_cif) +import GHC.StgToCmm.Layout ( ArgRep(..) ) +import PprCore +import Outputable +import FastString +import Name +import Unique +import Id +import CoreSyn +import Literal +import DataCon +import VarSet +import PrimOp +import GHC.Runtime.Heap.Layout + +import Data.Word +import GHC.Stack.CCS (CostCentre) + +-- ---------------------------------------------------------------------------- +-- Bytecode instructions + +data ProtoBCO a + = ProtoBCO { + protoBCOName :: a, -- name, in some sense + protoBCOInstrs :: [BCInstr], -- instrs + -- arity and GC info + protoBCOBitmap :: [StgWord], + protoBCOBitmapSize :: Word16, + protoBCOArity :: Int, + -- what the BCO came from, for debugging only + protoBCOExpr :: Either [AnnAlt Id DVarSet] (AnnExpr Id DVarSet), + -- malloc'd pointers + protoBCOFFIs :: [FFIInfo] + } + +type LocalLabel = Word16 + +data BCInstr + -- Messing with the stack + = STKCHECK Word + + -- Push locals (existing bits of the stack) + | PUSH_L !Word16{-offset-} + | PUSH_LL !Word16 !Word16{-2 offsets-} + | PUSH_LLL !Word16 !Word16 !Word16{-3 offsets-} + + -- Push the specified local as a 8, 16, 32 bit value onto the stack. (i.e., + -- the stack will grow by 8, 16 or 32 bits) + | PUSH8 !Word16 + | PUSH16 !Word16 + | PUSH32 !Word16 + + -- Push the specifiec local as a 8, 16, 32 bit value onto the stack, but the + -- value will take the whole word on the stack (i.e., the stack will grow by + -- a word) + -- This is useful when extracting a packed constructor field for further use. + -- Currently we expect all values on the stack to take full words, except for + -- the ones used for PACK (i.e., actually constracting new data types, in + -- which case we use PUSH{8,16,32}) + | PUSH8_W !Word16 + | PUSH16_W !Word16 + | PUSH32_W !Word16 + + -- Push a ptr (these all map to PUSH_G really) + | PUSH_G Name + | PUSH_PRIMOP PrimOp + | PUSH_BCO (ProtoBCO Name) + + -- Push an alt continuation + | PUSH_ALTS (ProtoBCO Name) + | PUSH_ALTS_UNLIFTED (ProtoBCO Name) ArgRep + + -- Pushing 8, 16 and 32 bits of padding (for constructors). + | PUSH_PAD8 + | PUSH_PAD16 + | PUSH_PAD32 + + -- Pushing literals + | PUSH_UBX8 Literal + | PUSH_UBX16 Literal + | PUSH_UBX32 Literal + | PUSH_UBX Literal Word16 + -- push this int/float/double/addr, on the stack. Word16 + -- is # of words to copy from literal pool. Eitherness reflects + -- the difficulty of dealing with MachAddr here, mostly due to + -- the excessive (and unnecessary) restrictions imposed by the + -- designers of the new Foreign library. In particular it is + -- quite impossible to convert an Addr to any other integral + -- type, and it appears impossible to get hold of the bits of + -- an addr, even though we need to assemble BCOs. + + -- various kinds of application + | PUSH_APPLY_N + | PUSH_APPLY_V + | PUSH_APPLY_F + | PUSH_APPLY_D + | PUSH_APPLY_L + | PUSH_APPLY_P + | PUSH_APPLY_PP + | PUSH_APPLY_PPP + | PUSH_APPLY_PPPP + | PUSH_APPLY_PPPPP + | PUSH_APPLY_PPPPPP + + | SLIDE Word16{-this many-} Word16{-down by this much-} + + -- To do with the heap + | ALLOC_AP !Word16 -- make an AP with this many payload words + | ALLOC_AP_NOUPD !Word16 -- make an AP_NOUPD with this many payload words + | ALLOC_PAP !Word16 !Word16 -- make a PAP with this arity / payload words + | MKAP !Word16{-ptr to AP is this far down stack-} !Word16{-number of words-} + | MKPAP !Word16{-ptr to PAP is this far down stack-} !Word16{-number of words-} + | UNPACK !Word16 -- unpack N words from t.o.s Constr + | PACK DataCon !Word16 + -- after assembly, the DataCon is an index into the + -- itbl array + -- For doing case trees + | LABEL LocalLabel + | TESTLT_I Int LocalLabel + | TESTEQ_I Int LocalLabel + | TESTLT_W Word LocalLabel + | TESTEQ_W Word LocalLabel + | TESTLT_F Float LocalLabel + | TESTEQ_F Float LocalLabel + | TESTLT_D Double LocalLabel + | TESTEQ_D Double LocalLabel + + -- The Word16 value is a constructor number and therefore + -- stored in the insn stream rather than as an offset into + -- the literal pool. + | TESTLT_P Word16 LocalLabel + | TESTEQ_P Word16 LocalLabel + + | CASEFAIL + | JMP LocalLabel + + -- For doing calls to C (via glue code generated by libffi) + | CCALL Word16 -- stack frame size + (RemotePtr C_ffi_cif) -- addr of the glue code + Word16 -- flags. + -- + -- 0x1: call is interruptible + -- 0x2: call is unsafe + -- + -- (XXX: inefficient, but I don't know + -- what the alignment constraints are.) + + -- For doing magic ByteArray passing to foreign calls + | SWIZZLE Word16 -- to the ptr N words down the stack, + Word16 -- add M (interpreted as a signed 16-bit entity) + + -- To Infinity And Beyond + | ENTER + | RETURN -- return a lifted value + | RETURN_UBX ArgRep -- return an unlifted value, here's its rep + + -- Breakpoints + | BRK_FUN Word16 Unique (RemotePtr CostCentre) + +-- ----------------------------------------------------------------------------- +-- Printing bytecode instructions + +instance Outputable a => Outputable (ProtoBCO a) where + ppr (ProtoBCO { protoBCOName = name + , protoBCOInstrs = instrs + , protoBCOBitmap = bitmap + , protoBCOBitmapSize = bsize + , protoBCOArity = arity + , protoBCOExpr = origin + , protoBCOFFIs = ffis }) + = (text "ProtoBCO" <+> ppr name <> char '#' <> int arity + <+> text (show ffis) <> colon) + $$ nest 3 (case origin of + Left alts -> vcat (zipWith (<+>) (char '{' : repeat (char ';')) + (map (pprCoreAltShort.deAnnAlt) alts)) <+> char '}' + Right rhs -> pprCoreExprShort (deAnnotate rhs)) + $$ nest 3 (text "bitmap: " <+> text (show bsize) <+> ppr bitmap) + $$ nest 3 (vcat (map ppr instrs)) + +-- Print enough of the Core expression to enable the reader to find +-- the expression in the -ddump-prep output. That is, we need to +-- include at least a binder. + +pprCoreExprShort :: CoreExpr -> SDoc +pprCoreExprShort expr@(Lam _ _) + = let + (bndrs, _) = collectBinders expr + in + char '\\' <+> sep (map (pprBndr LambdaBind) bndrs) <+> arrow <+> text "..." + +pprCoreExprShort (Case _expr var _ty _alts) + = text "case of" <+> ppr var + +pprCoreExprShort (Let (NonRec x _) _) = text "let" <+> ppr x <+> ptext (sLit ("= ... in ...")) +pprCoreExprShort (Let (Rec bs) _) = text "let {" <+> ppr (fst (head bs)) <+> ptext (sLit ("= ...; ... } in ...")) + +pprCoreExprShort (Tick t e) = ppr t <+> pprCoreExprShort e +pprCoreExprShort (Cast e _) = pprCoreExprShort e <+> text "`cast` T" + +pprCoreExprShort e = pprCoreExpr e + +pprCoreAltShort :: CoreAlt -> SDoc +pprCoreAltShort (con, args, expr) = ppr con <+> sep (map ppr args) <+> text "->" <+> pprCoreExprShort expr + +instance Outputable BCInstr where + ppr (STKCHECK n) = text "STKCHECK" <+> ppr n + ppr (PUSH_L offset) = text "PUSH_L " <+> ppr offset + ppr (PUSH_LL o1 o2) = text "PUSH_LL " <+> ppr o1 <+> ppr o2 + ppr (PUSH_LLL o1 o2 o3) = text "PUSH_LLL" <+> ppr o1 <+> ppr o2 <+> ppr o3 + ppr (PUSH8 offset) = text "PUSH8 " <+> ppr offset + ppr (PUSH16 offset) = text "PUSH16 " <+> ppr offset + ppr (PUSH32 offset) = text "PUSH32 " <+> ppr offset + ppr (PUSH8_W offset) = text "PUSH8_W " <+> ppr offset + ppr (PUSH16_W offset) = text "PUSH16_W " <+> ppr offset + ppr (PUSH32_W offset) = text "PUSH32_W " <+> ppr offset + ppr (PUSH_G nm) = text "PUSH_G " <+> ppr nm + ppr (PUSH_PRIMOP op) = text "PUSH_G " <+> text "GHC.PrimopWrappers." + <> ppr op + ppr (PUSH_BCO bco) = hang (text "PUSH_BCO") 2 (ppr bco) + ppr (PUSH_ALTS bco) = hang (text "PUSH_ALTS") 2 (ppr bco) + ppr (PUSH_ALTS_UNLIFTED bco pk) = hang (text "PUSH_ALTS_UNLIFTED" <+> ppr pk) 2 (ppr bco) + + ppr PUSH_PAD8 = text "PUSH_PAD8" + ppr PUSH_PAD16 = text "PUSH_PAD16" + ppr PUSH_PAD32 = text "PUSH_PAD32" + + ppr (PUSH_UBX8 lit) = text "PUSH_UBX8" <+> ppr lit + ppr (PUSH_UBX16 lit) = text "PUSH_UBX16" <+> ppr lit + ppr (PUSH_UBX32 lit) = text "PUSH_UBX32" <+> ppr lit + ppr (PUSH_UBX lit nw) = text "PUSH_UBX" <+> parens (ppr nw) <+> ppr lit + ppr PUSH_APPLY_N = text "PUSH_APPLY_N" + ppr PUSH_APPLY_V = text "PUSH_APPLY_V" + ppr PUSH_APPLY_F = text "PUSH_APPLY_F" + ppr PUSH_APPLY_D = text "PUSH_APPLY_D" + ppr PUSH_APPLY_L = text "PUSH_APPLY_L" + ppr PUSH_APPLY_P = text "PUSH_APPLY_P" + ppr PUSH_APPLY_PP = text "PUSH_APPLY_PP" + ppr PUSH_APPLY_PPP = text "PUSH_APPLY_PPP" + ppr PUSH_APPLY_PPPP = text "PUSH_APPLY_PPPP" + ppr PUSH_APPLY_PPPPP = text "PUSH_APPLY_PPPPP" + ppr PUSH_APPLY_PPPPPP = text "PUSH_APPLY_PPPPPP" + + ppr (SLIDE n d) = text "SLIDE " <+> ppr n <+> ppr d + ppr (ALLOC_AP sz) = text "ALLOC_AP " <+> ppr sz + ppr (ALLOC_AP_NOUPD sz) = text "ALLOC_AP_NOUPD " <+> ppr sz + ppr (ALLOC_PAP arity sz) = text "ALLOC_PAP " <+> ppr arity <+> ppr sz + ppr (MKAP offset sz) = text "MKAP " <+> ppr sz <+> text "words," + <+> ppr offset <+> text "stkoff" + ppr (MKPAP offset sz) = text "MKPAP " <+> ppr sz <+> text "words," + <+> ppr offset <+> text "stkoff" + ppr (UNPACK sz) = text "UNPACK " <+> ppr sz + ppr (PACK dcon sz) = text "PACK " <+> ppr dcon <+> ppr sz + ppr (LABEL lab) = text "__" <> ppr lab <> colon + ppr (TESTLT_I i lab) = text "TESTLT_I" <+> int i <+> text "__" <> ppr lab + ppr (TESTEQ_I i lab) = text "TESTEQ_I" <+> int i <+> text "__" <> ppr lab + ppr (TESTLT_W i lab) = text "TESTLT_W" <+> int (fromIntegral i) <+> text "__" <> ppr lab + ppr (TESTEQ_W i lab) = text "TESTEQ_W" <+> int (fromIntegral i) <+> text "__" <> ppr lab + ppr (TESTLT_F f lab) = text "TESTLT_F" <+> float f <+> text "__" <> ppr lab + ppr (TESTEQ_F f lab) = text "TESTEQ_F" <+> float f <+> text "__" <> ppr lab + ppr (TESTLT_D d lab) = text "TESTLT_D" <+> double d <+> text "__" <> ppr lab + ppr (TESTEQ_D d lab) = text "TESTEQ_D" <+> double d <+> text "__" <> ppr lab + ppr (TESTLT_P i lab) = text "TESTLT_P" <+> ppr i <+> text "__" <> ppr lab + ppr (TESTEQ_P i lab) = text "TESTEQ_P" <+> ppr i <+> text "__" <> ppr lab + ppr CASEFAIL = text "CASEFAIL" + ppr (JMP lab) = text "JMP" <+> ppr lab + ppr (CCALL off marshall_addr flags) = text "CCALL " <+> ppr off + <+> text "marshall code at" + <+> text (show marshall_addr) + <+> (case flags of + 0x1 -> text "(interruptible)" + 0x2 -> text "(unsafe)" + _ -> empty) + ppr (SWIZZLE stkoff n) = text "SWIZZLE " <+> text "stkoff" <+> ppr stkoff + <+> text "by" <+> ppr n + ppr ENTER = text "ENTER" + ppr RETURN = text "RETURN" + ppr (RETURN_UBX pk) = text "RETURN_UBX " <+> ppr pk + ppr (BRK_FUN index uniq _cc) = text "BRK_FUN" <+> ppr index <+> ppr uniq <+> text "<cc>" + +-- ----------------------------------------------------------------------------- +-- The stack use, in words, of each bytecode insn. These _must_ be +-- correct, or overestimates of reality, to be safe. + +-- NOTE: we aggregate the stack use from case alternatives too, so that +-- we can do a single stack check at the beginning of a function only. + +-- This could all be made more accurate by keeping track of a proper +-- stack high water mark, but it doesn't seem worth the hassle. + +protoBCOStackUse :: ProtoBCO a -> Word +protoBCOStackUse bco = sum (map bciStackUse (protoBCOInstrs bco)) + +bciStackUse :: BCInstr -> Word +bciStackUse STKCHECK{} = 0 +bciStackUse PUSH_L{} = 1 +bciStackUse PUSH_LL{} = 2 +bciStackUse PUSH_LLL{} = 3 +bciStackUse PUSH8{} = 1 -- overapproximation +bciStackUse PUSH16{} = 1 -- overapproximation +bciStackUse PUSH32{} = 1 -- overapproximation on 64bit arch +bciStackUse PUSH8_W{} = 1 -- takes exactly 1 word +bciStackUse PUSH16_W{} = 1 -- takes exactly 1 word +bciStackUse PUSH32_W{} = 1 -- takes exactly 1 word +bciStackUse PUSH_G{} = 1 +bciStackUse PUSH_PRIMOP{} = 1 +bciStackUse PUSH_BCO{} = 1 +bciStackUse (PUSH_ALTS bco) = 2 + protoBCOStackUse bco +bciStackUse (PUSH_ALTS_UNLIFTED bco _) = 2 + protoBCOStackUse bco +bciStackUse (PUSH_PAD8) = 1 -- overapproximation +bciStackUse (PUSH_PAD16) = 1 -- overapproximation +bciStackUse (PUSH_PAD32) = 1 -- overapproximation on 64bit arch +bciStackUse (PUSH_UBX8 _) = 1 -- overapproximation +bciStackUse (PUSH_UBX16 _) = 1 -- overapproximation +bciStackUse (PUSH_UBX32 _) = 1 -- overapproximation on 64bit arch +bciStackUse (PUSH_UBX _ nw) = fromIntegral nw +bciStackUse PUSH_APPLY_N{} = 1 +bciStackUse PUSH_APPLY_V{} = 1 +bciStackUse PUSH_APPLY_F{} = 1 +bciStackUse PUSH_APPLY_D{} = 1 +bciStackUse PUSH_APPLY_L{} = 1 +bciStackUse PUSH_APPLY_P{} = 1 +bciStackUse PUSH_APPLY_PP{} = 1 +bciStackUse PUSH_APPLY_PPP{} = 1 +bciStackUse PUSH_APPLY_PPPP{} = 1 +bciStackUse PUSH_APPLY_PPPPP{} = 1 +bciStackUse PUSH_APPLY_PPPPPP{} = 1 +bciStackUse ALLOC_AP{} = 1 +bciStackUse ALLOC_AP_NOUPD{} = 1 +bciStackUse ALLOC_PAP{} = 1 +bciStackUse (UNPACK sz) = fromIntegral sz +bciStackUse LABEL{} = 0 +bciStackUse TESTLT_I{} = 0 +bciStackUse TESTEQ_I{} = 0 +bciStackUse TESTLT_W{} = 0 +bciStackUse TESTEQ_W{} = 0 +bciStackUse TESTLT_F{} = 0 +bciStackUse TESTEQ_F{} = 0 +bciStackUse TESTLT_D{} = 0 +bciStackUse TESTEQ_D{} = 0 +bciStackUse TESTLT_P{} = 0 +bciStackUse TESTEQ_P{} = 0 +bciStackUse CASEFAIL{} = 0 +bciStackUse JMP{} = 0 +bciStackUse ENTER{} = 0 +bciStackUse RETURN{} = 0 +bciStackUse RETURN_UBX{} = 1 +bciStackUse CCALL{} = 0 +bciStackUse SWIZZLE{} = 0 +bciStackUse BRK_FUN{} = 0 + +-- These insns actually reduce stack use, but we need the high-tide level, +-- so can't use this info. Not that it matters much. +bciStackUse SLIDE{} = 0 +bciStackUse MKAP{} = 0 +bciStackUse MKPAP{} = 0 +bciStackUse PACK{} = 1 -- worst case is PACK 0 words diff --git a/compiler/GHC/ByteCode/Linker.hs b/compiler/GHC/ByteCode/Linker.hs new file mode 100644 index 0000000000..69bdb63a91 --- /dev/null +++ b/compiler/GHC/ByteCode/Linker.hs @@ -0,0 +1,184 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# OPTIONS_GHC -optc-DNON_POSIX_SOURCE #-} +-- +-- (c) The University of Glasgow 2002-2006 +-- + +-- | Bytecode assembler and linker +module GHC.ByteCode.Linker ( + ClosureEnv, emptyClosureEnv, extendClosureEnv, + linkBCO, lookupStaticPtr, + lookupIE, + nameToCLabel, linkFail + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHCi.RemoteTypes +import GHCi.ResolvedBCO +import GHCi.BreakArray +import SizedSeq + +import GHC.Runtime.Interpreter +import GHC.ByteCode.Types +import HscTypes +import Name +import NameEnv +import PrimOp +import Module +import FastString +import Panic +import Outputable +import Util + +-- Standard libraries +import Data.Array.Unboxed +import Foreign.Ptr +import GHC.Exts + +{- + Linking interpretables into something we can run +-} + +type ClosureEnv = NameEnv (Name, ForeignHValue) + +emptyClosureEnv :: ClosureEnv +emptyClosureEnv = emptyNameEnv + +extendClosureEnv :: ClosureEnv -> [(Name,ForeignHValue)] -> ClosureEnv +extendClosureEnv cl_env pairs + = extendNameEnvList cl_env [ (n, (n,v)) | (n,v) <- pairs] + +{- + Linking interpretables into something we can run +-} + +linkBCO + :: HscEnv -> ItblEnv -> ClosureEnv -> NameEnv Int -> RemoteRef BreakArray + -> UnlinkedBCO + -> IO ResolvedBCO +linkBCO hsc_env ie ce bco_ix breakarray + (UnlinkedBCO _ arity insns bitmap lits0 ptrs0) = do + -- fromIntegral Word -> Word64 should be a no op if Word is Word64 + -- otherwise it will result in a cast to longlong on 32bit systems. + lits <- mapM (fmap fromIntegral . lookupLiteral hsc_env ie) (ssElts lits0) + ptrs <- mapM (resolvePtr hsc_env ie ce bco_ix breakarray) (ssElts ptrs0) + return (ResolvedBCO isLittleEndian arity insns bitmap + (listArray (0, fromIntegral (sizeSS lits0)-1) lits) + (addListToSS emptySS ptrs)) + +lookupLiteral :: HscEnv -> ItblEnv -> BCONPtr -> IO Word +lookupLiteral _ _ (BCONPtrWord lit) = return lit +lookupLiteral hsc_env _ (BCONPtrLbl sym) = do + Ptr a# <- lookupStaticPtr hsc_env sym + return (W# (int2Word# (addr2Int# a#))) +lookupLiteral hsc_env ie (BCONPtrItbl nm) = do + Ptr a# <- lookupIE hsc_env ie nm + return (W# (int2Word# (addr2Int# a#))) +lookupLiteral _ _ (BCONPtrStr _) = + -- should be eliminated during assembleBCOs + panic "lookupLiteral: BCONPtrStr" + +lookupStaticPtr :: HscEnv -> FastString -> IO (Ptr ()) +lookupStaticPtr hsc_env addr_of_label_string = do + m <- lookupSymbol hsc_env addr_of_label_string + case m of + Just ptr -> return ptr + Nothing -> linkFail "GHC.ByteCode.Linker: can't find label" + (unpackFS addr_of_label_string) + +lookupIE :: HscEnv -> ItblEnv -> Name -> IO (Ptr ()) +lookupIE hsc_env ie con_nm = + case lookupNameEnv ie con_nm of + Just (_, ItblPtr a) -> return (fromRemotePtr (castRemotePtr a)) + Nothing -> do -- try looking up in the object files. + let sym_to_find1 = nameToCLabel con_nm "con_info" + m <- lookupSymbol hsc_env sym_to_find1 + case m of + Just addr -> return addr + Nothing + -> do -- perhaps a nullary constructor? + let sym_to_find2 = nameToCLabel con_nm "static_info" + n <- lookupSymbol hsc_env sym_to_find2 + case n of + Just addr -> return addr + Nothing -> linkFail "GHC.ByteCode.Linker.lookupIE" + (unpackFS sym_to_find1 ++ " or " ++ + unpackFS sym_to_find2) + +lookupPrimOp :: HscEnv -> PrimOp -> IO (RemotePtr ()) +lookupPrimOp hsc_env primop = do + let sym_to_find = primopToCLabel primop "closure" + m <- lookupSymbol hsc_env (mkFastString sym_to_find) + case m of + Just p -> return (toRemotePtr p) + Nothing -> linkFail "GHC.ByteCode.Linker.lookupCE(primop)" sym_to_find + +resolvePtr + :: HscEnv -> ItblEnv -> ClosureEnv -> NameEnv Int -> RemoteRef BreakArray + -> BCOPtr + -> IO ResolvedBCOPtr +resolvePtr hsc_env _ie ce bco_ix _ (BCOPtrName nm) + | Just ix <- lookupNameEnv bco_ix nm = + return (ResolvedBCORef ix) -- ref to another BCO in this group + | Just (_, rhv) <- lookupNameEnv ce nm = + return (ResolvedBCOPtr (unsafeForeignRefToRemoteRef rhv)) + | otherwise = + ASSERT2(isExternalName nm, ppr nm) + do let sym_to_find = nameToCLabel nm "closure" + m <- lookupSymbol hsc_env sym_to_find + case m of + Just p -> return (ResolvedBCOStaticPtr (toRemotePtr p)) + Nothing -> linkFail "GHC.ByteCode.Linker.lookupCE" (unpackFS sym_to_find) +resolvePtr hsc_env _ _ _ _ (BCOPtrPrimOp op) = + ResolvedBCOStaticPtr <$> lookupPrimOp hsc_env op +resolvePtr hsc_env ie ce bco_ix breakarray (BCOPtrBCO bco) = + ResolvedBCOPtrBCO <$> linkBCO hsc_env ie ce bco_ix breakarray bco +resolvePtr _ _ _ _ breakarray BCOPtrBreakArray = + return (ResolvedBCOPtrBreakArray breakarray) + +linkFail :: String -> String -> IO a +linkFail who what + = throwGhcExceptionIO (ProgramError $ + unlines [ "",who + , "During interactive linking, GHCi couldn't find the following symbol:" + , ' ' : ' ' : what + , "This may be due to you not asking GHCi to load extra object files," + , "archives or DLLs needed by your current session. Restart GHCi, specifying" + , "the missing library using the -L/path/to/object/dir and -lmissinglibname" + , "flags, or simply by naming the relevant files on the GHCi command line." + , "Alternatively, this link failure might indicate a bug in GHCi." + , "If you suspect the latter, please report this as a GHC bug:" + , " https://www.haskell.org/ghc/reportabug" + ]) + + +nameToCLabel :: Name -> String -> FastString +nameToCLabel n suffix = mkFastString label + where + encodeZ = zString . zEncodeFS + (Module pkgKey modName) = ASSERT( isExternalName n ) nameModule n + packagePart = encodeZ (unitIdFS pkgKey) + modulePart = encodeZ (moduleNameFS modName) + occPart = encodeZ (occNameFS (nameOccName n)) + + label = concat + [ if pkgKey == mainUnitId then "" else packagePart ++ "_" + , modulePart + , '_':occPart + , '_':suffix + ] + + +primopToCLabel :: PrimOp -> String -> String +primopToCLabel primop suffix = concat + [ "ghczmprim_GHCziPrimopWrappers_" + , zString (zEncodeFS (occNameFS (primOpOcc primop))) + , '_':suffix + ] diff --git a/compiler/GHC/ByteCode/Types.hs b/compiler/GHC/ByteCode/Types.hs new file mode 100644 index 0000000000..ce80c53279 --- /dev/null +++ b/compiler/GHC/ByteCode/Types.hs @@ -0,0 +1,182 @@ +{-# LANGUAGE MagicHash, RecordWildCards, GeneralizedNewtypeDeriving #-} +-- +-- (c) The University of Glasgow 2002-2006 +-- + +-- | Bytecode assembler types +module GHC.ByteCode.Types + ( CompiledByteCode(..), seqCompiledByteCode, FFIInfo(..) + , UnlinkedBCO(..), BCOPtr(..), BCONPtr(..) + , ItblEnv, ItblPtr(..) + , CgBreakInfo(..) + , ModBreaks (..), BreakIndex, emptyModBreaks + , CCostCentre + ) where + +import GhcPrelude + +import FastString +import Id +import Name +import NameEnv +import Outputable +import PrimOp +import SizedSeq +import Type +import SrcLoc +import GHCi.BreakArray +import GHCi.RemoteTypes +import GHCi.FFI +import Control.DeepSeq + +import Foreign +import Data.Array +import Data.Array.Base ( UArray(..) ) +import Data.ByteString (ByteString) +import Data.IntMap (IntMap) +import qualified Data.IntMap as IntMap +import Data.Maybe (catMaybes) +import GHC.Exts.Heap +import GHC.Stack.CCS + +-- ----------------------------------------------------------------------------- +-- Compiled Byte Code + +data CompiledByteCode = CompiledByteCode + { bc_bcos :: [UnlinkedBCO] -- Bunch of interpretable bindings + , bc_itbls :: ItblEnv -- A mapping from DataCons to their itbls + , bc_ffis :: [FFIInfo] -- ffi blocks we allocated + , bc_strs :: [RemotePtr ()] -- malloc'd strings + , bc_breaks :: Maybe ModBreaks -- breakpoint info (Nothing if we're not + -- creating breakpoints, for some reason) + } + -- ToDo: we're not tracking strings that we malloc'd +newtype FFIInfo = FFIInfo (RemotePtr C_ffi_cif) + deriving (Show, NFData) + +instance Outputable CompiledByteCode where + ppr CompiledByteCode{..} = ppr bc_bcos + +-- Not a real NFData instance, because ModBreaks contains some things +-- we can't rnf +seqCompiledByteCode :: CompiledByteCode -> () +seqCompiledByteCode CompiledByteCode{..} = + rnf bc_bcos `seq` + rnf (nameEnvElts bc_itbls) `seq` + rnf bc_ffis `seq` + rnf bc_strs `seq` + rnf (fmap seqModBreaks bc_breaks) + +type ItblEnv = NameEnv (Name, ItblPtr) + -- We need the Name in the range so we know which + -- elements to filter out when unloading a module + +newtype ItblPtr = ItblPtr (RemotePtr StgInfoTable) + deriving (Show, NFData) + +data UnlinkedBCO + = UnlinkedBCO { + unlinkedBCOName :: !Name, + unlinkedBCOArity :: {-# UNPACK #-} !Int, + unlinkedBCOInstrs :: !(UArray Int Word16), -- insns + unlinkedBCOBitmap :: !(UArray Int Word64), -- bitmap + unlinkedBCOLits :: !(SizedSeq BCONPtr), -- non-ptrs + unlinkedBCOPtrs :: !(SizedSeq BCOPtr) -- ptrs + } + +instance NFData UnlinkedBCO where + rnf UnlinkedBCO{..} = + rnf unlinkedBCOLits `seq` + rnf unlinkedBCOPtrs + +data BCOPtr + = BCOPtrName !Name + | BCOPtrPrimOp !PrimOp + | BCOPtrBCO !UnlinkedBCO + | BCOPtrBreakArray -- a pointer to this module's BreakArray + +instance NFData BCOPtr where + rnf (BCOPtrBCO bco) = rnf bco + rnf x = x `seq` () + +data BCONPtr + = BCONPtrWord {-# UNPACK #-} !Word + | BCONPtrLbl !FastString + | BCONPtrItbl !Name + | BCONPtrStr !ByteString + +instance NFData BCONPtr where + rnf x = x `seq` () + +-- | Information about a breakpoint that we know at code-generation time +data CgBreakInfo + = CgBreakInfo + { cgb_vars :: [Maybe (Id,Word16)] + , cgb_resty :: Type + } +-- See Note [Syncing breakpoint info] in GHC.Runtime.Eval + +-- Not a real NFData instance because we can't rnf Id or Type +seqCgBreakInfo :: CgBreakInfo -> () +seqCgBreakInfo CgBreakInfo{..} = + rnf (map snd (catMaybes (cgb_vars))) `seq` + seqType cgb_resty + +instance Outputable UnlinkedBCO where + ppr (UnlinkedBCO nm _arity _insns _bitmap lits ptrs) + = sep [text "BCO", ppr nm, text "with", + ppr (sizeSS lits), text "lits", + ppr (sizeSS ptrs), text "ptrs" ] + +instance Outputable CgBreakInfo where + ppr info = text "CgBreakInfo" <+> + parens (ppr (cgb_vars info) <+> + ppr (cgb_resty info)) + +-- ----------------------------------------------------------------------------- +-- Breakpoints + +-- | Breakpoint index +type BreakIndex = Int + +-- | C CostCentre type +data CCostCentre + +-- | All the information about the breakpoints for a module +data ModBreaks + = ModBreaks + { modBreaks_flags :: ForeignRef BreakArray + -- ^ The array of flags, one per breakpoint, + -- indicating which breakpoints are enabled. + , modBreaks_locs :: !(Array BreakIndex SrcSpan) + -- ^ An array giving the source span of each breakpoint. + , modBreaks_vars :: !(Array BreakIndex [OccName]) + -- ^ An array giving the names of the free variables at each breakpoint. + , modBreaks_decls :: !(Array BreakIndex [String]) + -- ^ An array giving the names of the declarations enclosing each breakpoint. + , modBreaks_ccs :: !(Array BreakIndex (RemotePtr CostCentre)) + -- ^ Array pointing to cost centre for each breakpoint + , modBreaks_breakInfo :: IntMap CgBreakInfo + -- ^ info about each breakpoint from the bytecode generator + } + +seqModBreaks :: ModBreaks -> () +seqModBreaks ModBreaks{..} = + rnf modBreaks_flags `seq` + rnf modBreaks_locs `seq` + rnf modBreaks_vars `seq` + rnf modBreaks_decls `seq` + rnf modBreaks_ccs `seq` + rnf (fmap seqCgBreakInfo modBreaks_breakInfo) + +-- | Construct an empty ModBreaks +emptyModBreaks :: ModBreaks +emptyModBreaks = ModBreaks + { modBreaks_flags = error "ModBreaks.modBreaks_array not initialised" + -- ToDo: can we avoid this? + , modBreaks_locs = array (0,-1) [] + , modBreaks_vars = array (0,-1) [] + , modBreaks_decls = array (0,-1) [] + , modBreaks_ccs = array (0,-1) [] + , modBreaks_breakInfo = IntMap.empty + } |