summaryrefslogtreecommitdiff
path: root/compiler/ghci/ByteCodeAsm.lhs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/ghci/ByteCodeAsm.lhs')
-rw-r--r--compiler/ghci/ByteCodeAsm.lhs497
1 files changed, 497 insertions, 0 deletions
diff --git a/compiler/ghci/ByteCodeAsm.lhs b/compiler/ghci/ByteCodeAsm.lhs
new file mode 100644
index 0000000000..e332413dae
--- /dev/null
+++ b/compiler/ghci/ByteCodeAsm.lhs
@@ -0,0 +1,497 @@
+%
+% (c) The University of Glasgow 2002
+%
+\section[ByteCodeLink]{Bytecode assembler and linker}
+
+\begin{code}
+{-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
+
+module ByteCodeAsm (
+ assembleBCOs, assembleBCO,
+
+ CompiledByteCode(..),
+ UnlinkedBCO(..), BCOPtr(..), bcoFreeNames,
+ SizedSeq, sizeSS, ssElts,
+ iNTERP_STACK_CHECK_THRESH
+ ) where
+
+#include "HsVersions.h"
+
+import ByteCodeInstr
+import ByteCodeItbls ( ItblEnv, mkITbls )
+
+import Name ( Name, getName )
+import NameSet
+import FiniteMap ( addToFM, lookupFM, emptyFM )
+import Literal ( Literal(..) )
+import TyCon ( TyCon )
+import PrimOp ( PrimOp )
+import Constants ( wORD_SIZE )
+import FastString ( FastString(..) )
+import SMRep ( CgRep(..), StgWord )
+import FiniteMap
+import Outputable
+
+import Control.Monad ( foldM )
+import Control.Monad.ST ( runST )
+
+import GHC.Word ( Word(..) )
+import Data.Array.MArray
+import Data.Array.Unboxed ( listArray )
+import Data.Array.Base ( UArray(..) )
+import Data.Array.ST ( castSTUArray )
+import Foreign ( Word16, free )
+import Data.Int ( Int64 )
+import Data.Char ( ord )
+
+import GHC.Base ( ByteArray# )
+import GHC.IOBase ( IO(..) )
+import GHC.Ptr ( Ptr(..) )
+
+-- -----------------------------------------------------------------------------
+-- Unlinked BCOs
+
+-- CompiledByteCode represents the result of byte-code
+-- compiling a bunch of functions and data types
+
+data CompiledByteCode
+ = ByteCode [UnlinkedBCO] -- Bunch of interpretable bindings
+ ItblEnv -- A mapping from DataCons to their itbls
+
+instance Outputable CompiledByteCode where
+ ppr (ByteCode bcos _) = ppr bcos
+
+
+data UnlinkedBCO
+ = UnlinkedBCO {
+ unlinkedBCOName :: Name,
+ unlinkedBCOArity :: Int,
+ unlinkedBCOInstrs :: ByteArray#, -- insns
+ unlinkedBCOBitmap :: ByteArray#, -- bitmap
+ unlinkedBCOLits :: (SizedSeq (Either Word FastString)), -- literals
+ -- Either literal words or a pointer to a asciiz
+ -- string, denoting a label whose *address* should
+ -- be determined at link time
+ unlinkedBCOPtrs :: (SizedSeq BCOPtr), -- ptrs
+ unlinkedBCOItbls :: (SizedSeq Name) -- itbl refs
+ }
+
+data BCOPtr
+ = BCOPtrName Name
+ | BCOPtrPrimOp PrimOp
+ | BCOPtrBCO UnlinkedBCO
+
+-- | Finds external references. Remember to remove the names
+-- defined by this group of BCOs themselves
+bcoFreeNames :: UnlinkedBCO -> NameSet
+bcoFreeNames bco
+ = bco_refs bco `minusNameSet` mkNameSet [unlinkedBCOName bco]
+ where
+ bco_refs (UnlinkedBCO _ _ _ _ _ ptrs itbls)
+ = unionManyNameSets (
+ mkNameSet [ n | BCOPtrName n <- ssElts ptrs ] :
+ mkNameSet (ssElts itbls) :
+ map bco_refs [ bco | BCOPtrBCO bco <- ssElts ptrs ]
+ )
+
+instance Outputable UnlinkedBCO where
+ ppr (UnlinkedBCO nm arity insns bitmap lits ptrs itbls)
+ = sep [text "BCO", ppr nm, text "with",
+ int (sizeSS lits), text "lits",
+ int (sizeSS ptrs), text "ptrs",
+ int (sizeSS itbls), text "itbls"]
+
+-- -----------------------------------------------------------------------------
+-- The bytecode assembler
+
+-- The object format for bytecodes is: 16 bits for the opcode, and 16
+-- for each field -- so the code can be considered a sequence of
+-- 16-bit ints. Each field denotes either a stack offset or number of
+-- items on the stack (eg SLIDE), and index into the pointer table (eg
+-- PUSH_G), an index into the literal table (eg PUSH_I/D/L), or a
+-- bytecode address in this BCO.
+
+-- Top level assembler fn.
+assembleBCOs :: [ProtoBCO Name] -> [TyCon] -> IO CompiledByteCode
+assembleBCOs proto_bcos tycons
+ = do itblenv <- mkITbls tycons
+ bcos <- mapM assembleBCO proto_bcos
+ return (ByteCode bcos itblenv)
+
+assembleBCO :: ProtoBCO Name -> IO UnlinkedBCO
+assembleBCO (ProtoBCO nm instrs bitmap bsize arity origin malloced)
+ = let
+ -- pass 1: collect up the offsets of the local labels.
+ -- Remember that the first insn starts at offset 1 since offset 0
+ -- (eventually) will hold the total # of insns.
+ label_env = mkLabelEnv emptyFM 1 instrs
+
+ mkLabelEnv env i_offset [] = env
+ mkLabelEnv env i_offset (i:is)
+ = let new_env
+ = case i of LABEL n -> addToFM env n i_offset ; _ -> env
+ in mkLabelEnv new_env (i_offset + instrSize16s i) is
+
+ findLabel lab
+ = case lookupFM label_env lab of
+ Just bco_offset -> bco_offset
+ Nothing -> pprPanic "assembleBCO.findLabel" (int lab)
+ in
+ do -- pass 2: generate the instruction, ptr and nonptr bits
+ insns <- return emptySS :: IO (SizedSeq Word16)
+ lits <- return emptySS :: IO (SizedSeq (Either Word FastString))
+ ptrs <- return emptySS :: IO (SizedSeq BCOPtr)
+ itbls <- return emptySS :: IO (SizedSeq Name)
+ let init_asm_state = (insns,lits,ptrs,itbls)
+ (final_insns, final_lits, final_ptrs, final_itbls)
+ <- mkBits findLabel init_asm_state instrs
+
+ let asm_insns = ssElts final_insns
+ n_insns = sizeSS final_insns
+
+ insns_arr
+ | n_insns > 65535 = panic "linkBCO: >= 64k insns in BCO"
+ | otherwise = mkInstrArray n_insns asm_insns
+ insns_barr = case insns_arr of UArray _lo _hi barr -> barr
+
+ bitmap_arr = mkBitmapArray bsize bitmap
+ bitmap_barr = case bitmap_arr of UArray _lo _hi barr -> barr
+
+ let ul_bco = UnlinkedBCO nm arity insns_barr bitmap_barr final_lits
+ final_ptrs final_itbls
+
+ -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
+ -- objects, since they might get run too early. Disable this until
+ -- we figure out what to do.
+ -- when (notNull malloced) (addFinalizer ul_bco (mapM_ zonk malloced))
+
+ return ul_bco
+ where
+ zonk ptr = do -- putStrLn ("freeing malloc'd block at " ++ show (A# a#))
+ free ptr
+
+mkBitmapArray :: Int -> [StgWord] -> UArray Int StgWord
+mkBitmapArray bsize bitmap
+ = listArray (0, length bitmap) (fromIntegral bsize : bitmap)
+
+mkInstrArray :: Int -> [Word16] -> UArray Int Word16
+mkInstrArray n_insns asm_insns
+ = listArray (0, n_insns) (fromIntegral n_insns : asm_insns)
+
+-- instrs nonptrs ptrs itbls
+type AsmState = (SizedSeq Word16,
+ SizedSeq (Either Word FastString),
+ SizedSeq BCOPtr,
+ SizedSeq Name)
+
+data SizedSeq a = SizedSeq !Int [a]
+emptySS = SizedSeq 0 []
+
+-- Why are these two monadic???
+addToSS (SizedSeq n r_xs) x = return (SizedSeq (n+1) (x:r_xs))
+addListToSS (SizedSeq n r_xs) xs
+ = return (SizedSeq (n + length xs) (reverse xs ++ r_xs))
+
+ssElts :: SizedSeq a -> [a]
+ssElts (SizedSeq n r_xs) = reverse r_xs
+
+sizeSS :: SizedSeq a -> Int
+sizeSS (SizedSeq n r_xs) = n
+
+-- Bring in all the bci_ bytecode constants.
+#include "Bytecodes.h"
+
+-- This is where all the action is (pass 2 of the assembler)
+mkBits :: (Int -> Int) -- label finder
+ -> AsmState
+ -> [BCInstr] -- instructions (in)
+ -> IO AsmState
+
+mkBits findLabel st proto_insns
+ = foldM doInstr st proto_insns
+ where
+ doInstr :: AsmState -> BCInstr -> IO AsmState
+ doInstr st i
+ = case i of
+ STKCHECK n -> instr2 st bci_STKCHECK n
+ PUSH_L o1 -> instr2 st bci_PUSH_L o1
+ PUSH_LL o1 o2 -> instr3 st bci_PUSH_LL o1 o2
+ PUSH_LLL o1 o2 o3 -> instr4 st bci_PUSH_LLL o1 o2 o3
+ PUSH_G nm -> do (p, st2) <- ptr st (BCOPtrName nm)
+ instr2 st2 bci_PUSH_G p
+ PUSH_PRIMOP op -> do (p, st2) <- ptr st (BCOPtrPrimOp op)
+ instr2 st2 bci_PUSH_G p
+ PUSH_BCO proto -> do ul_bco <- assembleBCO proto
+ (p, st2) <- ptr st (BCOPtrBCO ul_bco)
+ instr2 st2 bci_PUSH_G p
+ PUSH_ALTS proto -> do ul_bco <- assembleBCO proto
+ (p, st2) <- ptr st (BCOPtrBCO ul_bco)
+ instr2 st2 bci_PUSH_ALTS p
+ PUSH_ALTS_UNLIFTED proto pk -> do
+ ul_bco <- assembleBCO proto
+ (p, st2) <- ptr st (BCOPtrBCO ul_bco)
+ instr2 st2 (push_alts pk) p
+ PUSH_UBX (Left lit) nws
+ -> do (np, st2) <- literal st lit
+ instr3 st2 bci_PUSH_UBX np nws
+ PUSH_UBX (Right aa) nws
+ -> do (np, st2) <- addr st aa
+ instr3 st2 bci_PUSH_UBX np nws
+
+ PUSH_APPLY_N -> do instr1 st bci_PUSH_APPLY_N
+ PUSH_APPLY_V -> do instr1 st bci_PUSH_APPLY_V
+ PUSH_APPLY_F -> do instr1 st bci_PUSH_APPLY_F
+ PUSH_APPLY_D -> do instr1 st bci_PUSH_APPLY_D
+ PUSH_APPLY_L -> do instr1 st bci_PUSH_APPLY_L
+ PUSH_APPLY_P -> do instr1 st bci_PUSH_APPLY_P
+ PUSH_APPLY_PP -> do instr1 st bci_PUSH_APPLY_PP
+ PUSH_APPLY_PPP -> do instr1 st bci_PUSH_APPLY_PPP
+ PUSH_APPLY_PPPP -> do instr1 st bci_PUSH_APPLY_PPPP
+ PUSH_APPLY_PPPPP -> do instr1 st bci_PUSH_APPLY_PPPPP
+ PUSH_APPLY_PPPPPP -> do instr1 st bci_PUSH_APPLY_PPPPPP
+
+ SLIDE n by -> instr3 st bci_SLIDE n by
+ ALLOC_AP n -> instr2 st bci_ALLOC_AP n
+ ALLOC_PAP arity n -> instr3 st bci_ALLOC_PAP arity n
+ MKAP off sz -> instr3 st bci_MKAP off sz
+ MKPAP off sz -> instr3 st bci_MKPAP off sz
+ UNPACK n -> instr2 st bci_UNPACK n
+ PACK dcon sz -> do (itbl_no,st2) <- itbl st dcon
+ instr3 st2 bci_PACK itbl_no sz
+ LABEL lab -> return st
+ TESTLT_I i l -> do (np, st2) <- int st i
+ instr3 st2 bci_TESTLT_I np (findLabel l)
+ TESTEQ_I i l -> do (np, st2) <- int st i
+ instr3 st2 bci_TESTEQ_I np (findLabel l)
+ TESTLT_F f l -> do (np, st2) <- float st f
+ instr3 st2 bci_TESTLT_F np (findLabel l)
+ TESTEQ_F f l -> do (np, st2) <- float st f
+ instr3 st2 bci_TESTEQ_F np (findLabel l)
+ TESTLT_D d l -> do (np, st2) <- double st d
+ instr3 st2 bci_TESTLT_D np (findLabel l)
+ TESTEQ_D d l -> do (np, st2) <- double st d
+ instr3 st2 bci_TESTEQ_D np (findLabel l)
+ TESTLT_P i l -> instr3 st bci_TESTLT_P i (findLabel l)
+ TESTEQ_P i l -> instr3 st bci_TESTEQ_P i (findLabel l)
+ CASEFAIL -> instr1 st bci_CASEFAIL
+ SWIZZLE stkoff n -> instr3 st bci_SWIZZLE stkoff n
+ JMP l -> instr2 st bci_JMP (findLabel l)
+ ENTER -> instr1 st bci_ENTER
+ RETURN -> instr1 st bci_RETURN
+ RETURN_UBX rep -> instr1 st (return_ubx rep)
+ CCALL off m_addr -> do (np, st2) <- addr st m_addr
+ instr3 st2 bci_CCALL off np
+
+ i2s :: Int -> Word16
+ i2s = fromIntegral
+
+ instr1 (st_i0,st_l0,st_p0,st_I0) i1
+ = do st_i1 <- addToSS st_i0 i1
+ return (st_i1,st_l0,st_p0,st_I0)
+
+ instr2 (st_i0,st_l0,st_p0,st_I0) i1 i2
+ = do st_i1 <- addToSS st_i0 (i2s i1)
+ st_i2 <- addToSS st_i1 (i2s i2)
+ return (st_i2,st_l0,st_p0,st_I0)
+
+ instr3 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3
+ = do st_i1 <- addToSS st_i0 (i2s i1)
+ st_i2 <- addToSS st_i1 (i2s i2)
+ st_i3 <- addToSS st_i2 (i2s i3)
+ return (st_i3,st_l0,st_p0,st_I0)
+
+ instr4 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3 i4
+ = do st_i1 <- addToSS st_i0 (i2s i1)
+ st_i2 <- addToSS st_i1 (i2s i2)
+ st_i3 <- addToSS st_i2 (i2s i3)
+ st_i4 <- addToSS st_i3 (i2s i4)
+ return (st_i4,st_l0,st_p0,st_I0)
+
+ float (st_i0,st_l0,st_p0,st_I0) f
+ = do let ws = mkLitF f
+ st_l1 <- addListToSS st_l0 (map Left ws)
+ return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
+
+ double (st_i0,st_l0,st_p0,st_I0) d
+ = do let ws = mkLitD d
+ st_l1 <- addListToSS st_l0 (map Left ws)
+ return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
+
+ int (st_i0,st_l0,st_p0,st_I0) i
+ = do let ws = mkLitI i
+ st_l1 <- addListToSS st_l0 (map Left ws)
+ return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
+
+ int64 (st_i0,st_l0,st_p0,st_I0) i
+ = do let ws = mkLitI64 i
+ st_l1 <- addListToSS st_l0 (map Left ws)
+ return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
+
+ addr (st_i0,st_l0,st_p0,st_I0) a
+ = do let ws = mkLitPtr a
+ st_l1 <- addListToSS st_l0 (map Left ws)
+ return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
+
+ litlabel (st_i0,st_l0,st_p0,st_I0) fs
+ = do st_l1 <- addListToSS st_l0 [Right fs]
+ return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))
+
+ ptr (st_i0,st_l0,st_p0,st_I0) p
+ = do st_p1 <- addToSS st_p0 p
+ return (sizeSS st_p0, (st_i0,st_l0,st_p1,st_I0))
+
+ itbl (st_i0,st_l0,st_p0,st_I0) dcon
+ = do st_I1 <- addToSS st_I0 (getName dcon)
+ return (sizeSS st_I0, (st_i0,st_l0,st_p0,st_I1))
+
+ literal st (MachLabel fs _) = litlabel st fs
+ literal st (MachWord w) = int st (fromIntegral w)
+ literal st (MachInt j) = int st (fromIntegral j)
+ literal st (MachFloat r) = float st (fromRational r)
+ literal st (MachDouble r) = double st (fromRational r)
+ literal st (MachChar c) = int st (ord c)
+ literal st (MachInt64 ii) = int64 st (fromIntegral ii)
+ literal st (MachWord64 ii) = int64 st (fromIntegral ii)
+ literal st other = pprPanic "ByteCodeLink.literal" (ppr other)
+
+
+push_alts NonPtrArg = bci_PUSH_ALTS_N
+push_alts FloatArg = bci_PUSH_ALTS_F
+push_alts DoubleArg = bci_PUSH_ALTS_D
+push_alts VoidArg = bci_PUSH_ALTS_V
+push_alts LongArg = bci_PUSH_ALTS_L
+push_alts PtrArg = bci_PUSH_ALTS_P
+
+return_ubx NonPtrArg = bci_RETURN_N
+return_ubx FloatArg = bci_RETURN_F
+return_ubx DoubleArg = bci_RETURN_D
+return_ubx VoidArg = bci_RETURN_V
+return_ubx LongArg = bci_RETURN_L
+return_ubx PtrArg = bci_RETURN_P
+
+
+-- The size in 16-bit entities of an instruction.
+instrSize16s :: BCInstr -> Int
+instrSize16s instr
+ = case instr of
+ STKCHECK{} -> 2
+ PUSH_L{} -> 2
+ PUSH_LL{} -> 3
+ PUSH_LLL{} -> 4
+ PUSH_G{} -> 2
+ PUSH_PRIMOP{} -> 2
+ PUSH_BCO{} -> 2
+ PUSH_ALTS{} -> 2
+ PUSH_ALTS_UNLIFTED{} -> 2
+ PUSH_UBX{} -> 3
+ PUSH_APPLY_N{} -> 1
+ PUSH_APPLY_V{} -> 1
+ PUSH_APPLY_F{} -> 1
+ PUSH_APPLY_D{} -> 1
+ PUSH_APPLY_L{} -> 1
+ PUSH_APPLY_P{} -> 1
+ PUSH_APPLY_PP{} -> 1
+ PUSH_APPLY_PPP{} -> 1
+ PUSH_APPLY_PPPP{} -> 1
+ PUSH_APPLY_PPPPP{} -> 1
+ PUSH_APPLY_PPPPPP{} -> 1
+ SLIDE{} -> 3
+ ALLOC_AP{} -> 2
+ ALLOC_PAP{} -> 3
+ MKAP{} -> 3
+ MKPAP{} -> 3
+ UNPACK{} -> 2
+ PACK{} -> 3
+ LABEL{} -> 0 -- !!
+ TESTLT_I{} -> 3
+ TESTEQ_I{} -> 3
+ TESTLT_F{} -> 3
+ TESTEQ_F{} -> 3
+ TESTLT_D{} -> 3
+ TESTEQ_D{} -> 3
+ TESTLT_P{} -> 3
+ TESTEQ_P{} -> 3
+ JMP{} -> 2
+ CASEFAIL{} -> 1
+ ENTER{} -> 1
+ RETURN{} -> 1
+ RETURN_UBX{} -> 1
+ CCALL{} -> 3
+ SWIZZLE{} -> 3
+
+-- Make lists of host-sized words for literals, so that when the
+-- words are placed in memory at increasing addresses, the
+-- bit pattern is correct for the host's word size and endianness.
+mkLitI :: Int -> [Word]
+mkLitF :: Float -> [Word]
+mkLitD :: Double -> [Word]
+mkLitPtr :: Ptr () -> [Word]
+mkLitI64 :: Int64 -> [Word]
+
+mkLitF f
+ = runST (do
+ arr <- newArray_ ((0::Int),0)
+ writeArray arr 0 f
+ f_arr <- castSTUArray arr
+ w0 <- readArray f_arr 0
+ return [w0 :: Word]
+ )
+
+mkLitD d
+ | wORD_SIZE == 4
+ = runST (do
+ arr <- newArray_ ((0::Int),1)
+ writeArray arr 0 d
+ d_arr <- castSTUArray arr
+ w0 <- readArray d_arr 0
+ w1 <- readArray d_arr 1
+ return [w0 :: Word, w1]
+ )
+ | wORD_SIZE == 8
+ = runST (do
+ arr <- newArray_ ((0::Int),0)
+ writeArray arr 0 d
+ d_arr <- castSTUArray arr
+ w0 <- readArray d_arr 0
+ return [w0 :: Word]
+ )
+
+mkLitI64 ii
+ | wORD_SIZE == 4
+ = runST (do
+ arr <- newArray_ ((0::Int),1)
+ writeArray arr 0 ii
+ d_arr <- castSTUArray arr
+ w0 <- readArray d_arr 0
+ w1 <- readArray d_arr 1
+ return [w0 :: Word,w1]
+ )
+ | wORD_SIZE == 8
+ = runST (do
+ arr <- newArray_ ((0::Int),0)
+ writeArray arr 0 ii
+ d_arr <- castSTUArray arr
+ w0 <- readArray d_arr 0
+ return [w0 :: Word]
+ )
+
+mkLitI i
+ = runST (do
+ arr <- newArray_ ((0::Int),0)
+ writeArray arr 0 i
+ i_arr <- castSTUArray arr
+ w0 <- readArray i_arr 0
+ return [w0 :: Word]
+ )
+
+mkLitPtr a
+ = runST (do
+ arr <- newArray_ ((0::Int),0)
+ writeArray arr 0 a
+ a_arr <- castSTUArray arr
+ w0 <- readArray a_arr 0
+ return [w0 :: Word]
+ )
+
+iNTERP_STACK_CHECK_THRESH = (INTERP_STACK_CHECK_THRESH :: Int)
+\end{code}