diff options
author | Luite Stegeman <stegeman@gmail.com> | 2021-01-22 00:09:17 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2021-03-20 07:49:15 -0400 |
commit | 1f94e0f7601f8e22fdd81a47f130650265a44196 (patch) | |
tree | d06d02317049b56763b2f1da27f71f3663efa5a0 /compiler/GHC/ByteCode | |
parent | 7de3532f0317032f75b76150c5d3a6f76178be04 (diff) | |
download | haskell-1f94e0f7601f8e22fdd81a47f130650265a44196.tar.gz |
Generate GHCi bytecode from STG instead of Core and support unboxed
tuples and sums.
fixes #1257
Diffstat (limited to 'compiler/GHC/ByteCode')
-rw-r--r-- | compiler/GHC/ByteCode/Asm.hs | 101 | ||||
-rw-r--r-- | compiler/GHC/ByteCode/Instr.hs | 98 | ||||
-rw-r--r-- | compiler/GHC/ByteCode/Types.hs | 61 |
3 files changed, 221 insertions, 39 deletions
diff --git a/compiler/GHC/ByteCode/Asm.hs b/compiler/GHC/ByteCode/Asm.hs index 3f88187960..c58328f57c 100644 --- a/compiler/GHC/ByteCode/Asm.hs +++ b/compiler/GHC/ByteCode/Asm.hs @@ -9,10 +9,10 @@ -- | Bytecode assembler and linker module GHC.ByteCode.Asm ( assembleBCOs, assembleOneBCO, - bcoFreeNames, SizedSeq, sizeSS, ssElts, - iNTERP_STACK_CHECK_THRESH + iNTERP_STACK_CHECK_THRESH, + mkTupleInfoLit ) where #include "HsVersions.h" @@ -27,7 +27,7 @@ import GHC.ByteCode.InfoTable import GHC.ByteCode.Types import GHCi.RemoteTypes import GHC.Runtime.Interpreter -import GHC.Runtime.Heap.Layout +import GHC.Runtime.Heap.Layout hiding ( WordOff ) import GHC.Types.Name import GHC.Types.Name.Set @@ -381,6 +381,16 @@ assembleI platform i = case i of -> do let ul_bco = assembleBCO platform proto p <- ioptr (liftM BCOPtrBCO ul_bco) emit (push_alts pk) [Op p] + PUSH_ALTS_TUPLE proto tuple_info tuple_proto + -> do let ul_bco = assembleBCO platform proto + ul_tuple_bco = assembleBCO platform + tuple_proto + p <- ioptr (liftM BCOPtrBCO ul_bco) + p_tup <- ioptr (liftM BCOPtrBCO ul_tuple_bco) + info <- int (fromIntegral $ + mkTupleInfoSig tuple_info) + emit bci_PUSH_ALTS_T + [Op p, Op info, Op p_tup] PUSH_PAD8 -> emit bci_PUSH_PAD8 [] PUSH_PAD16 -> emit bci_PUSH_PAD16 [] PUSH_PAD32 -> emit bci_PUSH_PAD32 [] @@ -439,6 +449,7 @@ assembleI platform i = case i of ENTER -> emit bci_ENTER [] RETURN -> emit bci_RETURN [] RETURN_UBX rep -> emit (return_ubx rep) [] + RETURN_TUPLE -> emit bci_RETURN_T [] CCALL off m_addr i -> do np <- addr m_addr emit bci_CCALL [SmallOp off, Op np, SmallOp i] BRK_FUN index uniq cc -> do p1 <- ptr BCOPtrBreakArray @@ -516,6 +527,90 @@ return_ubx V16 = error "return_ubx: vector" return_ubx V32 = error "return_ubx: vector" return_ubx V64 = error "return_ubx: vector" +{- + we can only handle up to a fixed number of words on the stack, + because we need a stg_ctoi_tN stack frame for each size N. See + Note [unboxed tuple bytecodes and tuple_BCO]. + + If needed, you can support larger tuples by adding more in + StgMiscClosures.cmm, Interpreter.c and MiscClosures.h and + raising this limit. + + Note that the limit is the number of words passed on the stack. + If the calling convention passes part of the tuple in registers, the + maximum number of tuple elements may be larger. Elements can also + take multiple words on the stack (for example Double# on a 32 bit + platform). + + -} +maxTupleNativeStackSize :: WordOff +maxTupleNativeStackSize = 62 + +{- + Maximum number of supported registers for returning tuples. + + If GHC uses more more than these (because of a change in the calling + convention or a new platform) mkTupleInfoSig will panic. + + You can raise the limits after modifying stg_ctoi_t and stg_ret_t + (StgMiscClosures.cmm) to save and restore the additional registers. + -} +maxTupleVanillaRegs, maxTupleFloatRegs, maxTupleDoubleRegs, + maxTupleLongRegs :: Int +maxTupleVanillaRegs = 6 +maxTupleFloatRegs = 6 +maxTupleDoubleRegs = 6 +maxTupleLongRegs = 1 + +{- + Construct the tuple_info word that stg_ctoi_t and stg_ret_t use + to convert a tuple between the native calling convention and the + interpreter. + + See Note [GHCi tuple layout] for more information. + -} +mkTupleInfoSig :: TupleInfo -> Word32 +mkTupleInfoSig ti@TupleInfo{..} + | tupleNativeStackSize > maxTupleNativeStackSize = + pprPanic "mkTupleInfoSig: tuple too big for the bytecode compiler" + (ppr tupleNativeStackSize <+> text "stack words." <+> + text "Use -fobject-code to get around this limit" + ) + | tupleVanillaRegs `shiftR` maxTupleVanillaRegs /= 0 = + pprPanic "mkTupleInfoSig: too many vanilla registers" (ppr tupleVanillaRegs) + | tupleLongRegs `shiftR` maxTupleLongRegs /= 0 = + pprPanic "mkTupleInfoSig: too many long registers" (ppr tupleLongRegs) + | tupleFloatRegs `shiftR` maxTupleFloatRegs /= 0 = + pprPanic "mkTupleInfoSig: too many float registers" (ppr tupleFloatRegs) + | tupleDoubleRegs `shiftR` maxTupleDoubleRegs /= 0 = + pprPanic "mkTupleInfoSig: too many double registers" (ppr tupleDoubleRegs) + {- + Check that we can pack the register counts/bitmaps and stack size + in the information word. In particular we check that each component + fits in the bits we have reserved for it. + + This overlaps with some of the above checks. It's likely that if the + number of registers changes, the number of bits will also need to be + updated. + -} + | tupleNativeStackSize < 16384 && -- 14 bits stack usage + tupleDoubleRegs < 64 && -- 6 bit bitmap (these can be shared with float) + tupleFloatRegs < 64 && -- 6 bit bitmap (these can be shared with double) + tupleLongRegs < 4 && -- 2 bit bitmap + tupleVanillaRegs < 65536 && -- 4 bit count (tupleVanillaRegs is still a bitmap) + -- check that there are no "holes", i.e. that R1..Rn are all in use + tupleVanillaRegs .&. (tupleVanillaRegs + 1) == 0 + = fromIntegral tupleNativeStackSize .|. + unRegBitmap (tupleLongRegs `shiftL` 14) .|. + unRegBitmap (tupleDoubleRegs `shiftL` 16) .|. + unRegBitmap (tupleFloatRegs `shiftL` 22) .|. + fromIntegral (countTrailingZeros (1 + tupleVanillaRegs) `shiftL` 28) + | otherwise = pprPanic "mkTupleInfoSig: unsupported tuple shape" (ppr ti) + +mkTupleInfoLit :: Platform -> TupleInfo -> Literal +mkTupleInfoLit platform tuple_info = + mkLitWord platform . fromIntegral $ mkTupleInfoSig tuple_info + -- Make lists of host-sized words for literals, so that when the -- words are placed in memory at increasing addresses, the -- bit pattern is correct for the host's word size and endianness. diff --git a/compiler/GHC/ByteCode/Instr.hs b/compiler/GHC/ByteCode/Instr.hs index a8cc569548..5b0b20e38d 100644 --- a/compiler/GHC/ByteCode/Instr.hs +++ b/compiler/GHC/ByteCode/Instr.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE CPP #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} {-# OPTIONS_GHC -funbox-strict-fields #-} -- -- (c) The University of Glasgow 2002-2006 @@ -17,22 +18,19 @@ import GHC.ByteCode.Types import GHCi.RemoteTypes import GHCi.FFI (C_ffi_cif) import GHC.StgToCmm.Layout ( ArgRep(..) ) -import GHC.Core.Ppr import GHC.Utils.Outputable -import GHC.Data.FastString import GHC.Types.Name import GHC.Types.Unique -import GHC.Types.Id -import GHC.Core import GHC.Types.Literal import GHC.Core.DataCon -import GHC.Types.Var.Set import GHC.Builtin.PrimOps import GHC.Runtime.Heap.Layout import Data.Word import GHC.Stack.CCS (CostCentre) +import GHC.Stg.Syntax + -- ---------------------------------------------------------------------------- -- Bytecode instructions @@ -45,7 +43,7 @@ data ProtoBCO a protoBCOBitmapSize :: Word16, protoBCOArity :: Int, -- what the BCO came from, for debugging only - protoBCOExpr :: Either [AnnAlt Id DVarSet] (AnnExpr Id DVarSet), + protoBCOExpr :: Either [CgStgAlt] CgStgRhs, -- malloc'd pointers protoBCOFFIs :: [FFIInfo] } @@ -91,6 +89,9 @@ data BCInstr -- Push an alt continuation | PUSH_ALTS (ProtoBCO Name) | PUSH_ALTS_UNLIFTED (ProtoBCO Name) ArgRep + | PUSH_ALTS_TUPLE (ProtoBCO Name) -- continuation + !TupleInfo + (ProtoBCO Name) -- tuple return BCO -- Pushing 8, 16 and 32 bits of padding (for constructors). | PUSH_PAD8 @@ -173,8 +174,9 @@ data BCInstr -- To Infinity And Beyond | ENTER - | RETURN -- return a lifted value + | RETURN -- return a lifted value | RETURN_UBX ArgRep -- return an unlifted value, here's its rep + | RETURN_TUPLE -- return an unboxed tuple (info already on stack) -- Breakpoints | BRK_FUN Word16 Unique (RemotePtr CostCentre) @@ -193,36 +195,45 @@ instance Outputable a => Outputable (ProtoBCO a) where = (text "ProtoBCO" <+> ppr name <> char '#' <> int arity <+> text (show ffis) <> colon) $$ nest 3 (case origin of - Left alts -> vcat (zipWith (<+>) (char '{' : repeat (char ';')) - (map (pprCoreAltShort.deAnnAlt) alts)) <+> char '}' - Right rhs -> pprCoreExprShort (deAnnotate rhs)) + Left alts -> + vcat (zipWith (<+>) (char '{' : repeat (char ';')) + (map (pprStgAltShort shortStgPprOpts) alts)) + Right rhs -> + pprStgRhsShort shortStgPprOpts rhs + ) $$ nest 3 (text "bitmap: " <+> text (show bsize) <+> ppr bitmap) $$ nest 3 (vcat (map ppr instrs)) --- Print enough of the Core expression to enable the reader to find --- the expression in the -ddump-prep output. That is, we need to +-- Print enough of the STG expression to enable the reader to find +-- the expression in the -ddump-stg output. That is, we need to -- include at least a binder. -pprCoreExprShort :: CoreExpr -> SDoc -pprCoreExprShort expr@(Lam _ _) - = let - (bndrs, _) = collectBinders expr - in - char '\\' <+> sep (map (pprBndr LambdaBind) bndrs) <+> arrow <+> text "..." - -pprCoreExprShort (Case _expr var _ty _alts) - = text "case of" <+> ppr var - -pprCoreExprShort (Let (NonRec x _) _) = text "let" <+> ppr x <+> ptext (sLit ("= ... in ...")) -pprCoreExprShort (Let (Rec bs) _) = text "let {" <+> ppr (fst (head bs)) <+> ptext (sLit ("= ...; ... } in ...")) - -pprCoreExprShort (Tick t e) = ppr t <+> pprCoreExprShort e -pprCoreExprShort (Cast e _) = pprCoreExprShort e <+> text "`cast` T" +pprStgExprShort :: OutputablePass pass => StgPprOpts -> GenStgExpr pass -> SDoc +pprStgExprShort _ (StgCase _expr var _ty _alts) = + text "case of" <+> ppr var +pprStgExprShort _ (StgLet _ bnd _) = + text "let" <+> pprStgBindShort bnd <+> text "in ..." +pprStgExprShort _ (StgLetNoEscape _ bnd _) = + text "let-no-escape" <+> pprStgBindShort bnd <+> text "in ..." +pprStgExprShort opts (StgTick t e) = ppr t <+> pprStgExprShort opts e +pprStgExprShort opts e = pprStgExpr opts e + +pprStgBindShort :: OutputablePass pass => GenStgBinding pass -> SDoc +pprStgBindShort (StgNonRec x _) = + ppr x <+> text "= ..." +pprStgBindShort (StgRec bs) = + char '{' <+> ppr (fst (head bs)) <+> text "= ...; ... }" + +pprStgAltShort :: OutputablePass pass => StgPprOpts -> GenStgAlt pass -> SDoc +pprStgAltShort opts (con, args, expr) = + ppr con <+> sep (map ppr args) <+> text "->" <+> pprStgExprShort opts expr + +pprStgRhsShort :: OutputablePass pass => StgPprOpts -> GenStgRhs pass -> SDoc +pprStgRhsShort opts (StgRhsClosure _ext _cc upd_flag args body) = + hang (hsep [ char '\\' <> ppr upd_flag, brackets (interppSP args) ]) + 4 (pprStgExprShort opts body) +pprStgRhsShort opts rhs = pprStgRhs opts rhs -pprCoreExprShort e = pprCoreExpr e - -pprCoreAltShort :: CoreAlt -> SDoc -pprCoreAltShort (Alt con args expr) = ppr con <+> sep (map ppr args) <+> text "->" <+> pprCoreExprShort expr instance Outputable BCInstr where ppr (STKCHECK n) = text "STKCHECK" <+> ppr n @@ -239,8 +250,13 @@ instance Outputable BCInstr where ppr (PUSH_PRIMOP op) = text "PUSH_G " <+> text "GHC.PrimopWrappers." <> ppr op ppr (PUSH_BCO bco) = hang (text "PUSH_BCO") 2 (ppr bco) + ppr (PUSH_ALTS bco) = hang (text "PUSH_ALTS") 2 (ppr bco) ppr (PUSH_ALTS_UNLIFTED bco pk) = hang (text "PUSH_ALTS_UNLIFTED" <+> ppr pk) 2 (ppr bco) + ppr (PUSH_ALTS_TUPLE bco tuple_info tuple_bco) = + hang (text "PUSH_ALTS_TUPLE" <+> ppr tuple_info) + 2 + (ppr tuple_bco $+$ ppr bco) ppr PUSH_PAD8 = text "PUSH_PAD8" ppr PUSH_PAD16 = text "PUSH_PAD16" @@ -297,8 +313,11 @@ instance Outputable BCInstr where ppr ENTER = text "ENTER" ppr RETURN = text "RETURN" ppr (RETURN_UBX pk) = text "RETURN_UBX " <+> ppr pk + ppr (RETURN_TUPLE) = text "RETURN_TUPLE" ppr (BRK_FUN index uniq _cc) = text "BRK_FUN" <+> ppr index <+> ppr uniq <+> text "<cc>" + + -- ----------------------------------------------------------------------------- -- The stack use, in words, of each bytecode insn. These _must_ be -- correct, or overestimates of reality, to be safe. @@ -326,8 +345,16 @@ bciStackUse PUSH32_W{} = 1 -- takes exactly 1 word bciStackUse PUSH_G{} = 1 bciStackUse PUSH_PRIMOP{} = 1 bciStackUse PUSH_BCO{} = 1 -bciStackUse (PUSH_ALTS bco) = 2 + protoBCOStackUse bco -bciStackUse (PUSH_ALTS_UNLIFTED bco _) = 2 + protoBCOStackUse bco +bciStackUse (PUSH_ALTS bco) = 2 {- profiling only, restore CCCS -} + + 3 + protoBCOStackUse bco +bciStackUse (PUSH_ALTS_UNLIFTED bco _) = 2 {- profiling only, restore CCCS -} + + 4 + protoBCOStackUse bco +bciStackUse (PUSH_ALTS_TUPLE bco info _) = + -- (tuple_bco, tuple_info word, cont_bco, stg_ctoi_t) + -- tuple + -- (tuple_info, tuple_bco, stg_ret_t) + 1 {- profiling only -} + + 7 + fromIntegral (tupleSize info) + protoBCOStackUse bco bciStackUse (PUSH_PAD8) = 1 -- overapproximation bciStackUse (PUSH_PAD16) = 1 -- overapproximation bciStackUse (PUSH_PAD32) = 1 -- overapproximation on 64bit arch @@ -365,7 +392,8 @@ bciStackUse CASEFAIL{} = 0 bciStackUse JMP{} = 0 bciStackUse ENTER{} = 0 bciStackUse RETURN{} = 0 -bciStackUse RETURN_UBX{} = 1 +bciStackUse RETURN_UBX{} = 1 -- pushes stg_ret_X for some X +bciStackUse RETURN_TUPLE{} = 1 -- pushes stg_ret_t header bciStackUse CCALL{} = 0 bciStackUse SWIZZLE{} = 0 bciStackUse BRK_FUN{} = 0 diff --git a/compiler/GHC/ByteCode/Types.hs b/compiler/GHC/ByteCode/Types.hs index 97304cb7f4..02c117d716 100644 --- a/compiler/GHC/ByteCode/Types.hs +++ b/compiler/GHC/ByteCode/Types.hs @@ -6,7 +6,11 @@ -- | Bytecode assembler types module GHC.ByteCode.Types - ( CompiledByteCode(..), seqCompiledByteCode, FFIInfo(..) + ( CompiledByteCode(..), seqCompiledByteCode + , FFIInfo(..) + , RegBitmap(..) + , TupleInfo(..), voidTupleInfo + , ByteOff(..), WordOff(..) , UnlinkedBCO(..), BCOPtr(..), BCONPtr(..) , ItblEnv, ItblPtr(..) , CgBreakInfo(..) @@ -68,6 +72,61 @@ seqCompiledByteCode CompiledByteCode{..} = rnf bc_strs `seq` rnf (fmap seqModBreaks bc_breaks) +newtype ByteOff = ByteOff Int + deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Outputable) + +newtype WordOff = WordOff Int + deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Outputable) + +newtype RegBitmap = RegBitmap { unRegBitmap :: Word32 } + deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Bits, FiniteBits, Outputable) + +{- Note [GHCi TupleInfo] +~~~~~~~~~~~~~~~~~~~~~~~~ + + This contains the data we need for passing unboxed tuples between + bytecode and native code + + In general we closely follow the native calling convention that + GHC uses for unboxed tuples, but we don't use any registers in + bytecode. All tuple elements are expanded to use a full register + or a full word on the stack. + + The position of tuple elements that are returned on the stack in + the native calling convention is unchanged when returning the same + tuple in bytecode. + + The order of the remaining elements is determined by the register in + which they would have been returned, rather than by their position in + the tuple in the Haskell source code. This makes jumping between bytecode + and native code easier: A map of live registers is enough to convert the + tuple. + + See GHC.StgToByteCode.layoutTuple for more details. +-} +data TupleInfo = TupleInfo + { tupleSize :: !WordOff -- total size of tuple in words + , tupleVanillaRegs :: !RegBitmap -- vanilla registers used + , tupleLongRegs :: !RegBitmap -- long registers used + , tupleFloatRegs :: !RegBitmap -- float registers used + , tupleDoubleRegs :: !RegBitmap -- double registers used + , tupleNativeStackSize :: !WordOff {- words spilled on the stack by + GHCs native calling convention -} + } deriving (Show) + +instance Outputable TupleInfo where + ppr TupleInfo{..} = text "<size" <+> ppr tupleSize <+> + text "stack" <+> ppr tupleNativeStackSize <+> + text "regs" <+> + char 'R' <> ppr tupleVanillaRegs <+> + char 'L' <> ppr tupleLongRegs <+> + char 'F' <> ppr tupleFloatRegs <+> + char 'D' <> ppr tupleDoubleRegs <> + char '>' + +voidTupleInfo :: TupleInfo +voidTupleInfo = TupleInfo 0 0 0 0 0 0 + type ItblEnv = NameEnv (Name, ItblPtr) -- We need the Name in the range so we know which -- elements to filter out when unloading a module |