Generate GHCi bytecode from STG instead of Core and support unboxed

tuples and sums. fixes #1257
author: Luite Stegeman <stegeman@gmail.com> 2021-01-22 00:09:17 +0100
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2021-03-20 07:49:15 -0400
commit: 1f94e0f7601f8e22fdd81a47f130650265a44196 (patch)
tree: d06d02317049b56763b2f1da27f71f3663efa5a0 /compiler/GHC/ByteCode
parent: 7de3532f0317032f75b76150c5d3a6f76178be04 (diff)
download: haskell-1f94e0f7601f8e22fdd81a47f130650265a44196.tar.gz
3 files changed, 221 insertions, 39 deletions
diff --git a/compiler/GHC/ByteCode/Asm.hs b/compiler/GHC/ByteCode/Asm.hs
index 3f88187960..c58328f57c 100644
--- a/compiler/GHC/ByteCode/Asm.hs
+++ b/compiler/GHC/ByteCode/Asm.hs
@@ -9,10 +9,10 @@
 -- | Bytecode assembler and linker
 module GHC.ByteCode.Asm (
         assembleBCOs, assembleOneBCO,
-
         bcoFreeNames,
         SizedSeq, sizeSS, ssElts,
-        iNTERP_STACK_CHECK_THRESH
+        iNTERP_STACK_CHECK_THRESH,
+        mkTupleInfoLit
   ) where
 
 #include "HsVersions.h"
@@ -27,7 +27,7 @@ import GHC.ByteCode.InfoTable
 import GHC.ByteCode.Types
 import GHCi.RemoteTypes
 import GHC.Runtime.Interpreter
-import GHC.Runtime.Heap.Layout
+import GHC.Runtime.Heap.Layout hiding ( WordOff )
 
 import GHC.Types.Name
 import GHC.Types.Name.Set
@@ -381,6 +381,16 @@ assembleI platform i = case i of
                            -> do let ul_bco = assembleBCO platform proto
                                  p <- ioptr (liftM BCOPtrBCO ul_bco)
                                  emit (push_alts pk) [Op p]
+  PUSH_ALTS_TUPLE proto tuple_info tuple_proto
+                           -> do let ul_bco = assembleBCO platform proto
+                                     ul_tuple_bco = assembleBCO platform
+                                                                tuple_proto
+                                 p <- ioptr (liftM BCOPtrBCO ul_bco)
+                                 p_tup <- ioptr (liftM BCOPtrBCO ul_tuple_bco)
+                                 info <- int (fromIntegral $
+                                              mkTupleInfoSig tuple_info)
+                                 emit bci_PUSH_ALTS_T
+                                      [Op p, Op info, Op p_tup]
   PUSH_PAD8                -> emit bci_PUSH_PAD8 []
   PUSH_PAD16               -> emit bci_PUSH_PAD16 []
   PUSH_PAD32               -> emit bci_PUSH_PAD32 []
@@ -439,6 +449,7 @@ assembleI platform i = case i of
   ENTER                    -> emit bci_ENTER []
   RETURN                   -> emit bci_RETURN []
   RETURN_UBX rep           -> emit (return_ubx rep) []
+  RETURN_TUPLE             -> emit bci_RETURN_T []
   CCALL off m_addr i       -> do np <- addr m_addr
                                  emit bci_CCALL [SmallOp off, Op np, SmallOp i]
   BRK_FUN index uniq cc    -> do p1 <- ptr BCOPtrBreakArray
@@ -516,6 +527,90 @@ return_ubx V16 = error "return_ubx: vector"
 return_ubx V32 = error "return_ubx: vector"
 return_ubx V64 = error "return_ubx: vector"
 
+{-
+  we can only handle up to a fixed number of words on the stack,
+  because we need a stg_ctoi_tN stack frame for each size N. See
+  Note [unboxed tuple bytecodes and tuple_BCO].
+
+  If needed, you can support larger tuples by adding more in
+  StgMiscClosures.cmm, Interpreter.c and MiscClosures.h and
+  raising this limit.
+
+  Note that the limit is the number of words passed on the stack.
+  If the calling convention passes part of the tuple in registers, the
+  maximum number of tuple elements may be larger. Elements can also
+  take multiple words on the stack (for example Double# on a 32 bit
+  platform).
+
+ -}
+maxTupleNativeStackSize :: WordOff
+maxTupleNativeStackSize = 62
+
+{-
+  Maximum number of supported registers for returning tuples.
+
+  If GHC uses more more than these (because of a change in the calling
+  convention or a new platform) mkTupleInfoSig will panic.
+
+  You can raise the limits after modifying stg_ctoi_t and stg_ret_t
+  (StgMiscClosures.cmm) to save and restore the additional registers.
+ -}
+maxTupleVanillaRegs, maxTupleFloatRegs, maxTupleDoubleRegs,
+    maxTupleLongRegs :: Int
+maxTupleVanillaRegs = 6
+maxTupleFloatRegs = 6
+maxTupleDoubleRegs = 6
+maxTupleLongRegs = 1
+
+{-
+  Construct the tuple_info word that stg_ctoi_t and stg_ret_t use
+  to convert a tuple between the native calling convention and the
+  interpreter.
+
+  See Note [GHCi tuple layout] for more information.
+ -}
+mkTupleInfoSig :: TupleInfo -> Word32
+mkTupleInfoSig ti@TupleInfo{..}
+  | tupleNativeStackSize > maxTupleNativeStackSize =
+    pprPanic "mkTupleInfoSig: tuple too big for the bytecode compiler"
+             (ppr tupleNativeStackSize <+> text "stack words." <+>
+              text "Use -fobject-code to get around this limit"
+             )
+  | tupleVanillaRegs `shiftR` maxTupleVanillaRegs /= 0 =
+    pprPanic "mkTupleInfoSig: too many vanilla registers" (ppr tupleVanillaRegs)
+  | tupleLongRegs `shiftR` maxTupleLongRegs /= 0 =
+    pprPanic "mkTupleInfoSig: too many long registers" (ppr tupleLongRegs)
+  | tupleFloatRegs `shiftR` maxTupleFloatRegs /= 0 =
+    pprPanic "mkTupleInfoSig: too many float registers" (ppr tupleFloatRegs)
+  | tupleDoubleRegs `shiftR` maxTupleDoubleRegs /= 0 =
+    pprPanic "mkTupleInfoSig: too many double registers" (ppr tupleDoubleRegs)
+  {-
+    Check that we can pack the register counts/bitmaps and stack size
+    in the information word. In particular we check that each component
+    fits in the bits we have reserved for it.
+
+    This overlaps with some of the above checks. It's likely that if the
+    number of registers changes, the number of bits will also need to be
+    updated.
+   -}
+  | tupleNativeStackSize < 16384 && -- 14 bits stack usage
+    tupleDoubleRegs < 64 && -- 6 bit bitmap (these can be shared with float)
+    tupleFloatRegs < 64 && -- 6 bit bitmap (these can be shared with double)
+    tupleLongRegs < 4 && -- 2 bit bitmap
+    tupleVanillaRegs < 65536 && -- 4 bit count (tupleVanillaRegs is still a bitmap)
+    -- check that there are no "holes", i.e. that R1..Rn are all in use
+    tupleVanillaRegs .&. (tupleVanillaRegs + 1) == 0
+    = fromIntegral tupleNativeStackSize .|.
+      unRegBitmap (tupleLongRegs `shiftL` 14) .|.
+      unRegBitmap (tupleDoubleRegs `shiftL` 16) .|.
+      unRegBitmap (tupleFloatRegs `shiftL` 22) .|.
+      fromIntegral (countTrailingZeros (1 + tupleVanillaRegs) `shiftL` 28)
+  | otherwise = pprPanic "mkTupleInfoSig: unsupported tuple shape" (ppr ti)
+
+mkTupleInfoLit :: Platform -> TupleInfo -> Literal
+mkTupleInfoLit platform tuple_info =
+  mkLitWord platform . fromIntegral $ mkTupleInfoSig tuple_info
+
 -- Make lists of host-sized words for literals, so that when the
 -- words are placed in memory at increasing addresses, the
 -- bit pattern is correct for the host's word size and endianness.
diff --git a/compiler/GHC/ByteCode/Instr.hs b/compiler/GHC/ByteCode/Instr.hs
index a8cc569548..5b0b20e38d 100644
--- a/compiler/GHC/ByteCode/Instr.hs
+++ b/compiler/GHC/ByteCode/Instr.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP              #-}
+{-# LANGUAGE FlexibleContexts #-}
 {-# OPTIONS_GHC -funbox-strict-fields #-}
 --
 --  (c) The University of Glasgow 2002-2006
@@ -17,22 +18,19 @@ import GHC.ByteCode.Types
 import GHCi.RemoteTypes
 import GHCi.FFI (C_ffi_cif)
 import GHC.StgToCmm.Layout     ( ArgRep(..) )
-import GHC.Core.Ppr
 import GHC.Utils.Outputable
-import GHC.Data.FastString
 import GHC.Types.Name
 import GHC.Types.Unique
-import GHC.Types.Id
-import GHC.Core
 import GHC.Types.Literal
 import GHC.Core.DataCon
-import GHC.Types.Var.Set
 import GHC.Builtin.PrimOps
 import GHC.Runtime.Heap.Layout
 
 import Data.Word
 import GHC.Stack.CCS (CostCentre)
 
+import GHC.Stg.Syntax
+
 -- ----------------------------------------------------------------------------
 -- Bytecode instructions
 
@@ -45,7 +43,7 @@ data ProtoBCO a
         protoBCOBitmapSize :: Word16,
         protoBCOArity      :: Int,
         -- what the BCO came from, for debugging only
-        protoBCOExpr       :: Either  [AnnAlt Id DVarSet] (AnnExpr Id DVarSet),
+        protoBCOExpr       :: Either [CgStgAlt] CgStgRhs,
         -- malloc'd pointers
         protoBCOFFIs       :: [FFIInfo]
    }
@@ -91,6 +89,9 @@ data BCInstr
    -- Push an alt continuation
    | PUSH_ALTS          (ProtoBCO Name)
    | PUSH_ALTS_UNLIFTED (ProtoBCO Name) ArgRep
+   | PUSH_ALTS_TUPLE    (ProtoBCO Name) -- continuation
+                        !TupleInfo
+                        (ProtoBCO Name) -- tuple return BCO
 
    -- Pushing 8, 16 and 32 bits of padding (for constructors).
    | PUSH_PAD8
@@ -173,8 +174,9 @@ data BCInstr
 
    -- To Infinity And Beyond
    | ENTER
-   | RETURN             -- return a lifted value
+   | RETURN            -- return a lifted value
    | RETURN_UBX ArgRep -- return an unlifted value, here's its rep
+   | RETURN_TUPLE      -- return an unboxed tuple (info already on stack)
 
    -- Breakpoints
    | BRK_FUN          Word16 Unique (RemotePtr CostCentre)
@@ -193,36 +195,45 @@ instance Outputable a => Outputable (ProtoBCO a) where
       = (text "ProtoBCO" <+> ppr name <> char '#' <> int arity
                 <+> text (show ffis) <> colon)
         $$ nest 3 (case origin of
-                      Left alts -> vcat (zipWith (<+>) (char '{' : repeat (char ';'))
-                                                       (map (pprCoreAltShort.deAnnAlt) alts)) <+> char '}'
-                      Right rhs -> pprCoreExprShort (deAnnotate rhs))
+                      Left alts ->
+                        vcat (zipWith (<+>) (char '{' : repeat (char ';'))
+                             (map (pprStgAltShort shortStgPprOpts) alts))
+                      Right rhs ->
+                        pprStgRhsShort shortStgPprOpts rhs
+                  )
         $$ nest 3 (text "bitmap: " <+> text (show bsize) <+> ppr bitmap)
         $$ nest 3 (vcat (map ppr instrs))
 
--- Print enough of the Core expression to enable the reader to find
--- the expression in the -ddump-prep output.  That is, we need to
+-- Print enough of the STG expression to enable the reader to find
+-- the expression in the -ddump-stg output.  That is, we need to
 -- include at least a binder.
 
-pprCoreExprShort :: CoreExpr -> SDoc
-pprCoreExprShort expr@(Lam _ _)
-  = let
-        (bndrs, _) = collectBinders expr
-    in
-    char '\\' <+> sep (map (pprBndr LambdaBind) bndrs) <+> arrow <+> text "..."
-
-pprCoreExprShort (Case _expr var _ty _alts)
- = text "case of" <+> ppr var
-
-pprCoreExprShort (Let (NonRec x _) _) = text "let" <+> ppr x <+> ptext (sLit ("= ... in ..."))
-pprCoreExprShort (Let (Rec bs) _) = text "let {" <+> ppr (fst (head bs)) <+> ptext (sLit ("= ...; ... } in ..."))
-
-pprCoreExprShort (Tick t e) = ppr t <+> pprCoreExprShort e
-pprCoreExprShort (Cast e _) = pprCoreExprShort e <+> text "`cast` T"
+pprStgExprShort :: OutputablePass pass => StgPprOpts -> GenStgExpr pass -> SDoc
+pprStgExprShort _ (StgCase _expr var _ty _alts) =
+  text "case of" <+> ppr var
+pprStgExprShort _ (StgLet _ bnd _) =
+  text "let" <+> pprStgBindShort bnd <+> text "in ..."
+pprStgExprShort _ (StgLetNoEscape _ bnd _) =
+  text "let-no-escape" <+> pprStgBindShort bnd <+> text "in ..."
+pprStgExprShort opts (StgTick t e) = ppr t <+> pprStgExprShort opts e
+pprStgExprShort opts e = pprStgExpr opts e
+
+pprStgBindShort :: OutputablePass pass => GenStgBinding pass -> SDoc
+pprStgBindShort (StgNonRec x _) =
+  ppr x <+> text "= ..."
+pprStgBindShort (StgRec bs) =
+  char '{' <+> ppr (fst (head bs)) <+> text "= ...; ... }"
+
+pprStgAltShort :: OutputablePass pass => StgPprOpts -> GenStgAlt pass -> SDoc
+pprStgAltShort opts (con, args, expr) =
+  ppr con <+> sep (map ppr args) <+> text "->" <+> pprStgExprShort opts expr
+
+pprStgRhsShort :: OutputablePass pass => StgPprOpts -> GenStgRhs pass -> SDoc
+pprStgRhsShort opts (StgRhsClosure _ext _cc upd_flag args body) =
+  hang (hsep [ char '\\' <> ppr upd_flag, brackets (interppSP args) ])
+       4 (pprStgExprShort opts body)
+pprStgRhsShort opts rhs = pprStgRhs opts rhs
 
-pprCoreExprShort e = pprCoreExpr e
-
-pprCoreAltShort :: CoreAlt -> SDoc
-pprCoreAltShort (Alt con args expr) = ppr con <+> sep (map ppr args) <+> text "->" <+> pprCoreExprShort expr
 
 instance Outputable BCInstr where
    ppr (STKCHECK n)          = text "STKCHECK" <+> ppr n
@@ -239,8 +250,13 @@ instance Outputable BCInstr where
    ppr (PUSH_PRIMOP op)      = text "PUSH_G  " <+> text "GHC.PrimopWrappers."
                                                <> ppr op
    ppr (PUSH_BCO bco)        = hang (text "PUSH_BCO") 2 (ppr bco)
+
    ppr (PUSH_ALTS bco)       = hang (text "PUSH_ALTS") 2 (ppr bco)
    ppr (PUSH_ALTS_UNLIFTED bco pk) = hang (text "PUSH_ALTS_UNLIFTED" <+> ppr pk) 2 (ppr bco)
+   ppr (PUSH_ALTS_TUPLE bco tuple_info tuple_bco) =
+                               hang (text "PUSH_ALTS_TUPLE" <+> ppr tuple_info)
+                                    2
+                                    (ppr tuple_bco $+$ ppr bco)
 
    ppr PUSH_PAD8             = text "PUSH_PAD8"
    ppr PUSH_PAD16            = text "PUSH_PAD16"
@@ -297,8 +313,11 @@ instance Outputable BCInstr where
    ppr ENTER                 = text "ENTER"
    ppr RETURN                = text "RETURN"
    ppr (RETURN_UBX pk)       = text "RETURN_UBX  " <+> ppr pk
+   ppr (RETURN_TUPLE)        = text "RETURN_TUPLE"
    ppr (BRK_FUN index uniq _cc) = text "BRK_FUN" <+> ppr index <+> ppr uniq <+> text "<cc>"
 
+
+
 -- -----------------------------------------------------------------------------
 -- The stack use, in words, of each bytecode insn.  These _must_ be
 -- correct, or overestimates of reality, to be safe.
@@ -326,8 +345,16 @@ bciStackUse PUSH32_W{}            = 1  -- takes exactly 1 word
 bciStackUse PUSH_G{}              = 1
 bciStackUse PUSH_PRIMOP{}         = 1
 bciStackUse PUSH_BCO{}            = 1
-bciStackUse (PUSH_ALTS bco)       = 2 + protoBCOStackUse bco
-bciStackUse (PUSH_ALTS_UNLIFTED bco _) = 2 + protoBCOStackUse bco
+bciStackUse (PUSH_ALTS bco)       = 2 {- profiling only, restore CCCS -} +
+                                    3 + protoBCOStackUse bco
+bciStackUse (PUSH_ALTS_UNLIFTED bco _) = 2 {- profiling only, restore CCCS -} +
+                                         4 + protoBCOStackUse bco
+bciStackUse (PUSH_ALTS_TUPLE bco info _) =
+   -- (tuple_bco, tuple_info word, cont_bco, stg_ctoi_t)
+   -- tuple
+   -- (tuple_info, tuple_bco, stg_ret_t)
+   1 {- profiling only -} +
+   7 + fromIntegral (tupleSize info) + protoBCOStackUse bco
 bciStackUse (PUSH_PAD8)           = 1  -- overapproximation
 bciStackUse (PUSH_PAD16)          = 1  -- overapproximation
 bciStackUse (PUSH_PAD32)          = 1  -- overapproximation on 64bit arch
@@ -365,7 +392,8 @@ bciStackUse CASEFAIL{}            = 0
 bciStackUse JMP{}                 = 0
 bciStackUse ENTER{}               = 0
 bciStackUse RETURN{}              = 0
-bciStackUse RETURN_UBX{}          = 1
+bciStackUse RETURN_UBX{}          = 1 -- pushes stg_ret_X for some X
+bciStackUse RETURN_TUPLE{}        = 1 -- pushes stg_ret_t header
 bciStackUse CCALL{}               = 0
 bciStackUse SWIZZLE{}             = 0
 bciStackUse BRK_FUN{}             = 0
diff --git a/compiler/GHC/ByteCode/Types.hs b/compiler/GHC/ByteCode/Types.hs
index 97304cb7f4..02c117d716 100644
--- a/compiler/GHC/ByteCode/Types.hs
+++ b/compiler/GHC/ByteCode/Types.hs
@@ -6,7 +6,11 @@
 
 -- | Bytecode assembler types
 module GHC.ByteCode.Types
-  ( CompiledByteCode(..), seqCompiledByteCode, FFIInfo(..)
+  ( CompiledByteCode(..), seqCompiledByteCode
+  , FFIInfo(..)
+  , RegBitmap(..)
+  , TupleInfo(..), voidTupleInfo
+  , ByteOff(..), WordOff(..)
   , UnlinkedBCO(..), BCOPtr(..), BCONPtr(..)
   , ItblEnv, ItblPtr(..)
   , CgBreakInfo(..)
@@ -68,6 +72,61 @@ seqCompiledByteCode CompiledByteCode{..} =
   rnf bc_strs `seq`
   rnf (fmap seqModBreaks bc_breaks)
 
+newtype ByteOff = ByteOff Int
+    deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Outputable)
+
+newtype WordOff = WordOff Int
+    deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Outputable)
+
+newtype RegBitmap = RegBitmap { unRegBitmap :: Word32 }
+    deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Bits, FiniteBits, Outputable)
+
+{- Note [GHCi TupleInfo]
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+   This contains the data we need for passing unboxed tuples between
+   bytecode and native code
+
+   In general we closely follow the native calling convention that
+   GHC uses for unboxed tuples, but we don't use any registers in
+   bytecode. All tuple elements are expanded to use a full register
+   or a full word on the stack.
+
+   The position of tuple elements that are returned on the stack in
+   the native calling convention is unchanged when returning the same
+   tuple in bytecode.
+
+   The order of the remaining elements is determined by the register in
+   which they would have been returned, rather than by their position in
+   the tuple in the Haskell source code. This makes jumping between bytecode
+   and native code easier: A map of live registers is enough to convert the
+   tuple.
+
+   See GHC.StgToByteCode.layoutTuple for more details.
+-}
+data TupleInfo = TupleInfo
+  { tupleSize            :: !WordOff   -- total size of tuple in words
+  , tupleVanillaRegs     :: !RegBitmap -- vanilla registers used
+  , tupleLongRegs        :: !RegBitmap -- long registers used
+  , tupleFloatRegs       :: !RegBitmap -- float registers used
+  , tupleDoubleRegs      :: !RegBitmap -- double registers used
+  , tupleNativeStackSize :: !WordOff {- words spilled on the stack by
+                                        GHCs native calling convention -}
+  } deriving (Show)
+
+instance Outputable TupleInfo where
+  ppr TupleInfo{..} = text "<size" <+> ppr tupleSize <+>
+                      text "stack" <+> ppr tupleNativeStackSize <+>
+                      text "regs"  <+>
+                          char 'R' <> ppr tupleVanillaRegs <+>
+                          char 'L' <> ppr tupleLongRegs <+>
+                          char 'F' <> ppr tupleFloatRegs <+>
+                          char 'D' <> ppr tupleDoubleRegs <>
+                      char '>'
+
+voidTupleInfo :: TupleInfo
+voidTupleInfo = TupleInfo 0 0 0 0 0 0
+
 type ItblEnv = NameEnv (Name, ItblPtr)
         -- We need the Name in the range so we know which
         -- elements to filter out when unloading a module
author	Luite Stegeman <stegeman@gmail.com>	2021-01-22 00:09:17 +0100
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2021-03-20 07:49:15 -0400
commit	1f94e0f7601f8e22fdd81a47f130650265a44196 (patch)
tree	d06d02317049b56763b2f1da27f71f3663efa5a0 /compiler/GHC/ByteCode
parent	7de3532f0317032f75b76150c5d3a6f76178be04 (diff)
download	haskell-1f94e0f7601f8e22fdd81a47f130650265a44196.tar.gz