Generate GHCi bytecode from STG instead of Core and support unboxed

tuples and sums. fixes #1257
author: Luite Stegeman <stegeman@gmail.com> 2021-01-22 00:09:17 +0100
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2021-03-20 07:49:15 -0400
commit: 1f94e0f7601f8e22fdd81a47f130650265a44196 (patch)
tree: d06d02317049b56763b2f1da27f71f3663efa5a0 /compiler
parent: 7de3532f0317032f75b76150c5d3a6f76178be04 (diff)
download: haskell-1f94e0f7601f8e22fdd81a47f130650265a44196.tar.gz
16 files changed, 989 insertions, 608 deletions
diff --git a/compiler/GHC/ByteCode/Asm.hs b/compiler/GHC/ByteCode/Asm.hs
index 3f88187960..c58328f57c 100644
--- a/compiler/GHC/ByteCode/Asm.hs
+++ b/compiler/GHC/ByteCode/Asm.hs
@@ -9,10 +9,10 @@
 -- | Bytecode assembler and linker
 module GHC.ByteCode.Asm (
         assembleBCOs, assembleOneBCO,
-
         bcoFreeNames,
         SizedSeq, sizeSS, ssElts,
-        iNTERP_STACK_CHECK_THRESH
+        iNTERP_STACK_CHECK_THRESH,
+        mkTupleInfoLit
   ) where
 
 #include "HsVersions.h"
@@ -27,7 +27,7 @@ import GHC.ByteCode.InfoTable
 import GHC.ByteCode.Types
 import GHCi.RemoteTypes
 import GHC.Runtime.Interpreter
-import GHC.Runtime.Heap.Layout
+import GHC.Runtime.Heap.Layout hiding ( WordOff )
 
 import GHC.Types.Name
 import GHC.Types.Name.Set
@@ -381,6 +381,16 @@ assembleI platform i = case i of
                            -> do let ul_bco = assembleBCO platform proto
                                  p <- ioptr (liftM BCOPtrBCO ul_bco)
                                  emit (push_alts pk) [Op p]
+  PUSH_ALTS_TUPLE proto tuple_info tuple_proto
+                           -> do let ul_bco = assembleBCO platform proto
+                                     ul_tuple_bco = assembleBCO platform
+                                                                tuple_proto
+                                 p <- ioptr (liftM BCOPtrBCO ul_bco)
+                                 p_tup <- ioptr (liftM BCOPtrBCO ul_tuple_bco)
+                                 info <- int (fromIntegral $
+                                              mkTupleInfoSig tuple_info)
+                                 emit bci_PUSH_ALTS_T
+                                      [Op p, Op info, Op p_tup]
   PUSH_PAD8                -> emit bci_PUSH_PAD8 []
   PUSH_PAD16               -> emit bci_PUSH_PAD16 []
   PUSH_PAD32               -> emit bci_PUSH_PAD32 []
@@ -439,6 +449,7 @@ assembleI platform i = case i of
   ENTER                    -> emit bci_ENTER []
   RETURN                   -> emit bci_RETURN []
   RETURN_UBX rep           -> emit (return_ubx rep) []
+  RETURN_TUPLE             -> emit bci_RETURN_T []
   CCALL off m_addr i       -> do np <- addr m_addr
                                  emit bci_CCALL [SmallOp off, Op np, SmallOp i]
   BRK_FUN index uniq cc    -> do p1 <- ptr BCOPtrBreakArray
@@ -516,6 +527,90 @@ return_ubx V16 = error "return_ubx: vector"
 return_ubx V32 = error "return_ubx: vector"
 return_ubx V64 = error "return_ubx: vector"
 
+{-
+  we can only handle up to a fixed number of words on the stack,
+  because we need a stg_ctoi_tN stack frame for each size N. See
+  Note [unboxed tuple bytecodes and tuple_BCO].
+
+  If needed, you can support larger tuples by adding more in
+  StgMiscClosures.cmm, Interpreter.c and MiscClosures.h and
+  raising this limit.
+
+  Note that the limit is the number of words passed on the stack.
+  If the calling convention passes part of the tuple in registers, the
+  maximum number of tuple elements may be larger. Elements can also
+  take multiple words on the stack (for example Double# on a 32 bit
+  platform).
+
+ -}
+maxTupleNativeStackSize :: WordOff
+maxTupleNativeStackSize = 62
+
+{-
+  Maximum number of supported registers for returning tuples.
+
+  If GHC uses more more than these (because of a change in the calling
+  convention or a new platform) mkTupleInfoSig will panic.
+
+  You can raise the limits after modifying stg_ctoi_t and stg_ret_t
+  (StgMiscClosures.cmm) to save and restore the additional registers.
+ -}
+maxTupleVanillaRegs, maxTupleFloatRegs, maxTupleDoubleRegs,
+    maxTupleLongRegs :: Int
+maxTupleVanillaRegs = 6
+maxTupleFloatRegs = 6
+maxTupleDoubleRegs = 6
+maxTupleLongRegs = 1
+
+{-
+  Construct the tuple_info word that stg_ctoi_t and stg_ret_t use
+  to convert a tuple between the native calling convention and the
+  interpreter.
+
+  See Note [GHCi tuple layout] for more information.
+ -}
+mkTupleInfoSig :: TupleInfo -> Word32
+mkTupleInfoSig ti@TupleInfo{..}
+  | tupleNativeStackSize > maxTupleNativeStackSize =
+    pprPanic "mkTupleInfoSig: tuple too big for the bytecode compiler"
+             (ppr tupleNativeStackSize <+> text "stack words." <+>
+              text "Use -fobject-code to get around this limit"
+             )
+  | tupleVanillaRegs `shiftR` maxTupleVanillaRegs /= 0 =
+    pprPanic "mkTupleInfoSig: too many vanilla registers" (ppr tupleVanillaRegs)
+  | tupleLongRegs `shiftR` maxTupleLongRegs /= 0 =
+    pprPanic "mkTupleInfoSig: too many long registers" (ppr tupleLongRegs)
+  | tupleFloatRegs `shiftR` maxTupleFloatRegs /= 0 =
+    pprPanic "mkTupleInfoSig: too many float registers" (ppr tupleFloatRegs)
+  | tupleDoubleRegs `shiftR` maxTupleDoubleRegs /= 0 =
+    pprPanic "mkTupleInfoSig: too many double registers" (ppr tupleDoubleRegs)
+  {-
+    Check that we can pack the register counts/bitmaps and stack size
+    in the information word. In particular we check that each component
+    fits in the bits we have reserved for it.
+
+    This overlaps with some of the above checks. It's likely that if the
+    number of registers changes, the number of bits will also need to be
+    updated.
+   -}
+  | tupleNativeStackSize < 16384 && -- 14 bits stack usage
+    tupleDoubleRegs < 64 && -- 6 bit bitmap (these can be shared with float)
+    tupleFloatRegs < 64 && -- 6 bit bitmap (these can be shared with double)
+    tupleLongRegs < 4 && -- 2 bit bitmap
+    tupleVanillaRegs < 65536 && -- 4 bit count (tupleVanillaRegs is still a bitmap)
+    -- check that there are no "holes", i.e. that R1..Rn are all in use
+    tupleVanillaRegs .&. (tupleVanillaRegs + 1) == 0
+    = fromIntegral tupleNativeStackSize .|.
+      unRegBitmap (tupleLongRegs `shiftL` 14) .|.
+      unRegBitmap (tupleDoubleRegs `shiftL` 16) .|.
+      unRegBitmap (tupleFloatRegs `shiftL` 22) .|.
+      fromIntegral (countTrailingZeros (1 + tupleVanillaRegs) `shiftL` 28)
+  | otherwise = pprPanic "mkTupleInfoSig: unsupported tuple shape" (ppr ti)
+
+mkTupleInfoLit :: Platform -> TupleInfo -> Literal
+mkTupleInfoLit platform tuple_info =
+  mkLitWord platform . fromIntegral $ mkTupleInfoSig tuple_info
+
 -- Make lists of host-sized words for literals, so that when the
 -- words are placed in memory at increasing addresses, the
 -- bit pattern is correct for the host's word size and endianness.
diff --git a/compiler/GHC/ByteCode/Instr.hs b/compiler/GHC/ByteCode/Instr.hs
index a8cc569548..5b0b20e38d 100644
--- a/compiler/GHC/ByteCode/Instr.hs
+++ b/compiler/GHC/ByteCode/Instr.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP              #-}
+{-# LANGUAGE FlexibleContexts #-}
 {-# OPTIONS_GHC -funbox-strict-fields #-}
 --
 --  (c) The University of Glasgow 2002-2006
@@ -17,22 +18,19 @@ import GHC.ByteCode.Types
 import GHCi.RemoteTypes
 import GHCi.FFI (C_ffi_cif)
 import GHC.StgToCmm.Layout     ( ArgRep(..) )
-import GHC.Core.Ppr
 import GHC.Utils.Outputable
-import GHC.Data.FastString
 import GHC.Types.Name
 import GHC.Types.Unique
-import GHC.Types.Id
-import GHC.Core
 import GHC.Types.Literal
 import GHC.Core.DataCon
-import GHC.Types.Var.Set
 import GHC.Builtin.PrimOps
 import GHC.Runtime.Heap.Layout
 
 import Data.Word
 import GHC.Stack.CCS (CostCentre)
 
+import GHC.Stg.Syntax
+
 -- ----------------------------------------------------------------------------
 -- Bytecode instructions
 
@@ -45,7 +43,7 @@ data ProtoBCO a
         protoBCOBitmapSize :: Word16,
         protoBCOArity      :: Int,
         -- what the BCO came from, for debugging only
-        protoBCOExpr       :: Either  [AnnAlt Id DVarSet] (AnnExpr Id DVarSet),
+        protoBCOExpr       :: Either [CgStgAlt] CgStgRhs,
         -- malloc'd pointers
         protoBCOFFIs       :: [FFIInfo]
    }
@@ -91,6 +89,9 @@ data BCInstr
    -- Push an alt continuation
    | PUSH_ALTS          (ProtoBCO Name)
    | PUSH_ALTS_UNLIFTED (ProtoBCO Name) ArgRep
+   | PUSH_ALTS_TUPLE    (ProtoBCO Name) -- continuation
+                        !TupleInfo
+                        (ProtoBCO Name) -- tuple return BCO
 
    -- Pushing 8, 16 and 32 bits of padding (for constructors).
    | PUSH_PAD8
@@ -173,8 +174,9 @@ data BCInstr
 
    -- To Infinity And Beyond
    | ENTER
-   | RETURN             -- return a lifted value
+   | RETURN            -- return a lifted value
    | RETURN_UBX ArgRep -- return an unlifted value, here's its rep
+   | RETURN_TUPLE      -- return an unboxed tuple (info already on stack)
 
    -- Breakpoints
    | BRK_FUN          Word16 Unique (RemotePtr CostCentre)
@@ -193,36 +195,45 @@ instance Outputable a => Outputable (ProtoBCO a) where
       = (text "ProtoBCO" <+> ppr name <> char '#' <> int arity
                 <+> text (show ffis) <> colon)
         $$ nest 3 (case origin of
-                      Left alts -> vcat (zipWith (<+>) (char '{' : repeat (char ';'))
-                                                       (map (pprCoreAltShort.deAnnAlt) alts)) <+> char '}'
-                      Right rhs -> pprCoreExprShort (deAnnotate rhs))
+                      Left alts ->
+                        vcat (zipWith (<+>) (char '{' : repeat (char ';'))
+                             (map (pprStgAltShort shortStgPprOpts) alts))
+                      Right rhs ->
+                        pprStgRhsShort shortStgPprOpts rhs
+                  )
         $$ nest 3 (text "bitmap: " <+> text (show bsize) <+> ppr bitmap)
         $$ nest 3 (vcat (map ppr instrs))
 
--- Print enough of the Core expression to enable the reader to find
--- the expression in the -ddump-prep output.  That is, we need to
+-- Print enough of the STG expression to enable the reader to find
+-- the expression in the -ddump-stg output.  That is, we need to
 -- include at least a binder.
 
-pprCoreExprShort :: CoreExpr -> SDoc
-pprCoreExprShort expr@(Lam _ _)
-  = let
-        (bndrs, _) = collectBinders expr
-    in
-    char '\\' <+> sep (map (pprBndr LambdaBind) bndrs) <+> arrow <+> text "..."
-
-pprCoreExprShort (Case _expr var _ty _alts)
- = text "case of" <+> ppr var
-
-pprCoreExprShort (Let (NonRec x _) _) = text "let" <+> ppr x <+> ptext (sLit ("= ... in ..."))
-pprCoreExprShort (Let (Rec bs) _) = text "let {" <+> ppr (fst (head bs)) <+> ptext (sLit ("= ...; ... } in ..."))
-
-pprCoreExprShort (Tick t e) = ppr t <+> pprCoreExprShort e
-pprCoreExprShort (Cast e _) = pprCoreExprShort e <+> text "`cast` T"
+pprStgExprShort :: OutputablePass pass => StgPprOpts -> GenStgExpr pass -> SDoc
+pprStgExprShort _ (StgCase _expr var _ty _alts) =
+  text "case of" <+> ppr var
+pprStgExprShort _ (StgLet _ bnd _) =
+  text "let" <+> pprStgBindShort bnd <+> text "in ..."
+pprStgExprShort _ (StgLetNoEscape _ bnd _) =
+  text "let-no-escape" <+> pprStgBindShort bnd <+> text "in ..."
+pprStgExprShort opts (StgTick t e) = ppr t <+> pprStgExprShort opts e
+pprStgExprShort opts e = pprStgExpr opts e
+
+pprStgBindShort :: OutputablePass pass => GenStgBinding pass -> SDoc
+pprStgBindShort (StgNonRec x _) =
+  ppr x <+> text "= ..."
+pprStgBindShort (StgRec bs) =
+  char '{' <+> ppr (fst (head bs)) <+> text "= ...; ... }"
+
+pprStgAltShort :: OutputablePass pass => StgPprOpts -> GenStgAlt pass -> SDoc
+pprStgAltShort opts (con, args, expr) =
+  ppr con <+> sep (map ppr args) <+> text "->" <+> pprStgExprShort opts expr
+
+pprStgRhsShort :: OutputablePass pass => StgPprOpts -> GenStgRhs pass -> SDoc
+pprStgRhsShort opts (StgRhsClosure _ext _cc upd_flag args body) =
+  hang (hsep [ char '\\' <> ppr upd_flag, brackets (interppSP args) ])
+       4 (pprStgExprShort opts body)
+pprStgRhsShort opts rhs = pprStgRhs opts rhs
 
-pprCoreExprShort e = pprCoreExpr e
-
-pprCoreAltShort :: CoreAlt -> SDoc
-pprCoreAltShort (Alt con args expr) = ppr con <+> sep (map ppr args) <+> text "->" <+> pprCoreExprShort expr
 
 instance Outputable BCInstr where
    ppr (STKCHECK n)          = text "STKCHECK" <+> ppr n
@@ -239,8 +250,13 @@ instance Outputable BCInstr where
    ppr (PUSH_PRIMOP op)      = text "PUSH_G  " <+> text "GHC.PrimopWrappers."
                                                <> ppr op
    ppr (PUSH_BCO bco)        = hang (text "PUSH_BCO") 2 (ppr bco)
+
    ppr (PUSH_ALTS bco)       = hang (text "PUSH_ALTS") 2 (ppr bco)
    ppr (PUSH_ALTS_UNLIFTED bco pk) = hang (text "PUSH_ALTS_UNLIFTED" <+> ppr pk) 2 (ppr bco)
+   ppr (PUSH_ALTS_TUPLE bco tuple_info tuple_bco) =
+                               hang (text "PUSH_ALTS_TUPLE" <+> ppr tuple_info)
+                                    2
+                                    (ppr tuple_bco $+$ ppr bco)
 
    ppr PUSH_PAD8             = text "PUSH_PAD8"
    ppr PUSH_PAD16            = text "PUSH_PAD16"
@@ -297,8 +313,11 @@ instance Outputable BCInstr where
    ppr ENTER                 = text "ENTER"
    ppr RETURN                = text "RETURN"
    ppr (RETURN_UBX pk)       = text "RETURN_UBX  " <+> ppr pk
+   ppr (RETURN_TUPLE)        = text "RETURN_TUPLE"
    ppr (BRK_FUN index uniq _cc) = text "BRK_FUN" <+> ppr index <+> ppr uniq <+> text "<cc>"
 
+
+
 -- -----------------------------------------------------------------------------
 -- The stack use, in words, of each bytecode insn.  These _must_ be
 -- correct, or overestimates of reality, to be safe.
@@ -326,8 +345,16 @@ bciStackUse PUSH32_W{}            = 1  -- takes exactly 1 word
 bciStackUse PUSH_G{}              = 1
 bciStackUse PUSH_PRIMOP{}         = 1
 bciStackUse PUSH_BCO{}            = 1
-bciStackUse (PUSH_ALTS bco)       = 2 + protoBCOStackUse bco
-bciStackUse (PUSH_ALTS_UNLIFTED bco _) = 2 + protoBCOStackUse bco
+bciStackUse (PUSH_ALTS bco)       = 2 {- profiling only, restore CCCS -} +
+                                    3 + protoBCOStackUse bco
+bciStackUse (PUSH_ALTS_UNLIFTED bco _) = 2 {- profiling only, restore CCCS -} +
+                                         4 + protoBCOStackUse bco
+bciStackUse (PUSH_ALTS_TUPLE bco info _) =
+   -- (tuple_bco, tuple_info word, cont_bco, stg_ctoi_t)
+   -- tuple
+   -- (tuple_info, tuple_bco, stg_ret_t)
+   1 {- profiling only -} +
+   7 + fromIntegral (tupleSize info) + protoBCOStackUse bco
 bciStackUse (PUSH_PAD8)           = 1  -- overapproximation
 bciStackUse (PUSH_PAD16)          = 1  -- overapproximation
 bciStackUse (PUSH_PAD32)          = 1  -- overapproximation on 64bit arch
@@ -365,7 +392,8 @@ bciStackUse CASEFAIL{}            = 0
 bciStackUse JMP{}                 = 0
 bciStackUse ENTER{}               = 0
 bciStackUse RETURN{}              = 0
-bciStackUse RETURN_UBX{}          = 1
+bciStackUse RETURN_UBX{}          = 1 -- pushes stg_ret_X for some X
+bciStackUse RETURN_TUPLE{}        = 1 -- pushes stg_ret_t header
 bciStackUse CCALL{}               = 0
 bciStackUse SWIZZLE{}             = 0
 bciStackUse BRK_FUN{}             = 0
diff --git a/compiler/GHC/ByteCode/Types.hs b/compiler/GHC/ByteCode/Types.hs
index 97304cb7f4..02c117d716 100644
--- a/compiler/GHC/ByteCode/Types.hs
+++ b/compiler/GHC/ByteCode/Types.hs
@@ -6,7 +6,11 @@
 
 -- | Bytecode assembler types
 module GHC.ByteCode.Types
-  ( CompiledByteCode(..), seqCompiledByteCode, FFIInfo(..)
+  ( CompiledByteCode(..), seqCompiledByteCode
+  , FFIInfo(..)
+  , RegBitmap(..)
+  , TupleInfo(..), voidTupleInfo
+  , ByteOff(..), WordOff(..)
   , UnlinkedBCO(..), BCOPtr(..), BCONPtr(..)
   , ItblEnv, ItblPtr(..)
   , CgBreakInfo(..)
@@ -68,6 +72,61 @@ seqCompiledByteCode CompiledByteCode{..} =
   rnf bc_strs `seq`
   rnf (fmap seqModBreaks bc_breaks)
 
+newtype ByteOff = ByteOff Int
+    deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Outputable)
+
+newtype WordOff = WordOff Int
+    deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Outputable)
+
+newtype RegBitmap = RegBitmap { unRegBitmap :: Word32 }
+    deriving (Enum, Eq, Show, Integral, Num, Ord, Real, Bits, FiniteBits, Outputable)
+
+{- Note [GHCi TupleInfo]
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+   This contains the data we need for passing unboxed tuples between
+   bytecode and native code
+
+   In general we closely follow the native calling convention that
+   GHC uses for unboxed tuples, but we don't use any registers in
+   bytecode. All tuple elements are expanded to use a full register
+   or a full word on the stack.
+
+   The position of tuple elements that are returned on the stack in
+   the native calling convention is unchanged when returning the same
+   tuple in bytecode.
+
+   The order of the remaining elements is determined by the register in
+   which they would have been returned, rather than by their position in
+   the tuple in the Haskell source code. This makes jumping between bytecode
+   and native code easier: A map of live registers is enough to convert the
+   tuple.
+
+   See GHC.StgToByteCode.layoutTuple for more details.
+-}
+data TupleInfo = TupleInfo
+  { tupleSize            :: !WordOff   -- total size of tuple in words
+  , tupleVanillaRegs     :: !RegBitmap -- vanilla registers used
+  , tupleLongRegs        :: !RegBitmap -- long registers used
+  , tupleFloatRegs       :: !RegBitmap -- float registers used
+  , tupleDoubleRegs      :: !RegBitmap -- double registers used
+  , tupleNativeStackSize :: !WordOff {- words spilled on the stack by
+                                        GHCs native calling convention -}
+  } deriving (Show)
+
+instance Outputable TupleInfo where
+  ppr TupleInfo{..} = text "<size" <+> ppr tupleSize <+>
+                      text "stack" <+> ppr tupleNativeStackSize <+>
+                      text "regs"  <+>
+                          char 'R' <> ppr tupleVanillaRegs <+>
+                          char 'L' <> ppr tupleLongRegs <+>
+                          char 'F' <> ppr tupleFloatRegs <+>
+                          char 'D' <> ppr tupleDoubleRegs <>
+                      char '>'
+
+voidTupleInfo :: TupleInfo
+voidTupleInfo = TupleInfo 0 0 0 0 0 0
+
 type ItblEnv = NameEnv (Name, ItblPtr)
         -- We need the Name in the range so we know which
         -- elements to filter out when unloading a module
diff --git a/compiler/GHC/Cmm/Expr.hs b/compiler/GHC/Cmm/Expr.hs
index 2c68439dc0..86b06271d1 100644
--- a/compiler/GHC/Cmm/Expr.hs
+++ b/compiler/GHC/Cmm/Expr.hs
@@ -522,6 +522,8 @@ instance Eq GlobalReg where
    PicBaseReg == PicBaseReg = True
    _r1 == _r2 = False
 
+-- NOTE: this Ord instance affects the tuple layout in GHCi, see
+--       Note [GHCi tuple layout]
 instance Ord GlobalReg where
    compare (VanillaReg i _) (VanillaReg j _) = compare i j
      -- Ignore type when seeking clashes
diff --git a/compiler/GHC/Core/Lint.hs b/compiler/GHC/Core/Lint.hs
index 7edc0d7a28..a3ea0bb1d3 100644
--- a/compiler/GHC/Core/Lint.hs
+++ b/compiler/GHC/Core/Lint.hs
@@ -17,6 +17,8 @@ module GHC.Core.Lint (
     lintPassResult, lintInteractiveExpr, lintExpr,
     lintAnnots, lintAxioms,
 
+    interactiveInScope,
+
     -- ** Debug output
     endPass, endPassIO,
     displayLintResults, dumpPassResult,
@@ -379,7 +381,7 @@ lintPassResult hsc_env pass binds
   | not (gopt Opt_DoCoreLinting dflags)
   = return ()
   | otherwise
-  = do { let warns_and_errs = lintCoreBindings dflags pass (interactiveInScope hsc_env) binds
+  = do { let warns_and_errs = lintCoreBindings dflags pass (interactiveInScope $ hsc_IC hsc_env) binds
        ; Err.showPass logger dflags ("Core Linted result of " ++ showPpr dflags pass)
        ; displayLintResults logger dflags (showLintWarnings pass) (ppr pass)
                             (pprCoreBindings binds) warns_and_errs }
@@ -432,7 +434,7 @@ lintInteractiveExpr :: SDoc -- ^ The source of the linted expression
 lintInteractiveExpr what hsc_env expr
   | not (gopt Opt_DoCoreLinting dflags)
   = return ()
-  | Just err <- lintExpr dflags (interactiveInScope hsc_env) expr
+  | Just err <- lintExpr dflags (interactiveInScope $ hsc_IC hsc_env) expr
   = displayLintResults logger dflags False what (pprCoreExpr expr) (emptyBag, err)
   | otherwise
   = return ()
@@ -440,7 +442,7 @@ lintInteractiveExpr what hsc_env expr
     dflags = hsc_dflags hsc_env
     logger = hsc_logger hsc_env
 
-interactiveInScope :: HscEnv -> [Var]
+interactiveInScope :: InteractiveContext -> [Var]
 -- In GHCi we may lint expressions, or bindings arising from 'deriving'
 -- clauses, that mention variables bound in the interactive context.
 -- These are Local things (see Note [Interactively-bound Ids in GHCi] in GHC.Runtime.Context).
@@ -452,11 +454,10 @@ interactiveInScope :: HscEnv -> [Var]
 -- so this is a (cheap) no-op.
 --
 -- See #8215 for an example
-interactiveInScope hsc_env
+interactiveInScope ictxt
   = tyvars ++ ids
   where
     -- C.f. GHC.Tc.Module.setInteractiveContext, Desugar.deSugarExpr
-    ictxt                   = hsc_IC hsc_env
     (cls_insts, _fam_insts) = ic_instances ictxt
     te1    = mkTypeEnvWithImplicits (ic_tythings ictxt)
     te     = extendTypeEnvWithIds te1 (map instanceDFunId cls_insts)
diff --git a/compiler/GHC/CoreToStg.hs b/compiler/GHC/CoreToStg.hs
index b1397fe4e1..d8a6dd0e95 100644
--- a/compiler/GHC/CoreToStg.hs
+++ b/compiler/GHC/CoreToStg.hs
@@ -1,4 +1,6 @@
-{-# LANGUAGE CPP, DeriveFunctor #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE TypeFamilies #-}
 
 --
 -- (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
@@ -414,13 +416,9 @@ coreToStgExpr expr@(Lam _ _)
         text "Unexpected value lambda:" $$ ppr expr
 
 coreToStgExpr (Tick tick expr)
-  = do stg_tick <- case tick of
-                     HpcTick m i        -> return (HpcTick m i)
-                     ProfNote cc cnt sc -> return (ProfNote cc cnt sc)
-                     SourceNote span nm -> return (SourceNote span nm)
-                     Breakpoint{} ->
-                       panic "coreToStgExpr: breakpoint should not happen"
-       expr2 <- coreToStgExpr expr
+  = do
+       let !stg_tick = coreToStgTick (exprType expr) tick
+       !expr2 <- coreToStgExpr expr
        return (StgTick stg_tick expr2)
 
 coreToStgExpr (Cast expr _)
@@ -570,12 +568,8 @@ coreToStgApp f args ticks = do
                 TickBoxOpId {}   -> pprPanic "coreToStg TickBox" $ ppr (f,args')
                 _other           -> StgApp f args'
 
-        convert_tick (Breakpoint _ bid fvs)  = res_ty `seq` Breakpoint res_ty bid fvs
-        convert_tick (HpcTick m i)           = HpcTick m i
-        convert_tick (SourceNote span nm)    = SourceNote span nm
-        convert_tick (ProfNote cc cnt scope) = ProfNote cc cnt scope
         add_tick !t !e = StgTick t e
-        tapp = foldr add_tick app (map convert_tick ticks ++ ticks')
+        tapp = foldr add_tick app (map (coreToStgTick res_ty) ticks ++ ticks')
 
     -- Forcing these fixes a leak in the code generator, noticed while
     -- profiling for trac #4367
@@ -601,12 +595,7 @@ coreToStgArgs (Coercion _ : args) -- Coercion argument; See Note [Coercion token
 coreToStgArgs (Tick t e : args)
   = ASSERT( not (tickishIsCode t) )
     do { (args', ts) <- coreToStgArgs (e : args)
-       ; let convert_tick (Breakpoint _ bid fvs) =
-               let !ty = exprType e in Breakpoint ty bid fvs
-             convert_tick (HpcTick m i)           = HpcTick m i
-             convert_tick (SourceNote span nm)    = SourceNote span nm
-             convert_tick (ProfNote cc cnt scope) = ProfNote cc cnt scope
-             !t' = convert_tick t
+       ; let !t' = coreToStgTick (exprType e) t
        ; return (args', t':ts) }
 
 coreToStgArgs (arg : args) = do         -- Non-type argument
@@ -639,6 +628,13 @@ coreToStgArgs (arg : args) = do         -- Non-type argument
     WARN( bad_args, text "Dangerous-looking argument. Probable cause: bad unsafeCoerce#" $$ ppr arg )
      return (stg_arg : stg_args, ticks ++ aticks)
 
+coreToStgTick :: Type -- type of the ticked expression
+              -> CoreTickish
+              -> StgTickish
+coreToStgTick _ty (HpcTick m i)           = HpcTick m i
+coreToStgTick _ty (SourceNote span nm)    = SourceNote span nm
+coreToStgTick _ty (ProfNote cc cnt scope) = ProfNote cc cnt scope
+coreToStgTick !ty (Breakpoint _ bid fvs)  = Breakpoint ty bid fvs
 
 -- ---------------------------------------------------------------------------
 -- The magic for lets:
diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs
index 956175b3ad..af94cb92d7 100644
--- a/compiler/GHC/CoreToStg/Prep.hs
+++ b/compiler/GHC/CoreToStg/Prep.hs
@@ -964,7 +964,7 @@ no further floating will occur. This allows us to safely inline things like
    GHC.Magic. This definition is used in cases where runRW is curried.
 
  * In addition to its normal Haskell definition in GHC.Magic, we give it
-   a special late inlining here in CorePrep and GHC.CoreToByteCode, avoiding
+   a special late inlining here in CorePrep and GHC.StgToByteCode, avoiding
    the incorrect sharing due to float-out noted above.
 
  * It is levity-polymorphic:
diff --git a/compiler/GHC/Driver/Backend.hs b/compiler/GHC/Driver/Backend.hs
index 845a5f36c0..39789607d9 100644
--- a/compiler/GHC/Driver/Backend.hs
+++ b/compiler/GHC/Driver/Backend.hs
@@ -67,10 +67,10 @@ data Backend
                    -- Produce ByteCode objects (BCO, see "GHC.ByteCode") that
                    -- can be interpreted. It is used by GHCi.
                    --
-                   -- Currently some extensions are not supported (unboxed
-                   -- tuples/sums, foreign primops).
+                   -- Currently some extensions are not supported
+                   -- (foreign primops).
                    --
-                   -- See "GHC.CoreToByteCode"
+                   -- See "GHC.StgToByteCode"
 
 
    | NoBackend     -- ^ No code generated.
diff --git a/compiler/GHC/Driver/Main.hs b/compiler/GHC/Driver/Main.hs
index a910cdf23f..50e5a0a067 100644
--- a/compiler/GHC/Driver/Main.hs
+++ b/compiler/GHC/Driver/Main.hs
@@ -114,7 +114,7 @@ import GHC.Hs.Stats         ( ppSourceStats )
 
 import GHC.HsToCore
 
-import GHC.CoreToByteCode      ( byteCodeGen, coreExprToBCOs )
+import GHC.StgToByteCode    ( byteCodeGen, stgExprToBCOs )
 
 import GHC.IfaceToCore  ( typecheckIface )
 
@@ -132,6 +132,8 @@ import GHC.Core
 import GHC.Core.Tidy           ( tidyExpr )
 import GHC.Core.Type           ( Type, Kind )
 import GHC.Core.Lint           ( lintInteractiveExpr )
+import GHC.Core.Multiplicity
+import GHC.Core.Utils          ( exprType )
 import GHC.Core.ConLike
 import GHC.Core.Opt.Pipeline
 import GHC.Core.TyCon
@@ -156,6 +158,7 @@ import GHC.Stg.Pipeline ( stg2stg )
 
 import GHC.Builtin.Utils
 import GHC.Builtin.Names
+import GHC.Builtin.Uniques ( mkPseudoUniqueE )
 
 import qualified GHC.StgToCmm as StgToCmm ( codeGen )
 import GHC.StgToCmm.Types (CgInfos (..), ModuleLFInfos)
@@ -1551,7 +1554,7 @@ hscGenHardCode hsc_env cgguts location output_filename = do
                withTiming logger dflags
                    (text "CoreToStg"<+>brackets (ppr this_mod))
                    (\(a, b, (c,d)) -> a `seqList` b `seq` c `seqList` d `seqList` ())
-                   (myCoreToStg logger dflags this_mod location prepd_binds)
+                   (myCoreToStg logger dflags (hsc_IC hsc_env) this_mod location prepd_binds)
 
         let cost_centre_info =
               (S.toList local_ccs ++ caf_ccs, caf_cc_stacks)
@@ -1622,8 +1625,12 @@ hscInteractive hsc_env cgguts location = do
     -- Do saturation and convert to A-normal form
     (prepd_binds, _) <- {-# SCC "CorePrep" #-}
                    corePrepPgm hsc_env this_mod location core_binds data_tycons
+
+    (stg_binds, _infotable_prov, _caf_ccs__caf_cc_stacks)
+      <- {-# SCC "CoreToStg" #-}
+          myCoreToStg logger dflags (hsc_IC hsc_env) this_mod location prepd_binds
     -----------------  Generate byte code ------------------
-    comp_bc <- byteCodeGen hsc_env this_mod prepd_binds data_tycons mod_breaks
+    comp_bc <- byteCodeGen hsc_env this_mod stg_binds data_tycons mod_breaks
     ------------------ Create f-x-dynamic C-side stuff -----
     (_istub_h_exists, istub_c_exists)
         <- outputForeignStubs logger tmpfs dflags (hsc_units hsc_env) this_mod location foreign_stubs
@@ -1760,22 +1767,43 @@ doCodeGen hsc_env this_mod denv data_tycons
 
     return (Stream.mapM dump2 pipeline_stream)
 
-myCoreToStg :: Logger -> DynFlags -> Module -> ModLocation -> CoreProgram
+myCoreToStgExpr :: Logger -> DynFlags -> InteractiveContext
+                -> Module -> ModLocation -> CoreExpr
+                -> IO ( StgRhs
+                      , InfoTableProvMap
+                      , CollectedCCs )
+myCoreToStgExpr logger dflags ictxt this_mod ml prepd_expr = do
+    {- Create a temporary binding (just because myCoreToStg needs a
+       binding for the stg2stg step) -}
+    let bco_tmp_id = mkSysLocal (fsLit "BCO_toplevel")
+                                (mkPseudoUniqueE 0)
+                                Many
+                                (exprType prepd_expr)
+    ([StgTopLifted (StgNonRec _ stg_expr)], prov_map, collected_ccs) <-
+       myCoreToStg logger
+                   dflags
+                   ictxt
+                   this_mod
+                   ml
+                   [NonRec bco_tmp_id prepd_expr]
+    return (stg_expr, prov_map, collected_ccs)
+
+myCoreToStg :: Logger -> DynFlags -> InteractiveContext
+            -> Module -> ModLocation -> CoreProgram
             -> IO ( [StgTopBinding] -- output program
                   , InfoTableProvMap
                   , CollectedCCs )  -- CAF cost centre info (declared and used)
-myCoreToStg logger dflags this_mod ml prepd_binds = do
+myCoreToStg logger dflags ictxt this_mod ml prepd_binds = do
     let (stg_binds, denv, cost_centre_info)
          = {-# SCC "Core2Stg" #-}
            coreToStg dflags this_mod ml prepd_binds
 
     stg_binds2
         <- {-# SCC "Stg2Stg" #-}
-           stg2stg logger dflags this_mod stg_binds
+           stg2stg logger dflags ictxt this_mod stg_binds
 
     return (stg_binds2, denv, cost_centre_info)
 
-
 {- **********************************************************************
 %*                                                                      *
 \subsection{Compiling a do-statement}
@@ -1911,9 +1939,18 @@ hscParsedDecls hsc_env decls = runInteractiveHsc hsc_env $ do
     (prepd_binds, _) <- {-# SCC "CorePrep" #-}
       liftIO $ corePrepPgm hsc_env this_mod iNTERACTIVELoc core_binds data_tycons
 
+    (stg_binds, _infotable_prov, _caf_ccs__caf_cc_stacks)
+        <- {-# SCC "CoreToStg" #-}
+           liftIO $ myCoreToStg (hsc_logger hsc_env)
+                                (hsc_dflags hsc_env)
+                                (hsc_IC hsc_env)
+                                this_mod
+                                iNTERACTIVELoc
+                                prepd_binds
+
     {- Generate byte code -}
     cbc <- liftIO $ byteCodeGen hsc_env this_mod
-                                prepd_binds data_tycons mod_breaks
+                                stg_binds data_tycons mod_breaks
 
     let src_span = srcLocSpan interactiveSrcLoc
     liftIO $ loadDecls hsc_env src_span cbc
@@ -2077,10 +2114,25 @@ hscCompileCoreExpr' hsc_env srcspan ds_expr
 
            {- Lint if necessary -}
          ; lintInteractiveExpr (text "hscCompileExpr") hsc_env prepd_expr
+         ; let iNTERACTIVELoc = ModLocation{ ml_hs_file   = Nothing,
+                                      ml_hi_file   = panic "hscCompileCoreExpr':ml_hi_file",
+                                      ml_obj_file  = panic "hscCompileCoreExpr':ml_obj_file",
+                                      ml_hie_file  = panic "hscCompileCoreExpr':ml_hie_file" }
+
+         ; let ictxt = hsc_IC hsc_env
+         ; (stg_expr, _, _) <-
+             myCoreToStgExpr (hsc_logger hsc_env)
+                             (hsc_dflags hsc_env)
+                             ictxt
+                             (icInteractiveModule ictxt)
+                             iNTERACTIVELoc
+                             prepd_expr
 
            {- Convert to BCOs -}
-         ; bcos <- coreExprToBCOs hsc_env
-                     (icInteractiveModule (hsc_IC hsc_env)) prepd_expr
+         ; bcos <- stgExprToBCOs hsc_env
+                     (icInteractiveModule ictxt)
+                     (exprType prepd_expr)
+                     stg_expr
 
            {- load it -}
          ; loadExpr hsc_env srcspan bcos }
diff --git a/compiler/GHC/Driver/Make.hs b/compiler/GHC/Driver/Make.hs
index bd885d9042..20fb7ecc86 100644
--- a/compiler/GHC/Driver/Make.hs
+++ b/compiler/GHC/Driver/Make.hs
@@ -2267,7 +2267,6 @@ downsweep hsc_env old_summaries excl_mods allow_dup_roots
        let tmpfs           = hsc_tmpfs     hsc_env
        map1 <- case backend dflags of
          NoBackend   -> enableCodeGenForTH logger tmpfs home_unit default_backend map0
-         Interpreter -> enableCodeGenForUnboxedTuplesOrSums logger tmpfs default_backend map0
          _           -> return map0
        if null errs
          then pure $ concat $ modNodeMapElems map1
@@ -2377,33 +2376,8 @@ enableCodeGenForTH logger tmpfs home_unit =
       -- can't compile anything anyway! See #16219.
       isHomeUnitDefinite home_unit
 
--- | Update the every ModSummary that is depended on
--- by a module that needs unboxed tuples. We enable codegen to
--- the specified target, disable optimization and change the .hi
--- and .o file locations to be temporary files.
---
--- This is used in order to load code that uses unboxed tuples
--- or sums into GHCi while still allowing some code to be interpreted.
-enableCodeGenForUnboxedTuplesOrSums
-  :: Logger
-  -> TmpFs
-  -> Backend
-  -> ModNodeMap [Either ErrorMessages ExtendedModSummary]
-  -> IO (ModNodeMap [Either ErrorMessages ExtendedModSummary])
-enableCodeGenForUnboxedTuplesOrSums logger tmpfs =
-  enableCodeGenWhen logger tmpfs condition should_modify TFL_GhcSession TFL_CurrentModule
-  where
-    condition ms =
-      unboxed_tuples_or_sums (ms_hspp_opts ms) &&
-      not (gopt Opt_ByteCode (ms_hspp_opts ms)) &&
-      (isBootSummary ms == NotBoot)
-    unboxed_tuples_or_sums d =
-      xopt LangExt.UnboxedTuples d || xopt LangExt.UnboxedSums d
-    should_modify (ModSummary { ms_hspp_opts = dflags }) =
-      backend dflags == Interpreter
-
--- | Helper used to implement 'enableCodeGenForTH' and
--- 'enableCodeGenForUnboxedTuples'. In particular, this enables
+-- | Helper used to implement 'enableCodeGenForTH'.
+-- In particular, this enables
 -- unoptimized code generation for all modules that meet some
 -- condition (first parameter), or are dependencies of those
 -- modules. The second parameter is a condition to check before
diff --git a/compiler/GHC/Runtime/Eval.hs b/compiler/GHC/Runtime/Eval.hs
index db43ff74ac..e3ba232add 100644
--- a/compiler/GHC/Runtime/Eval.hs
+++ b/compiler/GHC/Runtime/Eval.hs
@@ -692,7 +692,7 @@ pushResume hsc_env resume = hsc_env { hsc_IC = ictxt1 }
 
   There are 3 situations where items are removed from the Id list
   (or replaced with `Nothing`):
-  1.) If function `GHC.CoreToByteCode.schemeER_wrk` (which creates
+  1.) If function `GHC.StgToByteCode.schemeER_wrk` (which creates
       the Id list) doesn't find an Id in the ByteCode environement.
   2.) If function `GHC.Runtime.Eval.bindLocalsAtBreakpoint`
       filters out unboxed elements from the Id list, because GHCi cannot
diff --git a/compiler/GHC/Stg/Lint.hs b/compiler/GHC/Stg/Lint.hs
index 1e12e9bab9..8464cb8786 100644
--- a/compiler/GHC/Stg/Lint.hs
+++ b/compiler/GHC/Stg/Lint.hs
@@ -42,6 +42,7 @@ import GHC.Prelude
 import GHC.Stg.Syntax
 
 import GHC.Driver.Session
+import GHC.Core.Lint        ( interactiveInScope )
 import GHC.Data.Bag         ( Bag, emptyBag, isEmptyBag, snocBag, bagToList )
 import GHC.Types.Basic      ( TopLevelFlag(..), isTopLevel )
 import GHC.Types.CostCentre ( isCurrentCCS )
@@ -57,6 +58,7 @@ import GHC.Types.SrcLoc
 import GHC.Utils.Logger
 import GHC.Utils.Outputable
 import GHC.Unit.Module            ( Module )
+import GHC.Runtime.Context        ( InteractiveContext )
 import qualified GHC.Utils.Error as Err
 import Control.Applicative ((<|>))
 import Control.Monad
@@ -64,13 +66,14 @@ import Control.Monad
 lintStgTopBindings :: forall a . (OutputablePass a, BinderP a ~ Id)
                    => Logger
                    -> DynFlags
+                   -> InteractiveContext
                    -> Module -- ^ module being compiled
                    -> Bool   -- ^ have we run Unarise yet?
                    -> String -- ^ who produced the STG?
                    -> [GenStgTopBinding a]
                    -> IO ()
 
-lintStgTopBindings logger dflags this_mod unarised whodunnit binds
+lintStgTopBindings logger dflags ictxt this_mod unarised whodunnit binds
   = {-# SCC "StgLint" #-}
     case initL this_mod unarised opts top_level_binds (lint_binds binds) of
       Nothing  ->
@@ -89,7 +92,8 @@ lintStgTopBindings logger dflags this_mod unarised whodunnit binds
     opts = initStgPprOpts dflags
     -- Bring all top-level binds into scope because CoreToStg does not generate
     -- bindings in dependency order (so we may see a use before its definition).
-    top_level_binds = mkVarSet (bindersOfTopBinds binds)
+    top_level_binds = extendVarSetList (mkVarSet (bindersOfTopBinds binds))
+                                       (interactiveInScope ictxt)
 
     lint_binds :: [GenStgTopBinding a] -> LintM ()
 
diff --git a/compiler/GHC/Stg/Pipeline.hs b/compiler/GHC/Stg/Pipeline.hs
index c05450c0f7..d9f1342b66 100644
--- a/compiler/GHC/Stg/Pipeline.hs
+++ b/compiler/GHC/Stg/Pipeline.hs
@@ -24,6 +24,7 @@ import GHC.Stg.Unarise  ( unarise )
 import GHC.Stg.CSE      ( stgCse )
 import GHC.Stg.Lift     ( stgLiftLams )
 import GHC.Unit.Module ( Module )
+import GHC.Runtime.Context ( InteractiveContext )
 
 import GHC.Driver.Session
 import GHC.Utils.Error
@@ -49,11 +50,11 @@ runStgM mask (StgM m) = evalStateT m mask
 
 stg2stg :: Logger
         -> DynFlags                  -- includes spec of what stg-to-stg passes to do
+        -> InteractiveContext
         -> Module                    -- module being compiled
         -> [StgTopBinding]           -- input program
         -> IO [StgTopBinding]        -- output program
-
-stg2stg logger dflags this_mod binds
+stg2stg logger dflags ictxt this_mod binds
   = do  { dump_when Opt_D_dump_stg_from_core "Initial STG:" binds
         ; showPass logger dflags "Stg2Stg"
         -- Do the main business!
@@ -75,7 +76,7 @@ stg2stg logger dflags this_mod binds
   where
     stg_linter unarised
       | gopt Opt_DoStgLinting dflags
-      = lintStgTopBindings logger dflags this_mod unarised
+      = lintStgTopBindings logger dflags ictxt this_mod unarised
       | otherwise
       = \ _whodunnit _binds -> return ()
 
diff --git a/compiler/GHC/Stg/Syntax.hs b/compiler/GHC/Stg/Syntax.hs
index 03ba9b5549..6e2107e9d6 100644
--- a/compiler/GHC/Stg/Syntax.hs
+++ b/compiler/GHC/Stg/Syntax.hs
@@ -58,7 +58,8 @@ module GHC.Stg.Syntax (
         bindersOf, bindersOfTop, bindersOfTopBinds,
 
         -- ppr
-        StgPprOpts(..), initStgPprOpts, panicStgPprOpts,
+        StgPprOpts(..), initStgPprOpts,
+        panicStgPprOpts, shortStgPprOpts,
         pprStgArg, pprStgExpr, pprStgRhs, pprStgBinding,
         pprGenStgTopBinding, pprStgTopBinding,
         pprGenStgTopBindings, pprStgTopBindings
@@ -691,6 +692,13 @@ panicStgPprOpts = StgPprOpts
    { stgSccEnabled = True
    }
 
+-- | STG pretty-printing options used for short messages
+shortStgPprOpts :: StgPprOpts
+shortStgPprOpts = StgPprOpts
+   { stgSccEnabled = False
+   }
+
+
 pprGenStgTopBinding
   :: OutputablePass pass => StgPprOpts -> GenStgTopBinding pass -> SDoc
 pprGenStgTopBinding opts b = case b of
@@ -778,9 +786,10 @@ pprStgExpr opts e = case e of
              , hang (text "} in ") 2 (pprStgExpr opts expr)
              ]
 
-   StgTick tickish expr -> sdocOption sdocSuppressTicks $ \case
+   StgTick _tickish expr -> sdocOption sdocSuppressTicks $ \case
       True  -> pprStgExpr opts expr
-      False -> sep [ ppr tickish, pprStgExpr opts expr ]
+      False -> pprStgExpr opts expr
+        -- XXX sep [ ppr tickish, pprStgExpr opts expr ]
 
    -- Don't indent for a single case alternative.
    StgCase expr bndr alt_type [alt]
diff --git a/compiler/GHC/CoreToByteCode.hs b/compiler/GHC/StgToByteCode.hs
index dbb64d51d5..e14de72eb5 100644
--- a/compiler/GHC/CoreToByteCode.hs
+++ b/compiler/GHC/StgToByteCode.hs
@@ -10,8 +10,8 @@
 --  (c) The University of Glasgow 2002-2006
 --
 
--- | GHC.CoreToByteCode: Generate bytecode from Core
-module GHC.CoreToByteCode ( UnlinkedBCO, byteCodeGen, coreExprToBCOs ) where
+-- | GHC.StgToByteCode: Generate bytecode from STG
+module GHC.StgToByteCode ( UnlinkedBCO, byteCodeGen, stgExprToBCOs ) where
 
 #include "HsVersions.h"
 
@@ -24,6 +24,11 @@ import GHC.ByteCode.Instr
 import GHC.ByteCode.Asm
 import GHC.ByteCode.Types
 
+import GHC.Cmm.CallConv
+import GHC.Cmm.Expr
+import GHC.Cmm.Node
+import GHC.Cmm.Utils
+
 import GHC.Platform
 import GHC.Platform.Profile
 
@@ -36,12 +41,9 @@ import GHC.Types.Name
 import GHC.Types.Id.Make
 import GHC.Types.Id
 import GHC.Types.ForeignCall
-import GHC.Core.Utils
 import GHC.Core
-import GHC.Core.Ppr
 import GHC.Types.Literal
 import GHC.Builtin.PrimOps
-import GHC.Core.FVs
 import GHC.Core.Type
 import GHC.Types.RepType
 import GHC.Core.DataCon
@@ -55,6 +57,7 @@ import GHC.Core.TyCo.Ppr ( pprType )
 import GHC.Utils.Error
 import GHC.Types.Unique
 import GHC.Builtin.Uniques
+import GHC.Builtin.Utils ( primOpId )
 import GHC.Data.FastString
 import GHC.Utils.Panic
 import GHC.StgToCmm.Closure ( NonVoid(..), fromNonVoid, nonVoidIds )
@@ -64,7 +67,6 @@ import GHC.Data.Bitmap
 import GHC.Data.OrdList
 import GHC.Data.Maybe
 import GHC.Types.Var.Env
-import GHC.Builtin.Names ( unsafeEqualityProofName )
 import GHC.Types.Tickish
 
 import Data.List ( genericReplicate, genericLength, intersperse
@@ -89,35 +91,44 @@ import Data.Ord
 import GHC.Stack.CCS
 import Data.Either ( partitionEithers )
 
+import qualified GHC.Types.CostCentre as CC
+import GHC.Stg.Syntax
+import GHC.Stg.FVs
+
 -- -----------------------------------------------------------------------------
 -- Generating byte code for a complete module
 
 byteCodeGen :: HscEnv
             -> Module
-            -> CoreProgram
+            -> [StgTopBinding]
             -> [TyCon]
             -> Maybe ModBreaks
             -> IO CompiledByteCode
 byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
    = withTiming logger dflags
-                (text "GHC.CoreToByteCode"<+>brackets (ppr this_mod))
+                (text "GHC.StgToByteCode"<+>brackets (ppr this_mod))
                 (const ()) $ do
         -- Split top-level binds into strings and others.
         -- See Note [generating code for top-level string literal bindings].
-        let (strings, flatBinds) = partitionEithers $ do  -- list monad
-                (bndr, rhs) <- flattenBinds binds
-                return $ case exprIsTickedString_maybe rhs of
-                    Just str -> Left (bndr, str)
-                    _ -> Right (bndr, simpleFreeVars rhs)
+        let (strings, lifted_binds) = partitionEithers $ do  -- list monad
+                bnd <- binds
+                case bnd of
+                  StgTopLifted bnd      -> [Right bnd]
+                  StgTopStringLit b str -> [Left (b, str)]
+            flattenBind (StgNonRec b e) = [(b,e)]
+            flattenBind (StgRec bs)     = bs
         stringPtrs <- allocateTopStrings hsc_env strings
 
         us <- mkSplitUniqSupply 'y'
         (BcM_State{..}, proto_bcos) <-
-           runBc hsc_env us this_mod mb_modBreaks (mkVarEnv stringPtrs) $
-             mapM schemeTopBind flatBinds
+           runBc hsc_env us this_mod mb_modBreaks (mkVarEnv stringPtrs) $ do
+             prepd_binds <- mapM bcPrepBind lifted_binds
+             let flattened_binds =
+                   concatMap (flattenBind . annBindingFreeVars) (reverse prepd_binds)
+             mapM schemeTopBind flattened_binds
 
         when (notNull ffis)
-             (panic "GHC.CoreToByteCode.byteCodeGen: missing final emitBc?")
+             (panic "GHC.StgToByteCode.byteCodeGen: missing final emitBc?")
 
         dumpIfSet_dyn logger dflags Opt_D_dump_BCOs
            "Proto-BCOs" FormatByteCode
@@ -168,27 +179,30 @@ literals:
 -- Generating byte code for an expression
 
 -- Returns: the root BCO for this expression
-coreExprToBCOs :: HscEnv
-               -> Module
-               -> CoreExpr
-               -> IO UnlinkedBCO
-coreExprToBCOs hsc_env this_mod expr
+stgExprToBCOs :: HscEnv
+              -> Module
+              -> Type
+              -> StgRhs
+              -> IO UnlinkedBCO
+stgExprToBCOs hsc_env this_mod expr_ty expr
  = withTiming logger dflags
-              (text "GHC.CoreToByteCode"<+>brackets (ppr this_mod))
+              (text "GHC.StgToByteCode"<+>brackets (ppr this_mod))
               (const ()) $ do
-      -- create a totally bogus name for the top-level BCO; this
-      -- should be harmless, since it's never used for anything
-      let invented_name  = mkSystemVarName (mkPseudoUniqueE 0) (fsLit "ExprTopLevel")
 
       -- the uniques are needed to generate fresh variables when we introduce new
       -- let bindings for ticked expressions
       us <- mkSplitUniqSupply 'y'
       (BcM_State _dflags _us _this_mod _final_ctr mallocd _ _ _, proto_bco)
-         <- runBc hsc_env us this_mod Nothing emptyVarEnv $
-              schemeR [] (invented_name, simpleFreeVars expr)
+         <- runBc hsc_env us this_mod Nothing emptyVarEnv $ do
+              prepd_expr <- annBindingFreeVars <$>
+                                       bcPrepBind (StgNonRec dummy_id expr)
+              case prepd_expr of
+                (StgNonRec _ cg_expr) -> schemeR [] (idName dummy_id, cg_expr)
+                _                     ->
+                  panic "GHC.StgByteCode.stgExprToBCOs"
 
       when (notNull mallocd)
-           (panic "GHC.CoreToByteCode.coreExprToBCOs: missing final emitBc?")
+           (panic "GHC.StgToByteCode.stgExprToBCOs: missing final emitBc?")
 
       dumpIfSet_dyn logger dflags Opt_D_dump_BCOs "Proto-BCOs" FormatByteCode
          (ppr proto_bco)
@@ -196,27 +210,110 @@ coreExprToBCOs hsc_env this_mod expr
       assembleOneBCO hsc_env proto_bco
   where dflags = hsc_dflags hsc_env
         logger = hsc_logger hsc_env
-
--- The regular freeVars function gives more information than is useful to
--- us here. We need only the free variables, not everything in an FVAnn.
--- Historical note: At one point FVAnn was more sophisticated than just
--- a set. Now it isn't. So this function is much simpler. Keeping it around
--- so that if someone changes FVAnn, they will get a nice type error right
--- here.
-simpleFreeVars :: CoreExpr -> AnnExpr Id DVarSet
-simpleFreeVars = freeVars
+        -- we need an otherwise unused Id for bytecode generation
+        dummy_id = mkSysLocal (fsLit "BCO_toplevel")
+                              (mkPseudoUniqueE 0)
+                              Many
+                              expr_ty
+{-
+  Prepare the STG for bytecode generation:
+
+   - Ensure that all breakpoints are directly under
+        a let-binding, introducing a new binding for
+        those that aren't already.
+
+   - Protect Not-necessarily lifted join points, see
+        Note [Not-necessarily-lifted join points]
+
+ -}
+
+bcPrepRHS :: StgRhs -> BcM StgRhs
+-- explicitly match all constructors so we get a warning if we miss any
+bcPrepRHS (StgRhsClosure fvs cc upd args (StgTick bp@Breakpoint{} expr)) = do
+  {- If we have a breakpoint directly under an StgRhsClosure we don't
+     need to introduce a new binding for it.
+   -}
+  expr' <- bcPrepExpr expr
+  pure (StgRhsClosure fvs cc upd args (StgTick bp expr'))
+bcPrepRHS (StgRhsClosure fvs cc upd args expr) =
+  StgRhsClosure fvs cc upd args <$> bcPrepExpr expr
+bcPrepRHS con@StgRhsCon{} = pure con
+
+bcPrepExpr :: StgExpr -> BcM StgExpr
+-- explicitly match all constructors so we get a warning if we miss any
+bcPrepExpr (StgTick bp@(Breakpoint tick_ty _ _) rhs)
+  | isLiftedTypeKind (typeKind tick_ty) = do
+      id <- newId tick_ty
+      rhs' <- bcPrepExpr rhs
+      let expr' = StgTick bp rhs'
+          bnd = StgNonRec id (StgRhsClosure noExtFieldSilent
+                                            CC.dontCareCCS
+                                            ReEntrant
+                                            []
+                                            expr'
+                             )
+          letExp = StgLet noExtFieldSilent bnd (StgApp id [])
+      pure letExp
+  | otherwise = do
+      id <- newId (mkVisFunTyMany realWorldStatePrimTy tick_ty)
+      st <- newId realWorldStatePrimTy
+      rhs' <- bcPrepExpr rhs
+      let expr' = StgTick bp rhs'
+          bnd = StgNonRec id (StgRhsClosure noExtFieldSilent
+                                            CC.dontCareCCS
+                                            ReEntrant
+                                            [voidArgId]
+                                            expr'
+                             )
+      pure $ StgLet noExtFieldSilent bnd (StgApp id [StgVarArg st])
+bcPrepExpr (StgTick tick rhs) =
+  StgTick tick <$> bcPrepExpr rhs
+bcPrepExpr (StgLet xlet bnds expr) =
+  StgLet xlet <$> bcPrepBind bnds
+              <*> bcPrepExpr expr
+bcPrepExpr (StgLetNoEscape xlne bnds expr) =
+  StgLet xlne <$> bcPrepBind bnds
+              <*> bcPrepExpr expr
+bcPrepExpr (StgCase expr bndr alt_type alts) =
+  StgCase <$> bcPrepExpr expr
+          <*> pure bndr
+          <*> pure alt_type
+          <*> mapM bcPrepAlt alts
+bcPrepExpr lit@StgLit{} = pure lit
+-- See Note [Not-necessarily-lifted join points], step 3.
+bcPrepExpr (StgApp x [])
+  | isNNLJoinPoint x = pure $
+      StgApp (protectNNLJoinPointId x) [StgVarArg voidPrimId]
+bcPrepExpr app@StgApp{} = pure app
+bcPrepExpr app@StgConApp{} = pure app
+bcPrepExpr app@StgOpApp{} = pure app
+
+bcPrepAlt :: StgAlt -> BcM StgAlt
+bcPrepAlt (ac, bndrs, expr) = (,,) ac bndrs <$> bcPrepExpr expr
+
+bcPrepBind :: StgBinding -> BcM StgBinding
+-- explicitly match all constructors so we get a warning if we miss any
+bcPrepBind (StgNonRec bndr rhs) =
+  let (bndr', rhs') = bcPrepSingleBind (bndr, rhs)
+  in  StgNonRec bndr' <$> bcPrepRHS rhs'
+bcPrepBind (StgRec bnds) =
+  StgRec <$> mapM ((\(b,r) -> (,) b <$> bcPrepRHS r) . bcPrepSingleBind)
+                  bnds
+
+bcPrepSingleBind :: (Id, StgRhs) -> (Id, StgRhs)
+-- If necessary, modify this Id and body to protect not-necessarily-lifted join points.
+-- See Note [Not-necessarily-lifted join points], step 2.
+bcPrepSingleBind (x, StgRhsClosure ext cc upd_flag args body)
+  | isNNLJoinPoint x
+  = ( protectNNLJoinPointId x
+    , StgRhsClosure ext cc upd_flag (args ++ [voidArgId]) body)
+bcPrepSingleBind bnd = bnd
 
 -- -----------------------------------------------------------------------------
 -- Compilation schema for the bytecode generator
 
 type BCInstrList = OrdList BCInstr
 
-newtype ByteOff = ByteOff Int
-    deriving (Enum, Eq, Integral, Num, Ord, Real)
-
-newtype WordOff = WordOff Int
-    deriving (Enum, Eq, Integral, Num, Ord, Real)
-
 wordsToBytes :: Platform -> WordOff -> ByteOff
 wordsToBytes platform = fromIntegral . (* platformWordSizeInBytes platform) . fromIntegral
 
@@ -226,7 +323,7 @@ bytesToWords platform (ByteOff bytes) =
     let (q, r) = bytes `quotRem` (platformWordSizeInBytes platform)
     in if r == 0
            then fromIntegral q
-           else panic $ "GHC.CoreToByteCode.bytesToWords: bytes=" ++ show bytes
+           else panic $ "GHC.StgToByteCode.bytesToWords: bytes=" ++ show bytes
 
 wordSize :: Platform -> ByteOff
 wordSize platform = ByteOff (platformWordSizeInBytes platform)
@@ -246,7 +343,7 @@ ppBCEnv p
      $$ nest 4 (vcat (map pp_one (sortBy cmp_snd (Map.toList p))))
      $$ text "end-env"
      where
-        pp_one (var, offset) = int offset <> colon <+> ppr var <+> ppr (bcIdArgRep var)
+        pp_one (var, ByteOff offset) = int offset <> colon <+> ppr var <+> ppr (bcIdArgReps var)
         cmp_snd x y = compare (snd x) (snd y)
 -}
 
@@ -256,7 +353,7 @@ mkProtoBCO
    :: Platform
    -> name
    -> BCInstrList
-   -> Either  [AnnAlt Id DVarSet] (AnnExpr Id DVarSet)
+   -> Either  [CgStgAlt] (CgStgRhs)
         -- ^ original expression; for debugging only
    -> Int
    -> Word16
@@ -315,12 +412,17 @@ argBits platform (rep : args)
   | isFollowableArg rep  = False : argBits platform args
   | otherwise = take (argRepSizeW platform rep) (repeat True) ++ argBits platform args
 
+non_void :: [ArgRep] -> [ArgRep]
+non_void = filter nv
+  where nv V = False
+        nv _ = True
+
 -- -----------------------------------------------------------------------------
 -- schemeTopBind
 
 -- Compile code for the right-hand side of a top-level binding
 
-schemeTopBind :: (Id, AnnExpr Id DVarSet) -> BcM (ProtoBCO Name)
+schemeTopBind :: (Id, CgStgRhs) -> BcM (ProtoBCO Name)
 schemeTopBind (id, rhs)
   | Just data_con <- isDataConWorkId_maybe id,
     isNullaryRepDataCon data_con = do
@@ -351,44 +453,27 @@ schemeTopBind (id, rhs)
 -- Park the resulting BCO in the monad.  Also requires the
 -- name of the variable to which this value was bound,
 -- so as to give the resulting BCO a name.
-
 schemeR :: [Id]                 -- Free vars of the RHS, ordered as they
                                 -- will appear in the thunk.  Empty for
                                 -- top-level things, which have no free vars.
-        -> (Name, AnnExpr Id DVarSet)
+        -> (Name, CgStgRhs)
         -> BcM (ProtoBCO Name)
 schemeR fvs (nm, rhs)
-{-
-   | trace (showSDoc (
-              (char ' '
-               $$ (ppr.filter (not.isTyVar).dVarSetElems.fst) rhs
-               $$ pprCoreExpr (deAnnotate rhs)
-               $$ char ' '
-              ))) False
-   = undefined
-   | otherwise
--}
    = schemeR_wrk fvs nm rhs (collect rhs)
 
 -- If an expression is a lambda (after apply bcView), return the
 -- list of arguments to the lambda (in R-to-L order) and the
 -- underlying expression
-collect :: AnnExpr Id DVarSet -> ([Var], AnnExpr' Id DVarSet)
-collect (_, e) = go [] e
-  where
-    go xs e | Just e' <- bcView e = go xs e'
-    go xs (AnnLam x (_,e))
-      | typePrimRep (idType x) `lengthExceeds` 1
-      = multiValException
-      | otherwise
-      = go (x:xs) e
-    go xs not_lambda = (reverse xs, not_lambda)
+
+collect :: CgStgRhs -> ([Var], CgStgExpr)
+collect (StgRhsClosure _ _ _ args body) = (args, body)
+collect (StgRhsCon _cc dc cnum _ticks args) = ([], StgConApp dc cnum args [])
 
 schemeR_wrk
     :: [Id]
     -> Name
-    -> AnnExpr Id DVarSet             -- expression e, for debugging only
-    -> ([Var], AnnExpr' Var DVarSet)  -- result of collect on e
+    -> CgStgRhs            -- expression e, for debugging only
+    -> ([Var], CgStgExpr)  -- result of collect on e
     -> BcM (ProtoBCO Name)
 schemeR_wrk fvs nm original_body (args, body)
    = do
@@ -417,17 +502,16 @@ schemeR_wrk fvs nm original_body (args, body)
                  arity bitmap_size bitmap False{-not alts-})
 
 -- introduce break instructions for ticked expressions
-schemeER_wrk :: StackDepth -> BCEnv -> AnnExpr' Id DVarSet -> BcM BCInstrList
-schemeER_wrk d p rhs
-  | AnnTick (Breakpoint _ext tick_no fvs) (_annot, newRhs) <- rhs
-  = do  code <- schemeE d 0 p newRhs
+schemeER_wrk :: StackDepth -> BCEnv -> CgStgExpr -> BcM BCInstrList
+schemeER_wrk d p (StgTick (Breakpoint tick_ty tick_no fvs) rhs)
+  = do  code <- schemeE d 0 p rhs
         cc_arr <- getCCArray
         this_mod <- moduleName <$> getCurrentModule
         platform <- profilePlatform <$> getProfile
         let idOffSets = getVarOffSets platform d p fvs
         let breakInfo = CgBreakInfo
                         { cgb_vars = idOffSets
-                        , cgb_resty = exprType (deAnnotate' newRhs)
+                        , cgb_resty = tick_ty
                         }
         newBreakInfo tick_no breakInfo
         hsc_env <- getHscEnv
@@ -437,7 +521,7 @@ schemeER_wrk d p rhs
                | otherwise = toRemotePtr nullPtr
         let breakInstr = BRK_FUN (fromIntegral tick_no) (getUnique this_mod) cc
         return $ breakInstr `consOL` code
-   | otherwise = schemeE d 0 p rhs
+schemeER_wrk d p rhs = schemeE d 0 p rhs
 
 getVarOffSets :: Platform -> StackDepth -> BCEnv -> [Id] -> [Maybe (Id, Word16)]
 getVarOffSets platform depth env = map getOffSet
@@ -469,7 +553,7 @@ trunc16B = truncIntegral16
 trunc16W :: WordOff -> Word16
 trunc16W = truncIntegral16
 
-fvsToEnv :: BCEnv -> DVarSet -> [Id]
+fvsToEnv :: BCEnv -> CgStgRhs -> [Id]
 -- Takes the free variables of a right-hand side, and
 -- delivers an ordered list of the local variables that will
 -- be captured in the thunk for the RHS
@@ -478,93 +562,128 @@ fvsToEnv :: BCEnv -> DVarSet -> [Id]
 --
 -- The code that constructs the thunk, and the code that executes
 -- it, have to agree about this layout
-fvsToEnv p fvs = [v | v <- dVarSetElems fvs,
-                      isId v,           -- Could be a type variable
-                      v `Map.member` p]
+
+fvsToEnv p (StgRhsClosure fvs _ _ _ _) =
+            [v | v <- dVarSetElems fvs,
+                 v `Map.member` p]
+fvsToEnv _ _ = []
 
 -- -----------------------------------------------------------------------------
 -- schemeE
 
+-- Returning an unlifted value.
+-- Heave it on the stack, SLIDE, and RETURN.
 returnUnboxedAtom
     :: StackDepth
     -> Sequel
     -> BCEnv
-    -> AnnExpr' Id DVarSet
-    -> ArgRep
+    -> StgArg
     -> BcM BCInstrList
--- Returning an unlifted value.
--- Heave it on the stack, SLIDE, and RETURN.
-returnUnboxedAtom d s p e e_rep = do
-    dflags <- getDynFlags
-    let platform = targetPlatform dflags
+returnUnboxedAtom d s p e = do
+    let reps = case e of
+                 StgLitArg lit -> typePrimRepArgs (literalType lit)
+                 StgVarArg i   -> bcIdPrimReps i
     (push, szb) <- pushAtom d p e
-    return (push                                  -- value onto stack
-           `appOL`  mkSlideB platform szb (d - s) -- clear to sequel
-           `snocOL` RETURN_UBX e_rep)             -- go
+    ret <- returnUnboxedReps d s szb reps
+    return (push `appOL` ret)
+
+-- return an unboxed value from the top of the stack
+returnUnboxedReps
+    :: StackDepth
+    -> Sequel
+    -> ByteOff    -- size of the thing we're returning
+    -> [PrimRep]  -- representations
+    -> BcM BCInstrList
+returnUnboxedReps d s szb reps = do
+    profile <- getProfile
+    let platform = profilePlatform profile
+        non_void VoidRep = False
+        non_void _ = True
+    ret <- case filter non_void reps of
+             -- use RETURN_UBX for unary representations
+             []    -> return (unitOL $ RETURN_UBX V)
+             [rep] -> return (unitOL $ RETURN_UBX (toArgRep platform rep))
+             -- otherwise use RETURN_TUPLE with a tuple descriptor
+             nv_reps -> do
+               let (tuple_info, args_offsets) = layoutTuple profile 0 (primRepCmmType platform) nv_reps
+                   args_ptrs = map (\(rep, off) -> (isFollowableArg (toArgRep platform rep), off)) args_offsets
+               tuple_bco <- emitBc (tupleBCO platform tuple_info args_ptrs)
+               return $ PUSH_UBX (mkTupleInfoLit platform tuple_info) 1 `consOL`
+                        PUSH_BCO tuple_bco `consOL`
+                        unitOL RETURN_TUPLE
+    return ( mkSlideB platform szb (d - s) -- clear to sequel
+             `appOL`  ret)                 -- go
+
+-- construct and return an unboxed tuple
+returnUnboxedTuple
+    :: StackDepth
+    -> Sequel
+    -> BCEnv
+    -> [StgArg]
+    -> BcM BCInstrList
+returnUnboxedTuple d s p es = do
+    profile <- getProfile
+    let platform = profilePlatform profile
+        arg_ty e = primRepCmmType platform (atomPrimRep e)
+        (tuple_info, tuple_components) = layoutTuple profile d arg_ty es
+        go _   pushes [] = return (reverse pushes)
+        go !dd pushes ((a, off):cs) = do (push, szb) <- pushAtom dd p a
+                                         MASSERT(off == dd + szb)
+                                         go (dd + szb) (push:pushes) cs
+    pushes <- go d [] tuple_components
+    ret <- returnUnboxedReps d
+                             s
+                             (wordsToBytes platform $ tupleSize tuple_info)
+                             (map atomPrimRep es)
+    return (mconcat pushes `appOL` ret)
 
 -- Compile code to apply the given expression to the remaining args
 -- on the stack, returning a HNF.
 schemeE
-    :: StackDepth -> Sequel -> BCEnv -> AnnExpr' Id DVarSet -> BcM BCInstrList
-schemeE d s p e
-   | Just e' <- bcView e
-   = schemeE d s p e'
-
+    :: StackDepth -> Sequel -> BCEnv -> CgStgExpr -> BcM BCInstrList
+schemeE d s p (StgLit lit) = returnUnboxedAtom d s p (StgLitArg lit)
+schemeE d s p (StgApp x [])
+   | isUnliftedType (idType x) = returnUnboxedAtom d s p (StgVarArg x)
 -- Delegate tail-calls to schemeT.
-schemeE d s p e@(AnnApp _ _) = schemeT d s p e
-
-schemeE d s p e@(AnnLit lit)     = do
-    platform <- profilePlatform <$> getProfile
-    returnUnboxedAtom d s p e (typeArgRep platform (literalType lit))
-schemeE d s p e@(AnnCoercion {}) = returnUnboxedAtom d s p e V
-
-schemeE d s p e@(AnnVar v)
-      -- See Note [Not-necessarily-lifted join points], step 3.
-    | isNNLJoinPoint v          = doTailCall d s p (protectNNLJoinPointId v) [AnnVar voidPrimId]
-    | isUnliftedType (idType v) = do
-        platform <- profilePlatform <$> getProfile
-        returnUnboxedAtom d s p e (bcIdArgRep platform v)
-    | otherwise                 = schemeT d s p e
-
-schemeE d s p (AnnLet (AnnNonRec x (_,rhs)) (_,body))
-   | (AnnVar v, args_r_to_l) <- splitApp rhs,
-     Just data_con <- isDataConWorkId_maybe v,
-     dataConRepArity data_con == length args_r_to_l
+schemeE d s p e@(StgApp {}) = schemeT d s p e
+schemeE d s p e@(StgConApp {}) = schemeT d s p e
+schemeE d s p e@(StgOpApp {}) = schemeT d s p e
+schemeE d s p (StgLetNoEscape xlet bnd body)
+   = schemeE d s p (StgLet xlet bnd body)
+schemeE d s p (StgLet _xlet
+                      (StgNonRec x (StgRhsCon _cc data_con _cnum _ticks args))
+                      body)
    = do -- Special case for a non-recursive let whose RHS is a
         -- saturated constructor application.
         -- Just allocate the constructor and carry on
-        alloc_code <- mkConAppCode d s p data_con args_r_to_l
+        alloc_code <- mkConAppCode d s p data_con args
         platform <- targetPlatform <$> getDynFlags
         let !d2 = d + wordSize platform
         body_code <- schemeE d2 s (Map.insert x d2 p) body
         return (alloc_code `appOL` body_code)
-
 -- General case for let.  Generates correct, if inefficient, code in
 -- all situations.
-schemeE d s p (AnnLet binds (_,body)) = do
+schemeE d s p (StgLet _ext binds body) = do
      platform <- targetPlatform <$> getDynFlags
-     let (xs,rhss) = case binds of AnnNonRec x rhs  -> ([x],[rhs])
-                                   AnnRec xs_n_rhss -> unzip xs_n_rhss
+     let (xs,rhss) = case binds of StgNonRec x rhs  -> ([x],[rhs])
+                                   StgRec xs_n_rhss -> unzip xs_n_rhss
          n_binds = genericLength xs
 
-         fvss  = map (fvsToEnv p' . fst) rhss
-
-           -- See Note [Not-necessarily-lifted join points], step 2.
-         (xs',rhss') = zipWithAndUnzip protectNNLJoinPointBind xs rhss
+         fvss  = map (fvsToEnv p') rhss
 
          -- Sizes of free vars
          size_w = trunc16W . idSizeW platform
          sizes = map (\rhs_fvs -> sum (map size_w rhs_fvs)) fvss
 
          -- the arity of each rhs
-         arities = map (genericLength . fst . collect) rhss'
+         arities = map (genericLength . fst . collect) rhss
 
          -- This p', d' defn is safe because all the items being pushed
          -- are ptrs, so all have size 1 word.  d' and p' reflect the stack
          -- after the closures have been allocated in the heap (but not
          -- filled in), and pointers to them parked on the stack.
          offsets = mkStackOffsets d (genericReplicate n_binds (wordSize platform))
-         p' = Map.insertList (zipE xs' offsets) p
+         p' = Map.insertList (zipE xs offsets) p
          d' = d + wordsToBytes platform n_binds
          zipE = zipEqual "schemeE"
 
@@ -583,7 +702,7 @@ schemeE d s p (AnnLet binds (_,body)) = do
                 mkap | arity == 0 = MKAP
                      | otherwise  = MKPAP
          build_thunk dd (fv:fvs) size bco off arity = do
-              (push_code, pushed_szb) <- pushAtom dd p' (AnnVar fv)
+              (push_code, pushed_szb) <- pushAtom dd p' (StgVarArg fv)
               more_push_code <-
                   build_thunk (dd + pushed_szb) fvs size bco off arity
               return (push_code `appOL` more_push_code)
@@ -595,112 +714,35 @@ schemeE d s p (AnnLet binds (_,body)) = do
                  mkAlloc sz arity = ALLOC_PAP arity sz
 
          is_tick = case binds of
-                     AnnNonRec id _ -> occNameFS (getOccName id) == tickFS
+                     StgNonRec id _ -> occNameFS (getOccName id) == tickFS
                      _other -> False
 
-         compile_bind d' fvs x rhs size arity off = do
+         compile_bind d' fvs x (rhs::CgStgRhs) size arity off = do
                 bco <- schemeR fvs (getName x,rhs)
                 build_thunk d' fvs size bco off arity
 
          compile_binds =
             [ compile_bind d' fvs x rhs size arity (trunc16W n)
             | (fvs, x, rhs, size, arity, n) <-
-                zip6 fvss xs' rhss' sizes arities [n_binds, n_binds-1 .. 1]
+                zip6 fvss xs rhss sizes arities [n_binds, n_binds-1 .. 1]
             ]
      body_code <- schemeE d' s p' body
      thunk_codes <- sequence compile_binds
      return (alloc_code `appOL` concatOL thunk_codes `appOL` body_code)
 
--- Introduce a let binding for a ticked case expression. This rule
--- *should* only fire when the expression was not already let-bound
--- (the code gen for let bindings should take care of that).  Todo: we
--- call exprFreeVars on a deAnnotated expression, this may not be the
--- best way to calculate the free vars but it seemed like the least
--- intrusive thing to do
-schemeE d s p exp@(AnnTick (Breakpoint _ext _id _fvs) _rhs)
-   | isLiftedTypeKind (typeKind ty)
-   = do   id <- newId ty
-          -- Todo: is emptyVarSet correct on the next line?
-          let letExp = AnnLet (AnnNonRec id (fvs, exp)) (emptyDVarSet, AnnVar id)
-          schemeE d s p letExp
-
-   | otherwise
-   = do   -- If the result type is not definitely lifted, then we must generate
-          --   let f = \s . tick<n> e
-          --   in  f realWorld#
-          -- When we stop at the breakpoint, _result will have an unlifted
-          -- type and hence won't be bound in the environment, but the
-          -- breakpoint will otherwise work fine.
-          --
-          -- NB (#12007) this /also/ applies for if (ty :: TYPE r), where
-          --    r :: RuntimeRep is a variable. This can happen in the
-          --    continuations for a pattern-synonym matcher
-          --    match = /\(r::RuntimeRep) /\(a::TYPE r).
-          --            \(k :: Int -> a) \(v::T).
-          --            case v of MkV n -> k n
-          -- Here (k n) :: a :: TYPE r, so we don't know if it's lifted
-          -- or not; but that should be fine provided we add that void arg.
-
-          id <- newId (mkVisFunTyMany realWorldStatePrimTy ty)
-          st <- newId realWorldStatePrimTy
-          let letExp = AnnLet (AnnNonRec id (fvs, AnnLam st (emptyDVarSet, exp)))
-                              (emptyDVarSet, (AnnApp (emptyDVarSet, AnnVar id)
-                                                    (emptyDVarSet, AnnVar realWorldPrimId)))
-          schemeE d s p letExp
-
-   where
-     exp' = deAnnotate' exp
-     fvs  = exprFreeVarsDSet exp'
-     ty   = exprType exp'
+schemeE _d _s _p (StgTick (Breakpoint _ bp_id _) _rhs)
+   = panic ("schemeE: Breakpoint without let binding: " ++
+            show bp_id ++
+            " forgot to run bcPrep?")
 
 -- ignore other kinds of tick
-schemeE d s p (AnnTick _ (_, rhs)) = schemeE d s p rhs
+schemeE d s p (StgTick _ rhs) = schemeE d s p rhs
 
 -- no alts: scrut is guaranteed to diverge
-schemeE d s p (AnnCase (_,scrut) _ _ []) = schemeE d s p scrut
-
--- handle pairs with one void argument (e.g. state token)
-schemeE d s p (AnnCase scrut bndr _ [AnnAlt (DataAlt dc) [bind1, bind2] rhs])
-   | isUnboxedTupleDataCon dc
-        -- Convert
-        --      case .... of x { (# V'd-thing, a #) -> ... }
-        -- to
-        --      case .... of a { DEFAULT -> ... }
-        -- because the return convention for both are identical.
-        --
-        -- Note that it does not matter losing the void-rep thing from the
-        -- envt (it won't be bound now) because we never look such things up.
-   , Just res <- case (typePrimRep (idType bind1), typePrimRep (idType bind2)) of
-                   ([], [_])
-                     -> Just $ doCase d s p scrut bind2 [AnnAlt DEFAULT [] rhs] (Just bndr)
-                   ([_], [])
-                     -> Just $ doCase d s p scrut bind1 [AnnAlt DEFAULT [] rhs] (Just bndr)
-                   _ -> Nothing
-   = res
-
--- handle unit tuples
-schemeE d s p (AnnCase scrut bndr _ [AnnAlt (DataAlt dc) [bind1] rhs])
-   | isUnboxedTupleDataCon dc
-   , typePrimRep (idType bndr) `lengthAtMost` 1
-   = doCase d s p scrut bind1 [AnnAlt DEFAULT [] rhs] (Just bndr)
-
--- handle nullary tuples
-schemeE d s p (AnnCase scrut bndr _ alt@[AnnAlt DEFAULT [] _])
-   | isUnboxedTupleType (idType bndr)
-   , Just ty <- case typePrimRep (idType bndr) of
-       [_]  -> Just (unwrapType (idType bndr))
-       []   -> Just unboxedUnitTy
-       _    -> Nothing
-       -- handles any pattern with a single non-void binder; in particular I/O
-       -- monad returns (# RealWorld#, a #)
-   = doCase d s p scrut (bndr `setIdType` ty) alt (Just bndr)
-
-schemeE d s p (AnnCase scrut bndr _ alts)
-   = doCase d s p scrut bndr alts Nothing{-not an unboxed tuple-}
-
-schemeE _ _ _ expr
-   = pprPanic "GHC.CoreToByteCode.schemeE: unhandled case"
-               (pprCoreExpr (deAnnotate' expr))
+schemeE d s p (StgCase scrut _ _ []) = schemeE d s p scrut
+
+schemeE d s p (StgCase scrut bndr _ alts)
+   = doCase d s p scrut bndr alts
 
 -- Is this Id a not-necessarily-lifted join point?
 -- See Note [Not-necessarily-lifted join points], step 1
@@ -708,16 +750,6 @@ isNNLJoinPoint :: Id -> Bool
 isNNLJoinPoint x = isJoinId x &&
                    Just True /= isLiftedType_maybe (idType x)
 
--- If necessary, modify this Id and body to protect not-necessarily-lifted join points.
--- See Note [Not-necessarily-lifted join points], step 2.
-protectNNLJoinPointBind :: Id -> AnnExpr Id DVarSet -> (Id, AnnExpr Id DVarSet)
-protectNNLJoinPointBind x rhs@(fvs, _)
-  | isNNLJoinPoint x
-  = (protectNNLJoinPointId x, (fvs, AnnLam voidArgId rhs))
-
-  | otherwise
-  = (x, rhs)
-
 -- Update an Id's type to take a Void# argument.
 -- Precondition: the Id is a not-necessarily-lifted join point.
 -- See Note [Not-necessarily-lifted join points]
@@ -763,7 +795,7 @@ isUnliftedType check in the AnnVar case of schemeE.) Here is the strategy:
    type to tack on a `(# #) ->`.
    Note that functions are never levity-polymorphic, so this transformation
    changes an NNLJP to a non-levity-polymorphic join point. This is done
-   in protectNNLJoinPointBind, called from the AnnLet case of schemeE.
+   in bcPrepSingleBind.
 
 3. At an occurrence of an NNLJP, add an application to void# (called voidPrimId),
    being careful to note the new type of the NNLJP. This is done in the AnnVar
@@ -805,10 +837,8 @@ Right Fix is to take advantage of join points as goto-labels.
 --
 -- 1.  The fn denotes a ccall.  Defer to generateCCall.
 --
--- 2.  (Another nasty hack).  Spot (# a::V, b #) and treat
---     it simply as  b  -- since the representations are identical
---     (the V takes up zero stack space).  Also, spot
---     (# b #) and treat it as  b.
+-- 2.  An unboxed tuple: push the components on the top of
+--     the stack and return.
 --
 -- 3.  Application of a constructor, by defn saturated.
 --     Split the args into ptrs and non-ptrs, and push the nonptrs,
@@ -820,59 +850,45 @@ Right Fix is to take advantage of join points as goto-labels.
 schemeT :: StackDepth   -- Stack depth
         -> Sequel       -- Sequel depth
         -> BCEnv        -- stack env
-        -> AnnExpr' Id DVarSet
+        -> CgStgExpr
         -> BcM BCInstrList
 
-schemeT d s p app
-
    -- Case 0
+schemeT d s p app
    | Just (arg, constr_names) <- maybe_is_tagToEnum_call app
    = implement_tagToId d s p arg constr_names
 
    -- Case 1
-   | Just (CCall ccall_spec) <- isFCallId_maybe fn
+schemeT d s p (StgOpApp (StgFCallOp (CCall ccall_spec) _ty) args result_ty)
    = if isSupportedCConv ccall_spec
-      then generateCCall d s p ccall_spec fn args_r_to_l
+      then generateCCall d s p ccall_spec result_ty (reverse args)
       else unsupportedCConvException
 
+schemeT d s p (StgOpApp (StgPrimOp op) args _ty)
+   = doTailCall d s p (primOpId op) (reverse args)
 
-   -- Case 2: Constructor application
-   | Just con <- maybe_saturated_dcon
-   , isUnboxedTupleDataCon con
-   = do
-     platform <- profilePlatform <$> getProfile
-     case args_r_to_l of
-        [arg1,arg2] | isVAtom platform arg1 ->
-                  unboxedTupleReturn d s p arg2
-        [arg1,arg2] | isVAtom platform arg2 ->
-                  unboxedTupleReturn d s p arg1
-        _other -> multiValException
+schemeT _d _s _p (StgOpApp StgPrimCallOp{} _args _ty)
+   = unsupportedCConvException
+
+   -- Case 2: Unboxed tuple
+schemeT d s p (StgConApp con _ext args _tys)
+   | isUnboxedTupleDataCon con || isUnboxedSumDataCon con
+   = returnUnboxedTuple d s p args
 
    -- Case 3: Ordinary data constructor
-   | Just con <- maybe_saturated_dcon
-   = do alloc_con <- mkConAppCode d s p con args_r_to_l
+   | otherwise
+   = do alloc_con <- mkConAppCode d s p con args
         platform <- profilePlatform <$> getProfile
         return (alloc_con         `appOL`
                 mkSlideW 1 (bytesToWords platform $ d - s) `snocOL`
                 ENTER)
 
    -- Case 4: Tail call of function
-   | otherwise
-   = doTailCall d s p fn args_r_to_l
-
-   where
-        -- Extract the args (R->L) and fn
-        -- The function will necessarily be a variable,
-        -- because we are compiling a tail call
-      (AnnVar fn, args_r_to_l) = splitApp app
-
-      -- Only consider this to be a constructor application iff it is
-      -- saturated.  Otherwise, we'll call the constructor wrapper.
-      n_args = length args_r_to_l
-      maybe_saturated_dcon
-        = case isDataConWorkId_maybe fn of
-                Just con | dataConRepArity con == n_args -> Just con
-                _ -> Nothing
+schemeT d s p (StgApp fn args)
+   = doTailCall d s p fn (reverse args)
+
+schemeT _ _ _ e = pprPanic "GHC.StgToByteCode.schemeT"
+                           (pprStgExpr shortStgPprOpts e)
 
 -- -----------------------------------------------------------------------------
 -- Generate code to build a constructor application,
@@ -883,26 +899,17 @@ mkConAppCode
     -> Sequel
     -> BCEnv
     -> DataCon                  -- The data constructor
-    -> [AnnExpr' Id DVarSet]    -- Args, in *reverse* order
+    -> [StgArg]                 -- Args, in *reverse* order
     -> BcM BCInstrList
-mkConAppCode _ _ _ con []       -- Nullary constructor
-  = ASSERT( isNullaryRepDataCon con )
-    return (unitOL (PUSH_G (getName (dataConWorkId con))))
-        -- Instead of doing a PACK, which would allocate a fresh
-        -- copy of this constructor, use the single shared version.
-
-mkConAppCode orig_d _ p con args_r_to_l =
-    ASSERT( args_r_to_l `lengthIs` dataConRepArity con ) app_code
+mkConAppCode orig_d _ p con args = app_code
   where
     app_code = do
         profile <- getProfile
         let platform = profilePlatform profile
 
-        -- The args are initially in reverse order, but mkVirtHeapOffsets
-        -- expects them to be left-to-right.
-        let non_voids =
+            non_voids =
                 [ NonVoid (prim_rep, arg)
-                | arg <- reverse args_r_to_l
+                | arg <- args
                 , let prim_rep = atomPrimRep arg
                 , not (isVoidRep prim_rep)
                 ]
@@ -922,20 +929,6 @@ mkConAppCode orig_d _ p con args_r_to_l =
         -- Push on the stack in the reverse order.
         do_pushery orig_d (reverse args_offsets)
 
-
--- -----------------------------------------------------------------------------
--- Returning an unboxed tuple with one non-void component (the only
--- case we can handle).
---
--- Remember, we don't want to *evaluate* the component that is being
--- returned, even if it is a pointed type.  We always just return.
-
-unboxedTupleReturn
-    :: StackDepth -> Sequel -> BCEnv -> AnnExpr' Id DVarSet -> BcM BCInstrList
-unboxedTupleReturn d s p arg = do
-   platform <- profilePlatform <$> getProfile
-   returnUnboxedAtom d s p arg (atomRep platform arg)
-
 -- -----------------------------------------------------------------------------
 -- Generate code for a tail-call
 
@@ -944,7 +937,7 @@ doTailCall
     -> Sequel
     -> BCEnv
     -> Id
-    -> [AnnExpr' Id DVarSet]
+    -> [StgArg]
     -> BcM BCInstrList
 doTailCall init_d s p fn args = do
    platform <- profilePlatform <$> getProfile
@@ -952,7 +945,7 @@ doTailCall init_d s p fn args = do
   where
   do_pushes !d [] reps = do
         ASSERT( null reps ) return ()
-        (push_fn, sz) <- pushAtom d p (AnnVar fn)
+        (push_fn, sz) <- pushAtom d p (StgVarArg fn)
         platform <- profilePlatform <$> getProfile
         ASSERT( sz == wordSize platform ) return ()
         let slide = mkSlideB platform (d - init_d + wordSize platform) (init_d - s)
@@ -997,7 +990,7 @@ findPushSeq (D: rest)
 findPushSeq (L: rest)
   = (PUSH_APPLY_L, 1, rest)
 findPushSeq _
-  = panic "GHC.CoreToByteCode.findPushSeq"
+  = panic "GHC.StgToByteCode.findPushSeq"
 
 -- -----------------------------------------------------------------------------
 -- Case expressions
@@ -1006,23 +999,31 @@ doCase
     :: StackDepth
     -> Sequel
     -> BCEnv
-    -> AnnExpr Id DVarSet
+    -> CgStgExpr
     -> Id
-    -> [AnnAlt Id DVarSet]
-    -> Maybe Id  -- Just x <=> is an unboxed tuple case with scrut binder,
-                 -- don't enter the result
+    -> [CgStgAlt]
     -> BcM BCInstrList
-doCase d s p (_,scrut) bndr alts is_unboxed_tuple
-  | typePrimRep (idType bndr) `lengthExceeds` 1
-  = multiValException
-
-  | otherwise
+doCase d s p scrut bndr alts
   = do
      profile <- getProfile
      hsc_env <- getHscEnv
      let
         platform = profilePlatform profile
 
+        -- Are we dealing with an unboxed tuple with a tuple return frame?
+        --
+        -- 'Simple' tuples with at most one non-void component,
+        -- like (# Word# #) or (# Int#, State# RealWorld# #) do not have a
+        -- tuple return frame. This is because (# foo #) and (# foo, Void# #)
+        -- have the same runtime rep. We have more efficient specialized
+        -- return frames for the situations with one non-void element.
+
+        ubx_tuple_frame =
+          (isUnboxedTupleType bndr_ty || isUnboxedSumType bndr_ty) &&
+          length non_void_arg_reps > 1
+
+        non_void_arg_reps = non_void (typeArgReps platform bndr_ty)
+
         profiling
           | Just interp <- hsc_interp hsc_env
           = interpreterProfiled interp
@@ -1033,53 +1034,84 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
         -- When an alt is entered, it assumes the returned value is
         -- on top of the itbl.
         ret_frame_size_b :: StackDepth
-        ret_frame_size_b = 2 * wordSize platform
+        ret_frame_size_b | ubx_tuple_frame =
+                             (if profiling then 5 else 4) * wordSize platform
+                         | otherwise = 2 * wordSize platform
 
-        -- The extra frame we push to save/restore the CCCS when profiling
-        save_ccs_size_b | profiling = 2 * wordSize platform
+        -- The stack space used to save/restore the CCCS when profiling
+        save_ccs_size_b | profiling &&
+                          not ubx_tuple_frame = 2 * wordSize platform
                         | otherwise = 0
 
         -- An unlifted value gets an extra info table pushed on top
         -- when it is returned.
         unlifted_itbl_size_b :: StackDepth
-        unlifted_itbl_size_b | isAlgCase = 0
-                             | otherwise = wordSize platform
+        unlifted_itbl_size_b | isAlgCase       = 0
+                             | ubx_tuple_frame = 3 * wordSize platform
+                             | otherwise       = wordSize platform
+
+        (bndr_size, tuple_info, args_offsets)
+           | ubx_tuple_frame =
+               let bndr_ty = primRepCmmType platform
+                   bndr_reps = filter (not.isVoidRep) (bcIdPrimReps bndr)
+                   (tuple_info, args_offsets) =
+                       layoutTuple profile 0 bndr_ty bndr_reps
+               in ( wordsToBytes platform (tupleSize tuple_info)
+                  , tuple_info
+                  , args_offsets
+                  )
+           | otherwise = ( wordsToBytes platform (idSizeW platform bndr)
+                         , voidTupleInfo
+                         , []
+                         )
 
         -- depth of stack after the return value has been pushed
         d_bndr =
-            d + ret_frame_size_b + wordsToBytes platform (idSizeW platform bndr)
+            d + ret_frame_size_b + bndr_size
 
         -- depth of stack after the extra info table for an unboxed return
         -- has been pushed, if any.  This is the stack depth at the
         -- continuation.
-        d_alts = d_bndr + unlifted_itbl_size_b
+        d_alts = d + ret_frame_size_b + bndr_size + unlifted_itbl_size_b
 
         -- Env in which to compile the alts, not including
         -- any vars bound by the alts themselves
-        p_alts0 = Map.insert bndr d_bndr p
-
-        p_alts = case is_unboxed_tuple of
-                   Just ubx_bndr -> Map.insert ubx_bndr d_bndr p_alts0
-                   Nothing       -> p_alts0
+        p_alts = Map.insert bndr d_bndr p
 
         bndr_ty = idType bndr
-        isAlgCase = not (isUnliftedType bndr_ty) && isNothing is_unboxed_tuple
+        isAlgCase = not (isUnliftedType bndr_ty)
 
         -- given an alt, return a discr and code for it.
-        codeAlt (AnnAlt DEFAULT _ (_,rhs))
+        codeAlt (DEFAULT, _, rhs)
            = do rhs_code <- schemeE d_alts s p_alts rhs
                 return (NoDiscr, rhs_code)
 
-        codeAlt alt@(AnnAlt _ bndrs (_,rhs))
+        codeAlt alt@(_, bndrs, rhs)
            -- primitive or nullary constructor alt: no need to UNPACK
            | null real_bndrs = do
                 rhs_code <- schemeE d_alts s p_alts rhs
                 return (my_discr alt, rhs_code)
-           -- If an alt attempts to match on an unboxed tuple or sum, we must
-           -- bail out, as the bytecode compiler can't handle them.
-           -- (See #14608.)
-           | any (\bndr -> typePrimRep (idType bndr) `lengthExceeds` 1) bndrs
-           = multiValException
+           | isUnboxedTupleType bndr_ty || isUnboxedSumType bndr_ty =
+             let bndr_ty = primRepCmmType platform . bcIdPrimRep
+                 tuple_start = d_bndr
+                 (tuple_info, args_offsets) =
+                   layoutTuple profile
+                               0
+                               bndr_ty
+                               bndrs
+
+                 stack_bot = d_alts
+
+                 p' = Map.insertList
+                        [ (arg, tuple_start -
+                                wordsToBytes platform (tupleSize tuple_info) +
+                                offset)
+                        | (arg, offset) <- args_offsets
+                        , not (isVoidRep $ bcIdPrimRep arg)]
+                        p_alts
+             in do
+               rhs_code <- schemeE stack_bot s p' rhs
+               return (NoDiscr, rhs_code)
            -- algebraic alt with some binders
            | otherwise =
              let (tot_wds, _ptrs_wds, args_offsets) =
@@ -1104,24 +1136,24 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
            where
              real_bndrs = filterOut isTyVar bndrs
 
-        my_discr (AnnAlt DEFAULT _ _) = NoDiscr {-shouldn't really happen-}
-        my_discr (AnnAlt (DataAlt dc) _ _)
+        my_discr (DEFAULT, _, _) = NoDiscr {-shouldn't really happen-}
+        my_discr (DataAlt dc, _, _)
            | isUnboxedTupleDataCon dc || isUnboxedSumDataCon dc
-           = multiValException
+           = NoDiscr
            | otherwise
            = DiscrP (fromIntegral (dataConTag dc - fIRST_TAG))
-        my_discr (AnnAlt (LitAlt l) _ _)
+        my_discr (LitAlt l, _, _)
            = case l of LitNumber LitNumInt i  -> DiscrI (fromInteger i)
                        LitNumber LitNumWord w -> DiscrW (fromInteger w)
                        LitFloat r   -> DiscrF (fromRational r)
                        LitDouble r  -> DiscrD (fromRational r)
                        LitChar i    -> DiscrI (ord i)
-                       _ -> pprPanic "schemeE(AnnCase).my_discr" (ppr l)
+                       _ -> pprPanic "schemeE(StgCase).my_discr" (ppr l)
 
         maybe_ncons
            | not isAlgCase = Nothing
            | otherwise
-           = case [dc | AnnAlt (DataAlt dc) _ _ <- alts] of
+           = case [dc | (DataAlt dc, _, _) <- alts] of
                 []     -> Nothing
                 (dc:_) -> Just (tyConFamilySize (dataConTyCon dc))
 
@@ -1139,20 +1171,36 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
         -- really want a bitmap up to depth (d-s).  This affects compilation of
         -- case-of-case expressions, which is the only time we can be compiling a
         -- case expression with s /= 0.
-        bitmap_size = trunc16W $ bytesToWords platform (d - s)
+
+        -- unboxed tuples get two more words, the second is a pointer (tuple_bco)
+        (extra_pointers, extra_slots)
+           | ubx_tuple_frame && profiling = ([1], 3) -- tuple_info, tuple_BCO, CCCS
+           | ubx_tuple_frame              = ([1], 2) -- tuple_info, tuple_BCO
+           | otherwise                    = ([], 0)
+
+        bitmap_size = trunc16W $ fromIntegral extra_slots +
+                                 bytesToWords platform (d - s)
+
         bitmap_size' :: Int
         bitmap_size' = fromIntegral bitmap_size
-        bitmap = intsToReverseBitmap platform bitmap_size'{-size-}
-                        (sort (filter (< bitmap_size') rel_slots))
+
+
+        pointers =
+          extra_pointers ++
+          sort (filter (< bitmap_size') (map (+extra_slots) rel_slots))
           where
           binds = Map.toList p
           -- NB: unboxed tuple cases bind the scrut binder to the same offset
           -- as one of the alt binders, so we have to remove any duplicates here:
           rel_slots = nub $ map fromIntegral $ concatMap spread binds
-          spread (id, offset) | isFollowableArg (bcIdArgRep platform id) = [ rel_offset ]
-                              | otherwise                                = []
+          spread (id, offset) | isUnboxedTupleType (idType id) ||
+                                isUnboxedSumType (idType id) = []
+                              | isFollowableArg (bcIdArgRep platform id) = [ rel_offset ]
+                              | otherwise                      = []
                 where rel_offset = trunc16W $ bytesToWords platform (d - offset)
 
+        bitmap = intsToReverseBitmap platform bitmap_size'{-size-} pointers
+
      alt_stuff <- mapM codeAlt alts
      alt_final <- mkMultiBranch maybe_ncons alt_stuff
 
@@ -1160,20 +1208,218 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
          alt_bco_name = getName bndr
          alt_bco = mkProtoBCO platform alt_bco_name alt_final (Left alts)
                        0{-no arity-} bitmap_size bitmap True{-is alts-}
---     trace ("case: bndr = " ++ showSDocDebug (ppr bndr) ++ "\ndepth = " ++ show d ++ "\nenv = \n" ++ showSDocDebug (ppBCEnv p) ++
---            "\n      bitmap = " ++ show bitmap) $ do
-
      scrut_code <- schemeE (d + ret_frame_size_b + save_ccs_size_b)
                            (d + ret_frame_size_b + save_ccs_size_b)
                            p scrut
      alt_bco' <- emitBc alt_bco
-     let push_alts
-            | isAlgCase = PUSH_ALTS alt_bco'
-            | otherwise = PUSH_ALTS_UNLIFTED alt_bco' (typeArgRep platform bndr_ty)
-     return (push_alts `consOL` scrut_code)
+     if ubx_tuple_frame
+       then do
+              let args_ptrs =
+                    map (\(rep, off) -> (isFollowableArg (toArgRep platform rep), off))
+                        args_offsets
+              tuple_bco <- emitBc (tupleBCO platform tuple_info args_ptrs)
+              return (PUSH_ALTS_TUPLE alt_bco' tuple_info tuple_bco
+                      `consOL` scrut_code)
+       else let push_alts
+                  | isAlgCase
+                  = PUSH_ALTS alt_bco'
+                  | otherwise
+                  = let unlifted_rep =
+                          case non_void_arg_reps of
+                            []    -> V
+                            [rep] -> rep
+                            _     -> panic "schemeE(StgCase).push_alts"
+                    in PUSH_ALTS_UNLIFTED alt_bco' unlifted_rep
+            in return (push_alts `consOL` scrut_code)
 
 
 -- -----------------------------------------------------------------------------
+-- Deal with tuples
+
+-- The native calling convention uses registers for tuples, but in the
+-- bytecode interpreter, all values live on the stack.
+
+layoutTuple :: Profile
+            -> ByteOff
+            -> (a -> CmmType)
+            -> [a]
+            -> ( TupleInfo      -- See Note [GHCi TupleInfo]
+               , [(a, ByteOff)] -- argument, offset on stack
+               )
+layoutTuple profile start_off arg_ty reps =
+  let platform = profilePlatform profile
+      (orig_stk_bytes, pos) = assignArgumentsPos profile
+                                                 0
+                                                 NativeReturn
+                                                 arg_ty
+                                                 reps
+
+      -- keep the stack parameters in the same place
+      orig_stk_params = [(x, fromIntegral off) | (x, StackParam off) <- pos]
+
+      -- sort the register parameters by register and add them to the stack
+      (regs, reg_params)
+          = unzip $ sortBy (comparing fst)
+                           [(reg, x) | (x, RegisterParam reg) <- pos]
+
+      (new_stk_bytes, new_stk_params) = assignStack platform
+                                                    orig_stk_bytes
+                                                    arg_ty
+                                                    reg_params
+
+      -- make live register bitmaps
+      bmp_reg r ~(v, f, d, l)
+        = case r of VanillaReg n _ -> (a v n, f,     d,     l    )
+                    FloatReg n     -> (v,     a f n, d,     l    )
+                    DoubleReg n    -> (v,     f,     a d n, l    )
+                    LongReg n      -> (v,     f,     d,     a l n)
+                    _              ->
+                      pprPanic "GHC.StgToByteCode.layoutTuple unsupported register type"
+                               (ppr r)
+              where a bmp n = bmp .|. (1 `shiftL` (n-1))
+
+      (vanilla_regs, float_regs, double_regs, long_regs)
+          = foldr bmp_reg (0, 0, 0, 0) regs
+
+      get_byte_off (x, StackParam y) = (x, fromIntegral y)
+      get_byte_off _                 =
+          panic "GHC.StgToByteCode.layoutTuple get_byte_off"
+
+  in ( TupleInfo
+         { tupleSize        = bytesToWords platform (ByteOff new_stk_bytes)
+         , tupleVanillaRegs = vanilla_regs
+         , tupleLongRegs    = long_regs
+         , tupleFloatRegs   = float_regs
+         , tupleDoubleRegs  = double_regs
+         , tupleNativeStackSize = bytesToWords platform
+                                               (ByteOff orig_stk_bytes)
+         }
+     , sortBy (comparing snd) $
+              map (\(x, o) -> (x, o + start_off))
+                  (orig_stk_params ++ map get_byte_off new_stk_params)
+     )
+
+{- Note [unboxed tuple bytecodes and tuple_BCO]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+  We have the bytecode instructions RETURN_TUPLE and PUSH_ALTS_TUPLE to
+  return and receive arbitrary unboxed tuples, respectively. These
+  instructions use the helper data tuple_BCO and tuple_info.
+
+  The helper data is used to convert tuples between GHCs native calling
+  convention (object code), which uses stack and registers, and the bytecode
+  calling convention, which only uses the stack. See Note [GHCi TupleInfo]
+  for more details.
+
+
+  Returning a tuple
+  =================
+
+  Bytecode that returns a tuple first pushes all the tuple fields followed
+  by the appropriate tuple_info and tuple_BCO onto the stack. It then
+  executes the RETURN_TUPLE instruction, which causes the interpreter
+  to push stg_ret_t_info to the top of the stack. The stack (growing down)
+  then looks as follows:
+
+      ...
+      next_frame
+      tuple_field_1
+      tuple_field_2
+      ...
+      tuple_field_n
+      tuple_info
+      tuple_BCO
+      stg_ret_t_info <- Sp
+
+  If next_frame is bytecode, the interpreter will start executing it. If
+  it's object code, the interpreter jumps back to the scheduler, which in
+  turn jumps to stg_ret_t. stg_ret_t converts the tuple to the native
+  calling convention using the description in tuple_info, and then jumps
+  to next_frame.
+
+
+  Receiving a tuple
+  =================
+
+  Bytecode that receives a tuple uses the PUSH_ALTS_TUPLE instruction to
+  push a continuation, followed by jumping to the code that produces the
+  tuple. The PUSH_ALTS_TUPLE instuction contains three pieces of data:
+
+     * cont_BCO: the continuation that receives the tuple
+     * tuple_info: see below
+     * tuple_BCO: see below
+
+  The interpreter pushes these onto the stack when the PUSH_ALTS_TUPLE
+  instruction is executed, followed by stg_ctoi_tN_info, with N depending
+  on the number of stack words used by the tuple in the GHC native calling
+  convention. N is derived from tuple_info.
+
+  For example if we expect a tuple with three words on the stack, the stack
+  looks as follows after PUSH_ALTS_TUPLE:
+
+      ...
+      next_frame
+      cont_free_var_1
+      cont_free_var_2
+      ...
+      cont_free_var_n
+      tuple_info
+      tuple_BCO
+      cont_BCO
+      stg_ctoi_t3_info <- Sp
+
+  If the tuple is returned by object code, stg_ctoi_t3 will deal with
+  adjusting the stack pointer and converting the tuple to the bytecode
+  calling convention. See Note [GHCi unboxed tuples stack spills] for more
+  details.
+
+
+  The tuple_BCO
+  =============
+
+  The tuple_BCO is a helper bytecode object. Its main purpose is describing
+  the contents of the stack frame containing the tuple for the storage
+  manager. It contains only instructions to immediately return the tuple
+  that is already on the stack.
+
+
+  The tuple_info word
+  ===================
+
+  The tuple_info word describes the stack and STG register (e.g. R1..R6,
+  D1..D6) usage for the tuple. tuple_info contains enough information to
+  convert the tuple between the stack-only bytecode and stack+registers
+  GHC native calling conventions.
+
+  See Note [GHCi tuple layout] for more details of how the data is packed
+  in a single word.
+
+ -}
+
+tupleBCO :: Platform -> TupleInfo -> [(Bool, ByteOff)] -> [FFIInfo] -> ProtoBCO Name
+tupleBCO platform info pointers =
+  mkProtoBCO platform invented_name body_code (Left [])
+             0{-no arity-} bitmap_size bitmap False{-is alts-}
+
+  where
+    {-
+      The tuple BCO is never referred to by name, so we can get away
+      with using a fake name here. We will need to change this if we want
+      to save some memory by sharing the BCO between places that have
+      the same tuple shape
+    -}
+    invented_name  = mkSystemVarName (mkPseudoUniqueE 0) (fsLit "tuple")
+
+    -- the first word in the frame is the tuple_info word,
+    -- which is not a pointer
+    bitmap_size = trunc16W $ 1 + tupleSize info
+    bitmap      = intsToReverseBitmap platform (fromIntegral bitmap_size) $
+                  map ((+1) . fromIntegral . bytesToWords platform . snd)
+                      (filter fst pointers)
+    body_code = mkSlideW 0 1          -- pop frame header
+                `snocOL` RETURN_TUPLE -- and add it again
+
+-- -----------------------------------------------------------------------------
 -- Deal with a CCall.
 
 -- Taggedly push the args onto the stack R->L,
@@ -1187,10 +1433,10 @@ generateCCall
     -> Sequel
     -> BCEnv
     -> CCallSpec               -- where to call
-    -> Id                      -- of target, for type info
-    -> [AnnExpr' Id DVarSet]   -- args (atoms)
+    -> Type
+    -> [StgArg]              -- args (atoms)
     -> BcM BCInstrList
-generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
+generateCCall d0 s p (CCallSpec target cconv safety) result_ty args_r_to_l
  = do
      profile <- getProfile
 
@@ -1200,56 +1446,40 @@ generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
          addr_size_b :: ByteOff
          addr_size_b = wordSize platform
 
+         arrayish_rep_hdr_size :: TyCon -> Maybe Int
+         arrayish_rep_hdr_size t
+           | t == arrayPrimTyCon || t == mutableArrayPrimTyCon
+              = Just (arrPtrsHdrSize profile)
+           | t == smallArrayPrimTyCon || t == smallMutableArrayPrimTyCon
+              = Just (smallArrPtrsHdrSize profile)
+           | t == byteArrayPrimTyCon || t == mutableByteArrayPrimTyCon
+              = Just (arrWordsHdrSize profile)
+           | otherwise
+              = Nothing
+
          -- Get the args on the stack, with tags and suitably
          -- dereferenced for the CCall.  For each arg, return the
          -- depth to the first word of the bits for that arg, and the
          -- ArgRep of what was actually pushed.
 
          pargs
-             :: ByteOff -> [AnnExpr' Id DVarSet] -> BcM [(BCInstrList, PrimRep)]
+             :: ByteOff -> [StgArg] -> BcM [(BCInstrList, PrimRep)]
          pargs _ [] = return []
-         pargs d (a:az)
-            = let arg_ty = unwrapType (exprType (deAnnotate' a))
-
-              in case tyConAppTyCon_maybe arg_ty of
-                    -- Don't push the FO; instead push the Addr# it
-                    -- contains.
-                    Just t
-                     | t == arrayPrimTyCon || t == mutableArrayPrimTyCon
-                       -> do rest <- pargs (d + addr_size_b) az
-                             code <- parg_ArrayishRep (fromIntegral (arrPtrsHdrSize profile)) d p a
-                             return ((code,AddrRep):rest)
-
-                     | t == smallArrayPrimTyCon || t == smallMutableArrayPrimTyCon
-                       -> do rest <- pargs (d + addr_size_b) az
-                             code <- parg_ArrayishRep (fromIntegral (smallArrPtrsHdrSize profile)) d p a
-                             return ((code,AddrRep):rest)
-
-                     | t == byteArrayPrimTyCon || t == mutableByteArrayPrimTyCon
-                       -> do rest <- pargs (d + addr_size_b) az
-                             code <- parg_ArrayishRep (fromIntegral (arrWordsHdrSize profile)) d p a
-                             return ((code,AddrRep):rest)
-
-                    -- Default case: push taggedly, but otherwise intact.
-                    _
-                       -> do (code_a, sz_a) <- pushAtom d p a
-                             rest <- pargs (d + sz_a) az
-                             return ((code_a, atomPrimRep a) : rest)
-
-         -- Do magic for Ptr/Byte arrays.  Push a ptr to the array on
-         -- the stack but then advance it over the headers, so as to
-         -- point to the payload.
-         parg_ArrayishRep
-             :: Word16
-             -> StackDepth
-             -> BCEnv
-             -> AnnExpr' Id DVarSet
-             -> BcM BCInstrList
-         parg_ArrayishRep hdrSize d p a
-            = do (push_fo, _) <- pushAtom d p a
+         pargs d (aa@(StgVarArg a):az)
+            | Just t      <- tyConAppTyCon_maybe (idType a)
+            , Just hdr_sz <- arrayish_rep_hdr_size t
+            -- Do magic for Ptr/Byte arrays.  Push a ptr to the array on
+            -- the stack but then advance it over the headers, so as to
+            -- point to the payload.
+            = do rest <- pargs (d + addr_size_b) az
+                 (push_fo, _) <- pushAtom d p aa
                  -- The ptr points at the header.  Advance it over the
                  -- header and then pretend this is an Addr#.
-                 return (push_fo `snocOL` SWIZZLE 0 hdrSize)
+                 let code = push_fo `snocOL` SWIZZLE 0 (fromIntegral hdr_sz)
+                 return ((code, AddrRep) : rest)
+         pargs d (aa:az) =  do (code_a, sz_a) <- pushAtom d p aa
+                               rest <- pargs (d + sz_a) az
+                               return ((code_a, atomPrimRep aa) : rest)
 
      code_n_reps <- pargs d0 args_r_to_l
      let
@@ -1260,7 +1490,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
          !d_after_args = d0 + wordsToBytes platform a_reps_sizeW
          a_reps_pushed_RAW
             | null a_reps_pushed_r_to_l || not (isVoidRep (head a_reps_pushed_r_to_l))
-            = panic "GHC.CoreToByteCode.generateCCall: missing or invalid World token?"
+            = panic "GHC.StgToByteCode.generateCCall: missing or invalid World token?"
             | otherwise
             = reverse (tail a_reps_pushed_r_to_l)
 
@@ -1270,7 +1500,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
 
          -- Get the result rep.
          (returns_void, r_rep)
-            = case maybe_getCCallReturnRep (idType fn) of
+            = case maybe_getCCallReturnRep result_ty of
                  Nothing -> (True,  VoidRep)
                  Just rr -> (False, rr)
          {-
@@ -1332,7 +1562,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
          a_reps --  | trace (showSDoc (ppr a_reps_pushed_RAW)) False = error "???"
                 | is_static = a_reps_pushed_RAW
                 | otherwise = if null a_reps_pushed_RAW
-                              then panic "GHC.CoreToByteCode.generateCCall: dyn with no args"
+                              then panic "GHC.StgToByteCode.generateCCall: dyn with no args"
                               else tail a_reps_pushed_RAW
 
          -- push the Addr#
@@ -1362,7 +1592,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
          conv = case cconv of
            CCallConv -> FFICCall
            StdCallConv -> FFIStdCall
-           _ -> panic "GHC.CoreToByteCode: unexpected calling convention"
+           _ -> panic "GHC.StgToByteCode: unexpected calling convention"
 
      -- the only difference in libffi mode is that we prepare a cif
      -- describing the call type by calling libffi, and we attach the
@@ -1472,14 +1702,10 @@ maybe_getCCallReturnRep fn_ty
                  -- valid return value placeholder on the stack
          _             -> blargh
 
-maybe_is_tagToEnum_call :: AnnExpr' Id DVarSet -> Maybe (AnnExpr' Id DVarSet, [Name])
+maybe_is_tagToEnum_call :: CgStgExpr -> Maybe (Id, [Name])
 -- Detect and extract relevant info for the tagToEnum kludge.
-maybe_is_tagToEnum_call app
-  | AnnApp (_, AnnApp (_, AnnVar v) (_, AnnType t)) arg <- app
-  , Just TagToEnumOp <- isPrimOpId_maybe v
-  = Just (snd arg, extract_constr_Names t)
-  | otherwise
-  = Nothing
+maybe_is_tagToEnum_call (StgOpApp (StgPrimOp TagToEnumOp) [StgVarArg v] t)
+  = Just (v, extract_constr_Names t)
   where
     extract_constr_Names ty
            | rep_ty <- unwrapType ty
@@ -1490,6 +1716,7 @@ maybe_is_tagToEnum_call app
            -- the DataCon.  See "GHC.Core.DataCon" for details.
            | otherwise
            = pprPanic "maybe_is_tagToEnum_call.extract_constr_Ids" (ppr ty)
+maybe_is_tagToEnum_call _ = Nothing
 
 {- -----------------------------------------------------------------------------
 Note [Implementing tagToEnum#]
@@ -1533,13 +1760,13 @@ implement_tagToId
     :: StackDepth
     -> Sequel
     -> BCEnv
-    -> AnnExpr' Id DVarSet
+    -> Id
     -> [Name]
     -> BcM BCInstrList
 -- See Note [Implementing tagToEnum#]
 implement_tagToId d s p arg names
   = ASSERT( notNull names )
-    do (push_arg, arg_bytes) <- pushAtom d p arg
+    do (push_arg, arg_bytes) <- pushAtom d p (StgVarArg arg)
        labels <- getLabelsBc (genericLength names)
        label_fail <- getLabelBc
        label_exit <- getLabelBc
@@ -1582,21 +1809,12 @@ implement_tagToId d s p arg names
 -- depth 6 stack has valid words 0 .. 5.
 
 pushAtom
-    :: StackDepth -> BCEnv -> AnnExpr' Id DVarSet -> BcM (BCInstrList, ByteOff)
-pushAtom d p e
-   | Just e' <- bcView e
-   = pushAtom d p e'
-
-pushAtom _ _ (AnnCoercion {})   -- Coercions are zero-width things,
-   = return (nilOL, 0)          -- treated just like a variable V
+    :: StackDepth -> BCEnv -> StgArg -> BcM (BCInstrList, ByteOff)
 
 -- See Note [Empty case alternatives] in GHC.Core
 -- and Note [Bottoming expressions] in GHC.Core.Utils:
 -- The scrutinee of an empty case evaluates to bottom
-pushAtom d p (AnnCase (_, a) _ _ []) -- trac #12128
-   = pushAtom d p a
-
-pushAtom d p (AnnVar var)
+pushAtom d p (StgVarArg var)
    | [] <- typePrimRep (idType var)
    = return (nilOL, 0)
 
@@ -1635,15 +1853,14 @@ pushAtom d p (AnnVar var)
    = do topStrings <- getTopStrings
         platform <- targetPlatform <$> getDynFlags
         case lookupVarEnv topStrings var of
-            Just ptr -> pushAtom d p $ AnnLit $ mkLitWord platform $
+            Just ptr -> pushAtom d p $ StgLitArg $ mkLitWord platform $
               fromIntegral $ ptrToWordPtr $ fromRemotePtr ptr
             Nothing -> do
                 let sz = idSizeCon platform var
                 MASSERT( sz == wordSize platform )
                 return (unitOL (PUSH_G (getName var)), sz)
 
-
-pushAtom _ _ (AnnLit lit) = do
+pushAtom _ _ (StgLitArg lit) = do
      platform <- targetPlatform <$> getDynFlags
      let code :: PrimRep -> BcM (BCInstrList, ByteOff)
          code rep =
@@ -1684,21 +1901,15 @@ pushAtom _ _ (AnnLit lit) = do
           LitNumInteger -> panic "pushAtom: LitInteger"
           LitNumNatural -> panic "pushAtom: LitNatural"
 
-pushAtom _ _ expr
-   = pprPanic "GHC.CoreToByteCode.pushAtom"
-              (pprCoreExpr (deAnnotate' expr))
-
-
 -- | Push an atom for constructor (i.e., PACK instruction) onto the stack.
 -- This is slightly different to @pushAtom@ due to the fact that we allow
 -- packing constructor fields. See also @mkConAppCode@ and @pushPadding@.
 pushConstrAtom
-    :: StackDepth -> BCEnv -> AnnExpr' Id DVarSet -> BcM (BCInstrList, ByteOff)
-
-pushConstrAtom _ _ (AnnLit lit@(LitFloat _)) =
+    :: StackDepth -> BCEnv -> StgArg -> BcM (BCInstrList, ByteOff)
+pushConstrAtom _ _ (StgLitArg lit@(LitFloat _)) =
     return (unitOL (PUSH_UBX32 lit), 4)
 
-pushConstrAtom d p (AnnVar v)
+pushConstrAtom d p va@(StgVarArg v)
     | Just d_v <- lookupBCEnv_maybe v p = do  -- v is a local variable
         platform <- targetPlatform <$> getDynFlags
         let !szb = idSizeCon platform v
@@ -1709,7 +1920,7 @@ pushConstrAtom d p (AnnVar v)
             1 -> done PUSH8
             2 -> done PUSH16
             4 -> done PUSH32
-            _ -> pushAtom d p (AnnVar v)
+            _ -> pushAtom d p va
 
 pushConstrAtom d p expr = pushAtom d p expr
 
@@ -1869,7 +2080,14 @@ idSizeW :: Platform -> Id -> WordOff
 idSizeW platform = WordOff . argRepSizeW platform . bcIdArgRep platform
 
 idSizeCon :: Platform -> Id -> ByteOff
-idSizeCon platform = ByteOff . primRepSizeB platform . bcIdPrimRep
+idSizeCon platform var
+  -- unboxed tuple components are padded to word size
+  | isUnboxedTupleType (idType var) ||
+    isUnboxedSumType (idType var) =
+    wordsToBytes platform .
+    WordOff . sum . map (argRepSizeW platform . toArgRep platform) .
+    bcIdPrimReps $ var
+  | otherwise = ByteOff (primRepSizeB platform (bcIdPrimRep var))
 
 bcIdArgRep :: Platform -> Id -> ArgRep
 bcIdArgRep platform = toArgRep platform . bcIdPrimRep
@@ -1881,6 +2099,10 @@ bcIdPrimRep id
   | otherwise
   = pprPanic "bcIdPrimRep" (ppr id <+> dcolon <+> ppr (idType id))
 
+
+bcIdPrimReps :: Id -> [PrimRep]
+bcIdPrimReps id = typePrimRepArgs (idType id)
+
 repSizeWords :: Platform -> PrimRep -> WordOff
 repSizeWords platform rep = WordOff $ argRepSizeW platform (toArgRep platform rep)
 
@@ -1888,17 +2110,6 @@ isFollowableArg :: ArgRep -> Bool
 isFollowableArg P = True
 isFollowableArg _ = False
 
-isVoidArg :: ArgRep -> Bool
-isVoidArg V = True
-isVoidArg _ = False
-
--- See bug #1257
-multiValException :: a
-multiValException = throwGhcException (ProgramError
-  ("Error: bytecode compiler can't handle unboxed tuples and sums.\n"++
-   "  Possibly due to foreign import/export decls in source.\n"++
-   "  Workaround: use -fobject-code, or compile this module to .o separately."))
-
 -- | Indicate if the calling convention is supported
 isSupportedCConv :: CCallSpec -> Bool
 isSupportedCConv (CCallSpec _ cconv _) = case cconv of
@@ -1934,62 +2145,11 @@ mkSlideW !n !ws
     limit :: Word16
     limit = maxBound
 
-splitApp :: AnnExpr' Var ann -> (AnnExpr' Var ann, [AnnExpr' Var ann])
-        -- The arguments are returned in *right-to-left* order
-splitApp e | Just e' <- bcView e = splitApp e'
-splitApp (AnnApp (_,f) (_,a))    = case splitApp f of
-                                      (f', as) -> (f', a:as)
-splitApp e                       = (e, [])
-
-
-bcView :: AnnExpr' Var ann -> Maybe (AnnExpr' Var ann)
--- The "bytecode view" of a term discards
---  a) type abstractions
---  b) type applications
---  c) casts
---  d) ticks (but not breakpoints)
---  e) case unsafeEqualityProof of UnsafeRefl -> e  ==> e
--- Type lambdas *can* occur in random expressions,
--- whereas value lambdas cannot; that is why they are nuked here
-bcView (AnnCast (_,e) _)             = Just e
-bcView (AnnLam v (_,e)) | isTyVar v  = Just e
-bcView (AnnApp (_,e) (_, AnnType _)) = Just e
-bcView (AnnTick Breakpoint{} _)      = Nothing
-bcView (AnnTick _other_tick (_,e))   = Just e
-bcView (AnnCase (_,e) _ _ alts)  -- Handle unsafe equality proof
-  | AnnVar id <- bcViewLoop e
-  , idName id == unsafeEqualityProofName
-  , [AnnAlt _ _ (_, rhs)] <- alts
-  = Just rhs
-bcView _                             = Nothing
-
-bcViewLoop :: AnnExpr' Var ann -> AnnExpr' Var ann
-bcViewLoop e =
-    case bcView e of
-      Nothing -> e
-      Just e' -> bcViewLoop e'
-
-isVAtom :: Platform -> AnnExpr' Var ann -> Bool
-isVAtom platform expr = case expr of
-   e | Just e' <- bcView e -> isVAtom platform e'
-   (AnnVar v)              -> isVoidArg (bcIdArgRep platform v)
-   (AnnCoercion {})        -> True
-   _                       -> False
-
-atomPrimRep :: AnnExpr' Id ann -> PrimRep
-atomPrimRep e | Just e' <- bcView e = atomPrimRep e'
-atomPrimRep (AnnVar v)              = bcIdPrimRep v
-atomPrimRep (AnnLit l)              = typePrimRep1 (literalType l)
-
--- #12128:
--- A case expression can be an atom because empty cases evaluate to bottom.
--- See Note [Empty case alternatives] in GHC.Core
-atomPrimRep (AnnCase _ _ ty _)      =
-  ASSERT(case typePrimRep ty of [LiftedRep] -> True; _ -> False) LiftedRep
-atomPrimRep (AnnCoercion {})        = VoidRep
-atomPrimRep other = pprPanic "atomPrimRep" (ppr (deAnnotate' other))
+atomPrimRep :: StgArg -> PrimRep
+atomPrimRep (StgVarArg v) = bcIdPrimRep v
+atomPrimRep (StgLitArg l) = typePrimRep1 (literalType l)
 
-atomRep :: Platform -> AnnExpr' Id ann -> ArgRep
+atomRep :: Platform -> StgArg -> ArgRep
 atomRep platform e = toArgRep platform (atomPrimRep e)
 
 -- | Let szsw be the sizes in bytes of some items pushed onto the stack, which
@@ -1998,8 +2158,8 @@ atomRep platform e = toArgRep platform (atomPrimRep e)
 mkStackOffsets :: ByteOff -> [ByteOff] -> [ByteOff]
 mkStackOffsets original_depth szsb = tail (scanl' (+) original_depth szsb)
 
-typeArgRep :: Platform -> Type -> ArgRep
-typeArgRep platform = toArgRep platform . typePrimRep1
+typeArgReps :: Platform -> Type -> [ArgRep]
+typeArgReps platform = map (toArgRep platform) . typePrimRepArgs
 
 -- -----------------------------------------------------------------------------
 -- The bytecode generator's monad
@@ -2088,7 +2248,7 @@ getLabelsBc n
 
 getCCArray :: BcM (Array BreakIndex (RemotePtr CostCentre))
 getCCArray = BcM $ \st ->
-  let breaks = expectJust "GHC.CoreToByteCode.getCCArray" $ modBreaks st in
+  let breaks = expectJust "GHC.StgToByteCode.getCCArray" $ modBreaks st in
   return (st, modBreaks_ccs breaks)
 
 
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 76d225bd57..ded5bc4c07 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -335,7 +335,6 @@ Library
         GHC.Core.Stats
         GHC.Core.Subst
         GHC.Core.Tidy
-        GHC.CoreToByteCode
         GHC.CoreToIface
         GHC.CoreToStg
         GHC.CoreToStg.Prep
@@ -536,6 +535,7 @@ Library
         GHC.Stg.Stats
         GHC.Stg.Subst
         GHC.Stg.Syntax
+        GHC.StgToByteCode
         GHC.StgToCmm
         GHC.StgToCmm.ArgRep
         GHC.StgToCmm.Bind
author	Luite Stegeman <stegeman@gmail.com>	2021-01-22 00:09:17 +0100
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2021-03-20 07:49:15 -0400
commit	1f94e0f7601f8e22fdd81a47f130650265a44196 (patch)
tree	d06d02317049b56763b2f1da27f71f3663efa5a0 /compiler
parent	7de3532f0317032f75b76150c5d3a6f76178be04 (diff)
download	haskell-1f94e0f7601f8e22fdd81a47f130650265a44196.tar.gz