Module hierarchy: StgToCmm (#13009)

Add StgToCmm module hierarchy. Platform modules that are used in several other places (NCG, LLVM codegen, Cmm transformations) are put into GHC.Platform.
author: Sylvain Henry <sylvain@haskus.fr> 2019-08-13 17:26:32 +0200
committer: Sylvain Henry <sylvain@haskus.fr> 2019-09-10 00:04:50 +0200
commit: 447864a94a1679b5b079e08bb7208a0005381cef (patch)
tree: baa469c52620ce7ae02def3e5e6a6f109cc89f40 /compiler/codeGen
parent: 270fbe8512f04b6107755fa22bdec62205c0a567 (diff)
download: haskell-447864a94a1679b5b079e08bb7208a0005381cef.tar.gz
27 files changed, 0 insertions, 11330 deletions
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
deleted file mode 100644
index 0ff9bd8b56..0000000000
--- a/compiler/codeGen/CgUtils.hs
+++ /dev/null
@@ -1,186 +0,0 @@
-{-# LANGUAGE GADTs #-}
-
------------------------------------------------------------------------------
---
--- Code generator utilities; mostly monadic
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module CgUtils (
-        fixStgRegisters,
-        baseRegOffset,
-        get_Regtable_addr_from_offset,
-        regTableOffset,
-        get_GlobalReg_addr,
-  ) where
-
-import GhcPrelude
-
-import CodeGen.Platform
-import Cmm
-import Hoopl.Block
-import Hoopl.Graph
-import CmmUtils
-import CLabel
-import DynFlags
-import Outputable
-
--- -----------------------------------------------------------------------------
--- Information about global registers
-
-baseRegOffset :: DynFlags -> GlobalReg -> Int
-
-baseRegOffset dflags (VanillaReg 1 _)    = oFFSET_StgRegTable_rR1 dflags
-baseRegOffset dflags (VanillaReg 2 _)    = oFFSET_StgRegTable_rR2 dflags
-baseRegOffset dflags (VanillaReg 3 _)    = oFFSET_StgRegTable_rR3 dflags
-baseRegOffset dflags (VanillaReg 4 _)    = oFFSET_StgRegTable_rR4 dflags
-baseRegOffset dflags (VanillaReg 5 _)    = oFFSET_StgRegTable_rR5 dflags
-baseRegOffset dflags (VanillaReg 6 _)    = oFFSET_StgRegTable_rR6 dflags
-baseRegOffset dflags (VanillaReg 7 _)    = oFFSET_StgRegTable_rR7 dflags
-baseRegOffset dflags (VanillaReg 8 _)    = oFFSET_StgRegTable_rR8 dflags
-baseRegOffset dflags (VanillaReg 9 _)    = oFFSET_StgRegTable_rR9 dflags
-baseRegOffset dflags (VanillaReg 10 _)   = oFFSET_StgRegTable_rR10 dflags
-baseRegOffset _      (VanillaReg n _)    = panic ("Registers above R10 are not supported (tried to use R" ++ show n ++ ")")
-baseRegOffset dflags (FloatReg  1)       = oFFSET_StgRegTable_rF1 dflags
-baseRegOffset dflags (FloatReg  2)       = oFFSET_StgRegTable_rF2 dflags
-baseRegOffset dflags (FloatReg  3)       = oFFSET_StgRegTable_rF3 dflags
-baseRegOffset dflags (FloatReg  4)       = oFFSET_StgRegTable_rF4 dflags
-baseRegOffset dflags (FloatReg  5)       = oFFSET_StgRegTable_rF5 dflags
-baseRegOffset dflags (FloatReg  6)       = oFFSET_StgRegTable_rF6 dflags
-baseRegOffset _      (FloatReg  n)       = panic ("Registers above F6 are not supported (tried to use F" ++ show n ++ ")")
-baseRegOffset dflags (DoubleReg 1)       = oFFSET_StgRegTable_rD1 dflags
-baseRegOffset dflags (DoubleReg 2)       = oFFSET_StgRegTable_rD2 dflags
-baseRegOffset dflags (DoubleReg 3)       = oFFSET_StgRegTable_rD3 dflags
-baseRegOffset dflags (DoubleReg 4)       = oFFSET_StgRegTable_rD4 dflags
-baseRegOffset dflags (DoubleReg 5)       = oFFSET_StgRegTable_rD5 dflags
-baseRegOffset dflags (DoubleReg 6)       = oFFSET_StgRegTable_rD6 dflags
-baseRegOffset _      (DoubleReg n)       = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")")
-baseRegOffset dflags (XmmReg 1)          = oFFSET_StgRegTable_rXMM1 dflags
-baseRegOffset dflags (XmmReg 2)          = oFFSET_StgRegTable_rXMM2 dflags
-baseRegOffset dflags (XmmReg 3)          = oFFSET_StgRegTable_rXMM3 dflags
-baseRegOffset dflags (XmmReg 4)          = oFFSET_StgRegTable_rXMM4 dflags
-baseRegOffset dflags (XmmReg 5)          = oFFSET_StgRegTable_rXMM5 dflags
-baseRegOffset dflags (XmmReg 6)          = oFFSET_StgRegTable_rXMM6 dflags
-baseRegOffset _      (XmmReg n)          = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
-baseRegOffset dflags (YmmReg 1)          = oFFSET_StgRegTable_rYMM1 dflags
-baseRegOffset dflags (YmmReg 2)          = oFFSET_StgRegTable_rYMM2 dflags
-baseRegOffset dflags (YmmReg 3)          = oFFSET_StgRegTable_rYMM3 dflags
-baseRegOffset dflags (YmmReg 4)          = oFFSET_StgRegTable_rYMM4 dflags
-baseRegOffset dflags (YmmReg 5)          = oFFSET_StgRegTable_rYMM5 dflags
-baseRegOffset dflags (YmmReg 6)          = oFFSET_StgRegTable_rYMM6 dflags
-baseRegOffset _      (YmmReg n)          = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
-baseRegOffset dflags (ZmmReg 1)          = oFFSET_StgRegTable_rZMM1 dflags
-baseRegOffset dflags (ZmmReg 2)          = oFFSET_StgRegTable_rZMM2 dflags
-baseRegOffset dflags (ZmmReg 3)          = oFFSET_StgRegTable_rZMM3 dflags
-baseRegOffset dflags (ZmmReg 4)          = oFFSET_StgRegTable_rZMM4 dflags
-baseRegOffset dflags (ZmmReg 5)          = oFFSET_StgRegTable_rZMM5 dflags
-baseRegOffset dflags (ZmmReg 6)          = oFFSET_StgRegTable_rZMM6 dflags
-baseRegOffset _      (ZmmReg n)          = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
-baseRegOffset dflags Sp                  = oFFSET_StgRegTable_rSp dflags
-baseRegOffset dflags SpLim               = oFFSET_StgRegTable_rSpLim dflags
-baseRegOffset dflags (LongReg 1)         = oFFSET_StgRegTable_rL1 dflags
-baseRegOffset _      (LongReg n)         = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")")
-baseRegOffset dflags Hp                  = oFFSET_StgRegTable_rHp dflags
-baseRegOffset dflags HpLim               = oFFSET_StgRegTable_rHpLim dflags
-baseRegOffset dflags CCCS                = oFFSET_StgRegTable_rCCCS dflags
-baseRegOffset dflags CurrentTSO          = oFFSET_StgRegTable_rCurrentTSO dflags
-baseRegOffset dflags CurrentNursery      = oFFSET_StgRegTable_rCurrentNursery dflags
-baseRegOffset dflags HpAlloc             = oFFSET_StgRegTable_rHpAlloc dflags
-baseRegOffset dflags EagerBlackholeInfo  = oFFSET_stgEagerBlackholeInfo dflags
-baseRegOffset dflags GCEnter1            = oFFSET_stgGCEnter1 dflags
-baseRegOffset dflags GCFun               = oFFSET_stgGCFun dflags
-baseRegOffset _      BaseReg             = panic "CgUtils.baseRegOffset:BaseReg"
-baseRegOffset _      PicBaseReg          = panic "CgUtils.baseRegOffset:PicBaseReg"
-baseRegOffset _      MachSp              = panic "CgUtils.baseRegOffset:MachSp"
-baseRegOffset _      UnwindReturnReg     = panic "CgUtils.baseRegOffset:UnwindReturnReg"
-
-
--- -----------------------------------------------------------------------------
---
--- STG/Cmm GlobalReg
---
--- -----------------------------------------------------------------------------
-
--- | We map STG registers onto appropriate CmmExprs.  Either they map
--- to real machine registers or stored as offsets from BaseReg.  Given
--- a GlobalReg, get_GlobalReg_addr always produces the
--- register table address for it.
-get_GlobalReg_addr :: DynFlags -> GlobalReg -> CmmExpr
-get_GlobalReg_addr dflags BaseReg = regTableOffset dflags 0
-get_GlobalReg_addr dflags mid
-    = get_Regtable_addr_from_offset dflags (baseRegOffset dflags mid)
-
--- Calculate a literal representing an offset into the register table.
--- Used when we don't have an actual BaseReg to offset from.
-regTableOffset :: DynFlags -> Int -> CmmExpr
-regTableOffset dflags n =
-  CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r dflags + n))
-
-get_Regtable_addr_from_offset :: DynFlags -> Int -> CmmExpr
-get_Regtable_addr_from_offset dflags offset =
-    if haveRegBase (targetPlatform dflags)
-    then CmmRegOff baseReg offset
-    else regTableOffset dflags offset
-
--- | Fixup global registers so that they assign to locations within the
--- RegTable if they aren't pinned for the current target.
-fixStgRegisters :: DynFlags -> RawCmmDecl -> RawCmmDecl
-fixStgRegisters _ top@(CmmData _ _) = top
-
-fixStgRegisters dflags (CmmProc info lbl live graph) =
-  let graph' = modifyGraph (mapGraphBlocks (fixStgRegBlock dflags)) graph
-  in CmmProc info lbl live graph'
-
-fixStgRegBlock :: DynFlags -> Block CmmNode e x -> Block CmmNode e x
-fixStgRegBlock dflags block = mapBlock (fixStgRegStmt dflags) block
-
-fixStgRegStmt :: DynFlags -> CmmNode e x -> CmmNode e x
-fixStgRegStmt dflags stmt = fixAssign $ mapExpDeep fixExpr stmt
-  where
-    platform = targetPlatform dflags
-
-    fixAssign stmt =
-      case stmt of
-        CmmAssign (CmmGlobal reg) src
-          -- MachSp isn't an STG register; it's merely here for tracking unwind
-          -- information
-          | reg == MachSp -> stmt
-          | otherwise ->
-            let baseAddr = get_GlobalReg_addr dflags reg
-            in case reg `elem` activeStgRegs (targetPlatform dflags) of
-                True  -> CmmAssign (CmmGlobal reg) src
-                False -> CmmStore baseAddr src
-        other_stmt -> other_stmt
-
-    fixExpr expr = case expr of
-        -- MachSp isn't an STG; it's merely here for tracking unwind information
-        CmmReg (CmmGlobal MachSp) -> expr
-        CmmReg (CmmGlobal reg) ->
-            -- Replace register leaves with appropriate StixTrees for
-            -- the given target.  MagicIds which map to a reg on this
-            -- arch are left unchanged.  For the rest, BaseReg is taken
-            -- to mean the address of the reg table in MainCapability,
-            -- and for all others we generate an indirection to its
-            -- location in the register table.
-            case reg `elem` activeStgRegs platform of
-                True  -> expr
-                False ->
-                    let baseAddr = get_GlobalReg_addr dflags reg
-                    in case reg of
-                        BaseReg -> baseAddr
-                        _other  -> CmmLoad baseAddr (globalRegType dflags reg)
-
-        CmmRegOff (CmmGlobal reg) offset ->
-            -- RegOf leaves are just a shorthand form. If the reg maps
-            -- to a real reg, we keep the shorthand, otherwise, we just
-            -- expand it and defer to the above code.
-            case reg `elem` activeStgRegs platform of
-                True  -> expr
-                False -> CmmMachOp (MO_Add (wordWidth dflags)) [
-                                    fixExpr (CmmReg (CmmGlobal reg)),
-                                    CmmLit (CmmInt (fromIntegral offset)
-                                                   (wordWidth dflags))]
-
-        other_expr -> other_expr
diff --git a/compiler/codeGen/CodeGen/Platform.hs b/compiler/codeGen/CodeGen/Platform.hs
deleted file mode 100644
index bc216758a0..0000000000
--- a/compiler/codeGen/CodeGen/Platform.hs
+++ /dev/null
@@ -1,107 +0,0 @@
-
-module CodeGen.Platform
-       (callerSaves, activeStgRegs, haveRegBase, globalRegMaybe, freeReg)
-       where
-
-import GhcPrelude
-
-import CmmExpr
-import GHC.Platform
-import Reg
-
-import qualified CodeGen.Platform.ARM        as ARM
-import qualified CodeGen.Platform.ARM64      as ARM64
-import qualified CodeGen.Platform.PPC        as PPC
-import qualified CodeGen.Platform.SPARC      as SPARC
-import qualified CodeGen.Platform.X86        as X86
-import qualified CodeGen.Platform.X86_64     as X86_64
-import qualified CodeGen.Platform.NoRegs     as NoRegs
-
--- | Returns 'True' if this global register is stored in a caller-saves
--- machine register.
-
-callerSaves :: Platform -> GlobalReg -> Bool
-callerSaves platform
- | platformUnregisterised platform = NoRegs.callerSaves
- | otherwise
- = case platformArch platform of
-   ArchX86    -> X86.callerSaves
-   ArchX86_64 -> X86_64.callerSaves
-   ArchSPARC  -> SPARC.callerSaves
-   ArchARM {} -> ARM.callerSaves
-   ArchARM64  -> ARM64.callerSaves
-   arch
-    | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
-        PPC.callerSaves
-
-    | otherwise -> NoRegs.callerSaves
-
--- | Here is where the STG register map is defined for each target arch.
--- The order matters (for the llvm backend anyway)! We must make sure to
--- maintain the order here with the order used in the LLVM calling conventions.
--- Note that also, this isn't all registers, just the ones that are currently
--- possbily mapped to real registers.
-activeStgRegs :: Platform -> [GlobalReg]
-activeStgRegs platform
- | platformUnregisterised platform = NoRegs.activeStgRegs
- | otherwise
- = case platformArch platform of
-   ArchX86    -> X86.activeStgRegs
-   ArchX86_64 -> X86_64.activeStgRegs
-   ArchSPARC  -> SPARC.activeStgRegs
-   ArchARM {} -> ARM.activeStgRegs
-   ArchARM64  -> ARM64.activeStgRegs
-   arch
-    | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
-        PPC.activeStgRegs
-
-    | otherwise -> NoRegs.activeStgRegs
-
-haveRegBase :: Platform -> Bool
-haveRegBase platform
- | platformUnregisterised platform = NoRegs.haveRegBase
- | otherwise
- = case platformArch platform of
-   ArchX86    -> X86.haveRegBase
-   ArchX86_64 -> X86_64.haveRegBase
-   ArchSPARC  -> SPARC.haveRegBase
-   ArchARM {} -> ARM.haveRegBase
-   ArchARM64  -> ARM64.haveRegBase
-   arch
-    | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
-        PPC.haveRegBase
-
-    | otherwise -> NoRegs.haveRegBase
-
-globalRegMaybe :: Platform -> GlobalReg -> Maybe RealReg
-globalRegMaybe platform
- | platformUnregisterised platform = NoRegs.globalRegMaybe
- | otherwise
- = case platformArch platform of
-   ArchX86    -> X86.globalRegMaybe
-   ArchX86_64 -> X86_64.globalRegMaybe
-   ArchSPARC  -> SPARC.globalRegMaybe
-   ArchARM {} -> ARM.globalRegMaybe
-   ArchARM64  -> ARM64.globalRegMaybe
-   arch
-    | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
-        PPC.globalRegMaybe
-
-    | otherwise -> NoRegs.globalRegMaybe
-
-freeReg :: Platform -> RegNo -> Bool
-freeReg platform
- | platformUnregisterised platform = NoRegs.freeReg
- | otherwise
- = case platformArch platform of
-   ArchX86    -> X86.freeReg
-   ArchX86_64 -> X86_64.freeReg
-   ArchSPARC  -> SPARC.freeReg
-   ArchARM {} -> ARM.freeReg
-   ArchARM64  -> ARM64.freeReg
-   arch
-    | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
-        PPC.freeReg
-
-    | otherwise -> NoRegs.freeReg
-
diff --git a/compiler/codeGen/CodeGen/Platform/ARM.hs b/compiler/codeGen/CodeGen/Platform/ARM.hs
deleted file mode 100644
index a2cb476e04..0000000000
--- a/compiler/codeGen/CodeGen/Platform/ARM.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.ARM where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_arm 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/ARM64.hs b/compiler/codeGen/CodeGen/Platform/ARM64.hs
deleted file mode 100644
index 6ace181356..0000000000
--- a/compiler/codeGen/CodeGen/Platform/ARM64.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.ARM64 where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_aarch64 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/NoRegs.hs b/compiler/codeGen/CodeGen/Platform/NoRegs.hs
deleted file mode 100644
index 4c074ee313..0000000000
--- a/compiler/codeGen/CodeGen/Platform/NoRegs.hs
+++ /dev/null
@@ -1,9 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.NoRegs where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/PPC.hs b/compiler/codeGen/CodeGen/Platform/PPC.hs
deleted file mode 100644
index f7eae6b4ca..0000000000
--- a/compiler/codeGen/CodeGen/Platform/PPC.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.PPC where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_powerpc 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/SPARC.hs b/compiler/codeGen/CodeGen/Platform/SPARC.hs
deleted file mode 100644
index 5d8dbb1da9..0000000000
--- a/compiler/codeGen/CodeGen/Platform/SPARC.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.SPARC where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_sparc 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/X86.hs b/compiler/codeGen/CodeGen/Platform/X86.hs
deleted file mode 100644
index 84d52c1585..0000000000
--- a/compiler/codeGen/CodeGen/Platform/X86.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.X86 where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_i386 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/X86_64.hs b/compiler/codeGen/CodeGen/Platform/X86_64.hs
deleted file mode 100644
index 1b2b5549ac..0000000000
--- a/compiler/codeGen/CodeGen/Platform/X86_64.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.X86_64 where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_x86_64 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs
deleted file mode 100644
index 83409b6b24..0000000000
--- a/compiler/codeGen/StgCmm.hs
+++ /dev/null
@@ -1,223 +0,0 @@
-{-# LANGUAGE CPP #-}
-{-# LANGUAGE DataKinds #-}
-
------------------------------------------------------------------------------
---
--- Stg to C-- code generation
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmm ( codeGen ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude as Prelude
-
-import StgCmmProf (initCostCentres, ldvEnter)
-import StgCmmMonad
-import StgCmmEnv
-import StgCmmBind
-import StgCmmCon
-import StgCmmLayout
-import StgCmmUtils
-import StgCmmClosure
-import StgCmmHpc
-import StgCmmTicky
-
-import Cmm
-import CmmUtils
-import CLabel
-
-import StgSyn
-import DynFlags
-import ErrUtils
-
-import HscTypes
-import CostCentre
-import Id
-import IdInfo
-import RepType
-import DataCon
-import TyCon
-import Module
-import Outputable
-import Stream
-import BasicTypes
-import VarSet ( isEmptyDVarSet )
-
-import OrdList
-import MkGraph
-
-import Data.IORef
-import Control.Monad (when,void)
-import Util
-
-codeGen :: DynFlags
-        -> Module
-        -> [TyCon]
-        -> CollectedCCs                -- (Local/global) cost-centres needing declaring/registering.
-        -> [CgStgTopBinding]           -- Bindings to convert
-        -> HpcInfo
-        -> Stream IO CmmGroup ()       -- Output as a stream, so codegen can
-                                       -- be interleaved with output
-
-codeGen dflags this_mod data_tycons
-        cost_centre_info stg_binds hpc_info
-  = do  {     -- cg: run the code generator, and yield the resulting CmmGroup
-              -- Using an IORef to store the state is a bit crude, but otherwise
-              -- we would need to add a state monad layer.
-        ; cgref <- liftIO $ newIORef =<< initC
-        ; let cg :: FCode () -> Stream IO CmmGroup ()
-              cg fcode = do
-                cmm <- liftIO . withTiming (return dflags) (text "STG -> Cmm") (`seq` ()) $ do
-                         st <- readIORef cgref
-                         let (a,st') = runC dflags this_mod st (getCmm fcode)
-
-                         -- NB. stub-out cgs_tops and cgs_stmts.  This fixes
-                         -- a big space leak.  DO NOT REMOVE!
-                         writeIORef cgref $! st'{ cgs_tops = nilOL,
-                                                  cgs_stmts = mkNop }
-                         return a
-                yield cmm
-
-               -- Note [codegen-split-init] the cmm_init block must come
-               -- FIRST.  This is because when -split-objs is on we need to
-               -- combine this block with its initialisation routines; see
-               -- Note [pipeline-split-init].
-        ; cg (mkModuleInit cost_centre_info this_mod hpc_info)
-
-        ; mapM_ (cg . cgTopBinding dflags) stg_binds
-
-                -- Put datatype_stuff after code_stuff, because the
-                -- datatype closure table (for enumeration types) to
-                -- (say) PrelBase_True_closure, which is defined in
-                -- code_stuff
-        ; let do_tycon tycon = do
-                -- Generate a table of static closures for an
-                -- enumeration type Note that the closure pointers are
-                -- tagged.
-                 when (isEnumerationTyCon tycon) $ cg (cgEnumerationTyCon tycon)
-                 mapM_ (cg . cgDataCon) (tyConDataCons tycon)
-
-        ; mapM_ do_tycon data_tycons
-        }
-
----------------------------------------------------------------
---      Top-level bindings
----------------------------------------------------------------
-
-{- 'cgTopBinding' is only used for top-level bindings, since they need
-to be allocated statically (not in the heap) and need to be labelled.
-No unboxed bindings can happen at top level.
-
-In the code below, the static bindings are accumulated in the
-@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@.
-This is so that we can write the top level processing in a compositional
-style, with the increasing static environment being plumbed as a state
-variable. -}
-
-cgTopBinding :: DynFlags -> CgStgTopBinding -> FCode ()
-cgTopBinding dflags (StgTopLifted (StgNonRec id rhs))
-  = do  { let (info, fcode) = cgTopRhs dflags NonRecursive id rhs
-        ; fcode
-        ; addBindC info
-        }
-
-cgTopBinding dflags (StgTopLifted (StgRec pairs))
-  = do  { let (bndrs, rhss) = unzip pairs
-        ; let pairs' = zip bndrs rhss
-              r = unzipWith (cgTopRhs dflags Recursive) pairs'
-              (infos, fcodes) = unzip r
-        ; addBindsC infos
-        ; sequence_ fcodes
-        }
-
-cgTopBinding dflags (StgTopStringLit id str)
-  = do  { let label = mkBytesLabel (idName id)
-        ; let (lit, decl) = mkByteStringCLit label str
-        ; emitDecl decl
-        ; addBindC (litIdInfo dflags id mkLFStringLit lit)
-        }
-
-cgTopRhs :: DynFlags -> RecFlag -> Id -> CgStgRhs -> (CgIdInfo, FCode ())
-        -- The Id is passed along for setting up a binding...
-
-cgTopRhs dflags _rec bndr (StgRhsCon _cc con args)
-  = cgTopRhsCon dflags bndr con (assertNonVoidStgArgs args)
-      -- con args are always non-void,
-      -- see Note [Post-unarisation invariants] in UnariseStg
-
-cgTopRhs dflags rec bndr (StgRhsClosure fvs cc upd_flag args body)
-  = ASSERT(isEmptyDVarSet fvs)    -- There should be no free variables
-    cgTopRhsClosure dflags rec bndr cc upd_flag args body
-
-
----------------------------------------------------------------
---      Module initialisation code
----------------------------------------------------------------
-
-mkModuleInit
-        :: CollectedCCs         -- cost centre info
-        -> Module
-        -> HpcInfo
-        -> FCode ()
-
-mkModuleInit cost_centre_info this_mod hpc_info
-  = do  { initHpc this_mod hpc_info
-        ; initCostCentres cost_centre_info
-        }
-
-
----------------------------------------------------------------
---      Generating static stuff for algebraic data types
----------------------------------------------------------------
-
-
-cgEnumerationTyCon :: TyCon -> FCode ()
-cgEnumerationTyCon tycon
-  = do dflags <- getDynFlags
-       emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs)
-             [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs)
-                           (tagForCon dflags con)
-             | con <- tyConDataCons tycon]
-
-
-cgDataCon :: DataCon -> FCode ()
--- Generate the entry code, info tables, and (for niladic constructor)
--- the static closure, for a constructor.
-cgDataCon data_con
-  = do  { dflags <- getDynFlags
-        ; let
-            (tot_wds, --  #ptr_wds + #nonptr_wds
-             ptr_wds) --  #ptr_wds
-              = mkVirtConstrSizes dflags arg_reps
-
-            nonptr_wds   = tot_wds - ptr_wds
-
-            dyn_info_tbl =
-              mkDataConInfoTable dflags data_con False ptr_wds nonptr_wds
-
-            -- We're generating info tables, so we don't know and care about
-            -- what the actual arguments are. Using () here as the place holder.
-            arg_reps :: [NonVoid PrimRep]
-            arg_reps = [ NonVoid rep_ty
-                       | ty <- dataConRepArgTys data_con
-                       , rep_ty <- typePrimRep ty
-                       , not (isVoidRep rep_ty) ]
-
-        ; emitClosureAndInfoTable dyn_info_tbl NativeDirectCall [] $
-            -- NB: the closure pointer is assumed *untagged* on
-            -- entry to a constructor.  If the pointer is tagged,
-            -- then we should not be entering it.  This assumption
-            -- is used in ldvEnter and when tagging the pointer to
-            -- return it.
-            -- NB 2: We don't set CC when entering data (WDP 94/06)
-            do { tickyEnterDynCon
-               ; ldvEnter (CmmReg nodeReg)
-               ; tickyReturnOldCon (length arg_reps)
-               ; void $ emitReturn [cmmOffsetB dflags (CmmReg nodeReg) (tagForCon dflags data_con)]
-               }
-                    -- The case continuation code expects a tagged pointer
-        }
diff --git a/compiler/codeGen/StgCmmArgRep.hs b/compiler/codeGen/StgCmmArgRep.hs
deleted file mode 100644
index ef40fce464..0000000000
--- a/compiler/codeGen/StgCmmArgRep.hs
+++ /dev/null
@@ -1,160 +0,0 @@
------------------------------------------------------------------------------
---
--- Argument representations used in StgCmmLayout.
---
--- (c) The University of Glasgow 2013
---
------------------------------------------------------------------------------
-
-module StgCmmArgRep (
-        ArgRep(..), toArgRep, argRepSizeW,
-
-        argRepString, isNonV, idArgRep,
-
-        slowCallPattern,
-
-        ) where
-
-import GhcPrelude
-
-import StgCmmClosure    ( idPrimRep )
-
-import SMRep            ( WordOff )
-import Id               ( Id )
-import TyCon            ( PrimRep(..), primElemRepSizeB )
-import BasicTypes       ( RepArity )
-import Constants        ( wORD64_SIZE )
-import DynFlags
-
-import Outputable
-import FastString
-
--- I extricated this code as this new module in order to avoid a
--- cyclic dependency between StgCmmLayout and StgCmmTicky.
---
--- NSF 18 Feb 2013
-
--------------------------------------------------------------------------
---      Classifying arguments: ArgRep
--------------------------------------------------------------------------
-
--- ArgRep is re-exported by StgCmmLayout, but only for use in the
--- byte-code generator which also needs to know about the
--- classification of arguments.
-
-data ArgRep = P   -- GC Ptr
-            | N   -- Word-sized non-ptr
-            | L   -- 64-bit non-ptr (long)
-            | V   -- Void
-            | F   -- Float
-            | D   -- Double
-            | V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc.
-            | V32 -- 32-byte (256-bit) vectors of Float/Double/Int8/Word32/etc.
-            | V64 -- 64-byte (512-bit) vectors of Float/Double/Int8/Word32/etc.
-instance Outputable ArgRep where ppr = text . argRepString
-
-argRepString :: ArgRep -> String
-argRepString P = "P"
-argRepString N = "N"
-argRepString L = "L"
-argRepString V = "V"
-argRepString F = "F"
-argRepString D = "D"
-argRepString V16 = "V16"
-argRepString V32 = "V32"
-argRepString V64 = "V64"
-
-toArgRep :: PrimRep -> ArgRep
-toArgRep VoidRep           = V
-toArgRep LiftedRep         = P
-toArgRep UnliftedRep       = P
-toArgRep IntRep            = N
-toArgRep WordRep           = N
-toArgRep Int8Rep           = N  -- Gets widened to native word width for calls
-toArgRep Word8Rep          = N  -- Gets widened to native word width for calls
-toArgRep Int16Rep          = N  -- Gets widened to native word width for calls
-toArgRep Word16Rep         = N  -- Gets widened to native word width for calls
-toArgRep Int32Rep          = N  -- Gets widened to native word width for calls
-toArgRep Word32Rep         = N  -- Gets widened to native word width for calls
-toArgRep AddrRep           = N
-toArgRep Int64Rep          = L
-toArgRep Word64Rep         = L
-toArgRep FloatRep          = F
-toArgRep DoubleRep         = D
-toArgRep (VecRep len elem) = case len*primElemRepSizeB elem of
-                               16 -> V16
-                               32 -> V32
-                               64 -> V64
-                               _  -> error "toArgRep: bad vector primrep"
-
-isNonV :: ArgRep -> Bool
-isNonV V = False
-isNonV _ = True
-
-argRepSizeW :: DynFlags -> ArgRep -> WordOff                -- Size in words
-argRepSizeW _      N   = 1
-argRepSizeW _      P   = 1
-argRepSizeW _      F   = 1
-argRepSizeW dflags L   = wORD64_SIZE        `quot` wORD_SIZE dflags
-argRepSizeW dflags D   = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags
-argRepSizeW _      V   = 0
-argRepSizeW dflags V16 = 16                 `quot` wORD_SIZE dflags
-argRepSizeW dflags V32 = 32                 `quot` wORD_SIZE dflags
-argRepSizeW dflags V64 = 64                 `quot` wORD_SIZE dflags
-
-idArgRep :: Id -> ArgRep
-idArgRep = toArgRep . idPrimRep
-
--- This list of argument patterns should be kept in sync with at least
--- the following:
---
---  * StgCmmLayout.stdPattern maybe to some degree?
---
---  * the RTS_RET(stg_ap_*) and RTS_FUN_DECL(stg_ap_*_fast)
---  declarations in includes/stg/MiscClosures.h
---
---  * the SLOW_CALL_*_ctr declarations in includes/stg/Ticky.h,
---
---  * the TICK_SLOW_CALL_*() #defines in includes/Cmm.h,
---
---  * the PR_CTR(SLOW_CALL_*_ctr) calls in rts/Ticky.c,
---
---  * and the SymI_HasProto(stg_ap_*_{ret,info,fast}) calls and
---  SymI_HasProto(SLOW_CALL_*_ctr) calls in rts/Linker.c
---
--- There may be more places that I haven't found; I merely igrep'd for
--- pppppp and excluded things that seemed ghci-specific.
---
--- Also, it seems at the moment that ticky counters with void
--- arguments will never be bumped, but I'm still declaring those
--- counters, defensively.
---
--- NSF 6 Mar 2013
-
-slowCallPattern :: [ArgRep] -> (FastString, RepArity)
--- Returns the generic apply function and arity
---
--- The first batch of cases match (some) specialised entries
--- The last group deals exhaustively with the cases for the first argument
---   (and the zero-argument case)
---
--- In 99% of cases this function will match *all* the arguments in one batch
-
-slowCallPattern (P: P: P: P: P: P: _) = (fsLit "stg_ap_pppppp", 6)
-slowCallPattern (P: P: P: P: P: _)    = (fsLit "stg_ap_ppppp", 5)
-slowCallPattern (P: P: P: P: _)       = (fsLit "stg_ap_pppp", 4)
-slowCallPattern (P: P: P: V: _)       = (fsLit "stg_ap_pppv", 4)
-slowCallPattern (P: P: P: _)          = (fsLit "stg_ap_ppp", 3)
-slowCallPattern (P: P: V: _)          = (fsLit "stg_ap_ppv", 3)
-slowCallPattern (P: P: _)             = (fsLit "stg_ap_pp", 2)
-slowCallPattern (P: V: _)             = (fsLit "stg_ap_pv", 2)
-slowCallPattern (P: _)                = (fsLit "stg_ap_p", 1)
-slowCallPattern (V: _)                = (fsLit "stg_ap_v", 1)
-slowCallPattern (N: _)                = (fsLit "stg_ap_n", 1)
-slowCallPattern (F: _)                = (fsLit "stg_ap_f", 1)
-slowCallPattern (D: _)                = (fsLit "stg_ap_d", 1)
-slowCallPattern (L: _)                = (fsLit "stg_ap_l", 1)
-slowCallPattern (V16: _)              = (fsLit "stg_ap_v16", 1)
-slowCallPattern (V32: _)              = (fsLit "stg_ap_v32", 1)
-slowCallPattern (V64: _)              = (fsLit "stg_ap_v64", 1)
-slowCallPattern []                    = (fsLit "stg_ap_0", 0)
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
deleted file mode 100644
index 7189800f6e..0000000000
--- a/compiler/codeGen/StgCmmBind.hs
+++ /dev/null
@@ -1,753 +0,0 @@
------------------------------------------------------------------------------
---
--- Stg to C-- code generation: bindings
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmBind (
-        cgTopRhsClosure,
-        cgBind,
-        emitBlackHoleCode,
-        pushUpdateFrame, emitUpdateFrame
-  ) where
-
-import GhcPrelude hiding ((<*>))
-
-import StgCmmExpr
-import StgCmmMonad
-import StgCmmEnv
-import StgCmmCon
-import StgCmmHeap
-import StgCmmProf (ldvEnterClosure, enterCostCentreFun, enterCostCentreThunk,
-                   initUpdFrameProf)
-import StgCmmTicky
-import StgCmmLayout
-import StgCmmUtils
-import StgCmmClosure
-import StgCmmForeign    (emitPrimCall)
-
-import MkGraph
-import CoreSyn          ( AltCon(..), tickishIsCode )
-import BlockId
-import SMRep
-import Cmm
-import CmmInfo
-import CmmUtils
-import CLabel
-import StgSyn
-import CostCentre
-import Id
-import IdInfo
-import Name
-import Module
-import ListSetOps
-import Util
-import VarSet
-import BasicTypes
-import Outputable
-import FastString
-import DynFlags
-
-import Control.Monad
-
-------------------------------------------------------------------------
---              Top-level bindings
-------------------------------------------------------------------------
-
--- For closures bound at top level, allocate in static space.
--- They should have no free variables.
-
-cgTopRhsClosure :: DynFlags
-                -> RecFlag              -- member of a recursive group?
-                -> Id
-                -> CostCentreStack      -- Optional cost centre annotation
-                -> UpdateFlag
-                -> [Id]                 -- Args
-                -> CgStgExpr
-                -> (CgIdInfo, FCode ())
-
-cgTopRhsClosure dflags rec id ccs upd_flag args body =
-  let closure_label = mkLocalClosureLabel (idName id) (idCafInfo id)
-      cg_id_info    = litIdInfo dflags id lf_info (CmmLabel closure_label)
-      lf_info       = mkClosureLFInfo dflags id TopLevel [] upd_flag args
-  in (cg_id_info, gen_code dflags lf_info closure_label)
-  where
-  -- special case for a indirection (f = g).  We create an IND_STATIC
-  -- closure pointing directly to the indirectee.  This is exactly
-  -- what the CAF will eventually evaluate to anyway, we're just
-  -- shortcutting the whole process, and generating a lot less code
-  -- (#7308). Eventually the IND_STATIC closure will be eliminated
-  -- by assembly '.equiv' directives, where possible (#15155).
-  -- See note [emit-time elimination of static indirections] in CLabel.
-  --
-  -- Note: we omit the optimisation when this binding is part of a
-  -- recursive group, because the optimisation would inhibit the black
-  -- hole detection from working in that case.  Test
-  -- concurrent/should_run/4030 fails, for instance.
-  --
-  gen_code dflags _ closure_label
-    | StgApp f [] <- body, null args, isNonRec rec
-    = do
-         cg_info <- getCgIdInfo f
-         let closure_rep   = mkStaticClosureFields dflags
-                                    indStaticInfoTable ccs MayHaveCafRefs
-                                    [unLit (idInfoToAmode cg_info)]
-         emitDataLits closure_label closure_rep
-         return ()
-
-  gen_code dflags lf_info _closure_label
-   = do { let name = idName id
-        ; mod_name <- getModuleName
-        ; let descr         = closureDescription dflags mod_name name
-              closure_info  = mkClosureInfo dflags True id lf_info 0 0 descr
-
-        -- We don't generate the static closure here, because we might
-        -- want to add references to static closures to it later.  The
-        -- static closure is generated by CmmBuildInfoTables.updInfoSRTs,
-        -- See Note [SRTs], specifically the [FUN] optimisation.
-
-        ; let fv_details :: [(NonVoid Id, ByteOff)]
-              header = if isLFThunk lf_info then ThunkHeader else StdHeader
-              (_, _, fv_details) = mkVirtHeapOffsets dflags header []
-        -- Don't drop the non-void args until the closure info has been made
-        ; forkClosureBody (closureCodeBody True id closure_info ccs
-                                (nonVoidIds args) (length args) body fv_details)
-
-        ; return () }
-
-  unLit (CmmLit l) = l
-  unLit _ = panic "unLit"
-
-------------------------------------------------------------------------
---              Non-top-level bindings
-------------------------------------------------------------------------
-
-cgBind :: CgStgBinding -> FCode ()
-cgBind (StgNonRec name rhs)
-  = do  { (info, fcode) <- cgRhs name rhs
-        ; addBindC info
-        ; init <- fcode
-        ; emit init }
-        -- init cannot be used in body, so slightly better to sink it eagerly
-
-cgBind (StgRec pairs)
-  = do  {  r <- sequence $ unzipWith cgRhs pairs
-        ;  let (id_infos, fcodes) = unzip r
-        ;  addBindsC id_infos
-        ;  (inits, body) <- getCodeR $ sequence fcodes
-        ;  emit (catAGraphs inits <*> body) }
-
-{- Note [cgBind rec]
-
-   Recursive let-bindings are tricky.
-   Consider the following pseudocode:
-
-     let x = \_ ->  ... y ...
-         y = \_ ->  ... z ...
-         z = \_ ->  ... x ...
-     in ...
-
-   For each binding, we need to allocate a closure, and each closure must
-   capture the address of the other closures.
-   We want to generate the following C-- code:
-     // Initialization Code
-     x = hp - 24; // heap address of x's closure
-     y = hp - 40; // heap address of x's closure
-     z = hp - 64; // heap address of x's closure
-     // allocate and initialize x
-     m[hp-8]   = ...
-     m[hp-16]  = y       // the closure for x captures y
-     m[hp-24] = x_info;
-     // allocate and initialize y
-     m[hp-32] = z;       // the closure for y captures z
-     m[hp-40] = y_info;
-     // allocate and initialize z
-     ...
-
-   For each closure, we must generate not only the code to allocate and
-   initialize the closure itself, but also some initialization Code that
-   sets a variable holding the closure pointer.
-
-   We could generate a pair of the (init code, body code), but since
-   the bindings are recursive we also have to initialise the
-   environment with the CgIdInfo for all the bindings before compiling
-   anything.  So we do this in 3 stages:
-
-     1. collect all the CgIdInfos and initialise the environment
-     2. compile each binding into (init, body) code
-     3. emit all the inits, and then all the bodies
-
-   We'd rather not have separate functions to do steps 1 and 2 for
-   each binding, since in pratice they share a lot of code.  So we
-   have just one function, cgRhs, that returns a pair of the CgIdInfo
-   for step 1, and a monadic computation to generate the code in step
-   2.
-
-   The alternative to separating things in this way is to use a
-   fixpoint.  That's what we used to do, but it introduces a
-   maintenance nightmare because there is a subtle dependency on not
-   being too strict everywhere.  Doing things this way means that the
-   FCode monad can be strict, for example.
- -}
-
-cgRhs :: Id
-      -> CgStgRhs
-      -> FCode (
-                 CgIdInfo         -- The info for this binding
-               , FCode CmmAGraph  -- A computation which will generate the
-                                  -- code for the binding, and return an
-                                  -- assignent of the form "x = Hp - n"
-                                  -- (see above)
-               )
-
-cgRhs id (StgRhsCon cc con args)
-  = withNewTickyCounterCon (idName id) $
-    buildDynCon id True cc con (assertNonVoidStgArgs args)
-      -- con args are always non-void,
-      -- see Note [Post-unarisation invariants] in UnariseStg
-
-{- See Note [GC recovery] in compiler/codeGen/StgCmmClosure.hs -}
-cgRhs id (StgRhsClosure fvs cc upd_flag args body)
-  = do dflags <- getDynFlags
-       mkRhsClosure dflags id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body
-
-------------------------------------------------------------------------
---              Non-constructor right hand sides
-------------------------------------------------------------------------
-
-mkRhsClosure :: DynFlags -> Id -> CostCentreStack
-             -> [NonVoid Id]                    -- Free vars
-             -> UpdateFlag
-             -> [Id]                            -- Args
-             -> CgStgExpr
-             -> FCode (CgIdInfo, FCode CmmAGraph)
-
-{- mkRhsClosure looks for two special forms of the right-hand side:
-        a) selector thunks
-        b) AP thunks
-
-If neither happens, it just calls mkClosureLFInfo.  You might think
-that mkClosureLFInfo should do all this, but it seems wrong for the
-latter to look at the structure of an expression
-
-Note [Selectors]
-~~~~~~~~~~~~~~~~
-We look at the body of the closure to see if it's a selector---turgid,
-but nothing deep.  We are looking for a closure of {\em exactly} the
-form:
-
-...  = [the_fv] \ u [] ->
-         case the_fv of
-           con a_1 ... a_n -> a_i
-
-Note [Ap thunks]
-~~~~~~~~~~~~~~~~
-A more generic AP thunk of the form
-
-        x = [ x_1...x_n ] \.. [] -> x_1 ... x_n
-
-A set of these is compiled statically into the RTS, so we just use
-those.  We could extend the idea to thunks where some of the x_i are
-global ids (and hence not free variables), but this would entail
-generating a larger thunk.  It might be an option for non-optimising
-compilation, though.
-
-We only generate an Ap thunk if all the free variables are pointers,
-for semi-obvious reasons.
-
--}
-
----------- Note [Selectors] ------------------
-mkRhsClosure    dflags bndr _cc
-                [NonVoid the_fv]                -- Just one free var
-                upd_flag                -- Updatable thunk
-                []                      -- A thunk
-                expr
-  | let strip = stripStgTicksTopE (not . tickishIsCode)
-  , StgCase (StgApp scrutinee [{-no args-}])
-         _   -- ignore bndr
-         (AlgAlt _)
-         [(DataAlt _, params, sel_expr)] <- strip expr
-  , StgApp selectee [{-no args-}] <- strip sel_expr
-  , the_fv == scrutinee                -- Scrutinee is the only free variable
-
-  , let (_, _, params_w_offsets) = mkVirtConstrOffsets dflags (addIdReps (assertNonVoidIds params))
-                                   -- pattern binders are always non-void,
-                                   -- see Note [Post-unarisation invariants] in UnariseStg
-  , Just the_offset <- assocMaybe params_w_offsets (NonVoid selectee)
-
-  , let offset_into_int = bytesToWordsRoundUp dflags the_offset
-                          - fixedHdrSizeW dflags
-  , offset_into_int <= mAX_SPEC_SELECTEE_SIZE dflags -- Offset is small enough
-  = -- NOT TRUE: ASSERT(is_single_constructor)
-    -- The simplifier may have statically determined that the single alternative
-    -- is the only possible case and eliminated the others, even if there are
-    -- other constructors in the datatype.  It's still ok to make a selector
-    -- thunk in this case, because we *know* which constructor the scrutinee
-    -- will evaluate to.
-    --
-    -- srt is discarded; it must be empty
-    let lf_info = mkSelectorLFInfo bndr offset_into_int (isUpdatable upd_flag)
-    in cgRhsStdThunk bndr lf_info [StgVarArg the_fv]
-
----------- Note [Ap thunks] ------------------
-mkRhsClosure    dflags bndr _cc
-                fvs
-                upd_flag
-                []                      -- No args; a thunk
-                (StgApp fun_id args)
-
-  -- We are looking for an "ApThunk"; see data con ApThunk in StgCmmClosure
-  -- of form (x1 x2 .... xn), where all the xi are locals (not top-level)
-  -- So the xi will all be free variables
-  | args `lengthIs` (n_fvs-1)  -- This happens only if the fun_id and
-                               -- args are all distinct local variables
-                               -- The "-1" is for fun_id
-    -- Missed opportunity:   (f x x) is not detected
-  , all (isGcPtrRep . idPrimRep . fromNonVoid) fvs
-  , isUpdatable upd_flag
-  , n_fvs <= mAX_SPEC_AP_SIZE dflags
-  , not (gopt Opt_SccProfilingOn dflags)
-                         -- not when profiling: we don't want to
-                         -- lose information about this particular
-                         -- thunk (e.g. its type) (#949)
-  , idArity fun_id == unknownArity -- don't spoil a known call
-
-          -- Ha! an Ap thunk
-  = cgRhsStdThunk bndr lf_info payload
-
-  where
-    n_fvs   = length fvs
-    lf_info = mkApLFInfo bndr upd_flag n_fvs
-    -- the payload has to be in the correct order, hence we can't
-    -- just use the fvs.
-    payload = StgVarArg fun_id : args
-
----------- Default case ------------------
-mkRhsClosure dflags bndr cc fvs upd_flag args body
-  = do  { let lf_info = mkClosureLFInfo dflags bndr NotTopLevel fvs upd_flag args
-        ; (id_info, reg) <- rhsIdInfo bndr lf_info
-        ; return (id_info, gen_code lf_info reg) }
- where
- gen_code lf_info reg
-  = do  {       -- LAY OUT THE OBJECT
-        -- If the binder is itself a free variable, then don't store
-        -- it in the closure.  Instead, just bind it to Node on entry.
-        -- NB we can be sure that Node will point to it, because we
-        -- haven't told mkClosureLFInfo about this; so if the binder
-        -- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is*
-        -- stored in the closure itself, so it will make sure that
-        -- Node points to it...
-        ; let   reduced_fvs = filter (NonVoid bndr /=) fvs
-
-        -- MAKE CLOSURE INFO FOR THIS CLOSURE
-        ; mod_name <- getModuleName
-        ; dflags <- getDynFlags
-        ; let   name  = idName bndr
-                descr = closureDescription dflags mod_name name
-                fv_details :: [(NonVoid Id, ByteOff)]
-                header = if isLFThunk lf_info then ThunkHeader else StdHeader
-                (tot_wds, ptr_wds, fv_details)
-                   = mkVirtHeapOffsets dflags header (addIdReps reduced_fvs)
-                closure_info = mkClosureInfo dflags False       -- Not static
-                                             bndr lf_info tot_wds ptr_wds
-                                             descr
-
-        -- BUILD ITS INFO TABLE AND CODE
-        ; forkClosureBody $
-                -- forkClosureBody: (a) ensure that bindings in here are not seen elsewhere
-                --                  (b) ignore Sequel from context; use empty Sequel
-                -- And compile the body
-                closureCodeBody False bndr closure_info cc (nonVoidIds args)
-                                (length args) body fv_details
-
-        -- BUILD THE OBJECT
---      ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
-        ; let use_cc = cccsExpr; blame_cc = cccsExpr
-        ; emit (mkComment $ mkFastString "calling allocDynClosure")
-        ; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off)
-        ; let info_tbl = mkCmmInfo closure_info bndr currentCCS
-        ; hp_plus_n <- allocDynClosure (Just bndr) info_tbl lf_info use_cc blame_cc
-                                         (map toVarArg fv_details)
-
-        -- RETURN
-        ; return (mkRhsInit dflags reg lf_info hp_plus_n) }
-
--------------------------
-cgRhsStdThunk
-        :: Id
-        -> LambdaFormInfo
-        -> [StgArg]             -- payload
-        -> FCode (CgIdInfo, FCode CmmAGraph)
-
-cgRhsStdThunk bndr lf_info payload
- = do  { (id_info, reg) <- rhsIdInfo bndr lf_info
-       ; return (id_info, gen_code reg)
-       }
- where
- gen_code reg  -- AHA!  A STANDARD-FORM THUNK
-  = withNewTickyCounterStdThunk (lfUpdatable lf_info) (idName bndr) $
-    do
-  {     -- LAY OUT THE OBJECT
-    mod_name <- getModuleName
-  ; dflags <- getDynFlags
-  ; let header = if isLFThunk lf_info then ThunkHeader else StdHeader
-        (tot_wds, ptr_wds, payload_w_offsets)
-            = mkVirtHeapOffsets dflags header
-                (addArgReps (nonVoidStgArgs payload))
-
-        descr = closureDescription dflags mod_name (idName bndr)
-        closure_info = mkClosureInfo dflags False       -- Not static
-                                     bndr lf_info tot_wds ptr_wds
-                                     descr
-
---  ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body
-  ; let use_cc = cccsExpr; blame_cc = cccsExpr
-
-
-        -- BUILD THE OBJECT
-  ; let info_tbl = mkCmmInfo closure_info bndr currentCCS
-  ; hp_plus_n <- allocDynClosure (Just bndr) info_tbl lf_info
-                                   use_cc blame_cc payload_w_offsets
-
-        -- RETURN
-  ; return (mkRhsInit dflags reg lf_info hp_plus_n) }
-
-
-mkClosureLFInfo :: DynFlags
-                -> Id           -- The binder
-                -> TopLevelFlag -- True of top level
-                -> [NonVoid Id] -- Free vars
-                -> UpdateFlag   -- Update flag
-                -> [Id]         -- Args
-                -> LambdaFormInfo
-mkClosureLFInfo dflags bndr top fvs upd_flag args
-  | null args =
-        mkLFThunk (idType bndr) top (map fromNonVoid fvs) upd_flag
-  | otherwise =
-        mkLFReEntrant top (map fromNonVoid fvs) args (mkArgDescr dflags args)
-
-
-------------------------------------------------------------------------
---              The code for closures
-------------------------------------------------------------------------
-
-closureCodeBody :: Bool            -- whether this is a top-level binding
-                -> Id              -- the closure's name
-                -> ClosureInfo     -- Lots of information about this closure
-                -> CostCentreStack -- Optional cost centre attached to closure
-                -> [NonVoid Id]    -- incoming args to the closure
-                -> Int             -- arity, including void args
-                -> CgStgExpr
-                -> [(NonVoid Id, ByteOff)] -- the closure's free vars
-                -> FCode ()
-
-{- There are two main cases for the code for closures.
-
-* If there are *no arguments*, then the closure is a thunk, and not in
-  normal form. So it should set up an update frame (if it is
-  shared). NB: Thunks cannot have a primitive type!
-
-* If there is *at least one* argument, then this closure is in
-  normal form, so there is no need to set up an update frame.
--}
-
-closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details
-  | arity == 0 -- No args i.e. thunk
-  = withNewTickyCounterThunk
-        (isStaticClosure cl_info)
-        (closureUpdReqd cl_info)
-        (closureName cl_info) $
-    emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $
-      \(_, node, _) -> thunkCode cl_info fv_details cc node arity body
-   where
-     lf_info  = closureLFInfo cl_info
-     info_tbl = mkCmmInfo cl_info bndr cc
-
-closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
-  = -- Note: args may be [], if all args are Void
-    withNewTickyCounterFun
-        (closureSingleEntry cl_info)
-        (closureName cl_info)
-        args $ do {
-
-        ; let
-             lf_info  = closureLFInfo cl_info
-             info_tbl = mkCmmInfo cl_info bndr cc
-
-        -- Emit the main entry code
-        ; emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args $
-            \(_offset, node, arg_regs) -> do
-                -- Emit slow-entry code (for entering a closure through a PAP)
-                { mkSlowEntryCode bndr cl_info arg_regs
-                ; dflags <- getDynFlags
-                ; let node_points = nodeMustPointToIt dflags lf_info
-                      node' = if node_points then Just node else Nothing
-                ; loop_header_id <- newBlockId
-                -- Extend reader monad with information that
-                -- self-recursive tail calls can be optimized into local
-                -- jumps. See Note [Self-recursive tail calls] in StgCmmExpr.
-                ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do
-                {
-                -- Main payload
-                ; entryHeapCheck cl_info node' arity arg_regs $ do
-                { -- emit LDV code when profiling
-                  when node_points (ldvEnterClosure cl_info (CmmLocal node))
-                -- ticky after heap check to avoid double counting
-                ; tickyEnterFun cl_info
-                ; enterCostCentreFun cc
-                    (CmmMachOp (mo_wordSub dflags)
-                         [ CmmReg (CmmLocal node) -- See [NodeReg clobbered with loopification]
-                         , mkIntExpr dflags (funTag dflags cl_info) ])
-                ; fv_bindings <- mapM bind_fv fv_details
-                -- Load free vars out of closure *after*
-                -- heap check, to reduce live vars over check
-                ; when node_points $ load_fvs node lf_info fv_bindings
-                ; void $ cgExpr body
-                }}}
-
-  }
-
--- Note [NodeReg clobbered with loopification]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- Previously we used to pass nodeReg (aka R1) here. With profiling, upon
--- entering a closure, enterFunCCS was called with R1 passed to it. But since R1
--- may get clobbered inside the body of a closure, and since a self-recursive
--- tail call does not restore R1, a subsequent call to enterFunCCS received a
--- possibly bogus value from R1. The solution is to not pass nodeReg (aka R1) to
--- enterFunCCS. Instead, we pass node, the callee-saved temporary that stores
--- the original value of R1. This way R1 may get modified but loopification will
--- not care.
-
--- A function closure pointer may be tagged, so we
--- must take it into account when accessing the free variables.
-bind_fv :: (NonVoid Id, ByteOff) -> FCode (LocalReg, ByteOff)
-bind_fv (id, off) = do { reg <- rebindToReg id; return (reg, off) }
-
-load_fvs :: LocalReg -> LambdaFormInfo -> [(LocalReg, ByteOff)] -> FCode ()
-load_fvs node lf_info = mapM_ (\ (reg, off) ->
-   do dflags <- getDynFlags
-      let tag = lfDynTag dflags lf_info
-      emit $ mkTaggedObjectLoad dflags reg node off tag)
-
------------------------------------------
--- The "slow entry" code for a function.  This entry point takes its
--- arguments on the stack.  It loads the arguments into registers
--- according to the calling convention, and jumps to the function's
--- normal entry point.  The function's closure is assumed to be in
--- R1/node.
---
--- The slow entry point is used for unknown calls: eg. stg_PAP_entry
-
-mkSlowEntryCode :: Id -> ClosureInfo -> [LocalReg] -> FCode ()
--- If this function doesn't have a specialised ArgDescr, we need
--- to generate the function's arg bitmap and slow-entry code.
--- Here, we emit the slow-entry code.
-mkSlowEntryCode bndr cl_info arg_regs -- function closure is already in `Node'
-  | Just (_, ArgGen _) <- closureFunInfo cl_info
-  = do dflags <- getDynFlags
-       let node = idToReg dflags (NonVoid bndr)
-           slow_lbl = closureSlowEntryLabel  cl_info
-           fast_lbl = closureLocalEntryLabel dflags cl_info
-           -- mkDirectJump does not clobber `Node' containing function closure
-           jump = mkJump dflags NativeNodeCall
-                                (mkLblExpr fast_lbl)
-                                (map (CmmReg . CmmLocal) (node : arg_regs))
-                                (initUpdFrameOff dflags)
-       tscope <- getTickScope
-       emitProcWithConvention Slow Nothing slow_lbl
-         (node : arg_regs) (jump, tscope)
-  | otherwise = return ()
-
------------------------------------------
-thunkCode :: ClosureInfo -> [(NonVoid Id, ByteOff)] -> CostCentreStack
-          -> LocalReg -> Int -> CgStgExpr -> FCode ()
-thunkCode cl_info fv_details _cc node arity body
-  = do { dflags <- getDynFlags
-       ; let node_points = nodeMustPointToIt dflags (closureLFInfo cl_info)
-             node'       = if node_points then Just node else Nothing
-        ; ldvEnterClosure cl_info (CmmLocal node) -- NB: Node always points when profiling
-
-        -- Heap overflow check
-        ; entryHeapCheck cl_info node' arity [] $ do
-        { -- Overwrite with black hole if necessary
-          -- but *after* the heap-overflow check
-        ; tickyEnterThunk cl_info
-        ; when (blackHoleOnEntry cl_info && node_points)
-                (blackHoleIt node)
-
-          -- Push update frame
-        ; setupUpdate cl_info node $
-            -- We only enter cc after setting up update so
-            -- that cc of enclosing scope will be recorded
-            -- in update frame CAF/DICT functions will be
-            -- subsumed by this enclosing cc
-            do { enterCostCentreThunk (CmmReg nodeReg)
-               ; let lf_info = closureLFInfo cl_info
-               ; fv_bindings <- mapM bind_fv fv_details
-               ; load_fvs node lf_info fv_bindings
-               ; void $ cgExpr body }}}
-
-
-------------------------------------------------------------------------
---              Update and black-hole wrappers
-------------------------------------------------------------------------
-
-blackHoleIt :: LocalReg -> FCode ()
--- Only called for closures with no args
--- Node points to the closure
-blackHoleIt node_reg
-  = emitBlackHoleCode (CmmReg (CmmLocal node_reg))
-
-emitBlackHoleCode :: CmmExpr -> FCode ()
-emitBlackHoleCode node = do
-  dflags <- getDynFlags
-
-  -- Eager blackholing is normally disabled, but can be turned on with
-  -- -feager-blackholing.  When it is on, we replace the info pointer
-  -- of the thunk with stg_EAGER_BLACKHOLE_info on entry.
-
-  -- If we wanted to do eager blackholing with slop filling, we'd need
-  -- to do it at the *end* of a basic block, otherwise we overwrite
-  -- the free variables in the thunk that we still need.  We have a
-  -- patch for this from Andy Cheadle, but not incorporated yet. --SDM
-  -- [6/2004]
-  --
-  -- Previously, eager blackholing was enabled when ticky-ticky was
-  -- on. But it didn't work, and it wasn't strictly necessary to bring
-  -- back minimal ticky-ticky, so now EAGER_BLACKHOLING is
-  -- unconditionally disabled. -- krc 1/2007
-
-  -- Note the eager-blackholing check is here rather than in blackHoleOnEntry,
-  -- because emitBlackHoleCode is called from CmmParse.
-
-  let  eager_blackholing =  not (gopt Opt_SccProfilingOn dflags)
-                         && gopt Opt_EagerBlackHoling dflags
-             -- Profiling needs slop filling (to support LDV
-             -- profiling), so currently eager blackholing doesn't
-             -- work with profiling.
-
-  when eager_blackholing $ do
-    emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) currentTSOExpr
-    -- See Note [Heap memory barriers] in SMP.h.
-    emitPrimCall [] MO_WriteBarrier []
-    emitStore node (CmmReg (CmmGlobal EagerBlackholeInfo))
-
-setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode ()
-        -- Nota Bene: this function does not change Node (even if it's a CAF),
-        -- so that the cost centre in the original closure can still be
-        -- extracted by a subsequent enterCostCentre
-setupUpdate closure_info node body
-  | not (lfUpdatable (closureLFInfo closure_info))
-  = body
-
-  | not (isStaticClosure closure_info)
-  = if not (closureUpdReqd closure_info)
-      then do tickyUpdateFrameOmitted; body
-      else do
-          tickyPushUpdateFrame
-          dflags <- getDynFlags
-          let
-              bh = blackHoleOnEntry closure_info &&
-                   not (gopt Opt_SccProfilingOn dflags) &&
-                   gopt Opt_EagerBlackHoling dflags
-
-              lbl | bh        = mkBHUpdInfoLabel
-                  | otherwise = mkUpdInfoLabel
-
-          pushUpdateFrame lbl (CmmReg (CmmLocal node)) body
-
-  | otherwise   -- A static closure
-  = do  { tickyUpdateBhCaf closure_info
-
-        ; if closureUpdReqd closure_info
-          then do       -- Blackhole the (updatable) CAF:
-                { upd_closure <- link_caf node
-                ; pushUpdateFrame mkBHUpdInfoLabel upd_closure body }
-          else do {tickyUpdateFrameOmitted; body}
-    }
-
------------------------------------------------------------------------------
--- Setting up update frames
-
--- Push the update frame on the stack in the Entry area,
--- leaving room for the return address that is already
--- at the old end of the area.
---
-pushUpdateFrame :: CLabel -> CmmExpr -> FCode () -> FCode ()
-pushUpdateFrame lbl updatee body
-  = do
-       updfr  <- getUpdFrameOff
-       dflags <- getDynFlags
-       let
-           hdr         = fixedHdrSize dflags
-           frame       = updfr + hdr + sIZEOF_StgUpdateFrame_NoHdr dflags
-       --
-       emitUpdateFrame dflags (CmmStackSlot Old frame) lbl updatee
-       withUpdFrameOff frame body
-
-emitUpdateFrame :: DynFlags -> CmmExpr -> CLabel -> CmmExpr -> FCode ()
-emitUpdateFrame dflags frame lbl updatee = do
-  let
-           hdr         = fixedHdrSize dflags
-           off_updatee = hdr + oFFSET_StgUpdateFrame_updatee dflags
-  --
-  emitStore frame (mkLblExpr lbl)
-  emitStore (cmmOffset dflags frame off_updatee) updatee
-  initUpdFrameProf frame
-
------------------------------------------------------------------------------
--- Entering a CAF
---
--- See Note [CAF management] in rts/sm/Storage.c
-
-link_caf :: LocalReg           -- pointer to the closure
-         -> FCode CmmExpr      -- Returns amode for closure to be updated
--- This function returns the address of the black hole, so it can be
--- updated with the new value when available.
-link_caf node = do
-  { dflags <- getDynFlags
-        -- Call the RTS function newCAF, returning the newly-allocated
-        -- blackhole indirection closure
-  ; let newCAF_lbl = mkForeignLabel (fsLit "newCAF") Nothing
-                                    ForeignLabelInExternalPackage IsFunction
-  ; bh <- newTemp (bWord dflags)
-  ; emitRtsCallGen [(bh,AddrHint)] newCAF_lbl
-      [ (baseExpr,  AddrHint),
-        (CmmReg (CmmLocal node), AddrHint) ]
-      False
-
-  -- see Note [atomic CAF entry] in rts/sm/Storage.c
-  ; updfr  <- getUpdFrameOff
-  ; let target = entryCode dflags (closureInfoPtr dflags (CmmReg (CmmLocal node)))
-  ; emit =<< mkCmmIfThen
-      (cmmEqWord dflags (CmmReg (CmmLocal bh)) (zeroExpr dflags))
-        -- re-enter the CAF
-       (mkJump dflags NativeNodeCall target [] updfr)
-
-  ; return (CmmReg (CmmLocal bh)) }
-
-------------------------------------------------------------------------
---              Profiling
-------------------------------------------------------------------------
-
--- For "global" data constructors the description is simply occurrence
--- name of the data constructor itself.  Otherwise it is determined by
--- @closureDescription@ from the let binding information.
-
-closureDescription :: DynFlags
-           -> Module            -- Module
-                   -> Name              -- Id of closure binding
-                   -> String
-        -- Not called for StgRhsCon which have global info tables built in
-        -- CgConTbls.hs with a description generated from the data constructor
-closureDescription dflags mod_name name
-  = showSDocDump dflags (char '<' <>
-                    (if isExternalName name
-                      then ppr name -- ppr will include the module name prefix
-                      else pprModule mod_name <> char '.' <> ppr name) <>
-                    char '>')
-   -- showSDocDump, because we want to see the unique on the Name.
diff --git a/compiler/codeGen/StgCmmBind.hs-boot b/compiler/codeGen/StgCmmBind.hs-boot
deleted file mode 100644
index 8e3dd38ad8..0000000000
--- a/compiler/codeGen/StgCmmBind.hs-boot
+++ /dev/null
@@ -1,6 +0,0 @@
-module StgCmmBind where
-
-import StgCmmMonad( FCode )
-import StgSyn( CgStgBinding )
-
-cgBind :: CgStgBinding -> FCode ()
diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs
deleted file mode 100644
index ac8db1268f..0000000000
--- a/compiler/codeGen/StgCmmClosure.hs
+++ /dev/null
@@ -1,1008 +0,0 @@
-{-# LANGUAGE CPP, RecordWildCards #-}
-
------------------------------------------------------------------------------
---
--- Stg to C-- code generation:
---
--- The types   LambdaFormInfo
---             ClosureInfo
---
--- Nothing monadic in here!
---
------------------------------------------------------------------------------
-
-module StgCmmClosure (
-        DynTag,  tagForCon, isSmallFamily,
-
-        idPrimRep, isVoidRep, isGcPtrRep, addIdReps, addArgReps,
-        argPrimRep,
-
-        NonVoid(..), fromNonVoid, nonVoidIds, nonVoidStgArgs,
-        assertNonVoidIds, assertNonVoidStgArgs,
-
-        -- * LambdaFormInfo
-        LambdaFormInfo,         -- Abstract
-        StandardFormInfo,        -- ...ditto...
-        mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo,
-        mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape,
-        mkLFStringLit,
-        lfDynTag,
-        isLFThunk, isLFReEntrant, lfUpdatable,
-
-        -- * Used by other modules
-        CgLoc(..), SelfLoopInfo, CallMethod(..),
-        nodeMustPointToIt, isKnownFun, funTag, tagForArity, getCallMethod,
-
-        -- * ClosureInfo
-        ClosureInfo,
-        mkClosureInfo,
-        mkCmmInfo,
-
-        -- ** Inspection
-        closureLFInfo, closureName,
-
-        -- ** Labels
-        -- These just need the info table label
-        closureInfoLabel, staticClosureLabel,
-        closureSlowEntryLabel, closureLocalEntryLabel,
-
-        -- ** Predicates
-        -- These are really just functions on LambdaFormInfo
-        closureUpdReqd, closureSingleEntry,
-        closureReEntrant, closureFunInfo,
-        isToplevClosure,
-
-        blackHoleOnEntry,  -- Needs LambdaFormInfo and SMRep
-        isStaticClosure,   -- Needs SMPre
-
-        -- * InfoTables
-        mkDataConInfoTable,
-        cafBlackHoleInfoTable,
-        indStaticInfoTable,
-        staticClosureNeedsLink,
-    ) where
-
-#include "../includes/MachDeps.h"
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import StgSyn
-import SMRep
-import Cmm
-import PprCmmExpr() -- For Outputable instances
-
-import CostCentre
-import BlockId
-import CLabel
-import Id
-import IdInfo
-import DataCon
-import Name
-import Type
-import TyCoRep
-import TcType
-import TyCon
-import RepType
-import BasicTypes
-import Outputable
-import DynFlags
-import Util
-
-import Data.Coerce (coerce)
-import qualified Data.ByteString.Char8 as BS8
-
------------------------------------------------------------------------------
---                Data types and synonyms
------------------------------------------------------------------------------
-
--- These data types are mostly used by other modules, especially StgCmmMonad,
--- but we define them here because some functions in this module need to
--- have access to them as well
-
-data CgLoc
-  = CmmLoc CmmExpr      -- A stable CmmExpr; that is, one not mentioning
-                        -- Hp, so that it remains valid across calls
-
-  | LneLoc BlockId [LocalReg]             -- A join point
-        -- A join point (= let-no-escape) should only
-        -- be tail-called, and in a saturated way.
-        -- To tail-call it, assign to these locals,
-        -- and branch to the block id
-
-instance Outputable CgLoc where
-  ppr (CmmLoc e)    = text "cmm" <+> ppr e
-  ppr (LneLoc b rs) = text "lne" <+> ppr b <+> ppr rs
-
-type SelfLoopInfo = (Id, BlockId, [LocalReg])
-
--- used by ticky profiling
-isKnownFun :: LambdaFormInfo -> Bool
-isKnownFun LFReEntrant{} = True
-isKnownFun LFLetNoEscape = True
-isKnownFun _             = False
-
-
--------------------------------------
---        Non-void types
--------------------------------------
--- We frequently need the invariant that an Id or a an argument
--- is of a non-void type. This type is a witness to the invariant.
-
-newtype NonVoid a = NonVoid a
-  deriving (Eq, Show)
-
-fromNonVoid :: NonVoid a -> a
-fromNonVoid (NonVoid a) = a
-
-instance (Outputable a) => Outputable (NonVoid a) where
-  ppr (NonVoid a) = ppr a
-
-nonVoidIds :: [Id] -> [NonVoid Id]
-nonVoidIds ids = [NonVoid id | id <- ids, not (isVoidTy (idType id))]
-
--- | Used in places where some invariant ensures that all these Ids are
--- non-void; e.g. constructor field binders in case expressions.
--- See Note [Post-unarisation invariants] in UnariseStg.
-assertNonVoidIds :: [Id] -> [NonVoid Id]
-assertNonVoidIds ids = ASSERT(not (any (isVoidTy . idType) ids))
-                       coerce ids
-
-nonVoidStgArgs :: [StgArg] -> [NonVoid StgArg]
-nonVoidStgArgs args = [NonVoid arg | arg <- args, not (isVoidTy (stgArgType arg))]
-
--- | Used in places where some invariant ensures that all these arguments are
--- non-void; e.g. constructor arguments.
--- See Note [Post-unarisation invariants] in UnariseStg.
-assertNonVoidStgArgs :: [StgArg] -> [NonVoid StgArg]
-assertNonVoidStgArgs args = ASSERT(not (any (isVoidTy . stgArgType) args))
-                            coerce args
-
-
------------------------------------------------------------------------------
---                Representations
------------------------------------------------------------------------------
-
--- Why are these here?
-
--- | Assumes that there is precisely one 'PrimRep' of the type. This assumption
--- holds after unarise.
--- See Note [Post-unarisation invariants]
-idPrimRep :: Id -> PrimRep
-idPrimRep id = typePrimRep1 (idType id)
-    -- See also Note [VoidRep] in RepType
-
--- | Assumes that Ids have one PrimRep, which holds after unarisation.
--- See Note [Post-unarisation invariants]
-addIdReps :: [NonVoid Id] -> [NonVoid (PrimRep, Id)]
-addIdReps = map (\id -> let id' = fromNonVoid id
-                         in NonVoid (idPrimRep id', id'))
-
--- | Assumes that arguments have one PrimRep, which holds after unarisation.
--- See Note [Post-unarisation invariants]
-addArgReps :: [NonVoid StgArg] -> [NonVoid (PrimRep, StgArg)]
-addArgReps = map (\arg -> let arg' = fromNonVoid arg
-                           in NonVoid (argPrimRep arg', arg'))
-
--- | Assumes that the argument has one PrimRep, which holds after unarisation.
--- See Note [Post-unarisation invariants]
-argPrimRep :: StgArg -> PrimRep
-argPrimRep arg = typePrimRep1 (stgArgType arg)
-
-
------------------------------------------------------------------------------
---                LambdaFormInfo
------------------------------------------------------------------------------
-
--- Information about an identifier, from the code generator's point of
--- view.  Every identifier is bound to a LambdaFormInfo in the
--- environment, which gives the code generator enough info to be able to
--- tail call or return that identifier.
-
-data LambdaFormInfo
-  = LFReEntrant         -- Reentrant closure (a function)
-        TopLevelFlag    -- True if top level
-        OneShotInfo
-        !RepArity       -- Arity. Invariant: always > 0
-        !Bool           -- True <=> no fvs
-        ArgDescr        -- Argument descriptor (should really be in ClosureInfo)
-
-  | LFThunk             -- Thunk (zero arity)
-        TopLevelFlag
-        !Bool           -- True <=> no free vars
-        !Bool           -- True <=> updatable (i.e., *not* single-entry)
-        StandardFormInfo
-        !Bool           -- True <=> *might* be a function type
-
-  | LFCon               -- A saturated constructor application
-        DataCon         -- The constructor
-
-  | LFUnknown           -- Used for function arguments and imported things.
-                        -- We know nothing about this closure.
-                        -- Treat like updatable "LFThunk"...
-                        -- Imported things which we *do* know something about use
-                        -- one of the other LF constructors (eg LFReEntrant for
-                        -- known functions)
-        !Bool           -- True <=> *might* be a function type
-                        --      The False case is good when we want to enter it,
-                        --        because then we know the entry code will do
-                        --        For a function, the entry code is the fast entry point
-
-  | LFUnlifted          -- A value of unboxed type;
-                        -- always a value, needs evaluation
-
-  | LFLetNoEscape       -- See LetNoEscape module for precise description
-
-
--------------------------
--- StandardFormInfo tells whether this thunk has one of
--- a small number of standard forms
-
-data StandardFormInfo
-  = NonStandardThunk
-        -- The usual case: not of the standard forms
-
-  | SelectorThunk
-        -- A SelectorThunk is of form
-        --      case x of
-        --           con a1,..,an -> ak
-        -- and the constructor is from a single-constr type.
-       WordOff          -- 0-origin offset of ak within the "goods" of
-                        -- constructor (Recall that the a1,...,an may be laid
-                        -- out in the heap in a non-obvious order.)
-
-  | ApThunk
-        -- An ApThunk is of form
-        --        x1 ... xn
-        -- The code for the thunk just pushes x2..xn on the stack and enters x1.
-        -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled
-        -- in the RTS to save space.
-        RepArity                -- Arity, n
-
-
-------------------------------------------------------
---                Building LambdaFormInfo
-------------------------------------------------------
-
-mkLFArgument :: Id -> LambdaFormInfo
-mkLFArgument id
-  | isUnliftedType ty      = LFUnlifted
-  | might_be_a_function ty = LFUnknown True
-  | otherwise              = LFUnknown False
-  where
-    ty = idType id
-
--------------
-mkLFLetNoEscape :: LambdaFormInfo
-mkLFLetNoEscape = LFLetNoEscape
-
--------------
-mkLFReEntrant :: TopLevelFlag    -- True of top level
-              -> [Id]            -- Free vars
-              -> [Id]            -- Args
-              -> ArgDescr        -- Argument descriptor
-              -> LambdaFormInfo
-
-mkLFReEntrant _ _ [] _
-  = pprPanic "mkLFReEntrant" empty
-mkLFReEntrant top fvs args arg_descr
-  = LFReEntrant top os_info (length args) (null fvs) arg_descr
-  where os_info = idOneShotInfo (head args)
-
--------------
-mkLFThunk :: Type -> TopLevelFlag -> [Id] -> UpdateFlag -> LambdaFormInfo
-mkLFThunk thunk_ty top fvs upd_flag
-  = ASSERT( not (isUpdatable upd_flag) || not (isUnliftedType thunk_ty) )
-    LFThunk top (null fvs)
-            (isUpdatable upd_flag)
-            NonStandardThunk
-            (might_be_a_function thunk_ty)
-
---------------
-might_be_a_function :: Type -> Bool
--- Return False only if we are *sure* it's a data type
--- Look through newtypes etc as much as poss
-might_be_a_function ty
-  | [LiftedRep] <- typePrimRep ty
-  , Just tc <- tyConAppTyCon_maybe (unwrapType ty)
-  , isDataTyCon tc
-  = False
-  | otherwise
-  = True
-
--------------
-mkConLFInfo :: DataCon -> LambdaFormInfo
-mkConLFInfo con = LFCon con
-
--------------
-mkSelectorLFInfo :: Id -> Int -> Bool -> LambdaFormInfo
-mkSelectorLFInfo id offset updatable
-  = LFThunk NotTopLevel False updatable (SelectorThunk offset)
-        (might_be_a_function (idType id))
-
--------------
-mkApLFInfo :: Id -> UpdateFlag -> Arity -> LambdaFormInfo
-mkApLFInfo id upd_flag arity
-  = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity)
-        (might_be_a_function (idType id))
-
--------------
-mkLFImported :: Id -> LambdaFormInfo
-mkLFImported id
-  | Just con <- isDataConWorkId_maybe id
-  , isNullaryRepDataCon con
-  = LFCon con   -- An imported nullary constructor
-                -- We assume that the constructor is evaluated so that
-                -- the id really does point directly to the constructor
-
-  | arity > 0
-  = LFReEntrant TopLevel noOneShotInfo arity True (panic "arg_descr")
-
-  | otherwise
-  = mkLFArgument id -- Not sure of exact arity
-  where
-    arity = idFunRepArity id
-
--------------
-mkLFStringLit :: LambdaFormInfo
-mkLFStringLit = LFUnlifted
-
------------------------------------------------------
---                Dynamic pointer tagging
------------------------------------------------------
-
-type DynTag = Int       -- The tag on a *pointer*
-                        -- (from the dynamic-tagging paper)
-
--- Note [Data constructor dynamic tags]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- The family size of a data type (the number of constructors
--- or the arity of a function) can be either:
---    * small, if the family size < 2**tag_bits
---    * big, otherwise.
---
--- Small families can have the constructor tag in the tag bits.
--- Big families only use the tag value 1 to represent evaluatedness.
--- We don't have very many tag bits: for example, we have 2 bits on
--- x86-32 and 3 bits on x86-64.
-
-isSmallFamily :: DynFlags -> Int -> Bool
-isSmallFamily dflags fam_size = fam_size <= mAX_PTR_TAG dflags
-
-tagForCon :: DynFlags -> DataCon -> DynTag
-tagForCon dflags con
-  | isSmallFamily dflags fam_size = con_tag
-  | otherwise                     = 1
-  where
-    con_tag  = dataConTag con -- NB: 1-indexed
-    fam_size = tyConFamilySize (dataConTyCon con)
-
-tagForArity :: DynFlags -> RepArity -> DynTag
-tagForArity dflags arity
- | isSmallFamily dflags arity = arity
- | otherwise                  = 0
-
-lfDynTag :: DynFlags -> LambdaFormInfo -> DynTag
--- Return the tag in the low order bits of a variable bound
--- to this LambdaForm
-lfDynTag dflags (LFCon con)                 = tagForCon dflags con
-lfDynTag dflags (LFReEntrant _ _ arity _ _) = tagForArity dflags arity
-lfDynTag _      _other                      = 0
-
-
------------------------------------------------------------------------------
---                Observing LambdaFormInfo
------------------------------------------------------------------------------
-
-------------
-isLFThunk :: LambdaFormInfo -> Bool
-isLFThunk (LFThunk {})  = True
-isLFThunk _ = False
-
-isLFReEntrant :: LambdaFormInfo -> Bool
-isLFReEntrant (LFReEntrant {}) = True
-isLFReEntrant _                = False
-
------------------------------------------------------------------------------
---                Choosing SM reps
------------------------------------------------------------------------------
-
-lfClosureType :: LambdaFormInfo -> ClosureTypeInfo
-lfClosureType (LFReEntrant _ _ arity _ argd) = Fun arity argd
-lfClosureType (LFCon con)                    = Constr (dataConTagZ con)
-                                                      (dataConIdentity con)
-lfClosureType (LFThunk _ _ _ is_sel _)       = thunkClosureType is_sel
-lfClosureType _                              = panic "lfClosureType"
-
-thunkClosureType :: StandardFormInfo -> ClosureTypeInfo
-thunkClosureType (SelectorThunk off) = ThunkSelector off
-thunkClosureType _                   = Thunk
-
--- We *do* get non-updatable top-level thunks sometimes.  eg. f = g
--- gets compiled to a jump to g (if g has non-zero arity), instead of
--- messing around with update frames and PAPs.  We set the closure type
--- to FUN_STATIC in this case.
-
------------------------------------------------------------------------------
---                nodeMustPointToIt
------------------------------------------------------------------------------
-
-nodeMustPointToIt :: DynFlags -> LambdaFormInfo -> Bool
--- If nodeMustPointToIt is true, then the entry convention for
--- this closure has R1 (the "Node" register) pointing to the
--- closure itself --- the "self" argument
-
-nodeMustPointToIt _ (LFReEntrant top _ _ no_fvs _)
-  =  not no_fvs          -- Certainly if it has fvs we need to point to it
-  || isNotTopLevel top   -- See Note [GC recovery]
-        -- For lex_profiling we also access the cost centre for a
-        -- non-inherited (i.e. non-top-level) function.
-        -- The isNotTopLevel test above ensures this is ok.
-
-nodeMustPointToIt dflags (LFThunk top no_fvs updatable NonStandardThunk _)
-  =  not no_fvs            -- Self parameter
-  || isNotTopLevel top     -- Note [GC recovery]
-  || updatable             -- Need to push update frame
-  || gopt Opt_SccProfilingOn dflags
-          -- For the non-updatable (single-entry case):
-          --
-          -- True if has fvs (in which case we need access to them, and we
-          --                    should black-hole it)
-          -- or profiling (in which case we need to recover the cost centre
-          --                 from inside it)  ToDo: do we need this even for
-          --                                    top-level thunks? If not,
-          --                                    isNotTopLevel subsumes this
-
-nodeMustPointToIt _ (LFThunk {})        -- Node must point to a standard-form thunk
-  = True
-
-nodeMustPointToIt _ (LFCon _) = True
-
-        -- Strictly speaking, the above two don't need Node to point
-        -- to it if the arity = 0.  But this is a *really* unlikely
-        -- situation.  If we know it's nil (say) and we are entering
-        -- it. Eg: let x = [] in x then we will certainly have inlined
-        -- x, since nil is a simple atom.  So we gain little by not
-        -- having Node point to known zero-arity things.  On the other
-        -- hand, we do lose something; Patrick's code for figuring out
-        -- when something has been updated but not entered relies on
-        -- having Node point to the result of an update.  SLPJ
-        -- 27/11/92.
-
-nodeMustPointToIt _ (LFUnknown _)   = True
-nodeMustPointToIt _ LFUnlifted      = False
-nodeMustPointToIt _ LFLetNoEscape   = False
-
-{- Note [GC recovery]
-~~~~~~~~~~~~~~~~~~~~~
-If we a have a local let-binding (function or thunk)
-   let f = <body> in ...
-AND <body> allocates, then the heap-overflow check needs to know how
-to re-start the evaluation.  It uses the "self" pointer to do this.
-So even if there are no free variables in <body>, we still make
-nodeMustPointToIt be True for non-top-level bindings.
-
-Why do any such bindings exist?  After all, let-floating should have
-floated them out.  Well, a clever optimiser might leave one there to
-avoid a space leak, deliberately recomputing a thunk.  Also (and this
-really does happen occasionally) let-floating may make a function f smaller
-so it can be inlined, so now (f True) may generate a local no-fv closure.
-This actually happened during bootstrapping GHC itself, with f=mkRdrFunBind
-in TcGenDeriv.) -}
-
------------------------------------------------------------------------------
---                getCallMethod
------------------------------------------------------------------------------
-
-{- The entry conventions depend on the type of closure being entered,
-whether or not it has free variables, and whether we're running
-sequentially or in parallel.
-
-Closure                           Node   Argument   Enter
-Characteristics              Par   Req'd  Passing    Via
----------------------------------------------------------------------------
-Unknown                     & no  & yes & stack     & node
-Known fun (>1 arg), no fvs  & no  & no  & registers & fast entry (enough args)
-                                                    & slow entry (otherwise)
-Known fun (>1 arg), fvs     & no  & yes & registers & fast entry (enough args)
-0 arg, no fvs \r,\s         & no  & no  & n/a       & direct entry
-0 arg, no fvs \u            & no  & yes & n/a       & node
-0 arg, fvs \r,\s,selector   & no  & yes & n/a       & node
-0 arg, fvs \r,\s            & no  & yes & n/a       & direct entry
-0 arg, fvs \u               & no  & yes & n/a       & node
-Unknown                     & yes & yes & stack     & node
-Known fun (>1 arg), no fvs  & yes & no  & registers & fast entry (enough args)
-                                                    & slow entry (otherwise)
-Known fun (>1 arg), fvs     & yes & yes & registers & node
-0 arg, fvs \r,\s,selector   & yes & yes & n/a       & node
-0 arg, no fvs \r,\s         & yes & no  & n/a       & direct entry
-0 arg, no fvs \u            & yes & yes & n/a       & node
-0 arg, fvs \r,\s            & yes & yes & n/a       & node
-0 arg, fvs \u               & yes & yes & n/a       & node
-
-When black-holing, single-entry closures could also be entered via node
-(rather than directly) to catch double-entry. -}
-
-data CallMethod
-  = EnterIt             -- No args, not a function
-
-  | JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop
-
-  | ReturnIt            -- It's a value (function, unboxed value,
-                        -- or constructor), so just return it.
-
-  | SlowCall                -- Unknown fun, or known fun with
-                        -- too few args.
-
-  | DirectEntry         -- Jump directly, with args in regs
-        CLabel          --   The code label
-        RepArity        --   Its arity
-
-getCallMethod :: DynFlags
-              -> Name           -- Function being applied
-              -> Id             -- Function Id used to chech if it can refer to
-                                -- CAF's and whether the function is tail-calling
-                                -- itself
-              -> LambdaFormInfo -- Its info
-              -> RepArity       -- Number of available arguments
-              -> RepArity       -- Number of them being void arguments
-              -> CgLoc          -- Passed in from cgIdApp so that we can
-                                -- handle let-no-escape bindings and self-recursive
-                                -- tail calls using the same data constructor,
-                                -- JumpToIt. This saves us one case branch in
-                                -- cgIdApp
-              -> Maybe SelfLoopInfo -- can we perform a self-recursive tail call?
-              -> CallMethod
-
-getCallMethod dflags _ id _ n_args v_args _cg_loc
-              (Just (self_loop_id, block_id, args))
-  | gopt Opt_Loopification dflags
-  , id == self_loop_id
-  , args `lengthIs` (n_args - v_args)
-  -- If these patterns match then we know that:
-  --   * loopification optimisation is turned on
-  --   * function is performing a self-recursive call in a tail position
-  --   * number of non-void parameters of the function matches functions arity.
-  -- See Note [Self-recursive tail calls] and Note [Void arguments in
-  -- self-recursive tail calls] in StgCmmExpr for more details
-  = JumpToIt block_id args
-
-getCallMethod dflags name id (LFReEntrant _ _ arity _ _) n_args _v_args _cg_loc
-              _self_loop_info
-  | n_args == 0 -- No args at all
-  && not (gopt Opt_SccProfilingOn dflags)
-     -- See Note [Evaluating functions with profiling] in rts/Apply.cmm
-  = ASSERT( arity /= 0 ) ReturnIt
-  | n_args < arity = SlowCall        -- Not enough args
-  | otherwise      = DirectEntry (enterIdLabel dflags name (idCafInfo id)) arity
-
-getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc _self_loop_info
-  = ASSERT( n_args == 0 ) ReturnIt
-
-getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info
-  = ASSERT( n_args == 0 ) ReturnIt
-    -- n_args=0 because it'd be ill-typed to apply a saturated
-    --          constructor application to anything
-
-getCallMethod dflags name id (LFThunk _ _ updatable std_form_info is_fun)
-              n_args _v_args _cg_loc _self_loop_info
-  | is_fun      -- it *might* be a function, so we must "call" it (which is always safe)
-  = SlowCall    -- We cannot just enter it [in eval/apply, the entry code
-                -- is the fast-entry code]
-
-  -- Since is_fun is False, we are *definitely* looking at a data value
-  | updatable || gopt Opt_Ticky dflags -- to catch double entry
-      {- OLD: || opt_SMP
-         I decided to remove this, because in SMP mode it doesn't matter
-         if we enter the same thunk multiple times, so the optimisation
-         of jumping directly to the entry code is still valid.  --SDM
-        -}
-  = EnterIt
-
-  -- even a non-updatable selector thunk can be updated by the garbage
-  -- collector, so we must enter it. (#8817)
-  | SelectorThunk{} <- std_form_info
-  = EnterIt
-
-    -- We used to have ASSERT( n_args == 0 ), but actually it is
-    -- possible for the optimiser to generate
-    --   let bot :: Int = error Int "urk"
-    --   in (bot `cast` unsafeCoerce Int (Int -> Int)) 3
-    -- This happens as a result of the case-of-error transformation
-    -- So the right thing to do is just to enter the thing
-
-  | otherwise        -- Jump direct to code for single-entry thunks
-  = ASSERT( n_args == 0 )
-    DirectEntry (thunkEntryLabel dflags name (idCafInfo id) std_form_info
-                updatable) 0
-
-getCallMethod _ _name _ (LFUnknown True) _n_arg _v_args _cg_locs _self_loop_info
-  = SlowCall -- might be a function
-
-getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info
-  = ASSERT2( n_args == 0, ppr name <+> ppr n_args )
-    EnterIt -- Not a function
-
-getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs)
-              _self_loop_info
-  = JumpToIt blk_id lne_regs
-
-getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method"
-
------------------------------------------------------------------------------
---              Data types for closure information
------------------------------------------------------------------------------
-
-
-{- ClosureInfo: information about a binding
-
-   We make a ClosureInfo for each let binding (both top level and not),
-   but not bindings for data constructors: for those we build a CmmInfoTable
-   directly (see mkDataConInfoTable).
-
-   To a first approximation:
-       ClosureInfo = (LambdaFormInfo, CmmInfoTable)
-
-   A ClosureInfo has enough information
-     a) to construct the info table itself, and build other things
-        related to the binding (e.g. slow entry points for a function)
-     b) to allocate a closure containing that info pointer (i.e.
-           it knows the info table label)
--}
-
-data ClosureInfo
-  = ClosureInfo {
-        closureName :: !Name,           -- The thing bound to this closure
-           -- we don't really need this field: it's only used in generating
-           -- code for ticky and profiling, and we could pass the information
-           -- around separately, but it doesn't do much harm to keep it here.
-
-        closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon
-          -- this tells us about what the closure contains: it's right-hand-side.
-
-          -- the rest is just an unpacked CmmInfoTable.
-        closureInfoLabel :: !CLabel,
-        closureSMRep     :: !SMRep,          -- representation used by storage mgr
-        closureProf      :: !ProfilingInfo
-    }
-
--- | Convert from 'ClosureInfo' to 'CmmInfoTable'.
-mkCmmInfo :: ClosureInfo -> Id -> CostCentreStack -> CmmInfoTable
-mkCmmInfo ClosureInfo {..} id ccs
-  = CmmInfoTable { cit_lbl  = closureInfoLabel
-                 , cit_rep  = closureSMRep
-                 , cit_prof = closureProf
-                 , cit_srt  = Nothing
-                 , cit_clo  = if isStaticRep closureSMRep
-                                then Just (id,ccs)
-                                else Nothing }
-
---------------------------------------
---        Building ClosureInfos
---------------------------------------
-
-mkClosureInfo :: DynFlags
-              -> Bool                -- Is static
-              -> Id
-              -> LambdaFormInfo
-              -> Int -> Int        -- Total and pointer words
-              -> String         -- String descriptor
-              -> ClosureInfo
-mkClosureInfo dflags is_static id lf_info tot_wds ptr_wds val_descr
-  = ClosureInfo { closureName      = name
-                , closureLFInfo    = lf_info
-                , closureInfoLabel = info_lbl   -- These three fields are
-                , closureSMRep     = sm_rep     -- (almost) an info table
-                , closureProf      = prof }     -- (we don't have an SRT yet)
-  where
-    name       = idName id
-    sm_rep     = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType lf_info)
-    prof       = mkProfilingInfo dflags id val_descr
-    nonptr_wds = tot_wds - ptr_wds
-
-    info_lbl = mkClosureInfoTableLabel id lf_info
-
---------------------------------------
---   Other functions over ClosureInfo
---------------------------------------
-
--- Eager blackholing is normally disabled, but can be turned on with
--- -feager-blackholing.  When it is on, we replace the info pointer of
--- the thunk with stg_EAGER_BLACKHOLE_info on entry.
-
--- If we wanted to do eager blackholing with slop filling,
--- we'd need to do it at the *end* of a basic block, otherwise
--- we overwrite the free variables in the thunk that we still
--- need.  We have a patch for this from Andy Cheadle, but not
--- incorporated yet. --SDM [6/2004]
---
--- Previously, eager blackholing was enabled when ticky-ticky
--- was on. But it didn't work, and it wasn't strictly necessary
--- to bring back minimal ticky-ticky, so now EAGER_BLACKHOLING
--- is unconditionally disabled. -- krc 1/2007
---
--- Static closures are never themselves black-holed.
-
-blackHoleOnEntry :: ClosureInfo -> Bool
-blackHoleOnEntry cl_info
-  | isStaticRep (closureSMRep cl_info)
-  = False        -- Never black-hole a static closure
-
-  | otherwise
-  = case closureLFInfo cl_info of
-      LFReEntrant {}            -> False
-      LFLetNoEscape             -> False
-      LFThunk _ _no_fvs upd _ _ -> upd   -- See Note [Black-holing non-updatable thunks]
-      _other -> panic "blackHoleOnEntry"
-
-{- Note [Black-holing non-updatable thunks]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We must not black-hole non-updatable (single-entry) thunks otherwise
-we run into issues like #10414. Specifically:
-
-  * There is no reason to black-hole a non-updatable thunk: it should
-    not be competed for by multiple threads
-
-  * It could, conceivably, cause a space leak if we don't black-hole
-    it, if there was a live but never-followed pointer pointing to it.
-    Let's hope that doesn't happen.
-
-  * It is dangerous to black-hole a non-updatable thunk because
-     - is not updated (of course)
-     - hence, if it is black-holed and another thread tries to evaluate
-       it, that thread will block forever
-    This actually happened in #10414.  So we do not black-hole
-    non-updatable thunks.
-
-  * How could two threads evaluate the same non-updatable (single-entry)
-    thunk?  See Reid Barton's example below.
-
-  * Only eager blackholing could possibly black-hole a non-updatable
-    thunk, because lazy black-holing only affects thunks with an
-    update frame on the stack.
-
-Here is and example due to Reid Barton (#10414):
-    x = \u []  concat [[1], []]
-with the following definitions,
-
-    concat x = case x of
-        []       -> []
-        (:) x xs -> (++) x (concat xs)
-
-    (++) xs ys = case xs of
-        []         -> ys
-        (:) x rest -> (:) x ((++) rest ys)
-
-Where we use the syntax @\u []@ to denote an updatable thunk and @\s []@ to
-denote a single-entry (i.e. non-updatable) thunk. After a thread evaluates @x@
-to WHNF and calls @(++)@ the heap will contain the following thunks,
-
-    x = 1 : y
-    y = \u []  (++) [] z
-    z = \s []  concat []
-
-Now that the stage is set, consider the follow evaluations by two racing threads
-A and B,
-
-  1. Both threads enter @y@ before either is able to replace it with an
-     indirection
-
-  2. Thread A does the case analysis in @(++)@ and consequently enters @z@,
-     replacing it with a black-hole
-
-  3. At some later point thread B does the same case analysis and also attempts
-     to enter @z@. However, it finds that it has been replaced with a black-hole
-     so it blocks.
-
-  4. Thread A eventually finishes evaluating @z@ (to @[]@) and updates @y@
-     accordingly. It does *not* update @z@, however, as it is single-entry. This
-     leaves Thread B blocked forever on a black-hole which will never be
-     updated.
-
-To avoid this sort of condition we never black-hole non-updatable thunks.
--}
-
-isStaticClosure :: ClosureInfo -> Bool
-isStaticClosure cl_info = isStaticRep (closureSMRep cl_info)
-
-closureUpdReqd :: ClosureInfo -> Bool
-closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info
-
-lfUpdatable :: LambdaFormInfo -> Bool
-lfUpdatable (LFThunk _ _ upd _ _)  = upd
-lfUpdatable _ = False
-
-closureSingleEntry :: ClosureInfo -> Bool
-closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd
-closureSingleEntry (ClosureInfo { closureLFInfo = LFReEntrant _ OneShotLam _ _ _}) = True
-closureSingleEntry _ = False
-
-closureReEntrant :: ClosureInfo -> Bool
-closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant {} }) = True
-closureReEntrant _ = False
-
-closureFunInfo :: ClosureInfo -> Maybe (RepArity, ArgDescr)
-closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info
-
-lfFunInfo :: LambdaFormInfo ->  Maybe (RepArity, ArgDescr)
-lfFunInfo (LFReEntrant _ _ arity _ arg_desc)  = Just (arity, arg_desc)
-lfFunInfo _                                   = Nothing
-
-funTag :: DynFlags -> ClosureInfo -> DynTag
-funTag dflags (ClosureInfo { closureLFInfo = lf_info })
-    = lfDynTag dflags lf_info
-
-isToplevClosure :: ClosureInfo -> Bool
-isToplevClosure (ClosureInfo { closureLFInfo = lf_info })
-  = case lf_info of
-      LFReEntrant TopLevel _ _ _ _ -> True
-      LFThunk TopLevel _ _ _ _     -> True
-      _other                       -> False
-
---------------------------------------
---   Label generation
---------------------------------------
-
-staticClosureLabel :: ClosureInfo -> CLabel
-staticClosureLabel = toClosureLbl .  closureInfoLabel
-
-closureSlowEntryLabel :: ClosureInfo -> CLabel
-closureSlowEntryLabel = toSlowEntryLbl . closureInfoLabel
-
-closureLocalEntryLabel :: DynFlags -> ClosureInfo -> CLabel
-closureLocalEntryLabel dflags
-  | tablesNextToCode dflags = toInfoLbl  . closureInfoLabel
-  | otherwise               = toEntryLbl . closureInfoLabel
-
-mkClosureInfoTableLabel :: Id -> LambdaFormInfo -> CLabel
-mkClosureInfoTableLabel id lf_info
-  = case lf_info of
-        LFThunk _ _ upd_flag (SelectorThunk offset) _
-                      -> mkSelectorInfoLabel upd_flag offset
-
-        LFThunk _ _ upd_flag (ApThunk arity) _
-                      -> mkApInfoTableLabel upd_flag arity
-
-        LFThunk{}     -> std_mk_lbl name cafs
-        LFReEntrant{} -> std_mk_lbl name cafs
-        _other        -> panic "closureInfoTableLabel"
-
-  where
-    name = idName id
-
-    std_mk_lbl | is_local  = mkLocalInfoTableLabel
-               | otherwise = mkInfoTableLabel
-
-    cafs     = idCafInfo id
-    is_local = isDataConWorkId id
-       -- Make the _info pointer for the implicit datacon worker
-       -- binding local. The reason we can do this is that importing
-       -- code always either uses the _closure or _con_info. By the
-       -- invariants in CorePrep anything else gets eta expanded.
-
-
-thunkEntryLabel :: DynFlags -> Name -> CafInfo -> StandardFormInfo -> Bool -> CLabel
--- thunkEntryLabel is a local help function, not exported.  It's used from
--- getCallMethod.
-thunkEntryLabel dflags _thunk_id _ (ApThunk arity) upd_flag
-  = enterApLabel dflags upd_flag arity
-thunkEntryLabel dflags _thunk_id _ (SelectorThunk offset) upd_flag
-  = enterSelectorLabel dflags upd_flag offset
-thunkEntryLabel dflags thunk_id c _ _
-  = enterIdLabel dflags thunk_id c
-
-enterApLabel :: DynFlags -> Bool -> Arity -> CLabel
-enterApLabel dflags is_updatable arity
-  | tablesNextToCode dflags = mkApInfoTableLabel is_updatable arity
-  | otherwise               = mkApEntryLabel is_updatable arity
-
-enterSelectorLabel :: DynFlags -> Bool -> WordOff -> CLabel
-enterSelectorLabel dflags upd_flag offset
-  | tablesNextToCode dflags = mkSelectorInfoLabel upd_flag offset
-  | otherwise               = mkSelectorEntryLabel upd_flag offset
-
-enterIdLabel :: DynFlags -> Name -> CafInfo -> CLabel
-enterIdLabel dflags id c
-  | tablesNextToCode dflags = mkInfoTableLabel id c
-  | otherwise               = mkEntryLabel id c
-
-
---------------------------------------
---   Profiling
---------------------------------------
-
--- Profiling requires two pieces of information to be determined for
--- each closure's info table --- description and type.
-
--- The description is stored directly in the @CClosureInfoTable@ when the
--- info table is built.
-
--- The type is determined from the type information stored with the @Id@
--- in the closure info using @closureTypeDescr@.
-
-mkProfilingInfo :: DynFlags -> Id -> String -> ProfilingInfo
-mkProfilingInfo dflags id val_descr
-  | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo
-  | otherwise = ProfilingInfo ty_descr_w8 (BS8.pack val_descr)
-  where
-    ty_descr_w8  = BS8.pack (getTyDescription (idType id))
-
-getTyDescription :: Type -> String
-getTyDescription ty
-  = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) ->
-    case tau_ty of
-      TyVarTy _              -> "*"
-      AppTy fun _            -> getTyDescription fun
-      TyConApp tycon _       -> getOccString tycon
-      FunTy {}              -> '-' : fun_result tau_ty
-      ForAllTy _  ty         -> getTyDescription ty
-      LitTy n                -> getTyLitDescription n
-      CastTy ty _            -> getTyDescription ty
-      CoercionTy co          -> pprPanic "getTyDescription" (ppr co)
-    }
-  where
-    fun_result (FunTy { ft_res = res }) = '>' : fun_result res
-    fun_result other                    = getTyDescription other
-
-getTyLitDescription :: TyLit -> String
-getTyLitDescription l =
-  case l of
-    NumTyLit n -> show n
-    StrTyLit n -> show n
-
---------------------------------------
---   CmmInfoTable-related things
---------------------------------------
-
-mkDataConInfoTable :: DynFlags -> DataCon -> Bool -> Int -> Int -> CmmInfoTable
-mkDataConInfoTable dflags data_con is_static ptr_wds nonptr_wds
- = CmmInfoTable { cit_lbl  = info_lbl
-                , cit_rep  = sm_rep
-                , cit_prof = prof
-                , cit_srt  = Nothing
-                , cit_clo  = Nothing }
- where
-   name = dataConName data_con
-   info_lbl = mkConInfoTableLabel name NoCafRefs
-   sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type
-   cl_type = Constr (dataConTagZ data_con) (dataConIdentity data_con)
-                  -- We keep the *zero-indexed* tag in the srt_len field
-                  -- of the info table of a data constructor.
-
-   prof | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo
-        | otherwise                            = ProfilingInfo ty_descr val_descr
-
-   ty_descr  = BS8.pack $ occNameString $ getOccName $ dataConTyCon data_con
-   val_descr = BS8.pack $ occNameString $ getOccName data_con
-
--- We need a black-hole closure info to pass to @allocDynClosure@ when we
--- want to allocate the black hole on entry to a CAF.
-
-cafBlackHoleInfoTable :: CmmInfoTable
-cafBlackHoleInfoTable
-  = CmmInfoTable { cit_lbl  = mkCAFBlackHoleInfoTableLabel
-                 , cit_rep  = blackHoleRep
-                 , cit_prof = NoProfilingInfo
-                 , cit_srt  = Nothing
-                 , cit_clo  = Nothing }
-
-indStaticInfoTable :: CmmInfoTable
-indStaticInfoTable
-  = CmmInfoTable { cit_lbl  = mkIndStaticInfoLabel
-                 , cit_rep  = indStaticRep
-                 , cit_prof = NoProfilingInfo
-                 , cit_srt  = Nothing
-                 , cit_clo  = Nothing }
-
-staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool
--- A static closure needs a link field to aid the GC when traversing
--- the static closure graph.  But it only needs such a field if either
---        a) it has an SRT
---        b) it's a constructor with one or more pointer fields
--- In case (b), the constructor's fields themselves play the role
--- of the SRT.
-staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep }
-  | isConRep smrep         = not (isStaticNoCafCon smrep)
-  | otherwise              = has_srt
diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs
deleted file mode 100644
index 67a9776eac..0000000000
--- a/compiler/codeGen/StgCmmCon.hs
+++ /dev/null
@@ -1,285 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Stg to C--: code generation for constructors
---
--- This module provides the support code for StgCmm to deal with with
--- constructors on the RHSs of let(rec)s.
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmCon (
-        cgTopRhsCon, buildDynCon, bindConArgs
-    ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import StgSyn
-import CoreSyn  ( AltCon(..) )
-
-import StgCmmMonad
-import StgCmmEnv
-import StgCmmHeap
-import StgCmmLayout
-import StgCmmUtils
-import StgCmmClosure
-
-import CmmExpr
-import CmmUtils
-import CLabel
-import MkGraph
-import SMRep
-import CostCentre
-import Module
-import DataCon
-import DynFlags
-import FastString
-import Id
-import RepType (countConRepArgs)
-import Literal
-import PrelInfo
-import Outputable
-import GHC.Platform
-import Util
-import MonadUtils (mapMaybeM)
-
-import Control.Monad
-import Data.Char
-
-
-
----------------------------------------------------------------
---      Top-level constructors
----------------------------------------------------------------
-
-cgTopRhsCon :: DynFlags
-            -> Id               -- Name of thing bound to this RHS
-            -> DataCon          -- Id
-            -> [NonVoid StgArg] -- Args
-            -> (CgIdInfo, FCode ())
-cgTopRhsCon dflags id con args =
-    let id_info = litIdInfo dflags id (mkConLFInfo con) (CmmLabel closure_label)
-    in (id_info, gen_code)
-  where
-   name          = idName id
-   caffy         = idCafInfo id -- any stgArgHasCafRefs args
-   closure_label = mkClosureLabel name caffy
-
-   gen_code =
-     do { this_mod <- getModuleName
-        ; when (platformOS (targetPlatform dflags) == OSMinGW32) $
-              -- Windows DLLs have a problem with static cross-DLL refs.
-              MASSERT( not (isDllConApp dflags this_mod con (map fromNonVoid args)) )
-        ; ASSERT( args `lengthIs` countConRepArgs con ) return ()
-
-        -- LAY IT OUT
-        ; let
-            (tot_wds, --  #ptr_wds + #nonptr_wds
-             ptr_wds, --  #ptr_wds
-             nv_args_w_offsets) =
-                 mkVirtHeapOffsetsWithPadding dflags StdHeader (addArgReps args)
-
-            mk_payload (Padding len _) = return (CmmInt 0 (widthFromBytes len))
-            mk_payload (FieldOff arg _) = do
-                amode <- getArgAmode arg
-                case amode of
-                  CmmLit lit -> return lit
-                  _          -> panic "StgCmmCon.cgTopRhsCon"
-
-            nonptr_wds = tot_wds - ptr_wds
-
-             -- we're not really going to emit an info table, so having
-             -- to make a CmmInfoTable is a bit overkill, but mkStaticClosureFields
-             -- needs to poke around inside it.
-            info_tbl = mkDataConInfoTable dflags con True ptr_wds nonptr_wds
-
-
-        ; payload <- mapM mk_payload nv_args_w_offsets
-                -- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs
-                -- NB2: all the amodes should be Lits!
-                --      TODO (osa): Why?
-
-        ; let closure_rep = mkStaticClosureFields
-                             dflags
-                             info_tbl
-                             dontCareCCS                -- Because it's static data
-                             caffy                      -- Has CAF refs
-                             payload
-
-                -- BUILD THE OBJECT
-        ; emitDataLits closure_label closure_rep
-
-        ; return () }
-
-
----------------------------------------------------------------
---      Lay out and allocate non-top-level constructors
----------------------------------------------------------------
-
-buildDynCon :: Id                 -- Name of the thing to which this constr will
-                                  -- be bound
-            -> Bool               -- is it genuinely bound to that name, or just
-                                  -- for profiling?
-            -> CostCentreStack    -- Where to grab cost centre from;
-                                  -- current CCS if currentOrSubsumedCCS
-            -> DataCon            -- The data constructor
-            -> [NonVoid StgArg]   -- Its args
-            -> FCode (CgIdInfo, FCode CmmAGraph)
-               -- Return details about how to find it and initialization code
-buildDynCon binder actually_bound cc con args
-    = do dflags <- getDynFlags
-         buildDynCon' dflags (targetPlatform dflags) binder actually_bound cc con args
-
-
-buildDynCon' :: DynFlags
-             -> Platform
-             -> Id -> Bool
-             -> CostCentreStack
-             -> DataCon
-             -> [NonVoid StgArg]
-             -> FCode (CgIdInfo, FCode CmmAGraph)
-
-{- We used to pass a boolean indicating whether all the
-args were of size zero, so we could use a static
-constructor; but I concluded that it just isn't worth it.
-Now I/O uses unboxed tuples there just aren't any constructors
-with all size-zero args.
-
-The reason for having a separate argument, rather than looking at
-the addr modes of the args is that we may be in a "knot", and
-premature looking at the args will cause the compiler to black-hole!
--}
-
-
--------- buildDynCon': Nullary constructors --------------
--- First we deal with the case of zero-arity constructors.  They
--- will probably be unfolded, so we don't expect to see this case much,
--- if at all, but it does no harm, and sets the scene for characters.
---
--- In the case of zero-arity constructors, or, more accurately, those
--- which have exclusively size-zero (VoidRep) args, we generate no code
--- at all.
-
-buildDynCon' dflags _ binder _ _cc con []
-  | isNullaryRepDataCon con
-  = return (litIdInfo dflags binder (mkConLFInfo con)
-                (CmmLabel (mkClosureLabel (dataConName con) (idCafInfo binder))),
-            return mkNop)
-
--------- buildDynCon': Charlike and Intlike constructors -----------
-{- The following three paragraphs about @Char@-like and @Int@-like
-closures are obsolete, but I don't understand the details well enough
-to properly word them, sorry. I've changed the treatment of @Char@s to
-be analogous to @Int@s: only a subset is preallocated, because @Char@
-has now 31 bits. Only literals are handled here. -- Qrczak
-
-Now for @Char@-like closures.  We generate an assignment of the
-address of the closure to a temporary.  It would be possible simply to
-generate no code, and record the addressing mode in the environment,
-but we'd have to be careful if the argument wasn't a constant --- so
-for simplicity we just always assign to a temporary.
-
-Last special case: @Int@-like closures.  We only special-case the
-situation in which the argument is a literal in the range
-@mIN_INTLIKE@..@mAX_INTLILKE@.  NB: for @Char@-like closures we can
-work with any old argument, but for @Int@-like ones the argument has
-to be a literal.  Reason: @Char@ like closures have an argument type
-which is guaranteed in range.
-
-Because of this, we use can safely return an addressing mode.
-
-We don't support this optimisation when compiling into Windows DLLs yet
-because they don't support cross package data references well.
--}
-
-buildDynCon' dflags platform binder _ _cc con [arg]
-  | maybeIntLikeCon con
-  , platformOS platform /= OSMinGW32 || not (positionIndependent dflags)
-  , NonVoid (StgLitArg (LitNumber LitNumInt val _)) <- arg
-  , val <= fromIntegral (mAX_INTLIKE dflags) -- Comparisons at type Integer!
-  , val >= fromIntegral (mIN_INTLIKE dflags) -- ...ditto...
-  = do  { let intlike_lbl   = mkCmmClosureLabel rtsUnitId (fsLit "stg_INTLIKE")
-              val_int = fromIntegral val :: Int
-              offsetW = (val_int - mIN_INTLIKE dflags) * (fixedHdrSizeW dflags + 1)
-                -- INTLIKE closures consist of a header and one word payload
-              intlike_amode = cmmLabelOffW dflags intlike_lbl offsetW
-        ; return ( litIdInfo dflags binder (mkConLFInfo con) intlike_amode
-                 , return mkNop) }
-
-buildDynCon' dflags platform binder _ _cc con [arg]
-  | maybeCharLikeCon con
-  , platformOS platform /= OSMinGW32 || not (positionIndependent dflags)
-  , NonVoid (StgLitArg (LitChar val)) <- arg
-  , let val_int = ord val :: Int
-  , val_int <= mAX_CHARLIKE dflags
-  , val_int >= mIN_CHARLIKE dflags
-  = do  { let charlike_lbl   = mkCmmClosureLabel rtsUnitId (fsLit "stg_CHARLIKE")
-              offsetW = (val_int - mIN_CHARLIKE dflags) * (fixedHdrSizeW dflags + 1)
-                -- CHARLIKE closures consist of a header and one word payload
-              charlike_amode = cmmLabelOffW dflags charlike_lbl offsetW
-        ; return ( litIdInfo dflags binder (mkConLFInfo con) charlike_amode
-                 , return mkNop) }
-
--------- buildDynCon': the general case -----------
-buildDynCon' dflags _ binder actually_bound ccs con args
-  = do  { (id_info, reg) <- rhsIdInfo binder lf_info
-        ; return (id_info, gen_code reg)
-        }
- where
-  lf_info = mkConLFInfo con
-
-  gen_code reg
-    = do  { let (tot_wds, ptr_wds, args_w_offsets)
-                  = mkVirtConstrOffsets dflags (addArgReps args)
-                nonptr_wds = tot_wds - ptr_wds
-                info_tbl = mkDataConInfoTable dflags con False
-                                ptr_wds nonptr_wds
-          ; let ticky_name | actually_bound = Just binder
-                           | otherwise = Nothing
-
-          ; hp_plus_n <- allocDynClosure ticky_name info_tbl lf_info
-                                          use_cc blame_cc args_w_offsets
-          ; return (mkRhsInit dflags reg lf_info hp_plus_n) }
-    where
-      use_cc      -- cost-centre to stick in the object
-        | isCurrentCCS ccs = cccsExpr
-        | otherwise        = panic "buildDynCon: non-current CCS not implemented"
-
-      blame_cc = use_cc -- cost-centre on which to blame the alloc (same)
-
-
----------------------------------------------------------------
---      Binding constructor arguments
----------------------------------------------------------------
-
-bindConArgs :: AltCon -> LocalReg -> [NonVoid Id] -> FCode [LocalReg]
--- bindConArgs is called from cgAlt of a case
--- (bindConArgs con args) augments the environment with bindings for the
--- binders args, assuming that we have just returned from a 'case' which
--- found a con
-bindConArgs (DataAlt con) base args
-  = ASSERT(not (isUnboxedTupleCon con))
-    do dflags <- getDynFlags
-       let (_, _, args_w_offsets) = mkVirtConstrOffsets dflags (addIdReps args)
-           tag = tagForCon dflags con
-
-           -- The binding below forces the masking out of the tag bits
-           -- when accessing the constructor field.
-           bind_arg :: (NonVoid Id, ByteOff) -> FCode (Maybe LocalReg)
-           bind_arg (arg@(NonVoid b), offset)
-             | isDeadBinder b  -- See Note [Dead-binder optimisation] in StgCmmExpr
-             = return Nothing
-             | otherwise
-             = do { emit $ mkTaggedObjectLoad dflags (idToReg dflags arg)
-                                              base offset tag
-                  ; Just <$> bindArgToReg arg }
-
-       mapMaybeM bind_arg args_w_offsets
-
-bindConArgs _other_con _base args
-  = ASSERT( null args ) return []
diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs
deleted file mode 100644
index e605762f1f..0000000000
--- a/compiler/codeGen/StgCmmEnv.hs
+++ /dev/null
@@ -1,208 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Stg to C-- code generation: the binding environment
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-module StgCmmEnv (
-        CgIdInfo,
-
-        litIdInfo, lneIdInfo, rhsIdInfo, mkRhsInit,
-        idInfoToAmode,
-
-        addBindC, addBindsC,
-
-        bindArgsToRegs, bindToReg, rebindToReg,
-        bindArgToReg, idToReg,
-        getArgAmode, getNonVoidArgAmodes,
-        getCgIdInfo,
-        maybeLetNoEscape,
-    ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import TyCon
-import StgCmmMonad
-import StgCmmUtils
-import StgCmmClosure
-
-import CLabel
-
-import BlockId
-import CmmExpr
-import CmmUtils
-import DynFlags
-import Id
-import MkGraph
-import Name
-import Outputable
-import StgSyn
-import Type
-import TysPrim
-import UniqFM
-import Util
-import VarEnv
-
--------------------------------------
---        Manipulating CgIdInfo
--------------------------------------
-
-mkCgIdInfo :: Id -> LambdaFormInfo -> CmmExpr -> CgIdInfo
-mkCgIdInfo id lf expr
-  = CgIdInfo { cg_id = id, cg_lf = lf
-             , cg_loc = CmmLoc expr }
-
-litIdInfo :: DynFlags -> Id -> LambdaFormInfo -> CmmLit -> CgIdInfo
-litIdInfo dflags id lf lit
-  = CgIdInfo { cg_id = id, cg_lf = lf
-             , cg_loc = CmmLoc (addDynTag dflags (CmmLit lit) tag) }
-  where
-    tag = lfDynTag dflags lf
-
-lneIdInfo :: DynFlags -> Id -> [NonVoid Id] -> CgIdInfo
-lneIdInfo dflags id regs
-  = CgIdInfo { cg_id = id, cg_lf = lf
-             , cg_loc = LneLoc blk_id (map (idToReg dflags) regs) }
-  where
-    lf     = mkLFLetNoEscape
-    blk_id = mkBlockId (idUnique id)
-
-
-rhsIdInfo :: Id -> LambdaFormInfo -> FCode (CgIdInfo, LocalReg)
-rhsIdInfo id lf_info
-  = do dflags <- getDynFlags
-       reg <- newTemp (gcWord dflags)
-       return (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg)), reg)
-
-mkRhsInit :: DynFlags -> LocalReg -> LambdaFormInfo -> CmmExpr -> CmmAGraph
-mkRhsInit dflags reg lf_info expr
-  = mkAssign (CmmLocal reg) (addDynTag dflags expr (lfDynTag dflags lf_info))
-
-idInfoToAmode :: CgIdInfo -> CmmExpr
--- Returns a CmmExpr for the *tagged* pointer
-idInfoToAmode (CgIdInfo { cg_loc = CmmLoc e }) = e
-idInfoToAmode cg_info
-  = pprPanic "idInfoToAmode" (ppr (cg_id cg_info))        -- LneLoc
-
-addDynTag :: DynFlags -> CmmExpr -> DynTag -> CmmExpr
--- A tag adds a byte offset to the pointer
-addDynTag dflags expr tag = cmmOffsetB dflags expr tag
-
-maybeLetNoEscape :: CgIdInfo -> Maybe (BlockId, [LocalReg])
-maybeLetNoEscape (CgIdInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args)
-maybeLetNoEscape _other                                      = Nothing
-
-
-
----------------------------------------------------------
---        The binding environment
---
--- There are three basic routines, for adding (addBindC),
--- modifying(modifyBindC) and looking up (getCgIdInfo) bindings.
----------------------------------------------------------
-
-addBindC :: CgIdInfo -> FCode ()
-addBindC stuff_to_bind = do
-        binds <- getBinds
-        setBinds $ extendVarEnv binds (cg_id stuff_to_bind) stuff_to_bind
-
-addBindsC :: [CgIdInfo] -> FCode ()
-addBindsC new_bindings = do
-        binds <- getBinds
-        let new_binds = foldl' (\ binds info -> extendVarEnv binds (cg_id info) info)
-                               binds
-                               new_bindings
-        setBinds new_binds
-
-getCgIdInfo :: Id -> FCode CgIdInfo
-getCgIdInfo id
-  = do  { dflags <- getDynFlags
-        ; local_binds <- getBinds -- Try local bindings first
-        ; case lookupVarEnv local_binds id of {
-            Just info -> return info ;
-            Nothing   -> do {
-
-                -- Should be imported; make up a CgIdInfo for it
-          let name = idName id
-        ; if isExternalName name then
-              let ext_lbl
-                      | isUnliftedType (idType id) =
-                          -- An unlifted external Id must refer to a top-level
-                          -- string literal. See Note [Bytes label] in CLabel.
-                          ASSERT( idType id `eqType` addrPrimTy )
-                          mkBytesLabel name
-                      | otherwise = mkClosureLabel name $ idCafInfo id
-              in return $
-                  litIdInfo dflags id (mkLFImported id) (CmmLabel ext_lbl)
-          else
-              cgLookupPanic id -- Bug
-        }}}
-
-cgLookupPanic :: Id -> FCode a
-cgLookupPanic id
-  = do  local_binds <- getBinds
-        pprPanic "StgCmmEnv: variable not found"
-                (vcat [ppr id,
-                text "local binds for:",
-                pprUFM local_binds $ \infos ->
-                  vcat [ ppr (cg_id info) | info <- infos ]
-              ])
-
-
---------------------
-getArgAmode :: NonVoid StgArg -> FCode CmmExpr
-getArgAmode (NonVoid (StgVarArg var)) = idInfoToAmode <$> getCgIdInfo var
-getArgAmode (NonVoid (StgLitArg lit)) = CmmLit <$> cgLit lit
-
-getNonVoidArgAmodes :: [StgArg] -> FCode [CmmExpr]
--- NB: Filters out void args,
---     so the result list may be shorter than the argument list
-getNonVoidArgAmodes [] = return []
-getNonVoidArgAmodes (arg:args)
-  | isVoidRep (argPrimRep arg) = getNonVoidArgAmodes args
-  | otherwise = do { amode  <- getArgAmode (NonVoid arg)
-                   ; amodes <- getNonVoidArgAmodes args
-                   ; return ( amode : amodes ) }
-
-
-------------------------------------------------------------------------
---        Interface functions for binding and re-binding names
-------------------------------------------------------------------------
-
-bindToReg :: NonVoid Id -> LambdaFormInfo -> FCode LocalReg
--- Bind an Id to a fresh LocalReg
-bindToReg nvid@(NonVoid id) lf_info
-  = do dflags <- getDynFlags
-       let reg = idToReg dflags nvid
-       addBindC (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg)))
-       return reg
-
-rebindToReg :: NonVoid Id -> FCode LocalReg
--- Like bindToReg, but the Id is already in scope, so
--- get its LF info from the envt
-rebindToReg nvid@(NonVoid id)
-  = do  { info <- getCgIdInfo id
-        ; bindToReg nvid (cg_lf info) }
-
-bindArgToReg :: NonVoid Id -> FCode LocalReg
-bindArgToReg nvid@(NonVoid id) = bindToReg nvid (mkLFArgument id)
-
-bindArgsToRegs :: [NonVoid Id] -> FCode [LocalReg]
-bindArgsToRegs args = mapM bindArgToReg args
-
-idToReg :: DynFlags -> NonVoid Id -> LocalReg
--- Make a register from an Id, typically a function argument,
--- free variable, or case binder
---
--- We re-use the Unique from the Id to make it easier to see what is going on
---
--- By now the Ids should be uniquely named; else one would worry
--- about accidental collision
-idToReg dflags (NonVoid id)
-             = LocalReg (idUnique id)
-                        (primRepCmmType dflags (idPrimRep id))
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
deleted file mode 100644
index 70a044a7ab..0000000000
--- a/compiler/codeGen/StgCmmExpr.hs
+++ /dev/null
@@ -1,992 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Stg to C-- code generation: expressions
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmExpr ( cgExpr ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude hiding ((<*>))
-
-import {-# SOURCE #-} StgCmmBind ( cgBind )
-
-import StgCmmMonad
-import StgCmmHeap
-import StgCmmEnv
-import StgCmmCon
-import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
-import StgCmmLayout
-import StgCmmPrim
-import StgCmmHpc
-import StgCmmTicky
-import StgCmmUtils
-import StgCmmClosure
-
-import StgSyn
-
-import MkGraph
-import BlockId
-import Cmm
-import CmmInfo
-import CoreSyn
-import DataCon
-import ForeignCall
-import Id
-import PrimOp
-import TyCon
-import Type             ( isUnliftedType )
-import RepType          ( isVoidTy, countConRepArgs, primRepSlot )
-import CostCentre       ( CostCentreStack, currentCCS )
-import Maybes
-import Util
-import FastString
-import Outputable
-
-import Control.Monad (unless,void)
-import Control.Arrow (first)
-import Data.Function ( on )
-
-------------------------------------------------------------------------
---              cgExpr: the main function
-------------------------------------------------------------------------
-
-cgExpr  :: CgStgExpr -> FCode ReturnKind
-
-cgExpr (StgApp fun args)     = cgIdApp fun args
-
--- seq# a s ==> a
--- See Note [seq# magic] in PrelRules
-cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
-  cgIdApp a []
-
--- dataToTag# :: a -> Int#
--- See Note [dataToTag#] in primops.txt.pp
-cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
-  dflags <- getDynFlags
-  emitComment (mkFastString "dataToTag#")
-  tmp <- newTemp (bWord dflags)
-  _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
-  -- TODO: For small types look at the tag bits instead of reading info table
-  emitReturn [getConstrTag dflags (cmmUntag dflags (CmmReg (CmmLocal tmp)))]
-
-cgExpr (StgOpApp op args ty) = cgOpApp op args ty
-cgExpr (StgConApp con args _)= cgConApp con args
-cgExpr (StgTick t e)         = cgTick t >> cgExpr e
-cgExpr (StgLit lit)       = do cmm_lit <- cgLit lit
-                               emitReturn [CmmLit cmm_lit]
-
-cgExpr (StgLet _ binds expr) = do { cgBind binds;     cgExpr expr }
-cgExpr (StgLetNoEscape _ binds expr) =
-  do { u <- newUnique
-     ; let join_id = mkBlockId u
-     ; cgLneBinds join_id binds
-     ; r <- cgExpr expr
-     ; emitLabel join_id
-     ; return r }
-
-cgExpr (StgCase expr bndr alt_type alts) =
-  cgCase expr bndr alt_type alts
-
-cgExpr (StgLam {}) = panic "cgExpr: StgLam"
-
-------------------------------------------------------------------------
---              Let no escape
-------------------------------------------------------------------------
-
-{- Generating code for a let-no-escape binding, aka join point is very
-very similar to what we do for a case expression.  The duality is
-between
-        let-no-escape x = b
-        in e
-and
-        case e of ... -> b
-
-That is, the RHS of 'x' (ie 'b') will execute *later*, just like
-the alternative of the case; it needs to be compiled in an environment
-in which all volatile bindings are forgotten, and the free vars are
-bound only to stable things like stack locations..  The 'e' part will
-execute *next*, just like the scrutinee of a case. -}
-
--------------------------
-cgLneBinds :: BlockId -> CgStgBinding -> FCode ()
-cgLneBinds join_id (StgNonRec bndr rhs)
-  = do  { local_cc <- saveCurrentCostCentre
-                -- See Note [Saving the current cost centre]
-        ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
-        ; fcode
-        ; addBindC info }
-
-cgLneBinds join_id (StgRec pairs)
-  = do  { local_cc <- saveCurrentCostCentre
-        ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
-        ; let (infos, fcodes) = unzip r
-        ; addBindsC infos
-        ; sequence_ fcodes
-        }
-
--------------------------
-cgLetNoEscapeRhs
-    :: BlockId          -- join point for successor of let-no-escape
-    -> Maybe LocalReg   -- Saved cost centre
-    -> Id
-    -> CgStgRhs
-    -> FCode (CgIdInfo, FCode ())
-
-cgLetNoEscapeRhs join_id local_cc bndr rhs =
-  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
-     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
-     ; let code = do { (_, body) <- getCodeScoped rhs_code
-                     ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
-     ; return (info, code)
-     }
-
-cgLetNoEscapeRhsBody
-    :: Maybe LocalReg   -- Saved cost centre
-    -> Id
-    -> CgStgRhs
-    -> FCode (CgIdInfo, FCode ())
-cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure _ cc _upd args body)
-  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
-cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
-  = cgLetNoEscapeClosure bndr local_cc cc []
-      (StgConApp con args (pprPanic "cgLetNoEscapeRhsBody" $
-                           text "StgRhsCon doesn't have type args"))
-        -- For a constructor RHS we want to generate a single chunk of
-        -- code which can be jumped to from many places, which will
-        -- return the constructor. It's easy; just behave as if it
-        -- was an StgRhsClosure with a ConApp inside!
-
--------------------------
-cgLetNoEscapeClosure
-        :: Id                   -- binder
-        -> Maybe LocalReg       -- Slot for saved current cost centre
-        -> CostCentreStack      -- XXX: *** NOT USED *** why not?
-        -> [NonVoid Id]         -- Args (as in \ args -> body)
-        -> CgStgExpr            -- Body (as in above)
-        -> FCode (CgIdInfo, FCode ())
-
-cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
-  = do dflags <- getDynFlags
-       return ( lneIdInfo dflags bndr args
-              , code )
-  where
-   code = forkLneBody $ do {
-            ; withNewTickyCounterLNE (idName bndr) args $ do
-            ; restoreCurrentCostCentre cc_slot
-            ; arg_regs <- bindArgsToRegs args
-            ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
-
-
-------------------------------------------------------------------------
---              Case expressions
-------------------------------------------------------------------------
-
-{- Note [Compiling case expressions]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-It is quite interesting to decide whether to put a heap-check at the
-start of each alternative.  Of course we certainly have to do so if
-the case forces an evaluation, or if there is a primitive op which can
-trigger GC.
-
-A more interesting situation is this (a Plan-B situation)
-
-        !P!;
-        ...P...
-        case x# of
-          0#      -> !Q!; ...Q...
-          default -> !R!; ...R...
-
-where !x! indicates a possible heap-check point. The heap checks
-in the alternatives *can* be omitted, in which case the topmost
-heapcheck will take their worst case into account.
-
-In favour of omitting !Q!, !R!:
-
- - *May* save a heap overflow test,
-   if ...P... allocates anything.
-
- - We can use relative addressing from a single Hp to
-   get at all the closures so allocated.
-
- - No need to save volatile vars etc across heap checks
-   in !Q!, !R!
-
-Against omitting !Q!, !R!
-
-  - May put a heap-check into the inner loop.  Suppose
-        the main loop is P -> R -> P -> R...
-        Q is the loop exit, and only it does allocation.
-    This only hurts us if P does no allocation.  If P allocates,
-    then there is a heap check in the inner loop anyway.
-
-  - May do more allocation than reqd.  This sometimes bites us
-    badly.  For example, nfib (ha!) allocates about 30\% more space if the
-    worst-casing is done, because many many calls to nfib are leaf calls
-    which don't need to allocate anything.
-
-    We can un-allocate, but that costs an instruction
-
-Neither problem hurts us if there is only one alternative.
-
-Suppose the inner loop is P->R->P->R etc.  Then here is
-how many heap checks we get in the *inner loop* under various
-conditions
-
-  Alloc   Heap check in branches (!Q!, !R!)?
-  P Q R      yes     no (absorb to !P!)
---------------------------------------
-  n n n      0          0
-  n y n      0          1
-  n . y      1          1
-  y . y      2          1
-  y . n      1          1
-
-Best choices: absorb heap checks from Q and R into !P! iff
-  a) P itself does some allocation
-or
-  b) P does allocation, or there is exactly one alternative
-
-We adopt (b) because that is more likely to put the heap check at the
-entry to a function, when not many things are live.  After a bunch of
-single-branch cases, we may have lots of things live
-
-Hence: two basic plans for
-
-        case e of r { alts }
-
------- Plan A: the general case ---------
-
-        ...save current cost centre...
-
-        ...code for e,
-           with sequel (SetLocals r)
-
-        ...restore current cost centre...
-        ...code for alts...
-        ...alts do their own heap checks
-
------- Plan B: special case when ---------
-  (i)  e does not allocate or call GC
-  (ii) either upstream code performs allocation
-       or there is just one alternative
-
-  Then heap allocation in the (single) case branch
-  is absorbed by the upstream check.
-  Very common example: primops on unboxed values
-
-        ...code for e,
-           with sequel (SetLocals r)...
-
-        ...code for alts...
-        ...no heap check...
--}
-
-
-
--------------------------------------
-data GcPlan
-  = GcInAlts            -- Put a GC check at the start the case alternatives,
-        [LocalReg]      -- which binds these registers
-  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
-                        -- primitive op which does no GC.  Absorb the allocation
-                        -- of the case alternative(s) into the upstream check
-
--------------------------------------
-cgCase :: CgStgExpr -> Id -> AltType -> [CgStgAlt] -> FCode ReturnKind
-
-cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
-  | isEnumerationTyCon tycon -- Note [case on bool]
-  = do { tag_expr <- do_enum_primop op args
-
-       -- If the binder is not dead, convert the tag to a constructor
-       -- and assign it. See Note [Dead-binder optimisation]
-       ; unless (isDeadBinder bndr) $ do
-            { dflags <- getDynFlags
-            ; tmp_reg <- bindArgToReg (NonVoid bndr)
-            ; emitAssign (CmmLocal tmp_reg)
-                         (tagToClosure dflags tycon tag_expr) }
-
-       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
-                                              (NonVoid bndr) alts
-                                 -- See Note [GC for conditionals]
-       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
-       ; return AssignedDirectly
-       }
-  where
-    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
-    do_enum_primop TagToEnumOp [arg]  -- No code!
-      = getArgAmode (NonVoid arg)
-    do_enum_primop primop args
-      = do dflags <- getDynFlags
-           tmp <- newTemp (bWord dflags)
-           cgPrimOp [tmp] primop args
-           return (CmmReg (CmmLocal tmp))
-
-{-
-Note [case on bool]
-~~~~~~~~~~~~~~~~~~~
-This special case handles code like
-
-  case a <# b of
-    True ->
-    False ->
-
--->  case tagToEnum# (a <$# b) of
-        True -> .. ; False -> ...
-
---> case (a <$# b) of r ->
-    case tagToEnum# r of
-        True -> .. ; False -> ...
-
-If we let the ordinary case code handle it, we'll get something like
-
- tmp1 = a < b
- tmp2 = Bool_closure_tbl[tmp1]
- if (tmp2 & 7 != 0) then ... // normal tagged case
-
-but this junk won't optimise away.  What we really want is just an
-inline comparison:
-
- if (a < b) then ...
-
-So we add a special case to generate
-
- tmp1 = a < b
- if (tmp1 == 0) then ...
-
-and later optimisations will further improve this.
-
-Now that #6135 has been resolved it should be possible to remove that
-special case. The idea behind this special case and pre-6135 implementation
-of Bool-returning primops was that tagToEnum# was added implicitly in the
-codegen and then optimized away. Now the call to tagToEnum# is explicit
-in the source code, which allows to optimize it away at the earlier stages
-of compilation (i.e. at the Core level).
-
-Note [Scrutinising VoidRep]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Suppose we have this STG code:
-   f = \[s : State# RealWorld] ->
-       case s of _ -> blah
-This is very odd.  Why are we scrutinising a state token?  But it
-can arise with bizarre NOINLINE pragmas (#9964)
-    crash :: IO ()
-    crash = IO (\s -> let {-# NOINLINE s' #-}
-                          s' = s
-                      in (# s', () #))
-
-Now the trouble is that 's' has VoidRep, and we do not bind void
-arguments in the environment; they don't live anywhere.  See the
-calls to nonVoidIds in various places.  So we must not look up
-'s' in the environment.  Instead, just evaluate the RHS!  Simple.
-
-Note [Dead-binder optimisation]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A case-binder, or data-constructor argument, may be marked as dead,
-because we preserve occurrence-info on binders in CoreTidy (see
-CoreTidy.tidyIdBndr).
-
-If the binder is dead, we can sometimes eliminate a load.  While
-CmmSink will eliminate that load, it's very easy to kill it at source
-(giving CmmSink less work to do), and in any case CmmSink only runs
-with -O. Since the majority of case binders are dead, this
-optimisation probably still has a great benefit-cost ratio and we want
-to keep it for -O0. See also Phab:D5358.
-
-This probably also was the reason for occurrence hack in Phab:D5339 to
-exist, perhaps because the occurrence information preserved by
-'CoreTidy.tidyIdBndr' was insufficient.  But now that CmmSink does the
-job we deleted the hacks.
--}
-
-cgCase (StgApp v []) _ (PrimAlt _) alts
-  | isVoidRep (idPrimRep v)  -- See Note [Scrutinising VoidRep]
-  , [(DEFAULT, _, rhs)] <- alts
-  = cgExpr rhs
-
-{- Note [Dodgy unsafeCoerce 1]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider
-    case (x :: HValue) |> co of (y :: MutVar# Int)
-        DEFAULT -> ...
-We want to gnerate an assignment
-     y := x
-We want to allow this assignment to be generated in the case when the
-types are compatible, because this allows some slightly-dodgy but
-occasionally-useful casts to be used, such as in RtClosureInspect
-where we cast an HValue to a MutVar# so we can print out the contents
-of the MutVar#.  If instead we generate code that enters the HValue,
-then we'll get a runtime panic, because the HValue really is a
-MutVar#.  The types are compatible though, so we can just generate an
-assignment.
--}
-cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
-  | isUnliftedType (idType v)  -- Note [Dodgy unsafeCoerce 1]
-  || reps_compatible
-  = -- assignment suffices for unlifted types
-    do { dflags <- getDynFlags
-       ; unless reps_compatible $
-           pprPanic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
-                    (pp_bndr v $$ pp_bndr bndr)
-       ; v_info <- getCgIdInfo v
-       ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr)))
-                    (idInfoToAmode v_info)
-       -- Add bndr to the environment
-       ; _ <- bindArgToReg (NonVoid bndr)
-       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
-  where
-    reps_compatible = ((==) `on` (primRepSlot . idPrimRep)) v bndr
-      -- Must compare SlotTys, not proper PrimReps, because with unboxed sums,
-      -- the types of the binders are generated from slotPrimRep and might not
-      -- match. Test case:
-      --   swap :: (# Int | Int #) -> (# Int | Int #)
-      --   swap (# x | #) = (# | x #)
-      --   swap (# | y #) = (# y | #)
-
-    pp_bndr id = ppr id <+> dcolon <+> ppr (idType id) <+> parens (ppr (idPrimRep id))
-
-{- Note [Dodgy unsafeCoerce 2, #3132]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-In all other cases of a lifted Id being cast to an unlifted type, the
-Id should be bound to bottom, otherwise this is an unsafe use of
-unsafeCoerce.  We can generate code to enter the Id and assume that
-it will never return.  Hence, we emit the usual enter/return code, and
-because bottom must be untagged, it will be entered.  The Sequel is a
-type-correct assignment, albeit bogus.  The (dead) continuation loops;
-it would be better to invoke some kind of panic function here.
--}
-cgCase scrut@(StgApp v []) _ (PrimAlt _) _
-  = do { dflags <- getDynFlags
-       ; mb_cc <- maybeSaveCostCentre True
-       ; _ <- withSequel
-                  (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
-       ; restoreCurrentCostCentre mb_cc
-       ; emitComment $ mkFastString "should be unreachable code"
-       ; l <- newBlockId
-       ; emitLabel l
-       ; emit (mkBranch l)  -- an infinite loop
-       ; return AssignedDirectly
-       }
-
-{- Note [Handle seq#]
-~~~~~~~~~~~~~~~~~~~~~
-See Note [seq# magic] in PrelRules.
-The special case for seq# in cgCase does this:
-
-  case seq# a s of v
-    (# s', a' #) -> e
-==>
-  case a of v
-    (# s', a' #) -> e
-
-(taking advantage of the fact that the return convention for (# State#, a #)
-is the same as the return convention for just 'a')
--}
-
-cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
-  = -- Note [Handle seq#]
-    -- And see Note [seq# magic] in PrelRules
-    -- Use the same return convention as vanilla 'a'.
-    cgCase (StgApp a []) bndr alt_type alts
-
-cgCase scrut bndr alt_type alts
-  = -- the general case
-    do { dflags <- getDynFlags
-       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
-       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
-             alt_regs  = map (idToReg dflags) ret_bndrs
-       ; simple_scrut <- isSimpleScrut scrut alt_type
-       ; let do_gc  | is_cmp_op scrut  = False  -- See Note [GC for conditionals]
-                    | not simple_scrut = True
-                    | isSingleton alts = False
-                    | up_hp_usg > 0    = False
-                    | otherwise        = True
-               -- cf Note [Compiling case expressions]
-             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts
-
-       ; mb_cc <- maybeSaveCostCentre simple_scrut
-
-       ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -}
-       ; ret_kind <- withSequel sequel (cgExpr scrut)
-       ; restoreCurrentCostCentre mb_cc
-       ; _ <- bindArgsToRegs ret_bndrs
-       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
-       }
-  where
-    is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op
-    is_cmp_op _                             = False
-
-{- Note [GC for conditionals]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-For boolean conditionals it seems that we have always done NoGcInAlts.
-That is, we have always done the GC check before the conditional.
-This is enshrined in the special case for
-   case tagToEnum# (a>b) of ...
-See Note [case on bool]
-
-It's odd, and it's flagrantly inconsistent with the rules described
-Note [Compiling case expressions].  However, after eliminating the
-tagToEnum# (#13397) we will have:
-   case (a>b) of ...
-Rather than make it behave quite differently, I am testing for a
-comparison operator here in in the general case as well.
-
-ToDo: figure out what the Right Rule should be.
-
-Note [scrut sequel]
-~~~~~~~~~~~~~~~~~~~
-The job of the scrutinee is to assign its value(s) to alt_regs.
-Additionally, if we plan to do a heap-check in the alternatives (see
-Note [Compiling case expressions]), then we *must* retreat Hp to
-recover any unused heap before passing control to the sequel.  If we
-don't do this, then any unused heap will become slop because the heap
-check will reset the heap usage. Slop in the heap breaks LDV profiling
-(+RTS -hb) which needs to do a linear sweep through the nursery.
-
-
-Note [Inlining out-of-line primops and heap checks]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-If shouldInlinePrimOp returns True when called from StgCmmExpr for the
-purpose of heap check placement, we *must* inline the primop later in
-StgCmmPrim. If we don't things will go wrong.
--}
-
------------------
-maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
-maybeSaveCostCentre simple_scrut
-  | simple_scrut = return Nothing
-  | otherwise    = saveCurrentCostCentre
-
-
------------------
-isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool
--- Simple scrutinee, does not block or allocate; hence safe to amalgamate
--- heap usage from alternatives into the stuff before the case
--- NB: if you get this wrong, and claim that the expression doesn't allocate
---     when it does, you'll deeply mess up allocation
-isSimpleScrut (StgOpApp op args _) _       = isSimpleOp op args
-isSimpleScrut (StgLit _)       _           = return True       -- case 1# of { 0# -> ..; ... }
-isSimpleScrut (StgApp _ [])    (PrimAlt _) = return True       -- case x# of { 0# -> ..; ... }
-isSimpleScrut _                _           = return False
-
-isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
--- True iff the op cannot block or allocate
-isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
--- dataToTag# evalautes its argument, see Note [dataToTag#] in primops.txt.pp
-isSimpleOp (StgPrimOp DataToTagOp) _ = return False
-isSimpleOp (StgPrimOp op) stg_args                  = do
-    arg_exprs <- getNonVoidArgAmodes stg_args
-    dflags <- getDynFlags
-    -- See Note [Inlining out-of-line primops and heap checks]
-    return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
-isSimpleOp (StgPrimCallOp _) _                           = return False
-
------------------
-chooseReturnBndrs :: Id -> AltType -> [CgStgAlt] -> [NonVoid Id]
--- These are the binders of a case that are assigned by the evaluation of the
--- scrutinee.
--- They're non-void, see Note [Post-unarisation invariants] in UnariseStg.
-chooseReturnBndrs bndr (PrimAlt _) _alts
-  = assertNonVoidIds [bndr]
-
-chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)]
-  = ASSERT2(ids `lengthIs` n, ppr n $$ ppr ids $$ ppr _bndr)
-    assertNonVoidIds ids     -- 'bndr' is not assigned!
-
-chooseReturnBndrs bndr (AlgAlt _) _alts
-  = assertNonVoidIds [bndr]  -- Only 'bndr' is assigned
-
-chooseReturnBndrs bndr PolyAlt _alts
-  = assertNonVoidIds [bndr]  -- Only 'bndr' is assigned
-
-chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
-                             -- MultiValAlt has only one alternative
-
--------------------------------------
-cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [CgStgAlt]
-       -> FCode ReturnKind
--- At this point the result of the case are in the binders
-cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
-  = maybeAltHeapCheck gc_plan (cgExpr rhs)
-
-cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)]
-  = maybeAltHeapCheck gc_plan (cgExpr rhs)
-        -- Here bndrs are *already* in scope, so don't rebind them
-
-cgAlts gc_plan bndr (PrimAlt _) alts
-  = do  { dflags <- getDynFlags
-
-        ; tagged_cmms <- cgAltRhss gc_plan bndr alts
-
-        ; let bndr_reg = CmmLocal (idToReg dflags bndr)
-              (DEFAULT,deflt) = head tagged_cmms
-                -- PrimAlts always have a DEFAULT case
-                -- and it always comes first
-
-              tagged_cmms' = [(lit,code)
-                             | (LitAlt lit, code) <- tagged_cmms]
-        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
-        ; return AssignedDirectly }
-
-cgAlts gc_plan bndr (AlgAlt tycon) alts
-  = do  { dflags <- getDynFlags
-
-        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts
-
-        ; let fam_sz   = tyConFamilySize tycon
-              bndr_reg = CmmLocal (idToReg dflags bndr)
-
-                    -- Is the constructor tag in the node reg?
-        ; if isSmallFamily dflags fam_sz
-          then do
-                let   -- Yes, bndr_reg has constr. tag in ls bits
-                   tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
-                   branches' = [(tag+1,branch) | (tag,branch) <- branches]
-                emitSwitch tag_expr branches' mb_deflt 1 fam_sz
-
-           else -- No, get tag from info table
-                let -- Note that ptr _always_ has tag 1
-                    -- when the family size is big enough
-                    untagged_ptr = cmmRegOffB bndr_reg (-1)
-                    tag_expr = getConstrTag dflags (untagged_ptr)
-                in emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
-
-        ; return AssignedDirectly }
-
-cgAlts _ _ _ _ = panic "cgAlts"
-        -- UbxTupAlt and PolyAlt have only one alternative
-
-
--- Note [alg-alt heap check]
---
--- In an algebraic case with more than one alternative, we will have
--- code like
---
--- L0:
---   x = R1
---   goto L1
--- L1:
---   if (x & 7 >= 2) then goto L2 else goto L3
--- L2:
---   Hp = Hp + 16
---   if (Hp > HpLim) then goto L4
---   ...
--- L4:
---   call gc() returns to L5
--- L5:
---   x = R1
---   goto L1
-
--------------------
-cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
-             -> FCode ( Maybe CmmAGraphScoped
-                      , [(ConTagZ, CmmAGraphScoped)] )
-cgAlgAltRhss gc_plan bndr alts
-  = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
-
-       ; let { mb_deflt = case tagged_cmms of
-                           ((DEFAULT,rhs) : _) -> Just rhs
-                           _other              -> Nothing
-                            -- DEFAULT is always first, if present
-
-              ; branches = [ (dataConTagZ con, cmm)
-                           | (DataAlt con, cmm) <- tagged_cmms ]
-              }
-
-       ; return (mb_deflt, branches)
-       }
-
-
--------------------
-cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
-          -> FCode [(AltCon, CmmAGraphScoped)]
-cgAltRhss gc_plan bndr alts = do
-  dflags <- getDynFlags
-  let
-    base_reg = idToReg dflags bndr
-    cg_alt :: CgStgAlt -> FCode (AltCon, CmmAGraphScoped)
-    cg_alt (con, bndrs, rhs)
-      = getCodeScoped             $
-        maybeAltHeapCheck gc_plan $
-        do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
-                    -- alt binders are always non-void,
-                    -- see Note [Post-unarisation invariants] in UnariseStg
-           ; _ <- cgExpr rhs
-           ; return con }
-  forkAlts (map cg_alt alts)
-
-maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
-maybeAltHeapCheck (NoGcInAlts,_)  code = code
-maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
-  altHeapCheck regs code
-maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
-  altHeapCheckReturnsTo regs lret off code
-
------------------------------------------------------------------------------
---      Tail calls
------------------------------------------------------------------------------
-
-cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
-cgConApp con stg_args
-  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
-  = do { arg_exprs <- getNonVoidArgAmodes stg_args
-       ; tickyUnboxedTupleReturn (length arg_exprs)
-       ; emitReturn arg_exprs }
-
-  | otherwise   --  Boxed constructors; allocate and return
-  = ASSERT2( stg_args `lengthIs` countConRepArgs con, ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args )
-    do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
-                                     currentCCS con (assertNonVoidStgArgs stg_args)
-                                     -- con args are always non-void,
-                                     -- see Note [Post-unarisation invariants] in UnariseStg
-                -- The first "con" says that the name bound to this
-                -- closure is "con", which is a bit of a fudge, but
-                -- it only affects profiling (hence the False)
-
-        ; emit =<< fcode_init
-        ; tickyReturnNewCon (length stg_args)
-        ; emitReturn [idInfoToAmode idinfo] }
-
-cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
-cgIdApp fun_id args = do
-    dflags         <- getDynFlags
-    fun_info       <- getCgIdInfo fun_id
-    self_loop_info <- getSelfLoop
-    let fun_arg     = StgVarArg fun_id
-        fun_name    = idName    fun_id
-        fun         = idInfoToAmode fun_info
-        lf_info     = cg_lf         fun_info
-        n_args      = length args
-        v_args      = length $ filter (isVoidTy . stgArgType) args
-        node_points dflags = nodeMustPointToIt dflags lf_info
-    case getCallMethod dflags fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of
-            -- A value in WHNF, so we can just return it.
-        ReturnIt
-          | isVoidTy (idType fun_id) -> emitReturn []
-          | otherwise                -> emitReturn [fun]
-          -- ToDo: does ReturnIt guarantee tagged?
-
-        EnterIt -> ASSERT( null args )  -- Discarding arguments
-                   emitEnter fun
-
-        SlowCall -> do      -- A slow function call via the RTS apply routines
-                { tickySlowCall lf_info args
-                ; emitComment $ mkFastString "slowCall"
-                ; slowCall fun args }
-
-        -- A direct function call (possibly with some left-over arguments)
-        DirectEntry lbl arity -> do
-                { tickyDirectCall arity args
-                ; if node_points dflags
-                     then directCall NativeNodeCall   lbl arity (fun_arg:args)
-                     else directCall NativeDirectCall lbl arity args }
-
-        -- Let-no-escape call or self-recursive tail-call
-        JumpToIt blk_id lne_regs -> do
-          { adjustHpBackwards -- always do this before a tail-call
-          ; cmm_args <- getNonVoidArgAmodes args
-          ; emitMultiAssign lne_regs cmm_args
-          ; emit (mkBranch blk_id)
-          ; return AssignedDirectly }
-
--- Note [Self-recursive tail calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- Self-recursive tail calls can be optimized into a local jump in the same
--- way as let-no-escape bindings (see Note [What is a non-escaping let] in
--- stgSyn/CoreToStg.hs). Consider this:
---
--- foo.info:
---     a = R1  // calling convention
---     b = R2
---     goto L1
--- L1: ...
---     ...
--- ...
--- L2: R1 = x
---     R2 = y
---     call foo(R1,R2)
---
--- Instead of putting x and y into registers (or other locations required by the
--- calling convention) and performing a call we can put them into local
--- variables a and b and perform jump to L1:
---
--- foo.info:
---     a = R1
---     b = R2
---     goto L1
--- L1: ...
---     ...
--- ...
--- L2: a = x
---     b = y
---     goto L1
---
--- This can be done only when function is calling itself in a tail position
--- and only if the call passes number of parameters equal to function's arity.
--- Note that this cannot be performed if a function calls itself with a
--- continuation.
---
--- This in fact implements optimization known as "loopification". It was
--- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
--- by Krzysztof Woś, though we use different approach. Krzysztof performed his
--- optimization at the Cmm level, whereas we perform ours during code generation
--- (Stg-to-Cmm pass) essentially making sure that optimized Cmm code is
--- generated in the first place.
---
--- Implementation is spread across a couple of places in the code:
---
---   * FCode monad stores additional information in its reader environment
---     (cgd_self_loop field). This information tells us which function can
---     tail call itself in an optimized way (it is the function currently
---     being compiled), what is the label of a loop header (L1 in example above)
---     and information about local registers in which we should arguments
---     before making a call (this would be a and b in example above).
---
---   * Whenever we are compiling a function, we set that information to reflect
---     the fact that function currently being compiled can be jumped to, instead
---     of called. This is done in closureCodyBody in StgCmmBind.
---
---   * We also have to emit a label to which we will be jumping. We make sure
---     that the label is placed after a stack check but before the heap
---     check. The reason is that making a recursive tail-call does not increase
---     the stack so we only need to check once. But it may grow the heap, so we
---     have to repeat the heap check in every self-call. This is done in
---     do_checks in StgCmmHeap.
---
---   * When we begin compilation of another closure we remove the additional
---     information from the environment. This is done by forkClosureBody
---     in StgCmmMonad. Other functions that duplicate the environment -
---     forkLneBody, forkAlts, codeOnly - duplicate that information. In other
---     words, we only need to clean the environment of the self-loop information
---     when compiling right hand side of a closure (binding).
---
---   * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
---     of call will be generated. getCallMethod decides to generate a self
---     recursive tail call when (a) environment stores information about
---     possible self tail-call; (b) that tail call is to a function currently
---     being compiled; (c) number of passed non-void arguments is equal to
---     function's arity. (d) loopification is turned on via -floopification
---     command-line option.
---
---   * Command line option to turn loopification on and off is implemented in
---     DynFlags.
---
---
--- Note [Void arguments in self-recursive tail calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- State# tokens can get in the way of the loopification optimization as seen in
--- #11372. Consider this:
---
--- foo :: [a]
---     -> (a -> State# s -> (# State s, Bool #))
---     -> State# s
---     -> (# State# s, Maybe a #)
--- foo [] f s = (# s, Nothing #)
--- foo (x:xs) f s = case f x s of
---      (# s', b #) -> case b of
---          True -> (# s', Just x #)
---          False -> foo xs f s'
---
--- We would like to compile the call to foo as a local jump instead of a call
--- (see Note [Self-recursive tail calls]). However, the generated function has
--- an arity of 2 while we apply it to 3 arguments, one of them being of void
--- type. Thus, we mustn't count arguments of void type when checking whether
--- we can turn a call into a self-recursive jump.
---
-
-emitEnter :: CmmExpr -> FCode ReturnKind
-emitEnter fun = do
-  { dflags <- getDynFlags
-  ; adjustHpBackwards
-  ; sequel <- getSequel
-  ; updfr_off <- getUpdFrameOff
-  ; case sequel of
-      -- For a return, we have the option of generating a tag-test or
-      -- not.  If the value is tagged, we can return directly, which
-      -- is quicker than entering the value.  This is a code
-      -- size/speed trade-off: when optimising for speed rather than
-      -- size we could generate the tag test.
-      --
-      -- Right now, we do what the old codegen did, and omit the tag
-      -- test, just generating an enter.
-      Return -> do
-        { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
-        ; emit $ mkJump dflags NativeNodeCall entry
-                        [cmmUntag dflags fun] updfr_off
-        ; return AssignedDirectly
-        }
-
-      -- The result will be scrutinised in the sequel.  This is where
-      -- we generate a tag-test to avoid entering the closure if
-      -- possible.
-      --
-      -- The generated code will be something like this:
-      --
-      --    R1 = fun  -- copyout
-      --    if (fun & 7 != 0) goto Lret else goto Lcall
-      --  Lcall:
-      --    call [fun] returns to Lret
-      --  Lret:
-      --    fun' = R1  -- copyin
-      --    ...
-      --
-      -- Note in particular that the label Lret is used as a
-      -- destination by both the tag-test and the call.  This is
-      -- because Lret will necessarily be a proc-point, and we want to
-      -- ensure that we generate only one proc-point for this
-      -- sequence.
-      --
-      -- Furthermore, we tell the caller that we generated a native
-      -- return continuation by returning (ReturnedTo Lret off), so
-      -- that the continuation can be reused by the heap-check failure
-      -- code in the enclosing case expression.
-      --
-      AssignTo res_regs _ -> do
-       { lret <- newBlockId
-       ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
-       ; lcall <- newBlockId
-       ; updfr_off <- getUpdFrameOff
-       ; let area = Young lret
-       ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
-                                          [fun] updfr_off []
-         -- refer to fun via nodeReg after the copyout, to avoid having
-         -- both live simultaneously; this sometimes enables fun to be
-         -- inlined in the RHS of the R1 assignment.
-       ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
-             the_call = toCall entry (Just lret) updfr_off off outArgs regs
-       ; tscope <- getTickScope
-       ; emit $
-           copyout <*>
-           mkCbranch (cmmIsTagged dflags (CmmReg nodeReg))
-                     lret lcall Nothing <*>
-           outOfLine lcall (the_call,tscope) <*>
-           mkLabel lret tscope <*>
-           copyin
-       ; return (ReturnedTo lret off)
-       }
-  }
-
-------------------------------------------------------------------------
---              Ticks
-------------------------------------------------------------------------
-
--- | Generate Cmm code for a tick. Depending on the type of Tickish,
--- this will either generate actual Cmm instrumentation code, or
--- simply pass on the annotation as a @CmmTickish@.
-cgTick :: Tickish Id -> FCode ()
-cgTick tick
-  = do { dflags <- getDynFlags
-       ; case tick of
-           ProfNote   cc t p -> emitSetCCC cc t p
-           HpcTick    m n    -> emit (mkTickBox dflags m n)
-           SourceNote s n    -> emitTick $ SourceNote s n
-           _other            -> return () -- ignore
-       }
diff --git a/compiler/codeGen/StgCmmExtCode.hs b/compiler/codeGen/StgCmmExtCode.hs
deleted file mode 100644
index 1d35c3454e..0000000000
--- a/compiler/codeGen/StgCmmExtCode.hs
+++ /dev/null
@@ -1,252 +0,0 @@
-{-# LANGUAGE DeriveFunctor #-}
--- | Our extended FCode monad.
-
--- We add a mapping from names to CmmExpr, to support local variable names in
--- the concrete C-- code.  The unique supply of the underlying FCode monad
--- is used to grab a new unique for each local variable.
-
--- In C--, a local variable can be declared anywhere within a proc,
--- and it scopes from the beginning of the proc to the end.  Hence, we have
--- to collect declarations as we parse the proc, and feed the environment
--- back in circularly (to avoid a two-pass algorithm).
-
-module StgCmmExtCode (
-        CmmParse, unEC,
-        Named(..), Env,
-
-        loopDecls,
-        getEnv,
-
-        withName,
-        getName,
-
-        newLocal,
-        newLabel,
-        newBlockId,
-        newFunctionName,
-        newImport,
-        lookupLabel,
-        lookupName,
-
-        code,
-        emit, emitLabel, emitAssign, emitStore,
-        getCode, getCodeR, getCodeScoped,
-        emitOutOfLine,
-        withUpdFrameOff, getUpdFrameOff
-)
-
-where
-
-import GhcPrelude
-
-import qualified StgCmmMonad as F
-import StgCmmMonad (FCode, newUnique)
-
-import Cmm
-import CLabel
-import MkGraph
-
-import BlockId
-import DynFlags
-import FastString
-import Module
-import UniqFM
-import Unique
-import UniqSupply
-
-import Control.Monad (ap)
-
--- | The environment contains variable definitions or blockids.
-data Named
-        = VarN CmmExpr          -- ^ Holds CmmLit(CmmLabel ..) which gives the label type,
-                                --      eg, RtsLabel, ForeignLabel, CmmLabel etc.
-
-        | FunN   UnitId      -- ^ A function name from this package
-        | LabelN BlockId                -- ^ A blockid of some code or data.
-
--- | An environment of named things.
-type Env        = UniqFM Named
-
--- | Local declarations that are in scope during code generation.
-type Decls      = [(FastString,Named)]
-
--- | Does a computation in the FCode monad, with a current environment
---      and a list of local declarations. Returns the resulting list of declarations.
-newtype CmmParse a
-        = EC { unEC :: String -> Env -> Decls -> FCode (Decls, a) }
-    deriving (Functor)
-
-type ExtCode = CmmParse ()
-
-returnExtFC :: a -> CmmParse a
-returnExtFC a   = EC $ \_ _ s -> return (s, a)
-
-thenExtFC :: CmmParse a -> (a -> CmmParse b) -> CmmParse b
-thenExtFC (EC m) k = EC $ \c e s -> do (s',r) <- m c e s; unEC (k r) c e s'
-
-instance Applicative CmmParse where
-      pure = returnExtFC
-      (<*>) = ap
-
-instance Monad CmmParse where
-  (>>=) = thenExtFC
-
-instance MonadUnique CmmParse where
-  getUniqueSupplyM = code getUniqueSupplyM
-  getUniqueM = EC $ \_ _ decls -> do
-    u <- getUniqueM
-    return (decls, u)
-
-instance HasDynFlags CmmParse where
-    getDynFlags = EC (\_ _ d -> do dflags <- getDynFlags
-                                   return (d, dflags))
-
-
--- | Takes the variable decarations and imports from the monad
---      and makes an environment, which is looped back into the computation.
---      In this way, we can have embedded declarations that scope over the whole
---      procedure, and imports that scope over the entire module.
---      Discards the local declaration contained within decl'
---
-loopDecls :: CmmParse a -> CmmParse a
-loopDecls (EC fcode) =
-      EC $ \c e globalDecls -> do
-        (_, a) <- F.fixC $ \ ~(decls, _) ->
-          fcode c (addListToUFM e decls) globalDecls
-        return (globalDecls, a)
-
-
--- | Get the current environment from the monad.
-getEnv :: CmmParse Env
-getEnv  = EC $ \_ e s -> return (s, e)
-
--- | Get the current context name from the monad
-getName :: CmmParse String
-getName  = EC $ \c _ s -> return (s, c)
-
--- | Set context name for a sub-parse
-withName :: String -> CmmParse a -> CmmParse a
-withName c' (EC fcode) = EC $ \_ e s -> fcode c' e s
-
-addDecl :: FastString -> Named -> ExtCode
-addDecl name named = EC $ \_ _ s -> return ((name, named) : s, ())
-
-
--- | Add a new variable to the list of local declarations.
---      The CmmExpr says where the value is stored.
-addVarDecl :: FastString -> CmmExpr -> ExtCode
-addVarDecl var expr = addDecl var (VarN expr)
-
--- | Add a new label to the list of local declarations.
-addLabel :: FastString -> BlockId -> ExtCode
-addLabel name block_id = addDecl name (LabelN block_id)
-
-
--- | Create a fresh local variable of a given type.
-newLocal
-        :: CmmType              -- ^ data type
-        -> FastString           -- ^ name of variable
-        -> CmmParse LocalReg    -- ^ register holding the value
-
-newLocal ty name = do
-   u <- code newUnique
-   let reg = LocalReg u ty
-   addVarDecl name (CmmReg (CmmLocal reg))
-   return reg
-
-
--- | Allocate a fresh label.
-newLabel :: FastString -> CmmParse BlockId
-newLabel name = do
-   u <- code newUnique
-   addLabel name (mkBlockId u)
-   return (mkBlockId u)
-
--- | Add add a local function to the environment.
-newFunctionName
-        :: FastString   -- ^ name of the function
-        -> UnitId    -- ^ package of the current module
-        -> ExtCode
-
-newFunctionName name pkg = addDecl name (FunN pkg)
-
-
--- | Add an imported foreign label to the list of local declarations.
---      If this is done at the start of the module the declaration will scope
---      over the whole module.
-newImport
-        :: (FastString, CLabel)
-        -> CmmParse ()
-
-newImport (name, cmmLabel)
-   = addVarDecl name (CmmLit (CmmLabel cmmLabel))
-
-
--- | Lookup the BlockId bound to the label with this name.
---      If one hasn't been bound yet, create a fresh one based on the
---      Unique of the name.
-lookupLabel :: FastString -> CmmParse BlockId
-lookupLabel name = do
-  env <- getEnv
-  return $
-     case lookupUFM env name of
-        Just (LabelN l) -> l
-        _other          -> mkBlockId (newTagUnique (getUnique name) 'L')
-
-
--- | Lookup the location of a named variable.
---      Unknown names are treated as if they had been 'import'ed from the runtime system.
---      This saves us a lot of bother in the RTS sources, at the expense of
---      deferring some errors to link time.
-lookupName :: FastString -> CmmParse CmmExpr
-lookupName name = do
-  env    <- getEnv
-  return $
-     case lookupUFM env name of
-        Just (VarN e)   -> e
-        Just (FunN pkg) -> CmmLit (CmmLabel (mkCmmCodeLabel pkg          name))
-        _other          -> CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId name))
-
-
--- | Lift an FCode computation into the CmmParse monad
-code :: FCode a -> CmmParse a
-code fc = EC $ \_ _ s -> do
-                r <- fc
-                return (s, r)
-
-emit :: CmmAGraph -> CmmParse ()
-emit = code . F.emit
-
-emitLabel :: BlockId -> CmmParse ()
-emitLabel = code . F.emitLabel
-
-emitAssign :: CmmReg  -> CmmExpr -> CmmParse ()
-emitAssign l r = code (F.emitAssign l r)
-
-emitStore :: CmmExpr  -> CmmExpr -> CmmParse ()
-emitStore l r = code (F.emitStore l r)
-
-getCode :: CmmParse a -> CmmParse CmmAGraph
-getCode (EC ec) = EC $ \c e s -> do
-  ((s',_), gr) <- F.getCodeR (ec c e s)
-  return (s', gr)
-
-getCodeR :: CmmParse a -> CmmParse (a, CmmAGraph)
-getCodeR (EC ec) = EC $ \c e s -> do
-  ((s', r), gr) <- F.getCodeR (ec c e s)
-  return (s', (r,gr))
-
-getCodeScoped :: CmmParse a -> CmmParse (a, CmmAGraphScoped)
-getCodeScoped (EC ec) = EC $ \c e s -> do
-  ((s', r), gr) <- F.getCodeScoped (ec c e s)
-  return (s', (r,gr))
-
-emitOutOfLine :: BlockId -> CmmAGraphScoped -> CmmParse ()
-emitOutOfLine l g = code (F.emitOutOfLine l g)
-
-withUpdFrameOff :: UpdFrameOffset -> CmmParse () -> CmmParse ()
-withUpdFrameOff size inner
-  = EC $ \c e s -> F.withUpdFrameOff size $ (unEC inner) c e s
-
-getUpdFrameOff :: CmmParse UpdFrameOffset
-getUpdFrameOff = code $ F.getUpdFrameOff
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
deleted file mode 100644
index 172dcba219..0000000000
--- a/compiler/codeGen/StgCmmForeign.hs
+++ /dev/null
@@ -1,627 +0,0 @@
------------------------------------------------------------------------------
---
--- Code generation for foreign calls.
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmForeign (
-  cgForeignCall,
-  emitPrimCall, emitCCall,
-  emitForeignCall,     -- For CmmParse
-  emitSaveThreadState,
-  saveThreadState,
-  emitLoadThreadState,
-  loadThreadState,
-  emitOpenNursery,
-  emitCloseNursery,
- ) where
-
-import GhcPrelude hiding( succ, (<*>) )
-
-import StgSyn
-import StgCmmProf (storeCurCCS, ccsType)
-import StgCmmEnv
-import StgCmmMonad
-import StgCmmUtils
-import StgCmmClosure
-import StgCmmLayout
-
-import BlockId (newBlockId)
-import Cmm
-import CmmUtils
-import MkGraph
-import Type
-import RepType
-import CLabel
-import SMRep
-import ForeignCall
-import DynFlags
-import Maybes
-import Outputable
-import UniqSupply
-import BasicTypes
-
-import TyCoRep
-import TysPrim
-import Util (zipEqual)
-
-import Control.Monad
-
------------------------------------------------------------------------------
--- Code generation for Foreign Calls
------------------------------------------------------------------------------
-
--- | Emit code for a foreign call, and return the results to the sequel.
--- Precondition: the length of the arguments list is the same as the
--- arity of the foreign function.
-cgForeignCall :: ForeignCall            -- the op
-              -> Type                   -- type of foreign function
-              -> [StgArg]               -- x,y    arguments
-              -> Type                   -- result type
-              -> FCode ReturnKind
-
-cgForeignCall (CCall (CCallSpec target cconv safety)) typ stg_args res_ty
-  = do  { dflags <- getDynFlags
-        ; let -- in the stdcall calling convention, the symbol needs @size appended
-              -- to it, where size is the total number of bytes of arguments.  We
-              -- attach this info to the CLabel here, and the CLabel pretty printer
-              -- will generate the suffix when the label is printed.
-            call_size args
-              | StdCallConv <- cconv = Just (sum (map arg_size args))
-              | otherwise            = Nothing
-
-              -- ToDo: this might not be correct for 64-bit API
-            arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg)
-                                     (wORD_SIZE dflags)
-        ; cmm_args <- getFCallArgs stg_args typ
-        ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
-        ; let ((call_args, arg_hints), cmm_target)
-                = case target of
-                   StaticTarget _ _   _      False ->
-                       panic "cgForeignCall: unexpected FFI value import"
-                   StaticTarget _ lbl mPkgId True
-                     -> let labelSource
-                                = case mPkgId of
-                                        Nothing         -> ForeignLabelInThisPackage
-                                        Just pkgId      -> ForeignLabelInPackage pkgId
-                            size = call_size cmm_args
-                        in  ( unzip cmm_args
-                            , CmmLit (CmmLabel
-                                        (mkForeignLabel lbl size labelSource IsFunction)))
-
-                   DynamicTarget    ->  case cmm_args of
-                                           (fn,_):rest -> (unzip rest, fn)
-                                           [] -> panic "cgForeignCall []"
-              fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
-              call_target = ForeignTarget cmm_target fc
-
-        -- we want to emit code for the call, and then emitReturn.
-        -- However, if the sequel is AssignTo, we shortcut a little
-        -- and generate a foreign call that assigns the results
-        -- directly.  Otherwise we end up generating a bunch of
-        -- useless "r = r" assignments, which are not merely annoying:
-        -- they prevent the common block elimination from working correctly
-        -- in the case of a safe foreign call.
-        -- See Note [safe foreign call convention]
-        --
-        ; sequel <- getSequel
-        ; case sequel of
-            AssignTo assign_to_these _ ->
-                emitForeignCall safety assign_to_these call_target call_args
-
-            _something_else ->
-                do { _ <- emitForeignCall safety res_regs call_target call_args
-                   ; emitReturn (map (CmmReg . CmmLocal) res_regs)
-                   }
-         }
-
-{- Note [safe foreign call convention]
-
-The simple thing to do for a safe foreign call would be the same as an
-unsafe one: just
-
-    emitForeignCall ...
-    emitReturn ...
-
-but consider what happens in this case
-
-   case foo x y z of
-     (# s, r #) -> ...
-
-The sequel is AssignTo [r].  The call to newUnboxedTupleRegs picks [r]
-as the result reg, and we generate
-
-  r = foo(x,y,z) returns to L1  -- emitForeignCall
- L1:
-  r = r  -- emitReturn
-  goto L2
-L2:
-  ...
-
-Now L1 is a proc point (by definition, it is the continuation of the
-safe foreign call).  If L2 does a heap check, then L2 will also be a
-proc point.
-
-Furthermore, the stack layout algorithm has to arrange to save r
-somewhere between the call and the jump to L1, which is annoying: we
-would have to treat r differently from the other live variables, which
-have to be saved *before* the call.
-
-So we adopt a special convention for safe foreign calls: the results
-are copied out according to the NativeReturn convention by the call,
-and the continuation of the call should copyIn the results.  (The
-copyOut code is actually inserted when the safe foreign call is
-lowered later).  The result regs attached to the safe foreign call are
-only used temporarily to hold the results before they are copied out.
-
-We will now generate this:
-
-  r = foo(x,y,z) returns to L1
- L1:
-  r = R1  -- copyIn, inserted by mkSafeCall
-  goto L2
- L2:
-  ... r ...
-
-And when the safe foreign call is lowered later (see Note [lower safe
-foreign calls]) we get this:
-
-  suspendThread()
-  r = foo(x,y,z)
-  resumeThread()
-  R1 = r  -- copyOut, inserted by lowerSafeForeignCall
-  jump L1
- L1:
-  r = R1  -- copyIn, inserted by mkSafeCall
-  goto L2
- L2:
-  ... r ...
-
-Now consider what happens if L2 does a heap check: the Adams
-optimisation kicks in and commons up L1 with the heap-check
-continuation, resulting in just one proc point instead of two. Yay!
--}
-
-
-emitCCall :: [(CmmFormal,ForeignHint)]
-          -> CmmExpr
-          -> [(CmmActual,ForeignHint)]
-          -> FCode ()
-emitCCall hinted_results fn hinted_args
-  = void $ emitForeignCall PlayRisky results target args
-  where
-    (args, arg_hints) = unzip hinted_args
-    (results, result_hints) = unzip hinted_results
-    target = ForeignTarget fn fc
-    fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
-
-
-emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
-emitPrimCall res op args
-  = void $ emitForeignCall PlayRisky res (PrimTarget op) args
-
--- alternative entry point, used by CmmParse
-emitForeignCall
-        :: Safety
-        -> [CmmFormal]          -- where to put the results
-        -> ForeignTarget        -- the op
-        -> [CmmActual]          -- arguments
-        -> FCode ReturnKind
-emitForeignCall safety results target args
-  | not (playSafe safety) = do
-    dflags <- getDynFlags
-    let (caller_save, caller_load) = callerSaveVolatileRegs dflags
-    emit caller_save
-    target' <- load_target_into_temp target
-    args' <- mapM maybe_assign_temp args
-    emit $ mkUnsafeCall target' results args'
-    emit caller_load
-    return AssignedDirectly
-
-  | otherwise = do
-    dflags <- getDynFlags
-    updfr_off <- getUpdFrameOff
-    target' <- load_target_into_temp target
-    args' <- mapM maybe_assign_temp args
-    k <- newBlockId
-    let (off, _, copyout) = copyInOflow dflags NativeReturn (Young k) results []
-       -- see Note [safe foreign call convention]
-    tscope <- getTickScope
-    emit $
-           (    mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags)))
-                        (CmmLit (CmmBlock k))
-            <*> mkLast (CmmForeignCall { tgt  = target'
-                                       , res  = results
-                                       , args = args'
-                                       , succ = k
-                                       , ret_args = off
-                                       , ret_off = updfr_off
-                                       , intrbl = playInterruptible safety })
-            <*> mkLabel k tscope
-            <*> copyout
-           )
-    return (ReturnedTo k off)
-
-load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
-load_target_into_temp (ForeignTarget expr conv) = do
-  tmp <- maybe_assign_temp expr
-  return (ForeignTarget tmp conv)
-load_target_into_temp other_target@(PrimTarget _) =
-  return other_target
-
--- What we want to do here is create a new temporary for the foreign
--- call argument if it is not safe to use the expression directly,
--- because the expression mentions caller-saves GlobalRegs (see
--- Note [Register Parameter Passing]).
---
--- However, we can't pattern-match on the expression here, because
--- this is used in a loop by CmmParse, and testing the expression
--- results in a black hole.  So we always create a temporary, and rely
--- on CmmSink to clean it up later.  (Yuck, ToDo).  The generated code
--- ends up being the same, at least for the RTS .cmm code.
---
-maybe_assign_temp :: CmmExpr -> FCode CmmExpr
-maybe_assign_temp e = do
-  dflags <- getDynFlags
-  reg <- newTemp (cmmExprType dflags e)
-  emitAssign (CmmLocal reg) e
-  return (CmmReg (CmmLocal reg))
-
--- -----------------------------------------------------------------------------
--- Save/restore the thread state in the TSO
-
--- This stuff can't be done in suspendThread/resumeThread, because it
--- refers to global registers which aren't available in the C world.
-
-emitSaveThreadState :: FCode ()
-emitSaveThreadState = do
-  dflags <- getDynFlags
-  code <- saveThreadState dflags
-  emit code
-
--- | Produce code to save the current thread state to @CurrentTSO@
-saveThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
-saveThreadState dflags = do
-  tso <- newTemp (gcWord dflags)
-  close_nursery <- closeNursery dflags tso
-  pure $ catAGraphs [
-    -- tso = CurrentTSO;
-    mkAssign (CmmLocal tso) currentTSOExpr,
-    -- tso->stackobj->sp = Sp;
-    mkStore (cmmOffset dflags
-                       (CmmLoad (cmmOffset dflags
-                                           (CmmReg (CmmLocal tso))
-                                           (tso_stackobj dflags))
-                                (bWord dflags))
-                       (stack_SP dflags))
-            spExpr,
-    close_nursery,
-    -- and save the current cost centre stack in the TSO when profiling:
-    if gopt Opt_SccProfilingOn dflags then
-        mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) cccsExpr
-      else mkNop
-    ]
-
-emitCloseNursery :: FCode ()
-emitCloseNursery = do
-  dflags <- getDynFlags
-  tso <- newTemp (bWord dflags)
-  code <- closeNursery dflags tso
-  emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code
-
-{- |
-@closeNursery dflags tso@ produces code to close the nursery.
-A local register holding the value of @CurrentTSO@ is expected for
-efficiency.
-
-Closing the nursery corresponds to the following code:
-
-@
-  tso = CurrentTSO;
-  cn = CurrentNuresry;
-
-  // Update the allocation limit for the current thread.  We don't
-  // check to see whether it has overflowed at this point, that check is
-  // made when we run out of space in the current heap block (stg_gc_noregs)
-  // and in the scheduler when context switching (schedulePostRunThread).
-  tso->alloc_limit -= Hp + WDS(1) - cn->start;
-
-  // Set cn->free to the next unoccupied word in the block
-  cn->free = Hp + WDS(1);
-@
--}
-closeNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
-closeNursery df tso = do
-  let tsoreg  = CmmLocal tso
-  cnreg      <- CmmLocal <$> newTemp (bWord df)
-  pure $ catAGraphs [
-    mkAssign cnreg currentNurseryExpr,
-
-    -- CurrentNursery->free = Hp+1;
-    mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df hpExpr 1),
-
-    let alloc =
-           CmmMachOp (mo_wordSub df)
-              [ cmmOffsetW df hpExpr 1
-              , CmmLoad (nursery_bdescr_start df cnreg) (bWord df)
-              ]
-
-        alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
-    in
-
-    -- tso->alloc_limit += alloc
-    mkStore alloc_limit (CmmMachOp (MO_Sub W64)
-                               [ CmmLoad alloc_limit b64
-                               , CmmMachOp (mo_WordTo64 df) [alloc] ])
-   ]
-
-emitLoadThreadState :: FCode ()
-emitLoadThreadState = do
-  dflags <- getDynFlags
-  code <- loadThreadState dflags
-  emit code
-
--- | Produce code to load the current thread state from @CurrentTSO@
-loadThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
-loadThreadState dflags = do
-  tso <- newTemp (gcWord dflags)
-  stack <- newTemp (gcWord dflags)
-  open_nursery <- openNursery dflags tso
-  pure $ catAGraphs [
-    -- tso = CurrentTSO;
-    mkAssign (CmmLocal tso) currentTSOExpr,
-    -- stack = tso->stackobj;
-    mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
-    -- Sp = stack->sp;
-    mkAssign spReg (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
-    -- SpLim = stack->stack + RESERVED_STACK_WORDS;
-    mkAssign spLimReg (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
-                                (rESERVED_STACK_WORDS dflags)),
-    -- HpAlloc = 0;
-    --   HpAlloc is assumed to be set to non-zero only by a failed
-    --   a heap check, see HeapStackCheck.cmm:GC_GENERIC
-    mkAssign hpAllocReg (zeroExpr dflags),
-    open_nursery,
-    -- and load the current cost centre stack from the TSO when profiling:
-    if gopt Opt_SccProfilingOn dflags
-       then storeCurCCS
-              (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso))
-                 (tso_CCCS dflags)) (ccsType dflags))
-       else mkNop
-   ]
-
-
-emitOpenNursery :: FCode ()
-emitOpenNursery = do
-  dflags <- getDynFlags
-  tso <- newTemp (bWord dflags)
-  code <- openNursery dflags tso
-  emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code
-
-{- |
-@openNursery dflags tso@ produces code to open the nursery. A local register
-holding the value of @CurrentTSO@ is expected for efficiency.
-
-Opening the nursery corresponds to the following code:
-
-@
-   tso = CurrentTSO;
-   cn = CurrentNursery;
-   bdfree = CurrentNursery->free;
-   bdstart = CurrentNursery->start;
-
-   // We *add* the currently occupied portion of the nursery block to
-   // the allocation limit, because we will subtract it again in
-   // closeNursery.
-   tso->alloc_limit += bdfree - bdstart;
-
-   // Set Hp to the last occupied word of the heap block.  Why not the
-   // next unocupied word?  Doing it this way means that we get to use
-   // an offset of zero more often, which might lead to slightly smaller
-   // code on some architectures.
-   Hp = bdfree - WDS(1);
-
-   // Set HpLim to the end of the current nursery block (note that this block
-   // might be a block group, consisting of several adjacent blocks.
-   HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
-@
--}
-openNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
-openNursery df tso = do
-  let tsoreg =  CmmLocal tso
-  cnreg      <- CmmLocal <$> newTemp (bWord df)
-  bdfreereg  <- CmmLocal <$> newTemp (bWord df)
-  bdstartreg <- CmmLocal <$> newTemp (bWord df)
-
-  -- These assignments are carefully ordered to reduce register
-  -- pressure and generate not completely awful code on x86.  To see
-  -- what code we generate, look at the assembly for
-  -- stg_returnToStackTop in rts/StgStartup.cmm.
-  pure $ catAGraphs [
-     mkAssign cnreg currentNurseryExpr,
-     mkAssign bdfreereg  (CmmLoad (nursery_bdescr_free df cnreg)  (bWord df)),
-
-     -- Hp = CurrentNursery->free - 1;
-     mkAssign hpReg (cmmOffsetW df (CmmReg bdfreereg) (-1)),
-
-     mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)),
-
-     -- HpLim = CurrentNursery->start +
-     --              CurrentNursery->blocks*BLOCK_SIZE_W - 1;
-     mkAssign hpLimReg
-         (cmmOffsetExpr df
-             (CmmReg bdstartreg)
-             (cmmOffset df
-               (CmmMachOp (mo_wordMul df) [
-                 CmmMachOp (MO_SS_Conv W32 (wordWidth df))
-                   [CmmLoad (nursery_bdescr_blocks df cnreg) b32],
-                 mkIntExpr df (bLOCK_SIZE df)
-                ])
-               (-1)
-             )
-         ),
-
-     -- alloc = bd->free - bd->start
-     let alloc =
-           CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]
-
-         alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
-     in
-
-     -- tso->alloc_limit += alloc
-     mkStore alloc_limit (CmmMachOp (MO_Add W64)
-                               [ CmmLoad alloc_limit b64
-                               , CmmMachOp (mo_WordTo64 df) [alloc] ])
-
-   ]
-
-nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
-  :: DynFlags -> CmmReg -> CmmExpr
-nursery_bdescr_free   dflags cn =
-  cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
-nursery_bdescr_start  dflags cn =
-  cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags)
-nursery_bdescr_blocks dflags cn =
-  cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
-
-tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff
-tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
-tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags)
-tso_CCCS     dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
-stack_STACK  dflags = closureField dflags (oFFSET_StgStack_stack dflags)
-stack_SP     dflags = closureField dflags (oFFSET_StgStack_sp dflags)
-
-
-closureField :: DynFlags -> ByteOff -> ByteOff
-closureField dflags off = off + fixedHdrSize dflags
-
--- Note [Unlifted boxed arguments to foreign calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- For certain types passed to foreign calls, we adjust the actual
--- value passed to the call.  For ByteArray#, Array#, SmallArray#,
--- and ArrayArray#, we pass the address of the array's payload, not
--- the address of the heap object. For example, consider
---   foreign import "c_foo" foo :: ByteArray# -> Int# -> IO ()
--- At a Haskell call like `foo x y`, we'll generate a C call that
--- is more like
---   c_foo( x+8, y )
--- where the "+8" takes the heap pointer (x :: ByteArray#) and moves
--- it past the header words of the ByteArray object to point directly
--- to the data inside the ByteArray#. (The exact offset depends
--- on the target architecture and on profiling) By contrast, (y :: Int#)
--- requires no such adjustment.
---
--- This adjustment is performed by 'add_shim'. The size of the
--- adjustment depends on the type of heap object. But
--- how can we determine that type? There are two available options.
--- We could use the types of the actual values that the foreign call
--- has been applied to, or we could use the types present in the
--- foreign function's type. Prior to GHC 8.10, we used the former
--- strategy since it's a little more simple. However, in issue #16650
--- and more compellingly in the comments of
--- https://gitlab.haskell.org/ghc/ghc/merge_requests/939, it was
--- demonstrated that this leads to bad behavior in the presence
--- of unsafeCoerce#. Returning to the above example, suppose the
--- Haskell call looked like
---   foo (unsafeCoerce# p)
--- where the types of expressions comprising the arguments are
---   p :: (Any :: TYPE 'UnliftedRep)
---   i :: Int#
--- so that the unsafe-coerce is between Any and ByteArray#.
--- These two types have the same kind (they are both represented by
--- a heap pointer) so no GC errors will occur if we do this unsafe coerce.
--- By the time this gets to the code generator the cast has been
--- discarded so we have
---   foo p y
--- But we *must* adjust the pointer to p by a ByteArray# shim,
--- *not* by an Any shim (the Any shim involves no offset at all).
---
--- To avoid this bad behavior, we adopt the second strategy: use
--- the types present in the foreign function's type.
--- In collectStgFArgTypes, we convert the foreign function's
--- type to a list of StgFArgType. Then, in add_shim, we interpret
--- these as numeric offsets.
-
-getFCallArgs ::
-     [StgArg]
-  -> Type -- the type of the foreign function
-  -> FCode [(CmmExpr, ForeignHint)]
--- (a) Drop void args
--- (b) Add foreign-call shim code
--- It's (b) that makes this differ from getNonVoidArgAmodes
--- Precondition: args and typs have the same length
--- See Note [Unlifted boxed arguments to foreign calls]
-getFCallArgs args typ
-  = do  { mb_cmms <- mapM get (zipEqual "getFCallArgs" args (collectStgFArgTypes typ))
-        ; return (catMaybes mb_cmms) }
-  where
-    get (arg,typ)
-      | null arg_reps
-      = return Nothing
-      | otherwise
-      = do { cmm <- getArgAmode (NonVoid arg)
-           ; dflags <- getDynFlags
-           ; return (Just (add_shim dflags typ cmm, hint)) }
-      where
-        arg_ty   = stgArgType arg
-        arg_reps = typePrimRep arg_ty
-        hint     = typeForeignHint arg_ty
-
--- The minimum amount of information needed to determine
--- the offset to apply to an argument to a foreign call.
--- See Note [Unlifted boxed arguments to foreign calls]
-data StgFArgType
-  = StgPlainType
-  | StgArrayType
-  | StgSmallArrayType
-  | StgByteArrayType
-
--- See Note [Unlifted boxed arguments to foreign calls]
-add_shim :: DynFlags -> StgFArgType -> CmmExpr -> CmmExpr
-add_shim dflags ty expr = case ty of
-  StgPlainType -> expr
-  StgArrayType -> cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
-  StgSmallArrayType -> cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags)
-  StgByteArrayType -> cmmOffsetB dflags expr (arrWordsHdrSize dflags)
-
--- From a function, extract information needed to determine
--- the offset of each argument when used as a C FFI argument.
--- See Note [Unlifted boxed arguments to foreign calls]
-collectStgFArgTypes :: Type -> [StgFArgType]
-collectStgFArgTypes = go []
-  where
-    -- Skip foralls
-    go bs (ForAllTy _ res) = go bs res
-    go bs (AppTy{}) = reverse bs
-    go bs (TyConApp{}) = reverse bs
-    go bs (LitTy{}) = reverse bs
-    go bs (TyVarTy{}) = reverse bs
-    go  _ (CastTy{}) = panic "myCollectTypeArgs: CastTy"
-    go  _ (CoercionTy{}) = panic "myCollectTypeArgs: CoercionTy"
-    go bs (FunTy {ft_arg = arg, ft_res=res}) =
-      go (typeToStgFArgType arg:bs) res
-
--- Choose the offset based on the type. For anything other
--- than an unlifted boxed type, there is no offset.
--- See Note [Unlifted boxed arguments to foreign calls]
-typeToStgFArgType :: Type -> StgFArgType
-typeToStgFArgType typ
-  | tycon == arrayPrimTyCon = StgArrayType
-  | tycon == mutableArrayPrimTyCon = StgArrayType
-  | tycon == arrayArrayPrimTyCon = StgArrayType
-  | tycon == mutableArrayArrayPrimTyCon = StgArrayType
-  | tycon == smallArrayPrimTyCon = StgSmallArrayType
-  | tycon == smallMutableArrayPrimTyCon = StgSmallArrayType
-  | tycon == byteArrayPrimTyCon = StgByteArrayType
-  | tycon == mutableByteArrayPrimTyCon = StgByteArrayType
-  | otherwise = StgPlainType
-  where
-  -- Should be a tycon app, since this is a foreign call. We look
-  -- through newtypes so the offset does not change if a user replaces
-  -- a type in a foreign function signature with a representationally
-  -- equivalent newtype.
-  tycon = tyConAppTyCon (unwrapType typ)
-
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
deleted file mode 100644
index da9e85f1e7..0000000000
--- a/compiler/codeGen/StgCmmHeap.hs
+++ /dev/null
@@ -1,680 +0,0 @@
------------------------------------------------------------------------------
---
--- Stg to C--: heap management functions
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmHeap (
-        getVirtHp, setVirtHp, setRealHp,
-        getHpRelOffset,
-
-        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
-        heapStackCheckGen,
-        entryHeapCheck',
-
-        mkStaticClosureFields, mkStaticClosure,
-
-        allocDynClosure, allocDynClosureCmm, allocHeapClosure,
-        emitSetDynHdr
-    ) where
-
-import GhcPrelude hiding ((<*>))
-
-import StgSyn
-import CLabel
-import StgCmmLayout
-import StgCmmUtils
-import StgCmmMonad
-import StgCmmProf (profDynAlloc, dynProfHdr, staticProfHdr)
-import StgCmmTicky
-import StgCmmClosure
-import StgCmmEnv
-
-import MkGraph
-
-import Hoopl.Label
-import SMRep
-import BlockId
-import Cmm
-import CmmUtils
-import CostCentre
-import IdInfo( CafInfo(..), mayHaveCafRefs )
-import Id ( Id )
-import Module
-import DynFlags
-import FastString( mkFastString, fsLit )
-import Panic( sorry )
-
-import Control.Monad (when)
-import Data.Maybe (isJust)
-
------------------------------------------------------------
---              Initialise dynamic heap objects
------------------------------------------------------------
-
-allocDynClosure
-        :: Maybe Id
-        -> CmmInfoTable
-        -> LambdaFormInfo
-        -> CmmExpr              -- Cost Centre to stick in the object
-        -> CmmExpr              -- Cost Centre to blame for this alloc
-                                -- (usually the same; sometimes "OVERHEAD")
-
-        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
-                                                -- ie Info ptr has offset zero.
-                                                -- No void args in here
-        -> FCode CmmExpr -- returns Hp+n
-
-allocDynClosureCmm
-        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
-        -> [(CmmExpr, ByteOff)]
-        -> FCode CmmExpr -- returns Hp+n
-
--- allocDynClosure allocates the thing in the heap,
--- and modifies the virtual Hp to account for this.
--- The second return value is the graph that sets the value of the
--- returned LocalReg, which should point to the closure after executing
--- the graph.
-
--- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
--- only valid until Hp is changed.  The caller should assign the
--- result to a LocalReg if it is required to remain live.
---
--- The reason we don't assign it to a LocalReg here is that the caller
--- is often about to call regIdInfo, which immediately assigns the
--- result of allocDynClosure to a new temp in order to add the tag.
--- So by not generating a LocalReg here we avoid a common source of
--- new temporaries and save some compile time.  This can be quite
--- significant - see test T4801.
-
-
-allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
-  let (args, offsets) = unzip args_w_offsets
-  cmm_args <- mapM getArgAmode args     -- No void args
-  allocDynClosureCmm mb_id info_tbl lf_info
-                     use_cc _blame_cc (zip cmm_args offsets)
-
-
-allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
-  -- SAY WHAT WE ARE ABOUT TO DO
-  let rep = cit_rep info_tbl
-  tickyDynAlloc mb_id rep lf_info
-  let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
-  allocHeapClosure rep info_ptr use_cc amodes_w_offsets
-
-
--- | Low-level heap object allocation.
-allocHeapClosure
-  :: SMRep                            -- ^ representation of the object
-  -> CmmExpr                          -- ^ info pointer
-  -> CmmExpr                          -- ^ cost centre
-  -> [(CmmExpr,ByteOff)]              -- ^ payload
-  -> FCode CmmExpr                    -- ^ returns the address of the object
-allocHeapClosure rep info_ptr use_cc payload = do
-  profDynAlloc rep use_cc
-
-  virt_hp <- getVirtHp
-
-  -- Find the offset of the info-ptr word
-  let info_offset = virt_hp + 1
-            -- info_offset is the VirtualHpOffset of the first
-            -- word of the new object
-            -- Remember, virtHp points to last allocated word,
-            -- ie 1 *before* the info-ptr word of new object.
-
-  base <- getHpRelOffset info_offset
-  emitComment $ mkFastString "allocHeapClosure"
-  emitSetDynHdr base info_ptr use_cc
-
-  -- Fill in the fields
-  hpStore base payload
-
-  -- Bump the virtual heap pointer
-  dflags <- getDynFlags
-  setVirtHp (virt_hp + heapClosureSizeW dflags rep)
-
-  return base
-
-
-emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
-emitSetDynHdr base info_ptr ccs
-  = do dflags <- getDynFlags
-       hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..])
-  where
-    header :: DynFlags -> [CmmExpr]
-    header dflags = [info_ptr] ++ dynProfHdr dflags ccs
-        -- ToDo: Parallel stuff
-        -- No ticky header
-
--- Store the item (expr,off) in base[off]
-hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
-hpStore base vals = do
-  dflags <- getDynFlags
-  sequence_ $
-    [ emitStore (cmmOffsetB dflags base off) val | (val,off) <- vals ]
-
------------------------------------------------------------
---              Layout of static closures
------------------------------------------------------------
-
--- Make a static closure, adding on any extra padding needed for CAFs,
--- and adding a static link field if necessary.
-
-mkStaticClosureFields
-        :: DynFlags
-        -> CmmInfoTable
-        -> CostCentreStack
-        -> CafInfo
-        -> [CmmLit]             -- Payload
-        -> [CmmLit]             -- The full closure
-mkStaticClosureFields dflags info_tbl ccs caf_refs payload
-  = mkStaticClosure dflags info_lbl ccs payload padding
-        static_link_field saved_info_field
-  where
-    info_lbl = cit_lbl info_tbl
-
-    -- CAFs must have consistent layout, regardless of whether they
-    -- are actually updatable or not.  The layout of a CAF is:
-    --
-    --        3 saved_info
-    --        2 static_link
-    --        1 indirectee
-    --        0 info ptr
-    --
-    -- the static_link and saved_info fields must always be in the
-    -- same place.  So we use isThunkRep rather than closureUpdReqd
-    -- here:
-
-    is_caf = isThunkRep (cit_rep info_tbl)
-
-    padding
-        | is_caf && null payload = [mkIntCLit dflags 0]
-        | otherwise = []
-
-    static_link_field
-        | is_caf || staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
-        = [static_link_value]
-        | otherwise
-        = []
-
-    saved_info_field
-        | is_caf     = [mkIntCLit dflags 0]
-        | otherwise  = []
-
-        -- For a static constructor which has NoCafRefs, we set the
-        -- static link field to a non-zero value so the garbage
-        -- collector will ignore it.
-    static_link_value
-        | mayHaveCafRefs caf_refs  = mkIntCLit dflags 0
-        | otherwise                = mkIntCLit dflags 3  -- No CAF refs
-                                      -- See Note [STATIC_LINK fields]
-                                      -- in rts/sm/Storage.h
-
-mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
-  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
-mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
-  =  [CmmLabel info_lbl]
-  ++ staticProfHdr dflags ccs
-  ++ payload
-  ++ padding
-  ++ static_link_field
-  ++ saved_info_field
-
------------------------------------------------------------
---              Heap overflow checking
------------------------------------------------------------
-
-{- Note [Heap checks]
-   ~~~~~~~~~~~~~~~~~~
-Heap checks come in various forms.  We provide the following entry
-points to the runtime system, all of which use the native C-- entry
-convention.
-
-  * gc() performs garbage collection and returns
-    nothing to its caller
-
-  * A series of canned entry points like
-        r = gc_1p( r )
-    where r is a pointer.  This performs gc, and
-    then returns its argument r to its caller.
-
-  * A series of canned entry points like
-        gcfun_2p( f, x, y )
-    where f is a function closure of arity 2
-    This performs garbage collection, keeping alive the
-    three argument ptrs, and then tail-calls f(x,y)
-
-These are used in the following circumstances
-
-* entryHeapCheck: Function entry
-    (a) With a canned GC entry sequence
-        f( f_clo, x:ptr, y:ptr ) {
-             Hp = Hp+8
-             if Hp > HpLim goto L
-             ...
-          L: HpAlloc = 8
-             jump gcfun_2p( f_clo, x, y ) }
-     Note the tail call to the garbage collector;
-     it should do no register shuffling
-
-    (b) No canned sequence
-        f( f_clo, x:ptr, y:ptr, ...etc... ) {
-          T: Hp = Hp+8
-             if Hp > HpLim goto L
-             ...
-          L: HpAlloc = 8
-             call gc()  -- Needs an info table
-             goto T }
-
-* altHeapCheck: Immediately following an eval
-  Started as
-        case f x y of r { (p,q) -> rhs }
-  (a) With a canned sequence for the results of f
-       (which is the very common case since
-       all boxed cases return just one pointer
-           ...
-           r = f( x, y )
-        K:      -- K needs an info table
-           Hp = Hp+8
-           if Hp > HpLim goto L
-           ...code for rhs...
-
-        L: r = gc_1p( r )
-           goto K }
-
-        Here, the info table needed by the call
-        to gc_1p should be the *same* as the
-        one for the call to f; the C-- optimiser
-        spots this sharing opportunity)
-
-   (b) No canned sequence for results of f
-       Note second info table
-           ...
-           (r1,r2,r3) = call f( x, y )
-        K:
-           Hp = Hp+8
-           if Hp > HpLim goto L
-           ...code for rhs...
-
-        L: call gc()    -- Extra info table here
-           goto K
-
-* generalHeapCheck: Anywhere else
-  e.g. entry to thunk
-       case branch *not* following eval,
-       or let-no-escape
-  Exactly the same as the previous case:
-
-        K:      -- K needs an info table
-           Hp = Hp+8
-           if Hp > HpLim goto L
-           ...
-
-        L: call gc()
-           goto K
--}
-
---------------------------------------------------------------
--- A heap/stack check at a function or thunk entry point.
-
-entryHeapCheck :: ClosureInfo
-               -> Maybe LocalReg -- Function (closure environment)
-               -> Int            -- Arity -- not same as len args b/c of voids
-               -> [LocalReg]     -- Non-void args (empty for thunk)
-               -> FCode ()
-               -> FCode ()
-
-entryHeapCheck cl_info nodeSet arity args code
-  = entryHeapCheck' is_fastf node arity args code
-  where
-    node = case nodeSet of
-              Just r  -> CmmReg (CmmLocal r)
-              Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)
-
-    is_fastf = case closureFunInfo cl_info of
-                 Just (_, ArgGen _) -> False
-                 _otherwise         -> True
-
--- | lower-level version for CmmParse
-entryHeapCheck' :: Bool           -- is a known function pattern
-                -> CmmExpr        -- expression for the closure pointer
-                -> Int            -- Arity -- not same as len args b/c of voids
-                -> [LocalReg]     -- Non-void args (empty for thunk)
-                -> FCode ()
-                -> FCode ()
-entryHeapCheck' is_fastf node arity args code
-  = do dflags <- getDynFlags
-       let is_thunk = arity == 0
-
-           args' = map (CmmReg . CmmLocal) args
-           stg_gc_fun    = CmmReg (CmmGlobal GCFun)
-           stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)
-
-           {- Thunks:          jump stg_gc_enter_1
-
-              Function (fast): call (NativeNode) stg_gc_fun(fun, args)
-
-              Function (slow): call (slow) stg_gc_fun(fun, args)
-           -}
-           gc_call upd
-               | is_thunk
-                 = mkJump dflags NativeNodeCall stg_gc_enter1 [node] upd
-
-               | is_fastf
-                 = mkJump dflags NativeNodeCall stg_gc_fun (node : args') upd
-
-               | otherwise
-                 = mkJump dflags Slow stg_gc_fun (node : args') upd
-
-       updfr_sz <- getUpdFrameOff
-
-       loop_id <- newBlockId
-       emitLabel loop_id
-       heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code
-
--- ------------------------------------------------------------
--- A heap/stack check in a case alternative
-
-
--- If there are multiple alts and we need to GC, but don't have a
--- continuation already (the scrut was simple), then we should
--- pre-generate the continuation.  (if there are multiple alts it is
--- always a canned GC point).
-
--- altHeapCheck:
--- If we have a return continuation,
---   then if it is a canned GC pattern,
---           then we do mkJumpReturnsTo
---           else we do a normal call to stg_gc_noregs
---   else if it is a canned GC pattern,
---           then generate the continuation and do mkCallReturnsTo
---           else we do a normal call to stg_gc_noregs
-
-altHeapCheck :: [LocalReg] -> FCode a -> FCode a
-altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code
-
-altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
-altOrNoEscapeHeapCheck checkYield regs code = do
-    dflags <- getDynFlags
-    case cannedGCEntryPoint dflags regs of
-      Nothing -> genericGC checkYield code
-      Just gc -> do
-        lret <- newBlockId
-        let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) regs []
-        lcont <- newBlockId
-        tscope <- getTickScope
-        emitOutOfLine lret (copyin <*> mkBranch lcont, tscope)
-        emitLabel lcont
-        cannedGCReturnsTo checkYield False gc regs lret off code
-
-altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
-altHeapCheckReturnsTo regs lret off code
-  = do dflags <- getDynFlags
-       case cannedGCEntryPoint dflags regs of
-           Nothing -> genericGC False code
-           Just gc -> cannedGCReturnsTo False True gc regs lret off code
-
--- noEscapeHeapCheck is implemented identically to altHeapCheck (which
--- is more efficient), but cannot be optimized away in the non-allocating
--- case because it may occur in a loop
-noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
-noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code
-
-cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
-                  -> FCode a
-                  -> FCode a
-cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
-  = do dflags <- getDynFlags
-       updfr_sz <- getUpdFrameOff
-       heapCheck False checkYield (gc_call dflags gc updfr_sz) code
-  where
-    reg_exprs = map (CmmReg . CmmLocal) regs
-      -- Note [stg_gc arguments]
-
-      -- NB. we use the NativeReturn convention for passing arguments
-      -- to the canned heap-check routines, because we are in a case
-      -- alternative and hence the [LocalReg] was passed to us in the
-      -- NativeReturn convention.
-    gc_call dflags label sp
-      | cont_on_stack
-      = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp
-      | otherwise
-      = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp []
-
-genericGC :: Bool -> FCode a -> FCode a
-genericGC checkYield code
-  = do updfr_sz <- getUpdFrameOff
-       lretry <- newBlockId
-       emitLabel lretry
-       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
-       heapCheck False checkYield (call <*> mkBranch lretry) code
-
-cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
-cannedGCEntryPoint dflags regs
-  = case map localRegType regs of
-      []  -> Just (mkGcLabel "stg_gc_noregs")
-      [ty]
-          | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
-          | isFloatType ty -> case width of
-                                  W32       -> Just (mkGcLabel "stg_gc_f1")
-                                  W64       -> Just (mkGcLabel "stg_gc_d1")
-                                  _         -> Nothing
-
-          | width == wordWidth dflags -> Just (mkGcLabel "stg_gc_unbx_r1")
-          | width == W64              -> Just (mkGcLabel "stg_gc_l1")
-          | otherwise                 -> Nothing
-          where
-              width = typeWidth ty
-      [ty1,ty2]
-          |  isGcPtrType ty1
-          && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
-      [ty1,ty2,ty3]
-          |  isGcPtrType ty1
-          && isGcPtrType ty2
-          && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
-      [ty1,ty2,ty3,ty4]
-          |  isGcPtrType ty1
-          && isGcPtrType ty2
-          && isGcPtrType ty3
-          && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
-      _otherwise -> Nothing
-
--- Note [stg_gc arguments]
--- It might seem that we could avoid passing the arguments to the
--- stg_gc function, because they are already in the right registers.
--- While this is usually the case, it isn't always.  Sometimes the
--- code generator has cleverly avoided the eval in a case, e.g. in
--- ffi/should_run/4221.hs we found
---
---   case a_r1mb of z
---     FunPtr x y -> ...
---
--- where a_r1mb is bound a top-level constructor, and is known to be
--- evaluated.  The codegen just assigns x, y and z, and continues;
--- R1 is never assigned.
---
--- So we'll have to rely on optimisations to eliminatethese
--- assignments where possible.
-
-
--- | The generic GC procedure; no params, no results
-generic_gc :: CmmExpr
-generic_gc = mkGcLabel "stg_gc_noregs"
-
--- | Create a CLabel for calling a garbage collector entry point
-mkGcLabel :: String -> CmmExpr
-mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit s)))
-
--------------------------------
-heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
-heapCheck checkStack checkYield do_gc code
-  = getHeapUsage $ \ hpHw ->
-    -- Emit heap checks, but be sure to do it lazily so
-    -- that the conditionals on hpHw don't cause a black hole
-    do  { dflags <- getDynFlags
-        ; let mb_alloc_bytes
-                 | hpHw > mBLOCK_SIZE = sorry $ unlines
-                    [" Trying to allocate more than "++show mBLOCK_SIZE++" bytes.",
-                     "",
-                     "This is currently not possible due to a limitation of GHC's code generator.",
-                     "See https://gitlab.haskell.org/ghc/ghc/issues/4505 for details.",
-                     "Suggestion: read data from a file instead of having large static data",
-                     "structures in code."]
-                 | hpHw > 0  = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags)))
-                 | otherwise = Nothing
-                 where mBLOCK_SIZE = bLOCKS_PER_MBLOCK dflags * bLOCK_SIZE_W dflags
-              stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
-                      | otherwise  = Nothing
-        ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
-        ; tickyAllocHeap True hpHw
-        ; setRealHp hpHw
-        ; code }
-
-heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
-heapStackCheckGen stk_hwm mb_bytes
-  = do updfr_sz <- getUpdFrameOff
-       lretry <- newBlockId
-       emitLabel lretry
-       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
-       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)
-
--- Note [Single stack check]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~
--- When compiling a function we can determine how much stack space it
--- will use. We therefore need to perform only a single stack check at
--- the beginning of a function to see if we have enough stack space.
---
--- The check boils down to comparing Sp-N with SpLim, where N is the
--- amount of stack space needed (see Note [Stack usage] below).  *BUT*
--- at this stage of the pipeline we are not supposed to refer to Sp
--- itself, because the stack is not yet manifest, so we don't quite
--- know where Sp pointing.
-
--- So instead of referring directly to Sp - as we used to do in the
--- past - the code generator uses (old + 0) in the stack check. That
--- is the address of the first word of the old area, so if we add N
--- we'll get the address of highest used word.
---
--- This makes the check robust.  For example, while we need to perform
--- only one stack check for each function, we could in theory place
--- more stack checks later in the function. They would be redundant,
--- but not incorrect (in a sense that they should not change program
--- behaviour). We need to make sure however that a stack check
--- inserted after incrementing the stack pointer checks for a
--- respectively smaller stack space. This would not be the case if the
--- code generator produced direct references to Sp. By referencing
--- (old + 0) we make sure that we always check for a correct amount of
--- stack: when converting (old + 0) to Sp the stack layout phase takes
--- into account changes already made to stack pointer. The idea for
--- this change came from observations made while debugging #8275.
-
--- Note [Stack usage]
--- ~~~~~~~~~~~~~~~~~~
--- At the moment we convert from STG to Cmm we don't know N, the
--- number of bytes of stack that the function will use, so we use a
--- special late-bound CmmLit, namely
---       CmmHighStackMark
--- to stand for the number of bytes needed. When the stack is made
--- manifest, the number of bytes needed is calculated, and used to
--- replace occurrences of CmmHighStackMark
---
--- The (Maybe CmmExpr) passed to do_checks is usually
---     Just (CmmLit CmmHighStackMark)
--- but can also (in certain hand-written RTS functions)
---     Just (CmmLit 8)  or some other fixed valuet
--- If it is Nothing, we don't generate a stack check at all.
-
-do_checks :: Maybe CmmExpr    -- Should we check the stack?
-                              -- See Note [Stack usage]
-          -> Bool             -- Should we check for preemption?
-          -> Maybe CmmExpr    -- Heap headroom (bytes)
-          -> CmmAGraph        -- What to do on failure
-          -> FCode ()
-do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
-  dflags <- getDynFlags
-  gc_id <- newBlockId
-
-  let
-    Just alloc_lit = mb_alloc_lit
-
-    bump_hp   = cmmOffsetExprB dflags hpExpr alloc_lit
-
-    -- Sp overflow if ((old + 0) - CmmHighStack < SpLim)
-    -- At the beginning of a function old + 0 = Sp
-    -- See Note [Single stack check]
-    sp_oflo sp_hwm =
-         CmmMachOp (mo_wordULt dflags)
-                  [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
-                             [CmmStackSlot Old 0, sp_hwm],
-                   CmmReg spLimReg]
-
-    -- Hp overflow if (Hp > HpLim)
-    -- (Hp has been incremented by now)
-    -- HpLim points to the LAST WORD of valid allocation space.
-    hp_oflo = CmmMachOp (mo_wordUGt dflags) [hpExpr, hpLimExpr]
-
-    alloc_n = mkAssign hpAllocReg alloc_lit
-
-  case mb_stk_hwm of
-    Nothing -> return ()
-    Just stk_hwm -> tickyStackCheck
-      >> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False) )
-
-  -- Emit new label that might potentially be a header
-  -- of a self-recursive tail call.
-  -- See Note [Self-recursive loop header].
-  self_loop_info <- getSelfLoop
-  case self_loop_info of
-    Just (_, loop_header_id, _)
-        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
-    _otherwise -> return ()
-
-  if (isJust mb_alloc_lit)
-    then do
-     tickyHeapCheck
-     emitAssign hpReg bump_hp
-     emit =<< mkCmmIfThen' hp_oflo (alloc_n <*> mkBranch gc_id) (Just False)
-    else do
-      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
-         -- Yielding if HpLim == 0
-         let yielding = CmmMachOp (mo_wordEq dflags)
-                                  [CmmReg hpLimReg,
-                                   CmmLit (zeroCLit dflags)]
-         emit =<< mkCmmIfGoto' yielding gc_id (Just False)
-
-  tscope <- getTickScope
-  emitOutOfLine gc_id
-   (do_gc, tscope) -- this is expected to jump back somewhere
-
-                -- Test for stack pointer exhaustion, then
-                -- bump heap pointer, and test for heap exhaustion
-                -- Note that we don't move the heap pointer unless the
-                -- stack check succeeds.  Otherwise we might end up
-                -- with slop at the end of the current block, which can
-                -- confuse the LDV profiler.
-
--- Note [Self-recursive loop header]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- Self-recursive loop header is required by loopification optimization (See
--- Note [Self-recursive tail calls] in StgCmmExpr). We emit it if:
---
---  1. There is information about self-loop in the FCode environment. We don't
---     check the binder (first component of the self_loop_info) because we are
---     certain that if the self-loop info is present then we are compiling the
---     binder body. Reason: the only possible way to get here with the
---     self_loop_info present is from closureCodeBody.
---
---  2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
---     to preempt the heap check (see #367 for motivation behind this check). It
---     is True for heap checks placed at the entry to a function and
---     let-no-escape heap checks but false for other heap checks (eg. in case
---     alternatives or created from hand-written high-level Cmm). The second
---     check (isJust mb_stk_hwm) is true for heap checks at the entry to a
---     function and some heap checks created in hand-written Cmm. Otherwise it
---     is Nothing. In other words the only situation when both conditions are
---     true is when compiling stack and heap checks at the entry to a
---     function. This is the only situation when we want to emit a self-loop
---     label.
diff --git a/compiler/codeGen/StgCmmHpc.hs b/compiler/codeGen/StgCmmHpc.hs
deleted file mode 100644
index 8e9676bd33..0000000000
--- a/compiler/codeGen/StgCmmHpc.hs
+++ /dev/null
@@ -1,48 +0,0 @@
------------------------------------------------------------------------------
---
--- Code generation for coverage
---
--- (c) Galois Connections, Inc. 2006
---
------------------------------------------------------------------------------
-
-module StgCmmHpc ( initHpc, mkTickBox ) where
-
-import GhcPrelude
-
-import StgCmmMonad
-
-import MkGraph
-import CmmExpr
-import CLabel
-import Module
-import CmmUtils
-import StgCmmUtils
-import HscTypes
-import DynFlags
-
-import Control.Monad
-
-mkTickBox :: DynFlags -> Module -> Int -> CmmAGraph
-mkTickBox dflags mod n
-  = mkStore tick_box (CmmMachOp (MO_Add W64)
-                                [ CmmLoad tick_box b64
-                                , CmmLit (CmmInt 1 W64)
-                                ])
-  where
-    tick_box = cmmIndex dflags W64
-                        (CmmLit $ CmmLabel $ mkHpcTicksLabel $ mod)
-                        n
-
-initHpc :: Module -> HpcInfo -> FCode ()
--- Emit top-level tables for HPC and return code to initialise
-initHpc _ (NoHpcInfo {})
-  = return ()
-initHpc this_mod (HpcInfo tickCount _hashNo)
-  = do dflags <- getDynFlags
-       when (gopt Opt_Hpc dflags) $
-           do emitDataLits (mkHpcTicksLabel this_mod)
-                           [ (CmmInt 0 W64)
-                           | _ <- take tickCount [0 :: Int ..]
-                           ]
-
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
deleted file mode 100644
index 78a7cf3f85..0000000000
--- a/compiler/codeGen/StgCmmLayout.hs
+++ /dev/null
@@ -1,623 +0,0 @@
-{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Building info tables.
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmLayout (
-        mkArgDescr,
-        emitCall, emitReturn, adjustHpBackwards,
-
-        emitClosureProcAndInfoTable,
-        emitClosureAndInfoTable,
-
-        slowCall, directCall,
-
-        FieldOffOrPadding(..),
-        ClosureHeader(..),
-        mkVirtHeapOffsets,
-        mkVirtHeapOffsetsWithPadding,
-        mkVirtConstrOffsets,
-        mkVirtConstrSizes,
-        getHpRelOffset,
-
-        ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep
-  ) where
-
-
-#include "HsVersions.h"
-
-import GhcPrelude hiding ((<*>))
-
-import StgCmmClosure
-import StgCmmEnv
-import StgCmmArgRep -- notably: ( slowCallPattern )
-import StgCmmTicky
-import StgCmmMonad
-import StgCmmUtils
-
-import MkGraph
-import SMRep
-import BlockId
-import Cmm
-import CmmUtils
-import CmmInfo
-import CLabel
-import StgSyn
-import Id
-import TyCon             ( PrimRep(..), primRepSizeB )
-import BasicTypes        ( RepArity )
-import DynFlags
-import Module
-
-import Util
-import Data.List
-import Outputable
-import FastString
-import Control.Monad
-
-------------------------------------------------------------------------
---                Call and return sequences
-------------------------------------------------------------------------
-
--- | Return multiple values to the sequel
---
--- If the sequel is @Return@
---
--- >     return (x,y)
---
--- If the sequel is @AssignTo [p,q]@
---
--- >    p=x; q=y;
---
-emitReturn :: [CmmExpr] -> FCode ReturnKind
-emitReturn results
-  = do { dflags    <- getDynFlags
-       ; sequel    <- getSequel
-       ; updfr_off <- getUpdFrameOff
-       ; case sequel of
-           Return ->
-             do { adjustHpBackwards
-                ; let e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
-                ; emit (mkReturn dflags (entryCode dflags e) results updfr_off)
-                }
-           AssignTo regs adjust ->
-             do { when adjust adjustHpBackwards
-                ; emitMultiAssign  regs results }
-       ; return AssignedDirectly
-       }
-
-
--- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
--- using the call/return convention @conv@, passing @args@, and
--- returning the results to the current sequel.
---
-emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
-emitCall convs fun args
-  = emitCallWithExtraStack convs fun args noExtraStack
-
-
--- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
--- entry-code of @fun@, using the call/return convention @conv@,
--- passing @args@, pushing some extra stack frames described by
--- @stack@, and returning the results to the current sequel.
---
-emitCallWithExtraStack
-   :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
-   -> [CmmExpr] -> FCode ReturnKind
-emitCallWithExtraStack (callConv, retConv) fun args extra_stack
-  = do  { dflags <- getDynFlags
-        ; adjustHpBackwards
-        ; sequel <- getSequel
-        ; updfr_off <- getUpdFrameOff
-        ; case sequel of
-            Return -> do
-              emit $ mkJumpExtra dflags callConv fun args updfr_off extra_stack
-              return AssignedDirectly
-            AssignTo res_regs _ -> do
-              k <- newBlockId
-              let area = Young k
-                  (off, _, copyin) = copyInOflow dflags retConv area res_regs []
-                  copyout = mkCallReturnsTo dflags fun callConv args k off updfr_off
-                                   extra_stack
-              tscope <- getTickScope
-              emit (copyout <*> mkLabel k tscope <*> copyin)
-              return (ReturnedTo k off)
-      }
-
-
-adjustHpBackwards :: FCode ()
--- This function adjusts the heap pointer just before a tail call or
--- return.  At a call or return, the virtual heap pointer may be less
--- than the real Hp, because the latter was advanced to deal with
--- the worst-case branch of the code, and we may be in a better-case
--- branch.  In that case, move the real Hp *back* and retract some
--- ticky allocation count.
---
--- It *does not* deal with high-water-mark adjustment.  That's done by
--- functions which allocate heap.
-adjustHpBackwards
-  = do  { hp_usg <- getHpUsage
-        ; let rHp = realHp hp_usg
-              vHp = virtHp hp_usg
-              adjust_words = vHp -rHp
-        ; new_hp <- getHpRelOffset vHp
-
-        ; emit (if adjust_words == 0
-                then mkNop
-                else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp
-
-        ; tickyAllocHeap False adjust_words -- ...ditto
-
-        ; setRealHp vHp
-        }
-
-
--------------------------------------------------------------------------
---        Making calls: directCall and slowCall
--------------------------------------------------------------------------
-
--- General plan is:
---   - we'll make *one* fast call, either to the function itself
---     (directCall) or to stg_ap_<pat>_fast (slowCall)
---     Any left-over arguments will be pushed on the stack,
---
---     e.g. Sp[old+8]  = arg1
---          Sp[old+16] = arg2
---          Sp[old+32] = stg_ap_pp_info
---          R2 = arg3
---          R3 = arg4
---          call f() return to Nothing updfr_off: 32
-
-
-directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
--- (directCall f n args)
--- calls f(arg1, ..., argn), and applies the result to the remaining args
--- The function f has arity n, and there are guaranteed at least n args
--- Both arity and args include void args
-directCall conv lbl arity stg_args
-  = do  { argreps <- getArgRepsAmodes stg_args
-        ; direct_call "directCall" conv lbl arity argreps }
-
-
-slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
--- (slowCall fun args) applies fun to args, returning the results to Sequel
-slowCall fun stg_args
-  = do  dflags <- getDynFlags
-        argsreps <- getArgRepsAmodes stg_args
-        let (rts_fun, arity) = slowCallPattern (map fst argsreps)
-
-        (r, slow_code) <- getCodeR $ do
-           r <- direct_call "slow_call" NativeNodeCall
-                 (mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps)
-           emitComment $ mkFastString ("slow_call for " ++
-                                      showSDoc dflags (ppr fun) ++
-                                      " with pat " ++ unpackFS rts_fun)
-           return r
-
-        -- Note [avoid intermediate PAPs]
-        let n_args = length stg_args
-        if n_args > arity && optLevel dflags >= 2
-           then do
-             funv <- (CmmReg . CmmLocal) `fmap` assignTemp fun
-             fun_iptr <- (CmmReg . CmmLocal) `fmap`
-                    assignTemp (closureInfoPtr dflags (cmmUntag dflags funv))
-
-             -- ToDo: we could do slightly better here by reusing the
-             -- continuation from the slow call, which we have in r.
-             -- Also we'd like to push the continuation on the stack
-             -- before the branch, so that we only get one copy of the
-             -- code that saves all the live variables across the
-             -- call, but that might need some improvements to the
-             -- special case in the stack layout code to handle this
-             -- (see Note [diamond proc point]).
-
-             fast_code <- getCode $
-                emitCall (NativeNodeCall, NativeReturn)
-                  (entryCode dflags fun_iptr)
-                  (nonVArgs ((P,Just funv):argsreps))
-
-             slow_lbl <- newBlockId
-             fast_lbl <- newBlockId
-             is_tagged_lbl <- newBlockId
-             end_lbl <- newBlockId
-
-             let correct_arity = cmmEqWord dflags (funInfoArity dflags fun_iptr)
-                                                  (mkIntExpr dflags n_args)
-
-             tscope <- getTickScope
-             emit (mkCbranch (cmmIsTagged dflags funv)
-                             is_tagged_lbl slow_lbl (Just True)
-                   <*> mkLabel is_tagged_lbl tscope
-                   <*> mkCbranch correct_arity fast_lbl slow_lbl (Just True)
-                   <*> mkLabel fast_lbl tscope
-                   <*> fast_code
-                   <*> mkBranch end_lbl
-                   <*> mkLabel slow_lbl tscope
-                   <*> slow_code
-                   <*> mkLabel end_lbl tscope)
-             return r
-
-           else do
-             emit slow_code
-             return r
-
-
--- Note [avoid intermediate PAPs]
---
--- A slow call which needs multiple generic apply patterns will be
--- almost guaranteed to create one or more intermediate PAPs when
--- applied to a function that takes the correct number of arguments.
--- We try to avoid this situation by generating code to test whether
--- we are calling a function with the correct number of arguments
--- first, i.e.:
---
---   if (TAG(f) != 0} {  // f is not a thunk
---      if (f->info.arity == n) {
---         ... make a fast call to f ...
---      }
---   }
---   ... otherwise make the slow call ...
---
--- We *only* do this when the call requires multiple generic apply
--- functions, which requires pushing extra stack frames and probably
--- results in intermediate PAPs.  (I say probably, because it might be
--- that we're over-applying a function, but that seems even less
--- likely).
---
--- This very rarely applies, but if it does happen in an inner loop it
--- can have a severe impact on performance (#6084).
-
-
---------------
-direct_call :: String
-            -> Convention     -- e.g. NativeNodeCall or NativeDirectCall
-            -> CLabel -> RepArity
-            -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind
-direct_call caller call_conv lbl arity args
-  | debugIsOn && args `lengthLessThan` real_arity  -- Too few args
-  = do -- Caller should ensure that there enough args!
-       pprPanic "direct_call" $
-            text caller <+> ppr arity <+>
-            ppr lbl <+> ppr (length args) <+>
-            ppr (map snd args) <+> ppr (map fst args)
-
-  | null rest_args  -- Precisely the right number of arguments
-  = emitCall (call_conv, NativeReturn) target (nonVArgs args)
-
-  | otherwise       -- Note [over-saturated calls]
-  = do dflags <- getDynFlags
-       emitCallWithExtraStack (call_conv, NativeReturn)
-                              target
-                              (nonVArgs fast_args)
-                              (nonVArgs (stack_args dflags))
-  where
-    target = CmmLit (CmmLabel lbl)
-    (fast_args, rest_args) = splitAt real_arity args
-    stack_args dflags = slowArgs dflags rest_args
-    real_arity = case call_conv of
-                   NativeNodeCall -> arity+1
-                   _              -> arity
-
-
--- When constructing calls, it is easier to keep the ArgReps and the
--- CmmExprs zipped together.  However, a void argument has no
--- representation, so we need to use Maybe CmmExpr (the alternative of
--- using zeroCLit or even undefined would work, but would be ugly).
---
-getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
-getArgRepsAmodes = mapM getArgRepAmode
-  where getArgRepAmode arg
-           | V <- rep  = return (V, Nothing)
-           | otherwise = do expr <- getArgAmode (NonVoid arg)
-                            return (rep, Just expr)
-           where rep = toArgRep (argPrimRep arg)
-
-nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
-nonVArgs [] = []
-nonVArgs ((_,Nothing)  : args) = nonVArgs args
-nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
-
-{-
-Note [over-saturated calls]
-
-The natural thing to do for an over-saturated call would be to call
-the function with the correct number of arguments, and then apply the
-remaining arguments to the value returned, e.g.
-
-  f a b c d   (where f has arity 2)
-  -->
-  r = call f(a,b)
-  call r(c,d)
-
-but this entails
-  - saving c and d on the stack
-  - making a continuation info table
-  - at the continuation, loading c and d off the stack into regs
-  - finally, call r
-
-Note that since there are a fixed number of different r's
-(e.g.  stg_ap_pp_fast), we can also pre-compile continuations
-that correspond to each of them, rather than generating a fresh
-one for each over-saturated call.
-
-Not only does this generate much less code, it is faster too.  We will
-generate something like:
-
-Sp[old+16] = c
-Sp[old+24] = d
-Sp[old+32] = stg_ap_pp_info
-call f(a,b) -- usual calling convention
-
-For the purposes of the CmmCall node, we count this extra stack as
-just more arguments that we are passing on the stack (cml_args).
--}
-
--- | 'slowArgs' takes a list of function arguments and prepares them for
--- pushing on the stack for "extra" arguments to a function which requires
--- fewer arguments than we currently have.
-slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
-slowArgs _ [] = []
-slowArgs dflags args -- careful: reps contains voids (V), but args does not
-  | gopt Opt_SccProfilingOn dflags
-              = save_cccs ++ this_pat ++ slowArgs dflags rest_args
-  | otherwise =              this_pat ++ slowArgs dflags rest_args
-  where
-    (arg_pat, n)            = slowCallPattern (map fst args)
-    (call_args, rest_args)  = splitAt n args
-
-    stg_ap_pat = mkCmmRetInfoLabel rtsUnitId arg_pat
-    this_pat   = (N, Just (mkLblExpr stg_ap_pat)) : call_args
-    save_cccs  = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just cccsExpr)]
-    save_cccs_lbl = mkCmmRetInfoLabel rtsUnitId (fsLit "stg_restore_cccs")
-
--------------------------------------------------------------------------
-----        Laying out objects on the heap and stack
--------------------------------------------------------------------------
-
--- The heap always grows upwards, so hpRel is easy to compute
-hpRel :: VirtualHpOffset         -- virtual offset of Hp
-      -> VirtualHpOffset         -- virtual offset of The Thing
-      -> WordOff                -- integer word offset
-hpRel hp off = off - hp
-
-getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
--- See Note [Virtual and real heap pointers] in StgCmmMonad
-getHpRelOffset virtual_offset
-  = do dflags <- getDynFlags
-       hp_usg <- getHpUsage
-       return (cmmRegOffW dflags hpReg (hpRel (realHp hp_usg) virtual_offset))
-
-data FieldOffOrPadding a
-    = FieldOff (NonVoid a) -- Something that needs an offset.
-               ByteOff     -- Offset in bytes.
-    | Padding ByteOff  -- Length of padding in bytes.
-              ByteOff  -- Offset in bytes.
-
--- | Used to tell the various @mkVirtHeapOffsets@ functions what kind
--- of header the object has.  This will be accounted for in the
--- offsets of the fields returned.
-data ClosureHeader
-  = NoHeader
-  | StdHeader
-  | ThunkHeader
-
-mkVirtHeapOffsetsWithPadding
-  :: DynFlags
-  -> ClosureHeader            -- What kind of header to account for
-  -> [NonVoid (PrimRep, a)]   -- Things to make offsets for
-  -> ( WordOff                -- Total number of words allocated
-     , WordOff                -- Number of words allocated for *pointers*
-     , [FieldOffOrPadding a]  -- Either an offset or padding.
-     )
-
--- Things with their offsets from start of object in order of
--- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER
--- First in list gets lowest offset, which is initial offset + 1.
---
--- mkVirtHeapOffsetsWithPadding always returns boxed things with smaller offsets
--- than the unboxed things
-
-mkVirtHeapOffsetsWithPadding dflags header things =
-    ASSERT(not (any (isVoidRep . fst . fromNonVoid) things))
-    ( tot_wds
-    , bytesToWordsRoundUp dflags bytes_of_ptrs
-    , concat (ptrs_w_offsets ++ non_ptrs_w_offsets) ++ final_pad
-    )
-  where
-    hdr_words = case header of
-      NoHeader -> 0
-      StdHeader -> fixedHdrSizeW dflags
-      ThunkHeader -> thunkHdrSize dflags
-    hdr_bytes = wordsToBytes dflags hdr_words
-
-    (ptrs, non_ptrs) = partition (isGcPtrRep . fst . fromNonVoid) things
-
-    (bytes_of_ptrs, ptrs_w_offsets) =
-       mapAccumL computeOffset 0 ptrs
-    (tot_bytes, non_ptrs_w_offsets) =
-       mapAccumL computeOffset bytes_of_ptrs non_ptrs
-
-    tot_wds = bytesToWordsRoundUp dflags tot_bytes
-
-    final_pad_size = tot_wds * word_size - tot_bytes
-    final_pad
-        | final_pad_size > 0 = [(Padding final_pad_size
-                                         (hdr_bytes + tot_bytes))]
-        | otherwise          = []
-
-    word_size = wORD_SIZE dflags
-
-    computeOffset bytes_so_far nv_thing =
-        (new_bytes_so_far, with_padding field_off)
-      where
-        (rep, thing) = fromNonVoid nv_thing
-
-        -- Size of the field in bytes.
-        !sizeB = primRepSizeB dflags rep
-
-        -- Align the start offset (eg, 2-byte value should be 2-byte aligned).
-        -- But not more than to a word.
-        !align = min word_size sizeB
-        !start = roundUpTo bytes_so_far align
-        !padding = start - bytes_so_far
-
-        -- Final offset is:
-        --   size of header + bytes_so_far + padding
-        !final_offset = hdr_bytes + bytes_so_far + padding
-        !new_bytes_so_far = start + sizeB
-        field_off = FieldOff (NonVoid thing) final_offset
-
-        with_padding field_off
-            | padding == 0 = [field_off]
-            | otherwise    = [ Padding padding (hdr_bytes + bytes_so_far)
-                             , field_off
-                             ]
-
-
-mkVirtHeapOffsets
-  :: DynFlags
-  -> ClosureHeader            -- What kind of header to account for
-  -> [NonVoid (PrimRep,a)]    -- Things to make offsets for
-  -> (WordOff,                -- _Total_ number of words allocated
-      WordOff,                -- Number of words allocated for *pointers*
-      [(NonVoid a, ByteOff)])
-mkVirtHeapOffsets dflags header things =
-    ( tot_wds
-    , ptr_wds
-    , [ (field, offset) | (FieldOff field offset) <- things_offsets ]
-    )
-  where
-   (tot_wds, ptr_wds, things_offsets) =
-       mkVirtHeapOffsetsWithPadding dflags header things
-
--- | Just like mkVirtHeapOffsets, but for constructors
-mkVirtConstrOffsets
-  :: DynFlags -> [NonVoid (PrimRep, a)]
-  -> (WordOff, WordOff, [(NonVoid a, ByteOff)])
-mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags StdHeader
-
--- | Just like mkVirtConstrOffsets, but used when we don't have the actual
--- arguments. Useful when e.g. generating info tables; we just need to know
--- sizes of pointer and non-pointer fields.
-mkVirtConstrSizes :: DynFlags -> [NonVoid PrimRep] -> (WordOff, WordOff)
-mkVirtConstrSizes dflags field_reps
-  = (tot_wds, ptr_wds)
-  where
-    (tot_wds, ptr_wds, _) =
-       mkVirtConstrOffsets dflags
-         (map (\nv_rep -> NonVoid (fromNonVoid nv_rep, ())) field_reps)
-
--------------------------------------------------------------------------
---
---        Making argument descriptors
---
---  An argument descriptor describes the layout of args on the stack,
---  both for         * GC (stack-layout) purposes, and
---                * saving/restoring registers when a heap-check fails
---
--- Void arguments aren't important, therefore (contrast constructSlowCall)
---
--------------------------------------------------------------------------
-
--- bring in ARG_P, ARG_N, etc.
-#include "../includes/rts/storage/FunTypes.h"
-
-mkArgDescr :: DynFlags -> [Id] -> ArgDescr
-mkArgDescr dflags args
-  = let arg_bits = argBits dflags arg_reps
-        arg_reps = filter isNonV (map idArgRep args)
-           -- Getting rid of voids eases matching of standard patterns
-    in case stdPattern arg_reps of
-         Just spec_id -> ArgSpec spec_id
-         Nothing      -> ArgGen  arg_bits
-
-argBits :: DynFlags -> [ArgRep] -> [Bool]        -- True for non-ptr, False for ptr
-argBits _      []           = []
-argBits dflags (P   : args) = False : argBits dflags args
-argBits dflags (arg : args) = take (argRepSizeW dflags arg) (repeat True)
-                    ++ argBits dflags args
-
-----------------------
-stdPattern :: [ArgRep] -> Maybe Int
-stdPattern reps
-  = case reps of
-        []    -> Just ARG_NONE        -- just void args, probably
-        [N]   -> Just ARG_N
-        [P]   -> Just ARG_P
-        [F]   -> Just ARG_F
-        [D]   -> Just ARG_D
-        [L]   -> Just ARG_L
-        [V16] -> Just ARG_V16
-        [V32] -> Just ARG_V32
-        [V64] -> Just ARG_V64
-
-        [N,N] -> Just ARG_NN
-        [N,P] -> Just ARG_NP
-        [P,N] -> Just ARG_PN
-        [P,P] -> Just ARG_PP
-
-        [N,N,N] -> Just ARG_NNN
-        [N,N,P] -> Just ARG_NNP
-        [N,P,N] -> Just ARG_NPN
-        [N,P,P] -> Just ARG_NPP
-        [P,N,N] -> Just ARG_PNN
-        [P,N,P] -> Just ARG_PNP
-        [P,P,N] -> Just ARG_PPN
-        [P,P,P] -> Just ARG_PPP
-
-        [P,P,P,P]     -> Just ARG_PPPP
-        [P,P,P,P,P]   -> Just ARG_PPPPP
-        [P,P,P,P,P,P] -> Just ARG_PPPPPP
-
-        _ -> Nothing
-
--------------------------------------------------------------------------
---
---        Generating the info table and code for a closure
---
--------------------------------------------------------------------------
-
--- Here we make an info table of type 'CmmInfo'.  The concrete
--- representation as a list of 'CmmAddr' is handled later
--- in the pipeline by 'cmmToRawCmm'.
--- When loading the free variables, a function closure pointer may be tagged,
--- so we must take it into account.
-
-emitClosureProcAndInfoTable :: Bool                    -- top-level?
-                            -> Id                      -- name of the closure
-                            -> LambdaFormInfo
-                            -> CmmInfoTable
-                            -> [NonVoid Id]            -- incoming arguments
-                            -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body
-                            -> FCode ()
-emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body
- = do   { dflags <- getDynFlags
-        -- Bind the binder itself, but only if it's not a top-level
-        -- binding. We need non-top let-bindings to refer to the
-        -- top-level binding, which this binding would incorrectly shadow.
-        ; node <- if top_lvl then return $ idToReg dflags (NonVoid bndr)
-                  else bindToReg (NonVoid bndr) lf_info
-        ; let node_points = nodeMustPointToIt dflags lf_info
-        ; arg_regs <- bindArgsToRegs args
-        ; let args' = if node_points then (node : arg_regs) else arg_regs
-              conv  = if nodeMustPointToIt dflags lf_info then NativeNodeCall
-                                                          else NativeDirectCall
-              (offset, _, _) = mkCallEntry dflags conv args' []
-        ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs)
-        }
-
--- Data constructors need closures, but not with all the argument handling
--- needed for functions. The shared part goes here.
-emitClosureAndInfoTable ::
-  CmmInfoTable -> Convention -> [LocalReg] -> FCode () -> FCode ()
-emitClosureAndInfoTable info_tbl conv args body
-  = do { (_, blks) <- getCodeScoped body
-       ; let entry_lbl = toEntryLbl (cit_lbl info_tbl)
-       ; emitProcWithConvention conv (Just info_tbl) entry_lbl args blks
-       }
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
deleted file mode 100644
index d6f84c6a0a..0000000000
--- a/compiler/codeGen/StgCmmMonad.hs
+++ /dev/null
@@ -1,861 +0,0 @@
-{-# LANGUAGE DeriveFunctor #-}
-{-# LANGUAGE GADTs #-}
-
------------------------------------------------------------------------------
---
--- Monad for Stg to C-- code generation
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmMonad (
-        FCode,        -- type
-
-        initC, runC, fixC,
-        newUnique,
-
-        emitLabel,
-
-        emit, emitDecl,
-        emitProcWithConvention, emitProcWithStackFrame,
-        emitOutOfLine, emitAssign, emitStore,
-        emitComment, emitTick, emitUnwind,
-
-        getCmm, aGraphToGraph,
-        getCodeR, getCode, getCodeScoped, getHeapUsage,
-
-        mkCmmIfThenElse, mkCmmIfThen, mkCmmIfGoto,
-        mkCmmIfThenElse', mkCmmIfThen', mkCmmIfGoto',
-
-        mkCall, mkCmmCall,
-
-        forkClosureBody, forkLneBody, forkAlts, forkAltPair, codeOnly,
-
-        ConTagZ,
-
-        Sequel(..), ReturnKind(..),
-        withSequel, getSequel,
-
-        setTickyCtrLabel, getTickyCtrLabel,
-        tickScope, getTickScope,
-
-        withUpdFrameOff, getUpdFrameOff, initUpdFrameOff,
-
-        HeapUsage(..), VirtualHpOffset,        initHpUsage,
-        getHpUsage,  setHpUsage, heapHWM,
-        setVirtHp, getVirtHp, setRealHp,
-
-        getModuleName,
-
-        -- ideally we wouldn't export these, but some other modules access internal state
-        getState, setState, getSelfLoop, withSelfLoop, getInfoDown, getDynFlags, getThisPackage,
-
-        -- more localised access to monad state
-        CgIdInfo(..),
-        getBinds, setBinds,
-
-        -- out of general friendliness, we also export ...
-        CgInfoDownwards(..), CgState(..)        -- non-abstract
-    ) where
-
-import GhcPrelude hiding( sequence, succ )
-
-import Cmm
-import StgCmmClosure
-import DynFlags
-import Hoopl.Collections
-import MkGraph
-import BlockId
-import CLabel
-import SMRep
-import Module
-import Id
-import VarEnv
-import OrdList
-import BasicTypes( ConTagZ )
-import Unique
-import UniqSupply
-import FastString
-import Outputable
-import Util
-
-import Control.Monad
-import Data.List
-
-
-
---------------------------------------------------------
--- The FCode monad and its types
---
--- FCode is the monad plumbed through the Stg->Cmm code generator, and
--- the Cmm parser.  It contains the following things:
---
---  - A writer monad, collecting:
---    - code for the current function, in the form of a CmmAGraph.
---      The function "emit" appends more code to this.
---    - the top-level CmmDecls accumulated so far
---
---  - A state monad with:
---    - the local bindings in scope
---    - the current heap usage
---    - a UniqSupply
---
---  - A reader monad, for CgInfoDownwards, containing
---    - DynFlags,
---    - the current Module
---    - the update-frame offset
---    - the ticky counter label
---    - the Sequel (the continuation to return to)
---    - the self-recursive tail call information
-
---------------------------------------------------------
-
-newtype FCode a = FCode { doFCode :: CgInfoDownwards -> CgState -> (a, CgState) }
-    deriving (Functor)
-
-instance Applicative FCode where
-    pure val = FCode (\_info_down state -> (val, state))
-    {-# INLINE pure #-}
-    (<*>) = ap
-
-instance Monad FCode where
-    FCode m >>= k = FCode $
-        \info_down state ->
-            case m info_down state of
-              (m_result, new_state) ->
-                 case k m_result of
-                   FCode kcode -> kcode info_down new_state
-    {-# INLINE (>>=) #-}
-
-instance MonadUnique FCode where
-  getUniqueSupplyM = cgs_uniqs <$> getState
-  getUniqueM = FCode $ \_ st ->
-    let (u, us') = takeUniqFromSupply (cgs_uniqs st)
-    in (u, st { cgs_uniqs = us' })
-
-initC :: IO CgState
-initC  = do { uniqs <- mkSplitUniqSupply 'c'
-            ; return (initCgState uniqs) }
-
-runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState)
-runC dflags mod st fcode = doFCode fcode (initCgInfoDown dflags mod) st
-
-fixC :: (a -> FCode a) -> FCode a
-fixC fcode = FCode $
-    \info_down state -> let (v, s) = doFCode (fcode v) info_down state
-                        in (v, s)
-
---------------------------------------------------------
---        The code generator environment
---------------------------------------------------------
-
--- This monadery has some information that it only passes
--- *downwards*, as well as some ``state'' which is modified
--- as we go along.
-
-data CgInfoDownwards        -- information only passed *downwards* by the monad
-  = MkCgInfoDown {
-        cgd_dflags    :: DynFlags,
-        cgd_mod       :: Module,            -- Module being compiled
-        cgd_updfr_off :: UpdFrameOffset,    -- Size of current update frame
-        cgd_ticky     :: CLabel,            -- Current destination for ticky counts
-        cgd_sequel    :: Sequel,            -- What to do at end of basic block
-        cgd_self_loop :: Maybe SelfLoopInfo,-- Which tail calls can be compiled
-                                            -- as local jumps? See Note
-                                            -- [Self-recursive tail calls] in
-                                            -- StgCmmExpr
-        cgd_tick_scope:: CmmTickScope       -- Tick scope for new blocks & ticks
-  }
-
-type CgBindings = IdEnv CgIdInfo
-
-data CgIdInfo
-  = CgIdInfo
-        { cg_id :: Id   -- Id that this is the info for
-        , cg_lf  :: LambdaFormInfo
-        , cg_loc :: CgLoc                     -- CmmExpr for the *tagged* value
-        }
-
-instance Outputable CgIdInfo where
-  ppr (CgIdInfo { cg_id = id, cg_loc = loc })
-    = ppr id <+> text "-->" <+> ppr loc
-
--- Sequel tells what to do with the result of this expression
-data Sequel
-  = Return              -- Return result(s) to continuation found on the stack.
-
-  | AssignTo
-        [LocalReg]      -- Put result(s) in these regs and fall through
-                        -- NB: no void arguments here
-                        --
-        Bool            -- Should we adjust the heap pointer back to
-                        -- recover space that's unused on this path?
-                        -- We need to do this only if the expression
-                        -- may allocate (e.g. it's a foreign call or
-                        -- allocating primOp)
-
-instance Outputable Sequel where
-    ppr Return = text "Return"
-    ppr (AssignTo regs b) = text "AssignTo" <+> ppr regs <+> ppr b
-
--- See Note [sharing continuations] below
-data ReturnKind
-  = AssignedDirectly
-  | ReturnedTo BlockId ByteOff
-
--- Note [sharing continuations]
---
--- ReturnKind says how the expression being compiled returned its
--- results: either by assigning directly to the registers specified
--- by the Sequel, or by returning to a continuation that does the
--- assignments.  The point of this is we might be able to re-use the
--- continuation in a subsequent heap-check.  Consider:
---
---    case f x of z
---      True  -> <True code>
---      False -> <False code>
---
--- Naively we would generate
---
---    R2 = x   -- argument to f
---    Sp[young(L1)] = L1
---    call f returns to L1
---  L1:
---    z = R1
---    if (z & 1) then Ltrue else Lfalse
---  Ltrue:
---    Hp = Hp + 24
---    if (Hp > HpLim) then L4 else L7
---  L4:
---    HpAlloc = 24
---    goto L5
---  L5:
---    R1 = z
---    Sp[young(L6)] = L6
---    call stg_gc_unpt_r1 returns to L6
---  L6:
---    z = R1
---    goto L1
---  L7:
---    <True code>
---  Lfalse:
---    <False code>
---
--- We want the gc call in L4 to return to L1, and discard L6.  Note
--- that not only can we share L1 and L6, but the assignment of the
--- return address in L4 is unnecessary because the return address for
--- L1 is already on the stack.  We used to catch the sharing of L1 and
--- L6 in the common-block-eliminator, but not the unnecessary return
--- address assignment.
---
--- Since this case is so common I decided to make it more explicit and
--- robust by programming the sharing directly, rather than relying on
--- the common-block eliminator to catch it.  This makes
--- common-block-elimination an optional optimisation, and furthermore
--- generates less code in the first place that we have to subsequently
--- clean up.
---
--- There are some rarer cases of common blocks that we don't catch
--- this way, but that's ok.  Common-block-elimination is still available
--- to catch them when optimisation is enabled.  Some examples are:
---
---   - when both the True and False branches do a heap check, we
---     can share the heap-check failure code L4a and maybe L4
---
---   - in a case-of-case, there might be multiple continuations that
---     we can common up.
---
--- It is always safe to use AssignedDirectly.  Expressions that jump
--- to the continuation from multiple places (e.g. case expressions)
--- fall back to AssignedDirectly.
---
-
-
-initCgInfoDown :: DynFlags -> Module -> CgInfoDownwards
-initCgInfoDown dflags mod
-  = MkCgInfoDown { cgd_dflags    = dflags
-                 , cgd_mod       = mod
-                 , cgd_updfr_off = initUpdFrameOff dflags
-                 , cgd_ticky     = mkTopTickyCtrLabel
-                 , cgd_sequel    = initSequel
-                 , cgd_self_loop = Nothing
-                 , cgd_tick_scope= GlobalScope }
-
-initSequel :: Sequel
-initSequel = Return
-
-initUpdFrameOff :: DynFlags -> UpdFrameOffset
-initUpdFrameOff dflags = widthInBytes (wordWidth dflags) -- space for the RA
-
-
---------------------------------------------------------
---        The code generator state
---------------------------------------------------------
-
-data CgState
-  = MkCgState {
-     cgs_stmts :: CmmAGraph,          -- Current procedure
-
-     cgs_tops  :: OrdList CmmDecl,
-        -- Other procedures and data blocks in this compilation unit
-        -- Both are ordered only so that we can
-        -- reduce forward references, when it's easy to do so
-
-     cgs_binds :: CgBindings,
-
-     cgs_hp_usg  :: HeapUsage,
-
-     cgs_uniqs :: UniqSupply }
-
-data HeapUsage   -- See Note [Virtual and real heap pointers]
-  = HeapUsage {
-        virtHp :: VirtualHpOffset,       -- Virtual offset of highest-allocated word
-                                         --   Incremented whenever we allocate
-        realHp :: VirtualHpOffset        -- realHp: Virtual offset of real heap ptr
-                                         --   Used in instruction addressing modes
-    }
-
-type VirtualHpOffset = WordOff
-
-
-{- Note [Virtual and real heap pointers]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The code generator can allocate one or more objects contiguously, performing
-one heap check to cover allocation of all the objects at once.  Let's call
-this little chunk of heap space an "allocation chunk".  The code generator
-will emit code to
-  * Perform a heap-exhaustion check
-  * Move the heap pointer to the end of the allocation chunk
-  * Allocate multiple objects within the chunk
-
-The code generator uses VirtualHpOffsets to address words within a
-single allocation chunk; these start at one and increase positively.
-The first word of the chunk has VirtualHpOffset=1, the second has
-VirtualHpOffset=2, and so on.
-
- * The field realHp tracks (the VirtualHpOffset) where the real Hp
-   register is pointing.  Typically it'll be pointing to the end of the
-   allocation chunk.
-
- * The field virtHp gives the VirtualHpOffset of the highest-allocated
-   word so far.  It starts at zero (meaning no word has been allocated),
-   and increases whenever an object is allocated.
-
-The difference between realHp and virtHp gives the offset from the
-real Hp register of a particular word in the allocation chunk. This
-is what getHpRelOffset does.  Since the returned offset is relative
-to the real Hp register, it is valid only until you change the real
-Hp register.  (Changing virtHp doesn't matter.)
--}
-
-
-initCgState :: UniqSupply -> CgState
-initCgState uniqs
-  = MkCgState { cgs_stmts  = mkNop
-              , cgs_tops   = nilOL
-              , cgs_binds  = emptyVarEnv
-              , cgs_hp_usg = initHpUsage
-              , cgs_uniqs  = uniqs }
-
-stateIncUsage :: CgState -> CgState -> CgState
--- stateIncUsage@ e1 e2 incorporates in e1
--- the heap high water mark found in e2.
-stateIncUsage s1 s2@(MkCgState { cgs_hp_usg = hp_usg })
-     = s1 { cgs_hp_usg  = cgs_hp_usg  s1 `maxHpHw`  virtHp hp_usg }
-       `addCodeBlocksFrom` s2
-
-addCodeBlocksFrom :: CgState -> CgState -> CgState
--- Add code blocks from the latter to the former
--- (The cgs_stmts will often be empty, but not always; see codeOnly)
-s1 `addCodeBlocksFrom` s2
-  = s1 { cgs_stmts = cgs_stmts s1 MkGraph.<*> cgs_stmts s2,
-         cgs_tops  = cgs_tops  s1 `appOL` cgs_tops  s2 }
-
-
--- The heap high water mark is the larger of virtHp and hwHp.  The latter is
--- only records the high water marks of forked-off branches, so to find the
--- heap high water mark you have to take the max of virtHp and hwHp.  Remember,
--- virtHp never retreats!
---
--- Note Jan 04: ok, so why do we only look at the virtual Hp??
-
-heapHWM :: HeapUsage -> VirtualHpOffset
-heapHWM = virtHp
-
-initHpUsage :: HeapUsage
-initHpUsage = HeapUsage { virtHp = 0, realHp = 0 }
-
-maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage
-hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw }
-
---------------------------------------------------------
--- Operators for getting and setting the state and "info_down".
---------------------------------------------------------
-
-getState :: FCode CgState
-getState = FCode $ \_info_down state -> (state, state)
-
-setState :: CgState -> FCode ()
-setState state = FCode $ \_info_down _ -> ((), state)
-
-getHpUsage :: FCode HeapUsage
-getHpUsage = do
-        state <- getState
-        return $ cgs_hp_usg state
-
-setHpUsage :: HeapUsage -> FCode ()
-setHpUsage new_hp_usg = do
-        state <- getState
-        setState $ state {cgs_hp_usg = new_hp_usg}
-
-setVirtHp :: VirtualHpOffset -> FCode ()
-setVirtHp new_virtHp
-  = do  { hp_usage <- getHpUsage
-        ; setHpUsage (hp_usage {virtHp = new_virtHp}) }
-
-getVirtHp :: FCode VirtualHpOffset
-getVirtHp
-  = do  { hp_usage <- getHpUsage
-        ; return (virtHp hp_usage) }
-
-setRealHp ::  VirtualHpOffset -> FCode ()
-setRealHp new_realHp
-  = do  { hp_usage <- getHpUsage
-        ; setHpUsage (hp_usage {realHp = new_realHp}) }
-
-getBinds :: FCode CgBindings
-getBinds = do
-        state <- getState
-        return $ cgs_binds state
-
-setBinds :: CgBindings -> FCode ()
-setBinds new_binds = do
-        state <- getState
-        setState $ state {cgs_binds = new_binds}
-
-withState :: FCode a -> CgState -> FCode (a,CgState)
-withState (FCode fcode) newstate = FCode $ \info_down state ->
-  case fcode info_down newstate of
-    (retval, state2) -> ((retval,state2), state)
-
-newUniqSupply :: FCode UniqSupply
-newUniqSupply = do
-        state <- getState
-        let (us1, us2) = splitUniqSupply (cgs_uniqs state)
-        setState $ state { cgs_uniqs = us1 }
-        return us2
-
-newUnique :: FCode Unique
-newUnique = do
-        state <- getState
-        let (u,us') = takeUniqFromSupply (cgs_uniqs state)
-        setState $ state { cgs_uniqs = us' }
-        return u
-
-------------------
-getInfoDown :: FCode CgInfoDownwards
-getInfoDown = FCode $ \info_down state -> (info_down,state)
-
-getSelfLoop :: FCode (Maybe SelfLoopInfo)
-getSelfLoop = do
-        info_down <- getInfoDown
-        return $ cgd_self_loop info_down
-
-withSelfLoop :: SelfLoopInfo -> FCode a -> FCode a
-withSelfLoop self_loop code = do
-        info_down <- getInfoDown
-        withInfoDown code (info_down {cgd_self_loop = Just self_loop})
-
-instance HasDynFlags FCode where
-    getDynFlags = liftM cgd_dflags getInfoDown
-
-getThisPackage :: FCode UnitId
-getThisPackage = liftM thisPackage getDynFlags
-
-withInfoDown :: FCode a -> CgInfoDownwards -> FCode a
-withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state
-
--- ----------------------------------------------------------------------------
--- Get the current module name
-
-getModuleName :: FCode Module
-getModuleName = do { info <- getInfoDown; return (cgd_mod info) }
-
--- ----------------------------------------------------------------------------
--- Get/set the end-of-block info
-
-withSequel :: Sequel -> FCode a -> FCode a
-withSequel sequel code
-  = do  { info  <- getInfoDown
-        ; withInfoDown code (info {cgd_sequel = sequel, cgd_self_loop = Nothing }) }
-
-getSequel :: FCode Sequel
-getSequel = do  { info <- getInfoDown
-                ; return (cgd_sequel info) }
-
--- ----------------------------------------------------------------------------
--- Get/set the size of the update frame
-
--- We keep track of the size of the update frame so that we
--- can set the stack pointer to the proper address on return
--- (or tail call) from the closure.
--- There should be at most one update frame for each closure.
--- Note: I'm including the size of the original return address
--- in the size of the update frame -- hence the default case on `get'.
-
-withUpdFrameOff :: UpdFrameOffset -> FCode a -> FCode a
-withUpdFrameOff size code
-  = do  { info  <- getInfoDown
-        ; withInfoDown code (info {cgd_updfr_off = size }) }
-
-getUpdFrameOff :: FCode UpdFrameOffset
-getUpdFrameOff
-  = do  { info  <- getInfoDown
-        ; return $ cgd_updfr_off info }
-
--- ----------------------------------------------------------------------------
--- Get/set the current ticky counter label
-
-getTickyCtrLabel :: FCode CLabel
-getTickyCtrLabel = do
-        info <- getInfoDown
-        return (cgd_ticky info)
-
-setTickyCtrLabel :: CLabel -> FCode a -> FCode a
-setTickyCtrLabel ticky code = do
-        info <- getInfoDown
-        withInfoDown code (info {cgd_ticky = ticky})
-
--- ----------------------------------------------------------------------------
--- Manage tick scopes
-
--- | The current tick scope. We will assign this to generated blocks.
-getTickScope :: FCode CmmTickScope
-getTickScope = do
-        info <- getInfoDown
-        return (cgd_tick_scope info)
-
--- | Places blocks generated by the given code into a fresh
--- (sub-)scope. This will make sure that Cmm annotations in our scope
--- will apply to the Cmm blocks generated therein - but not the other
--- way around.
-tickScope :: FCode a -> FCode a
-tickScope code = do
-        info <- getInfoDown
-        if debugLevel (cgd_dflags info) == 0 then code else do
-          u <- newUnique
-          let scope' = SubScope u (cgd_tick_scope info)
-          withInfoDown code info{ cgd_tick_scope = scope' }
-
-
---------------------------------------------------------
---                 Forking
---------------------------------------------------------
-
-forkClosureBody :: FCode () -> FCode ()
--- forkClosureBody compiles body_code in environment where:
---   - sequel, update stack frame and self loop info are
---     set to fresh values
---   - state is set to a fresh value, except for local bindings
---     that are passed in unchanged. It's up to the enclosed code to
---     re-bind the free variables to a field of the closure.
-
-forkClosureBody body_code
-  = do  { dflags <- getDynFlags
-        ; info   <- getInfoDown
-        ; us     <- newUniqSupply
-        ; state  <- getState
-        ; let body_info_down = info { cgd_sequel    = initSequel
-                                    , cgd_updfr_off = initUpdFrameOff dflags
-                                    , cgd_self_loop = Nothing }
-              fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
-              ((),fork_state_out) = doFCode body_code body_info_down fork_state_in
-        ; setState $ state `addCodeBlocksFrom` fork_state_out }
-
-forkLneBody :: FCode a -> FCode a
--- 'forkLneBody' takes a body of let-no-escape binding and compiles
--- it in the *current* environment, returning the graph thus constructed.
---
--- The current environment is passed on completely unchanged to
--- the successor.  In particular, any heap usage from the enclosed
--- code is discarded; it should deal with its own heap consumption.
-forkLneBody body_code
-  = do  { info_down <- getInfoDown
-        ; us        <- newUniqSupply
-        ; state     <- getState
-        ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
-              (result, fork_state_out) = doFCode body_code info_down fork_state_in
-        ; setState $ state `addCodeBlocksFrom` fork_state_out
-        ; return result }
-
-codeOnly :: FCode () -> FCode ()
--- Emit any code from the inner thing into the outer thing
--- Do not affect anything else in the outer state
--- Used in almost-circular code to prevent false loop dependencies
-codeOnly body_code
-  = do  { info_down <- getInfoDown
-        ; us        <- newUniqSupply
-        ; state     <- getState
-        ; let   fork_state_in = (initCgState us) { cgs_binds   = cgs_binds state
-                                                 , cgs_hp_usg  = cgs_hp_usg state }
-                ((), fork_state_out) = doFCode body_code info_down fork_state_in
-        ; setState $ state `addCodeBlocksFrom` fork_state_out }
-
-forkAlts :: [FCode a] -> FCode [a]
--- (forkAlts' bs d) takes fcodes 'bs' for the branches of a 'case', and
--- an fcode for the default case 'd', and compiles each in the current
--- environment.  The current environment is passed on unmodified, except
--- that the virtual Hp is moved on to the worst virtual Hp for the branches
-
-forkAlts branch_fcodes
-  = do  { info_down <- getInfoDown
-        ; us <- newUniqSupply
-        ; state <- getState
-        ; let compile us branch
-                = (us2, doFCode branch info_down branch_state)
-                where
-                  (us1,us2) = splitUniqSupply us
-                  branch_state = (initCgState us1) {
-                                        cgs_binds  = cgs_binds state
-                                      , cgs_hp_usg = cgs_hp_usg state }
-              (_us, results) = mapAccumL compile us branch_fcodes
-              (branch_results, branch_out_states) = unzip results
-        ; setState $ foldl' stateIncUsage state branch_out_states
-                -- NB foldl.  state is the *left* argument to stateIncUsage
-        ; return branch_results }
-
-forkAltPair :: FCode a -> FCode a -> FCode (a,a)
--- Most common use of 'forkAlts'; having this helper function avoids
--- accidental use of failible pattern-matches in @do@-notation
-forkAltPair x y = do
-  xy' <- forkAlts [x,y]
-  case xy' of
-    [x',y'] -> return (x',y')
-    _ -> panic "forkAltPair"
-
--- collect the code emitted by an FCode computation
-getCodeR :: FCode a -> FCode (a, CmmAGraph)
-getCodeR fcode
-  = do  { state1 <- getState
-        ; (a, state2) <- withState fcode (state1 { cgs_stmts = mkNop })
-        ; setState $ state2 { cgs_stmts = cgs_stmts state1  }
-        ; return (a, cgs_stmts state2) }
-
-getCode :: FCode a -> FCode CmmAGraph
-getCode fcode = do { (_,stmts) <- getCodeR fcode; return stmts }
-
--- | Generate code into a fresh tick (sub-)scope and gather generated code
-getCodeScoped :: FCode a -> FCode (a, CmmAGraphScoped)
-getCodeScoped fcode
-  = do  { state1 <- getState
-        ; ((a, tscope), state2) <-
-            tickScope $
-            flip withState state1 { cgs_stmts = mkNop } $
-            do { a   <- fcode
-               ; scp <- getTickScope
-               ; return (a, scp) }
-        ; setState $ state2 { cgs_stmts = cgs_stmts state1  }
-        ; return (a, (cgs_stmts state2, tscope)) }
-
-
--- 'getHeapUsage' applies a function to the amount of heap that it uses.
--- It initialises the heap usage to zeros, and passes on an unchanged
--- heap usage.
---
--- It is usually a prelude to performing a GC check, so everything must
--- be in a tidy and consistent state.
---
--- Note the slightly subtle fixed point behaviour needed here
-
-getHeapUsage :: (VirtualHpOffset -> FCode a) -> FCode a
-getHeapUsage fcode
-  = do  { info_down <- getInfoDown
-        ; state <- getState
-        ; let   fstate_in = state { cgs_hp_usg  = initHpUsage }
-                (r, fstate_out) = doFCode (fcode hp_hw) info_down fstate_in
-                hp_hw = heapHWM (cgs_hp_usg fstate_out)        -- Loop here!
-
-        ; setState $ fstate_out { cgs_hp_usg = cgs_hp_usg state }
-        ; return r }
-
--- ----------------------------------------------------------------------------
--- Combinators for emitting code
-
-emitCgStmt :: CgStmt -> FCode ()
-emitCgStmt stmt
-  = do  { state <- getState
-        ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt }
-        }
-
-emitLabel :: BlockId -> FCode ()
-emitLabel id = do tscope <- getTickScope
-                  emitCgStmt (CgLabel id tscope)
-
-emitComment :: FastString -> FCode ()
-emitComment s
-  | debugIsOn = emitCgStmt (CgStmt (CmmComment s))
-  | otherwise = return ()
-
-emitTick :: CmmTickish -> FCode ()
-emitTick = emitCgStmt . CgStmt . CmmTick
-
-emitUnwind :: [(GlobalReg, Maybe CmmExpr)] -> FCode ()
-emitUnwind regs = do
-  dflags <- getDynFlags
-  when (debugLevel dflags > 0) $ do
-     emitCgStmt $ CgStmt $ CmmUnwind regs
-
-emitAssign :: CmmReg  -> CmmExpr -> FCode ()
-emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r))
-
-emitStore :: CmmExpr  -> CmmExpr -> FCode ()
-emitStore l r = emitCgStmt (CgStmt (CmmStore l r))
-
-emit :: CmmAGraph -> FCode ()
-emit ag
-  = do  { state <- getState
-        ; setState $ state { cgs_stmts = cgs_stmts state MkGraph.<*> ag } }
-
-emitDecl :: CmmDecl -> FCode ()
-emitDecl decl
-  = do  { state <- getState
-        ; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } }
-
-emitOutOfLine :: BlockId -> CmmAGraphScoped -> FCode ()
-emitOutOfLine l (stmts, tscope) = emitCgStmt (CgFork l stmts tscope)
-
-emitProcWithStackFrame
-   :: Convention                        -- entry convention
-   -> Maybe CmmInfoTable                -- info table?
-   -> CLabel                            -- label for the proc
-   -> [CmmFormal]                       -- stack frame
-   -> [CmmFormal]                       -- arguments
-   -> CmmAGraphScoped                   -- code
-   -> Bool                              -- do stack layout?
-   -> FCode ()
-
-emitProcWithStackFrame _conv mb_info lbl _stk_args [] blocks False
-  = do  { dflags <- getDynFlags
-        ; emitProc mb_info lbl [] blocks (widthInBytes (wordWidth dflags)) False
-        }
-emitProcWithStackFrame conv mb_info lbl stk_args args (graph, tscope) True
-        -- do layout
-  = do  { dflags <- getDynFlags
-        ; let (offset, live, entry) = mkCallEntry dflags conv args stk_args
-              graph' = entry MkGraph.<*> graph
-        ; emitProc mb_info lbl live (graph', tscope) offset True
-        }
-emitProcWithStackFrame _ _ _ _ _ _ _ = panic "emitProcWithStackFrame"
-
-emitProcWithConvention :: Convention -> Maybe CmmInfoTable -> CLabel
-                       -> [CmmFormal]
-                       -> CmmAGraphScoped
-                       -> FCode ()
-emitProcWithConvention conv mb_info lbl args blocks
-  = emitProcWithStackFrame conv mb_info lbl [] args blocks True
-
-emitProc :: Maybe CmmInfoTable -> CLabel -> [GlobalReg] -> CmmAGraphScoped
-         -> Int -> Bool -> FCode ()
-emitProc mb_info lbl live blocks offset do_layout
-  = do  { dflags <- getDynFlags
-        ; l <- newBlockId
-        ; let
-              blks :: CmmGraph
-              blks = labelAGraph l blocks
-
-              infos | Just info <- mb_info = mapSingleton (g_entry blks) info
-                    | otherwise            = mapEmpty
-
-              sinfo = StackInfo { arg_space = offset
-                                , updfr_space = Just (initUpdFrameOff dflags)
-                                , do_layout = do_layout }
-
-              tinfo = TopInfo { info_tbls = infos
-                              , stack_info=sinfo}
-
-              proc_block = CmmProc tinfo lbl live blks
-
-        ; state <- getState
-        ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } }
-
-getCmm :: FCode () -> FCode CmmGroup
--- Get all the CmmTops (there should be no stmts)
--- Return a single Cmm which may be split from other Cmms by
--- object splitting (at a later stage)
-getCmm code
-  = do  { state1 <- getState
-        ; ((), state2) <- withState code (state1 { cgs_tops  = nilOL })
-        ; setState $ state2 { cgs_tops = cgs_tops state1 }
-        ; return (fromOL (cgs_tops state2)) }
-
-
-mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph
-mkCmmIfThenElse e tbranch fbranch = mkCmmIfThenElse' e tbranch fbranch Nothing
-
-mkCmmIfThenElse' :: CmmExpr -> CmmAGraph -> CmmAGraph
-                 -> Maybe Bool -> FCode CmmAGraph
-mkCmmIfThenElse' e tbranch fbranch likely = do
-  tscp  <- getTickScope
-  endif <- newBlockId
-  tid   <- newBlockId
-  fid   <- newBlockId
-
-  let
-    (test, then_, else_, likely') = case likely of
-      Just False | Just e' <- maybeInvertCmmExpr e
-        -- currently NCG doesn't know about likely
-        -- annotations. We manually switch then and
-        -- else branch so the likely false branch
-        -- becomes a fallthrough.
-        -> (e', fbranch, tbranch, Just True)
-      _ -> (e, tbranch, fbranch, likely)
-
-  return $ catAGraphs [ mkCbranch test tid fid likely'
-                      , mkLabel tid tscp, then_, mkBranch endif
-                      , mkLabel fid tscp, else_, mkLabel endif tscp ]
-
-mkCmmIfGoto :: CmmExpr -> BlockId -> FCode CmmAGraph
-mkCmmIfGoto e tid = mkCmmIfGoto' e tid Nothing
-
-mkCmmIfGoto' :: CmmExpr -> BlockId -> Maybe Bool -> FCode CmmAGraph
-mkCmmIfGoto' e tid l = do
-  endif <- newBlockId
-  tscp  <- getTickScope
-  return $ catAGraphs [ mkCbranch e tid endif l, mkLabel endif tscp ]
-
-mkCmmIfThen :: CmmExpr -> CmmAGraph -> FCode CmmAGraph
-mkCmmIfThen e tbranch = mkCmmIfThen' e tbranch Nothing
-
-mkCmmIfThen' :: CmmExpr -> CmmAGraph -> Maybe Bool -> FCode CmmAGraph
-mkCmmIfThen' e tbranch l = do
-  endif <- newBlockId
-  tid   <- newBlockId
-  tscp  <- getTickScope
-  return $ catAGraphs [ mkCbranch e tid endif l
-                      , mkLabel tid tscp, tbranch, mkLabel endif tscp ]
-
-mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmExpr]
-       -> UpdFrameOffset -> [CmmExpr] -> FCode CmmAGraph
-mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do
-  dflags <- getDynFlags
-  k      <- newBlockId
-  tscp   <- getTickScope
-  let area = Young k
-      (off, _, copyin) = copyInOflow dflags retConv area results []
-      copyout = mkCallReturnsTo dflags f callConv actuals k off updfr_off extra_stack
-  return $ catAGraphs [copyout, mkLabel k tscp, copyin]
-
-mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmExpr] -> UpdFrameOffset
-          -> FCode CmmAGraph
-mkCmmCall f results actuals updfr_off
-   = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off []
-
-
--- ----------------------------------------------------------------------------
--- turn CmmAGraph into CmmGraph, for making a new proc.
-
-aGraphToGraph :: CmmAGraphScoped -> FCode CmmGraph
-aGraphToGraph stmts
-  = do  { l <- newBlockId
-        ; return (labelAGraph l stmts) }
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
deleted file mode 100644
index 61d88feabb..0000000000
--- a/compiler/codeGen/StgCmmPrim.hs
+++ /dev/null
@@ -1,2622 +0,0 @@
-{-# LANGUAGE CPP #-}
--- emitPrimOp is quite large
-{-# OPTIONS_GHC -fmax-pmcheck-iterations=4000000 #-}
-
-----------------------------------------------------------------------------
---
--- Stg to C--: primitive operations
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmPrim (
-   cgOpApp,
-   cgPrimOp, -- internal(ish), used by cgCase to get code for a
-             -- comparison without also turning it into a Bool.
-   shouldInlinePrimOp
- ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude hiding ((<*>))
-
-import StgCmmLayout
-import StgCmmForeign
-import StgCmmEnv
-import StgCmmMonad
-import StgCmmUtils
-import StgCmmTicky
-import StgCmmHeap
-import StgCmmProf ( costCentreFrom )
-
-import DynFlags
-import GHC.Platform
-import BasicTypes
-import BlockId
-import MkGraph
-import StgSyn
-import Cmm
-import Type     ( Type, tyConAppTyCon )
-import TyCon
-import CLabel
-import CmmUtils
-import PrimOp
-import SMRep
-import FastString
-import Outputable
-import Util
-import Data.Maybe
-
-import Data.Bits ((.&.), bit)
-import Control.Monad (liftM, when, unless)
-
-------------------------------------------------------------------------
---      Primitive operations and foreign calls
-------------------------------------------------------------------------
-
-{- Note [Foreign call results]
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A foreign call always returns an unboxed tuple of results, one
-of which is the state token.  This seems to happen even for pure
-calls.
-
-Even if we returned a single result for pure calls, it'd still be
-right to wrap it in a singleton unboxed tuple, because the result
-might be a Haskell closure pointer, we don't want to evaluate it. -}
-
-----------------------------------
-cgOpApp :: StgOp        -- The op
-        -> [StgArg]     -- Arguments
-        -> Type         -- Result type (always an unboxed tuple)
-        -> FCode ReturnKind
-
--- Foreign calls
-cgOpApp (StgFCallOp fcall ty) stg_args res_ty
-  = cgForeignCall fcall ty stg_args res_ty
-      -- Note [Foreign call results]
-
--- tagToEnum# is special: we need to pull the constructor
--- out of the table, and perform an appropriate return.
-
-cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
-  = ASSERT(isEnumerationTyCon tycon)
-    do  { dflags <- getDynFlags
-        ; args' <- getNonVoidArgAmodes [arg]
-        ; let amode = case args' of [amode] -> amode
-                                    _ -> panic "TagToEnumOp had void arg"
-        ; emitReturn [tagToClosure dflags tycon amode] }
-   where
-          -- If you're reading this code in the attempt to figure
-          -- out why the compiler panic'ed here, it is probably because
-          -- you used tagToEnum# in a non-monomorphic setting, e.g.,
-          --         intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
-          -- That won't work.
-        tycon = tyConAppTyCon res_ty
-
-cgOpApp (StgPrimOp primop) args res_ty = do
-    dflags <- getDynFlags
-    cmm_args <- getNonVoidArgAmodes args
-    case shouldInlinePrimOp dflags primop cmm_args of
-        Nothing -> do  -- out-of-line
-          let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
-          emitCall (NativeNodeCall, NativeReturn) fun cmm_args
-
-        Just f  -- inline
-          | ReturnsPrim VoidRep <- result_info
-          -> do f []
-                emitReturn []
-
-          | ReturnsPrim rep <- result_info
-          -> do dflags <- getDynFlags
-                res <- newTemp (primRepCmmType dflags rep)
-                f [res]
-                emitReturn [CmmReg (CmmLocal res)]
-
-          | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
-          -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
-                f regs
-                emitReturn (map (CmmReg . CmmLocal) regs)
-
-          | otherwise -> panic "cgPrimop"
-          where
-             result_info = getPrimOpResultInfo primop
-
-cgOpApp (StgPrimCallOp primcall) args _res_ty
-  = do  { cmm_args <- getNonVoidArgAmodes args
-        ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
-        ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
-
--- | Interpret the argument as an unsigned value, assuming the value
--- is given in two-complement form in the given width.
---
--- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
---
--- This function is used to work around the fact that many array
--- primops take Int# arguments, but we interpret them as unsigned
--- quantities in the code gen. This means that we have to be careful
--- every time we work on e.g. a CmmInt literal that corresponds to the
--- array size, as it might contain a negative Integer value if the
--- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
--- literal.
-asUnsigned :: Width -> Integer -> Integer
-asUnsigned w n = n .&. (bit (widthInBits w) - 1)
-
--- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
---     ByteOff (or some other fixed width signed type) to represent
---     array sizes or indices. This means that these will overflow for
---     large enough sizes.
-
--- | Decide whether an out-of-line primop should be replaced by an
--- inline implementation. This might happen e.g. if there's enough
--- static information, such as statically know arguments, to emit a
--- more efficient implementation inline.
---
--- Returns 'Nothing' if this primop should use its out-of-line
--- implementation (defined elsewhere) and 'Just' together with a code
--- generating function that takes the output regs as arguments
--- otherwise.
-shouldInlinePrimOp :: DynFlags
-                   -> PrimOp     -- ^ The primop
-                   -> [CmmExpr]  -- ^ The primop arguments
-                   -> Maybe ([LocalReg] -> FCode ())
-
-shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
-  | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
-      Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
-
-shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
-  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
-      Just $ \ [res] ->
-      doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
-      [ (mkIntExpr dflags (fromInteger n),
-         fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
-      , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
-         fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
-      ]
-      (fromInteger n) init
-
-shouldInlinePrimOp _ CopyArrayOp
-    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
-        Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp _ CopyMutableArrayOp
-    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
-        Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp _ CopyArrayArrayOp
-    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
-        Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp _ CopyMutableArrayArrayOp
-    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
-        Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
-  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
-      Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
-  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
-      Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
-  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
-      Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
-  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
-      Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
-  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
-      Just $ \ [res] ->
-      doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
-      [ (mkIntExpr dflags (fromInteger n),
-         fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
-      ]
-      (fromInteger n) init
-
-shouldInlinePrimOp _ CopySmallArrayOp
-    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
-        Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp _ CopySmallMutableArrayOp
-    [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
-        Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
-  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
-      Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
-  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
-      Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
-  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
-      Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
-  | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
-      Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags primop args
-  | primOpOutOfLine primop = Nothing
-  | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
-
--- TODO: Several primops, such as 'copyArray#', only have an inline
--- implementation (below) but could possibly have both an inline
--- implementation and an out-of-line implementation, just like
--- 'newArray#'. This would lower the amount of code generated,
--- hopefully without a performance impact (needs to be measured).
-
----------------------------------------------------
-cgPrimOp   :: [LocalReg]        -- where to put the results
-           -> PrimOp            -- the op
-           -> [StgArg]          -- arguments
-           -> FCode ()
-
-cgPrimOp results op args
-  = do dflags <- getDynFlags
-       arg_exprs <- getNonVoidArgAmodes args
-       emitPrimOp dflags results op arg_exprs
-
-
-------------------------------------------------------------------------
---      Emitting code for a primop
-------------------------------------------------------------------------
-
-emitPrimOp :: DynFlags
-           -> [LocalReg]        -- where to put the results
-           -> PrimOp            -- the op
-           -> [CmmExpr]         -- arguments
-           -> FCode ()
-
--- First we handle various awkward cases specially.  The remaining
--- easy cases are then handled by translateOp, defined below.
-
-emitPrimOp _ [res] ParOp [arg]
-  =
-        -- for now, just implement this in a C function
-        -- later, we might want to inline it.
-    emitCCall
-        [(res,NoHint)]
-        (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
-        [(baseExpr, AddrHint), (arg,AddrHint)]
-
-emitPrimOp dflags [res] SparkOp [arg]
-  = do
-        -- returns the value of arg in res.  We're going to therefore
-        -- refer to arg twice (once to pass to newSpark(), and once to
-        -- assign to res), so put it in a temporary.
-        tmp <- assignTemp arg
-        tmp2 <- newTemp (bWord dflags)
-        emitCCall
-            [(tmp2,NoHint)]
-            (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
-            [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
-        emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
-
-emitPrimOp dflags [res] GetCCSOfOp [arg]
-  = emitAssign (CmmLocal res) val
-  where
-    val
-     | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
-     | otherwise                      = CmmLit (zeroCLit dflags)
-
-emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
-   = emitAssign (CmmLocal res) cccsExpr
-
-emitPrimOp _ [res] MyThreadIdOp []
-   = emitAssign (CmmLocal res) currentTSOExpr
-
-emitPrimOp dflags [res] ReadMutVarOp [mutv]
-   = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
-
-emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
-   = do -- Without this write barrier, other CPUs may see this pointer before
-        -- the writes for the closure it points to have occurred.
-        emitPrimCall res MO_WriteBarrier []
-        emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
-        emitCCall
-                [{-no results-}]
-                (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
-                [(baseExpr, AddrHint), (mutv,AddrHint)]
-
---  #define sizzeofByteArrayzh(r,a) \
---     r = ((StgArrBytes *)(a))->bytes
-emitPrimOp dflags [res] SizeofByteArrayOp [arg]
-   = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
-
---  #define sizzeofMutableByteArrayzh(r,a) \
---      r = ((StgArrBytes *)(a))->bytes
-emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
-   = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
-
---  #define getSizzeofMutableByteArrayzh(r,a) \
---      r = ((StgArrBytes *)(a))->bytes
-emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
-   = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
-
-
---  #define touchzh(o)                  /* nothing */
-emitPrimOp _ res@[] TouchOp args@[_arg]
-   = do emitPrimCall res MO_Touch args
-
---  #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
-emitPrimOp dflags [res] ByteArrayContents_Char [arg]
-   = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
-
---  #define stableNameToIntzh(r,s)   (r = ((StgStableName *)s)->sn)
-emitPrimOp dflags [res] StableNameToIntOp [arg]
-   = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
-
-emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
-   = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
-
---  #define addrToHValuezh(r,a) r=(P_)a
-emitPrimOp _      [res] AddrToAnyOp [arg]
-   = emitAssign (CmmLocal res) arg
-
---  #define hvalueToAddrzh(r, a) r=(W_)a
-emitPrimOp _      [res] AnyToAddrOp [arg]
-   = emitAssign (CmmLocal res) arg
-
-{- Freezing arrays-of-ptrs requires changing an info table, for the
-   benefit of the generational collector.  It needs to scavenge mutable
-   objects, even if they are in old space.  When they become immutable,
-   they can be removed from this scavenge list.  -}
-
---  #define unsafeFreezzeArrayzh(r,a)
---      {
---        SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
---        r = a;
---      }
-emitPrimOp _      [res] UnsafeFreezeArrayOp [arg]
-   = emit $ catAGraphs
-   [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
-     mkAssign (CmmLocal res) arg ]
-emitPrimOp _      [res] UnsafeFreezeArrayArrayOp [arg]
-   = emit $ catAGraphs
-   [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
-     mkAssign (CmmLocal res) arg ]
-emitPrimOp _      [res] UnsafeFreezeSmallArrayOp [arg]
-   = emit $ catAGraphs
-   [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)),
-     mkAssign (CmmLocal res) arg ]
-
---  #define unsafeFreezzeByteArrayzh(r,a)       r=(a)
-emitPrimOp _      [res] UnsafeFreezeByteArrayOp [arg]
-   = emitAssign (CmmLocal res) arg
-
--- Reading/writing pointer arrays
-
-emitPrimOp _      [res] ReadArrayOp  [obj,ix]    = doReadPtrArrayOp res obj ix
-emitPrimOp _      [res] IndexArrayOp [obj,ix]    = doReadPtrArrayOp res obj ix
-emitPrimOp _      []  WriteArrayOp [obj,ix,v]  = doWritePtrArrayOp obj ix v
-
-emitPrimOp _      [res] IndexArrayArrayOp_ByteArray         [obj,ix]   = doReadPtrArrayOp res obj ix
-emitPrimOp _      [res] IndexArrayArrayOp_ArrayArray        [obj,ix]   = doReadPtrArrayOp res obj ix
-emitPrimOp _      [res] ReadArrayArrayOp_ByteArray          [obj,ix]   = doReadPtrArrayOp res obj ix
-emitPrimOp _      [res] ReadArrayArrayOp_MutableByteArray   [obj,ix]   = doReadPtrArrayOp res obj ix
-emitPrimOp _      [res] ReadArrayArrayOp_ArrayArray         [obj,ix]   = doReadPtrArrayOp res obj ix
-emitPrimOp _      [res] ReadArrayArrayOp_MutableArrayArray  [obj,ix]   = doReadPtrArrayOp res obj ix
-emitPrimOp _      []  WriteArrayArrayOp_ByteArray         [obj,ix,v] = doWritePtrArrayOp obj ix v
-emitPrimOp _      []  WriteArrayArrayOp_MutableByteArray  [obj,ix,v] = doWritePtrArrayOp obj ix v
-emitPrimOp _      []  WriteArrayArrayOp_ArrayArray        [obj,ix,v] = doWritePtrArrayOp obj ix v
-emitPrimOp _      []  WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
-
-emitPrimOp _      [res] ReadSmallArrayOp  [obj,ix] = doReadSmallPtrArrayOp res obj ix
-emitPrimOp _      [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
-emitPrimOp _      []  WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
-
--- Getting the size of pointer arrays
-
-emitPrimOp dflags [res] SizeofArrayOp [arg]
-   = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
-    (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
-        (bWord dflags))
-emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
-   = emitPrimOp dflags [res] SizeofArrayOp [arg]
-emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
-   = emitPrimOp dflags [res] SizeofArrayOp [arg]
-emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
-   = emitPrimOp dflags [res] SizeofArrayOp [arg]
-
-emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
-    emit $ mkAssign (CmmLocal res)
-    (cmmLoadIndexW dflags arg
-     (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
-        (bWord dflags))
-emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
-    emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
-
--- IndexXXXoffAddr
-
-emitPrimOp dflags res IndexOffAddrOp_Char             args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res IndexOffAddrOp_WideChar         args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp dflags res IndexOffAddrOp_Int              args = doIndexOffAddrOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexOffAddrOp_Word             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexOffAddrOp_Addr             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
-emitPrimOp _      res IndexOffAddrOp_Float            args = doIndexOffAddrOp   Nothing f32 res args
-emitPrimOp _      res IndexOffAddrOp_Double           args = doIndexOffAddrOp   Nothing f64 res args
-emitPrimOp dflags res IndexOffAddrOp_StablePtr        args = doIndexOffAddrOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexOffAddrOp_Int8             args = doIndexOffAddrOp   (Just (mo_s_8ToWord dflags)) b8  res args
-emitPrimOp dflags res IndexOffAddrOp_Int16            args = doIndexOffAddrOp   (Just (mo_s_16ToWord dflags)) b16 res args
-emitPrimOp dflags res IndexOffAddrOp_Int32            args = doIndexOffAddrOp   (Just (mo_s_32ToWord dflags)) b32 res args
-emitPrimOp _      res IndexOffAddrOp_Int64            args = doIndexOffAddrOp   Nothing b64 res args
-emitPrimOp dflags res IndexOffAddrOp_Word8            args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8  res args
-emitPrimOp dflags res IndexOffAddrOp_Word16           args = doIndexOffAddrOp   (Just (mo_u_16ToWord dflags)) b16 res args
-emitPrimOp dflags res IndexOffAddrOp_Word32           args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp _      res IndexOffAddrOp_Word64           args = doIndexOffAddrOp   Nothing b64 res args
-
--- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
-
-emitPrimOp dflags res ReadOffAddrOp_Char             args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res ReadOffAddrOp_WideChar         args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp dflags res ReadOffAddrOp_Int              args = doIndexOffAddrOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadOffAddrOp_Word             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadOffAddrOp_Addr             args = doIndexOffAddrOp   Nothing (bWord dflags) res args
-emitPrimOp _      res ReadOffAddrOp_Float            args = doIndexOffAddrOp   Nothing f32 res args
-emitPrimOp _      res ReadOffAddrOp_Double           args = doIndexOffAddrOp   Nothing f64 res args
-emitPrimOp dflags res ReadOffAddrOp_StablePtr        args = doIndexOffAddrOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadOffAddrOp_Int8             args = doIndexOffAddrOp   (Just (mo_s_8ToWord dflags)) b8  res args
-emitPrimOp dflags res ReadOffAddrOp_Int16            args = doIndexOffAddrOp   (Just (mo_s_16ToWord dflags)) b16 res args
-emitPrimOp dflags res ReadOffAddrOp_Int32            args = doIndexOffAddrOp   (Just (mo_s_32ToWord dflags)) b32 res args
-emitPrimOp _      res ReadOffAddrOp_Int64            args = doIndexOffAddrOp   Nothing b64 res args
-emitPrimOp dflags res ReadOffAddrOp_Word8            args = doIndexOffAddrOp   (Just (mo_u_8ToWord dflags)) b8  res args
-emitPrimOp dflags res ReadOffAddrOp_Word16           args = doIndexOffAddrOp   (Just (mo_u_16ToWord dflags)) b16 res args
-emitPrimOp dflags res ReadOffAddrOp_Word32           args = doIndexOffAddrOp   (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp _      res ReadOffAddrOp_Word64           args = doIndexOffAddrOp   Nothing b64 res args
-
--- IndexXXXArray
-
-emitPrimOp dflags res IndexByteArrayOp_Char             args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res IndexByteArrayOp_WideChar         args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp dflags res IndexByteArrayOp_Int              args = doIndexByteArrayOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexByteArrayOp_Word             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexByteArrayOp_Addr             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
-emitPrimOp _      res IndexByteArrayOp_Float            args = doIndexByteArrayOp   Nothing f32 res args
-emitPrimOp _      res IndexByteArrayOp_Double           args = doIndexByteArrayOp   Nothing f64 res args
-emitPrimOp dflags res IndexByteArrayOp_StablePtr        args = doIndexByteArrayOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexByteArrayOp_Int8             args = doIndexByteArrayOp   (Just (mo_s_8ToWord dflags)) b8  res args
-emitPrimOp dflags res IndexByteArrayOp_Int16            args = doIndexByteArrayOp   (Just (mo_s_16ToWord dflags)) b16  res args
-emitPrimOp dflags res IndexByteArrayOp_Int32            args = doIndexByteArrayOp   (Just (mo_s_32ToWord dflags)) b32  res args
-emitPrimOp _      res IndexByteArrayOp_Int64            args = doIndexByteArrayOp   Nothing b64  res args
-emitPrimOp dflags res IndexByteArrayOp_Word8            args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8  res args
-emitPrimOp dflags res IndexByteArrayOp_Word16           args = doIndexByteArrayOp   (Just (mo_u_16ToWord dflags)) b16  res args
-emitPrimOp dflags res IndexByteArrayOp_Word32           args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32  res args
-emitPrimOp _      res IndexByteArrayOp_Word64           args = doIndexByteArrayOp   Nothing b64  res args
-
--- ReadXXXArray, identical to IndexXXXArray.
-
-emitPrimOp dflags res ReadByteArrayOp_Char             args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res ReadByteArrayOp_WideChar         args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp dflags res ReadByteArrayOp_Int              args = doIndexByteArrayOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadByteArrayOp_Word             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadByteArrayOp_Addr             args = doIndexByteArrayOp   Nothing (bWord dflags) res args
-emitPrimOp _      res ReadByteArrayOp_Float            args = doIndexByteArrayOp   Nothing f32 res args
-emitPrimOp _      res ReadByteArrayOp_Double           args = doIndexByteArrayOp   Nothing f64 res args
-emitPrimOp dflags res ReadByteArrayOp_StablePtr        args = doIndexByteArrayOp   Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadByteArrayOp_Int8             args = doIndexByteArrayOp   (Just (mo_s_8ToWord dflags)) b8  res args
-emitPrimOp dflags res ReadByteArrayOp_Int16            args = doIndexByteArrayOp   (Just (mo_s_16ToWord dflags)) b16  res args
-emitPrimOp dflags res ReadByteArrayOp_Int32            args = doIndexByteArrayOp   (Just (mo_s_32ToWord dflags)) b32  res args
-emitPrimOp _      res ReadByteArrayOp_Int64            args = doIndexByteArrayOp   Nothing b64  res args
-emitPrimOp dflags res ReadByteArrayOp_Word8            args = doIndexByteArrayOp   (Just (mo_u_8ToWord dflags)) b8  res args
-emitPrimOp dflags res ReadByteArrayOp_Word16           args = doIndexByteArrayOp   (Just (mo_u_16ToWord dflags)) b16  res args
-emitPrimOp dflags res ReadByteArrayOp_Word32           args = doIndexByteArrayOp   (Just (mo_u_32ToWord dflags)) b32  res args
-emitPrimOp _      res ReadByteArrayOp_Word64           args = doIndexByteArrayOp   Nothing b64  res args
-
--- IndexWord8ArrayAsXXX
-
-emitPrimOp dflags res IndexByteArrayOp_Word8AsChar      args = doIndexByteArrayOpAs   (Just (mo_u_8ToWord dflags)) b8 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsWideChar  args = doIndexByteArrayOpAs   (Just (mo_u_32ToWord dflags)) b32 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsInt       args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsWord      args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsAddr      args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
-emitPrimOp _      res IndexByteArrayOp_Word8AsFloat     args = doIndexByteArrayOpAs   Nothing f32 b8 res args
-emitPrimOp _      res IndexByteArrayOp_Word8AsDouble    args = doIndexByteArrayOpAs   Nothing f64 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsInt16     args = doIndexByteArrayOpAs   (Just (mo_s_16ToWord dflags)) b16 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsInt32     args = doIndexByteArrayOpAs   (Just (mo_s_32ToWord dflags)) b32 b8 res args
-emitPrimOp _      res IndexByteArrayOp_Word8AsInt64     args = doIndexByteArrayOpAs   Nothing b64 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsWord16    args = doIndexByteArrayOpAs   (Just (mo_u_16ToWord dflags)) b16 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsWord32    args = doIndexByteArrayOpAs   (Just (mo_u_32ToWord dflags)) b32 b8 res args
-emitPrimOp _      res IndexByteArrayOp_Word8AsWord64    args = doIndexByteArrayOpAs   Nothing b64 b8 res args
-
--- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX
-
-emitPrimOp dflags res ReadByteArrayOp_Word8AsChar      args = doIndexByteArrayOpAs   (Just (mo_u_8ToWord dflags)) b8 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsWideChar  args = doIndexByteArrayOpAs   (Just (mo_u_32ToWord dflags)) b32 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsInt       args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsWord      args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsAddr      args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
-emitPrimOp _      res ReadByteArrayOp_Word8AsFloat     args = doIndexByteArrayOpAs   Nothing f32 b8 res args
-emitPrimOp _      res ReadByteArrayOp_Word8AsDouble    args = doIndexByteArrayOpAs   Nothing f64 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs   Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsInt16     args = doIndexByteArrayOpAs   (Just (mo_s_16ToWord dflags)) b16 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsInt32     args = doIndexByteArrayOpAs   (Just (mo_s_32ToWord dflags)) b32 b8 res args
-emitPrimOp _      res ReadByteArrayOp_Word8AsInt64     args = doIndexByteArrayOpAs   Nothing b64 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsWord16    args = doIndexByteArrayOpAs   (Just (mo_u_16ToWord dflags)) b16 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsWord32    args = doIndexByteArrayOpAs   (Just (mo_u_32ToWord dflags)) b32 b8 res args
-emitPrimOp _      res ReadByteArrayOp_Word8AsWord64    args = doIndexByteArrayOpAs   Nothing b64 b8 res args
-
--- WriteXXXoffAddr
-
-emitPrimOp dflags res WriteOffAddrOp_Char             args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
-emitPrimOp dflags res WriteOffAddrOp_WideChar         args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp dflags res WriteOffAddrOp_Int              args = doWriteOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteOffAddrOp_Word             args = doWriteOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteOffAddrOp_Addr             args = doWriteOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp _      res WriteOffAddrOp_Float            args = doWriteOffAddrOp Nothing f32 res args
-emitPrimOp _      res WriteOffAddrOp_Double           args = doWriteOffAddrOp Nothing f64 res args
-emitPrimOp dflags res WriteOffAddrOp_StablePtr        args = doWriteOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteOffAddrOp_Int8             args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
-emitPrimOp dflags res WriteOffAddrOp_Int16            args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
-emitPrimOp dflags res WriteOffAddrOp_Int32            args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp _      res WriteOffAddrOp_Int64            args = doWriteOffAddrOp Nothing b64 res args
-emitPrimOp dflags res WriteOffAddrOp_Word8            args = doWriteOffAddrOp (Just (mo_WordTo8 dflags))  b8 res args
-emitPrimOp dflags res WriteOffAddrOp_Word16           args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
-emitPrimOp dflags res WriteOffAddrOp_Word32           args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp _      res WriteOffAddrOp_Word64           args = doWriteOffAddrOp Nothing b64 res args
-
--- WriteXXXArray
-
-emitPrimOp dflags res WriteByteArrayOp_Char             args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8 res args
-emitPrimOp dflags res WriteByteArrayOp_WideChar         args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp dflags res WriteByteArrayOp_Int              args = doWriteByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteByteArrayOp_Word             args = doWriteByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteByteArrayOp_Addr             args = doWriteByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp _      res WriteByteArrayOp_Float            args = doWriteByteArrayOp Nothing f32 res args
-emitPrimOp _      res WriteByteArrayOp_Double           args = doWriteByteArrayOp Nothing f64 res args
-emitPrimOp dflags res WriteByteArrayOp_StablePtr        args = doWriteByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteByteArrayOp_Int8             args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Int16            args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
-emitPrimOp dflags res WriteByteArrayOp_Int32            args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp _      res WriteByteArrayOp_Int64            args = doWriteByteArrayOp Nothing b64 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8            args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8  res args
-emitPrimOp dflags res WriteByteArrayOp_Word16           args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
-emitPrimOp dflags res WriteByteArrayOp_Word32           args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp _      res WriteByteArrayOp_Word64           args = doWriteByteArrayOp Nothing b64 res args
-
--- WriteInt8ArrayAsXXX
-
-emitPrimOp dflags res WriteByteArrayOp_Word8AsChar       args = doWriteByteArrayOp (Just (mo_WordTo8 dflags))  b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8AsWideChar   args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
-emitPrimOp _      res WriteByteArrayOp_Word8AsInt        args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp _      res WriteByteArrayOp_Word8AsWord       args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp _      res WriteByteArrayOp_Word8AsAddr       args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp _      res WriteByteArrayOp_Word8AsFloat      args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp _      res WriteByteArrayOp_Word8AsDouble     args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp _      res WriteByteArrayOp_Word8AsStablePtr  args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8AsInt16      args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8AsInt32      args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
-emitPrimOp _      res WriteByteArrayOp_Word8AsInt64      args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8AsWord16     args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8AsWord32     args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
-emitPrimOp _      res WriteByteArrayOp_Word8AsWord64     args = doWriteByteArrayOp Nothing b8 res args
-
--- Copying and setting byte arrays
-emitPrimOp _      [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
-    doCopyByteArrayOp src src_off dst dst_off n
-emitPrimOp _      [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
-    doCopyMutableByteArrayOp src src_off dst dst_off n
-emitPrimOp _      [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
-    doCopyByteArrayToAddrOp src src_off dst n
-emitPrimOp _      [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
-    doCopyMutableByteArrayToAddrOp src src_off dst n
-emitPrimOp _      [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
-    doCopyAddrToByteArrayOp src dst dst_off n
-emitPrimOp _      [] SetByteArrayOp [ba,off,len,c] =
-    doSetByteArrayOp ba off len c
-
--- Comparing byte arrays
-emitPrimOp _      [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] =
-    doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
-
-emitPrimOp _      [res] BSwap16Op [w] = emitBSwapCall res w W16
-emitPrimOp _      [res] BSwap32Op [w] = emitBSwapCall res w W32
-emitPrimOp _      [res] BSwap64Op [w] = emitBSwapCall res w W64
-emitPrimOp dflags [res] BSwapOp   [w] = emitBSwapCall res w (wordWidth dflags)
-
-emitPrimOp _      [res] BRev8Op  [w] = emitBRevCall res w W8
-emitPrimOp _      [res] BRev16Op [w] = emitBRevCall res w W16
-emitPrimOp _      [res] BRev32Op [w] = emitBRevCall res w W32
-emitPrimOp _      [res] BRev64Op [w] = emitBRevCall res w W64
-emitPrimOp dflags [res] BRevOp   [w] = emitBRevCall res w (wordWidth dflags)
-
--- Population count
-emitPrimOp _      [res] PopCnt8Op  [w] = emitPopCntCall res w W8
-emitPrimOp _      [res] PopCnt16Op [w] = emitPopCntCall res w W16
-emitPrimOp _      [res] PopCnt32Op [w] = emitPopCntCall res w W32
-emitPrimOp _      [res] PopCnt64Op [w] = emitPopCntCall res w W64
-emitPrimOp dflags [res] PopCntOp   [w] = emitPopCntCall res w (wordWidth dflags)
-
--- Parallel bit deposit
-emitPrimOp _      [res] Pdep8Op  [src, mask] = emitPdepCall res src mask W8
-emitPrimOp _      [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16
-emitPrimOp _      [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32
-emitPrimOp _      [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64
-emitPrimOp dflags [res] PdepOp   [src, mask] = emitPdepCall res src mask (wordWidth dflags)
-
--- Parallel bit extract
-emitPrimOp _      [res] Pext8Op  [src, mask] = emitPextCall res src mask W8
-emitPrimOp _      [res] Pext16Op [src, mask] = emitPextCall res src mask W16
-emitPrimOp _      [res] Pext32Op [src, mask] = emitPextCall res src mask W32
-emitPrimOp _      [res] Pext64Op [src, mask] = emitPextCall res src mask W64
-emitPrimOp dflags [res] PextOp   [src, mask] = emitPextCall res src mask (wordWidth dflags)
-
--- count leading zeros
-emitPrimOp _      [res] Clz8Op  [w] = emitClzCall res w W8
-emitPrimOp _      [res] Clz16Op [w] = emitClzCall res w W16
-emitPrimOp _      [res] Clz32Op [w] = emitClzCall res w W32
-emitPrimOp _      [res] Clz64Op [w] = emitClzCall res w W64
-emitPrimOp dflags [res] ClzOp   [w] = emitClzCall res w (wordWidth dflags)
-
--- count trailing zeros
-emitPrimOp _      [res] Ctz8Op [w]  = emitCtzCall res w W8
-emitPrimOp _      [res] Ctz16Op [w] = emitCtzCall res w W16
-emitPrimOp _      [res] Ctz32Op [w] = emitCtzCall res w W32
-emitPrimOp _      [res] Ctz64Op [w] = emitCtzCall res w W64
-emitPrimOp dflags [res] CtzOp   [w] = emitCtzCall res w (wordWidth dflags)
-
--- Unsigned int to floating point conversions
-emitPrimOp _      [res] Word2FloatOp  [w] = emitPrimCall [res]
-                                            (MO_UF_Conv W32) [w]
-emitPrimOp _      [res] Word2DoubleOp [w] = emitPrimCall [res]
-                                            (MO_UF_Conv W64) [w]
-
--- SIMD primops
-emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
-    checkVecCompatibility dflags vcat n w
-    doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
-  where
-    zeros :: CmmExpr
-    zeros = CmmLit $ CmmVec (replicate n zero)
-
-    zero :: CmmLit
-    zero = case vcat of
-             IntVec   -> CmmInt 0 w
-             WordVec  -> CmmInt 0 w
-             FloatVec -> CmmFloat 0 w
-
-    ty :: CmmType
-    ty = vecVmmType vcat n w
-
-emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
-    checkVecCompatibility dflags vcat n w
-    when (es `lengthIsNot` n) $
-        panic "emitPrimOp: VecPackOp has wrong number of arguments"
-    doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
-  where
-    zeros :: CmmExpr
-    zeros = CmmLit $ CmmVec (replicate n zero)
-
-    zero :: CmmLit
-    zero = case vcat of
-             IntVec   -> CmmInt 0 w
-             WordVec  -> CmmInt 0 w
-             FloatVec -> CmmFloat 0 w
-
-    ty :: CmmType
-    ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
-    checkVecCompatibility dflags vcat n w
-    when (res `lengthIsNot` n) $
-        panic "emitPrimOp: VecUnpackOp has wrong number of results"
-    doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
-  where
-    ty :: CmmType
-    ty = vecVmmType vcat n w
-
-emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
-    checkVecCompatibility dflags vcat n w
-    doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
-  where
-    ty :: CmmType
-    ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doIndexByteArrayOp Nothing ty res args
-  where
-    ty :: CmmType
-    ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doIndexByteArrayOp Nothing ty res args
-  where
-    ty :: CmmType
-    ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doWriteByteArrayOp Nothing ty res args
-  where
-    ty :: CmmType
-    ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doIndexOffAddrOp Nothing ty res args
-  where
-    ty :: CmmType
-    ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doIndexOffAddrOp Nothing ty res args
-  where
-    ty :: CmmType
-    ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doWriteOffAddrOp Nothing ty res args
-  where
-    ty :: CmmType
-    ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doIndexByteArrayOpAs Nothing vecty ty res args
-  where
-    vecty :: CmmType
-    vecty = vecVmmType vcat n w
-
-    ty :: CmmType
-    ty = vecCmmCat vcat w
-
-emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doIndexByteArrayOpAs Nothing vecty ty res args
-  where
-    vecty :: CmmType
-    vecty = vecVmmType vcat n w
-
-    ty :: CmmType
-    ty = vecCmmCat vcat w
-
-emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doWriteByteArrayOp Nothing ty res args
-  where
-    ty :: CmmType
-    ty = vecCmmCat vcat w
-
-emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doIndexOffAddrOpAs Nothing vecty ty res args
-  where
-    vecty :: CmmType
-    vecty = vecVmmType vcat n w
-
-    ty :: CmmType
-    ty = vecCmmCat vcat w
-
-emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doIndexOffAddrOpAs Nothing vecty ty res args
-  where
-    vecty :: CmmType
-    vecty = vecVmmType vcat n w
-
-    ty :: CmmType
-    ty = vecCmmCat vcat w
-
-emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
-    checkVecCompatibility dflags vcat n w
-    doWriteOffAddrOp Nothing ty res args
-  where
-    ty :: CmmType
-    ty = vecCmmCat vcat w
-
--- Prefetch
-emitPrimOp _ [] PrefetchByteArrayOp3        args = doPrefetchByteArrayOp 3  args
-emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3  args
-emitPrimOp _ [] PrefetchAddrOp3             args = doPrefetchAddrOp  3  args
-emitPrimOp _ [] PrefetchValueOp3            args = doPrefetchValueOp 3 args
-
-emitPrimOp _ [] PrefetchByteArrayOp2        args = doPrefetchByteArrayOp 2  args
-emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2  args
-emitPrimOp _ [] PrefetchAddrOp2             args = doPrefetchAddrOp 2  args
-emitPrimOp _ [] PrefetchValueOp2           args = doPrefetchValueOp 2 args
-
-emitPrimOp _ [] PrefetchByteArrayOp1        args = doPrefetchByteArrayOp 1  args
-emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1  args
-emitPrimOp _ [] PrefetchAddrOp1             args = doPrefetchAddrOp 1  args
-emitPrimOp _ [] PrefetchValueOp1            args = doPrefetchValueOp 1 args
-
-emitPrimOp _ [] PrefetchByteArrayOp0        args = doPrefetchByteArrayOp 0  args
-emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0  args
-emitPrimOp _ [] PrefetchAddrOp0             args = doPrefetchAddrOp 0  args
-emitPrimOp _ [] PrefetchValueOp0            args = doPrefetchValueOp 0 args
-
--- Atomic read-modify-write
-emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
-    doAtomicRMW res AMO_Add mba ix (bWord dflags) n
-emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
-    doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
-emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
-    doAtomicRMW res AMO_And mba ix (bWord dflags) n
-emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
-    doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
-emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
-    doAtomicRMW res AMO_Or mba ix (bWord dflags) n
-emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
-    doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
-emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
-    doAtomicReadByteArray res mba ix (bWord dflags)
-emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
-    doAtomicWriteByteArray mba ix (bWord dflags) val
-emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
-    doCasByteArray res mba ix (bWord dflags) old new
-
--- The rest just translate straightforwardly
-emitPrimOp dflags [res] op [arg]
-   | nopOp op
-   = emitAssign (CmmLocal res) arg
-
-   | Just (mop,rep) <- narrowOp op
-   = emitAssign (CmmLocal res) $
-           CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
-
-emitPrimOp dflags r@[res] op args
-   | Just prim <- callishOp op
-   = do emitPrimCall r prim args
-
-   | Just mop <- translateOp dflags op
-   = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
-     emit stmt
-
-emitPrimOp dflags results op args
-   = case callishPrimOpSupported dflags op args of
-          Left op   -> emit $ mkUnsafeCall (PrimTarget op) results args
-          Right gen -> gen results args
-
--- Note [QuotRem optimization]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- `quot` and `rem` with constant divisor can be implemented with fast bit-ops
--- (shift, .&.).
---
--- Currently we only support optimization (performed in CmmOpt) when the
--- constant is a power of 2. #9041 tracks the implementation of the general
--- optimization.
---
--- `quotRem` can be optimized in the same way. However as it returns two values,
--- it is implemented as a "callish" primop which is harder to match and
--- to transform later on. For simplicity, the current implementation detects cases
--- that can be optimized (see `quotRemCanBeOptimized`) and converts STG quotRem
--- primop into two CMM quot and rem primops.
-
-type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
-
-callishPrimOpSupported :: DynFlags -> PrimOp -> [CmmExpr] -> Either CallishMachOp GenericOp
-callishPrimOpSupported dflags op args
-  = case op of
-      IntQuotRemOp   | ncg && (x86ish || ppc)
-                     , not quotRemCanBeOptimized
-                         -> Left (MO_S_QuotRem  (wordWidth dflags))
-                     | otherwise
-                         -> Right (genericIntQuotRemOp (wordWidth dflags))
-
-      Int8QuotRemOp  | ncg && (x86ish || ppc)
-                     , not quotRemCanBeOptimized
-                                     -> Left (MO_S_QuotRem W8)
-                     | otherwise     -> Right (genericIntQuotRemOp W8)
-
-      Int16QuotRemOp | ncg && (x86ish || ppc)
-                     , not quotRemCanBeOptimized
-                                     -> Left (MO_S_QuotRem W16)
-                     | otherwise     -> Right (genericIntQuotRemOp W16)
-
-
-      WordQuotRemOp  | ncg && (x86ish || ppc)
-                     , not quotRemCanBeOptimized
-                         -> Left (MO_U_QuotRem  (wordWidth dflags))
-                     | otherwise
-                         -> Right (genericWordQuotRemOp (wordWidth dflags))
-
-      WordQuotRem2Op | (ncg && (x86ish || ppc))
-                          || llvm     -> Left (MO_U_QuotRem2 (wordWidth dflags))
-                     | otherwise      -> Right (genericWordQuotRem2Op dflags)
-
-      Word8QuotRemOp | ncg && (x86ish || ppc)
-                     , not quotRemCanBeOptimized
-                                      -> Left (MO_U_QuotRem W8)
-                     | otherwise      -> Right (genericWordQuotRemOp W8)
-
-      Word16QuotRemOp| ncg && (x86ish || ppc)
-                     , not quotRemCanBeOptimized
-                                     -> Left (MO_U_QuotRem W16)
-                     | otherwise     -> Right (genericWordQuotRemOp W16)
-
-      WordAdd2Op     | (ncg && (x86ish || ppc))
-                         || llvm      -> Left (MO_Add2       (wordWidth dflags))
-                     | otherwise      -> Right genericWordAdd2Op
-
-      WordAddCOp     | (ncg && (x86ish || ppc))
-                         || llvm      -> Left (MO_AddWordC   (wordWidth dflags))
-                     | otherwise      -> Right genericWordAddCOp
-
-      WordSubCOp     | (ncg && (x86ish || ppc))
-                         || llvm      -> Left (MO_SubWordC   (wordWidth dflags))
-                     | otherwise      -> Right genericWordSubCOp
-
-      IntAddCOp      | (ncg && (x86ish || ppc))
-                         || llvm      -> Left (MO_AddIntC    (wordWidth dflags))
-                     | otherwise      -> Right genericIntAddCOp
-
-      IntSubCOp      | (ncg && (x86ish || ppc))
-                         || llvm      -> Left (MO_SubIntC    (wordWidth dflags))
-                     | otherwise      -> Right genericIntSubCOp
-
-      WordMul2Op     | ncg && (x86ish || ppc)
-                         || llvm      -> Left (MO_U_Mul2     (wordWidth dflags))
-                     | otherwise      -> Right genericWordMul2Op
-      FloatFabsOp    | (ncg && x86ish || ppc)
-                         || llvm      -> Left MO_F32_Fabs
-                     | otherwise      -> Right $ genericFabsOp W32
-      DoubleFabsOp   | (ncg && x86ish || ppc)
-                         || llvm      -> Left MO_F64_Fabs
-                     | otherwise      -> Right $ genericFabsOp W64
-
-      _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
- where
-  -- See Note [QuotRem optimization]
-  quotRemCanBeOptimized = case args of
-    [_, CmmLit (CmmInt n _) ] -> isJust (exactLog2 n)
-    _                         -> False
-
-  ncg = case hscTarget dflags of
-           HscAsm -> True
-           _      -> False
-  llvm = case hscTarget dflags of
-           HscLlvm -> True
-           _       -> False
-  x86ish = case platformArch (targetPlatform dflags) of
-             ArchX86    -> True
-             ArchX86_64 -> True
-             _          -> False
-  ppc = case platformArch (targetPlatform dflags) of
-          ArchPPC      -> True
-          ArchPPC_64 _ -> True
-          _            -> False
-
-genericIntQuotRemOp :: Width -> GenericOp
-genericIntQuotRemOp width [res_q, res_r] [arg_x, arg_y]
-   = emit $ mkAssign (CmmLocal res_q)
-              (CmmMachOp (MO_S_Quot width) [arg_x, arg_y]) <*>
-            mkAssign (CmmLocal res_r)
-              (CmmMachOp (MO_S_Rem  width) [arg_x, arg_y])
-genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
-
-genericWordQuotRemOp :: Width -> GenericOp
-genericWordQuotRemOp width [res_q, res_r] [arg_x, arg_y]
-    = emit $ mkAssign (CmmLocal res_q)
-               (CmmMachOp (MO_U_Quot width) [arg_x, arg_y]) <*>
-             mkAssign (CmmLocal res_r)
-               (CmmMachOp (MO_U_Rem  width) [arg_x, arg_y])
-genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
-
-genericWordQuotRem2Op :: DynFlags -> GenericOp
-genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
-    = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
-    where    ty = cmmExprType dflags arg_x_high
-             shl   x i = CmmMachOp (MO_Shl   (wordWidth dflags)) [x, i]
-             shr   x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
-             or    x y = CmmMachOp (MO_Or    (wordWidth dflags)) [x, y]
-             ge    x y = CmmMachOp (MO_U_Ge  (wordWidth dflags)) [x, y]
-             ne    x y = CmmMachOp (MO_Ne    (wordWidth dflags)) [x, y]
-             minus x y = CmmMachOp (MO_Sub   (wordWidth dflags)) [x, y]
-             times x y = CmmMachOp (MO_Mul   (wordWidth dflags)) [x, y]
-             zero   = lit 0
-             one    = lit 1
-             negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
-             lit i = CmmLit (CmmInt i (wordWidth dflags))
-
-             f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
-             f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
-                                      mkAssign (CmmLocal res_r) high)
-             f i acc high low =
-                 do roverflowedBit <- newTemp ty
-                    rhigh'         <- newTemp ty
-                    rhigh''        <- newTemp ty
-                    rlow'          <- newTemp ty
-                    risge          <- newTemp ty
-                    racc'          <- newTemp ty
-                    let high'         = CmmReg (CmmLocal rhigh')
-                        isge          = CmmReg (CmmLocal risge)
-                        overflowedBit = CmmReg (CmmLocal roverflowedBit)
-                    let this = catAGraphs
-                               [mkAssign (CmmLocal roverflowedBit)
-                                          (shr high negone),
-                                mkAssign (CmmLocal rhigh')
-                                          (or (shl high one) (shr low negone)),
-                                mkAssign (CmmLocal rlow')
-                                          (shl low one),
-                                mkAssign (CmmLocal risge)
-                                          (or (overflowedBit `ne` zero)
-                                              (high' `ge` arg_y)),
-                                mkAssign (CmmLocal rhigh'')
-                                          (high' `minus` (arg_y `times` isge)),
-                                mkAssign (CmmLocal racc')
-                                          (or (shl acc one) isge)]
-                    rest <- f (i - 1) (CmmReg (CmmLocal racc'))
-                                      (CmmReg (CmmLocal rhigh''))
-                                      (CmmReg (CmmLocal rlow'))
-                    return (this <*> rest)
-genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
-
-genericWordAdd2Op :: GenericOp
-genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
-  = do dflags <- getDynFlags
-       r1 <- newTemp (cmmExprType dflags arg_x)
-       r2 <- newTemp (cmmExprType dflags arg_x)
-       let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
-           toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
-           bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
-           add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
-           or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
-           hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
-                                (wordWidth dflags))
-           hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
-       emit $ catAGraphs
-          [mkAssign (CmmLocal r1)
-               (add (bottomHalf arg_x) (bottomHalf arg_y)),
-           mkAssign (CmmLocal r2)
-               (add (topHalf (CmmReg (CmmLocal r1)))
-                    (add (topHalf arg_x) (topHalf arg_y))),
-           mkAssign (CmmLocal res_h)
-               (topHalf (CmmReg (CmmLocal r2))),
-           mkAssign (CmmLocal res_l)
-               (or (toTopHalf (CmmReg (CmmLocal r2)))
-                   (bottomHalf (CmmReg (CmmLocal r1))))]
-genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
-
--- | Implements branchless recovery of the carry flag @c@ by checking the
--- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@:
---
--- @
---    c = a&b | (a|b)&~r
--- @
---
--- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
-genericWordAddCOp :: GenericOp
-genericWordAddCOp [res_r, res_c] [aa, bb]
- = do dflags <- getDynFlags
-      emit $ catAGraphs [
-        mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
-        mkAssign (CmmLocal res_c) $
-          CmmMachOp (mo_wordUShr dflags) [
-            CmmMachOp (mo_wordOr dflags) [
-              CmmMachOp (mo_wordAnd dflags) [aa,bb],
-              CmmMachOp (mo_wordAnd dflags) [
-                CmmMachOp (mo_wordOr dflags) [aa,bb],
-                CmmMachOp (mo_wordNot dflags) [CmmReg (CmmLocal res_r)]
-              ]
-            ],
-            mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
-          ]
-        ]
-genericWordAddCOp _ _ = panic "genericWordAddCOp"
-
--- | Implements branchless recovery of the carry flag @c@ by checking the
--- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@:
---
--- @
---    c = ~a&b | (~a|b)&r
--- @
---
--- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
-genericWordSubCOp :: GenericOp
-genericWordSubCOp [res_r, res_c] [aa, bb]
- = do dflags <- getDynFlags
-      emit $ catAGraphs [
-        mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
-        mkAssign (CmmLocal res_c) $
-          CmmMachOp (mo_wordUShr dflags) [
-            CmmMachOp (mo_wordOr dflags) [
-              CmmMachOp (mo_wordAnd dflags) [
-                CmmMachOp (mo_wordNot dflags) [aa],
-                bb
-              ],
-              CmmMachOp (mo_wordAnd dflags) [
-                CmmMachOp (mo_wordOr dflags) [
-                  CmmMachOp (mo_wordNot dflags) [aa],
-                  bb
-                ],
-                CmmReg (CmmLocal res_r)
-              ]
-            ],
-            mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
-          ]
-        ]
-genericWordSubCOp _ _ = panic "genericWordSubCOp"
-
-genericIntAddCOp :: GenericOp
-genericIntAddCOp [res_r, res_c] [aa, bb]
-{-
-   With some bit-twiddling, we can define int{Add,Sub}Czh portably in
-   C, and without needing any comparisons.  This may not be the
-   fastest way to do it - if you have better code, please send it! --SDM
-
-   Return : r = a + b,  c = 0 if no overflow, 1 on overflow.
-
-   We currently don't make use of the r value if c is != 0 (i.e.
-   overflow), we just convert to big integers and try again.  This
-   could be improved by making r and c the correct values for
-   plugging into a new J#.
-
-   { r = ((I_)(a)) + ((I_)(b));                                 \
-     c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r)))    \
-         >> (BITS_IN (I_) - 1);                                 \
-   }
-   Wading through the mass of bracketry, it seems to reduce to:
-   c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
-
--}
- = do dflags <- getDynFlags
-      emit $ catAGraphs [
-        mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
-        mkAssign (CmmLocal res_c) $
-          CmmMachOp (mo_wordUShr dflags) [
-                CmmMachOp (mo_wordAnd dflags) [
-                    CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
-                    CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
-                ],
-                mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
-          ]
-        ]
-genericIntAddCOp _ _ = panic "genericIntAddCOp"
-
-genericIntSubCOp :: GenericOp
-genericIntSubCOp [res_r, res_c] [aa, bb]
-{- Similarly:
-   #define subIntCzh(r,c,a,b)                                   \
-   { r = ((I_)(a)) - ((I_)(b));                                 \
-     c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r)))     \
-         >> (BITS_IN (I_) - 1);                                 \
-   }
-
-   c =  ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
--}
- = do dflags <- getDynFlags
-      emit $ catAGraphs [
-        mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
-        mkAssign (CmmLocal res_c) $
-          CmmMachOp (mo_wordUShr dflags) [
-                CmmMachOp (mo_wordAnd dflags) [
-                    CmmMachOp (mo_wordXor dflags) [aa,bb],
-                    CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
-                ],
-                mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
-          ]
-        ]
-genericIntSubCOp _ _ = panic "genericIntSubCOp"
-
-genericWordMul2Op :: GenericOp
-genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
- = do dflags <- getDynFlags
-      let t = cmmExprType dflags arg_x
-      xlyl <- liftM CmmLocal $ newTemp t
-      xlyh <- liftM CmmLocal $ newTemp t
-      xhyl <- liftM CmmLocal $ newTemp t
-      r    <- liftM CmmLocal $ newTemp t
-      -- This generic implementation is very simple and slow. We might
-      -- well be able to do better, but for now this at least works.
-      let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
-          toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
-          bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
-          add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
-          sum = foldl1 add
-          mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
-          or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
-          hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
-                               (wordWidth dflags))
-          hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
-      emit $ catAGraphs
-             [mkAssign xlyl
-                  (mul (bottomHalf arg_x) (bottomHalf arg_y)),
-              mkAssign xlyh
-                  (mul (bottomHalf arg_x) (topHalf arg_y)),
-              mkAssign xhyl
-                  (mul (topHalf arg_x) (bottomHalf arg_y)),
-              mkAssign r
-                  (sum [topHalf    (CmmReg xlyl),
-                        bottomHalf (CmmReg xhyl),
-                        bottomHalf (CmmReg xlyh)]),
-              mkAssign (CmmLocal res_l)
-                  (or (bottomHalf (CmmReg xlyl))
-                      (toTopHalf (CmmReg r))),
-              mkAssign (CmmLocal res_h)
-                  (sum [mul (topHalf arg_x) (topHalf arg_y),
-                        topHalf (CmmReg xhyl),
-                        topHalf (CmmReg xlyh),
-                        topHalf (CmmReg r)])]
-genericWordMul2Op _ _ = panic "genericWordMul2Op"
-
--- This replicates what we had in libraries/base/GHC/Float.hs:
---
---    abs x    | x == 0    = 0 -- handles (-0.0)
---             | x >  0    = x
---             | otherwise = negateFloat x
-genericFabsOp :: Width -> GenericOp
-genericFabsOp w [res_r] [aa]
- = do dflags <- getDynFlags
-      let zero   = CmmLit (CmmFloat 0 w)
-
-          eq x y = CmmMachOp (MO_F_Eq w) [x, y]
-          gt x y = CmmMachOp (MO_F_Gt w) [x, y]
-
-          neg x  = CmmMachOp (MO_F_Neg w) [x]
-
-          g1 = catAGraphs [mkAssign (CmmLocal res_r) zero]
-          g2 = catAGraphs [mkAssign (CmmLocal res_r) aa]
-
-      res_t <- CmmLocal <$> newTemp (cmmExprType dflags aa)
-      let g3 = catAGraphs [mkAssign res_t aa,
-                           mkAssign (CmmLocal res_r) (neg (CmmReg res_t))]
-
-      g4 <- mkCmmIfThenElse (gt aa zero) g2 g3
-
-      emit =<< mkCmmIfThenElse (eq aa zero) g1 g4
-
-genericFabsOp _ _ _ = panic "genericFabsOp"
-
--- These PrimOps are NOPs in Cmm
-
-nopOp :: PrimOp -> Bool
-nopOp Int2WordOp     = True
-nopOp Word2IntOp     = True
-nopOp Int2AddrOp     = True
-nopOp Addr2IntOp     = True
-nopOp ChrOp          = True  -- Int# and Char# are rep'd the same
-nopOp OrdOp          = True
-nopOp _              = False
-
--- These PrimOps turn into double casts
-
-narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
-narrowOp Narrow8IntOp   = Just (MO_SS_Conv, W8)
-narrowOp Narrow16IntOp  = Just (MO_SS_Conv, W16)
-narrowOp Narrow32IntOp  = Just (MO_SS_Conv, W32)
-narrowOp Narrow8WordOp  = Just (MO_UU_Conv, W8)
-narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
-narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
-narrowOp _              = Nothing
-
--- Native word signless ops
-
-translateOp :: DynFlags -> PrimOp -> Maybe MachOp
-translateOp dflags IntAddOp       = Just (mo_wordAdd dflags)
-translateOp dflags IntSubOp       = Just (mo_wordSub dflags)
-translateOp dflags WordAddOp      = Just (mo_wordAdd dflags)
-translateOp dflags WordSubOp      = Just (mo_wordSub dflags)
-translateOp dflags AddrAddOp      = Just (mo_wordAdd dflags)
-translateOp dflags AddrSubOp      = Just (mo_wordSub dflags)
-
-translateOp dflags IntEqOp        = Just (mo_wordEq dflags)
-translateOp dflags IntNeOp        = Just (mo_wordNe dflags)
-translateOp dflags WordEqOp       = Just (mo_wordEq dflags)
-translateOp dflags WordNeOp       = Just (mo_wordNe dflags)
-translateOp dflags AddrEqOp       = Just (mo_wordEq dflags)
-translateOp dflags AddrNeOp       = Just (mo_wordNe dflags)
-
-translateOp dflags AndOp          = Just (mo_wordAnd dflags)
-translateOp dflags OrOp           = Just (mo_wordOr dflags)
-translateOp dflags XorOp          = Just (mo_wordXor dflags)
-translateOp dflags NotOp          = Just (mo_wordNot dflags)
-translateOp dflags SllOp          = Just (mo_wordShl dflags)
-translateOp dflags SrlOp          = Just (mo_wordUShr dflags)
-
-translateOp dflags AddrRemOp      = Just (mo_wordURem dflags)
-
--- Native word signed ops
-
-translateOp dflags IntMulOp        = Just (mo_wordMul dflags)
-translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
-translateOp dflags IntQuotOp       = Just (mo_wordSQuot dflags)
-translateOp dflags IntRemOp        = Just (mo_wordSRem dflags)
-translateOp dflags IntNegOp        = Just (mo_wordSNeg dflags)
-
-
-translateOp dflags IntGeOp        = Just (mo_wordSGe dflags)
-translateOp dflags IntLeOp        = Just (mo_wordSLe dflags)
-translateOp dflags IntGtOp        = Just (mo_wordSGt dflags)
-translateOp dflags IntLtOp        = Just (mo_wordSLt dflags)
-
-translateOp dflags AndIOp         = Just (mo_wordAnd dflags)
-translateOp dflags OrIOp          = Just (mo_wordOr dflags)
-translateOp dflags XorIOp         = Just (mo_wordXor dflags)
-translateOp dflags NotIOp         = Just (mo_wordNot dflags)
-translateOp dflags ISllOp         = Just (mo_wordShl dflags)
-translateOp dflags ISraOp         = Just (mo_wordSShr dflags)
-translateOp dflags ISrlOp         = Just (mo_wordUShr dflags)
-
--- Native word unsigned ops
-
-translateOp dflags WordGeOp       = Just (mo_wordUGe dflags)
-translateOp dflags WordLeOp       = Just (mo_wordULe dflags)
-translateOp dflags WordGtOp       = Just (mo_wordUGt dflags)
-translateOp dflags WordLtOp       = Just (mo_wordULt dflags)
-
-translateOp dflags WordMulOp      = Just (mo_wordMul dflags)
-translateOp dflags WordQuotOp     = Just (mo_wordUQuot dflags)
-translateOp dflags WordRemOp      = Just (mo_wordURem dflags)
-
-translateOp dflags AddrGeOp       = Just (mo_wordUGe dflags)
-translateOp dflags AddrLeOp       = Just (mo_wordULe dflags)
-translateOp dflags AddrGtOp       = Just (mo_wordUGt dflags)
-translateOp dflags AddrLtOp       = Just (mo_wordULt dflags)
-
--- Int8# signed ops
-
-translateOp dflags Int8Extend     = Just (MO_SS_Conv W8 (wordWidth dflags))
-translateOp dflags Int8Narrow     = Just (MO_SS_Conv (wordWidth dflags) W8)
-translateOp _      Int8NegOp      = Just (MO_S_Neg W8)
-translateOp _      Int8AddOp      = Just (MO_Add W8)
-translateOp _      Int8SubOp      = Just (MO_Sub W8)
-translateOp _      Int8MulOp      = Just (MO_Mul W8)
-translateOp _      Int8QuotOp     = Just (MO_S_Quot W8)
-translateOp _      Int8RemOp      = Just (MO_S_Rem W8)
-
-translateOp _      Int8EqOp       = Just (MO_Eq W8)
-translateOp _      Int8GeOp       = Just (MO_S_Ge W8)
-translateOp _      Int8GtOp       = Just (MO_S_Gt W8)
-translateOp _      Int8LeOp       = Just (MO_S_Le W8)
-translateOp _      Int8LtOp       = Just (MO_S_Lt W8)
-translateOp _      Int8NeOp       = Just (MO_Ne W8)
-
--- Word8# unsigned ops
-
-translateOp dflags Word8Extend     = Just (MO_UU_Conv W8 (wordWidth dflags))
-translateOp dflags Word8Narrow     = Just (MO_UU_Conv (wordWidth dflags) W8)
-translateOp _      Word8NotOp      = Just (MO_Not W8)
-translateOp _      Word8AddOp      = Just (MO_Add W8)
-translateOp _      Word8SubOp      = Just (MO_Sub W8)
-translateOp _      Word8MulOp      = Just (MO_Mul W8)
-translateOp _      Word8QuotOp     = Just (MO_U_Quot W8)
-translateOp _      Word8RemOp      = Just (MO_U_Rem W8)
-
-translateOp _      Word8EqOp       = Just (MO_Eq W8)
-translateOp _      Word8GeOp       = Just (MO_U_Ge W8)
-translateOp _      Word8GtOp       = Just (MO_U_Gt W8)
-translateOp _      Word8LeOp       = Just (MO_U_Le W8)
-translateOp _      Word8LtOp       = Just (MO_U_Lt W8)
-translateOp _      Word8NeOp       = Just (MO_Ne W8)
-
--- Int16# signed ops
-
-translateOp dflags Int16Extend     = Just (MO_SS_Conv W16 (wordWidth dflags))
-translateOp dflags Int16Narrow     = Just (MO_SS_Conv (wordWidth dflags) W16)
-translateOp _      Int16NegOp      = Just (MO_S_Neg W16)
-translateOp _      Int16AddOp      = Just (MO_Add W16)
-translateOp _      Int16SubOp      = Just (MO_Sub W16)
-translateOp _      Int16MulOp      = Just (MO_Mul W16)
-translateOp _      Int16QuotOp     = Just (MO_S_Quot W16)
-translateOp _      Int16RemOp      = Just (MO_S_Rem W16)
-
-translateOp _      Int16EqOp       = Just (MO_Eq W16)
-translateOp _      Int16GeOp       = Just (MO_S_Ge W16)
-translateOp _      Int16GtOp       = Just (MO_S_Gt W16)
-translateOp _      Int16LeOp       = Just (MO_S_Le W16)
-translateOp _      Int16LtOp       = Just (MO_S_Lt W16)
-translateOp _      Int16NeOp       = Just (MO_Ne W16)
-
--- Word16# unsigned ops
-
-translateOp dflags Word16Extend     = Just (MO_UU_Conv W16 (wordWidth dflags))
-translateOp dflags Word16Narrow     = Just (MO_UU_Conv (wordWidth dflags) W16)
-translateOp _      Word16NotOp      = Just (MO_Not W16)
-translateOp _      Word16AddOp      = Just (MO_Add W16)
-translateOp _      Word16SubOp      = Just (MO_Sub W16)
-translateOp _      Word16MulOp      = Just (MO_Mul W16)
-translateOp _      Word16QuotOp     = Just (MO_U_Quot W16)
-translateOp _      Word16RemOp      = Just (MO_U_Rem W16)
-
-translateOp _      Word16EqOp       = Just (MO_Eq W16)
-translateOp _      Word16GeOp       = Just (MO_U_Ge W16)
-translateOp _      Word16GtOp       = Just (MO_U_Gt W16)
-translateOp _      Word16LeOp       = Just (MO_U_Le W16)
-translateOp _      Word16LtOp       = Just (MO_U_Lt W16)
-translateOp _      Word16NeOp       = Just (MO_Ne W16)
-
--- Char# ops
-
-translateOp dflags CharEqOp       = Just (MO_Eq (wordWidth dflags))
-translateOp dflags CharNeOp       = Just (MO_Ne (wordWidth dflags))
-translateOp dflags CharGeOp       = Just (MO_U_Ge (wordWidth dflags))
-translateOp dflags CharLeOp       = Just (MO_U_Le (wordWidth dflags))
-translateOp dflags CharGtOp       = Just (MO_U_Gt (wordWidth dflags))
-translateOp dflags CharLtOp       = Just (MO_U_Lt (wordWidth dflags))
-
--- Double ops
-
-translateOp _      DoubleEqOp     = Just (MO_F_Eq W64)
-translateOp _      DoubleNeOp     = Just (MO_F_Ne W64)
-translateOp _      DoubleGeOp     = Just (MO_F_Ge W64)
-translateOp _      DoubleLeOp     = Just (MO_F_Le W64)
-translateOp _      DoubleGtOp     = Just (MO_F_Gt W64)
-translateOp _      DoubleLtOp     = Just (MO_F_Lt W64)
-
-translateOp _      DoubleAddOp    = Just (MO_F_Add W64)
-translateOp _      DoubleSubOp    = Just (MO_F_Sub W64)
-translateOp _      DoubleMulOp    = Just (MO_F_Mul W64)
-translateOp _      DoubleDivOp    = Just (MO_F_Quot W64)
-translateOp _      DoubleNegOp    = Just (MO_F_Neg W64)
-
--- Float ops
-
-translateOp _      FloatEqOp     = Just (MO_F_Eq W32)
-translateOp _      FloatNeOp     = Just (MO_F_Ne W32)
-translateOp _      FloatGeOp     = Just (MO_F_Ge W32)
-translateOp _      FloatLeOp     = Just (MO_F_Le W32)
-translateOp _      FloatGtOp     = Just (MO_F_Gt W32)
-translateOp _      FloatLtOp     = Just (MO_F_Lt W32)
-
-translateOp _      FloatAddOp    = Just (MO_F_Add  W32)
-translateOp _      FloatSubOp    = Just (MO_F_Sub  W32)
-translateOp _      FloatMulOp    = Just (MO_F_Mul  W32)
-translateOp _      FloatDivOp    = Just (MO_F_Quot W32)
-translateOp _      FloatNegOp    = Just (MO_F_Neg  W32)
-
--- Vector ops
-
-translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add  n w)
-translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub  n w)
-translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul  n w)
-translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
-translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg  n w)
-
-translateOp _ (VecAddOp  IntVec n w) = Just (MO_V_Add   n w)
-translateOp _ (VecSubOp  IntVec n w) = Just (MO_V_Sub   n w)
-translateOp _ (VecMulOp  IntVec n w) = Just (MO_V_Mul   n w)
-translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
-translateOp _ (VecRemOp  IntVec n w) = Just (MO_VS_Rem  n w)
-translateOp _ (VecNegOp  IntVec n w) = Just (MO_VS_Neg  n w)
-
-translateOp _ (VecAddOp  WordVec n w) = Just (MO_V_Add   n w)
-translateOp _ (VecSubOp  WordVec n w) = Just (MO_V_Sub   n w)
-translateOp _ (VecMulOp  WordVec n w) = Just (MO_V_Mul   n w)
-translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
-translateOp _ (VecRemOp  WordVec n w) = Just (MO_VU_Rem  n w)
-
--- Conversions
-
-translateOp dflags Int2DoubleOp   = Just (MO_SF_Conv (wordWidth dflags) W64)
-translateOp dflags Double2IntOp   = Just (MO_FS_Conv W64 (wordWidth dflags))
-
-translateOp dflags Int2FloatOp    = Just (MO_SF_Conv (wordWidth dflags) W32)
-translateOp dflags Float2IntOp    = Just (MO_FS_Conv W32 (wordWidth dflags))
-
-translateOp _      Float2DoubleOp = Just (MO_FF_Conv W32 W64)
-translateOp _      Double2FloatOp = Just (MO_FF_Conv W64 W32)
-
--- Word comparisons masquerading as more exotic things.
-
-translateOp dflags SameMutVarOp           = Just (mo_wordEq dflags)
-translateOp dflags SameMVarOp             = Just (mo_wordEq dflags)
-translateOp dflags SameMutableArrayOp     = Just (mo_wordEq dflags)
-translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
-translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
-translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
-translateOp dflags SameTVarOp             = Just (mo_wordEq dflags)
-translateOp dflags EqStablePtrOp          = Just (mo_wordEq dflags)
--- See Note [Comparing stable names]
-translateOp dflags EqStableNameOp         = Just (mo_wordEq dflags)
-
-translateOp _      _ = Nothing
-
--- Note [Comparing stable names]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- A StableName# is actually a pointer to a stable name object (SNO)
--- containing an index into the stable name table (SNT). We
--- used to compare StableName#s by following the pointers to the
--- SNOs and checking whether they held the same SNT indices. However,
--- this is not necessary: there is a one-to-one correspondence
--- between SNOs and entries in the SNT, so simple pointer equality
--- does the trick.
-
--- These primops are implemented by CallishMachOps, because they sometimes
--- turn into foreign calls depending on the backend.
-
-callishOp :: PrimOp -> Maybe CallishMachOp
-callishOp DoublePowerOp  = Just MO_F64_Pwr
-callishOp DoubleSinOp    = Just MO_F64_Sin
-callishOp DoubleCosOp    = Just MO_F64_Cos
-callishOp DoubleTanOp    = Just MO_F64_Tan
-callishOp DoubleSinhOp   = Just MO_F64_Sinh
-callishOp DoubleCoshOp   = Just MO_F64_Cosh
-callishOp DoubleTanhOp   = Just MO_F64_Tanh
-callishOp DoubleAsinOp   = Just MO_F64_Asin
-callishOp DoubleAcosOp   = Just MO_F64_Acos
-callishOp DoubleAtanOp   = Just MO_F64_Atan
-callishOp DoubleAsinhOp  = Just MO_F64_Asinh
-callishOp DoubleAcoshOp  = Just MO_F64_Acosh
-callishOp DoubleAtanhOp  = Just MO_F64_Atanh
-callishOp DoubleLogOp    = Just MO_F64_Log
-callishOp DoubleLog1POp  = Just MO_F64_Log1P
-callishOp DoubleExpOp    = Just MO_F64_Exp
-callishOp DoubleExpM1Op  = Just MO_F64_ExpM1
-callishOp DoubleSqrtOp   = Just MO_F64_Sqrt
-
-callishOp FloatPowerOp  = Just MO_F32_Pwr
-callishOp FloatSinOp    = Just MO_F32_Sin
-callishOp FloatCosOp    = Just MO_F32_Cos
-callishOp FloatTanOp    = Just MO_F32_Tan
-callishOp FloatSinhOp   = Just MO_F32_Sinh
-callishOp FloatCoshOp   = Just MO_F32_Cosh
-callishOp FloatTanhOp   = Just MO_F32_Tanh
-callishOp FloatAsinOp   = Just MO_F32_Asin
-callishOp FloatAcosOp   = Just MO_F32_Acos
-callishOp FloatAtanOp   = Just MO_F32_Atan
-callishOp FloatAsinhOp  = Just MO_F32_Asinh
-callishOp FloatAcoshOp  = Just MO_F32_Acosh
-callishOp FloatAtanhOp  = Just MO_F32_Atanh
-callishOp FloatLogOp    = Just MO_F32_Log
-callishOp FloatLog1POp  = Just MO_F32_Log1P
-callishOp FloatExpOp    = Just MO_F32_Exp
-callishOp FloatExpM1Op  = Just MO_F32_ExpM1
-callishOp FloatSqrtOp   = Just MO_F32_Sqrt
-
-callishOp _ = Nothing
-
-------------------------------------------------------------------------------
--- Helpers for translating various minor variants of array indexing.
-
-doIndexOffAddrOp :: Maybe MachOp
-                 -> CmmType
-                 -> [LocalReg]
-                 -> [CmmExpr]
-                 -> FCode ()
-doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
-   = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
-doIndexOffAddrOp _ _ _ _
-   = panic "StgCmmPrim: doIndexOffAddrOp"
-
-doIndexOffAddrOpAs :: Maybe MachOp
-                   -> CmmType
-                   -> CmmType
-                   -> [LocalReg]
-                   -> [CmmExpr]
-                   -> FCode ()
-doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
-   = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
-doIndexOffAddrOpAs _ _ _ _ _
-   = panic "StgCmmPrim: doIndexOffAddrOpAs"
-
-doIndexByteArrayOp :: Maybe MachOp
-                   -> CmmType
-                   -> [LocalReg]
-                   -> [CmmExpr]
-                   -> FCode ()
-doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
-   = do dflags <- getDynFlags
-        mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
-doIndexByteArrayOp _ _ _ _
-   = panic "StgCmmPrim: doIndexByteArrayOp"
-
-doIndexByteArrayOpAs :: Maybe MachOp
-                    -> CmmType
-                    -> CmmType
-                    -> [LocalReg]
-                    -> [CmmExpr]
-                    -> FCode ()
-doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
-   = do dflags <- getDynFlags
-        mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
-doIndexByteArrayOpAs _ _ _ _ _
-   = panic "StgCmmPrim: doIndexByteArrayOpAs"
-
-doReadPtrArrayOp :: LocalReg
-                 -> CmmExpr
-                 -> CmmExpr
-                 -> FCode ()
-doReadPtrArrayOp res addr idx
-   = do dflags <- getDynFlags
-        mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
-
-doWriteOffAddrOp :: Maybe MachOp
-                 -> CmmType
-                 -> [LocalReg]
-                 -> [CmmExpr]
-                 -> FCode ()
-doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
-   = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
-doWriteOffAddrOp _ _ _ _
-   = panic "StgCmmPrim: doWriteOffAddrOp"
-
-doWriteByteArrayOp :: Maybe MachOp
-                   -> CmmType
-                   -> [LocalReg]
-                   -> [CmmExpr]
-                   -> FCode ()
-doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
-   = do dflags <- getDynFlags
-        mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
-doWriteByteArrayOp _ _ _ _
-   = panic "StgCmmPrim: doWriteByteArrayOp"
-
-doWritePtrArrayOp :: CmmExpr
-                  -> CmmExpr
-                  -> CmmExpr
-                  -> FCode ()
-doWritePtrArrayOp addr idx val
-  = do dflags <- getDynFlags
-       let ty = cmmExprType dflags val
-       -- This write barrier is to ensure that the heap writes to the object
-       -- referred to by val have happened before we write val into the array.
-       -- See #12469 for details.
-       emitPrimCall [] MO_WriteBarrier []
-       mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
-       emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
-  -- the write barrier.  We must write a byte into the mark table:
-  -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
-       emit $ mkStore (
-         cmmOffsetExpr dflags
-          (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
-                         (loadArrPtrsSize dflags addr))
-          (CmmMachOp (mo_wordUShr dflags) [idx,
-                                           mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
-         ) (CmmLit (CmmInt 1 W8))
-
-loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
-loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
- where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
-
-mkBasicIndexedRead :: ByteOff      -- Initial offset in bytes
-                   -> Maybe MachOp -- Optional result cast
-                   -> CmmType      -- Type of element we are accessing
-                   -> LocalReg     -- Destination
-                   -> CmmExpr      -- Base address
-                   -> CmmType      -- Type of element by which we are indexing
-                   -> CmmExpr      -- Index
-                   -> FCode ()
-mkBasicIndexedRead off Nothing ty res base idx_ty idx
-   = do dflags <- getDynFlags
-        emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
-mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
-   = do dflags <- getDynFlags
-        emitAssign (CmmLocal res) (CmmMachOp cast [
-                                   cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
-
-mkBasicIndexedWrite :: ByteOff      -- Initial offset in bytes
-                    -> Maybe MachOp -- Optional value cast
-                    -> CmmExpr      -- Base address
-                    -> CmmType      -- Type of element by which we are indexing
-                    -> CmmExpr      -- Index
-                    -> CmmExpr      -- Value to write
-                    -> FCode ()
-mkBasicIndexedWrite off Nothing base idx_ty idx val
-   = do dflags <- getDynFlags
-        emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
-mkBasicIndexedWrite off (Just cast) base idx_ty idx val
-   = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
-
--- ----------------------------------------------------------------------------
--- Misc utils
-
-cmmIndexOffExpr :: DynFlags
-                -> ByteOff  -- Initial offset in bytes
-                -> Width    -- Width of element by which we are indexing
-                -> CmmExpr  -- Base address
-                -> CmmExpr  -- Index
-                -> CmmExpr
-cmmIndexOffExpr dflags off width base idx
-   = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
-
-cmmLoadIndexOffExpr :: DynFlags
-                    -> ByteOff  -- Initial offset in bytes
-                    -> CmmType  -- Type of element we are accessing
-                    -> CmmExpr  -- Base address
-                    -> CmmType  -- Type of element by which we are indexing
-                    -> CmmExpr  -- Index
-                    -> CmmExpr
-cmmLoadIndexOffExpr dflags off ty base idx_ty idx
-   = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
-
-setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
-setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
-
-------------------------------------------------------------------------------
--- Helpers for translating vector primops.
-
-vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
-vecVmmType pocat n w = vec n (vecCmmCat pocat w)
-
-vecCmmCat :: PrimOpVecCat -> Width -> CmmType
-vecCmmCat IntVec   = cmmBits
-vecCmmCat WordVec  = cmmBits
-vecCmmCat FloatVec = cmmFloat
-
-vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
-vecElemInjectCast _      FloatVec _   =  Nothing
-vecElemInjectCast dflags IntVec   W8  =  Just (mo_WordTo8  dflags)
-vecElemInjectCast dflags IntVec   W16 =  Just (mo_WordTo16 dflags)
-vecElemInjectCast dflags IntVec   W32 =  Just (mo_WordTo32 dflags)
-vecElemInjectCast _      IntVec   W64 =  Nothing
-vecElemInjectCast dflags WordVec  W8  =  Just (mo_WordTo8  dflags)
-vecElemInjectCast dflags WordVec  W16 =  Just (mo_WordTo16 dflags)
-vecElemInjectCast dflags WordVec  W32 =  Just (mo_WordTo32 dflags)
-vecElemInjectCast _      WordVec  W64 =  Nothing
-vecElemInjectCast _      _        _   =  Nothing
-
-vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
-vecElemProjectCast _      FloatVec _   =  Nothing
-vecElemProjectCast dflags IntVec   W8  =  Just (mo_s_8ToWord  dflags)
-vecElemProjectCast dflags IntVec   W16 =  Just (mo_s_16ToWord dflags)
-vecElemProjectCast dflags IntVec   W32 =  Just (mo_s_32ToWord dflags)
-vecElemProjectCast _      IntVec   W64 =  Nothing
-vecElemProjectCast dflags WordVec  W8  =  Just (mo_u_8ToWord  dflags)
-vecElemProjectCast dflags WordVec  W16 =  Just (mo_u_16ToWord dflags)
-vecElemProjectCast dflags WordVec  W32 =  Just (mo_u_32ToWord dflags)
-vecElemProjectCast _      WordVec  W64 =  Nothing
-vecElemProjectCast _      _        _   =  Nothing
-
-
--- NOTE [SIMD Design for the future]
--- Check to make sure that we can generate code for the specified vector type
--- given the current set of dynamic flags.
--- Currently these checks are specific to x86 and x86_64 architecture.
--- This should be fixed!
--- In particular,
--- 1) Add better support for other architectures! (this may require a redesign)
--- 2) Decouple design choices from LLVM's pseudo SIMD model!
---   The high level LLVM naive rep makes per CPU family SIMD generation is own
---   optimization problem, and hides important differences in eg ARM vs x86_64 simd
--- 3) Depending on the architecture, the SIMD registers may also support general
---    computations on Float/Double/Word/Int scalars, but currently on
---    for example x86_64, we always put Word/Int (or sized) in GPR
---    (general purpose) registers. Would relaxing that allow for
---    useful optimization opportunities?
---      Phrased differently, it is worth experimenting with supporting
---    different register mapping strategies than we currently have, especially if
---    someday we want SIMD to be a first class denizen in GHC along with scalar
---    values!
---      The current design with respect to register mapping of scalars could
---    very well be the best,but exploring the  design space and doing careful
---    measurments is the only only way to validate that.
---      In some next generation CPU ISAs, notably RISC V, the SIMD extension
---    includes  support for a sort of run time CPU dependent vectorization parameter,
---    where a loop may act upon a single scalar each iteration OR some 2,4,8 ...
---    element chunk! Time will tell if that direction sees wide adoption,
---    but it is from that context that unifying our handling of simd and scalars
---    may benefit. It is not likely to benefit current architectures, though
---    it may very well be a design perspective that helps guide improving the NCG.
-
-
-checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
-checkVecCompatibility dflags vcat l w = do
-    when (hscTarget dflags /= HscLlvm) $ do
-        sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
-                         ,"Please use -fllvm."]
-    check vecWidth vcat l w
-  where
-    check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
-    check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
-        sorry $ "128-bit wide single-precision floating point " ++
-                "SIMD vector instructions require at least -msse."
-    check W128 _ _ _ | not (isSse2Enabled dflags) =
-        sorry $ "128-bit wide integer and double precision " ++
-                "SIMD vector instructions require at least -msse2."
-    check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
-        sorry $ "256-bit wide floating point " ++
-                "SIMD vector instructions require at least -mavx."
-    check W256 _ _ _ | not (isAvx2Enabled dflags) =
-        sorry $ "256-bit wide integer " ++
-                "SIMD vector instructions require at least -mavx2."
-    check W512 _ _ _ | not (isAvx512fEnabled dflags) =
-        sorry $ "512-bit wide " ++
-                "SIMD vector instructions require -mavx512f."
-    check _ _ _ _ = return ()
-
-    vecWidth = typeWidth (vecVmmType vcat l w)
-
-------------------------------------------------------------------------------
--- Helpers for translating vector packing and unpacking.
-
-doVecPackOp :: Maybe MachOp  -- Cast from element to vector component
-            -> CmmType       -- Type of vector
-            -> CmmExpr       -- Initial vector
-            -> [CmmExpr]     -- Elements
-            -> CmmFormal     -- Destination for result
-            -> FCode ()
-doVecPackOp maybe_pre_write_cast ty z es res = do
-    dst <- newTemp ty
-    emitAssign (CmmLocal dst) z
-    vecPack dst es 0
-  where
-    vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
-    vecPack src [] _ =
-        emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
-
-    vecPack src (e : es) i = do
-        dst <- newTemp ty
-        if isFloatType (vecElemType ty)
-          then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
-                                                    [CmmReg (CmmLocal src), cast e, iLit])
-          else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
-                                                    [CmmReg (CmmLocal src), cast e, iLit])
-        vecPack dst es (i + 1)
-      where
-        -- vector indices are always 32-bits
-        iLit = CmmLit (CmmInt (toInteger i) W32)
-
-    cast :: CmmExpr -> CmmExpr
-    cast val = case maybe_pre_write_cast of
-                 Nothing   -> val
-                 Just cast -> CmmMachOp cast [val]
-
-    len :: Length
-    len = vecLength ty
-
-    wid :: Width
-    wid = typeWidth (vecElemType ty)
-
-doVecUnpackOp :: Maybe MachOp  -- Cast from vector component to element result
-              -> CmmType       -- Type of vector
-              -> CmmExpr       -- Vector
-              -> [CmmFormal]   -- Element results
-              -> FCode ()
-doVecUnpackOp maybe_post_read_cast ty e res =
-    vecUnpack res 0
-  where
-    vecUnpack :: [CmmFormal] -> Int -> FCode ()
-    vecUnpack [] _ =
-        return ()
-
-    vecUnpack (r : rs) i = do
-        if isFloatType (vecElemType ty)
-          then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
-                                             [e, iLit]))
-          else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
-                                             [e, iLit]))
-        vecUnpack rs (i + 1)
-      where
-        -- vector indices are always 32-bits
-        iLit = CmmLit (CmmInt (toInteger i) W32)
-
-    cast :: CmmExpr -> CmmExpr
-    cast val = case maybe_post_read_cast of
-                 Nothing   -> val
-                 Just cast -> CmmMachOp cast [val]
-
-    len :: Length
-    len = vecLength ty
-
-    wid :: Width
-    wid = typeWidth (vecElemType ty)
-
-doVecInsertOp :: Maybe MachOp  -- Cast from element to vector component
-              -> CmmType       -- Vector type
-              -> CmmExpr       -- Source vector
-              -> CmmExpr       -- Element
-              -> CmmExpr       -- Index at which to insert element
-              -> CmmFormal     -- Destination for result
-              -> FCode ()
-doVecInsertOp maybe_pre_write_cast ty src e idx res = do
-    dflags <- getDynFlags
-    -- vector indices are always 32-bits
-    let idx' :: CmmExpr
-        idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
-    if isFloatType (vecElemType ty)
-      then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
-      else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
-  where
-    cast :: CmmExpr -> CmmExpr
-    cast val = case maybe_pre_write_cast of
-                 Nothing   -> val
-                 Just cast -> CmmMachOp cast [val]
-
-    len :: Length
-    len = vecLength ty
-
-    wid :: Width
-    wid = typeWidth (vecElemType ty)
-
-------------------------------------------------------------------------------
--- Helpers for translating prefetching.
-
-
--- | Translate byte array prefetch operations into proper primcalls.
-doPrefetchByteArrayOp :: Int
-                      -> [CmmExpr]
-                      -> FCode ()
-doPrefetchByteArrayOp locality  [addr,idx]
-   = do dflags <- getDynFlags
-        mkBasicPrefetch locality (arrWordsHdrSize dflags)  addr idx
-doPrefetchByteArrayOp _ _
-   = panic "StgCmmPrim: doPrefetchByteArrayOp"
-
--- | Translate mutable byte array prefetch operations into proper primcalls.
-doPrefetchMutableByteArrayOp :: Int
-                      -> [CmmExpr]
-                      -> FCode ()
-doPrefetchMutableByteArrayOp locality  [addr,idx]
-   = do dflags <- getDynFlags
-        mkBasicPrefetch locality (arrWordsHdrSize dflags)  addr idx
-doPrefetchMutableByteArrayOp _ _
-   = panic "StgCmmPrim: doPrefetchByteArrayOp"
-
--- | Translate address prefetch operations into proper primcalls.
-doPrefetchAddrOp ::Int
-                 -> [CmmExpr]
-                 -> FCode ()
-doPrefetchAddrOp locality   [addr,idx]
-   = mkBasicPrefetch locality 0  addr idx
-doPrefetchAddrOp _ _
-   = panic "StgCmmPrim: doPrefetchAddrOp"
-
--- | Translate value prefetch operations into proper primcalls.
-doPrefetchValueOp :: Int
-                 -> [CmmExpr]
-                 -> FCode ()
-doPrefetchValueOp  locality   [addr]
-  =  do dflags <- getDynFlags
-        mkBasicPrefetch locality 0 addr  (CmmLit (CmmInt 0 (wordWidth dflags)))
-doPrefetchValueOp _ _
-  = panic "StgCmmPrim: doPrefetchValueOp"
-
--- | helper to generate prefetch primcalls
-mkBasicPrefetch :: Int          -- Locality level 0-3
-                -> ByteOff      -- Initial offset in bytes
-                -> CmmExpr      -- Base address
-                -> CmmExpr      -- Index
-                -> FCode ()
-mkBasicPrefetch locality off base idx
-   = do dflags <- getDynFlags
-        emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
-        return ()
-
--- ----------------------------------------------------------------------------
--- Allocating byte arrays
-
--- | Takes a register to return the newly allocated array in and the
--- size of the new array in bytes. Allocates a new
--- 'MutableByteArray#'.
-doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
-doNewByteArrayOp res_r n = do
-    dflags <- getDynFlags
-
-    let info_ptr = mkLblExpr mkArrWords_infoLabel
-        rep = arrWordsRep dflags n
-
-    tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
-        (mkIntExpr dflags (nonHdrSize dflags rep))
-        (zeroExpr dflags)
-
-    let hdr_size = fixedHdrSize dflags
-
-    base <- allocHeapClosure rep info_ptr cccsExpr
-                     [ (mkIntExpr dflags n,
-                        hdr_size + oFFSET_StgArrBytes_bytes dflags)
-                     ]
-
-    emit $ mkAssign (CmmLocal res_r) base
-
--- ----------------------------------------------------------------------------
--- Comparing byte arrays
-
-doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-                     -> FCode ()
-doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
-    dflags <- getDynFlags
-    ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off
-    ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off
-
-    -- short-cut in case of equal pointers avoiding a costly
-    -- subroutine call to the memcmp(3) routine; the Cmm logic below
-    -- results in assembly code being generated for
-    --
-    --   cmpPrefix10 :: ByteArray# -> ByteArray# -> Int#
-    --   cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10#
-    --
-    -- that looks like
-    --
-    --          leaq 16(%r14),%rax
-    --          leaq 16(%rsi),%rbx
-    --          xorl %ecx,%ecx
-    --          cmpq %rbx,%rax
-    --          je l_ptr_eq
-    --
-    --          ; NB: the common case (unequal pointers) falls-through
-    --          ; the conditional jump, and therefore matches the
-    --          ; usual static branch prediction convention of modern cpus
-    --
-    --          subq $8,%rsp
-    --          movq %rbx,%rsi
-    --          movq %rax,%rdi
-    --          movl $10,%edx
-    --          xorl %eax,%eax
-    --          call memcmp
-    --          addq $8,%rsp
-    --          movslq %eax,%rax
-    --          movq %rax,%rcx
-    --  l_ptr_eq:
-    --          movq %rcx,%rbx
-    --          jmp *(%rbp)
-
-    l_ptr_eq <- newBlockId
-    l_ptr_ne <- newBlockId
-
-    emit (mkAssign (CmmLocal res) (zeroExpr dflags))
-    emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p)
-                    l_ptr_eq l_ptr_ne (Just False))
-
-    emitLabel l_ptr_ne
-    emitMemcmpCall res ba1_p ba2_p n 1
-
-    emitLabel l_ptr_eq
-
--- ----------------------------------------------------------------------------
--- Copying byte arrays
-
--- | Takes a source 'ByteArray#', an offset in the source array, a
--- destination 'MutableByteArray#', an offset into the destination
--- array, and the number of bytes to copy.  Copies the given number of
--- bytes from the source array to the destination array.
-doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-                  -> FCode ()
-doCopyByteArrayOp = emitCopyByteArray copy
-  where
-    -- Copy data (we assume the arrays aren't overlapping since
-    -- they're of different types)
-    copy _src _dst dst_p src_p bytes align =
-        emitMemcpyCall dst_p src_p bytes align
-
--- | Takes a source 'MutableByteArray#', an offset in the source
--- array, a destination 'MutableByteArray#', an offset into the
--- destination array, and the number of bytes to copy.  Copies the
--- given number of bytes from the source array to the destination
--- array.
-doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-                         -> FCode ()
-doCopyMutableByteArrayOp = emitCopyByteArray copy
-  where
-    -- The only time the memory might overlap is when the two arrays
-    -- we were provided are the same array!
-    -- TODO: Optimize branch for common case of no aliasing.
-    copy src dst dst_p src_p bytes align = do
-        dflags <- getDynFlags
-        (moveCall, cpyCall) <- forkAltPair
-            (getCode $ emitMemmoveCall dst_p src_p bytes align)
-            (getCode $ emitMemcpyCall  dst_p src_p bytes align)
-        emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
-
-emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-                      -> Alignment -> FCode ())
-                  -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-                  -> FCode ()
-emitCopyByteArray copy src src_off dst dst_off n = do
-    dflags <- getDynFlags
-    let byteArrayAlignment = wordAlignment dflags
-        srcOffAlignment = cmmExprAlignment src_off
-        dstOffAlignment = cmmExprAlignment dst_off
-        align = minimum [byteArrayAlignment, srcOffAlignment, dstOffAlignment]
-    dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
-    src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
-    copy src dst dst_p src_p n align
-
--- | Takes a source 'ByteArray#', an offset in the source array, a
--- destination 'Addr#', and the number of bytes to copy.  Copies the given
--- number of bytes from the source array to the destination memory region.
-doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
-doCopyByteArrayToAddrOp src src_off dst_p bytes = do
-    -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
-    dflags <- getDynFlags
-    src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
-    emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
-
--- | Takes a source 'MutableByteArray#', an offset in the source array, a
--- destination 'Addr#', and the number of bytes to copy.  Copies the given
--- number of bytes from the source array to the destination memory region.
-doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-                               -> FCode ()
-doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
-
--- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
--- the destination array, and the number of bytes to copy.  Copies the given
--- number of bytes from the source memory region to the destination array.
-doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
-doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
-    -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
-    dflags <- getDynFlags
-    dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
-    emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
-
-
--- ----------------------------------------------------------------------------
--- Setting byte arrays
-
--- | Takes a 'MutableByteArray#', an offset into the array, a length,
--- and a byte, and sets each of the selected bytes in the array to the
--- character.
-doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-                 -> FCode ()
-doSetByteArrayOp ba off len c = do
-    dflags <- getDynFlags
-
-    let byteArrayAlignment = wordAlignment dflags -- known since BA is allocated on heap
-        offsetAlignment = cmmExprAlignment off
-        align = min byteArrayAlignment offsetAlignment
-
-    p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
-    emitMemsetCall p c len align
-
--- ----------------------------------------------------------------------------
--- Allocating arrays
-
--- | Allocate a new array.
-doNewArrayOp :: CmmFormal             -- ^ return register
-             -> SMRep                 -- ^ representation of the array
-             -> CLabel                -- ^ info pointer
-             -> [(CmmExpr, ByteOff)]  -- ^ header payload
-             -> WordOff               -- ^ array size
-             -> CmmExpr               -- ^ initial element
-             -> FCode ()
-doNewArrayOp res_r rep info payload n init = do
-    dflags <- getDynFlags
-
-    let info_ptr = mkLblExpr info
-
-    tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
-        (mkIntExpr dflags (nonHdrSize dflags rep))
-        (zeroExpr dflags)
-
-    base <- allocHeapClosure rep info_ptr cccsExpr payload
-
-    arr <- CmmLocal `fmap` newTemp (bWord dflags)
-    emit $ mkAssign arr base
-
-    -- Initialise all elements of the array
-    let mkOff off = cmmOffsetW dflags (CmmReg arr) (hdrSizeW dflags rep + off)
-        initialization = [ mkStore (mkOff off) init | off <- [0.. n - 1] ]
-    emit (catAGraphs initialization)
-
-    emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-
--- ----------------------------------------------------------------------------
--- Copying pointer arrays
-
--- EZY: This code has an unusually high amount of assignTemp calls, seen
--- nowhere else in the code generator.  This is mostly because these
--- "primitive" ops result in a surprisingly large amount of code.  It
--- will likely be worthwhile to optimize what is emitted here, so that
--- our optimization passes don't waste time repeatedly optimizing the
--- same bits of code.
-
--- More closely imitates 'assignTemp' from the old code generator, which
--- returns a CmmExpr rather than a LocalReg.
-assignTempE :: CmmExpr -> FCode CmmExpr
-assignTempE e = do
-    t <- assignTemp e
-    return (CmmReg (CmmLocal t))
-
--- | Takes a source 'Array#', an offset in the source array, a
--- destination 'MutableArray#', an offset into the destination array,
--- and the number of elements to copy.  Copies the given number of
--- elements from the source array to the destination array.
-doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
-              -> FCode ()
-doCopyArrayOp = emitCopyArray copy
-  where
-    -- Copy data (we assume the arrays aren't overlapping since
-    -- they're of different types)
-    copy _src _dst dst_p src_p bytes =
-        do dflags <- getDynFlags
-           emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
-               (wordAlignment dflags)
-
-
--- | Takes a source 'MutableArray#', an offset in the source array, a
--- destination 'MutableArray#', an offset into the destination array,
--- and the number of elements to copy.  Copies the given number of
--- elements from the source array to the destination array.
-doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
-                     -> FCode ()
-doCopyMutableArrayOp = emitCopyArray copy
-  where
-    -- The only time the memory might overlap is when the two arrays
-    -- we were provided are the same array!
-    -- TODO: Optimize branch for common case of no aliasing.
-    copy src dst dst_p src_p bytes = do
-        dflags <- getDynFlags
-        (moveCall, cpyCall) <- forkAltPair
-            (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
-             (wordAlignment dflags))
-            (getCode $ emitMemcpyCall  dst_p src_p (mkIntExpr dflags bytes)
-             (wordAlignment dflags))
-        emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
-
-emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
-                  -> FCode ())  -- ^ copy function
-              -> CmmExpr        -- ^ source array
-              -> CmmExpr        -- ^ offset in source array
-              -> CmmExpr        -- ^ destination array
-              -> CmmExpr        -- ^ offset in destination array
-              -> WordOff        -- ^ number of elements to copy
-              -> FCode ()
-emitCopyArray copy src0 src_off dst0 dst_off0 n =
-    when (n /= 0) $ do
-        dflags <- getDynFlags
-
-        -- Passed as arguments (be careful)
-        src     <- assignTempE src0
-        dst     <- assignTempE dst0
-        dst_off <- assignTempE dst_off0
-
-        -- Set the dirty bit in the header.
-        emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
-
-        dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
-                       (arrPtrsHdrSize dflags)
-        dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
-        src_p <- assignTempE $ cmmOffsetExprW dflags
-                 (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
-        let bytes = wordsToBytes dflags n
-
-        copy src dst dst_p src_p bytes
-
-        -- The base address of the destination card table
-        dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
-                       (loadArrPtrsSize dflags dst)
-
-        emitSetCards dst_off dst_cards_p n
-
-doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
-                   -> FCode ()
-doCopySmallArrayOp = emitCopySmallArray copy
-  where
-    -- Copy data (we assume the arrays aren't overlapping since
-    -- they're of different types)
-    copy _src _dst dst_p src_p bytes =
-        do dflags <- getDynFlags
-           emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
-               (wordAlignment dflags)
-
-
-doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
-                          -> FCode ()
-doCopySmallMutableArrayOp = emitCopySmallArray copy
-  where
-    -- The only time the memory might overlap is when the two arrays
-    -- we were provided are the same array!
-    -- TODO: Optimize branch for common case of no aliasing.
-    copy src dst dst_p src_p bytes = do
-        dflags <- getDynFlags
-        (moveCall, cpyCall) <- forkAltPair
-            (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
-             (wordAlignment dflags))
-            (getCode $ emitMemcpyCall  dst_p src_p (mkIntExpr dflags bytes)
-             (wordAlignment dflags))
-        emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
-
-emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
-                       -> FCode ())  -- ^ copy function
-                   -> CmmExpr        -- ^ source array
-                   -> CmmExpr        -- ^ offset in source array
-                   -> CmmExpr        -- ^ destination array
-                   -> CmmExpr        -- ^ offset in destination array
-                   -> WordOff        -- ^ number of elements to copy
-                   -> FCode ()
-emitCopySmallArray copy src0 src_off dst0 dst_off n =
-    when (n /= 0) $ do
-        dflags <- getDynFlags
-
-        -- Passed as arguments (be careful)
-        src     <- assignTempE src0
-        dst     <- assignTempE dst0
-
-        -- Set the dirty bit in the header.
-        emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
-
-        dst_p <- assignTempE $ cmmOffsetExprW dflags
-                 (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
-        src_p <- assignTempE $ cmmOffsetExprW dflags
-                 (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
-        let bytes = wordsToBytes dflags n
-
-        copy src dst dst_p src_p bytes
-
--- | Takes an info table label, a register to return the newly
--- allocated array in, a source array, an offset in the source array,
--- and the number of elements to copy. Allocates a new array and
--- initializes it from the source array.
-emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
-               -> FCode ()
-emitCloneArray info_p res_r src src_off n = do
-    dflags <- getDynFlags
-
-    let info_ptr = mkLblExpr info_p
-        rep = arrPtrsRep dflags n
-
-    tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
-        (mkIntExpr dflags (nonHdrSize dflags rep))
-        (zeroExpr dflags)
-
-    let hdr_size = fixedHdrSize dflags
-
-    base <- allocHeapClosure rep info_ptr cccsExpr
-                     [ (mkIntExpr dflags n,
-                        hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
-                     , (mkIntExpr dflags (nonHdrSizeW rep),
-                        hdr_size + oFFSET_StgMutArrPtrs_size dflags)
-                     ]
-
-    arr <- CmmLocal `fmap` newTemp (bWord dflags)
-    emit $ mkAssign arr base
-
-    dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
-             (arrPtrsHdrSize dflags)
-    src_p <- assignTempE $ cmmOffsetExprW dflags src
-             (cmmAddWord dflags
-              (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
-
-    emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
-        (wordAlignment dflags)
-
-    emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-
--- | Takes an info table label, a register to return the newly
--- allocated array in, a source array, an offset in the source array,
--- and the number of elements to copy. Allocates a new array and
--- initializes it from the source array.
-emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
-                    -> FCode ()
-emitCloneSmallArray info_p res_r src src_off n = do
-    dflags <- getDynFlags
-
-    let info_ptr = mkLblExpr info_p
-        rep = smallArrPtrsRep n
-
-    tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
-        (mkIntExpr dflags (nonHdrSize dflags rep))
-        (zeroExpr dflags)
-
-    let hdr_size = fixedHdrSize dflags
-
-    base <- allocHeapClosure rep info_ptr cccsExpr
-                     [ (mkIntExpr dflags n,
-                        hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
-                     ]
-
-    arr <- CmmLocal `fmap` newTemp (bWord dflags)
-    emit $ mkAssign arr base
-
-    dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
-             (smallArrPtrsHdrSize dflags)
-    src_p <- assignTempE $ cmmOffsetExprW dflags src
-             (cmmAddWord dflags
-              (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
-
-    emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
-        (wordAlignment dflags)
-
-    emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-
--- | Takes and offset in the destination array, the base address of
--- the card table, and the number of elements affected (*not* the
--- number of cards). The number of elements may not be zero.
--- Marks the relevant cards as dirty.
-emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
-emitSetCards dst_start dst_cards_start n = do
-    dflags <- getDynFlags
-    start_card <- assignTempE $ cardCmm dflags dst_start
-    let end_card = cardCmm dflags
-                   (cmmSubWord dflags
-                    (cmmAddWord dflags dst_start (mkIntExpr dflags n))
-                    (mkIntExpr dflags 1))
-    emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
-        (mkIntExpr dflags 1)
-        (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
-        (mkAlignment 1) -- no alignment (1 byte)
-
--- Convert an element index to a card index
-cardCmm :: DynFlags -> CmmExpr -> CmmExpr
-cardCmm dflags i =
-    cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
-
-------------------------------------------------------------------------------
--- SmallArray PrimOp implementations
-
-doReadSmallPtrArrayOp :: LocalReg
-                      -> CmmExpr
-                      -> CmmExpr
-                      -> FCode ()
-doReadSmallPtrArrayOp res addr idx = do
-    dflags <- getDynFlags
-    mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
-        (gcWord dflags) idx
-
-doWriteSmallPtrArrayOp :: CmmExpr
-                       -> CmmExpr
-                       -> CmmExpr
-                       -> FCode ()
-doWriteSmallPtrArrayOp addr idx val = do
-    dflags <- getDynFlags
-    let ty = cmmExprType dflags val
-    emitPrimCall [] MO_WriteBarrier [] -- #12469
-    mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
-    emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
-
-------------------------------------------------------------------------------
--- Atomic read-modify-write
-
--- | Emit an atomic modification to a byte array element. The result
--- reg contains that previous value of the element. Implies a full
--- memory barrier.
-doAtomicRMW :: LocalReg      -- ^ Result reg
-            -> AtomicMachOp  -- ^ Atomic op (e.g. add)
-            -> CmmExpr       -- ^ MutableByteArray#
-            -> CmmExpr       -- ^ Index
-            -> CmmType       -- ^ Type of element by which we are indexing
-            -> CmmExpr       -- ^ Op argument (e.g. amount to add)
-            -> FCode ()
-doAtomicRMW res amop mba idx idx_ty n = do
-    dflags <- getDynFlags
-    let width = typeWidth idx_ty
-        addr  = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
-                width mba idx
-    emitPrimCall
-        [ res ]
-        (MO_AtomicRMW width amop)
-        [ addr, n ]
-
--- | Emit an atomic read to a byte array that acts as a memory barrier.
-doAtomicReadByteArray
-    :: LocalReg  -- ^ Result reg
-    -> CmmExpr   -- ^ MutableByteArray#
-    -> CmmExpr   -- ^ Index
-    -> CmmType   -- ^ Type of element by which we are indexing
-    -> FCode ()
-doAtomicReadByteArray res mba idx idx_ty = do
-    dflags <- getDynFlags
-    let width = typeWidth idx_ty
-        addr  = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
-                width mba idx
-    emitPrimCall
-        [ res ]
-        (MO_AtomicRead width)
-        [ addr ]
-
--- | Emit an atomic write to a byte array that acts as a memory barrier.
-doAtomicWriteByteArray
-    :: CmmExpr   -- ^ MutableByteArray#
-    -> CmmExpr   -- ^ Index
-    -> CmmType   -- ^ Type of element by which we are indexing
-    -> CmmExpr   -- ^ Value to write
-    -> FCode ()
-doAtomicWriteByteArray mba idx idx_ty val = do
-    dflags <- getDynFlags
-    let width = typeWidth idx_ty
-        addr  = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
-                width mba idx
-    emitPrimCall
-        [ {- no results -} ]
-        (MO_AtomicWrite width)
-        [ addr, val ]
-
-doCasByteArray
-    :: LocalReg  -- ^ Result reg
-    -> CmmExpr   -- ^ MutableByteArray#
-    -> CmmExpr   -- ^ Index
-    -> CmmType   -- ^ Type of element by which we are indexing
-    -> CmmExpr   -- ^ Old value
-    -> CmmExpr   -- ^ New value
-    -> FCode ()
-doCasByteArray res mba idx idx_ty old new = do
-    dflags <- getDynFlags
-    let width = (typeWidth idx_ty)
-        addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
-               width mba idx
-    emitPrimCall
-        [ res ]
-        (MO_Cmpxchg width)
-        [ addr, old, new ]
-
-------------------------------------------------------------------------------
--- Helpers for emitting function calls
-
--- | Emit a call to @memcpy@.
-emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
-emitMemcpyCall dst src n align = do
-    emitPrimCall
-        [ {-no results-} ]
-        (MO_Memcpy (alignmentBytes align))
-        [ dst, src, n ]
-
--- | Emit a call to @memmove@.
-emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
-emitMemmoveCall dst src n align = do
-    emitPrimCall
-        [ {- no results -} ]
-        (MO_Memmove (alignmentBytes align))
-        [ dst, src, n ]
-
--- | Emit a call to @memset@.  The second argument must fit inside an
--- unsigned char.
-emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
-emitMemsetCall dst c n align = do
-    emitPrimCall
-        [ {- no results -} ]
-        (MO_Memset (alignmentBytes align))
-        [ dst, c, n ]
-
-emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
-emitMemcmpCall res ptr1 ptr2 n align = do
-    -- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all
-    -- code-gens currently call out to the @memcmp(3)@ C function.
-    -- This was easier than moving the sign-extensions into
-    -- all the code-gens.
-    dflags <- getDynFlags
-    let is32Bit = typeWidth (localRegType res) == W32
-
-    cres <- if is32Bit
-              then return res
-              else newTemp b32
-
-    emitPrimCall
-        [ cres ]
-        (MO_Memcmp align)
-        [ ptr1, ptr2, n ]
-
-    unless is32Bit $ do
-      emit $ mkAssign (CmmLocal res)
-                      (CmmMachOp
-                         (mo_s_32ToWord dflags)
-                         [(CmmReg (CmmLocal cres))])
-
-emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
-emitBSwapCall res x width = do
-    emitPrimCall
-        [ res ]
-        (MO_BSwap width)
-        [ x ]
-
-emitBRevCall :: LocalReg -> CmmExpr -> Width -> FCode ()
-emitBRevCall res x width = do
-    emitPrimCall
-        [ res ]
-        (MO_BRev width)
-        [ x ]
-
-emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
-emitPopCntCall res x width = do
-    emitPrimCall
-        [ res ]
-        (MO_PopCnt width)
-        [ x ]
-
-emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
-emitPdepCall res x y width = do
-    emitPrimCall
-        [ res ]
-        (MO_Pdep width)
-        [ x, y ]
-
-emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
-emitPextCall res x y width = do
-    emitPrimCall
-        [ res ]
-        (MO_Pext width)
-        [ x, y ]
-
-emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
-emitClzCall res x width = do
-    emitPrimCall
-        [ res ]
-        (MO_Clz width)
-        [ x ]
-
-emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
-emitCtzCall res x width = do
-    emitPrimCall
-        [ res ]
-        (MO_Ctz width)
-        [ x ]
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs
deleted file mode 100644
index 172b77c8f9..0000000000
--- a/compiler/codeGen/StgCmmProf.hs
+++ /dev/null
@@ -1,360 +0,0 @@
------------------------------------------------------------------------------
---
--- Code generation for profiling
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmProf (
-        initCostCentres, ccType, ccsType,
-        mkCCostCentre, mkCCostCentreStack,
-
-        -- Cost-centre Profiling
-        dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
-        enterCostCentreThunk, enterCostCentreFun,
-        costCentreFrom,
-        storeCurCCS,
-        emitSetCCC,
-
-        saveCurrentCostCentre, restoreCurrentCostCentre,
-
-        -- Lag/drag/void stuff
-        ldvEnter, ldvEnterClosure, ldvRecordCreate
-  ) where
-
-import GhcPrelude
-
-import StgCmmClosure
-import StgCmmUtils
-import StgCmmMonad
-import SMRep
-
-import MkGraph
-import Cmm
-import CmmUtils
-import CLabel
-
-import CostCentre
-import DynFlags
-import FastString
-import Module
-import Outputable
-
-import Control.Monad
-import Data.Char (ord)
-
------------------------------------------------------------------------------
---
--- Cost-centre-stack Profiling
---
------------------------------------------------------------------------------
-
--- Expression representing the current cost centre stack
-ccsType :: DynFlags -> CmmType -- Type of a cost-centre stack
-ccsType = bWord
-
-ccType :: DynFlags -> CmmType -- Type of a cost centre
-ccType = bWord
-
-storeCurCCS :: CmmExpr -> CmmAGraph
-storeCurCCS e = mkAssign cccsReg e
-
-mkCCostCentre :: CostCentre -> CmmLit
-mkCCostCentre cc = CmmLabel (mkCCLabel cc)
-
-mkCCostCentreStack :: CostCentreStack -> CmmLit
-mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs)
-
-costCentreFrom :: DynFlags
-               -> CmmExpr         -- A closure pointer
-               -> CmmExpr        -- The cost centre from that closure
-costCentreFrom dflags cl = CmmLoad (cmmOffsetB dflags cl (oFFSET_StgHeader_ccs dflags)) (ccsType dflags)
-
--- | The profiling header words in a static closure
-staticProfHdr :: DynFlags -> CostCentreStack -> [CmmLit]
-staticProfHdr dflags ccs
- = ifProfilingL dflags [mkCCostCentreStack ccs, staticLdvInit dflags]
-
--- | Profiling header words in a dynamic closure
-dynProfHdr :: DynFlags -> CmmExpr -> [CmmExpr]
-dynProfHdr dflags ccs = ifProfilingL dflags [ccs, dynLdvInit dflags]
-
--- | Initialise the profiling field of an update frame
-initUpdFrameProf :: CmmExpr -> FCode ()
-initUpdFrameProf frame
-  = ifProfiling $        -- frame->header.prof.ccs = CCCS
-    do dflags <- getDynFlags
-       emitStore (cmmOffset dflags frame (oFFSET_StgHeader_ccs dflags)) cccsExpr
-        -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0)
-        -- is unnecessary because it is not used anyhow.
-
----------------------------------------------------------------------------
---         Saving and restoring the current cost centre
----------------------------------------------------------------------------
-
-{-        Note [Saving the current cost centre]
-        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The current cost centre is like a global register.  Like other
-global registers, it's a caller-saves one.  But consider
-        case (f x) of (p,q) -> rhs
-Since 'f' may set the cost centre, we must restore it
-before resuming rhs.  So we want code like this:
-        local_cc = CCC  -- save
-        r = f( x )
-        CCC = local_cc  -- restore
-That is, we explicitly "save" the current cost centre in
-a LocalReg, local_cc; and restore it after the call. The
-C-- infrastructure will arrange to save local_cc across the
-call.
-
-The same goes for join points;
-        let j x = join-stuff
-        in blah-blah
-We want this kind of code:
-        local_cc = CCC  -- save
-        blah-blah
-     J:
-        CCC = local_cc  -- restore
--}
-
-saveCurrentCostCentre :: FCode (Maybe LocalReg)
-        -- Returns Nothing if profiling is off
-saveCurrentCostCentre
-  = do dflags <- getDynFlags
-       if not (gopt Opt_SccProfilingOn dflags)
-           then return Nothing
-           else do local_cc <- newTemp (ccType dflags)
-                   emitAssign (CmmLocal local_cc) cccsExpr
-                   return (Just local_cc)
-
-restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
-restoreCurrentCostCentre Nothing
-  = return ()
-restoreCurrentCostCentre (Just local_cc)
-  = emit (storeCurCCS (CmmReg (CmmLocal local_cc)))
-
-
--------------------------------------------------------------------------------
--- Recording allocation in a cost centre
--------------------------------------------------------------------------------
-
--- | Record the allocation of a closure.  The CmmExpr is the cost
--- centre stack to which to attribute the allocation.
-profDynAlloc :: SMRep -> CmmExpr -> FCode ()
-profDynAlloc rep ccs
-  = ifProfiling $
-    do dflags <- getDynFlags
-       profAlloc (mkIntExpr dflags (heapClosureSizeW dflags rep)) ccs
-
--- | Record the allocation of a closure (size is given by a CmmExpr)
--- The size must be in words, because the allocation counter in a CCS counts
--- in words.
-profAlloc :: CmmExpr -> CmmExpr -> FCode ()
-profAlloc words ccs
-  = ifProfiling $
-        do dflags <- getDynFlags
-           let alloc_rep = rEP_CostCentreStack_mem_alloc dflags
-           emit (addToMemE alloc_rep
-                       (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_mem_alloc dflags))
-                       (CmmMachOp (MO_UU_Conv (wordWidth dflags) (typeWidth alloc_rep)) $
-                         [CmmMachOp (mo_wordSub dflags) [words,
-                                                         mkIntExpr dflags (profHdrSize dflags)]]))
-                       -- subtract the "profiling overhead", which is the
-                       -- profiling header in a closure.
-
--- -----------------------------------------------------------------------
--- Setting the current cost centre on entry to a closure
-
-enterCostCentreThunk :: CmmExpr -> FCode ()
-enterCostCentreThunk closure =
-  ifProfiling $ do
-      dflags <- getDynFlags
-      emit $ storeCurCCS (costCentreFrom dflags closure)
-
-enterCostCentreFun :: CostCentreStack -> CmmExpr -> FCode ()
-enterCostCentreFun ccs closure =
-  ifProfiling $ do
-    if isCurrentCCS ccs
-       then do dflags <- getDynFlags
-               emitRtsCall rtsUnitId (fsLit "enterFunCCS")
-                   [(baseExpr, AddrHint),
-                    (costCentreFrom dflags closure, AddrHint)] False
-       else return () -- top-level function, nothing to do
-
-ifProfiling :: FCode () -> FCode ()
-ifProfiling code
-  = do dflags <- getDynFlags
-       if gopt Opt_SccProfilingOn dflags
-           then code
-           else return ()
-
-ifProfilingL :: DynFlags -> [a] -> [a]
-ifProfilingL dflags xs
-  | gopt Opt_SccProfilingOn dflags = xs
-  | otherwise                      = []
-
-
----------------------------------------------------------------
---        Initialising Cost Centres & CCSs
----------------------------------------------------------------
-
-initCostCentres :: CollectedCCs -> FCode ()
--- Emit the declarations
-initCostCentres (local_CCs, singleton_CCSs)
-  = do dflags <- getDynFlags
-       when (gopt Opt_SccProfilingOn dflags) $
-           do mapM_ emitCostCentreDecl local_CCs
-              mapM_ emitCostCentreStackDecl singleton_CCSs
-
-
-emitCostCentreDecl :: CostCentre -> FCode ()
-emitCostCentreDecl cc = do
-  { dflags <- getDynFlags
-  ; let is_caf | isCafCC cc = mkIntCLit dflags (ord 'c') -- 'c' == is a CAF
-               | otherwise  = zero dflags
-                        -- NB. bytesFS: we want the UTF-8 bytes here (#5559)
-  ; label <- newByteStringCLit (bytesFS $ costCentreUserNameFS cc)
-  ; modl  <- newByteStringCLit (bytesFS $ Module.moduleNameFS
-                                        $ Module.moduleName
-                                        $ cc_mod cc)
-  ; loc <- newByteStringCLit $ bytesFS $ mkFastString $
-                   showPpr dflags (costCentreSrcSpan cc)
-           -- XXX going via FastString to get UTF-8 encoding is silly
-  ; let
-     lits = [ zero dflags,           -- StgInt ccID,
-              label,        -- char *label,
-              modl,        -- char *module,
-              loc,      -- char *srcloc,
-              zero64,   -- StgWord64 mem_alloc
-              zero dflags,     -- StgWord time_ticks
-              is_caf,   -- StgInt is_caf
-              zero dflags      -- struct _CostCentre *link
-            ]
-  ; emitDataLits (mkCCLabel cc) lits
-  }
-
-emitCostCentreStackDecl :: CostCentreStack -> FCode ()
-emitCostCentreStackDecl ccs
-  = case maybeSingletonCCS ccs of
-    Just cc ->
-        do dflags <- getDynFlags
-           let mk_lits cc = zero dflags :
-                            mkCCostCentre cc :
-                            replicate (sizeof_ccs_words dflags - 2) (zero dflags)
-                -- Note: to avoid making any assumptions about how the
-                -- C compiler (that compiles the RTS, in particular) does
-                -- layouts of structs containing long-longs, simply
-                -- pad out the struct with zero words until we hit the
-                -- size of the overall struct (which we get via DerivedConstants.h)
-           emitDataLits (mkCCSLabel ccs) (mk_lits cc)
-    Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs)
-
-zero :: DynFlags -> CmmLit
-zero dflags = mkIntCLit dflags 0
-zero64 :: CmmLit
-zero64 = CmmInt 0 W64
-
-sizeof_ccs_words :: DynFlags -> Int
-sizeof_ccs_words dflags
-    -- round up to the next word.
-  | ms == 0   = ws
-  | otherwise = ws + 1
-  where
-   (ws,ms) = sIZEOF_CostCentreStack dflags `divMod` wORD_SIZE dflags
-
--- ---------------------------------------------------------------------------
--- Set the current cost centre stack
-
-emitSetCCC :: CostCentre -> Bool -> Bool -> FCode ()
-emitSetCCC cc tick push
- = do dflags <- getDynFlags
-      if not (gopt Opt_SccProfilingOn dflags)
-          then return ()
-          else do tmp <- newTemp (ccsType dflags)
-                  pushCostCentre tmp cccsExpr cc
-                  when tick $ emit (bumpSccCount dflags (CmmReg (CmmLocal tmp)))
-                  when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp)))
-
-pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
-pushCostCentre result ccs cc
-  = emitRtsCallWithResult result AddrHint
-        rtsUnitId
-        (fsLit "pushCostCentre") [(ccs,AddrHint),
-                                (CmmLit (mkCCostCentre cc), AddrHint)]
-        False
-
-bumpSccCount :: DynFlags -> CmmExpr -> CmmAGraph
-bumpSccCount dflags ccs
-  = addToMem (rEP_CostCentreStack_scc_count dflags)
-         (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_scc_count dflags)) 1
-
------------------------------------------------------------------------------
---
---                Lag/drag/void stuff
---
------------------------------------------------------------------------------
-
---
--- Initial value for the LDV field in a static closure
---
-staticLdvInit :: DynFlags -> CmmLit
-staticLdvInit = zeroCLit
-
---
--- Initial value of the LDV field in a dynamic closure
---
-dynLdvInit :: DynFlags -> CmmExpr
-dynLdvInit dflags =     -- (era << LDV_SHIFT) | LDV_STATE_CREATE
-  CmmMachOp (mo_wordOr dflags) [
-      CmmMachOp (mo_wordShl dflags) [loadEra dflags, mkIntExpr dflags (lDV_SHIFT dflags)],
-      CmmLit (mkWordCLit dflags (iLDV_STATE_CREATE dflags))
-  ]
-
---
--- Initialise the LDV word of a new closure
---
-ldvRecordCreate :: CmmExpr -> FCode ()
-ldvRecordCreate closure = do
-  dflags <- getDynFlags
-  emit $ mkStore (ldvWord dflags closure) (dynLdvInit dflags)
-
---
--- | Called when a closure is entered, marks the closure as having
--- been "used".  The closure is not an "inherently used" one.  The
--- closure is not @IND@ because that is not considered for LDV profiling.
---
-ldvEnterClosure :: ClosureInfo -> CmmReg -> FCode ()
-ldvEnterClosure closure_info node_reg = do
-    dflags <- getDynFlags
-    let tag = funTag dflags closure_info
-    -- don't forget to substract node's tag
-    ldvEnter (cmmOffsetB dflags (CmmReg node_reg) (-tag))
-
-ldvEnter :: CmmExpr -> FCode ()
--- Argument is a closure pointer
-ldvEnter cl_ptr = do
-    dflags <- getDynFlags
-    let -- don't forget to substract node's tag
-        ldv_wd = ldvWord dflags cl_ptr
-        new_ldv_wd = cmmOrWord dflags (cmmAndWord dflags (CmmLoad ldv_wd (bWord dflags))
-                                                         (CmmLit (mkWordCLit dflags (iLDV_CREATE_MASK dflags))))
-                                      (cmmOrWord dflags (loadEra dflags) (CmmLit (mkWordCLit dflags (iLDV_STATE_USE dflags))))
-    ifProfiling $
-         -- if (era > 0) {
-         --    LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
-         --                era | LDV_STATE_USE }
-        emit =<< mkCmmIfThenElse (CmmMachOp (mo_wordUGt dflags) [loadEra dflags, CmmLit (zeroCLit dflags)])
-                     (mkStore ldv_wd new_ldv_wd)
-                     mkNop
-
-loadEra :: DynFlags -> CmmExpr
-loadEra dflags = CmmMachOp (MO_UU_Conv (cIntWidth dflags) (wordWidth dflags))
-    [CmmLoad (mkLblExpr (mkCmmDataLabel rtsUnitId (fsLit "era")))
-             (cInt dflags)]
-
-ldvWord :: DynFlags -> CmmExpr -> CmmExpr
--- Takes the address of a closure, and returns
--- the address of the LDV word in the closure
-ldvWord dflags closure_ptr
-    = cmmOffsetB dflags closure_ptr (oFFSET_StgHeader_ldvw dflags)
diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs
deleted file mode 100644
index 868b52f402..0000000000
--- a/compiler/codeGen/StgCmmTicky.hs
+++ /dev/null
@@ -1,682 +0,0 @@
-{-# LANGUAGE BangPatterns #-}
-
------------------------------------------------------------------------------
---
--- Code generation for ticky-ticky profiling
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-{- OVERVIEW: ticky ticky profiling
-
-Please see
-https://gitlab.haskell.org/ghc/ghc/wikis/debugging/ticky-ticky and also
-edit it and the rest of this comment to keep them up-to-date if you
-change ticky-ticky. Thanks!
-
- *** All allocation ticky numbers are in bytes. ***
-
-Some of the relevant source files:
-
-       ***not necessarily an exhaustive list***
-
-  * some codeGen/ modules import this one
-
-  * this module imports cmm/CLabel.hs to manage labels
-
-  * cmm/CmmParse.y expands some macros using generators defined in
-    this module
-
-  * includes/stg/Ticky.h declares all of the global counters
-
-  * includes/rts/Ticky.h declares the C data type for an
-    STG-declaration's counters
-
-  * some macros defined in includes/Cmm.h (and used within the RTS's
-    CMM code) update the global ticky counters
-
-  * at the end of execution rts/Ticky.c generates the final report
-    +RTS -r<report-file> -RTS
-
-The rts/Ticky.c function that generates the report includes an
-STG-declaration's ticky counters if
-
-  * that declaration was entered, or
-
-  * it was allocated (if -ticky-allocd)
-
-On either of those events, the counter is "registered" by adding it to
-a linked list; cf the CMM generated by registerTickyCtr.
-
-Ticky-ticky profiling has evolved over many years. Many of the
-counters from its most sophisticated days are no longer
-active/accurate. As the RTS has changed, sometimes the ticky code for
-relevant counters was not accordingly updated. Unfortunately, neither
-were the comments.
-
-As of March 2013, there still exist deprecated code and comments in
-the code generator as well as the RTS because:
-
-  * I don't know what is out-of-date versus merely commented out for
-    momentary convenience, and
-
-  * someone else might know how to repair it!
-
--}
-
-module StgCmmTicky (
-  withNewTickyCounterFun,
-  withNewTickyCounterLNE,
-  withNewTickyCounterThunk,
-  withNewTickyCounterStdThunk,
-  withNewTickyCounterCon,
-
-  tickyDynAlloc,
-  tickyAllocHeap,
-
-  tickyAllocPrim,
-  tickyAllocThunk,
-  tickyAllocPAP,
-  tickyHeapCheck,
-  tickyStackCheck,
-
-  tickyUnknownCall, tickyDirectCall,
-
-  tickyPushUpdateFrame,
-  tickyUpdateFrameOmitted,
-
-  tickyEnterDynCon,
-  tickyEnterStaticCon,
-  tickyEnterViaNode,
-
-  tickyEnterFun,
-  tickyEnterThunk, tickyEnterStdThunk,        -- dynamic non-value
-                                              -- thunks only
-  tickyEnterLNE,
-
-  tickyUpdateBhCaf,
-  tickyBlackHole,
-  tickyUnboxedTupleReturn,
-  tickyReturnOldCon, tickyReturnNewCon,
-
-  tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs,
-  tickySlowCall, tickySlowCallPat,
-  ) where
-
-import GhcPrelude
-
-import StgCmmArgRep    ( slowCallPattern , toArgRep , argRepString )
-import StgCmmClosure
-import StgCmmUtils
-import StgCmmMonad
-
-import StgSyn
-import CmmExpr
-import MkGraph
-import CmmUtils
-import CLabel
-import SMRep
-
-import Module
-import Name
-import Id
-import BasicTypes
-import FastString
-import Outputable
-import Util
-
-import DynFlags
-
--- Turgid imports for showTypeCategory
-import PrelNames
-import TcType
-import Type
-import TyCon
-
-import Data.Maybe
-import qualified Data.Char
-import Control.Monad ( when )
-
------------------------------------------------------------------------------
---
--- Ticky-ticky profiling
---
------------------------------------------------------------------------------
-
-data TickyClosureType
-    = TickyFun
-        Bool -- True <-> single entry
-    | TickyCon
-    | TickyThunk
-        Bool -- True <-> updateable
-        Bool -- True <-> standard thunk (AP or selector), has no entry counter
-    | TickyLNE
-
-withNewTickyCounterFun :: Bool -> Name  -> [NonVoid Id] -> FCode a -> FCode a
-withNewTickyCounterFun single_entry = withNewTickyCounter (TickyFun single_entry)
-
-withNewTickyCounterLNE :: Name  -> [NonVoid Id] -> FCode a -> FCode a
-withNewTickyCounterLNE nm args code = do
-  b <- tickyLNEIsOn
-  if not b then code else withNewTickyCounter TickyLNE nm args code
-
-thunkHasCounter :: Bool -> FCode Bool
-thunkHasCounter isStatic = do
-  b <- tickyDynThunkIsOn
-  pure (not isStatic && b)
-
-withNewTickyCounterThunk
-  :: Bool -- ^ static
-  -> Bool -- ^ updateable
-  -> Name
-  -> FCode a
-  -> FCode a
-withNewTickyCounterThunk isStatic isUpdatable name code = do
-    has_ctr <- thunkHasCounter isStatic
-    if not has_ctr
-      then code
-      else withNewTickyCounter (TickyThunk isUpdatable False) name [] code
-
-withNewTickyCounterStdThunk
-  :: Bool -- ^ updateable
-  -> Name
-  -> FCode a
-  -> FCode a
-withNewTickyCounterStdThunk isUpdatable name code = do
-    has_ctr <- thunkHasCounter False
-    if not has_ctr
-      then code
-      else withNewTickyCounter (TickyThunk isUpdatable True) name [] code
-
-withNewTickyCounterCon
-  :: Name
-  -> FCode a
-  -> FCode a
-withNewTickyCounterCon name code = do
-    has_ctr <- thunkHasCounter False
-    if not has_ctr
-      then code
-      else withNewTickyCounter TickyCon name [] code
-
--- args does not include the void arguments
-withNewTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode a -> FCode a
-withNewTickyCounter cloType name args m = do
-  lbl <- emitTickyCounter cloType name args
-  setTickyCtrLabel lbl m
-
-emitTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode CLabel
-emitTickyCounter cloType name args
-  = let ctr_lbl = mkRednCountsLabel name in
-    (>> return ctr_lbl) $
-    ifTicky $ do
-        { dflags <- getDynFlags
-        ; parent <- getTickyCtrLabel
-        ; mod_name <- getModuleName
-
-          -- When printing the name of a thing in a ticky file, we
-          -- want to give the module name even for *local* things.  We
-          -- print just "x (M)" rather that "M.x" to distinguish them
-          -- from the global kind.
-        ; let ppr_for_ticky_name :: SDoc
-              ppr_for_ticky_name =
-                let n = ppr name
-                    ext = case cloType of
-                              TickyFun single_entry -> parens $ hcat $ punctuate comma $
-                                  [text "fun"] ++ [text "se"|single_entry]
-                              TickyCon -> parens (text "con")
-                              TickyThunk upd std -> parens $ hcat $ punctuate comma $
-                                  [text "thk"] ++ [text "se"|not upd] ++ [text "std"|std]
-                              TickyLNE | isInternalName name -> parens (text "LNE")
-                                       | otherwise -> panic "emitTickyCounter: how is this an external LNE?"
-                    p = case hasHaskellName parent of
-                            -- NB the default "top" ticky ctr does not
-                            -- have a Haskell name
-                          Just pname -> text "in" <+> ppr (nameUnique pname)
-                          _ -> empty
-                in if isInternalName name
-                   then n <+> parens (ppr mod_name) <+> ext <+> p
-                   else n <+> ext <+> p
-
-        ; fun_descr_lit <- newStringCLit $ showSDocDebug dflags ppr_for_ticky_name
-        ; arg_descr_lit <- newStringCLit $ map (showTypeCategory . idType . fromNonVoid) args
-        ; emitDataLits ctr_lbl
-        -- Must match layout of includes/rts/Ticky.h's StgEntCounter
-        --
-        -- krc: note that all the fields are I32 now; some were I16
-        -- before, but the code generator wasn't handling that
-        -- properly and it led to chaos, panic and disorder.
-            [ mkIntCLit dflags 0,               -- registered?
-              mkIntCLit dflags (length args),   -- Arity
-              mkIntCLit dflags 0,               -- Heap allocated for this thing
-              fun_descr_lit,
-              arg_descr_lit,
-              zeroCLit dflags,          -- Entries into this thing
-              zeroCLit dflags,          -- Heap allocated by this thing
-              zeroCLit dflags                   -- Link to next StgEntCounter
-            ]
-        }
-
--- -----------------------------------------------------------------------------
--- Ticky stack frames
-
-tickyPushUpdateFrame, tickyUpdateFrameOmitted :: FCode ()
-tickyPushUpdateFrame    = ifTicky $ bumpTickyCounter (fsLit "UPDF_PUSHED_ctr")
-tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter (fsLit "UPDF_OMITTED_ctr")
-
--- -----------------------------------------------------------------------------
--- Ticky entries
-
--- NB the name-specific entries are only available for names that have
--- dedicated Cmm code. As far as I know, this just rules out
--- constructor thunks. For them, there is no CMM code block to put the
--- bump of name-specific ticky counter into. On the other hand, we can
--- still track allocation their allocation.
-
-tickyEnterDynCon, tickyEnterStaticCon, tickyEnterViaNode :: FCode ()
-tickyEnterDynCon      = ifTicky $ bumpTickyCounter (fsLit "ENT_DYN_CON_ctr")
-tickyEnterStaticCon   = ifTicky $ bumpTickyCounter (fsLit "ENT_STATIC_CON_ctr")
-tickyEnterViaNode     = ifTicky $ bumpTickyCounter (fsLit "ENT_VIA_NODE_ctr")
-
-tickyEnterThunk :: ClosureInfo -> FCode ()
-tickyEnterThunk cl_info
-  = ifTicky $ do
-    { bumpTickyCounter ctr
-    ; has_ctr <- thunkHasCounter static
-    ; when has_ctr $ do
-      ticky_ctr_lbl <- getTickyCtrLabel
-      registerTickyCtrAtEntryDyn ticky_ctr_lbl
-      bumpTickyEntryCount ticky_ctr_lbl }
-  where
-    updatable = closureSingleEntry cl_info
-    static    = isStaticClosure cl_info
-
-    ctr | static    = if updatable then fsLit "ENT_STATIC_THK_SINGLE_ctr"
-                                   else fsLit "ENT_STATIC_THK_MANY_ctr"
-        | otherwise = if updatable then fsLit "ENT_DYN_THK_SINGLE_ctr"
-                                   else fsLit "ENT_DYN_THK_MANY_ctr"
-
-tickyEnterStdThunk :: ClosureInfo -> FCode ()
-tickyEnterStdThunk = tickyEnterThunk
-
-tickyBlackHole :: Bool{-updatable-} -> FCode ()
-tickyBlackHole updatable
-  = ifTicky (bumpTickyCounter ctr)
-  where
-    ctr | updatable = (fsLit "UPD_BH_SINGLE_ENTRY_ctr")
-        | otherwise = (fsLit "UPD_BH_UPDATABLE_ctr")
-
-tickyUpdateBhCaf :: ClosureInfo -> FCode ()
-tickyUpdateBhCaf cl_info
-  = ifTicky (bumpTickyCounter ctr)
-  where
-    ctr | closureUpdReqd cl_info = (fsLit "UPD_CAF_BH_SINGLE_ENTRY_ctr")
-        | otherwise              = (fsLit "UPD_CAF_BH_UPDATABLE_ctr")
-
-tickyEnterFun :: ClosureInfo -> FCode ()
-tickyEnterFun cl_info = ifTicky $ do
-  ctr_lbl <- getTickyCtrLabel
-
-  if isStaticClosure cl_info
-    then do bumpTickyCounter (fsLit "ENT_STATIC_FUN_DIRECT_ctr")
-            registerTickyCtr ctr_lbl
-    else do bumpTickyCounter (fsLit "ENT_DYN_FUN_DIRECT_ctr")
-            registerTickyCtrAtEntryDyn ctr_lbl
-
-  bumpTickyEntryCount ctr_lbl
-
-tickyEnterLNE :: FCode ()
-tickyEnterLNE = ifTicky $ do
-  bumpTickyCounter (fsLit "ENT_LNE_ctr")
-  ifTickyLNE $ do
-    ctr_lbl <- getTickyCtrLabel
-    registerTickyCtr ctr_lbl
-    bumpTickyEntryCount ctr_lbl
-
--- needn't register a counter upon entry if
---
--- 1) it's for a dynamic closure, and
---
--- 2) -ticky-allocd is on
---
--- since the counter was registered already upon being alloc'd
-registerTickyCtrAtEntryDyn :: CLabel -> FCode ()
-registerTickyCtrAtEntryDyn ctr_lbl = do
-  already_registered <- tickyAllocdIsOn
-  when (not already_registered) $ registerTickyCtr ctr_lbl
-
-registerTickyCtr :: CLabel -> FCode ()
--- Register a ticky counter
---   if ( ! f_ct.registeredp ) {
---          f_ct.link = ticky_entry_ctrs;       /* hook this one onto the front of the list */
---          ticky_entry_ctrs = & (f_ct);        /* mark it as "registered" */
---          f_ct.registeredp = 1 }
-registerTickyCtr ctr_lbl = do
-  dflags <- getDynFlags
-  let
-    -- krc: code generator doesn't handle Not, so we test for Eq 0 instead
-    test = CmmMachOp (MO_Eq (wordWidth dflags))
-              [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl
-                                (oFFSET_StgEntCounter_registeredp dflags))) (bWord dflags),
-               zeroExpr dflags]
-    register_stmts
-      = [ mkStore (CmmLit (cmmLabelOffB ctr_lbl (oFFSET_StgEntCounter_link dflags)))
-                   (CmmLoad ticky_entry_ctrs (bWord dflags))
-        , mkStore ticky_entry_ctrs (mkLblExpr ctr_lbl)
-        , mkStore (CmmLit (cmmLabelOffB ctr_lbl
-                                (oFFSET_StgEntCounter_registeredp dflags)))
-                   (mkIntExpr dflags 1) ]
-    ticky_entry_ctrs = mkLblExpr (mkCmmDataLabel rtsUnitId (fsLit "ticky_entry_ctrs"))
-  emit =<< mkCmmIfThen test (catAGraphs register_stmts)
-
-tickyReturnOldCon, tickyReturnNewCon :: RepArity -> FCode ()
-tickyReturnOldCon arity
-  = ifTicky $ do { bumpTickyCounter (fsLit "RET_OLD_ctr")
-                 ; bumpHistogram    (fsLit "RET_OLD_hst") arity }
-tickyReturnNewCon arity
-  = ifTicky $ do { bumpTickyCounter (fsLit "RET_NEW_ctr")
-                 ; bumpHistogram    (fsLit "RET_NEW_hst") arity }
-
-tickyUnboxedTupleReturn :: RepArity -> FCode ()
-tickyUnboxedTupleReturn arity
-  = ifTicky $ do { bumpTickyCounter (fsLit "RET_UNBOXED_TUP_ctr")
-                 ; bumpHistogram    (fsLit "RET_UNBOXED_TUP_hst") arity }
-
--- -----------------------------------------------------------------------------
--- Ticky calls
-
--- Ticks at a *call site*:
-tickyDirectCall :: RepArity -> [StgArg] -> FCode ()
-tickyDirectCall arity args
-  | args `lengthIs` arity = tickyKnownCallExact
-  | otherwise = do tickyKnownCallExtraArgs
-                   tickySlowCallPat (map argPrimRep (drop arity args))
-
-tickyKnownCallTooFewArgs :: FCode ()
-tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_TOO_FEW_ARGS_ctr")
-
-tickyKnownCallExact :: FCode ()
-tickyKnownCallExact      = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_ctr")
-
-tickyKnownCallExtraArgs :: FCode ()
-tickyKnownCallExtraArgs  = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_EXTRA_ARGS_ctr")
-
-tickyUnknownCall :: FCode ()
-tickyUnknownCall         = ifTicky $ bumpTickyCounter (fsLit "UNKNOWN_CALL_ctr")
-
--- Tick for the call pattern at slow call site (i.e. in addition to
--- tickyUnknownCall, tickyKnownCallExtraArgs, etc.)
-tickySlowCall :: LambdaFormInfo -> [StgArg] -> FCode ()
-tickySlowCall _ [] = return ()
-tickySlowCall lf_info args = do
- -- see Note [Ticky for slow calls]
- if isKnownFun lf_info
-   then tickyKnownCallTooFewArgs
-   else tickyUnknownCall
- tickySlowCallPat (map argPrimRep args)
-
-tickySlowCallPat :: [PrimRep] -> FCode ()
-tickySlowCallPat args = ifTicky $
-  let argReps = map toArgRep args
-      (_, n_matched) = slowCallPattern argReps
-  in if n_matched > 0 && args `lengthIs` n_matched
-     then bumpTickyLbl $ mkRtsSlowFastTickyCtrLabel $ concatMap (map Data.Char.toLower . argRepString) argReps
-     else bumpTickyCounter $ fsLit "VERY_SLOW_CALL_ctr"
-
-{-
-
-Note [Ticky for slow calls]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Terminology is unfortunately a bit mixed up for these calls. codeGen
-uses "slow call" to refer to unknown calls and under-saturated known
-calls.
-
-Nowadays, though (ie as of the eval/apply paper), the significantly
-slower calls are actually just a subset of these: the ones with no
-built-in argument pattern (cf StgCmmArgRep.slowCallPattern)
-
-So for ticky profiling, we split slow calls into
-"SLOW_CALL_fast_<pattern>_ctr" (those matching a built-in pattern) and
-VERY_SLOW_CALL_ctr (those without a built-in pattern; these are very
-bad for both space and time).
-
--}
-
--- -----------------------------------------------------------------------------
--- Ticky allocation
-
-tickyDynAlloc :: Maybe Id -> SMRep -> LambdaFormInfo -> FCode ()
--- Called when doing a dynamic heap allocation; the LambdaFormInfo
--- used to distinguish between closure types
---
--- TODO what else to count while we're here?
-tickyDynAlloc mb_id rep lf = ifTicky $ getDynFlags >>= \dflags ->
-  let bytes = wORD_SIZE dflags * heapClosureSizeW dflags rep
-
-      countGlobal tot ctr = do
-        bumpTickyCounterBy tot bytes
-        bumpTickyCounter   ctr
-      countSpecific = ifTickyAllocd $ case mb_id of
-        Nothing -> return ()
-        Just id -> do
-          let ctr_lbl = mkRednCountsLabel (idName id)
-          registerTickyCtr ctr_lbl
-          bumpTickyAllocd ctr_lbl bytes
-
-  -- TODO are we still tracking "good stuff" (_gds) versus
-  -- administrative (_adm) versus slop (_slp)? I'm going with all _gds
-  -- for now, since I don't currently know neither if we do nor how to
-  -- distinguish. NSF Mar 2013
-
-  in case () of
-    _ | isConRep rep   ->
-          ifTickyDynThunk countSpecific >>
-          countGlobal (fsLit "ALLOC_CON_gds") (fsLit "ALLOC_CON_ctr")
-      | isThunkRep rep ->
-          ifTickyDynThunk countSpecific >>
-          if lfUpdatable lf
-          then countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_UP_THK_ctr")
-          else countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_SE_THK_ctr")
-      | isFunRep   rep ->
-          countSpecific >>
-          countGlobal (fsLit "ALLOC_FUN_gds") (fsLit "ALLOC_FUN_ctr")
-      | otherwise      -> panic "How is this heap object not a con, thunk, or fun?"
-
-
-
-tickyAllocHeap ::
-  Bool -> -- is this a genuine allocation? As opposed to
-          -- StgCmmLayout.adjustHpBackwards
-  VirtualHpOffset -> FCode ()
--- Called when doing a heap check [TICK_ALLOC_HEAP]
--- Must be lazy in the amount of allocation!
-tickyAllocHeap genuine hp
-  = ifTicky $
-    do  { dflags <- getDynFlags
-        ; ticky_ctr <- getTickyCtrLabel
-        ; emit $ catAGraphs $
-            -- only test hp from within the emit so that the monadic
-            -- computation itself is not strict in hp (cf knot in
-            -- StgCmmMonad.getHeapUsage)
-          if hp == 0 then []
-          else let !bytes = wORD_SIZE dflags * hp in [
-            -- Bump the allocation total in the closure's StgEntCounter
-            addToMem (rEP_StgEntCounter_allocs dflags)
-                     (CmmLit (cmmLabelOffB ticky_ctr (oFFSET_StgEntCounter_allocs dflags)))
-                     bytes,
-            -- Bump the global allocation total ALLOC_HEAP_tot
-            addToMemLbl (bWord dflags)
-                        (mkCmmDataLabel rtsUnitId (fsLit "ALLOC_HEAP_tot"))
-                        bytes,
-            -- Bump the global allocation counter ALLOC_HEAP_ctr
-            if not genuine then mkNop
-            else addToMemLbl (bWord dflags)
-                             (mkCmmDataLabel rtsUnitId (fsLit "ALLOC_HEAP_ctr"))
-                             1
-            ]}
-
-
---------------------------------------------------------------------------------
--- these three are only called from CmmParse.y (ie ultimately from the RTS)
-
--- the units are bytes
-
-tickyAllocPrim :: CmmExpr  -- ^ size of the full header, in bytes
-               -> CmmExpr  -- ^ size of the payload, in bytes
-               -> CmmExpr -> FCode ()
-tickyAllocPrim _hdr _goods _slop = ifTicky $ do
-  bumpTickyCounter    (fsLit "ALLOC_PRIM_ctr")
-  bumpTickyCounterByE (fsLit "ALLOC_PRIM_adm") _hdr
-  bumpTickyCounterByE (fsLit "ALLOC_PRIM_gds") _goods
-  bumpTickyCounterByE (fsLit "ALLOC_PRIM_slp") _slop
-
-tickyAllocThunk :: CmmExpr -> CmmExpr -> FCode ()
-tickyAllocThunk _goods _slop = ifTicky $ do
-    -- TODO is it ever called with a Single-Entry thunk?
-  bumpTickyCounter    (fsLit "ALLOC_UP_THK_ctr")
-  bumpTickyCounterByE (fsLit "ALLOC_THK_gds") _goods
-  bumpTickyCounterByE (fsLit "ALLOC_THK_slp") _slop
-
-tickyAllocPAP :: CmmExpr -> CmmExpr -> FCode ()
-tickyAllocPAP _goods _slop = ifTicky $ do
-  bumpTickyCounter    (fsLit "ALLOC_PAP_ctr")
-  bumpTickyCounterByE (fsLit "ALLOC_PAP_gds") _goods
-  bumpTickyCounterByE (fsLit "ALLOC_PAP_slp") _slop
-
-tickyHeapCheck :: FCode ()
-tickyHeapCheck = ifTicky $ bumpTickyCounter (fsLit "HEAP_CHK_ctr")
-
-tickyStackCheck :: FCode ()
-tickyStackCheck = ifTicky $ bumpTickyCounter (fsLit "STK_CHK_ctr")
-
--- -----------------------------------------------------------------------------
--- Ticky utils
-
-ifTicky :: FCode () -> FCode ()
-ifTicky code =
-  getDynFlags >>= \dflags -> when (gopt Opt_Ticky dflags) code
-
-tickyAllocdIsOn :: FCode Bool
-tickyAllocdIsOn = gopt Opt_Ticky_Allocd `fmap` getDynFlags
-
-tickyLNEIsOn :: FCode Bool
-tickyLNEIsOn = gopt Opt_Ticky_LNE `fmap` getDynFlags
-
-tickyDynThunkIsOn :: FCode Bool
-tickyDynThunkIsOn = gopt Opt_Ticky_Dyn_Thunk `fmap` getDynFlags
-
-ifTickyAllocd :: FCode () -> FCode ()
-ifTickyAllocd code = tickyAllocdIsOn >>= \b -> when b code
-
-ifTickyLNE :: FCode () -> FCode ()
-ifTickyLNE code = tickyLNEIsOn >>= \b -> when b code
-
-ifTickyDynThunk :: FCode () -> FCode ()
-ifTickyDynThunk code = tickyDynThunkIsOn >>= \b -> when b code
-
-bumpTickyCounter :: FastString -> FCode ()
-bumpTickyCounter lbl = bumpTickyLbl (mkCmmDataLabel rtsUnitId lbl)
-
-bumpTickyCounterBy :: FastString -> Int -> FCode ()
-bumpTickyCounterBy lbl = bumpTickyLblBy (mkCmmDataLabel rtsUnitId lbl)
-
-bumpTickyCounterByE :: FastString -> CmmExpr -> FCode ()
-bumpTickyCounterByE lbl = bumpTickyLblByE (mkCmmDataLabel rtsUnitId lbl)
-
-bumpTickyEntryCount :: CLabel -> FCode ()
-bumpTickyEntryCount lbl = do
-  dflags <- getDynFlags
-  bumpTickyLit (cmmLabelOffB lbl (oFFSET_StgEntCounter_entry_count dflags))
-
-bumpTickyAllocd :: CLabel -> Int -> FCode ()
-bumpTickyAllocd lbl bytes = do
-  dflags <- getDynFlags
-  bumpTickyLitBy (cmmLabelOffB lbl (oFFSET_StgEntCounter_allocd dflags)) bytes
-
-bumpTickyLbl :: CLabel -> FCode ()
-bumpTickyLbl lhs = bumpTickyLitBy (cmmLabelOffB lhs 0) 1
-
-bumpTickyLblBy :: CLabel -> Int -> FCode ()
-bumpTickyLblBy lhs = bumpTickyLitBy (cmmLabelOffB lhs 0)
-
-bumpTickyLblByE :: CLabel -> CmmExpr -> FCode ()
-bumpTickyLblByE lhs = bumpTickyLitByE (cmmLabelOffB lhs 0)
-
-bumpTickyLit :: CmmLit -> FCode ()
-bumpTickyLit lhs = bumpTickyLitBy lhs 1
-
-bumpTickyLitBy :: CmmLit -> Int -> FCode ()
-bumpTickyLitBy lhs n = do
-  dflags <- getDynFlags
-  emit (addToMem (bWord dflags) (CmmLit lhs) n)
-
-bumpTickyLitByE :: CmmLit -> CmmExpr -> FCode ()
-bumpTickyLitByE lhs e = do
-  dflags <- getDynFlags
-  emit (addToMemE (bWord dflags) (CmmLit lhs) e)
-
-bumpHistogram :: FastString -> Int -> FCode ()
-bumpHistogram lbl n = do
-    dflags <- getDynFlags
-    let offset = n `min` (tICKY_BIN_COUNT dflags - 1)
-    emit (addToMem (bWord dflags)
-           (cmmIndexExpr dflags
-                (wordWidth dflags)
-                (CmmLit (CmmLabel (mkCmmDataLabel rtsUnitId lbl)))
-                (CmmLit (CmmInt (fromIntegral offset) (wordWidth dflags))))
-           1)
-
-------------------------------------------------------------------
--- Showing the "type category" for ticky-ticky profiling
-
-showTypeCategory :: Type -> Char
-  {-
-        +           dictionary
-
-        >           function
-
-        {C,I,F,D,W} char, int, float, double, word
-        {c,i,f,d,w} unboxed ditto
-
-        T           tuple
-
-        P           other primitive type
-        p           unboxed ditto
-
-        L           list
-        E           enumeration type
-        S           other single-constructor type
-        M           other multi-constructor data-con type
-
-        .           other type
-
-        -           reserved for others to mark as "uninteresting"
-
-  Accurate as of Mar 2013, but I eliminated the Array category instead
-  of updating it, for simplicity. It's in P/p, I think --NSF
-
-    -}
-showTypeCategory ty
-  | isDictTy ty = '+'
-  | otherwise = case tcSplitTyConApp_maybe ty of
-  Nothing -> '.'
-  Just (tycon, _) ->
-    (if isUnliftedTyCon tycon then Data.Char.toLower else id) $
-    let anyOf us = getUnique tycon `elem` us in
-    case () of
-      _ | anyOf [funTyConKey] -> '>'
-        | anyOf [charPrimTyConKey, charTyConKey] -> 'C'
-        | anyOf [doublePrimTyConKey, doubleTyConKey] -> 'D'
-        | anyOf [floatPrimTyConKey, floatTyConKey] -> 'F'
-        | anyOf [intPrimTyConKey, int32PrimTyConKey, int64PrimTyConKey,
-                 intTyConKey, int8TyConKey, int16TyConKey, int32TyConKey, int64TyConKey
-                ] -> 'I'
-        | anyOf [wordPrimTyConKey, word32PrimTyConKey, word64PrimTyConKey, wordTyConKey,
-                 word8TyConKey, word16TyConKey, word32TyConKey, word64TyConKey
-                ] -> 'W'
-        | anyOf [listTyConKey] -> 'L'
-        | isTupleTyCon tycon       -> 'T'
-        | isPrimTyCon tycon        -> 'P'
-        | isEnumerationTyCon tycon -> 'E'
-        | isJust (tyConSingleDataCon_maybe tycon) -> 'S'
-        | otherwise -> 'M' -- oh, well...
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
deleted file mode 100644
index 766584e2c9..0000000000
--- a/compiler/codeGen/StgCmmUtils.hs
+++ /dev/null
@@ -1,578 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Code generator utilities; mostly monadic
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmUtils (
-        cgLit, mkSimpleLit,
-        emitDataLits, mkDataLits,
-        emitRODataLits, mkRODataLits,
-        emitRtsCall, emitRtsCallWithResult, emitRtsCallGen,
-        assignTemp, newTemp,
-
-        newUnboxedTupleRegs,
-
-        emitMultiAssign, emitCmmLitSwitch, emitSwitch,
-
-        tagToClosure, mkTaggedObjectLoad,
-
-        callerSaves, callerSaveVolatileRegs, get_GlobalReg_addr,
-
-        cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
-        cmmUGtWord, cmmSubWord, cmmMulWord, cmmAddWord, cmmUShrWord,
-        cmmOffsetExprW, cmmOffsetExprB,
-        cmmRegOffW, cmmRegOffB,
-        cmmLabelOffW, cmmLabelOffB,
-        cmmOffsetW, cmmOffsetB,
-        cmmOffsetLitW, cmmOffsetLitB,
-        cmmLoadIndexW,
-        cmmConstrTag1,
-
-        cmmUntag, cmmIsTagged,
-
-        addToMem, addToMemE, addToMemLblE, addToMemLbl,
-        mkWordCLit,
-        newStringCLit, newByteStringCLit,
-        blankWord,
-  ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import StgCmmMonad
-import StgCmmClosure
-import Cmm
-import BlockId
-import MkGraph
-import CodeGen.Platform
-import CLabel
-import CmmUtils
-import CmmSwitch
-import CgUtils
-
-import ForeignCall
-import IdInfo
-import Type
-import TyCon
-import SMRep
-import Module
-import Literal
-import Digraph
-import Util
-import Unique
-import UniqSupply (MonadUnique(..))
-import DynFlags
-import FastString
-import Outputable
-import RepType
-
-import Data.ByteString (ByteString)
-import qualified Data.ByteString.Char8 as BS8
-import qualified Data.Map as M
-import Data.Char
-import Data.List
-import Data.Ord
-
-
--------------------------------------------------------------------------
---
---      Literals
---
--------------------------------------------------------------------------
-
-cgLit :: Literal -> FCode CmmLit
-cgLit (LitString s) = newByteStringCLit s
- -- not unpackFS; we want the UTF-8 byte stream.
-cgLit other_lit     = do dflags <- getDynFlags
-                         return (mkSimpleLit dflags other_lit)
-
-mkSimpleLit :: DynFlags -> Literal -> CmmLit
-mkSimpleLit dflags (LitChar   c)                = CmmInt (fromIntegral (ord c))
-                                                         (wordWidth dflags)
-mkSimpleLit dflags LitNullAddr                  = zeroCLit dflags
-mkSimpleLit dflags (LitNumber LitNumInt i _)    = CmmInt i (wordWidth dflags)
-mkSimpleLit _      (LitNumber LitNumInt64 i _)  = CmmInt i W64
-mkSimpleLit dflags (LitNumber LitNumWord i _)   = CmmInt i (wordWidth dflags)
-mkSimpleLit _      (LitNumber LitNumWord64 i _) = CmmInt i W64
-mkSimpleLit _      (LitFloat r)                 = CmmFloat r W32
-mkSimpleLit _      (LitDouble r)                = CmmFloat r W64
-mkSimpleLit _      (LitLabel fs ms fod)
-  = let -- TODO: Literal labels might not actually be in the current package...
-        labelSrc = ForeignLabelInThisPackage
-    in CmmLabel (mkForeignLabel fs ms labelSrc fod)
--- NB: LitRubbish should have been lowered in "CoreToStg"
-mkSimpleLit _      other = pprPanic "mkSimpleLit" (ppr other)
-
---------------------------------------------------------------------------
---
--- Incrementing a memory location
---
---------------------------------------------------------------------------
-
-addToMemLbl :: CmmType -> CLabel -> Int -> CmmAGraph
-addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n
-
-addToMemLblE :: CmmType -> CLabel -> CmmExpr -> CmmAGraph
-addToMemLblE rep lbl = addToMemE rep (CmmLit (CmmLabel lbl))
-
-addToMem :: CmmType     -- rep of the counter
-         -> CmmExpr     -- Address
-         -> Int         -- What to add (a word)
-         -> CmmAGraph
-addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) (typeWidth rep)))
-
-addToMemE :: CmmType    -- rep of the counter
-          -> CmmExpr    -- Address
-          -> CmmExpr    -- What to add (a word-typed expression)
-          -> CmmAGraph
-addToMemE rep ptr n
-  = mkStore ptr (CmmMachOp (MO_Add (typeWidth rep)) [CmmLoad ptr rep, n])
-
-
--------------------------------------------------------------------------
---
---      Loading a field from an object,
---      where the object pointer is itself tagged
---
--------------------------------------------------------------------------
-
-mkTaggedObjectLoad
-  :: DynFlags -> LocalReg -> LocalReg -> ByteOff -> DynTag -> CmmAGraph
--- (loadTaggedObjectField reg base off tag) generates assignment
---      reg = bitsK[ base + off - tag ]
--- where K is fixed by 'reg'
-mkTaggedObjectLoad dflags reg base offset tag
-  = mkAssign (CmmLocal reg)
-             (CmmLoad (cmmOffsetB dflags
-                                  (CmmReg (CmmLocal base))
-                                  (offset - tag))
-                      (localRegType reg))
-
--------------------------------------------------------------------------
---
---      Converting a closure tag to a closure for enumeration types
---      (this is the implementation of tagToEnum#).
---
--------------------------------------------------------------------------
-
-tagToClosure :: DynFlags -> TyCon -> CmmExpr -> CmmExpr
-tagToClosure dflags tycon tag
-  = CmmLoad (cmmOffsetExprW dflags closure_tbl tag) (bWord dflags)
-  where closure_tbl = CmmLit (CmmLabel lbl)
-        lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs
-
--------------------------------------------------------------------------
---
---      Conditionals and rts calls
---
--------------------------------------------------------------------------
-
-emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
-emitRtsCall pkg fun args safe = emitRtsCallGen [] (mkCmmCodeLabel pkg fun) args safe
-
-emitRtsCallWithResult :: LocalReg -> ForeignHint -> UnitId -> FastString
-        -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
-emitRtsCallWithResult res hint pkg fun args safe
-   = emitRtsCallGen [(res,hint)] (mkCmmCodeLabel pkg fun) args safe
-
--- Make a call to an RTS C procedure
-emitRtsCallGen
-   :: [(LocalReg,ForeignHint)]
-   -> CLabel
-   -> [(CmmExpr,ForeignHint)]
-   -> Bool -- True <=> CmmSafe call
-   -> FCode ()
-emitRtsCallGen res lbl args safe
-  = do { dflags <- getDynFlags
-       ; updfr_off <- getUpdFrameOff
-       ; let (caller_save, caller_load) = callerSaveVolatileRegs dflags
-       ; emit caller_save
-       ; call updfr_off
-       ; emit caller_load }
-  where
-    call updfr_off =
-      if safe then
-        emit =<< mkCmmCall fun_expr res' args' updfr_off
-      else do
-        let conv = ForeignConvention CCallConv arg_hints res_hints CmmMayReturn
-        emit $ mkUnsafeCall (ForeignTarget fun_expr conv) res' args'
-    (args', arg_hints) = unzip args
-    (res',  res_hints) = unzip res
-    fun_expr = mkLblExpr lbl
-
-
------------------------------------------------------------------------------
---
---      Caller-Save Registers
---
------------------------------------------------------------------------------
-
--- Here we generate the sequence of saves/restores required around a
--- foreign call instruction.
-
--- TODO: reconcile with includes/Regs.h
---  * Regs.h claims that BaseReg should be saved last and loaded first
---    * This might not have been tickled before since BaseReg is callee save
---  * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim
---
--- This code isn't actually used right now, because callerSaves
--- only ever returns true in the current universe for registers NOT in
--- system_regs (just do a grep for CALLER_SAVES in
--- includes/stg/MachRegs.h).  It's all one giant no-op, and for
--- good reason: having to save system registers on every foreign call
--- would be very expensive, so we avoid assigning them to those
--- registers when we add support for an architecture.
---
--- Note that the old code generator actually does more work here: it
--- also saves other global registers.  We can't (nor want) to do that
--- here, as we don't have liveness information.  And really, we
--- shouldn't be doing the workaround at this point in the pipeline, see
--- Note [Register parameter passing] and the ToDo on CmmCall in
--- cmm/CmmNode.hs.  Right now the workaround is to avoid inlining across
--- unsafe foreign calls in rewriteAssignments, but this is strictly
--- temporary.
-callerSaveVolatileRegs :: DynFlags -> (CmmAGraph, CmmAGraph)
-callerSaveVolatileRegs dflags = (caller_save, caller_load)
-  where
-    platform = targetPlatform dflags
-
-    caller_save = catAGraphs (map callerSaveGlobalReg    regs_to_save)
-    caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save)
-
-    system_regs = [ Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery
-                    {- ,SparkHd,SparkTl,SparkBase,SparkLim -}
-                  , BaseReg ]
-
-    regs_to_save = filter (callerSaves platform) system_regs
-
-    callerSaveGlobalReg reg
-        = mkStore (get_GlobalReg_addr dflags reg) (CmmReg (CmmGlobal reg))
-
-    callerRestoreGlobalReg reg
-        = mkAssign (CmmGlobal reg)
-                   (CmmLoad (get_GlobalReg_addr dflags reg) (globalRegType dflags reg))
-
-
--------------------------------------------------------------------------
---
---      Strings generate a top-level data block
---
--------------------------------------------------------------------------
-
-emitDataLits :: CLabel -> [CmmLit] -> FCode ()
--- Emit a data-segment data block
-emitDataLits lbl lits = emitDecl (mkDataLits (Section Data lbl) lbl lits)
-
-emitRODataLits :: CLabel -> [CmmLit] -> FCode ()
--- Emit a read-only data block
-emitRODataLits lbl lits = emitDecl (mkRODataLits lbl lits)
-
-newStringCLit :: String -> FCode CmmLit
--- Make a global definition for the string,
--- and return its label
-newStringCLit str = newByteStringCLit (BS8.pack str)
-
-newByteStringCLit :: ByteString -> FCode CmmLit
-newByteStringCLit bytes
-  = do  { uniq <- newUnique
-        ; let (lit, decl) = mkByteStringCLit (mkStringLitLabel uniq) bytes
-        ; emitDecl decl
-        ; return lit }
-
--------------------------------------------------------------------------
---
---      Assigning expressions to temporaries
---
--------------------------------------------------------------------------
-
-assignTemp :: CmmExpr -> FCode LocalReg
--- Make sure the argument is in a local register.
--- We don't bother being particularly aggressive with avoiding
--- unnecessary local registers, since we can rely on a later
--- optimization pass to inline as necessary (and skipping out
--- on things like global registers can be a little dangerous
--- due to them being trashed on foreign calls--though it means
--- the optimization pass doesn't have to do as much work)
-assignTemp (CmmReg (CmmLocal reg)) = return reg
-assignTemp e = do { dflags <- getDynFlags
-                  ; uniq <- newUnique
-                  ; let reg = LocalReg uniq (cmmExprType dflags e)
-                  ; emitAssign (CmmLocal reg) e
-                  ; return reg }
-
-newTemp :: MonadUnique m => CmmType -> m LocalReg
-newTemp rep = do { uniq <- getUniqueM
-                 ; return (LocalReg uniq rep) }
-
-newUnboxedTupleRegs :: Type -> FCode ([LocalReg], [ForeignHint])
--- Choose suitable local regs to use for the components
--- of an unboxed tuple that we are about to return to
--- the Sequel.  If the Sequel is a join point, using the
--- regs it wants will save later assignments.
-newUnboxedTupleRegs res_ty
-  = ASSERT( isUnboxedTupleType res_ty )
-    do  { dflags <- getDynFlags
-        ; sequel <- getSequel
-        ; regs <- choose_regs dflags sequel
-        ; ASSERT( regs `equalLength` reps )
-          return (regs, map primRepForeignHint reps) }
-  where
-    reps = typePrimRep res_ty
-    choose_regs _ (AssignTo regs _) = return regs
-    choose_regs dflags _            = mapM (newTemp . primRepCmmType dflags) reps
-
-
-
--------------------------------------------------------------------------
---      emitMultiAssign
--------------------------------------------------------------------------
-
-emitMultiAssign :: [LocalReg] -> [CmmExpr] -> FCode ()
--- Emit code to perform the assignments in the
--- input simultaneously, using temporary variables when necessary.
-
-type Key  = Int
-type Vrtx = (Key, Stmt) -- Give each vertex a unique number,
-                        -- for fast comparison
-type Stmt = (LocalReg, CmmExpr) -- r := e
-
--- We use the strongly-connected component algorithm, in which
---      * the vertices are the statements
---      * an edge goes from s1 to s2 iff
---              s1 assigns to something s2 uses
---        that is, if s1 should *follow* s2 in the final order
-
-emitMultiAssign []    []    = return ()
-emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs
-emitMultiAssign regs rhss   = do
-  dflags <- getDynFlags
-  ASSERT2( equalLength regs rhss, ppr regs $$ ppr rhss )
-    unscramble dflags ([1..] `zip` (regs `zip` rhss))
-
-unscramble :: DynFlags -> [Vrtx] -> FCode ()
-unscramble dflags vertices = mapM_ do_component components
-  where
-        edges :: [ Node Key Vrtx ]
-        edges = [ DigraphNode vertex key1 (edges_from stmt1)
-                | vertex@(key1, stmt1) <- vertices ]
-
-        edges_from :: Stmt -> [Key]
-        edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
-                                    stmt1 `mustFollow` stmt2 ]
-
-        components :: [SCC Vrtx]
-        components = stronglyConnCompFromEdgedVerticesUniq edges
-
-        -- do_components deal with one strongly-connected component
-        -- Not cyclic, or singleton?  Just do it
-        do_component :: SCC Vrtx -> FCode ()
-        do_component (AcyclicSCC (_,stmt))  = mk_graph stmt
-        do_component (CyclicSCC [])         = panic "do_component"
-        do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt
-
-                -- Cyclic?  Then go via temporaries.  Pick one to
-                -- break the loop and try again with the rest.
-        do_component (CyclicSCC ((_,first_stmt) : rest)) = do
-            dflags <- getDynFlags
-            u <- newUnique
-            let (to_tmp, from_tmp) = split dflags u first_stmt
-            mk_graph to_tmp
-            unscramble dflags rest
-            mk_graph from_tmp
-
-        split :: DynFlags -> Unique -> Stmt -> (Stmt, Stmt)
-        split dflags uniq (reg, rhs)
-          = ((tmp, rhs), (reg, CmmReg (CmmLocal tmp)))
-          where
-            rep = cmmExprType dflags rhs
-            tmp = LocalReg uniq rep
-
-        mk_graph :: Stmt -> FCode ()
-        mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs
-
-        mustFollow :: Stmt -> Stmt -> Bool
-        (reg, _) `mustFollow` (_, rhs) = regUsedIn dflags (CmmLocal reg) rhs
-
--------------------------------------------------------------------------
---      mkSwitch
--------------------------------------------------------------------------
-
-
-emitSwitch :: CmmExpr                      -- Tag to switch on
-           -> [(ConTagZ, CmmAGraphScoped)] -- Tagged branches
-           -> Maybe CmmAGraphScoped        -- Default branch (if any)
-           -> ConTagZ -> ConTagZ           -- Min and Max possible values;
-                                           -- behaviour outside this range is
-                                           -- undefined
-           -> FCode ()
-
--- First, two rather common cases in which there is no work to do
-emitSwitch _ []         (Just code) _ _ = emit (fst code)
-emitSwitch _ [(_,code)] Nothing     _ _ = emit (fst code)
-
--- Right, off we go
-emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do
-    join_lbl      <- newBlockId
-    mb_deflt_lbl  <- label_default join_lbl mb_deflt
-    branches_lbls <- label_branches join_lbl branches
-    tag_expr'     <- assignTemp' tag_expr
-
-    -- Sort the branches before calling mk_discrete_switch
-    let branches_lbls' = [ (fromIntegral i, l) | (i,l) <- sortBy (comparing fst) branches_lbls ]
-    let range = (fromIntegral lo_tag, fromIntegral hi_tag)
-
-    emit $ mk_discrete_switch False tag_expr' branches_lbls' mb_deflt_lbl range
-
-    emitLabel join_lbl
-
-mk_discrete_switch :: Bool -- ^ Use signed comparisons
-          -> CmmExpr
-          -> [(Integer, BlockId)]
-          -> Maybe BlockId
-          -> (Integer, Integer)
-          -> CmmAGraph
-
--- SINGLETON TAG RANGE: no case analysis to do
-mk_discrete_switch _ _tag_expr [(tag, lbl)] _ (lo_tag, hi_tag)
-  | lo_tag == hi_tag
-  = ASSERT( tag == lo_tag )
-    mkBranch lbl
-
--- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
-mk_discrete_switch _ _tag_expr [(_tag,lbl)] Nothing _
-  = mkBranch lbl
-        -- The simplifier might have eliminated a case
-        --       so we may have e.g. case xs of
-        --                               [] -> e
-        -- In that situation we can be sure the (:) case
-        -- can't happen, so no need to test
-
--- SOMETHING MORE COMPLICATED: defer to CmmImplementSwitchPlans
--- See Note [Cmm Switches, the general plan] in CmmSwitch
-mk_discrete_switch signed tag_expr branches mb_deflt range
-  = mkSwitch tag_expr $ mkSwitchTargets signed range mb_deflt (M.fromList branches)
-
-divideBranches :: Ord a => [(a,b)] -> ([(a,b)], a, [(a,b)])
-divideBranches branches = (lo_branches, mid, hi_branches)
-  where
-    -- 2 branches => n_branches `div` 2 = 1
-    --            => branches !! 1 give the *second* tag
-    -- There are always at least 2 branches here
-    (mid,_) = branches !! (length branches `div` 2)
-    (lo_branches, hi_branches) = span is_lo branches
-    is_lo (t,_) = t < mid
-
---------------
-emitCmmLitSwitch :: CmmExpr                    -- Tag to switch on
-               -> [(Literal, CmmAGraphScoped)] -- Tagged branches
-               -> CmmAGraphScoped              -- Default branch (always)
-               -> FCode ()                     -- Emit the code
-emitCmmLitSwitch _scrut []       deflt = emit $ fst deflt
-emitCmmLitSwitch scrut  branches deflt = do
-    scrut' <- assignTemp' scrut
-    join_lbl <- newBlockId
-    deflt_lbl <- label_code join_lbl deflt
-    branches_lbls <- label_branches join_lbl branches
-
-    dflags <- getDynFlags
-    let cmm_ty = cmmExprType dflags scrut
-        rep = typeWidth cmm_ty
-
-    -- We find the necessary type information in the literals in the branches
-    let signed = case head branches of
-                    (LitNumber nt _ _, _) -> litNumIsSigned nt
-                    _ -> False
-
-    let range | signed    = (tARGET_MIN_INT dflags, tARGET_MAX_INT dflags)
-              | otherwise = (0, tARGET_MAX_WORD dflags)
-
-    if isFloatType cmm_ty
-    then emit =<< mk_float_switch rep scrut' deflt_lbl noBound branches_lbls
-    else emit $ mk_discrete_switch
-        signed
-        scrut'
-        [(litValue lit,l) | (lit,l) <- branches_lbls]
-        (Just deflt_lbl)
-        range
-    emitLabel join_lbl
-
--- | lower bound (inclusive), upper bound (exclusive)
-type LitBound = (Maybe Literal, Maybe Literal)
-
-noBound :: LitBound
-noBound = (Nothing, Nothing)
-
-mk_float_switch :: Width -> CmmExpr -> BlockId
-              -> LitBound
-              -> [(Literal,BlockId)]
-              -> FCode CmmAGraph
-mk_float_switch rep scrut deflt _bounds [(lit,blk)]
-  = do dflags <- getDynFlags
-       return $ mkCbranch (cond dflags) deflt blk Nothing
-  where
-    cond dflags = CmmMachOp ne [scrut, CmmLit cmm_lit]
-      where
-        cmm_lit = mkSimpleLit dflags lit
-        ne      = MO_F_Ne rep
-
-mk_float_switch rep scrut deflt_blk_id (lo_bound, hi_bound) branches
-  = do dflags <- getDynFlags
-       lo_blk <- mk_float_switch rep scrut deflt_blk_id bounds_lo lo_branches
-       hi_blk <- mk_float_switch rep scrut deflt_blk_id bounds_hi hi_branches
-       mkCmmIfThenElse (cond dflags) lo_blk hi_blk
-  where
-    (lo_branches, mid_lit, hi_branches) = divideBranches branches
-
-    bounds_lo = (lo_bound, Just mid_lit)
-    bounds_hi = (Just mid_lit, hi_bound)
-
-    cond dflags = CmmMachOp lt [scrut, CmmLit cmm_lit]
-      where
-        cmm_lit = mkSimpleLit dflags mid_lit
-        lt      = MO_F_Lt rep
-
-
---------------
-label_default :: BlockId -> Maybe CmmAGraphScoped -> FCode (Maybe BlockId)
-label_default _ Nothing
-  = return Nothing
-label_default join_lbl (Just code)
-  = do lbl <- label_code join_lbl code
-       return (Just lbl)
-
---------------
-label_branches :: BlockId -> [(a,CmmAGraphScoped)] -> FCode [(a,BlockId)]
-label_branches _join_lbl []
-  = return []
-label_branches join_lbl ((tag,code):branches)
-  = do lbl <- label_code join_lbl code
-       branches' <- label_branches join_lbl branches
-       return ((tag,lbl):branches')
-
---------------
-label_code :: BlockId -> CmmAGraphScoped -> FCode BlockId
---  label_code J code
---      generates
---  [L: code; goto J]
--- and returns L
-label_code join_lbl (code,tsc) = do
-    lbl <- newBlockId
-    emitOutOfLine lbl (code MkGraph.<*> mkBranch join_lbl, tsc)
-    return lbl
-
---------------
-assignTemp' :: CmmExpr -> FCode CmmExpr
-assignTemp' e
-  | isTrivialCmmExpr e = return e
-  | otherwise = do
-       dflags <- getDynFlags
-       lreg <- newTemp (cmmExprType dflags e)
-       let reg = CmmLocal lreg
-       emitAssign reg e
-       return (CmmReg reg)
author	Sylvain Henry <sylvain@haskus.fr>	2019-08-13 17:26:32 +0200
committer	Sylvain Henry <sylvain@haskus.fr>	2019-09-10 00:04:50 +0200
commit	447864a94a1679b5b079e08bb7208a0005381cef (patch)
tree	baa469c52620ce7ae02def3e5e6a6f109cc89f40 /compiler/codeGen
parent	270fbe8512f04b6107755fa22bdec62205c0a567 (diff)
download	haskell-447864a94a1679b5b079e08bb7208a0005381cef.tar.gz