author     Sylvain Henry <sylvain@haskus.fr>  2019-08-13 17:26:32 +0200
committer  Sylvain Henry <sylvain@haskus.fr>  2019-09-10 00:04:50 +0200
commit     447864a94a1679b5b079e08bb7208a0005381cef (patch)
tree       baa469c52620ce7ae02def3e5e6a6f109cc89f40 /compiler/codeGen
parent     270fbe8512f04b6107755fa22bdec62205c0a567 (diff)
download   haskell-447864a94a1679b5b079e08bb7208a0005381cef.tar.gz
Module hierarchy: StgToCmm (#13009)
Add StgToCmm module hierarchy. Platform modules that are used in several other places (NCG, LLVM codegen, Cmm transformations) are put into GHC.Platform.
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--  compiler/codeGen/CgUtils.hs | 186
-rw-r--r--  compiler/codeGen/CodeGen/Platform.hs | 107
-rw-r--r--  compiler/codeGen/CodeGen/Platform/ARM.hs | 10
-rw-r--r--  compiler/codeGen/CodeGen/Platform/ARM64.hs | 10
-rw-r--r--  compiler/codeGen/CodeGen/Platform/NoRegs.hs | 9
-rw-r--r--  compiler/codeGen/CodeGen/Platform/PPC.hs | 10
-rw-r--r--  compiler/codeGen/CodeGen/Platform/SPARC.hs | 10
-rw-r--r--  compiler/codeGen/CodeGen/Platform/X86.hs | 10
-rw-r--r--  compiler/codeGen/CodeGen/Platform/X86_64.hs | 10
-rw-r--r--  compiler/codeGen/StgCmm.hs | 223
-rw-r--r--  compiler/codeGen/StgCmmArgRep.hs | 160
-rw-r--r--  compiler/codeGen/StgCmmBind.hs | 753
-rw-r--r--  compiler/codeGen/StgCmmBind.hs-boot | 6
-rw-r--r--  compiler/codeGen/StgCmmClosure.hs | 1008
-rw-r--r--  compiler/codeGen/StgCmmCon.hs | 285
-rw-r--r--  compiler/codeGen/StgCmmEnv.hs | 208
-rw-r--r--  compiler/codeGen/StgCmmExpr.hs | 992
-rw-r--r--  compiler/codeGen/StgCmmExtCode.hs | 252
-rw-r--r--  compiler/codeGen/StgCmmForeign.hs | 627
-rw-r--r--  compiler/codeGen/StgCmmHeap.hs | 680
-rw-r--r--  compiler/codeGen/StgCmmHpc.hs | 48
-rw-r--r--  compiler/codeGen/StgCmmLayout.hs | 623
-rw-r--r--  compiler/codeGen/StgCmmMonad.hs | 861
-rw-r--r--  compiler/codeGen/StgCmmPrim.hs | 2622
-rw-r--r--  compiler/codeGen/StgCmmProf.hs | 360
-rw-r--r--  compiler/codeGen/StgCmmTicky.hs | 682
-rw-r--r--  compiler/codeGen/StgCmmUtils.hs | 578
27 files changed, 0 insertions, 11330 deletions
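
The commit message above describes a pure renaming: the flat StgCmm* and CodeGen.Platform* module names deleted below reappear under a hierarchical namespace. As a rough, hedged illustration of what that means for an import site (the new names are inferred from the commit title "Module hierarchy: StgToCmm" and the GHC.Platform remark in the message; they are not shown by this diff):

module MyStgToCmmClient (clientCodeGen) where

-- Before this patch: flat module name, as in the files deleted below.
import StgCmm (codeGen)

-- After this patch the import would instead use the hierarchical name,
-- assumed here to be GHC.StgToCmm:
-- import GHC.StgToCmm (codeGen)

-- Re-export the STG-to-Cmm entry point unchanged; only the module path moves.
clientCodeGen = codeGen
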
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
deleted file mode 100644
index 0ff9bd8b56..0000000000
--- a/compiler/codeGen/CgUtils.hs
+++ /dev/null
@@ -1,186 +0,0 @@
-{-# LANGUAGE GADTs #-}
-
------------------------------------------------------------------------------
---
--- Code generator utilities; mostly monadic
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module CgUtils (
- fixStgRegisters,
- baseRegOffset,
- get_Regtable_addr_from_offset,
- regTableOffset,
- get_GlobalReg_addr,
- ) where
-
-import GhcPrelude
-
-import CodeGen.Platform
-import Cmm
-import Hoopl.Block
-import Hoopl.Graph
-import CmmUtils
-import CLabel
-import DynFlags
-import Outputable
-
--- -----------------------------------------------------------------------------
--- Information about global registers
-
-baseRegOffset :: DynFlags -> GlobalReg -> Int
-
-baseRegOffset dflags (VanillaReg 1 _) = oFFSET_StgRegTable_rR1 dflags
-baseRegOffset dflags (VanillaReg 2 _) = oFFSET_StgRegTable_rR2 dflags
-baseRegOffset dflags (VanillaReg 3 _) = oFFSET_StgRegTable_rR3 dflags
-baseRegOffset dflags (VanillaReg 4 _) = oFFSET_StgRegTable_rR4 dflags
-baseRegOffset dflags (VanillaReg 5 _) = oFFSET_StgRegTable_rR5 dflags
-baseRegOffset dflags (VanillaReg 6 _) = oFFSET_StgRegTable_rR6 dflags
-baseRegOffset dflags (VanillaReg 7 _) = oFFSET_StgRegTable_rR7 dflags
-baseRegOffset dflags (VanillaReg 8 _) = oFFSET_StgRegTable_rR8 dflags
-baseRegOffset dflags (VanillaReg 9 _) = oFFSET_StgRegTable_rR9 dflags
-baseRegOffset dflags (VanillaReg 10 _) = oFFSET_StgRegTable_rR10 dflags
-baseRegOffset _ (VanillaReg n _) = panic ("Registers above R10 are not supported (tried to use R" ++ show n ++ ")")
-baseRegOffset dflags (FloatReg 1) = oFFSET_StgRegTable_rF1 dflags
-baseRegOffset dflags (FloatReg 2) = oFFSET_StgRegTable_rF2 dflags
-baseRegOffset dflags (FloatReg 3) = oFFSET_StgRegTable_rF3 dflags
-baseRegOffset dflags (FloatReg 4) = oFFSET_StgRegTable_rF4 dflags
-baseRegOffset dflags (FloatReg 5) = oFFSET_StgRegTable_rF5 dflags
-baseRegOffset dflags (FloatReg 6) = oFFSET_StgRegTable_rF6 dflags
-baseRegOffset _ (FloatReg n) = panic ("Registers above F6 are not supported (tried to use F" ++ show n ++ ")")
-baseRegOffset dflags (DoubleReg 1) = oFFSET_StgRegTable_rD1 dflags
-baseRegOffset dflags (DoubleReg 2) = oFFSET_StgRegTable_rD2 dflags
-baseRegOffset dflags (DoubleReg 3) = oFFSET_StgRegTable_rD3 dflags
-baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags
-baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags
-baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags
-baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")")
-baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags
-baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags
-baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags
-baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags
-baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags
-baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags
-baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
-baseRegOffset dflags (YmmReg 1) = oFFSET_StgRegTable_rYMM1 dflags
-baseRegOffset dflags (YmmReg 2) = oFFSET_StgRegTable_rYMM2 dflags
-baseRegOffset dflags (YmmReg 3) = oFFSET_StgRegTable_rYMM3 dflags
-baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags
-baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags
-baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags
-baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
-baseRegOffset dflags (ZmmReg 1) = oFFSET_StgRegTable_rZMM1 dflags
-baseRegOffset dflags (ZmmReg 2) = oFFSET_StgRegTable_rZMM2 dflags
-baseRegOffset dflags (ZmmReg 3) = oFFSET_StgRegTable_rZMM3 dflags
-baseRegOffset dflags (ZmmReg 4) = oFFSET_StgRegTable_rZMM4 dflags
-baseRegOffset dflags (ZmmReg 5) = oFFSET_StgRegTable_rZMM5 dflags
-baseRegOffset dflags (ZmmReg 6) = oFFSET_StgRegTable_rZMM6 dflags
-baseRegOffset _ (ZmmReg n) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
-baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
-baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
-baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
-baseRegOffset _ (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")")
-baseRegOffset dflags Hp = oFFSET_StgRegTable_rHp dflags
-baseRegOffset dflags HpLim = oFFSET_StgRegTable_rHpLim dflags
-baseRegOffset dflags CCCS = oFFSET_StgRegTable_rCCCS dflags
-baseRegOffset dflags CurrentTSO = oFFSET_StgRegTable_rCurrentTSO dflags
-baseRegOffset dflags CurrentNursery = oFFSET_StgRegTable_rCurrentNursery dflags
-baseRegOffset dflags HpAlloc = oFFSET_StgRegTable_rHpAlloc dflags
-baseRegOffset dflags EagerBlackholeInfo = oFFSET_stgEagerBlackholeInfo dflags
-baseRegOffset dflags GCEnter1 = oFFSET_stgGCEnter1 dflags
-baseRegOffset dflags GCFun = oFFSET_stgGCFun dflags
-baseRegOffset _ BaseReg = panic "CgUtils.baseRegOffset:BaseReg"
-baseRegOffset _ PicBaseReg = panic "CgUtils.baseRegOffset:PicBaseReg"
-baseRegOffset _ MachSp = panic "CgUtils.baseRegOffset:MachSp"
-baseRegOffset _ UnwindReturnReg = panic "CgUtils.baseRegOffset:UnwindReturnReg"
-
-
--- -----------------------------------------------------------------------------
---
--- STG/Cmm GlobalReg
---
--- -----------------------------------------------------------------------------
-
--- | We map STG registers onto appropriate CmmExprs. Either they map
--- to real machine registers or are stored as offsets from BaseReg. Given
--- a GlobalReg, get_GlobalReg_addr always produces the
--- register table address for it.
-get_GlobalReg_addr :: DynFlags -> GlobalReg -> CmmExpr
-get_GlobalReg_addr dflags BaseReg = regTableOffset dflags 0
-get_GlobalReg_addr dflags mid
- = get_Regtable_addr_from_offset dflags (baseRegOffset dflags mid)
-
--- Calculate a literal representing an offset into the register table.
--- Used when we don't have an actual BaseReg to offset from.
-regTableOffset :: DynFlags -> Int -> CmmExpr
-regTableOffset dflags n =
- CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r dflags + n))
-
-get_Regtable_addr_from_offset :: DynFlags -> Int -> CmmExpr
-get_Regtable_addr_from_offset dflags offset =
- if haveRegBase (targetPlatform dflags)
- then CmmRegOff baseReg offset
- else regTableOffset dflags offset
-
--- | Fixup global registers so that they assign to locations within the
--- RegTable if they aren't pinned for the current target.
-fixStgRegisters :: DynFlags -> RawCmmDecl -> RawCmmDecl
-fixStgRegisters _ top@(CmmData _ _) = top
-
-fixStgRegisters dflags (CmmProc info lbl live graph) =
- let graph' = modifyGraph (mapGraphBlocks (fixStgRegBlock dflags)) graph
- in CmmProc info lbl live graph'
-
-fixStgRegBlock :: DynFlags -> Block CmmNode e x -> Block CmmNode e x
-fixStgRegBlock dflags block = mapBlock (fixStgRegStmt dflags) block
-
-fixStgRegStmt :: DynFlags -> CmmNode e x -> CmmNode e x
-fixStgRegStmt dflags stmt = fixAssign $ mapExpDeep fixExpr stmt
- where
- platform = targetPlatform dflags
-
- fixAssign stmt =
- case stmt of
- CmmAssign (CmmGlobal reg) src
- -- MachSp isn't an STG register; it's merely here for tracking unwind
- -- information
- | reg == MachSp -> stmt
- | otherwise ->
- let baseAddr = get_GlobalReg_addr dflags reg
- in case reg `elem` activeStgRegs (targetPlatform dflags) of
- True -> CmmAssign (CmmGlobal reg) src
- False -> CmmStore baseAddr src
- other_stmt -> other_stmt
-
- fixExpr expr = case expr of
- -- MachSp isn't an STG register; it's merely here for tracking unwind information
- CmmReg (CmmGlobal MachSp) -> expr
- CmmReg (CmmGlobal reg) ->
- -- Replace register leaves with appropriate StixTrees for
- -- the given target. MagicIds which map to a reg on this
- -- arch are left unchanged. For the rest, BaseReg is taken
- -- to mean the address of the reg table in MainCapability,
- -- and for all others we generate an indirection to its
- -- location in the register table.
- case reg `elem` activeStgRegs platform of
- True -> expr
- False ->
- let baseAddr = get_GlobalReg_addr dflags reg
- in case reg of
- BaseReg -> baseAddr
- _other -> CmmLoad baseAddr (globalRegType dflags reg)
-
- CmmRegOff (CmmGlobal reg) offset ->
- -- CmmRegOff leaves are just a shorthand form. If the reg maps
- -- to a real reg, we keep the shorthand; otherwise we just
- -- expand it and defer to the above code.
- case reg `elem` activeStgRegs platform of
- True -> expr
- False -> CmmMachOp (MO_Add (wordWidth dflags)) [
- fixExpr (CmmReg (CmmGlobal reg)),
- CmmLit (CmmInt (fromIntegral offset)
- (wordWidth dflags))]
-
- other_expr -> other_expr
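
CgUtils (deleted above) is the pass that rewrites uses of STG global registers that are not pinned to machine registers on the current target into accesses to the register table. A minimal usage sketch, not part of the patch, assuming a GHC source tree of this vintage where the imported modules and the fixStgRegisters signature shown above are available:

module FixupExample (fixupGroup) where

import GhcPrelude
import CgUtils  (fixStgRegisters)
import Cmm      (RawCmmGroup)
import DynFlags (DynFlags)

-- Run the register fix-up over every declaration in a group.
-- Procedures get their graphs rewritten: an assignment to a global
-- register that is not in activeStgRegs becomes a store into its
-- register-table slot (via baseRegOffset); CmmData sections pass through.
fixupGroup :: DynFlags -> RawCmmGroup -> RawCmmGroup
fixupGroup dflags = map (fixStgRegisters dflags)
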
diff --git a/compiler/codeGen/CodeGen/Platform.hs b/compiler/codeGen/CodeGen/Platform.hs
deleted file mode 100644
index bc216758a0..0000000000
--- a/compiler/codeGen/CodeGen/Platform.hs
+++ /dev/null
@@ -1,107 +0,0 @@
-
-module CodeGen.Platform
- (callerSaves, activeStgRegs, haveRegBase, globalRegMaybe, freeReg)
- where
-
-import GhcPrelude
-
-import CmmExpr
-import GHC.Platform
-import Reg
-
-import qualified CodeGen.Platform.ARM as ARM
-import qualified CodeGen.Platform.ARM64 as ARM64
-import qualified CodeGen.Platform.PPC as PPC
-import qualified CodeGen.Platform.SPARC as SPARC
-import qualified CodeGen.Platform.X86 as X86
-import qualified CodeGen.Platform.X86_64 as X86_64
-import qualified CodeGen.Platform.NoRegs as NoRegs
-
--- | Returns 'True' if this global register is stored in a caller-saves
--- machine register.
-
-callerSaves :: Platform -> GlobalReg -> Bool
-callerSaves platform
- | platformUnregisterised platform = NoRegs.callerSaves
- | otherwise
- = case platformArch platform of
- ArchX86 -> X86.callerSaves
- ArchX86_64 -> X86_64.callerSaves
- ArchSPARC -> SPARC.callerSaves
- ArchARM {} -> ARM.callerSaves
- ArchARM64 -> ARM64.callerSaves
- arch
- | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
- PPC.callerSaves
-
- | otherwise -> NoRegs.callerSaves
-
--- | Here is where the STG register map is defined for each target arch.
--- The order matters (for the LLVM backend, anyway)! We must keep the order
--- here in sync with the order used in the LLVM calling convention.
--- Note also that this isn't all registers, just the ones that are currently
--- possibly mapped to real registers.
-activeStgRegs :: Platform -> [GlobalReg]
-activeStgRegs platform
- | platformUnregisterised platform = NoRegs.activeStgRegs
- | otherwise
- = case platformArch platform of
- ArchX86 -> X86.activeStgRegs
- ArchX86_64 -> X86_64.activeStgRegs
- ArchSPARC -> SPARC.activeStgRegs
- ArchARM {} -> ARM.activeStgRegs
- ArchARM64 -> ARM64.activeStgRegs
- arch
- | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
- PPC.activeStgRegs
-
- | otherwise -> NoRegs.activeStgRegs
-
-haveRegBase :: Platform -> Bool
-haveRegBase platform
- | platformUnregisterised platform = NoRegs.haveRegBase
- | otherwise
- = case platformArch platform of
- ArchX86 -> X86.haveRegBase
- ArchX86_64 -> X86_64.haveRegBase
- ArchSPARC -> SPARC.haveRegBase
- ArchARM {} -> ARM.haveRegBase
- ArchARM64 -> ARM64.haveRegBase
- arch
- | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
- PPC.haveRegBase
-
- | otherwise -> NoRegs.haveRegBase
-
-globalRegMaybe :: Platform -> GlobalReg -> Maybe RealReg
-globalRegMaybe platform
- | platformUnregisterised platform = NoRegs.globalRegMaybe
- | otherwise
- = case platformArch platform of
- ArchX86 -> X86.globalRegMaybe
- ArchX86_64 -> X86_64.globalRegMaybe
- ArchSPARC -> SPARC.globalRegMaybe
- ArchARM {} -> ARM.globalRegMaybe
- ArchARM64 -> ARM64.globalRegMaybe
- arch
- | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
- PPC.globalRegMaybe
-
- | otherwise -> NoRegs.globalRegMaybe
-
-freeReg :: Platform -> RegNo -> Bool
-freeReg platform
- | platformUnregisterised platform = NoRegs.freeReg
- | otherwise
- = case platformArch platform of
- ArchX86 -> X86.freeReg
- ArchX86_64 -> X86_64.freeReg
- ArchSPARC -> SPARC.freeReg
- ArchARM {} -> ARM.freeReg
- ArchARM64 -> ARM64.freeReg
- arch
- | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
- PPC.freeReg
-
- | otherwise -> NoRegs.freeReg
-
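
CodeGen.Platform (deleted above) is a pure dispatch layer: every query selects the per-architecture table generated from includes/CodeGen.Platform.hs, falling back to NoRegs for unregisterised builds. A small hypothetical sketch, not part of the patch, combining two of the queries it exports:

module PinnedRegsExample (pinnedCallerSaves) where

import GhcPrelude
import CodeGen.Platform (activeStgRegs, callerSaves)
import CmmExpr          (GlobalReg)
import GHC.Platform     (Platform)

-- STG global registers that are mapped to real machine registers on this
-- target and are caller-saves, i.e. must be saved around foreign calls.
pinnedCallerSaves :: Platform -> [GlobalReg]
pinnedCallerSaves platform =
  [ reg | reg <- activeStgRegs platform, callerSaves platform reg ]
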
diff --git a/compiler/codeGen/CodeGen/Platform/ARM.hs b/compiler/codeGen/CodeGen/Platform/ARM.hs
deleted file mode 100644
index a2cb476e04..0000000000
--- a/compiler/codeGen/CodeGen/Platform/ARM.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.ARM where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_arm 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/ARM64.hs b/compiler/codeGen/CodeGen/Platform/ARM64.hs
deleted file mode 100644
index 6ace181356..0000000000
--- a/compiler/codeGen/CodeGen/Platform/ARM64.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.ARM64 where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_aarch64 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/NoRegs.hs b/compiler/codeGen/CodeGen/Platform/NoRegs.hs
deleted file mode 100644
index 4c074ee313..0000000000
--- a/compiler/codeGen/CodeGen/Platform/NoRegs.hs
+++ /dev/null
@@ -1,9 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.NoRegs where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/PPC.hs b/compiler/codeGen/CodeGen/Platform/PPC.hs
deleted file mode 100644
index f7eae6b4ca..0000000000
--- a/compiler/codeGen/CodeGen/Platform/PPC.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.PPC where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_powerpc 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/SPARC.hs b/compiler/codeGen/CodeGen/Platform/SPARC.hs
deleted file mode 100644
index 5d8dbb1da9..0000000000
--- a/compiler/codeGen/CodeGen/Platform/SPARC.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.SPARC where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_sparc 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/X86.hs b/compiler/codeGen/CodeGen/Platform/X86.hs
deleted file mode 100644
index 84d52c1585..0000000000
--- a/compiler/codeGen/CodeGen/Platform/X86.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.X86 where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_i386 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/CodeGen/Platform/X86_64.hs b/compiler/codeGen/CodeGen/Platform/X86_64.hs
deleted file mode 100644
index 1b2b5549ac..0000000000
--- a/compiler/codeGen/CodeGen/Platform/X86_64.hs
+++ /dev/null
@@ -1,10 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module CodeGen.Platform.X86_64 where
-
-import GhcPrelude
-
-#define MACHREGS_NO_REGS 0
-#define MACHREGS_x86_64 1
-#include "../../../../includes/CodeGen.Platform.hs"
-
diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs
deleted file mode 100644
index 83409b6b24..0000000000
--- a/compiler/codeGen/StgCmm.hs
+++ /dev/null
@@ -1,223 +0,0 @@
-{-# LANGUAGE CPP #-}
-{-# LANGUAGE DataKinds #-}
-
------------------------------------------------------------------------------
---
--- Stg to C-- code generation
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmm ( codeGen ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude as Prelude
-
-import StgCmmProf (initCostCentres, ldvEnter)
-import StgCmmMonad
-import StgCmmEnv
-import StgCmmBind
-import StgCmmCon
-import StgCmmLayout
-import StgCmmUtils
-import StgCmmClosure
-import StgCmmHpc
-import StgCmmTicky
-
-import Cmm
-import CmmUtils
-import CLabel
-
-import StgSyn
-import DynFlags
-import ErrUtils
-
-import HscTypes
-import CostCentre
-import Id
-import IdInfo
-import RepType
-import DataCon
-import TyCon
-import Module
-import Outputable
-import Stream
-import BasicTypes
-import VarSet ( isEmptyDVarSet )
-
-import OrdList
-import MkGraph
-
-import Data.IORef
-import Control.Monad (when,void)
-import Util
-
-codeGen :: DynFlags
- -> Module
- -> [TyCon]
- -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering.
- -> [CgStgTopBinding] -- Bindings to convert
- -> HpcInfo
- -> Stream IO CmmGroup () -- Output as a stream, so codegen can
- -- be interleaved with output
-
-codeGen dflags this_mod data_tycons
- cost_centre_info stg_binds hpc_info
- = do { -- cg: run the code generator, and yield the resulting CmmGroup
- -- Using an IORef to store the state is a bit crude, but otherwise
- -- we would need to add a state monad layer.
- ; cgref <- liftIO $ newIORef =<< initC
- ; let cg :: FCode () -> Stream IO CmmGroup ()
- cg fcode = do
- cmm <- liftIO . withTiming (return dflags) (text "STG -> Cmm") (`seq` ()) $ do
- st <- readIORef cgref
- let (a,st') = runC dflags this_mod st (getCmm fcode)
-
- -- NB. stub-out cgs_tops and cgs_stmts. This fixes
- -- a big space leak. DO NOT REMOVE!
- writeIORef cgref $! st'{ cgs_tops = nilOL,
- cgs_stmts = mkNop }
- return a
- yield cmm
-
- -- Note [codegen-split-init] the cmm_init block must come
- -- FIRST. This is because when -split-objs is on we need to
- -- combine this block with its initialisation routines; see
- -- Note [pipeline-split-init].
- ; cg (mkModuleInit cost_centre_info this_mod hpc_info)
-
- ; mapM_ (cg . cgTopBinding dflags) stg_binds
-
- -- Put datatype_stuff after code_stuff, because the
- -- datatype closure table (for enumeration types) refers to
- -- (say) PrelBase_True_closure, which is defined in
- -- code_stuff
- ; let do_tycon tycon = do
- -- Generate a table of static closures for an
- -- enumeration type. Note that the closure pointers are
- -- tagged.
- when (isEnumerationTyCon tycon) $ cg (cgEnumerationTyCon tycon)
- mapM_ (cg . cgDataCon) (tyConDataCons tycon)
-
- ; mapM_ do_tycon data_tycons
- }
-
----------------------------------------------------------------
--- Top-level bindings
----------------------------------------------------------------
-
-{- 'cgTopBinding' is only used for top-level bindings, since they need
-to be allocated statically (not in the heap) and need to be labelled.
-No unboxed bindings can happen at top level.
-
-In the code below, the static bindings are accumulated in the
-@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@.
-This is so that we can write the top level processing in a compositional
-style, with the increasing static environment being plumbed as a state
-variable. -}
-
-cgTopBinding :: DynFlags -> CgStgTopBinding -> FCode ()
-cgTopBinding dflags (StgTopLifted (StgNonRec id rhs))
- = do { let (info, fcode) = cgTopRhs dflags NonRecursive id rhs
- ; fcode
- ; addBindC info
- }
-
-cgTopBinding dflags (StgTopLifted (StgRec pairs))
- = do { let (bndrs, rhss) = unzip pairs
- ; let pairs' = zip bndrs rhss
- r = unzipWith (cgTopRhs dflags Recursive) pairs'
- (infos, fcodes) = unzip r
- ; addBindsC infos
- ; sequence_ fcodes
- }
-
-cgTopBinding dflags (StgTopStringLit id str)
- = do { let label = mkBytesLabel (idName id)
- ; let (lit, decl) = mkByteStringCLit label str
- ; emitDecl decl
- ; addBindC (litIdInfo dflags id mkLFStringLit lit)
- }
-
-cgTopRhs :: DynFlags -> RecFlag -> Id -> CgStgRhs -> (CgIdInfo, FCode ())
- -- The Id is passed along for setting up a binding...
-
-cgTopRhs dflags _rec bndr (StgRhsCon _cc con args)
- = cgTopRhsCon dflags bndr con (assertNonVoidStgArgs args)
- -- con args are always non-void,
- -- see Note [Post-unarisation invariants] in UnariseStg
-
-cgTopRhs dflags rec bndr (StgRhsClosure fvs cc upd_flag args body)
- = ASSERT(isEmptyDVarSet fvs) -- There should be no free variables
- cgTopRhsClosure dflags rec bndr cc upd_flag args body
-
-
----------------------------------------------------------------
--- Module initialisation code
----------------------------------------------------------------
-
-mkModuleInit
- :: CollectedCCs -- cost centre info
- -> Module
- -> HpcInfo
- -> FCode ()
-
-mkModuleInit cost_centre_info this_mod hpc_info
- = do { initHpc this_mod hpc_info
- ; initCostCentres cost_centre_info
- }
-
-
----------------------------------------------------------------
--- Generating static stuff for algebraic data types
----------------------------------------------------------------
-
-
-cgEnumerationTyCon :: TyCon -> FCode ()
-cgEnumerationTyCon tycon
- = do dflags <- getDynFlags
- emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs)
- [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs)
- (tagForCon dflags con)
- | con <- tyConDataCons tycon]
-
-
-cgDataCon :: DataCon -> FCode ()
--- Generate the entry code, info tables, and (for niladic constructors)
--- the static closure, for a constructor.
-cgDataCon data_con
- = do { dflags <- getDynFlags
- ; let
- (tot_wds, -- #ptr_wds + #nonptr_wds
- ptr_wds) -- #ptr_wds
- = mkVirtConstrSizes dflags arg_reps
-
- nonptr_wds = tot_wds - ptr_wds
-
- dyn_info_tbl =
- mkDataConInfoTable dflags data_con False ptr_wds nonptr_wds
-
- -- We're generating info tables, so we don't know or care about
- -- what the actual arguments are. Using () here as the placeholder.
- arg_reps :: [NonVoid PrimRep]
- arg_reps = [ NonVoid rep_ty
- | ty <- dataConRepArgTys data_con
- , rep_ty <- typePrimRep ty
- , not (isVoidRep rep_ty) ]
-
- ; emitClosureAndInfoTable dyn_info_tbl NativeDirectCall [] $
- -- NB: the closure pointer is assumed *untagged* on
- -- entry to a constructor. If the pointer is tagged,
- -- then we should not be entering it. This assumption
- -- is used in ldvEnter and when tagging the pointer to
- -- return it.
- -- NB 2: We don't set CC when entering data (WDP 94/06)
- do { tickyEnterDynCon
- ; ldvEnter (CmmReg nodeReg)
- ; tickyReturnOldCon (length arg_reps)
- ; void $ emitReturn [cmmOffsetB dflags (CmmReg nodeReg) (tagForCon dflags data_con)]
- }
- -- The case continuation code expects a tagged pointer
- }
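
codeGen (deleted above) yields its CmmGroups as a Stream precisely so that later pipeline stages can consume them as they are produced. For a test harness it can also be forced in one go; a hedged sketch, not part of the patch, which assumes Stream.collect from GHC's Stream utility module and deliberately gives up the interleaving described in the comments above:

module CodeGenExample (allCmmGroups) where

import GhcPrelude
import StgCmm     (codeGen)
import qualified Stream
import Cmm        (CmmGroup)
import CostCentre (CollectedCCs)
import DynFlags   (DynFlags)
import HscTypes   (HpcInfo)
import Module     (Module)
import StgSyn     (CgStgTopBinding)
import TyCon      (TyCon)

-- Force the whole Cmm output of the STG-to-Cmm pass into a list.
-- Fine for testing; real compilation keeps the Stream so that code
-- generation and output can be interleaved.
allCmmGroups :: DynFlags -> Module -> [TyCon] -> CollectedCCs
             -> [CgStgTopBinding] -> HpcInfo -> IO [CmmGroup]
allCmmGroups dflags this_mod tycons ccs binds hpc =
  Stream.collect (codeGen dflags this_mod tycons ccs binds hpc)
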
diff --git a/compiler/codeGen/StgCmmArgRep.hs b/compiler/codeGen/StgCmmArgRep.hs
deleted file mode 100644
index ef40fce464..0000000000
--- a/compiler/codeGen/StgCmmArgRep.hs
+++ /dev/null
@@ -1,160 +0,0 @@
------------------------------------------------------------------------------
---
--- Argument representations used in StgCmmLayout.
---
--- (c) The University of Glasgow 2013
---
------------------------------------------------------------------------------
-
-module StgCmmArgRep (
- ArgRep(..), toArgRep, argRepSizeW,
-
- argRepString, isNonV, idArgRep,
-
- slowCallPattern,
-
- ) where
-
-import GhcPrelude
-
-import StgCmmClosure ( idPrimRep )
-
-import SMRep ( WordOff )
-import Id ( Id )
-import TyCon ( PrimRep(..), primElemRepSizeB )
-import BasicTypes ( RepArity )
-import Constants ( wORD64_SIZE )
-import DynFlags
-
-import Outputable
-import FastString
-
--- I extracted this code into this new module in order to avoid a
--- cyclic dependency between StgCmmLayout and StgCmmTicky.
---
--- NSF 18 Feb 2013
-
--------------------------------------------------------------------------
--- Classifying arguments: ArgRep
--------------------------------------------------------------------------
-
--- ArgRep is re-exported by StgCmmLayout, but only for use in the
--- byte-code generator which also needs to know about the
--- classification of arguments.
-
-data ArgRep = P -- GC Ptr
- | N -- Word-sized non-ptr
- | L -- 64-bit non-ptr (long)
- | V -- Void
- | F -- Float
- | D -- Double
- | V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc.
- | V32 -- 32-byte (256-bit) vectors of Float/Double/Int8/Word32/etc.
- | V64 -- 64-byte (512-bit) vectors of Float/Double/Int8/Word32/etc.
-instance Outputable ArgRep where ppr = text . argRepString
-
-argRepString :: ArgRep -> String
-argRepString P = "P"
-argRepString N = "N"
-argRepString L = "L"
-argRepString V = "V"
-argRepString F = "F"
-argRepString D = "D"
-argRepString V16 = "V16"
-argRepString V32 = "V32"
-argRepString V64 = "V64"
-
-toArgRep :: PrimRep -> ArgRep
-toArgRep VoidRep = V
-toArgRep LiftedRep = P
-toArgRep UnliftedRep = P
-toArgRep IntRep = N
-toArgRep WordRep = N
-toArgRep Int8Rep = N -- Gets widened to native word width for calls
-toArgRep Word8Rep = N -- Gets widened to native word width for calls
-toArgRep Int16Rep = N -- Gets widened to native word width for calls
-toArgRep Word16Rep = N -- Gets widened to native word width for calls
-toArgRep Int32Rep = N -- Gets widened to native word width for calls
-toArgRep Word32Rep = N -- Gets widened to native word width for calls
-toArgRep AddrRep = N
-toArgRep Int64Rep = L
-toArgRep Word64Rep = L
-toArgRep FloatRep = F
-toArgRep DoubleRep = D
-toArgRep (VecRep len elem) = case len*primElemRepSizeB elem of
- 16 -> V16
- 32 -> V32
- 64 -> V64
- _ -> error "toArgRep: bad vector primrep"
-
-isNonV :: ArgRep -> Bool
-isNonV V = False
-isNonV _ = True
-
-argRepSizeW :: DynFlags -> ArgRep -> WordOff -- Size in words
-argRepSizeW _ N = 1
-argRepSizeW _ P = 1
-argRepSizeW _ F = 1
-argRepSizeW dflags L = wORD64_SIZE `quot` wORD_SIZE dflags
-argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags
-argRepSizeW _ V = 0
-argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags
-argRepSizeW dflags V32 = 32 `quot` wORD_SIZE dflags
-argRepSizeW dflags V64 = 64 `quot` wORD_SIZE dflags
-
-idArgRep :: Id -> ArgRep
-idArgRep = toArgRep . idPrimRep
-
--- This list of argument patterns should be kept in sync with at least
--- the following:
---
--- * StgCmmLayout.stdPattern maybe to some degree?
---
--- * the RTS_RET(stg_ap_*) and RTS_FUN_DECL(stg_ap_*_fast)
--- declarations in includes/stg/MiscClosures.h
---
--- * the SLOW_CALL_*_ctr declarations in includes/stg/Ticky.h,
---
--- * the TICK_SLOW_CALL_*() #defines in includes/Cmm.h,
---
--- * the PR_CTR(SLOW_CALL_*_ctr) calls in rts/Ticky.c,
---
--- * and the SymI_HasProto(stg_ap_*_{ret,info,fast}) calls and
--- SymI_HasProto(SLOW_CALL_*_ctr) calls in rts/Linker.c
---
--- There may be more places that I haven't found; I merely igrep'd for
--- pppppp and excluded things that seemed ghci-specific.
---
--- Also, it seems at the moment that ticky counters with void
--- arguments will never be bumped, but I'm still declaring those
--- counters, defensively.
---
--- NSF 6 Mar 2013
-
-slowCallPattern :: [ArgRep] -> (FastString, RepArity)
--- Returns the generic apply function and arity
---
--- The first batch of cases match (some) specialised entries
--- The last group deals exhaustively with the cases for the first argument
--- (and the zero-argument case)
---
--- In 99% of cases this function will match *all* the arguments in one batch
-
-slowCallPattern (P: P: P: P: P: P: _) = (fsLit "stg_ap_pppppp", 6)
-slowCallPattern (P: P: P: P: P: _) = (fsLit "stg_ap_ppppp", 5)
-slowCallPattern (P: P: P: P: _) = (fsLit "stg_ap_pppp", 4)
-slowCallPattern (P: P: P: V: _) = (fsLit "stg_ap_pppv", 4)
-slowCallPattern (P: P: P: _) = (fsLit "stg_ap_ppp", 3)
-slowCallPattern (P: P: V: _) = (fsLit "stg_ap_ppv", 3)
-slowCallPattern (P: P: _) = (fsLit "stg_ap_pp", 2)
-slowCallPattern (P: V: _) = (fsLit "stg_ap_pv", 2)
-slowCallPattern (P: _) = (fsLit "stg_ap_p", 1)
-slowCallPattern (V: _) = (fsLit "stg_ap_v", 1)
-slowCallPattern (N: _) = (fsLit "stg_ap_n", 1)
-slowCallPattern (F: _) = (fsLit "stg_ap_f", 1)
-slowCallPattern (D: _) = (fsLit "stg_ap_d", 1)
-slowCallPattern (L: _) = (fsLit "stg_ap_l", 1)
-slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1)
-slowCallPattern (V32: _) = (fsLit "stg_ap_v32", 1)
-slowCallPattern (V64: _) = (fsLit "stg_ap_v64", 1)
-slowCallPattern [] = (fsLit "stg_ap_0", 0)
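
slowCallPattern (above) only matches a prefix of the argument reps; the real driver in StgCmmLayout calls it repeatedly until every argument has been assigned to a generic-apply chunk. A hypothetical worked example, not part of the patch; chunks is an illustrative helper, not the actual driver:

module SlowCallExample (chunks) where

import GhcPrelude
import BasicTypes   (RepArity)
import FastString   (FastString)
import StgCmmArgRep (ArgRep (..), slowCallPattern)

-- Split a list of argument reps the way a slow call is split into
-- generic-apply calls, e.g.
--   chunks [P, P, N, P]
--     ==> [("stg_ap_pp", 2), ("stg_ap_n", 1), ("stg_ap_p", 1)]
chunks :: [ArgRep] -> [(FastString, RepArity)]
chunks []   = []
chunks reps = (fun, arity) : chunks (drop arity reps)
  where
    (fun, arity) = slowCallPattern reps
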
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
deleted file mode 100644
index 7189800f6e..0000000000
--- a/compiler/codeGen/StgCmmBind.hs
+++ /dev/null
@@ -1,753 +0,0 @@
------------------------------------------------------------------------------
---
--- Stg to C-- code generation: bindings
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmBind (
- cgTopRhsClosure,
- cgBind,
- emitBlackHoleCode,
- pushUpdateFrame, emitUpdateFrame
- ) where
-
-import GhcPrelude hiding ((<*>))
-
-import StgCmmExpr
-import StgCmmMonad
-import StgCmmEnv
-import StgCmmCon
-import StgCmmHeap
-import StgCmmProf (ldvEnterClosure, enterCostCentreFun, enterCostCentreThunk,
- initUpdFrameProf)
-import StgCmmTicky
-import StgCmmLayout
-import StgCmmUtils
-import StgCmmClosure
-import StgCmmForeign (emitPrimCall)
-
-import MkGraph
-import CoreSyn ( AltCon(..), tickishIsCode )
-import BlockId
-import SMRep
-import Cmm
-import CmmInfo
-import CmmUtils
-import CLabel
-import StgSyn
-import CostCentre
-import Id
-import IdInfo
-import Name
-import Module
-import ListSetOps
-import Util
-import VarSet
-import BasicTypes
-import Outputable
-import FastString
-import DynFlags
-
-import Control.Monad
-
-------------------------------------------------------------------------
--- Top-level bindings
-------------------------------------------------------------------------
-
--- For closures bound at top level, allocate in static space.
--- They should have no free variables.
-
-cgTopRhsClosure :: DynFlags
- -> RecFlag -- member of a recursive group?
- -> Id
- -> CostCentreStack -- Optional cost centre annotation
- -> UpdateFlag
- -> [Id] -- Args
- -> CgStgExpr
- -> (CgIdInfo, FCode ())
-
-cgTopRhsClosure dflags rec id ccs upd_flag args body =
- let closure_label = mkLocalClosureLabel (idName id) (idCafInfo id)
- cg_id_info = litIdInfo dflags id lf_info (CmmLabel closure_label)
- lf_info = mkClosureLFInfo dflags id TopLevel [] upd_flag args
- in (cg_id_info, gen_code dflags lf_info closure_label)
- where
- -- special case for an indirection (f = g). We create an IND_STATIC
- -- closure pointing directly to the indirectee. This is exactly
- -- what the CAF will eventually evaluate to anyway, we're just
- -- shortcutting the whole process, and generating a lot less code
- -- (#7308). Eventually the IND_STATIC closure will be eliminated
- -- by assembly '.equiv' directives, where possible (#15155).
- -- See note [emit-time elimination of static indirections] in CLabel.
- --
- -- Note: we omit the optimisation when this binding is part of a
- -- recursive group, because the optimisation would inhibit the black
- -- hole detection from working in that case. Test
- -- concurrent/should_run/4030 fails, for instance.
- --
- gen_code dflags _ closure_label
- | StgApp f [] <- body, null args, isNonRec rec
- = do
- cg_info <- getCgIdInfo f
- let closure_rep = mkStaticClosureFields dflags
- indStaticInfoTable ccs MayHaveCafRefs
- [unLit (idInfoToAmode cg_info)]
- emitDataLits closure_label closure_rep
- return ()
-
- gen_code dflags lf_info _closure_label
- = do { let name = idName id
- ; mod_name <- getModuleName
- ; let descr = closureDescription dflags mod_name name
- closure_info = mkClosureInfo dflags True id lf_info 0 0 descr
-
- -- We don't generate the static closure here, because we might
- -- want to add references to static closures to it later. The
- -- static closure is generated by CmmBuildInfoTables.updInfoSRTs,
- -- See Note [SRTs], specifically the [FUN] optimisation.
-
- ; let fv_details :: [(NonVoid Id, ByteOff)]
- header = if isLFThunk lf_info then ThunkHeader else StdHeader
- (_, _, fv_details) = mkVirtHeapOffsets dflags header []
- -- Don't drop the non-void args until the closure info has been made
- ; forkClosureBody (closureCodeBody True id closure_info ccs
- (nonVoidIds args) (length args) body fv_details)
-
- ; return () }
-
- unLit (CmmLit l) = l
- unLit _ = panic "unLit"
-
-------------------------------------------------------------------------
--- Non-top-level bindings
-------------------------------------------------------------------------
-
-cgBind :: CgStgBinding -> FCode ()
-cgBind (StgNonRec name rhs)
- = do { (info, fcode) <- cgRhs name rhs
- ; addBindC info
- ; init <- fcode
- ; emit init }
- -- init cannot be used in body, so slightly better to sink it eagerly
-
-cgBind (StgRec pairs)
- = do { r <- sequence $ unzipWith cgRhs pairs
- ; let (id_infos, fcodes) = unzip r
- ; addBindsC id_infos
- ; (inits, body) <- getCodeR $ sequence fcodes
- ; emit (catAGraphs inits <*> body) }
-
-{- Note [cgBind rec]
-
- Recursive let-bindings are tricky.
- Consider the following pseudocode:
-
- let x = \_ -> ... y ...
- y = \_ -> ... z ...
- z = \_ -> ... x ...
- in ...
-
- For each binding, we need to allocate a closure, and each closure must
- capture the address of the other closures.
- We want to generate the following C-- code:
- // Initialization Code
- x = hp - 24; // heap address of x's closure
- y = hp - 40; // heap address of y's closure
- z = hp - 64; // heap address of z's closure
- // allocate and initialize x
- m[hp-8] = ...
- m[hp-16] = y // the closure for x captures y
- m[hp-24] = x_info;
- // allocate and initialize y
- m[hp-32] = z; // the closure for y captures z
- m[hp-40] = y_info;
- // allocate and initialize z
- ...
-
- For each closure, we must generate not only the code to allocate and
- initialize the closure itself, but also some initialization code that
- sets a variable holding the closure pointer.
-
- We could generate a pair of the (init code, body code), but since
- the bindings are recursive we also have to initialise the
- environment with the CgIdInfo for all the bindings before compiling
- anything. So we do this in 3 stages:
-
- 1. collect all the CgIdInfos and initialise the environment
- 2. compile each binding into (init, body) code
- 3. emit all the inits, and then all the bodies
-
- We'd rather not have separate functions to do steps 1 and 2 for
- each binding, since in practice they share a lot of code. So we
- have just one function, cgRhs, that returns a pair of the CgIdInfo
- for step 1, and a monadic computation to generate the code in step
- 2.
-
- The alternative to separating things in this way is to use a
- fixpoint. That's what we used to do, but it introduces a
- maintenance nightmare because there is a subtle dependency on not
- being too strict everywhere. Doing things this way means that the
- FCode monad can be strict, for example.
- -}
-
-cgRhs :: Id
- -> CgStgRhs
- -> FCode (
- CgIdInfo -- The info for this binding
- , FCode CmmAGraph -- A computation which will generate the
- -- code for the binding, and return an
- -- assignment of the form "x = Hp - n"
- -- (see above)
- )
-
-cgRhs id (StgRhsCon cc con args)
- = withNewTickyCounterCon (idName id) $
- buildDynCon id True cc con (assertNonVoidStgArgs args)
- -- con args are always non-void,
- -- see Note [Post-unarisation invariants] in UnariseStg
-
-{- See Note [GC recovery] in compiler/codeGen/StgCmmClosure.hs -}
-cgRhs id (StgRhsClosure fvs cc upd_flag args body)
- = do dflags <- getDynFlags
- mkRhsClosure dflags id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body
-
-------------------------------------------------------------------------
--- Non-constructor right hand sides
-------------------------------------------------------------------------
-
-mkRhsClosure :: DynFlags -> Id -> CostCentreStack
- -> [NonVoid Id] -- Free vars
- -> UpdateFlag
- -> [Id] -- Args
- -> CgStgExpr
- -> FCode (CgIdInfo, FCode CmmAGraph)
-
-{- mkRhsClosure looks for two special forms of the right-hand side:
- a) selector thunks
- b) AP thunks
-
-If neither happens, it just calls mkClosureLFInfo. You might think
-that mkClosureLFInfo should do all this, but it seems wrong for the
-latter to look at the structure of an expression
-
-Note [Selectors]
-~~~~~~~~~~~~~~~~
-We look at the body of the closure to see if it's a selector---turgid,
-but nothing deep. We are looking for a closure of {\em exactly} the
-form:
-
-... = [the_fv] \ u [] ->
- case the_fv of
- con a_1 ... a_n -> a_i
-
-Note [Ap thunks]
-~~~~~~~~~~~~~~~~
-A more generic AP thunk of the form
-
- x = [ x_1...x_n ] \.. [] -> x_1 ... x_n
-
-A set of these is compiled statically into the RTS, so we just use
-those. We could extend the idea to thunks where some of the x_i are
-global ids (and hence not free variables), but this would entail
-generating a larger thunk. It might be an option for non-optimising
-compilation, though.
-
-We only generate an Ap thunk if all the free variables are pointers,
-for semi-obvious reasons.
-
--}
-
----------- Note [Selectors] ------------------
-mkRhsClosure dflags bndr _cc
- [NonVoid the_fv] -- Just one free var
- upd_flag -- Updatable thunk
- [] -- A thunk
- expr
- | let strip = stripStgTicksTopE (not . tickishIsCode)
- , StgCase (StgApp scrutinee [{-no args-}])
- _ -- ignore bndr
- (AlgAlt _)
- [(DataAlt _, params, sel_expr)] <- strip expr
- , StgApp selectee [{-no args-}] <- strip sel_expr
- , the_fv == scrutinee -- Scrutinee is the only free variable
-
- , let (_, _, params_w_offsets) = mkVirtConstrOffsets dflags (addIdReps (assertNonVoidIds params))
- -- pattern binders are always non-void,
- -- see Note [Post-unarisation invariants] in UnariseStg
- , Just the_offset <- assocMaybe params_w_offsets (NonVoid selectee)
-
- , let offset_into_int = bytesToWordsRoundUp dflags the_offset
- - fixedHdrSizeW dflags
- , offset_into_int <= mAX_SPEC_SELECTEE_SIZE dflags -- Offset is small enough
- = -- NOT TRUE: ASSERT(is_single_constructor)
- -- The simplifier may have statically determined that the single alternative
- -- is the only possible case and eliminated the others, even if there are
- -- other constructors in the datatype. It's still ok to make a selector
- -- thunk in this case, because we *know* which constructor the scrutinee
- -- will evaluate to.
- --
- -- srt is discarded; it must be empty
- let lf_info = mkSelectorLFInfo bndr offset_into_int (isUpdatable upd_flag)
- in cgRhsStdThunk bndr lf_info [StgVarArg the_fv]
-
----------- Note [Ap thunks] ------------------
-mkRhsClosure dflags bndr _cc
- fvs
- upd_flag
- [] -- No args; a thunk
- (StgApp fun_id args)
-
- -- We are looking for an "ApThunk"; see data con ApThunk in StgCmmClosure
- -- of form (x1 x2 .... xn), where all the xi are locals (not top-level)
- -- So the xi will all be free variables
- | args `lengthIs` (n_fvs-1) -- This happens only if the fun_id and
- -- args are all distinct local variables
- -- The "-1" is for fun_id
- -- Missed opportunity: (f x x) is not detected
- , all (isGcPtrRep . idPrimRep . fromNonVoid) fvs
- , isUpdatable upd_flag
- , n_fvs <= mAX_SPEC_AP_SIZE dflags
- , not (gopt Opt_SccProfilingOn dflags)
- -- not when profiling: we don't want to
- -- lose information about this particular
- -- thunk (e.g. its type) (#949)
- , idArity fun_id == unknownArity -- don't spoil a known call
-
- -- Ha! an Ap thunk
- = cgRhsStdThunk bndr lf_info payload
-
- where
- n_fvs = length fvs
- lf_info = mkApLFInfo bndr upd_flag n_fvs
- -- the payload has to be in the correct order, hence we can't
- -- just use the fvs.
- payload = StgVarArg fun_id : args
-
----------- Default case ------------------
-mkRhsClosure dflags bndr cc fvs upd_flag args body
- = do { let lf_info = mkClosureLFInfo dflags bndr NotTopLevel fvs upd_flag args
- ; (id_info, reg) <- rhsIdInfo bndr lf_info
- ; return (id_info, gen_code lf_info reg) }
- where
- gen_code lf_info reg
- = do { -- LAY OUT THE OBJECT
- -- If the binder is itself a free variable, then don't store
- -- it in the closure. Instead, just bind it to Node on entry.
- -- NB we can be sure that Node will point to it, because we
- -- haven't told mkClosureLFInfo about this; so if the binder
- -- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is*
- -- stored in the closure itself, so it will make sure that
- -- Node points to it...
- ; let reduced_fvs = filter (NonVoid bndr /=) fvs
-
- -- MAKE CLOSURE INFO FOR THIS CLOSURE
- ; mod_name <- getModuleName
- ; dflags <- getDynFlags
- ; let name = idName bndr
- descr = closureDescription dflags mod_name name
- fv_details :: [(NonVoid Id, ByteOff)]
- header = if isLFThunk lf_info then ThunkHeader else StdHeader
- (tot_wds, ptr_wds, fv_details)
- = mkVirtHeapOffsets dflags header (addIdReps reduced_fvs)
- closure_info = mkClosureInfo dflags False -- Not static
- bndr lf_info tot_wds ptr_wds
- descr
-
- -- BUILD ITS INFO TABLE AND CODE
- ; forkClosureBody $
- -- forkClosureBody: (a) ensure that bindings in here are not seen elsewhere
- -- (b) ignore Sequel from context; use empty Sequel
- -- And compile the body
- closureCodeBody False bndr closure_info cc (nonVoidIds args)
- (length args) body fv_details
-
- -- BUILD THE OBJECT
--- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
- ; let use_cc = cccsExpr; blame_cc = cccsExpr
- ; emit (mkComment $ mkFastString "calling allocDynClosure")
- ; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off)
- ; let info_tbl = mkCmmInfo closure_info bndr currentCCS
- ; hp_plus_n <- allocDynClosure (Just bndr) info_tbl lf_info use_cc blame_cc
- (map toVarArg fv_details)
-
- -- RETURN
- ; return (mkRhsInit dflags reg lf_info hp_plus_n) }
-
--------------------------
-cgRhsStdThunk
- :: Id
- -> LambdaFormInfo
- -> [StgArg] -- payload
- -> FCode (CgIdInfo, FCode CmmAGraph)
-
-cgRhsStdThunk bndr lf_info payload
- = do { (id_info, reg) <- rhsIdInfo bndr lf_info
- ; return (id_info, gen_code reg)
- }
- where
- gen_code reg -- AHA! A STANDARD-FORM THUNK
- = withNewTickyCounterStdThunk (lfUpdatable lf_info) (idName bndr) $
- do
- { -- LAY OUT THE OBJECT
- mod_name <- getModuleName
- ; dflags <- getDynFlags
- ; let header = if isLFThunk lf_info then ThunkHeader else StdHeader
- (tot_wds, ptr_wds, payload_w_offsets)
- = mkVirtHeapOffsets dflags header
- (addArgReps (nonVoidStgArgs payload))
-
- descr = closureDescription dflags mod_name (idName bndr)
- closure_info = mkClosureInfo dflags False -- Not static
- bndr lf_info tot_wds ptr_wds
- descr
-
--- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body
- ; let use_cc = cccsExpr; blame_cc = cccsExpr
-
-
- -- BUILD THE OBJECT
- ; let info_tbl = mkCmmInfo closure_info bndr currentCCS
- ; hp_plus_n <- allocDynClosure (Just bndr) info_tbl lf_info
- use_cc blame_cc payload_w_offsets
-
- -- RETURN
- ; return (mkRhsInit dflags reg lf_info hp_plus_n) }
-
-
-mkClosureLFInfo :: DynFlags
- -> Id -- The binder
- -> TopLevelFlag -- True if top level
- -> [NonVoid Id] -- Free vars
- -> UpdateFlag -- Update flag
- -> [Id] -- Args
- -> LambdaFormInfo
-mkClosureLFInfo dflags bndr top fvs upd_flag args
- | null args =
- mkLFThunk (idType bndr) top (map fromNonVoid fvs) upd_flag
- | otherwise =
- mkLFReEntrant top (map fromNonVoid fvs) args (mkArgDescr dflags args)
-
-
-------------------------------------------------------------------------
--- The code for closures
-------------------------------------------------------------------------
-
-closureCodeBody :: Bool -- whether this is a top-level binding
- -> Id -- the closure's name
- -> ClosureInfo -- Lots of information about this closure
- -> CostCentreStack -- Optional cost centre attached to closure
- -> [NonVoid Id] -- incoming args to the closure
- -> Int -- arity, including void args
- -> CgStgExpr
- -> [(NonVoid Id, ByteOff)] -- the closure's free vars
- -> FCode ()
-
-{- There are two main cases for the code for closures.
-
-* If there are *no arguments*, then the closure is a thunk, and not in
- normal form. So it should set up an update frame (if it is
- shared). NB: Thunks cannot have a primitive type!
-
-* If there is *at least one* argument, then this closure is in
- normal form, so there is no need to set up an update frame.
--}
-
-closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details
- | arity == 0 -- No args i.e. thunk
- = withNewTickyCounterThunk
- (isStaticClosure cl_info)
- (closureUpdReqd cl_info)
- (closureName cl_info) $
- emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $
- \(_, node, _) -> thunkCode cl_info fv_details cc node arity body
- where
- lf_info = closureLFInfo cl_info
- info_tbl = mkCmmInfo cl_info bndr cc
-
-closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
- = -- Note: args may be [], if all args are Void
- withNewTickyCounterFun
- (closureSingleEntry cl_info)
- (closureName cl_info)
- args $ do {
-
- ; let
- lf_info = closureLFInfo cl_info
- info_tbl = mkCmmInfo cl_info bndr cc
-
- -- Emit the main entry code
- ; emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args $
- \(_offset, node, arg_regs) -> do
- -- Emit slow-entry code (for entering a closure through a PAP)
- { mkSlowEntryCode bndr cl_info arg_regs
- ; dflags <- getDynFlags
- ; let node_points = nodeMustPointToIt dflags lf_info
- node' = if node_points then Just node else Nothing
- ; loop_header_id <- newBlockId
- -- Extend reader monad with information that
- -- self-recursive tail calls can be optimized into local
- -- jumps. See Note [Self-recursive tail calls] in StgCmmExpr.
- ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do
- {
- -- Main payload
- ; entryHeapCheck cl_info node' arity arg_regs $ do
- { -- emit LDV code when profiling
- when node_points (ldvEnterClosure cl_info (CmmLocal node))
- -- ticky after heap check to avoid double counting
- ; tickyEnterFun cl_info
- ; enterCostCentreFun cc
- (CmmMachOp (mo_wordSub dflags)
- [ CmmReg (CmmLocal node) -- See [NodeReg clobbered with loopification]
- , mkIntExpr dflags (funTag dflags cl_info) ])
- ; fv_bindings <- mapM bind_fv fv_details
- -- Load free vars out of closure *after*
- -- heap check, to reduce live vars over check
- ; when node_points $ load_fvs node lf_info fv_bindings
- ; void $ cgExpr body
- }}}
-
- }
-
--- Note [NodeReg clobbered with loopification]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- Previously we used to pass nodeReg (aka R1) here. With profiling, upon
--- entering a closure, enterFunCCS was called with R1 passed to it. But since R1
--- may get clobbered inside the body of a closure, and since a self-recursive
--- tail call does not restore R1, a subsequent call to enterFunCCS received a
--- possibly bogus value from R1. The solution is to not pass nodeReg (aka R1) to
--- enterFunCCS. Instead, we pass node, the callee-saved temporary that stores
--- the original value of R1. This way R1 may get modified but loopification will
--- not care.
-
--- A function closure pointer may be tagged, so we
--- must take it into account when accessing the free variables.
-bind_fv :: (NonVoid Id, ByteOff) -> FCode (LocalReg, ByteOff)
-bind_fv (id, off) = do { reg <- rebindToReg id; return (reg, off) }
-
-load_fvs :: LocalReg -> LambdaFormInfo -> [(LocalReg, ByteOff)] -> FCode ()
-load_fvs node lf_info = mapM_ (\ (reg, off) ->
- do dflags <- getDynFlags
- let tag = lfDynTag dflags lf_info
- emit $ mkTaggedObjectLoad dflags reg node off tag)
-
------------------------------------------
--- The "slow entry" code for a function. This entry point takes its
--- arguments on the stack. It loads the arguments into registers
--- according to the calling convention, and jumps to the function's
--- normal entry point. The function's closure is assumed to be in
--- R1/node.
---
--- The slow entry point is used for unknown calls: eg. stg_PAP_entry
-
-mkSlowEntryCode :: Id -> ClosureInfo -> [LocalReg] -> FCode ()
--- If this function doesn't have a specialised ArgDescr, we need
--- to generate the function's arg bitmap and slow-entry code.
--- Here, we emit the slow-entry code.
-mkSlowEntryCode bndr cl_info arg_regs -- function closure is already in `Node'
- | Just (_, ArgGen _) <- closureFunInfo cl_info
- = do dflags <- getDynFlags
- let node = idToReg dflags (NonVoid bndr)
- slow_lbl = closureSlowEntryLabel cl_info
- fast_lbl = closureLocalEntryLabel dflags cl_info
- -- mkDirectJump does not clobber `Node' containing function closure
- jump = mkJump dflags NativeNodeCall
- (mkLblExpr fast_lbl)
- (map (CmmReg . CmmLocal) (node : arg_regs))
- (initUpdFrameOff dflags)
- tscope <- getTickScope
- emitProcWithConvention Slow Nothing slow_lbl
- (node : arg_regs) (jump, tscope)
- | otherwise = return ()
-
------------------------------------------
-thunkCode :: ClosureInfo -> [(NonVoid Id, ByteOff)] -> CostCentreStack
- -> LocalReg -> Int -> CgStgExpr -> FCode ()
-thunkCode cl_info fv_details _cc node arity body
- = do { dflags <- getDynFlags
- ; let node_points = nodeMustPointToIt dflags (closureLFInfo cl_info)
- node' = if node_points then Just node else Nothing
- ; ldvEnterClosure cl_info (CmmLocal node) -- NB: Node always points when profiling
-
- -- Heap overflow check
- ; entryHeapCheck cl_info node' arity [] $ do
- { -- Overwrite with black hole if necessary
- -- but *after* the heap-overflow check
- ; tickyEnterThunk cl_info
- ; when (blackHoleOnEntry cl_info && node_points)
- (blackHoleIt node)
-
- -- Push update frame
- ; setupUpdate cl_info node $
- -- We only enter cc after setting up update so
- -- that cc of enclosing scope will be recorded
- -- in update frame CAF/DICT functions will be
- -- subsumed by this enclosing cc
- do { enterCostCentreThunk (CmmReg nodeReg)
- ; let lf_info = closureLFInfo cl_info
- ; fv_bindings <- mapM bind_fv fv_details
- ; load_fvs node lf_info fv_bindings
- ; void $ cgExpr body }}}
-
-
-------------------------------------------------------------------------
--- Update and black-hole wrappers
-------------------------------------------------------------------------
-
-blackHoleIt :: LocalReg -> FCode ()
--- Only called for closures with no args
--- Node points to the closure
-blackHoleIt node_reg
- = emitBlackHoleCode (CmmReg (CmmLocal node_reg))
-
-emitBlackHoleCode :: CmmExpr -> FCode ()
-emitBlackHoleCode node = do
- dflags <- getDynFlags
-
- -- Eager blackholing is normally disabled, but can be turned on with
- -- -feager-blackholing. When it is on, we replace the info pointer
- -- of the thunk with stg_EAGER_BLACKHOLE_info on entry.
-
- -- If we wanted to do eager blackholing with slop filling, we'd need
- -- to do it at the *end* of a basic block, otherwise we overwrite
- -- the free variables in the thunk that we still need. We have a
- -- patch for this from Andy Cheadle, but not incorporated yet. --SDM
- -- [6/2004]
- --
- -- Previously, eager blackholing was enabled when ticky-ticky was
- -- on. But it didn't work, and it wasn't strictly necessary to bring
- -- back minimal ticky-ticky, so now EAGER_BLACKHOLING is
- -- unconditionally disabled. -- krc 1/2007
-
- -- Note the eager-blackholing check is here rather than in blackHoleOnEntry,
- -- because emitBlackHoleCode is called from CmmParse.
-
- let eager_blackholing = not (gopt Opt_SccProfilingOn dflags)
- && gopt Opt_EagerBlackHoling dflags
- -- Profiling needs slop filling (to support LDV
- -- profiling), so currently eager blackholing doesn't
- -- work with profiling.
-
- when eager_blackholing $ do
- emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) currentTSOExpr
- -- See Note [Heap memory barriers] in SMP.h.
- emitPrimCall [] MO_WriteBarrier []
- emitStore node (CmmReg (CmmGlobal EagerBlackholeInfo))
-
-setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode ()
- -- Nota Bene: this function does not change Node (even if it's a CAF),
- -- so that the cost centre in the original closure can still be
- -- extracted by a subsequent enterCostCentre
-setupUpdate closure_info node body
- | not (lfUpdatable (closureLFInfo closure_info))
- = body
-
- | not (isStaticClosure closure_info)
- = if not (closureUpdReqd closure_info)
- then do tickyUpdateFrameOmitted; body
- else do
- tickyPushUpdateFrame
- dflags <- getDynFlags
- let
- bh = blackHoleOnEntry closure_info &&
- not (gopt Opt_SccProfilingOn dflags) &&
- gopt Opt_EagerBlackHoling dflags
-
- lbl | bh = mkBHUpdInfoLabel
- | otherwise = mkUpdInfoLabel
-
- pushUpdateFrame lbl (CmmReg (CmmLocal node)) body
-
- | otherwise -- A static closure
- = do { tickyUpdateBhCaf closure_info
-
- ; if closureUpdReqd closure_info
- then do -- Blackhole the (updatable) CAF:
- { upd_closure <- link_caf node
- ; pushUpdateFrame mkBHUpdInfoLabel upd_closure body }
- else do {tickyUpdateFrameOmitted; body}
- }
-
------------------------------------------------------------------------------
--- Setting up update frames
-
--- Push the update frame on the stack in the Entry area,
--- leaving room for the return address that is already
--- at the old end of the area.
---
-pushUpdateFrame :: CLabel -> CmmExpr -> FCode () -> FCode ()
-pushUpdateFrame lbl updatee body
- = do
- updfr <- getUpdFrameOff
- dflags <- getDynFlags
- let
- hdr = fixedHdrSize dflags
- frame = updfr + hdr + sIZEOF_StgUpdateFrame_NoHdr dflags
- --
- emitUpdateFrame dflags (CmmStackSlot Old frame) lbl updatee
- withUpdFrameOff frame body
-
-emitUpdateFrame :: DynFlags -> CmmExpr -> CLabel -> CmmExpr -> FCode ()
-emitUpdateFrame dflags frame lbl updatee = do
- let
- hdr = fixedHdrSize dflags
- off_updatee = hdr + oFFSET_StgUpdateFrame_updatee dflags
- --
- emitStore frame (mkLblExpr lbl)
- emitStore (cmmOffset dflags frame off_updatee) updatee
- initUpdFrameProf frame
-
------------------------------------------------------------------------------
--- Entering a CAF
---
--- See Note [CAF management] in rts/sm/Storage.c
-
-link_caf :: LocalReg -- pointer to the closure
- -> FCode CmmExpr -- Returns amode for closure to be updated
--- This function returns the address of the black hole, so it can be
--- updated with the new value when available.
-link_caf node = do
- { dflags <- getDynFlags
- -- Call the RTS function newCAF, returning the newly-allocated
- -- blackhole indirection closure
- ; let newCAF_lbl = mkForeignLabel (fsLit "newCAF") Nothing
- ForeignLabelInExternalPackage IsFunction
- ; bh <- newTemp (bWord dflags)
- ; emitRtsCallGen [(bh,AddrHint)] newCAF_lbl
- [ (baseExpr, AddrHint),
- (CmmReg (CmmLocal node), AddrHint) ]
- False
-
- -- see Note [atomic CAF entry] in rts/sm/Storage.c
- ; updfr <- getUpdFrameOff
- ; let target = entryCode dflags (closureInfoPtr dflags (CmmReg (CmmLocal node)))
- ; emit =<< mkCmmIfThen
- (cmmEqWord dflags (CmmReg (CmmLocal bh)) (zeroExpr dflags))
- -- re-enter the CAF
- (mkJump dflags NativeNodeCall target [] updfr)
-
- ; return (CmmReg (CmmLocal bh)) }
-
-------------------------------------------------------------------------
--- Profiling
-------------------------------------------------------------------------
-
--- For "global" data constructors the description is simply occurrence
--- name of the data constructor itself. Otherwise it is determined by
--- @closureDescription@ from the let binding information.
-
-closureDescription :: DynFlags
- -> Module -- Module
- -> Name -- Id of closure binding
- -> String
- -- Not called for StgRhsCon which have global info tables built in
- -- CgConTbls.hs with a description generated from the data constructor
-closureDescription dflags mod_name name
- = showSDocDump dflags (char '<' <>
- (if isExternalName name
- then ppr name -- ppr will include the module name prefix
- else pprModule mod_name <> char '.' <> ppr name) <>
- char '>')
- -- showSDocDump, because we want to see the unique on the Name.
diff --git a/compiler/codeGen/StgCmmBind.hs-boot b/compiler/codeGen/StgCmmBind.hs-boot
deleted file mode 100644
index 8e3dd38ad8..0000000000
--- a/compiler/codeGen/StgCmmBind.hs-boot
+++ /dev/null
@@ -1,6 +0,0 @@
-module StgCmmBind where
-
-import StgCmmMonad( FCode )
-import StgSyn( CgStgBinding )
-
-cgBind :: CgStgBinding -> FCode ()
diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs
deleted file mode 100644
index ac8db1268f..0000000000
--- a/compiler/codeGen/StgCmmClosure.hs
+++ /dev/null
@@ -1,1008 +0,0 @@
-{-# LANGUAGE CPP, RecordWildCards #-}
-
------------------------------------------------------------------------------
---
--- Stg to C-- code generation:
---
--- The types LambdaFormInfo
--- ClosureInfo
---
--- Nothing monadic in here!
---
------------------------------------------------------------------------------
-
-module StgCmmClosure (
- DynTag, tagForCon, isSmallFamily,
-
- idPrimRep, isVoidRep, isGcPtrRep, addIdReps, addArgReps,
- argPrimRep,
-
- NonVoid(..), fromNonVoid, nonVoidIds, nonVoidStgArgs,
- assertNonVoidIds, assertNonVoidStgArgs,
-
- -- * LambdaFormInfo
- LambdaFormInfo, -- Abstract
- StandardFormInfo, -- ...ditto...
- mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo,
- mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape,
- mkLFStringLit,
- lfDynTag,
- isLFThunk, isLFReEntrant, lfUpdatable,
-
- -- * Used by other modules
- CgLoc(..), SelfLoopInfo, CallMethod(..),
- nodeMustPointToIt, isKnownFun, funTag, tagForArity, getCallMethod,
-
- -- * ClosureInfo
- ClosureInfo,
- mkClosureInfo,
- mkCmmInfo,
-
- -- ** Inspection
- closureLFInfo, closureName,
-
- -- ** Labels
- -- These just need the info table label
- closureInfoLabel, staticClosureLabel,
- closureSlowEntryLabel, closureLocalEntryLabel,
-
- -- ** Predicates
- -- These are really just functions on LambdaFormInfo
- closureUpdReqd, closureSingleEntry,
- closureReEntrant, closureFunInfo,
- isToplevClosure,
-
- blackHoleOnEntry, -- Needs LambdaFormInfo and SMRep
- isStaticClosure, -- Needs SMRep
-
- -- * InfoTables
- mkDataConInfoTable,
- cafBlackHoleInfoTable,
- indStaticInfoTable,
- staticClosureNeedsLink,
- ) where
-
-#include "../includes/MachDeps.h"
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import StgSyn
-import SMRep
-import Cmm
-import PprCmmExpr() -- For Outputable instances
-
-import CostCentre
-import BlockId
-import CLabel
-import Id
-import IdInfo
-import DataCon
-import Name
-import Type
-import TyCoRep
-import TcType
-import TyCon
-import RepType
-import BasicTypes
-import Outputable
-import DynFlags
-import Util
-
-import Data.Coerce (coerce)
-import qualified Data.ByteString.Char8 as BS8
-
------------------------------------------------------------------------------
--- Data types and synonyms
------------------------------------------------------------------------------
-
--- These data types are mostly used by other modules, especially StgCmmMonad,
--- but we define them here because some functions in this module need to
--- have access to them as well
-
-data CgLoc
- = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning
- -- Hp, so that it remains valid across calls
-
- | LneLoc BlockId [LocalReg] -- A join point
- -- A join point (= let-no-escape) should only
- -- be tail-called, and in a saturated way.
- -- To tail-call it, assign to these locals,
- -- and branch to the block id
-
-instance Outputable CgLoc where
- ppr (CmmLoc e) = text "cmm" <+> ppr e
- ppr (LneLoc b rs) = text "lne" <+> ppr b <+> ppr rs
-
-type SelfLoopInfo = (Id, BlockId, [LocalReg])
-
--- used by ticky profiling
-isKnownFun :: LambdaFormInfo -> Bool
-isKnownFun LFReEntrant{} = True
-isKnownFun LFLetNoEscape = True
-isKnownFun _ = False
-
-
--------------------------------------
--- Non-void types
--------------------------------------
--- We frequently need the invariant that an Id or an argument
--- is of a non-void type. This type is a witness to the invariant.
-
-newtype NonVoid a = NonVoid a
- deriving (Eq, Show)
-
-fromNonVoid :: NonVoid a -> a
-fromNonVoid (NonVoid a) = a
-
-instance (Outputable a) => Outputable (NonVoid a) where
- ppr (NonVoid a) = ppr a
-
-nonVoidIds :: [Id] -> [NonVoid Id]
-nonVoidIds ids = [NonVoid id | id <- ids, not (isVoidTy (idType id))]
-
--- | Used in places where some invariant ensures that all these Ids are
--- non-void; e.g. constructor field binders in case expressions.
--- See Note [Post-unarisation invariants] in UnariseStg.
-assertNonVoidIds :: [Id] -> [NonVoid Id]
-assertNonVoidIds ids = ASSERT(not (any (isVoidTy . idType) ids))
- coerce ids
-
-nonVoidStgArgs :: [StgArg] -> [NonVoid StgArg]
-nonVoidStgArgs args = [NonVoid arg | arg <- args, not (isVoidTy (stgArgType arg))]
-
--- | Used in places where some invariant ensures that all these arguments are
--- non-void; e.g. constructor arguments.
--- See Note [Post-unarisation invariants] in UnariseStg.
-assertNonVoidStgArgs :: [StgArg] -> [NonVoid StgArg]
-assertNonVoidStgArgs args = ASSERT(not (any (isVoidTy . stgArgType) args))
- coerce args
-
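The NonVoid wrapper above follows the "newtype as invariant witness" pattern: a filtering constructor establishes the invariant, and an assertion variant coerces wholesale when the invariant is already guaranteed. Below is a minimal standalone sketch of the same pattern; NonEmptyStr and its helpers are invented for illustration and are not part of this patch.

import Data.Coerce (coerce)

newtype NonEmptyStr = NonEmptyStr String
  deriving Show

-- Filtering constructor: keep only values that satisfy the invariant.
nonEmptyStrs :: [String] -> [NonEmptyStr]
nonEmptyStrs ss = [NonEmptyStr s | s <- ss, not (null s)]

-- Assertion variant: the caller promises the invariant already holds, so
-- the whole list is coerced without rebuilding it (cf. assertNonVoidIds).
assertNonEmptyStrs :: [String] -> [NonEmptyStr]
assertNonEmptyStrs ss
  | any null ss = error "assertNonEmptyStrs: invariant violated"
  | otherwise   = coerce ss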
-
------------------------------------------------------------------------------
--- Representations
------------------------------------------------------------------------------
-
--- Why are these here?
-
--- | Assumes that there is precisely one 'PrimRep' of the type. This assumption
--- holds after unarise.
--- See Note [Post-unarisation invariants]
-idPrimRep :: Id -> PrimRep
-idPrimRep id = typePrimRep1 (idType id)
- -- See also Note [VoidRep] in RepType
-
--- | Assumes that Ids have one PrimRep, which holds after unarisation.
--- See Note [Post-unarisation invariants]
-addIdReps :: [NonVoid Id] -> [NonVoid (PrimRep, Id)]
-addIdReps = map (\id -> let id' = fromNonVoid id
- in NonVoid (idPrimRep id', id'))
-
--- | Assumes that arguments have one PrimRep, which holds after unarisation.
--- See Note [Post-unarisation invariants]
-addArgReps :: [NonVoid StgArg] -> [NonVoid (PrimRep, StgArg)]
-addArgReps = map (\arg -> let arg' = fromNonVoid arg
- in NonVoid (argPrimRep arg', arg'))
-
--- | Assumes that the argument has one PrimRep, which holds after unarisation.
--- See Note [Post-unarisation invariants]
-argPrimRep :: StgArg -> PrimRep
-argPrimRep arg = typePrimRep1 (stgArgType arg)
-
-
------------------------------------------------------------------------------
--- LambdaFormInfo
------------------------------------------------------------------------------
-
--- Information about an identifier, from the code generator's point of
--- view. Every identifier is bound to a LambdaFormInfo in the
--- environment, which gives the code generator enough info to be able to
--- tail call or return that identifier.
-
-data LambdaFormInfo
- = LFReEntrant -- Reentrant closure (a function)
- TopLevelFlag -- True if top level
- OneShotInfo
- !RepArity -- Arity. Invariant: always > 0
- !Bool -- True <=> no fvs
- ArgDescr -- Argument descriptor (should really be in ClosureInfo)
-
- | LFThunk -- Thunk (zero arity)
- TopLevelFlag
- !Bool -- True <=> no free vars
- !Bool -- True <=> updatable (i.e., *not* single-entry)
- StandardFormInfo
- !Bool -- True <=> *might* be a function type
-
- | LFCon -- A saturated constructor application
- DataCon -- The constructor
-
- | LFUnknown -- Used for function arguments and imported things.
- -- We know nothing about this closure.
- -- Treat like updatable "LFThunk"...
- -- Imported things which we *do* know something about use
- -- one of the other LF constructors (eg LFReEntrant for
- -- known functions)
- !Bool -- True <=> *might* be a function type
- -- The False case is good when we want to enter it,
- -- because then we know the entry code will do
- -- For a function, the entry code is the fast entry point
-
- | LFUnlifted -- A value of unboxed type;
- -- always a value, needs evaluation
-
- | LFLetNoEscape -- See LetNoEscape module for precise description
-
-
--------------------------
--- StandardFormInfo tells whether this thunk has one of
--- a small number of standard forms
-
-data StandardFormInfo
- = NonStandardThunk
- -- The usual case: not of the standard forms
-
- | SelectorThunk
- -- A SelectorThunk is of form
- -- case x of
- -- con a1,..,an -> ak
- -- and the constructor is from a single-constr type.
- WordOff -- 0-origin offset of ak within the "goods" of
- -- constructor (Recall that the a1,...,an may be laid
- -- out in the heap in a non-obvious order.)
-
- | ApThunk
- -- An ApThunk is of form
- -- x1 ... xn
- -- The code for the thunk just pushes x2..xn on the stack and enters x1.
- -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled
- -- in the RTS to save space.
- RepArity -- Arity, n
-
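For concreteness, here are source-level shapes that typically give rise to the two standard thunk forms. This is an illustrative sketch only (GHC may simplify such tiny examples away), not code from this patch.

data T = MkT Int Bool Char

-- 'sel' has the shape of a SelectorThunk: a single case on a variable of a
-- single-constructor type that returns one field.
selectorShape :: T -> Bool
selectorShape x = let sel = case x of MkT _ b _ -> b in sel

-- 'ap' has the shape of an ApThunk: a variable applied to already-bound
-- arguments, so a preallocated RTS entry can serve as its code.
apShape :: (Int -> Int -> Int) -> Int -> Int -> Int
apShape f a b = let ap = f a b in ap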
-
-------------------------------------------------------
--- Building LambdaFormInfo
-------------------------------------------------------
-
-mkLFArgument :: Id -> LambdaFormInfo
-mkLFArgument id
- | isUnliftedType ty = LFUnlifted
- | might_be_a_function ty = LFUnknown True
- | otherwise = LFUnknown False
- where
- ty = idType id
-
--------------
-mkLFLetNoEscape :: LambdaFormInfo
-mkLFLetNoEscape = LFLetNoEscape
-
--------------
-mkLFReEntrant :: TopLevelFlag -- True if top level
- -> [Id] -- Free vars
- -> [Id] -- Args
- -> ArgDescr -- Argument descriptor
- -> LambdaFormInfo
-
-mkLFReEntrant _ _ [] _
- = pprPanic "mkLFReEntrant" empty
-mkLFReEntrant top fvs args arg_descr
- = LFReEntrant top os_info (length args) (null fvs) arg_descr
- where os_info = idOneShotInfo (head args)
-
--------------
-mkLFThunk :: Type -> TopLevelFlag -> [Id] -> UpdateFlag -> LambdaFormInfo
-mkLFThunk thunk_ty top fvs upd_flag
- = ASSERT( not (isUpdatable upd_flag) || not (isUnliftedType thunk_ty) )
- LFThunk top (null fvs)
- (isUpdatable upd_flag)
- NonStandardThunk
- (might_be_a_function thunk_ty)
-
---------------
-might_be_a_function :: Type -> Bool
--- Return False only if we are *sure* it's a data type
--- Look through newtypes etc as much as poss
-might_be_a_function ty
- | [LiftedRep] <- typePrimRep ty
- , Just tc <- tyConAppTyCon_maybe (unwrapType ty)
- , isDataTyCon tc
- = False
- | otherwise
- = True
-
--------------
-mkConLFInfo :: DataCon -> LambdaFormInfo
-mkConLFInfo con = LFCon con
-
--------------
-mkSelectorLFInfo :: Id -> Int -> Bool -> LambdaFormInfo
-mkSelectorLFInfo id offset updatable
- = LFThunk NotTopLevel False updatable (SelectorThunk offset)
- (might_be_a_function (idType id))
-
--------------
-mkApLFInfo :: Id -> UpdateFlag -> Arity -> LambdaFormInfo
-mkApLFInfo id upd_flag arity
- = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity)
- (might_be_a_function (idType id))
-
--------------
-mkLFImported :: Id -> LambdaFormInfo
-mkLFImported id
- | Just con <- isDataConWorkId_maybe id
- , isNullaryRepDataCon con
- = LFCon con -- An imported nullary constructor
- -- We assume that the constructor is evaluated so that
- -- the id really does point directly to the constructor
-
- | arity > 0
- = LFReEntrant TopLevel noOneShotInfo arity True (panic "arg_descr")
-
- | otherwise
- = mkLFArgument id -- Not sure of exact arity
- where
- arity = idFunRepArity id
-
--------------
-mkLFStringLit :: LambdaFormInfo
-mkLFStringLit = LFUnlifted
-
------------------------------------------------------
--- Dynamic pointer tagging
------------------------------------------------------
-
-type DynTag = Int -- The tag on a *pointer*
- -- (from the dynamic-tagging paper)
-
--- Note [Data constructor dynamic tags]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- The family size of a data type (the number of constructors
--- or the arity of a function) can be either:
--- * small, if the family size < 2**tag_bits
--- * big, otherwise.
---
--- Small families can have the constructor tag in the tag bits.
--- Big families only use the tag value 1 to represent evaluatedness.
--- We don't have very many tag bits: for example, we have 2 bits on
--- x86-32 and 3 bits on x86-64.
-
-isSmallFamily :: DynFlags -> Int -> Bool
-isSmallFamily dflags fam_size = fam_size <= mAX_PTR_TAG dflags
-
-tagForCon :: DynFlags -> DataCon -> DynTag
-tagForCon dflags con
- | isSmallFamily dflags fam_size = con_tag
- | otherwise = 1
- where
- con_tag = dataConTag con -- NB: 1-indexed
- fam_size = tyConFamilySize (dataConTyCon con)
-
-tagForArity :: DynFlags -> RepArity -> DynTag
-tagForArity dflags arity
- | isSmallFamily dflags arity = arity
- | otherwise = 0
-
-lfDynTag :: DynFlags -> LambdaFormInfo -> DynTag
--- Return the tag in the low order bits of a variable bound
--- to this LambdaForm
-lfDynTag dflags (LFCon con) = tagForCon dflags con
-lfDynTag dflags (LFReEntrant _ _ arity _ _) = tagForArity dflags arity
-lfDynTag _ _other = 0
-
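The tagging arithmetic itself is simple enough to sketch standalone. The helpers below mirror isSmallFamily/tagForCon above but are illustrative only, not GHC code; word-aligned pointers are modelled as Ints, and the 2-bit/3-bit figures come from the note above.

import Data.Bits ((.&.), complement)

tagBitsFor :: Int -> Int          -- word size in bytes -> available tag bits
tagBitsFor 4 = 2                  -- x86-32
tagBitsFor 8 = 3                  -- x86-64
tagBitsFor _ = error "tagBitsFor: unsupported word size"

maxPtrTag :: Int -> Int
maxPtrTag wordSize = 2 ^ tagBitsFor wordSize - 1

-- Small families carry the 1-indexed constructor tag; big families only
-- use tag 1, meaning "evaluated".
conTagSketch :: Int -> Int -> Int -> Int
conTagSketch wordSize famSize conTag
  | famSize <= maxPtrTag wordSize = conTag
  | otherwise                     = 1

-- Tagging adds the tag into the low, always-zero bits of an aligned
-- address; untagging masks them back out.
tagPtr :: Int -> Int -> Int       -- pointer, tag -> tagged pointer
tagPtr ptr tag = ptr + tag

untagPtr :: Int -> Int -> Int     -- word size, tagged pointer -> pointer
untagPtr wordSize ptr = ptr .&. complement (maxPtrTag wordSize)

getPtrTag :: Int -> Int -> Int    -- word size, tagged pointer -> tag
getPtrTag wordSize ptr = ptr .&. maxPtrTag wordSize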
-
------------------------------------------------------------------------------
--- Observing LambdaFormInfo
------------------------------------------------------------------------------
-
-------------
-isLFThunk :: LambdaFormInfo -> Bool
-isLFThunk (LFThunk {}) = True
-isLFThunk _ = False
-
-isLFReEntrant :: LambdaFormInfo -> Bool
-isLFReEntrant (LFReEntrant {}) = True
-isLFReEntrant _ = False
-
------------------------------------------------------------------------------
--- Choosing SM reps
------------------------------------------------------------------------------
-
-lfClosureType :: LambdaFormInfo -> ClosureTypeInfo
-lfClosureType (LFReEntrant _ _ arity _ argd) = Fun arity argd
-lfClosureType (LFCon con) = Constr (dataConTagZ con)
- (dataConIdentity con)
-lfClosureType (LFThunk _ _ _ is_sel _) = thunkClosureType is_sel
-lfClosureType _ = panic "lfClosureType"
-
-thunkClosureType :: StandardFormInfo -> ClosureTypeInfo
-thunkClosureType (SelectorThunk off) = ThunkSelector off
-thunkClosureType _ = Thunk
-
--- We *do* get non-updatable top-level thunks sometimes. eg. f = g
--- gets compiled to a jump to g (if g has non-zero arity), instead of
--- messing around with update frames and PAPs. We set the closure type
--- to FUN_STATIC in this case.
-
------------------------------------------------------------------------------
--- nodeMustPointToIt
------------------------------------------------------------------------------
-
-nodeMustPointToIt :: DynFlags -> LambdaFormInfo -> Bool
--- If nodeMustPointToIt is true, then the entry convention for
--- this closure has R1 (the "Node" register) pointing to the
--- closure itself --- the "self" argument
-
-nodeMustPointToIt _ (LFReEntrant top _ _ no_fvs _)
- = not no_fvs -- Certainly if it has fvs we need to point to it
- || isNotTopLevel top -- See Note [GC recovery]
- -- For lex_profiling we also access the cost centre for a
- -- non-inherited (i.e. non-top-level) function.
- -- The isNotTopLevel test above ensures this is ok.
-
-nodeMustPointToIt dflags (LFThunk top no_fvs updatable NonStandardThunk _)
- = not no_fvs -- Self parameter
- || isNotTopLevel top -- Note [GC recovery]
- || updatable -- Need to push update frame
- || gopt Opt_SccProfilingOn dflags
- -- For the non-updatable (single-entry case):
- --
- -- True if has fvs (in which case we need access to them, and we
- -- should black-hole it)
- -- or profiling (in which case we need to recover the cost centre
- -- from inside it) ToDo: do we need this even for
- -- top-level thunks? If not,
- -- isNotTopLevel subsumes this
-
-nodeMustPointToIt _ (LFThunk {}) -- Node must point to a standard-form thunk
- = True
-
-nodeMustPointToIt _ (LFCon _) = True
-
- -- Strictly speaking, the above two don't need Node to point
- -- to it if the arity = 0. But this is a *really* unlikely
- -- situation. If we know it's nil (say) and we are entering
- -- it, e.g. let x = [] in x, then we will certainly have inlined
- -- x, since nil is a simple atom. So we gain little by not
- -- having Node point to known zero-arity things. On the other
- -- hand, we do lose something; Patrick's code for figuring out
- -- when something has been updated but not entered relies on
- -- having Node point to the result of an update. SLPJ
- -- 27/11/92.
-
-nodeMustPointToIt _ (LFUnknown _) = True
-nodeMustPointToIt _ LFUnlifted = False
-nodeMustPointToIt _ LFLetNoEscape = False
-
-{- Note [GC recovery]
-~~~~~~~~~~~~~~~~~~~~~
-If we have a local let-binding (function or thunk)
- let f = <body> in ...
-AND <body> allocates, then the heap-overflow check needs to know how
-to re-start the evaluation. It uses the "self" pointer to do this.
-So even if there are no free variables in <body>, we still make
-nodeMustPointToIt be True for non-top-level bindings.
-
-Why do any such bindings exist? After all, let-floating should have
-floated them out. Well, a clever optimiser might leave one there to
-avoid a space leak, deliberately recomputing a thunk. Also (and this
-really does happen occasionally) let-floating may make a function f smaller
-so it can be inlined, so now (f True) may generate a local no-fv closure.
-This actually happened during bootstrapping GHC itself, with f=mkRdrFunBind
-in TcGenDeriv. -}
-
------------------------------------------------------------------------------
--- getCallMethod
------------------------------------------------------------------------------
-
-{- The entry conventions depend on the type of closure being entered,
-whether or not it has free variables, and whether we're running
-sequentially or in parallel.
-
-Closure Node Argument Enter
-Characteristics Par Req'd Passing Via
----------------------------------------------------------------------------
-Unknown & no & yes & stack & node
-Known fun (>1 arg), no fvs & no & no & registers & fast entry (enough args)
- & slow entry (otherwise)
-Known fun (>1 arg), fvs & no & yes & registers & fast entry (enough args)
-0 arg, no fvs \r,\s & no & no & n/a & direct entry
-0 arg, no fvs \u & no & yes & n/a & node
-0 arg, fvs \r,\s,selector & no & yes & n/a & node
-0 arg, fvs \r,\s & no & yes & n/a & direct entry
-0 arg, fvs \u & no & yes & n/a & node
-Unknown & yes & yes & stack & node
-Known fun (>1 arg), no fvs & yes & no & registers & fast entry (enough args)
- & slow entry (otherwise)
-Known fun (>1 arg), fvs & yes & yes & registers & node
-0 arg, fvs \r,\s,selector & yes & yes & n/a & node
-0 arg, no fvs \r,\s & yes & no & n/a & direct entry
-0 arg, no fvs \u & yes & yes & n/a & node
-0 arg, fvs \r,\s & yes & yes & n/a & node
-0 arg, fvs \u & yes & yes & n/a & node
-
-When black-holing, single-entry closures could also be entered via node
-(rather than directly) to catch double-entry. -}
-
-data CallMethod
- = EnterIt -- No args, not a function
-
- | JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop
-
- | ReturnIt -- It's a value (function, unboxed value,
- -- or constructor), so just return it.
-
- | SlowCall -- Unknown fun, or known fun with
- -- too few args.
-
- | DirectEntry -- Jump directly, with args in regs
- CLabel -- The code label
- RepArity -- Its arity
-
-getCallMethod :: DynFlags
- -> Name -- Function being applied
- -> Id -- Function Id used to check if it can refer to
- -- CAFs and whether the function is tail-calling
- -- itself
- -> LambdaFormInfo -- Its info
- -> RepArity -- Number of available arguments
- -> RepArity -- Number of them being void arguments
- -> CgLoc -- Passed in from cgIdApp so that we can
- -- handle let-no-escape bindings and self-recursive
- -- tail calls using the same data constructor,
- -- JumpToIt. This saves us one case branch in
- -- cgIdApp
- -> Maybe SelfLoopInfo -- can we perform a self-recursive tail call?
- -> CallMethod
-
-getCallMethod dflags _ id _ n_args v_args _cg_loc
- (Just (self_loop_id, block_id, args))
- | gopt Opt_Loopification dflags
- , id == self_loop_id
- , args `lengthIs` (n_args - v_args)
- -- If these patterns match then we know that:
- -- * loopification optimisation is turned on
- -- * function is performing a self-recursive call in a tail position
- -- * number of non-void parameters of the function matches the function's arity.
- -- See Note [Self-recursive tail calls] and Note [Void arguments in
- -- self-recursive tail calls] in StgCmmExpr for more details
- = JumpToIt block_id args
-
-getCallMethod dflags name id (LFReEntrant _ _ arity _ _) n_args _v_args _cg_loc
- _self_loop_info
- | n_args == 0 -- No args at all
- && not (gopt Opt_SccProfilingOn dflags)
- -- See Note [Evaluating functions with profiling] in rts/Apply.cmm
- = ASSERT( arity /= 0 ) ReturnIt
- | n_args < arity = SlowCall -- Not enough args
- | otherwise = DirectEntry (enterIdLabel dflags name (idCafInfo id)) arity
-
-getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc _self_loop_info
- = ASSERT( n_args == 0 ) ReturnIt
-
-getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info
- = ASSERT( n_args == 0 ) ReturnIt
- -- n_args=0 because it'd be ill-typed to apply a saturated
- -- constructor application to anything
-
-getCallMethod dflags name id (LFThunk _ _ updatable std_form_info is_fun)
- n_args _v_args _cg_loc _self_loop_info
- | is_fun -- it *might* be a function, so we must "call" it (which is always safe)
- = SlowCall -- We cannot just enter it [in eval/apply, the entry code
- -- is the fast-entry code]
-
- -- Since is_fun is False, we are *definitely* looking at a data value
- | updatable || gopt Opt_Ticky dflags -- to catch double entry
- {- OLD: || opt_SMP
- I decided to remove this, because in SMP mode it doesn't matter
- if we enter the same thunk multiple times, so the optimisation
- of jumping directly to the entry code is still valid. --SDM
- -}
- = EnterIt
-
- -- even a non-updatable selector thunk can be updated by the garbage
- -- collector, so we must enter it. (#8817)
- | SelectorThunk{} <- std_form_info
- = EnterIt
-
- -- We used to have ASSERT( n_args == 0 ), but actually it is
- -- possible for the optimiser to generate
- -- let bot :: Int = error Int "urk"
- -- in (bot `cast` unsafeCoerce Int (Int -> Int)) 3
- -- This happens as a result of the case-of-error transformation
- -- So the right thing to do is just to enter the thing
-
- | otherwise -- Jump direct to code for single-entry thunks
- = ASSERT( n_args == 0 )
- DirectEntry (thunkEntryLabel dflags name (idCafInfo id) std_form_info
- updatable) 0
-
-getCallMethod _ _name _ (LFUnknown True) _n_arg _v_args _cg_locs _self_loop_info
- = SlowCall -- might be a function
-
-getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info
- = ASSERT2( n_args == 0, ppr name <+> ppr n_args )
- EnterIt -- Not a function
-
-getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs)
- _self_loop_info
- = JumpToIt blk_id lne_regs
-
-getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method"
-
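Stripped of the profiling, loopification and CAF concerns handled above, the decision for a known function reduces to comparing the supplied argument count with the arity. A simplified standalone sketch of the LFReEntrant case, with invented names and types rather than GHC's:

data CallMethodSketch
  = ReturnItS            -- no args: the function is already a value
  | SlowCallS            -- too few args: go through the generic apply code
  | DirectEntryS Int     -- saturated (or more): jump to the fast entry point

callKnownFun :: Int      -- arity of the known function
             -> Int      -- number of arguments supplied
             -> CallMethodSketch
callKnownFun arity nArgs
  | nArgs == 0    = ReturnItS
  | nArgs < arity = SlowCallS
  | otherwise     = DirectEntryS arity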
------------------------------------------------------------------------------
--- Data types for closure information
------------------------------------------------------------------------------
-
-
-{- ClosureInfo: information about a binding
-
- We make a ClosureInfo for each let binding (both top level and not),
- but not bindings for data constructors: for those we build a CmmInfoTable
- directly (see mkDataConInfoTable).
-
- To a first approximation:
- ClosureInfo = (LambdaFormInfo, CmmInfoTable)
-
- A ClosureInfo has enough information
- a) to construct the info table itself, and build other things
- related to the binding (e.g. slow entry points for a function)
- b) to allocate a closure containing that info pointer (i.e.
- it knows the info table label)
--}
-
-data ClosureInfo
- = ClosureInfo {
- closureName :: !Name, -- The thing bound to this closure
- -- we don't really need this field: it's only used in generating
- -- code for ticky and profiling, and we could pass the information
- -- around separately, but it doesn't do much harm to keep it here.
-
- closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon
- -- this tells us what the closure contains: its right-hand side.
-
- -- the rest is just an unpacked CmmInfoTable.
- closureInfoLabel :: !CLabel,
- closureSMRep :: !SMRep, -- representation used by storage mgr
- closureProf :: !ProfilingInfo
- }
-
--- | Convert from 'ClosureInfo' to 'CmmInfoTable'.
-mkCmmInfo :: ClosureInfo -> Id -> CostCentreStack -> CmmInfoTable
-mkCmmInfo ClosureInfo {..} id ccs
- = CmmInfoTable { cit_lbl = closureInfoLabel
- , cit_rep = closureSMRep
- , cit_prof = closureProf
- , cit_srt = Nothing
- , cit_clo = if isStaticRep closureSMRep
- then Just (id,ccs)
- else Nothing }
-
---------------------------------------
--- Building ClosureInfos
---------------------------------------
-
-mkClosureInfo :: DynFlags
- -> Bool -- Is static
- -> Id
- -> LambdaFormInfo
- -> Int -> Int -- Total and pointer words
- -> String -- String descriptor
- -> ClosureInfo
-mkClosureInfo dflags is_static id lf_info tot_wds ptr_wds val_descr
- = ClosureInfo { closureName = name
- , closureLFInfo = lf_info
- , closureInfoLabel = info_lbl -- These three fields are
- , closureSMRep = sm_rep -- (almost) an info table
- , closureProf = prof } -- (we don't have an SRT yet)
- where
- name = idName id
- sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType lf_info)
- prof = mkProfilingInfo dflags id val_descr
- nonptr_wds = tot_wds - ptr_wds
-
- info_lbl = mkClosureInfoTableLabel id lf_info
-
---------------------------------------
--- Other functions over ClosureInfo
---------------------------------------
-
--- Eager blackholing is normally disabled, but can be turned on with
--- -feager-blackholing. When it is on, we replace the info pointer of
--- the thunk with stg_EAGER_BLACKHOLE_info on entry.
-
--- If we wanted to do eager blackholing with slop filling,
--- we'd need to do it at the *end* of a basic block, otherwise
--- we overwrite the free variables in the thunk that we still
--- need. We have a patch for this from Andy Cheadle, but not
--- incorporated yet. --SDM [6/2004]
---
--- Previously, eager blackholing was enabled when ticky-ticky
--- was on. But it didn't work, and it wasn't strictly necessary
--- to bring back minimal ticky-ticky, so now EAGER_BLACKHOLING
--- is unconditionally disabled. -- krc 1/2007
---
--- Static closures are never themselves black-holed.
-
-blackHoleOnEntry :: ClosureInfo -> Bool
-blackHoleOnEntry cl_info
- | isStaticRep (closureSMRep cl_info)
- = False -- Never black-hole a static closure
-
- | otherwise
- = case closureLFInfo cl_info of
- LFReEntrant {} -> False
- LFLetNoEscape -> False
- LFThunk _ _no_fvs upd _ _ -> upd -- See Note [Black-holing non-updatable thunks]
- _other -> panic "blackHoleOnEntry"
-
-{- Note [Black-holing non-updatable thunks]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We must not black-hole non-updatable (single-entry) thunks otherwise
-we run into issues like #10414. Specifically:
-
- * There is no reason to black-hole a non-updatable thunk: it should
- not be competed for by multiple threads
-
- * It could, conceivably, cause a space leak if we don't black-hole
- it, if there was a live but never-followed pointer pointing to it.
- Let's hope that doesn't happen.
-
- * It is dangerous to black-hole a non-updatable thunk because
- - it is not updated (of course)
- - hence, if it is black-holed and another thread tries to evaluate
- it, that thread will block forever
- This actually happened in #10414. So we do not black-hole
- non-updatable thunks.
-
- * How could two threads evaluate the same non-updatable (single-entry)
- thunk? See Reid Barton's example below.
-
- * Only eager blackholing could possibly black-hole a non-updatable
- thunk, because lazy black-holing only affects thunks with an
- update frame on the stack.
-
-Here is an example due to Reid Barton (#10414):
- x = \u [] concat [[1], []]
-with the following definitions,
-
- concat x = case x of
- [] -> []
- (:) x xs -> (++) x (concat xs)
-
- (++) xs ys = case xs of
- [] -> ys
- (:) x rest -> (:) x ((++) rest ys)
-
-Where we use the syntax @\u []@ to denote an updatable thunk and @\s []@ to
-denote a single-entry (i.e. non-updatable) thunk. After a thread evaluates @x@
-to WHNF and calls @(++)@ the heap will contain the following thunks,
-
- x = 1 : y
- y = \u [] (++) [] z
- z = \s [] concat []
-
-Now that the stage is set, consider the following evaluations by two racing threads
-A and B,
-
- 1. Both threads enter @y@ before either is able to replace it with an
- indirection
-
- 2. Thread A does the case analysis in @(++)@ and consequently enters @z@,
- replacing it with a black-hole
-
- 3. At some later point thread B does the same case analysis and also attempts
- to enter @z@. However, it finds that it has been replaced with a black-hole
- so it blocks.
-
- 4. Thread A eventually finishes evaluating @z@ (to @[]@) and updates @y@
- accordingly. It does *not* update @z@, however, as it is single-entry. This
- leaves Thread B blocked forever on a black-hole which will never be
- updated.
-
-To avoid this sort of condition we never black-hole non-updatable thunks.
--}
-
-isStaticClosure :: ClosureInfo -> Bool
-isStaticClosure cl_info = isStaticRep (closureSMRep cl_info)
-
-closureUpdReqd :: ClosureInfo -> Bool
-closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info
-
-lfUpdatable :: LambdaFormInfo -> Bool
-lfUpdatable (LFThunk _ _ upd _ _) = upd
-lfUpdatable _ = False
-
-closureSingleEntry :: ClosureInfo -> Bool
-closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd
-closureSingleEntry (ClosureInfo { closureLFInfo = LFReEntrant _ OneShotLam _ _ _}) = True
-closureSingleEntry _ = False
-
-closureReEntrant :: ClosureInfo -> Bool
-closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant {} }) = True
-closureReEntrant _ = False
-
-closureFunInfo :: ClosureInfo -> Maybe (RepArity, ArgDescr)
-closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info
-
-lfFunInfo :: LambdaFormInfo -> Maybe (RepArity, ArgDescr)
-lfFunInfo (LFReEntrant _ _ arity _ arg_desc) = Just (arity, arg_desc)
-lfFunInfo _ = Nothing
-
-funTag :: DynFlags -> ClosureInfo -> DynTag
-funTag dflags (ClosureInfo { closureLFInfo = lf_info })
- = lfDynTag dflags lf_info
-
-isToplevClosure :: ClosureInfo -> Bool
-isToplevClosure (ClosureInfo { closureLFInfo = lf_info })
- = case lf_info of
- LFReEntrant TopLevel _ _ _ _ -> True
- LFThunk TopLevel _ _ _ _ -> True
- _other -> False
-
---------------------------------------
--- Label generation
---------------------------------------
-
-staticClosureLabel :: ClosureInfo -> CLabel
-staticClosureLabel = toClosureLbl . closureInfoLabel
-
-closureSlowEntryLabel :: ClosureInfo -> CLabel
-closureSlowEntryLabel = toSlowEntryLbl . closureInfoLabel
-
-closureLocalEntryLabel :: DynFlags -> ClosureInfo -> CLabel
-closureLocalEntryLabel dflags
- | tablesNextToCode dflags = toInfoLbl . closureInfoLabel
- | otherwise = toEntryLbl . closureInfoLabel
-
-mkClosureInfoTableLabel :: Id -> LambdaFormInfo -> CLabel
-mkClosureInfoTableLabel id lf_info
- = case lf_info of
- LFThunk _ _ upd_flag (SelectorThunk offset) _
- -> mkSelectorInfoLabel upd_flag offset
-
- LFThunk _ _ upd_flag (ApThunk arity) _
- -> mkApInfoTableLabel upd_flag arity
-
- LFThunk{} -> std_mk_lbl name cafs
- LFReEntrant{} -> std_mk_lbl name cafs
- _other -> panic "mkClosureInfoTableLabel"
-
- where
- name = idName id
-
- std_mk_lbl | is_local = mkLocalInfoTableLabel
- | otherwise = mkInfoTableLabel
-
- cafs = idCafInfo id
- is_local = isDataConWorkId id
- -- Make the _info pointer for the implicit datacon worker
- -- binding local. The reason we can do this is that importing
- -- code always either uses the _closure or _con_info. By the
- -- invariants in CorePrep anything else gets eta expanded.
-
-
-thunkEntryLabel :: DynFlags -> Name -> CafInfo -> StandardFormInfo -> Bool -> CLabel
--- thunkEntryLabel is a local help function, not exported. It's used from
--- getCallMethod.
-thunkEntryLabel dflags _thunk_id _ (ApThunk arity) upd_flag
- = enterApLabel dflags upd_flag arity
-thunkEntryLabel dflags _thunk_id _ (SelectorThunk offset) upd_flag
- = enterSelectorLabel dflags upd_flag offset
-thunkEntryLabel dflags thunk_id c _ _
- = enterIdLabel dflags thunk_id c
-
-enterApLabel :: DynFlags -> Bool -> Arity -> CLabel
-enterApLabel dflags is_updatable arity
- | tablesNextToCode dflags = mkApInfoTableLabel is_updatable arity
- | otherwise = mkApEntryLabel is_updatable arity
-
-enterSelectorLabel :: DynFlags -> Bool -> WordOff -> CLabel
-enterSelectorLabel dflags upd_flag offset
- | tablesNextToCode dflags = mkSelectorInfoLabel upd_flag offset
- | otherwise = mkSelectorEntryLabel upd_flag offset
-
-enterIdLabel :: DynFlags -> Name -> CafInfo -> CLabel
-enterIdLabel dflags id c
- | tablesNextToCode dflags = mkInfoTableLabel id c
- | otherwise = mkEntryLabel id c
-
-
---------------------------------------
--- Profiling
---------------------------------------
-
--- Profiling requires two pieces of information to be determined for
--- each closure's info table --- description and type.
-
--- The description is stored directly in the @CClosureInfoTable@ when the
--- info table is built.
-
--- The type is determined from the type information stored with the @Id@
--- in the closure info using @closureTypeDescr@.
-
-mkProfilingInfo :: DynFlags -> Id -> String -> ProfilingInfo
-mkProfilingInfo dflags id val_descr
- | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo
- | otherwise = ProfilingInfo ty_descr_w8 (BS8.pack val_descr)
- where
- ty_descr_w8 = BS8.pack (getTyDescription (idType id))
-
-getTyDescription :: Type -> String
-getTyDescription ty
- = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) ->
- case tau_ty of
- TyVarTy _ -> "*"
- AppTy fun _ -> getTyDescription fun
- TyConApp tycon _ -> getOccString tycon
- FunTy {} -> '-' : fun_result tau_ty
- ForAllTy _ ty -> getTyDescription ty
- LitTy n -> getTyLitDescription n
- CastTy ty _ -> getTyDescription ty
- CoercionTy co -> pprPanic "getTyDescription" (ppr co)
- }
- where
- fun_result (FunTy { ft_res = res }) = '>' : fun_result res
- fun_result other = getTyDescription other
-
-getTyLitDescription :: TyLit -> String
-getTyLitDescription l =
- case l of
- NumTyLit n -> show n
- StrTyLit n -> show n
-
---------------------------------------
--- CmmInfoTable-related things
---------------------------------------
-
-mkDataConInfoTable :: DynFlags -> DataCon -> Bool -> Int -> Int -> CmmInfoTable
-mkDataConInfoTable dflags data_con is_static ptr_wds nonptr_wds
- = CmmInfoTable { cit_lbl = info_lbl
- , cit_rep = sm_rep
- , cit_prof = prof
- , cit_srt = Nothing
- , cit_clo = Nothing }
- where
- name = dataConName data_con
- info_lbl = mkConInfoTableLabel name NoCafRefs
- sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type
- cl_type = Constr (dataConTagZ data_con) (dataConIdentity data_con)
- -- We keep the *zero-indexed* tag in the srt_len field
- -- of the info table of a data constructor.
-
- prof | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo
- | otherwise = ProfilingInfo ty_descr val_descr
-
- ty_descr = BS8.pack $ occNameString $ getOccName $ dataConTyCon data_con
- val_descr = BS8.pack $ occNameString $ getOccName data_con
-
--- We need a black-hole closure info to pass to @allocDynClosure@ when we
--- want to allocate the black hole on entry to a CAF.
-
-cafBlackHoleInfoTable :: CmmInfoTable
-cafBlackHoleInfoTable
- = CmmInfoTable { cit_lbl = mkCAFBlackHoleInfoTableLabel
- , cit_rep = blackHoleRep
- , cit_prof = NoProfilingInfo
- , cit_srt = Nothing
- , cit_clo = Nothing }
-
-indStaticInfoTable :: CmmInfoTable
-indStaticInfoTable
- = CmmInfoTable { cit_lbl = mkIndStaticInfoLabel
- , cit_rep = indStaticRep
- , cit_prof = NoProfilingInfo
- , cit_srt = Nothing
- , cit_clo = Nothing }
-
-staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool
--- A static closure needs a link field to aid the GC when traversing
--- the static closure graph. But it only needs such a field if either
--- a) it has an SRT
--- b) it's a constructor with one or more pointer fields
--- In case (b), the constructor's fields themselves play the role
--- of the SRT.
-staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep }
- | isConRep smrep = not (isStaticNoCafCon smrep)
- | otherwise = has_srt
diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs
deleted file mode 100644
index 67a9776eac..0000000000
--- a/compiler/codeGen/StgCmmCon.hs
+++ /dev/null
@@ -1,285 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Stg to C--: code generation for constructors
---
--- This module provides the support code for StgCmm to deal with
--- constructors on the RHSs of let(rec)s.
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmCon (
- cgTopRhsCon, buildDynCon, bindConArgs
- ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import StgSyn
-import CoreSyn ( AltCon(..) )
-
-import StgCmmMonad
-import StgCmmEnv
-import StgCmmHeap
-import StgCmmLayout
-import StgCmmUtils
-import StgCmmClosure
-
-import CmmExpr
-import CmmUtils
-import CLabel
-import MkGraph
-import SMRep
-import CostCentre
-import Module
-import DataCon
-import DynFlags
-import FastString
-import Id
-import RepType (countConRepArgs)
-import Literal
-import PrelInfo
-import Outputable
-import GHC.Platform
-import Util
-import MonadUtils (mapMaybeM)
-
-import Control.Monad
-import Data.Char
-
-
-
----------------------------------------------------------------
--- Top-level constructors
----------------------------------------------------------------
-
-cgTopRhsCon :: DynFlags
- -> Id -- Name of thing bound to this RHS
- -> DataCon -- Id
- -> [NonVoid StgArg] -- Args
- -> (CgIdInfo, FCode ())
-cgTopRhsCon dflags id con args =
- let id_info = litIdInfo dflags id (mkConLFInfo con) (CmmLabel closure_label)
- in (id_info, gen_code)
- where
- name = idName id
- caffy = idCafInfo id -- any stgArgHasCafRefs args
- closure_label = mkClosureLabel name caffy
-
- gen_code =
- do { this_mod <- getModuleName
- ; when (platformOS (targetPlatform dflags) == OSMinGW32) $
- -- Windows DLLs have a problem with static cross-DLL refs.
- MASSERT( not (isDllConApp dflags this_mod con (map fromNonVoid args)) )
- ; ASSERT( args `lengthIs` countConRepArgs con ) return ()
-
- -- LAY IT OUT
- ; let
- (tot_wds, -- #ptr_wds + #nonptr_wds
- ptr_wds, -- #ptr_wds
- nv_args_w_offsets) =
- mkVirtHeapOffsetsWithPadding dflags StdHeader (addArgReps args)
-
- mk_payload (Padding len _) = return (CmmInt 0 (widthFromBytes len))
- mk_payload (FieldOff arg _) = do
- amode <- getArgAmode arg
- case amode of
- CmmLit lit -> return lit
- _ -> panic "StgCmmCon.cgTopRhsCon"
-
- nonptr_wds = tot_wds - ptr_wds
-
- -- we're not really going to emit an info table, so having
- -- to make a CmmInfoTable is a bit overkill, but mkStaticClosureFields
- -- needs to poke around inside it.
- info_tbl = mkDataConInfoTable dflags con True ptr_wds nonptr_wds
-
-
- ; payload <- mapM mk_payload nv_args_w_offsets
- -- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs
- -- NB2: all the amodes should be Lits!
- -- TODO (osa): Why?
-
- ; let closure_rep = mkStaticClosureFields
- dflags
- info_tbl
- dontCareCCS -- Because it's static data
- caffy -- Has CAF refs
- payload
-
- -- BUILD THE OBJECT
- ; emitDataLits closure_label closure_rep
-
- ; return () }
-
-
----------------------------------------------------------------
--- Lay out and allocate non-top-level constructors
----------------------------------------------------------------
-
-buildDynCon :: Id -- Name of the thing to which this constr will
- -- be bound
- -> Bool -- is it genuinely bound to that name, or just
- -- for profiling?
- -> CostCentreStack -- Where to grab cost centre from;
- -- current CCS if currentOrSubsumedCCS
- -> DataCon -- The data constructor
- -> [NonVoid StgArg] -- Its args
- -> FCode (CgIdInfo, FCode CmmAGraph)
- -- Return details about how to find it and initialization code
-buildDynCon binder actually_bound cc con args
- = do dflags <- getDynFlags
- buildDynCon' dflags (targetPlatform dflags) binder actually_bound cc con args
-
-
-buildDynCon' :: DynFlags
- -> Platform
- -> Id -> Bool
- -> CostCentreStack
- -> DataCon
- -> [NonVoid StgArg]
- -> FCode (CgIdInfo, FCode CmmAGraph)
-
-{- We used to pass a boolean indicating whether all the
-args were of size zero, so we could use a static
-constructor; but I concluded that it just isn't worth it.
-Now I/O uses unboxed tuples there just aren't any constructors
-with all size-zero args.
-
-The reason for having a separate argument, rather than looking at
-the addr modes of the args is that we may be in a "knot", and
-premature looking at the args will cause the compiler to black-hole!
--}
-
-
--------- buildDynCon': Nullary constructors --------------
--- First we deal with the case of zero-arity constructors. They
--- will probably be unfolded, so we don't expect to see this case much,
--- if at all, but it does no harm, and sets the scene for characters.
---
--- In the case of zero-arity constructors, or, more accurately, those
--- which have exclusively size-zero (VoidRep) args, we generate no code
--- at all.
-
-buildDynCon' dflags _ binder _ _cc con []
- | isNullaryRepDataCon con
- = return (litIdInfo dflags binder (mkConLFInfo con)
- (CmmLabel (mkClosureLabel (dataConName con) (idCafInfo binder))),
- return mkNop)
-
--------- buildDynCon': Charlike and Intlike constructors -----------
-{- The following three paragraphs about @Char@-like and @Int@-like
-closures are obsolete, but I don't understand the details well enough
-to properly word them, sorry. I've changed the treatment of @Char@s to
-be analogous to @Int@s: only a subset is preallocated, because @Char@
-now has 31 bits. Only literals are handled here. -- Qrczak
-
-Now for @Char@-like closures. We generate an assignment of the
-address of the closure to a temporary. It would be possible simply to
-generate no code, and record the addressing mode in the environment,
-but we'd have to be careful if the argument wasn't a constant --- so
-for simplicity we just always assign to a temporary.
-
-Last special case: @Int@-like closures. We only special-case the
-situation in which the argument is a literal in the range
-@mIN_INTLIKE@..@mAX_INTLIKE@. NB: for @Char@-like closures we can
-work with any old argument, but for @Int@-like ones the argument has
-to be a literal. Reason: @Char@-like closures have an argument type
-which is guaranteed in range.
-
-Because of this, we can safely return an addressing mode.
-
-We don't support this optimisation when compiling into Windows DLLs yet
-because they don't support cross package data references well.
--}
-
-buildDynCon' dflags platform binder _ _cc con [arg]
- | maybeIntLikeCon con
- , platformOS platform /= OSMinGW32 || not (positionIndependent dflags)
- , NonVoid (StgLitArg (LitNumber LitNumInt val _)) <- arg
- , val <= fromIntegral (mAX_INTLIKE dflags) -- Comparisons at type Integer!
- , val >= fromIntegral (mIN_INTLIKE dflags) -- ...ditto...
- = do { let intlike_lbl = mkCmmClosureLabel rtsUnitId (fsLit "stg_INTLIKE")
- val_int = fromIntegral val :: Int
- offsetW = (val_int - mIN_INTLIKE dflags) * (fixedHdrSizeW dflags + 1)
- -- INTLIKE closures consist of a header and one word payload
- intlike_amode = cmmLabelOffW dflags intlike_lbl offsetW
- ; return ( litIdInfo dflags binder (mkConLFInfo con) intlike_amode
- , return mkNop) }
-
-buildDynCon' dflags platform binder _ _cc con [arg]
- | maybeCharLikeCon con
- , platformOS platform /= OSMinGW32 || not (positionIndependent dflags)
- , NonVoid (StgLitArg (LitChar val)) <- arg
- , let val_int = ord val :: Int
- , val_int <= mAX_CHARLIKE dflags
- , val_int >= mIN_CHARLIKE dflags
- = do { let charlike_lbl = mkCmmClosureLabel rtsUnitId (fsLit "stg_CHARLIKE")
- offsetW = (val_int - mIN_CHARLIKE dflags) * (fixedHdrSizeW dflags + 1)
- -- CHARLIKE closures consist of a header and one word payload
- charlike_amode = cmmLabelOffW dflags charlike_lbl offsetW
- ; return ( litIdInfo dflags binder (mkConLFInfo con) charlike_amode
- , return mkNop) }
-
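The offset computation used by both special cases is plain arithmetic over a preallocated array of closures in the RTS. Here is a standalone sketch for the Int-like case; the bounds and header size below are invented for illustration (the real values come from DynFlags and the RTS headers).

minIntlike, maxIntlike, fixedHdrWords :: Int
minIntlike    = -16      -- illustrative bound
maxIntlike    = 255      -- illustrative bound
fixedHdrWords = 1        -- one header word when not profiling (assumed)

-- Word offset of the preallocated closure for an Int literal, if it is
-- in range; each INTLIKE closure is a header plus one payload word.
intlikeOffsetW :: Int -> Maybe Int
intlikeOffsetW val
  | val < minIntlike || val > maxIntlike = Nothing
  | otherwise = Just ((val - minIntlike) * (fixedHdrWords + 1))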
--------- buildDynCon': the general case -----------
-buildDynCon' dflags _ binder actually_bound ccs con args
- = do { (id_info, reg) <- rhsIdInfo binder lf_info
- ; return (id_info, gen_code reg)
- }
- where
- lf_info = mkConLFInfo con
-
- gen_code reg
- = do { let (tot_wds, ptr_wds, args_w_offsets)
- = mkVirtConstrOffsets dflags (addArgReps args)
- nonptr_wds = tot_wds - ptr_wds
- info_tbl = mkDataConInfoTable dflags con False
- ptr_wds nonptr_wds
- ; let ticky_name | actually_bound = Just binder
- | otherwise = Nothing
-
- ; hp_plus_n <- allocDynClosure ticky_name info_tbl lf_info
- use_cc blame_cc args_w_offsets
- ; return (mkRhsInit dflags reg lf_info hp_plus_n) }
- where
- use_cc -- cost-centre to stick in the object
- | isCurrentCCS ccs = cccsExpr
- | otherwise = panic "buildDynCon: non-current CCS not implemented"
-
- blame_cc = use_cc -- cost-centre on which to blame the alloc (same)
-
-
----------------------------------------------------------------
--- Binding constructor arguments
----------------------------------------------------------------
-
-bindConArgs :: AltCon -> LocalReg -> [NonVoid Id] -> FCode [LocalReg]
--- bindConArgs is called from cgAlt of a case
--- (bindConArgs con args) augments the environment with bindings for the
--- binders args, assuming that we have just returned from a 'case' which
--- found a con
-bindConArgs (DataAlt con) base args
- = ASSERT(not (isUnboxedTupleCon con))
- do dflags <- getDynFlags
- let (_, _, args_w_offsets) = mkVirtConstrOffsets dflags (addIdReps args)
- tag = tagForCon dflags con
-
- -- The binding below forces the masking out of the tag bits
- -- when accessing the constructor field.
- bind_arg :: (NonVoid Id, ByteOff) -> FCode (Maybe LocalReg)
- bind_arg (arg@(NonVoid b), offset)
- | isDeadBinder b -- See Note [Dead-binder optimisation] in StgCmmExpr
- = return Nothing
- | otherwise
- = do { emit $ mkTaggedObjectLoad dflags (idToReg dflags arg)
- base offset tag
- ; Just <$> bindArgToReg arg }
-
- mapMaybeM bind_arg args_w_offsets
-
-bindConArgs _other_con _base args
- = ASSERT( null args ) return []
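The "tagged object load" mentioned above relies on the constructor's tag being statically known at the case alternative, so the field read can fold the untagging into the addressing arithmetic. An illustrative sketch of that arithmetic, with plain Ints standing in for Cmm expressions:

-- A field at byte offset 'off' of an object whose tagged address is 'base'
-- lives at base + off - tag: subtracting the known tag masks it out
-- without a separate AND instruction.
taggedFieldAddr :: Int -> Int -> Int -> Int
taggedFieldAddr base off tag = base + off - tag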
diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs
deleted file mode 100644
index e605762f1f..0000000000
--- a/compiler/codeGen/StgCmmEnv.hs
+++ /dev/null
@@ -1,208 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Stg to C-- code generation: the binding environment
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-module StgCmmEnv (
- CgIdInfo,
-
- litIdInfo, lneIdInfo, rhsIdInfo, mkRhsInit,
- idInfoToAmode,
-
- addBindC, addBindsC,
-
- bindArgsToRegs, bindToReg, rebindToReg,
- bindArgToReg, idToReg,
- getArgAmode, getNonVoidArgAmodes,
- getCgIdInfo,
- maybeLetNoEscape,
- ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import TyCon
-import StgCmmMonad
-import StgCmmUtils
-import StgCmmClosure
-
-import CLabel
-
-import BlockId
-import CmmExpr
-import CmmUtils
-import DynFlags
-import Id
-import MkGraph
-import Name
-import Outputable
-import StgSyn
-import Type
-import TysPrim
-import UniqFM
-import Util
-import VarEnv
-
--------------------------------------
--- Manipulating CgIdInfo
--------------------------------------
-
-mkCgIdInfo :: Id -> LambdaFormInfo -> CmmExpr -> CgIdInfo
-mkCgIdInfo id lf expr
- = CgIdInfo { cg_id = id, cg_lf = lf
- , cg_loc = CmmLoc expr }
-
-litIdInfo :: DynFlags -> Id -> LambdaFormInfo -> CmmLit -> CgIdInfo
-litIdInfo dflags id lf lit
- = CgIdInfo { cg_id = id, cg_lf = lf
- , cg_loc = CmmLoc (addDynTag dflags (CmmLit lit) tag) }
- where
- tag = lfDynTag dflags lf
-
-lneIdInfo :: DynFlags -> Id -> [NonVoid Id] -> CgIdInfo
-lneIdInfo dflags id regs
- = CgIdInfo { cg_id = id, cg_lf = lf
- , cg_loc = LneLoc blk_id (map (idToReg dflags) regs) }
- where
- lf = mkLFLetNoEscape
- blk_id = mkBlockId (idUnique id)
-
-
-rhsIdInfo :: Id -> LambdaFormInfo -> FCode (CgIdInfo, LocalReg)
-rhsIdInfo id lf_info
- = do dflags <- getDynFlags
- reg <- newTemp (gcWord dflags)
- return (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg)), reg)
-
-mkRhsInit :: DynFlags -> LocalReg -> LambdaFormInfo -> CmmExpr -> CmmAGraph
-mkRhsInit dflags reg lf_info expr
- = mkAssign (CmmLocal reg) (addDynTag dflags expr (lfDynTag dflags lf_info))
-
-idInfoToAmode :: CgIdInfo -> CmmExpr
--- Returns a CmmExpr for the *tagged* pointer
-idInfoToAmode (CgIdInfo { cg_loc = CmmLoc e }) = e
-idInfoToAmode cg_info
- = pprPanic "idInfoToAmode" (ppr (cg_id cg_info)) -- LneLoc
-
-addDynTag :: DynFlags -> CmmExpr -> DynTag -> CmmExpr
--- A tag adds a byte offset to the pointer
-addDynTag dflags expr tag = cmmOffsetB dflags expr tag
-
-maybeLetNoEscape :: CgIdInfo -> Maybe (BlockId, [LocalReg])
-maybeLetNoEscape (CgIdInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args)
-maybeLetNoEscape _other = Nothing
-
-
-
----------------------------------------------------------
--- The binding environment
---
--- There are three basic routines, for adding (addBindC),
--- modifying(modifyBindC) and looking up (getCgIdInfo) bindings.
----------------------------------------------------------
-
-addBindC :: CgIdInfo -> FCode ()
-addBindC stuff_to_bind = do
- binds <- getBinds
- setBinds $ extendVarEnv binds (cg_id stuff_to_bind) stuff_to_bind
-
-addBindsC :: [CgIdInfo] -> FCode ()
-addBindsC new_bindings = do
- binds <- getBinds
- let new_binds = foldl' (\ binds info -> extendVarEnv binds (cg_id info) info)
- binds
- new_bindings
- setBinds new_binds
-
-getCgIdInfo :: Id -> FCode CgIdInfo
-getCgIdInfo id
- = do { dflags <- getDynFlags
- ; local_binds <- getBinds -- Try local bindings first
- ; case lookupVarEnv local_binds id of {
- Just info -> return info ;
- Nothing -> do {
-
- -- Should be imported; make up a CgIdInfo for it
- let name = idName id
- ; if isExternalName name then
- let ext_lbl
- | isUnliftedType (idType id) =
- -- An unlifted external Id must refer to a top-level
- -- string literal. See Note [Bytes label] in CLabel.
- ASSERT( idType id `eqType` addrPrimTy )
- mkBytesLabel name
- | otherwise = mkClosureLabel name $ idCafInfo id
- in return $
- litIdInfo dflags id (mkLFImported id) (CmmLabel ext_lbl)
- else
- cgLookupPanic id -- Bug
- }}}
-
-cgLookupPanic :: Id -> FCode a
-cgLookupPanic id
- = do local_binds <- getBinds
- pprPanic "StgCmmEnv: variable not found"
- (vcat [ppr id,
- text "local binds for:",
- pprUFM local_binds $ \infos ->
- vcat [ ppr (cg_id info) | info <- infos ]
- ])
-
-
---------------------
-getArgAmode :: NonVoid StgArg -> FCode CmmExpr
-getArgAmode (NonVoid (StgVarArg var)) = idInfoToAmode <$> getCgIdInfo var
-getArgAmode (NonVoid (StgLitArg lit)) = CmmLit <$> cgLit lit
-
-getNonVoidArgAmodes :: [StgArg] -> FCode [CmmExpr]
--- NB: Filters out void args,
--- so the result list may be shorter than the argument list
-getNonVoidArgAmodes [] = return []
-getNonVoidArgAmodes (arg:args)
- | isVoidRep (argPrimRep arg) = getNonVoidArgAmodes args
- | otherwise = do { amode <- getArgAmode (NonVoid arg)
- ; amodes <- getNonVoidArgAmodes args
- ; return ( amode : amodes ) }
-
-
-------------------------------------------------------------------------
--- Interface functions for binding and re-binding names
-------------------------------------------------------------------------
-
-bindToReg :: NonVoid Id -> LambdaFormInfo -> FCode LocalReg
--- Bind an Id to a fresh LocalReg
-bindToReg nvid@(NonVoid id) lf_info
- = do dflags <- getDynFlags
- let reg = idToReg dflags nvid
- addBindC (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg)))
- return reg
-
-rebindToReg :: NonVoid Id -> FCode LocalReg
--- Like bindToReg, but the Id is already in scope, so
--- get its LF info from the envt
-rebindToReg nvid@(NonVoid id)
- = do { info <- getCgIdInfo id
- ; bindToReg nvid (cg_lf info) }
-
-bindArgToReg :: NonVoid Id -> FCode LocalReg
-bindArgToReg nvid@(NonVoid id) = bindToReg nvid (mkLFArgument id)
-
-bindArgsToRegs :: [NonVoid Id] -> FCode [LocalReg]
-bindArgsToRegs args = mapM bindArgToReg args
-
-idToReg :: DynFlags -> NonVoid Id -> LocalReg
--- Make a register from an Id, typically a function argument,
--- free variable, or case binder
---
--- We re-use the Unique from the Id to make it easier to see what is going on
---
--- By now the Ids should be uniquely named; else one would worry
--- about accidental collision
-idToReg dflags (NonVoid id)
- = LocalReg (idUnique id)
- (primRepCmmType dflags (idPrimRep id))
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
deleted file mode 100644
index 70a044a7ab..0000000000
--- a/compiler/codeGen/StgCmmExpr.hs
+++ /dev/null
@@ -1,992 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Stg to C-- code generation: expressions
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmExpr ( cgExpr ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude hiding ((<*>))
-
-import {-# SOURCE #-} StgCmmBind ( cgBind )
-
-import StgCmmMonad
-import StgCmmHeap
-import StgCmmEnv
-import StgCmmCon
-import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
-import StgCmmLayout
-import StgCmmPrim
-import StgCmmHpc
-import StgCmmTicky
-import StgCmmUtils
-import StgCmmClosure
-
-import StgSyn
-
-import MkGraph
-import BlockId
-import Cmm
-import CmmInfo
-import CoreSyn
-import DataCon
-import ForeignCall
-import Id
-import PrimOp
-import TyCon
-import Type ( isUnliftedType )
-import RepType ( isVoidTy, countConRepArgs, primRepSlot )
-import CostCentre ( CostCentreStack, currentCCS )
-import Maybes
-import Util
-import FastString
-import Outputable
-
-import Control.Monad (unless,void)
-import Control.Arrow (first)
-import Data.Function ( on )
-
-------------------------------------------------------------------------
--- cgExpr: the main function
-------------------------------------------------------------------------
-
-cgExpr :: CgStgExpr -> FCode ReturnKind
-
-cgExpr (StgApp fun args) = cgIdApp fun args
-
--- seq# a s ==> a
--- See Note [seq# magic] in PrelRules
-cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
- cgIdApp a []
-
--- dataToTag# :: a -> Int#
--- See Note [dataToTag#] in primops.txt.pp
-cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
- dflags <- getDynFlags
- emitComment (mkFastString "dataToTag#")
- tmp <- newTemp (bWord dflags)
- _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
- -- TODO: For small types look at the tag bits instead of reading info table
- emitReturn [getConstrTag dflags (cmmUntag dflags (CmmReg (CmmLocal tmp)))]
-
-cgExpr (StgOpApp op args ty) = cgOpApp op args ty
-cgExpr (StgConApp con args _)= cgConApp con args
-cgExpr (StgTick t e) = cgTick t >> cgExpr e
-cgExpr (StgLit lit) = do cmm_lit <- cgLit lit
- emitReturn [CmmLit cmm_lit]
-
-cgExpr (StgLet _ binds expr) = do { cgBind binds; cgExpr expr }
-cgExpr (StgLetNoEscape _ binds expr) =
- do { u <- newUnique
- ; let join_id = mkBlockId u
- ; cgLneBinds join_id binds
- ; r <- cgExpr expr
- ; emitLabel join_id
- ; return r }
-
-cgExpr (StgCase expr bndr alt_type alts) =
- cgCase expr bndr alt_type alts
-
-cgExpr (StgLam {}) = panic "cgExpr: StgLam"
-
-------------------------------------------------------------------------
--- Let no escape
-------------------------------------------------------------------------
-
-{- Generating code for a let-no-escape binding (aka join point) is
-very similar to what we do for a case expression. The duality is
-between
- let-no-escape x = b
- in e
-and
- case e of ... -> b
-
-That is, the RHS of 'x' (ie 'b') will execute *later*, just like
-the alternative of the case; it needs to be compiled in an environment
-in which all volatile bindings are forgotten, and the free vars are
-bound only to stable things like stack locations. The 'e' part will
-execute *next*, just like the scrutinee of a case. -}
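A source-level shape that becomes a let-no-escape/join point, purely for illustration (not code from this patch; GHC may transform such a small example further):

-- 'j' is only ever tail-called, and always saturated, so it can be
-- compiled like a case alternative: a local block that its call sites
-- jump to, rather than a heap-allocated closure.
joinExample :: Int -> Int
joinExample x =
  let j y = 2 * y + 1
  in if x > 0 then j x else j (negate x)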
-
--------------------------
-cgLneBinds :: BlockId -> CgStgBinding -> FCode ()
-cgLneBinds join_id (StgNonRec bndr rhs)
- = do { local_cc <- saveCurrentCostCentre
- -- See Note [Saving the current cost centre]
- ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
- ; fcode
- ; addBindC info }
-
-cgLneBinds join_id (StgRec pairs)
- = do { local_cc <- saveCurrentCostCentre
- ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
- ; let (infos, fcodes) = unzip r
- ; addBindsC infos
- ; sequence_ fcodes
- }
-
--------------------------
-cgLetNoEscapeRhs
- :: BlockId -- join point for successor of let-no-escape
- -> Maybe LocalReg -- Saved cost centre
- -> Id
- -> CgStgRhs
- -> FCode (CgIdInfo, FCode ())
-
-cgLetNoEscapeRhs join_id local_cc bndr rhs =
- do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
- ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
- ; let code = do { (_, body) <- getCodeScoped rhs_code
- ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
- ; return (info, code)
- }
-
-cgLetNoEscapeRhsBody
- :: Maybe LocalReg -- Saved cost centre
- -> Id
- -> CgStgRhs
- -> FCode (CgIdInfo, FCode ())
-cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure _ cc _upd args body)
- = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
-cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
- = cgLetNoEscapeClosure bndr local_cc cc []
- (StgConApp con args (pprPanic "cgLetNoEscapeRhsBody" $
- text "StgRhsCon doesn't have type args"))
- -- For a constructor RHS we want to generate a single chunk of
- -- code which can be jumped to from many places, which will
- -- return the constructor. It's easy; just behave as if it
- -- was an StgRhsClosure with a ConApp inside!
-
--------------------------
-cgLetNoEscapeClosure
- :: Id -- binder
- -> Maybe LocalReg -- Slot for saved current cost centre
- -> CostCentreStack -- XXX: *** NOT USED *** why not?
- -> [NonVoid Id] -- Args (as in \ args -> body)
- -> CgStgExpr -- Body (as in above)
- -> FCode (CgIdInfo, FCode ())
-
-cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
- = do dflags <- getDynFlags
- return ( lneIdInfo dflags bndr args
- , code )
- where
- code = forkLneBody $ do {
- ; withNewTickyCounterLNE (idName bndr) args $ do
- ; restoreCurrentCostCentre cc_slot
- ; arg_regs <- bindArgsToRegs args
- ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
-
-
-------------------------------------------------------------------------
--- Case expressions
-------------------------------------------------------------------------
-
-{- Note [Compiling case expressions]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-It is quite interesting to decide whether to put a heap-check at the
-start of each alternative. Of course we certainly have to do so if
-the case forces an evaluation, or if there is a primitive op which can
-trigger GC.
-
-A more interesting situation is this (a Plan-B situation)
-
- !P!;
- ...P...
- case x# of
- 0# -> !Q!; ...Q...
- default -> !R!; ...R...
-
-where !x! indicates a possible heap-check point. The heap checks
-in the alternatives *can* be omitted, in which case the topmost
-heapcheck will take their worst case into account.
-
-In favour of omitting !Q!, !R!:
-
- - *May* save a heap overflow test,
- if ...P... allocates anything.
-
- - We can use relative addressing from a single Hp to
- get at all the closures so allocated.
-
- - No need to save volatile vars etc across heap checks
- in !Q!, !R!
-
-Against omitting !Q!, !R!
-
- - May put a heap-check into the inner loop. Suppose
- the main loop is P -> R -> P -> R...
- Q is the loop exit, and only it does allocation.
- This only hurts us if P does no allocation. If P allocates,
- then there is a heap check in the inner loop anyway.
-
- - May do more allocation than required. This sometimes bites us
- badly. For example, nfib (ha!) allocates about 30% more space if the
- worst-casing is done, because many many calls to nfib are leaf calls
- which don't need to allocate anything.
-
- We can un-allocate, but that costs an instruction
-
-Neither problem hurts us if there is only one alternative.
-
-Suppose the inner loop is P->R->P->R etc. Then here is
-how many heap checks we get in the *inner loop* under various
-conditions
-
- Alloc Heap check in branches (!Q!, !R!)?
- P Q R yes no (absorb to !P!)
---------------------------------------
- n n n 0 0
- n y n 0 1
- n . y 1 1
- y . y 2 1
- y . n 1 1
-
-Best choices: absorb heap checks from Q and R into !P! iff
- a) P itself does some allocation
-or
- b) P does allocation, or there is exactly one alternative
-
-We adopt (b) because that is more likely to put the heap check at the
-entry to a function, when not many things are live. After a bunch of
-single-branch cases, we may have lots of things live.
-
-Hence: two basic plans for
-
- case e of r { alts }
-
------- Plan A: the general case ---------
-
- ...save current cost centre...
-
- ...code for e,
- with sequel (SetLocals r)
-
- ...restore current cost centre...
- ...code for alts...
- ...alts do their own heap checks
-
------- Plan B: special case when ---------
- (i) e does not allocate or call GC
- (ii) either upstream code performs allocation
- or there is just one alternative
-
- Then heap allocation in the (single) case branch
- is absorbed by the upstream check.
- Very common example: primops on unboxed values
-
- ...code for e,
- with sequel (SetLocals r)...
-
- ...code for alts...
- ...no heap check...
--}
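-
-{- A small worked example of the two plans (illustrative only, not real
-   generated code).  For
-
-       case x# +# 1# of r { DEFAULT -> <alternative that allocates> }
-
-   the scrutinee is a primop that cannot allocate or trigger GC and there
-   is a single alternative, so Plan B applies: the alternative's allocation
-   is absorbed into whatever heap check the enclosing code already performs,
-   and no extra check is emitted between the addition and the alternative.
-   If instead the scrutinee were a call to an unknown function, Plan A would
-   be used and each alternative would do its own heap check (GcInAlts below).
--}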
-
-
-
--------------------------------------
-data GcPlan
- = GcInAlts -- Put a GC check at the start of the case alternatives,
- [LocalReg] -- which binds these registers
- | NoGcInAlts -- The scrutinee is a primitive value, or a call to a
- -- primitive op which does no GC. Absorb the allocation
- -- of the case alternative(s) into the upstream check
-
--------------------------------------
-cgCase :: CgStgExpr -> Id -> AltType -> [CgStgAlt] -> FCode ReturnKind
-
-cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
- | isEnumerationTyCon tycon -- Note [case on bool]
- = do { tag_expr <- do_enum_primop op args
-
- -- If the binder is not dead, convert the tag to a constructor
- -- and assign it. See Note [Dead-binder optimisation]
- ; unless (isDeadBinder bndr) $ do
- { dflags <- getDynFlags
- ; tmp_reg <- bindArgToReg (NonVoid bndr)
- ; emitAssign (CmmLocal tmp_reg)
- (tagToClosure dflags tycon tag_expr) }
-
- ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
- (NonVoid bndr) alts
- -- See Note [GC for conditionals]
- ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
- ; return AssignedDirectly
- }
- where
- do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
- do_enum_primop TagToEnumOp [arg] -- No code!
- = getArgAmode (NonVoid arg)
- do_enum_primop primop args
- = do dflags <- getDynFlags
- tmp <- newTemp (bWord dflags)
- cgPrimOp [tmp] primop args
- return (CmmReg (CmmLocal tmp))
-
-{-
-Note [case on bool]
-~~~~~~~~~~~~~~~~~~~
-This special case handles code like
-
- case a <# b of
- True ->
- False ->
-
---> case tagToEnum# (a <# b) of
- True -> .. ; False -> ...
-
---> case (a <# b) of r ->
- case tagToEnum# r of
- True -> .. ; False -> ...
-
-If we let the ordinary case code handle it, we'll get something like
-
- tmp1 = a < b
- tmp2 = Bool_closure_tbl[tmp1]
- if (tmp2 & 7 != 0) then ... // normal tagged case
-
-but this junk won't optimise away. What we really want is just an
-inline comparison:
-
- if (a < b) then ...
-
-So we add a special case to generate
-
- tmp1 = a < b
- if (tmp1 == 0) then ...
-
-and later optimisations will further improve this.
-
-Now that #6135 has been resolved it should be possible to remove that
-special case. The idea behind this special case and pre-6135 implementation
-of Bool-returning primops was that tagToEnum# was added implicitly in the
-codegen and then optimized away. Now the call to tagToEnum# is explicit
-in the source code, which allows it to be optimized away at earlier stages
-of compilation (i.e. at the Core level).
-
-Note [Scrutinising VoidRep]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Suppose we have this STG code:
- f = \[s : State# RealWorld] ->
- case s of _ -> blah
-This is very odd. Why are we scrutinising a state token? But it
-can arise with bizarre NOINLINE pragmas (#9964)
- crash :: IO ()
- crash = IO (\s -> let {-# NOINLINE s' #-}
- s' = s
- in (# s', () #))
-
-Now the trouble is that 's' has VoidRep, and we do not bind void
-arguments in the environment; they don't live anywhere. See the
-calls to nonVoidIds in various places. So we must not look up
-'s' in the environment. Instead, just evaluate the RHS! Simple.
-
-Note [Dead-binder optimisation]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A case-binder, or data-constructor argument, may be marked as dead,
-because we preserve occurrence-info on binders in CoreTidy (see
-CoreTidy.tidyIdBndr).
-
-If the binder is dead, we can sometimes eliminate a load. While
-CmmSink will eliminate that load, it's very easy to kill it at source
-(giving CmmSink less work to do), and in any case CmmSink only runs
-with -O. Since the majority of case binders are dead, this
-optimisation probably still has a great benefit-cost ratio and we want
-to keep it for -O0. See also Phab:D5358.
-
-This was probably also the reason the occurrence hack in Phab:D5339
-existed, perhaps because the occurrence information preserved by
-'CoreTidy.tidyIdBndr' was insufficient. But now that CmmSink does the
-job we have deleted the hacks.
--}
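-
-{- A concrete (made-up) instance of the dead-binder optimisation: in
-
-       case tagToEnum# (a <# b) of wild { False -> e1; True -> e2 }
-
-   where 'wild' does not occur in e1 or e2, the binder is dead, so the
-   enumeration-primop case above skips the
-       wild = Bool_closure_tbl[tag]
-   assignment altogether rather than leaving it for CmmSink to discard.
--}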
-
-cgCase (StgApp v []) _ (PrimAlt _) alts
- | isVoidRep (idPrimRep v) -- See Note [Scrutinising VoidRep]
- , [(DEFAULT, _, rhs)] <- alts
- = cgExpr rhs
-
-{- Note [Dodgy unsafeCoerce 1]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider
- case (x :: HValue) |> co of (y :: MutVar# Int)
- DEFAULT -> ...
-We want to generate an assignment
- y := x
-We want to allow this assignment to be generated in the case when the
-types are compatible, because this allows some slightly-dodgy but
-occasionally-useful casts to be used, such as in RtClosureInspect
-where we cast an HValue to a MutVar# so we can print out the contents
-of the MutVar#. If instead we generate code that enters the HValue,
-then we'll get a runtime panic, because the HValue really is a
-MutVar#. The types are compatible though, so we can just generate an
-assignment.
--}
-cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
- | isUnliftedType (idType v) -- Note [Dodgy unsafeCoerce 1]
- || reps_compatible
- = -- assignment suffices for unlifted types
- do { dflags <- getDynFlags
- ; unless reps_compatible $
- pprPanic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
- (pp_bndr v $$ pp_bndr bndr)
- ; v_info <- getCgIdInfo v
- ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr)))
- (idInfoToAmode v_info)
- -- Add bndr to the environment
- ; _ <- bindArgToReg (NonVoid bndr)
- ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
- where
- reps_compatible = ((==) `on` (primRepSlot . idPrimRep)) v bndr
- -- Must compare SlotTys, not proper PrimReps, because with unboxed sums,
- -- the types of the binders are generated from slotPrimRep and might not
- -- match. Test case:
- -- swap :: (# Int | Int #) -> (# Int | Int #)
- -- swap (# x | #) = (# | x #)
- -- swap (# | y #) = (# y | #)
-
- pp_bndr id = ppr id <+> dcolon <+> ppr (idType id) <+> parens (ppr (idPrimRep id))
-
-{- Note [Dodgy unsafeCoerce 2, #3132]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-In all other cases of a lifted Id being cast to an unlifted type, the
-Id should be bound to bottom, otherwise this is an unsafe use of
-unsafeCoerce. We can generate code to enter the Id and assume that
-it will never return. Hence, we emit the usual enter/return code, and
-because bottom must be untagged, it will be entered. The Sequel is a
-type-correct assignment, albeit bogus. The (dead) continuation loops;
-it would be better to invoke some kind of panic function here.
--}
-cgCase scrut@(StgApp v []) _ (PrimAlt _) _
- = do { dflags <- getDynFlags
- ; mb_cc <- maybeSaveCostCentre True
- ; _ <- withSequel
- (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
- ; restoreCurrentCostCentre mb_cc
- ; emitComment $ mkFastString "should be unreachable code"
- ; l <- newBlockId
- ; emitLabel l
- ; emit (mkBranch l) -- an infinite loop
- ; return AssignedDirectly
- }
-
-{- Note [Handle seq#]
-~~~~~~~~~~~~~~~~~~~~~
-See Note [seq# magic] in PrelRules.
-The special case for seq# in cgCase does this:
-
- case seq# a s of v
- (# s', a' #) -> e
-==>
- case a of v
- (# s', a' #) -> e
-
-(taking advantage of the fact that the return convention for (# State#, a #)
-is the same as the return convention for just 'a')
--}
-
-cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
- = -- Note [Handle seq#]
- -- And see Note [seq# magic] in PrelRules
- -- Use the same return convention as vanilla 'a'.
- cgCase (StgApp a []) bndr alt_type alts
-
-cgCase scrut bndr alt_type alts
- = -- the general case
- do { dflags <- getDynFlags
- ; up_hp_usg <- getVirtHp -- Upstream heap usage
- ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
- alt_regs = map (idToReg dflags) ret_bndrs
- ; simple_scrut <- isSimpleScrut scrut alt_type
- ; let do_gc | is_cmp_op scrut = False -- See Note [GC for conditionals]
- | not simple_scrut = True
- | isSingleton alts = False
- | up_hp_usg > 0 = False
- | otherwise = True
- -- cf Note [Compiling case expressions]
- gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts
-
- ; mb_cc <- maybeSaveCostCentre simple_scrut
-
- ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -}
- ; ret_kind <- withSequel sequel (cgExpr scrut)
- ; restoreCurrentCostCentre mb_cc
- ; _ <- bindArgsToRegs ret_bndrs
- ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
- }
- where
- is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op
- is_cmp_op _ = False
-
-{- Note [GC for conditionals]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-For boolean conditionals it seems that we have always done NoGcInAlts.
-That is, we have always done the GC check before the conditional.
-This is enshrined in the special case for
- case tagToEnum# (a>b) of ...
-See Note [case on bool]
-
-It's odd, and it's flagrantly inconsistent with the rules described in
-Note [Compiling case expressions]. However, after eliminating the
-tagToEnum# (#13397) we will have:
- case (a>b) of ...
-Rather than make it behave quite differently, I am testing for a
-comparison operator here in the general case as well.
-
-ToDo: figure out what the Right Rule should be.
-
-Note [scrut sequel]
-~~~~~~~~~~~~~~~~~~~
-The job of the scrutinee is to assign its value(s) to alt_regs.
-Additionally, if we plan to do a heap-check in the alternatives (see
-Note [Compiling case expressions]), then we *must* retreat Hp to
-recover any unused heap before passing control to the sequel. If we
-don't do this, then any unused heap will become slop because the heap
-check will reset the heap usage. Slop in the heap breaks LDV profiling
-(+RTS -hb) which needs to do a linear sweep through the nursery.
-
-
-Note [Inlining out-of-line primops and heap checks]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-If shouldInlinePrimOp returns True when called from StgCmmExpr for the
-purpose of heap check placement, we *must* inline the primop later in
-StgCmmPrim. If we don't, things will go wrong.
--}
-
------------------
-maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
-maybeSaveCostCentre simple_scrut
- | simple_scrut = return Nothing
- | otherwise = saveCurrentCostCentre
-
-
------------------
-isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool
--- Simple scrutinee, does not block or allocate; hence safe to amalgamate
--- heap usage from alternatives into the stuff before the case
--- NB: if you get this wrong, and claim that the expression doesn't allocate
--- when it does, you'll deeply mess up allocation
-isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args
-isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... }
-isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... }
-isSimpleScrut _ _ = return False
-
-isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
--- True iff the op cannot block or allocate
-isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
--- dataToTag# evaluates its argument, see Note [dataToTag#] in primops.txt.pp
-isSimpleOp (StgPrimOp DataToTagOp) _ = return False
-isSimpleOp (StgPrimOp op) stg_args = do
- arg_exprs <- getNonVoidArgAmodes stg_args
- dflags <- getDynFlags
- -- See Note [Inlining out-of-line primops and heap checks]
- return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
-isSimpleOp (StgPrimCallOp _) _ = return False
-
------------------
-chooseReturnBndrs :: Id -> AltType -> [CgStgAlt] -> [NonVoid Id]
--- These are the binders of a case that are assigned by the evaluation of the
--- scrutinee.
--- They're non-void, see Note [Post-unarisation invariants] in UnariseStg.
-chooseReturnBndrs bndr (PrimAlt _) _alts
- = assertNonVoidIds [bndr]
-
-chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)]
- = ASSERT2(ids `lengthIs` n, ppr n $$ ppr ids $$ ppr _bndr)
- assertNonVoidIds ids -- 'bndr' is not assigned!
-
-chooseReturnBndrs bndr (AlgAlt _) _alts
- = assertNonVoidIds [bndr] -- Only 'bndr' is assigned
-
-chooseReturnBndrs bndr PolyAlt _alts
- = assertNonVoidIds [bndr] -- Only 'bndr' is assigned
-
-chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
- -- MultiValAlt has only one alternative
-
--------------------------------------
-cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [CgStgAlt]
- -> FCode ReturnKind
--- At this point the result of the case are in the binders
-cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
- = maybeAltHeapCheck gc_plan (cgExpr rhs)
-
-cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)]
- = maybeAltHeapCheck gc_plan (cgExpr rhs)
- -- Here bndrs are *already* in scope, so don't rebind them
-
-cgAlts gc_plan bndr (PrimAlt _) alts
- = do { dflags <- getDynFlags
-
- ; tagged_cmms <- cgAltRhss gc_plan bndr alts
-
- ; let bndr_reg = CmmLocal (idToReg dflags bndr)
- (DEFAULT,deflt) = head tagged_cmms
- -- PrimAlts always have a DEFAULT case
- -- and it always comes first
-
- tagged_cmms' = [(lit,code)
- | (LitAlt lit, code) <- tagged_cmms]
- ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
- ; return AssignedDirectly }
-
-cgAlts gc_plan bndr (AlgAlt tycon) alts
- = do { dflags <- getDynFlags
-
- ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts
-
- ; let fam_sz = tyConFamilySize tycon
- bndr_reg = CmmLocal (idToReg dflags bndr)
-
- -- Is the constructor tag in the node reg?
- ; if isSmallFamily dflags fam_sz
- then do
- let -- Yes, bndr_reg has constr. tag in ls bits
- tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
- branches' = [(tag+1,branch) | (tag,branch) <- branches]
- emitSwitch tag_expr branches' mb_deflt 1 fam_sz
-
- else -- No, get tag from info table
- let -- Note that ptr _always_ has tag 1
- -- when the family size is big enough
- untagged_ptr = cmmRegOffB bndr_reg (-1)
- tag_expr = getConstrTag dflags (untagged_ptr)
- in emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
-
- ; return AssignedDirectly }
-
-cgAlts _ _ _ _ = panic "cgAlts"
- -- UbxTupAlt and PolyAlt have only one alternative
-
-
--- Note [alg-alt heap check]
---
--- In an algebraic case with more than one alternative, we will have
--- code like
---
--- L0:
--- x = R1
--- goto L1
--- L1:
--- if (x & 7 >= 2) then goto L2 else goto L3
--- L2:
--- Hp = Hp + 16
--- if (Hp > HpLim) then goto L4
--- ...
--- L4:
--- call gc() returns to L5
--- L5:
--- x = R1
--- goto L1
-
--------------------
-cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
- -> FCode ( Maybe CmmAGraphScoped
- , [(ConTagZ, CmmAGraphScoped)] )
-cgAlgAltRhss gc_plan bndr alts
- = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
-
- ; let { mb_deflt = case tagged_cmms of
- ((DEFAULT,rhs) : _) -> Just rhs
- _other -> Nothing
- -- DEFAULT is always first, if present
-
- ; branches = [ (dataConTagZ con, cmm)
- | (DataAlt con, cmm) <- tagged_cmms ]
- }
-
- ; return (mb_deflt, branches)
- }
-
-
--------------------
-cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
- -> FCode [(AltCon, CmmAGraphScoped)]
-cgAltRhss gc_plan bndr alts = do
- dflags <- getDynFlags
- let
- base_reg = idToReg dflags bndr
- cg_alt :: CgStgAlt -> FCode (AltCon, CmmAGraphScoped)
- cg_alt (con, bndrs, rhs)
- = getCodeScoped $
- maybeAltHeapCheck gc_plan $
- do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
- -- alt binders are always non-void,
- -- see Note [Post-unarisation invariants] in UnariseStg
- ; _ <- cgExpr rhs
- ; return con }
- forkAlts (map cg_alt alts)
-
-maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
-maybeAltHeapCheck (NoGcInAlts,_) code = code
-maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
- altHeapCheck regs code
-maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
- altHeapCheckReturnsTo regs lret off code
-
------------------------------------------------------------------------------
--- Tail calls
------------------------------------------------------------------------------
-
-cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
-cgConApp con stg_args
- | isUnboxedTupleCon con -- Unboxed tuple: assign and return
- = do { arg_exprs <- getNonVoidArgAmodes stg_args
- ; tickyUnboxedTupleReturn (length arg_exprs)
- ; emitReturn arg_exprs }
-
- | otherwise -- Boxed constructors; allocate and return
- = ASSERT2( stg_args `lengthIs` countConRepArgs con, ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args )
- do { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
- currentCCS con (assertNonVoidStgArgs stg_args)
- -- con args are always non-void,
- -- see Note [Post-unarisation invariants] in UnariseStg
- -- The first "con" says that the name bound to this
- -- closure is "con", which is a bit of a fudge, but
- -- it only affects profiling (hence the False)
-
- ; emit =<< fcode_init
- ; tickyReturnNewCon (length stg_args)
- ; emitReturn [idInfoToAmode idinfo] }
-
-cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
-cgIdApp fun_id args = do
- dflags <- getDynFlags
- fun_info <- getCgIdInfo fun_id
- self_loop_info <- getSelfLoop
- let fun_arg = StgVarArg fun_id
- fun_name = idName fun_id
- fun = idInfoToAmode fun_info
- lf_info = cg_lf fun_info
- n_args = length args
- v_args = length $ filter (isVoidTy . stgArgType) args
- node_points dflags = nodeMustPointToIt dflags lf_info
- case getCallMethod dflags fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of
- -- A value in WHNF, so we can just return it.
- ReturnIt
- | isVoidTy (idType fun_id) -> emitReturn []
- | otherwise -> emitReturn [fun]
- -- ToDo: does ReturnIt guarantee tagged?
-
- EnterIt -> ASSERT( null args ) -- Discarding arguments
- emitEnter fun
-
- SlowCall -> do -- A slow function call via the RTS apply routines
- { tickySlowCall lf_info args
- ; emitComment $ mkFastString "slowCall"
- ; slowCall fun args }
-
- -- A direct function call (possibly with some left-over arguments)
- DirectEntry lbl arity -> do
- { tickyDirectCall arity args
- ; if node_points dflags
- then directCall NativeNodeCall lbl arity (fun_arg:args)
- else directCall NativeDirectCall lbl arity args }
-
- -- Let-no-escape call or self-recursive tail-call
- JumpToIt blk_id lne_regs -> do
- { adjustHpBackwards -- always do this before a tail-call
- ; cmm_args <- getNonVoidArgAmodes args
- ; emitMultiAssign lne_regs cmm_args
- ; emit (mkBranch blk_id)
- ; return AssignedDirectly }
-
--- Note [Self-recursive tail calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- Self-recursive tail calls can be optimized into a local jump in the same
--- way as let-no-escape bindings (see Note [What is a non-escaping let] in
--- stgSyn/CoreToStg.hs). Consider this:
---
--- foo.info:
--- a = R1 // calling convention
--- b = R2
--- goto L1
--- L1: ...
--- ...
--- ...
--- L2: R1 = x
--- R2 = y
--- call foo(R1,R2)
---
--- Instead of putting x and y into registers (or other locations required by the
--- calling convention) and performing a call we can put them into local
--- variables a and b and perform jump to L1:
---
--- foo.info:
--- a = R1
--- b = R2
--- goto L1
--- L1: ...
--- ...
--- ...
--- L2: a = x
--- b = y
--- goto L1
---
--- This can be done only when function is calling itself in a tail position
--- and only if the call passes number of parameters equal to function's arity.
--- Note that this cannot be performed if a function calls itself with a
--- continuation.
---
--- This in fact implements optimization known as "loopification". It was
--- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
--- by Krzysztof Woś, though we use a different approach. Krzysztof performed his
--- optimization at the Cmm level, whereas we perform ours during code generation
--- (the Stg-to-Cmm pass), essentially making sure that optimized Cmm code is
--- generated in the first place.
---
--- Implementation is spread across a couple of places in the code:
---
--- * FCode monad stores additional information in its reader environment
--- (cgd_self_loop field). This information tells us which function can
--- tail call itself in an optimized way (it is the function currently
--- being compiled), what the label of the loop header is (L1 in the example
--- above), and in which local registers we should place the arguments
--- before making a call (these would be a and b in the example above).
---
--- * Whenever we are compiling a function, we set that information to reflect
--- the fact that the function currently being compiled can be jumped to,
--- instead of called. This is done in closureCodeBody in StgCmmBind.
---
--- * We also have to emit a label to which we will be jumping. We make sure
--- that the label is placed after a stack check but before the heap
--- check. The reason is that making a recursive tail-call does not increase
--- the stack so we only need to check once. But it may grow the heap, so we
--- have to repeat the heap check in every self-call. This is done in
--- do_checks in StgCmmHeap.
---
--- * When we begin compilation of another closure we remove the additional
--- information from the environment. This is done by forkClosureBody
--- in StgCmmMonad. Other functions that duplicate the environment -
--- forkLneBody, forkAlts, codeOnly - duplicate that information. In other
--- words, we only need to clean the environment of the self-loop information
--- when compiling right hand side of a closure (binding).
---
--- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
--- of call will be generated. getCallMethod decides to generate a self
--- recursive tail call when (a) the environment stores information about a
--- possible self tail-call; (b) that tail call is to the function currently
--- being compiled; (c) the number of passed non-void arguments is equal to
--- the function's arity; and (d) loopification is turned on via the
--- -floopification command-line option.
---
--- * Command line option to turn loopification on and off is implemented in
--- DynFlags.
---
---
--- Note [Void arguments in self-recursive tail calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- State# tokens can get in the way of the loopification optimization as seen in
--- #11372. Consider this:
---
--- foo :: [a]
--- -> (a -> State# s -> (# State# s, Bool #))
--- -> State# s
--- -> (# State# s, Maybe a #)
--- foo [] f s = (# s, Nothing #)
--- foo (x:xs) f s = case f x s of
--- (# s', b #) -> case b of
--- True -> (# s', Just x #)
--- False -> foo xs f s'
---
--- We would like to compile the call to foo as a local jump instead of a call
--- (see Note [Self-recursive tail calls]). However, the generated function has
--- an arity of 2 while we apply it to 3 arguments, one of them being of void
--- type. Thus, we mustn't count arguments of void type when checking whether
--- we can turn a call into a self-recursive jump.
---
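--- As an extra illustration (a made-up function, not from a test case):
---
---   sumTo :: Int# -> Int# -> Int#
---   sumTo 0# acc = acc
---   sumTo n acc = sumTo (n -# 1#) (acc +# n)
---
--- The recursive call is a self tail-call passing two non-void arguments,
--- matching the function's arity, so with -floopification it is compiled as
--- two assignments to the argument registers followed by a goto to the loop
--- header (placed after the function's stack check), rather than as a call
--- through the return convention.
---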
-
-emitEnter :: CmmExpr -> FCode ReturnKind
-emitEnter fun = do
- { dflags <- getDynFlags
- ; adjustHpBackwards
- ; sequel <- getSequel
- ; updfr_off <- getUpdFrameOff
- ; case sequel of
- -- For a return, we have the option of generating a tag-test or
- -- not. If the value is tagged, we can return directly, which
- -- is quicker than entering the value. This is a code
- -- size/speed trade-off: when optimising for speed rather than
- -- size we could generate the tag test.
- --
- -- Right now, we do what the old codegen did, and omit the tag
- -- test, just generating an enter.
- Return -> do
- { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
- ; emit $ mkJump dflags NativeNodeCall entry
- [cmmUntag dflags fun] updfr_off
- ; return AssignedDirectly
- }
-
- -- The result will be scrutinised in the sequel. This is where
- -- we generate a tag-test to avoid entering the closure if
- -- possible.
- --
- -- The generated code will be something like this:
- --
- -- R1 = fun -- copyout
- -- if (fun & 7 != 0) goto Lret else goto Lcall
- -- Lcall:
- -- call [fun] returns to Lret
- -- Lret:
- -- fun' = R1 -- copyin
- -- ...
- --
- -- Note in particular that the label Lret is used as a
- -- destination by both the tag-test and the call. This is
- -- because Lret will necessarily be a proc-point, and we want to
- -- ensure that we generate only one proc-point for this
- -- sequence.
- --
- -- Furthermore, we tell the caller that we generated a native
- -- return continuation by returning (ReturnedTo Lret off), so
- -- that the continuation can be reused by the heap-check failure
- -- code in the enclosing case expression.
- --
- AssignTo res_regs _ -> do
- { lret <- newBlockId
- ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
- ; lcall <- newBlockId
- ; updfr_off <- getUpdFrameOff
- ; let area = Young lret
- ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
- [fun] updfr_off []
- -- refer to fun via nodeReg after the copyout, to avoid having
- -- both live simultaneously; this sometimes enables fun to be
- -- inlined in the RHS of the R1 assignment.
- ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
- the_call = toCall entry (Just lret) updfr_off off outArgs regs
- ; tscope <- getTickScope
- ; emit $
- copyout <*>
- mkCbranch (cmmIsTagged dflags (CmmReg nodeReg))
- lret lcall Nothing <*>
- outOfLine lcall (the_call,tscope) <*>
- mkLabel lret tscope <*>
- copyin
- ; return (ReturnedTo lret off)
- }
- }
-
-------------------------------------------------------------------------
--- Ticks
-------------------------------------------------------------------------
-
--- | Generate Cmm code for a tick. Depending on the type of Tickish,
--- this will either generate actual Cmm instrumentation code, or
--- simply pass on the annotation as a @CmmTickish@.
-cgTick :: Tickish Id -> FCode ()
-cgTick tick
- = do { dflags <- getDynFlags
- ; case tick of
- ProfNote cc t p -> emitSetCCC cc t p
- HpcTick m n -> emit (mkTickBox dflags m n)
- SourceNote s n -> emitTick $ SourceNote s n
- _other -> return () -- ignore
- }
diff --git a/compiler/codeGen/StgCmmExtCode.hs b/compiler/codeGen/StgCmmExtCode.hs
deleted file mode 100644
index 1d35c3454e..0000000000
--- a/compiler/codeGen/StgCmmExtCode.hs
+++ /dev/null
@@ -1,252 +0,0 @@
-{-# LANGUAGE DeriveFunctor #-}
--- | Our extended FCode monad.
-
--- We add a mapping from names to CmmExpr, to support local variable names in
--- the concrete C-- code. The unique supply of the underlying FCode monad
--- is used to grab a new unique for each local variable.
-
--- In C--, a local variable can be declared anywhere within a proc,
--- and it scopes from the beginning of the proc to the end. Hence, we have
--- to collect declarations as we parse the proc, and feed the environment
--- back in circularly (to avoid a two-pass algorithm).
-
-module StgCmmExtCode (
- CmmParse, unEC,
- Named(..), Env,
-
- loopDecls,
- getEnv,
-
- withName,
- getName,
-
- newLocal,
- newLabel,
- newBlockId,
- newFunctionName,
- newImport,
- lookupLabel,
- lookupName,
-
- code,
- emit, emitLabel, emitAssign, emitStore,
- getCode, getCodeR, getCodeScoped,
- emitOutOfLine,
- withUpdFrameOff, getUpdFrameOff
-)
-
-where
-
-import GhcPrelude
-
-import qualified StgCmmMonad as F
-import StgCmmMonad (FCode, newUnique)
-
-import Cmm
-import CLabel
-import MkGraph
-
-import BlockId
-import DynFlags
-import FastString
-import Module
-import UniqFM
-import Unique
-import UniqSupply
-
-import Control.Monad (ap)
-
--- | The environment contains variable definitions or blockids.
-data Named
- = VarN CmmExpr -- ^ Holds CmmLit(CmmLabel ..) which gives the label type,
- -- eg, RtsLabel, ForeignLabel, CmmLabel etc.
-
- | FunN UnitId -- ^ A function name from this package
- | LabelN BlockId -- ^ A blockid of some code or data.
-
--- | An environment of named things.
-type Env = UniqFM Named
-
--- | Local declarations that are in scope during code generation.
-type Decls = [(FastString,Named)]
-
--- | Does a computation in the FCode monad, with a current environment
--- and a list of local declarations. Returns the resulting list of declarations.
-newtype CmmParse a
- = EC { unEC :: String -> Env -> Decls -> FCode (Decls, a) }
- deriving (Functor)
-
-type ExtCode = CmmParse ()
-
-returnExtFC :: a -> CmmParse a
-returnExtFC a = EC $ \_ _ s -> return (s, a)
-
-thenExtFC :: CmmParse a -> (a -> CmmParse b) -> CmmParse b
-thenExtFC (EC m) k = EC $ \c e s -> do (s',r) <- m c e s; unEC (k r) c e s'
-
-instance Applicative CmmParse where
- pure = returnExtFC
- (<*>) = ap
-
-instance Monad CmmParse where
- (>>=) = thenExtFC
-
-instance MonadUnique CmmParse where
- getUniqueSupplyM = code getUniqueSupplyM
- getUniqueM = EC $ \_ _ decls -> do
- u <- getUniqueM
- return (decls, u)
-
-instance HasDynFlags CmmParse where
- getDynFlags = EC (\_ _ d -> do dflags <- getDynFlags
- return (d, dflags))
-
-
--- | Takes the variable declarations and imports from the monad
--- and makes an environment, which is looped back into the computation.
--- In this way, we can have embedded declarations that scope over the whole
--- procedure, and imports that scope over the entire module.
--- Discards the local declarations produced within the given computation.
---
-loopDecls :: CmmParse a -> CmmParse a
-loopDecls (EC fcode) =
- EC $ \c e globalDecls -> do
- (_, a) <- F.fixC $ \ ~(decls, _) ->
- fcode c (addListToUFM e decls) globalDecls
- return (globalDecls, a)
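-
--- For instance (an illustrative fragment, not taken from the RTS sources),
--- thanks to the knot-tied environment a .cmm procedure may use a local
--- variable before its declaration:
---
---   foo (W_ a) {
---       x = a;        // 'x' is not declared yet at this point...
---       W_ x;         // ...but its declaration scopes over the whole proc
---       return (x);
---   }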
-
-
--- | Get the current environment from the monad.
-getEnv :: CmmParse Env
-getEnv = EC $ \_ e s -> return (s, e)
-
--- | Get the current context name from the monad
-getName :: CmmParse String
-getName = EC $ \c _ s -> return (s, c)
-
--- | Set context name for a sub-parse
-withName :: String -> CmmParse a -> CmmParse a
-withName c' (EC fcode) = EC $ \_ e s -> fcode c' e s
-
-addDecl :: FastString -> Named -> ExtCode
-addDecl name named = EC $ \_ _ s -> return ((name, named) : s, ())
-
-
--- | Add a new variable to the list of local declarations.
--- The CmmExpr says where the value is stored.
-addVarDecl :: FastString -> CmmExpr -> ExtCode
-addVarDecl var expr = addDecl var (VarN expr)
-
--- | Add a new label to the list of local declarations.
-addLabel :: FastString -> BlockId -> ExtCode
-addLabel name block_id = addDecl name (LabelN block_id)
-
-
--- | Create a fresh local variable of a given type.
-newLocal
- :: CmmType -- ^ data type
- -> FastString -- ^ name of variable
- -> CmmParse LocalReg -- ^ register holding the value
-
-newLocal ty name = do
- u <- code newUnique
- let reg = LocalReg u ty
- addVarDecl name (CmmReg (CmmLocal reg))
- return reg
-
-
--- | Allocate a fresh label.
-newLabel :: FastString -> CmmParse BlockId
-newLabel name = do
- u <- code newUnique
- addLabel name (mkBlockId u)
- return (mkBlockId u)
-
--- | Add a local function to the environment.
-newFunctionName
- :: FastString -- ^ name of the function
- -> UnitId -- ^ package of the current module
- -> ExtCode
-
-newFunctionName name pkg = addDecl name (FunN pkg)
-
-
--- | Add an imported foreign label to the list of local declarations.
--- If this is done at the start of the module the declaration will scope
--- over the whole module.
-newImport
- :: (FastString, CLabel)
- -> CmmParse ()
-
-newImport (name, cmmLabel)
- = addVarDecl name (CmmLit (CmmLabel cmmLabel))
-
-
--- | Lookup the BlockId bound to the label with this name.
--- If one hasn't been bound yet, create a fresh one based on the
--- Unique of the name.
-lookupLabel :: FastString -> CmmParse BlockId
-lookupLabel name = do
- env <- getEnv
- return $
- case lookupUFM env name of
- Just (LabelN l) -> l
- _other -> mkBlockId (newTagUnique (getUnique name) 'L')
-
-
--- | Lookup the location of a named variable.
--- Unknown names are treated as if they had been 'import'ed from the runtime system.
--- This saves us a lot of bother in the RTS sources, at the expense of
--- deferring some errors to link time.
-lookupName :: FastString -> CmmParse CmmExpr
-lookupName name = do
- env <- getEnv
- return $
- case lookupUFM env name of
- Just (VarN e) -> e
- Just (FunN pkg) -> CmmLit (CmmLabel (mkCmmCodeLabel pkg name))
- _other -> CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId name))
-
-
--- | Lift an FCode computation into the CmmParse monad
-code :: FCode a -> CmmParse a
-code fc = EC $ \_ _ s -> do
- r <- fc
- return (s, r)
-
-emit :: CmmAGraph -> CmmParse ()
-emit = code . F.emit
-
-emitLabel :: BlockId -> CmmParse ()
-emitLabel = code . F.emitLabel
-
-emitAssign :: CmmReg -> CmmExpr -> CmmParse ()
-emitAssign l r = code (F.emitAssign l r)
-
-emitStore :: CmmExpr -> CmmExpr -> CmmParse ()
-emitStore l r = code (F.emitStore l r)
-
-getCode :: CmmParse a -> CmmParse CmmAGraph
-getCode (EC ec) = EC $ \c e s -> do
- ((s',_), gr) <- F.getCodeR (ec c e s)
- return (s', gr)
-
-getCodeR :: CmmParse a -> CmmParse (a, CmmAGraph)
-getCodeR (EC ec) = EC $ \c e s -> do
- ((s', r), gr) <- F.getCodeR (ec c e s)
- return (s', (r,gr))
-
-getCodeScoped :: CmmParse a -> CmmParse (a, CmmAGraphScoped)
-getCodeScoped (EC ec) = EC $ \c e s -> do
- ((s', r), gr) <- F.getCodeScoped (ec c e s)
- return (s', (r,gr))
-
-emitOutOfLine :: BlockId -> CmmAGraphScoped -> CmmParse ()
-emitOutOfLine l g = code (F.emitOutOfLine l g)
-
-withUpdFrameOff :: UpdFrameOffset -> CmmParse () -> CmmParse ()
-withUpdFrameOff size inner
- = EC $ \c e s -> F.withUpdFrameOff size $ (unEC inner) c e s
-
-getUpdFrameOff :: CmmParse UpdFrameOffset
-getUpdFrameOff = code $ F.getUpdFrameOff
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
deleted file mode 100644
index 172dcba219..0000000000
--- a/compiler/codeGen/StgCmmForeign.hs
+++ /dev/null
@@ -1,627 +0,0 @@
------------------------------------------------------------------------------
---
--- Code generation for foreign calls.
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmForeign (
- cgForeignCall,
- emitPrimCall, emitCCall,
- emitForeignCall, -- For CmmParse
- emitSaveThreadState,
- saveThreadState,
- emitLoadThreadState,
- loadThreadState,
- emitOpenNursery,
- emitCloseNursery,
- ) where
-
-import GhcPrelude hiding( succ, (<*>) )
-
-import StgSyn
-import StgCmmProf (storeCurCCS, ccsType)
-import StgCmmEnv
-import StgCmmMonad
-import StgCmmUtils
-import StgCmmClosure
-import StgCmmLayout
-
-import BlockId (newBlockId)
-import Cmm
-import CmmUtils
-import MkGraph
-import Type
-import RepType
-import CLabel
-import SMRep
-import ForeignCall
-import DynFlags
-import Maybes
-import Outputable
-import UniqSupply
-import BasicTypes
-
-import TyCoRep
-import TysPrim
-import Util (zipEqual)
-
-import Control.Monad
-
------------------------------------------------------------------------------
--- Code generation for Foreign Calls
------------------------------------------------------------------------------
-
--- | Emit code for a foreign call, and return the results to the sequel.
--- Precondition: the length of the arguments list is the same as the
--- arity of the foreign function.
-cgForeignCall :: ForeignCall -- the op
- -> Type -- type of foreign function
- -> [StgArg] -- x,y arguments
- -> Type -- result type
- -> FCode ReturnKind
-
-cgForeignCall (CCall (CCallSpec target cconv safety)) typ stg_args res_ty
- = do { dflags <- getDynFlags
- ; let -- in the stdcall calling convention, the symbol needs @size appended
- -- to it, where size is the total number of bytes of arguments. We
- -- attach this info to the CLabel here, and the CLabel pretty printer
- -- will generate the suffix when the label is printed.
- call_size args
- | StdCallConv <- cconv = Just (sum (map arg_size args))
- | otherwise = Nothing
-
- -- ToDo: this might not be correct for 64-bit API
- arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg)
- (wORD_SIZE dflags)
- ; cmm_args <- getFCallArgs stg_args typ
- ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
- ; let ((call_args, arg_hints), cmm_target)
- = case target of
- StaticTarget _ _ _ False ->
- panic "cgForeignCall: unexpected FFI value import"
- StaticTarget _ lbl mPkgId True
- -> let labelSource
- = case mPkgId of
- Nothing -> ForeignLabelInThisPackage
- Just pkgId -> ForeignLabelInPackage pkgId
- size = call_size cmm_args
- in ( unzip cmm_args
- , CmmLit (CmmLabel
- (mkForeignLabel lbl size labelSource IsFunction)))
-
- DynamicTarget -> case cmm_args of
- (fn,_):rest -> (unzip rest, fn)
- [] -> panic "cgForeignCall []"
- fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
- call_target = ForeignTarget cmm_target fc
-
- -- we want to emit code for the call, and then emitReturn.
- -- However, if the sequel is AssignTo, we shortcut a little
- -- and generate a foreign call that assigns the results
- -- directly. Otherwise we end up generating a bunch of
- -- useless "r = r" assignments, which are not merely annoying:
- -- they prevent the common block elimination from working correctly
- -- in the case of a safe foreign call.
- -- See Note [safe foreign call convention]
- --
- ; sequel <- getSequel
- ; case sequel of
- AssignTo assign_to_these _ ->
- emitForeignCall safety assign_to_these call_target call_args
-
- _something_else ->
- do { _ <- emitForeignCall safety res_regs call_target call_args
- ; emitReturn (map (CmmReg . CmmLocal) res_regs)
- }
- }
-
-{- Note [safe foreign call convention]
-
-The simple thing to do for a safe foreign call would be the same as an
-unsafe one: just
-
- emitForeignCall ...
- emitReturn ...
-
-but consider what happens in this case
-
- case foo x y z of
- (# s, r #) -> ...
-
-The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
-as the result reg, and we generate
-
- r = foo(x,y,z) returns to L1 -- emitForeignCall
- L1:
- r = r -- emitReturn
- goto L2
-L2:
- ...
-
-Now L1 is a proc point (by definition, it is the continuation of the
-safe foreign call). If L2 does a heap check, then L2 will also be a
-proc point.
-
-Furthermore, the stack layout algorithm has to arrange to save r
-somewhere between the call and the jump to L1, which is annoying: we
-would have to treat r differently from the other live variables, which
-have to be saved *before* the call.
-
-So we adopt a special convention for safe foreign calls: the results
-are copied out according to the NativeReturn convention by the call,
-and the continuation of the call should copyIn the results. (The
-copyOut code is actually inserted when the safe foreign call is
-lowered later). The result regs attached to the safe foreign call are
-only used temporarily to hold the results before they are copied out.
-
-We will now generate this:
-
- r = foo(x,y,z) returns to L1
- L1:
- r = R1 -- copyIn, inserted by mkSafeCall
- goto L2
- L2:
- ... r ...
-
-And when the safe foreign call is lowered later (see Note [lower safe
-foreign calls]) we get this:
-
- suspendThread()
- r = foo(x,y,z)
- resumeThread()
- R1 = r -- copyOut, inserted by lowerSafeForeignCall
- jump L1
- L1:
- r = R1 -- copyIn, inserted by mkSafeCall
- goto L2
- L2:
- ... r ...
-
-Now consider what happens if L2 does a heap check: the Adams
-optimisation kicks in and commons up L1 with the heap-check
-continuation, resulting in just one proc point instead of two. Yay!
--}
-
-
-emitCCall :: [(CmmFormal,ForeignHint)]
- -> CmmExpr
- -> [(CmmActual,ForeignHint)]
- -> FCode ()
-emitCCall hinted_results fn hinted_args
- = void $ emitForeignCall PlayRisky results target args
- where
- (args, arg_hints) = unzip hinted_args
- (results, result_hints) = unzip hinted_results
- target = ForeignTarget fn fc
- fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
-
-
-emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
-emitPrimCall res op args
- = void $ emitForeignCall PlayRisky res (PrimTarget op) args
-
--- alternative entry point, used by CmmParse
-emitForeignCall
- :: Safety
- -> [CmmFormal] -- where to put the results
- -> ForeignTarget -- the op
- -> [CmmActual] -- arguments
- -> FCode ReturnKind
-emitForeignCall safety results target args
- | not (playSafe safety) = do
- dflags <- getDynFlags
- let (caller_save, caller_load) = callerSaveVolatileRegs dflags
- emit caller_save
- target' <- load_target_into_temp target
- args' <- mapM maybe_assign_temp args
- emit $ mkUnsafeCall target' results args'
- emit caller_load
- return AssignedDirectly
-
- | otherwise = do
- dflags <- getDynFlags
- updfr_off <- getUpdFrameOff
- target' <- load_target_into_temp target
- args' <- mapM maybe_assign_temp args
- k <- newBlockId
- let (off, _, copyout) = copyInOflow dflags NativeReturn (Young k) results []
- -- see Note [safe foreign call convention]
- tscope <- getTickScope
- emit $
- ( mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags)))
- (CmmLit (CmmBlock k))
- <*> mkLast (CmmForeignCall { tgt = target'
- , res = results
- , args = args'
- , succ = k
- , ret_args = off
- , ret_off = updfr_off
- , intrbl = playInterruptible safety })
- <*> mkLabel k tscope
- <*> copyout
- )
- return (ReturnedTo k off)
-
-load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
-load_target_into_temp (ForeignTarget expr conv) = do
- tmp <- maybe_assign_temp expr
- return (ForeignTarget tmp conv)
-load_target_into_temp other_target@(PrimTarget _) =
- return other_target
-
--- What we want to do here is create a new temporary for the foreign
--- call argument if it is not safe to use the expression directly,
--- because the expression mentions caller-saves GlobalRegs (see
--- Note [Register Parameter Passing]).
---
--- However, we can't pattern-match on the expression here, because
--- this is used in a loop by CmmParse, and testing the expression
--- results in a black hole. So we always create a temporary, and rely
--- on CmmSink to clean it up later. (Yuck, ToDo). The generated code
--- ends up being the same, at least for the RTS .cmm code.
---
-maybe_assign_temp :: CmmExpr -> FCode CmmExpr
-maybe_assign_temp e = do
- dflags <- getDynFlags
- reg <- newTemp (cmmExprType dflags e)
- emitAssign (CmmLocal reg) e
- return (CmmReg (CmmLocal reg))
-
--- -----------------------------------------------------------------------------
--- Save/restore the thread state in the TSO
-
--- This stuff can't be done in suspendThread/resumeThread, because it
--- refers to global registers which aren't available in the C world.
-
-emitSaveThreadState :: FCode ()
-emitSaveThreadState = do
- dflags <- getDynFlags
- code <- saveThreadState dflags
- emit code
-
--- | Produce code to save the current thread state to @CurrentTSO@
-saveThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
-saveThreadState dflags = do
- tso <- newTemp (gcWord dflags)
- close_nursery <- closeNursery dflags tso
- pure $ catAGraphs [
- -- tso = CurrentTSO;
- mkAssign (CmmLocal tso) currentTSOExpr,
- -- tso->stackobj->sp = Sp;
- mkStore (cmmOffset dflags
- (CmmLoad (cmmOffset dflags
- (CmmReg (CmmLocal tso))
- (tso_stackobj dflags))
- (bWord dflags))
- (stack_SP dflags))
- spExpr,
- close_nursery,
- -- and save the current cost centre stack in the TSO when profiling:
- if gopt Opt_SccProfilingOn dflags then
- mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) cccsExpr
- else mkNop
- ]
-
-emitCloseNursery :: FCode ()
-emitCloseNursery = do
- dflags <- getDynFlags
- tso <- newTemp (bWord dflags)
- code <- closeNursery dflags tso
- emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code
-
-{- |
-@closeNursery dflags tso@ produces code to close the nursery.
-A local register holding the value of @CurrentTSO@ is expected for
-efficiency.
-
-Closing the nursery corresponds to the following code:
-
-@
- tso = CurrentTSO;
- cn = CurrentNursery;
-
- // Update the allocation limit for the current thread. We don't
- // check to see whether it has overflowed at this point, that check is
- // made when we run out of space in the current heap block (stg_gc_noregs)
- // and in the scheduler when context switching (schedulePostRunThread).
- tso->alloc_limit -= Hp + WDS(1) - cn->start;
-
- // Set cn->free to the next unoccupied word in the block
- cn->free = Hp + WDS(1);
-@
--}
-closeNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
-closeNursery df tso = do
- let tsoreg = CmmLocal tso
- cnreg <- CmmLocal <$> newTemp (bWord df)
- pure $ catAGraphs [
- mkAssign cnreg currentNurseryExpr,
-
- -- CurrentNursery->free = Hp+1;
- mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df hpExpr 1),
-
- let alloc =
- CmmMachOp (mo_wordSub df)
- [ cmmOffsetW df hpExpr 1
- , CmmLoad (nursery_bdescr_start df cnreg) (bWord df)
- ]
-
- alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
- in
-
- -- tso->alloc_limit -= alloc
- mkStore alloc_limit (CmmMachOp (MO_Sub W64)
- [ CmmLoad alloc_limit b64
- , CmmMachOp (mo_WordTo64 df) [alloc] ])
- ]
-
-emitLoadThreadState :: FCode ()
-emitLoadThreadState = do
- dflags <- getDynFlags
- code <- loadThreadState dflags
- emit code
-
--- | Produce code to load the current thread state from @CurrentTSO@
-loadThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
-loadThreadState dflags = do
- tso <- newTemp (gcWord dflags)
- stack <- newTemp (gcWord dflags)
- open_nursery <- openNursery dflags tso
- pure $ catAGraphs [
- -- tso = CurrentTSO;
- mkAssign (CmmLocal tso) currentTSOExpr,
- -- stack = tso->stackobj;
- mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
- -- Sp = stack->sp;
- mkAssign spReg (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
- -- SpLim = stack->stack + RESERVED_STACK_WORDS;
- mkAssign spLimReg (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
- (rESERVED_STACK_WORDS dflags)),
- -- HpAlloc = 0;
- -- HpAlloc is assumed to be set to non-zero only by a failed
- -- a heap check, see HeapStackCheck.cmm:GC_GENERIC
- mkAssign hpAllocReg (zeroExpr dflags),
- open_nursery,
- -- and load the current cost centre stack from the TSO when profiling:
- if gopt Opt_SccProfilingOn dflags
- then storeCurCCS
- (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso))
- (tso_CCCS dflags)) (ccsType dflags))
- else mkNop
- ]
-
-
-emitOpenNursery :: FCode ()
-emitOpenNursery = do
- dflags <- getDynFlags
- tso <- newTemp (bWord dflags)
- code <- openNursery dflags tso
- emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code
-
-{- |
-@openNursery dflags tso@ produces code to open the nursery. A local register
-holding the value of @CurrentTSO@ is expected for efficiency.
-
-Opening the nursery corresponds to the following code:
-
-@
- tso = CurrentTSO;
- cn = CurrentNursery;
- bdfree = CurrentNursery->free;
- bdstart = CurrentNursery->start;
-
- // We *add* the currently occupied portion of the nursery block to
- // the allocation limit, because we will subtract it again in
- // closeNursery.
- tso->alloc_limit += bdfree - bdstart;
-
- // Set Hp to the last occupied word of the heap block. Why not the
- -- next unoccupied word? Doing it this way means that we get to use
- // an offset of zero more often, which might lead to slightly smaller
- // code on some architectures.
- Hp = bdfree - WDS(1);
-
- // Set HpLim to the end of the current nursery block (note that this block
- -- might be a block group, consisting of several adjacent blocks).
- HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
-@
--}
-openNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
-openNursery df tso = do
- let tsoreg = CmmLocal tso
- cnreg <- CmmLocal <$> newTemp (bWord df)
- bdfreereg <- CmmLocal <$> newTemp (bWord df)
- bdstartreg <- CmmLocal <$> newTemp (bWord df)
-
- -- These assignments are carefully ordered to reduce register
- -- pressure and generate not completely awful code on x86. To see
- -- what code we generate, look at the assembly for
- -- stg_returnToStackTop in rts/StgStartup.cmm.
- pure $ catAGraphs [
- mkAssign cnreg currentNurseryExpr,
- mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)),
-
- -- Hp = CurrentNursery->free - 1;
- mkAssign hpReg (cmmOffsetW df (CmmReg bdfreereg) (-1)),
-
- mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)),
-
- -- HpLim = CurrentNursery->start +
- -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
- mkAssign hpLimReg
- (cmmOffsetExpr df
- (CmmReg bdstartreg)
- (cmmOffset df
- (CmmMachOp (mo_wordMul df) [
- CmmMachOp (MO_SS_Conv W32 (wordWidth df))
- [CmmLoad (nursery_bdescr_blocks df cnreg) b32],
- mkIntExpr df (bLOCK_SIZE df)
- ])
- (-1)
- )
- ),
-
- -- alloc = bd->free - bd->start
- let alloc =
- CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]
-
- alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
- in
-
- -- tso->alloc_limit += alloc
- mkStore alloc_limit (CmmMachOp (MO_Add W64)
- [ CmmLoad alloc_limit b64
- , CmmMachOp (mo_WordTo64 df) [alloc] ])
-
- ]
-
-nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
- :: DynFlags -> CmmReg -> CmmExpr
-nursery_bdescr_free dflags cn =
- cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
-nursery_bdescr_start dflags cn =
- cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags)
-nursery_bdescr_blocks dflags cn =
- cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
-
-tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff
-tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
-tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags)
-tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
-stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags)
-stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
-
-
-closureField :: DynFlags -> ByteOff -> ByteOff
-closureField dflags off = off + fixedHdrSize dflags
-
--- Note [Unlifted boxed arguments to foreign calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- For certain types passed to foreign calls, we adjust the actual
--- value passed to the call. For ByteArray#, Array#, SmallArray#,
--- and ArrayArray#, we pass the address of the array's payload, not
--- the address of the heap object. For example, consider
--- foreign import "c_foo" foo :: ByteArray# -> Int# -> IO ()
--- At a Haskell call like `foo x y`, we'll generate a C call that
--- is more like
--- c_foo( x+8, y )
--- where the "+8" takes the heap pointer (x :: ByteArray#) and moves
--- it past the header words of the ByteArray object to point directly
--- to the data inside the ByteArray#. (The exact offset depends
--- on the target architecture and on profiling.) By contrast, (y :: Int#)
--- requires no such adjustment.
---
--- This adjustment is performed by 'add_shim'. The size of the
--- adjustment depends on the type of heap object. But
--- how can we determine that type? There are two available options.
--- We could use the types of the actual values that the foreign call
--- has been applied to, or we could use the types present in the
--- foreign function's type. Prior to GHC 8.10, we used the former
--- strategy since it's a little simpler. However, in issue #16650
--- and more compellingly in the comments of
--- https://gitlab.haskell.org/ghc/ghc/merge_requests/939, it was
--- demonstrated that this leads to bad behavior in the presence
--- of unsafeCoerce#. Returning to the above example, suppose the
--- Haskell call looked like
--- foo (unsafeCoerce# p) y
--- where the types of expressions comprising the arguments are
--- p :: (Any :: TYPE 'UnliftedRep)
--- y :: Int#
--- so that the unsafe-coerce is between Any and ByteArray#.
--- These two types have the same kind (they are both represented by
--- a heap pointer) so no GC errors will occur if we do this unsafe coerce.
--- By the time this gets to the code generator the cast has been
--- discarded so we have
--- foo p i
--- But we *must* adjust the pointer to p by a ByteArray# shim,
--- *not* by an Any shim (the Any shim involves no offset at all).
---
--- To avoid this bad behavior, we adopt the second strategy: use
--- the types present in the foreign function's type.
--- In collectStgFArgTypes, we convert the foreign function's
--- type to a list of StgFArgType. Then, in add_shim, we interpret
--- these as numeric offsets.
-
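A rough standalone sketch of the scheme this note describes (toy names and made-up header sizes, not GHC's actual API): the shim is a per-argument byte offset selected from the foreign function's type, never from the argument expression.

  -- Sketch only: stand-ins for StgFArgType/add_shim below.  The header sizes
  -- are placeholders; the real ones depend on word size and on profiling.
  data FArgKind = PlainArg | ArrayArg | SmallArrayArg | ByteArrayArg

  shimBytes :: FArgKind -> Int
  shimBytes PlainArg      = 0
  shimBytes ArrayArg      = 24   -- stands in for arrPtrsHdrSize
  shimBytes SmallArrayArg = 16   -- stands in for smallArrPtrsHdrSize
  shimBytes ByteArrayArg  = 16   -- stands in for arrWordsHdrSize

  -- The value actually passed to the C function: heap pointer plus shim.
  passToC :: Int -> FArgKind -> Int
  passToC heapAddr kind = heapAddr + shimBytes kind

In the running example, the ByteArray# argument is passed as passToC x ByteArrayArg, while the Int# argument needs no adjustment at all.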
-getFCallArgs ::
- [StgArg]
- -> Type -- the type of the foreign function
- -> FCode [(CmmExpr, ForeignHint)]
--- (a) Drop void args
--- (b) Add foreign-call shim code
--- It's (b) that makes this differ from getNonVoidArgAmodes
--- Precondition: args and typs have the same length
--- See Note [Unlifted boxed arguments to foreign calls]
-getFCallArgs args typ
- = do { mb_cmms <- mapM get (zipEqual "getFCallArgs" args (collectStgFArgTypes typ))
- ; return (catMaybes mb_cmms) }
- where
- get (arg,typ)
- | null arg_reps
- = return Nothing
- | otherwise
- = do { cmm <- getArgAmode (NonVoid arg)
- ; dflags <- getDynFlags
- ; return (Just (add_shim dflags typ cmm, hint)) }
- where
- arg_ty = stgArgType arg
- arg_reps = typePrimRep arg_ty
- hint = typeForeignHint arg_ty
-
--- The minimum amount of information needed to determine
--- the offset to apply to an argument to a foreign call.
--- See Note [Unlifted boxed arguments to foreign calls]
-data StgFArgType
- = StgPlainType
- | StgArrayType
- | StgSmallArrayType
- | StgByteArrayType
-
--- See Note [Unlifted boxed arguments to foreign calls]
-add_shim :: DynFlags -> StgFArgType -> CmmExpr -> CmmExpr
-add_shim dflags ty expr = case ty of
- StgPlainType -> expr
- StgArrayType -> cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
- StgSmallArrayType -> cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags)
- StgByteArrayType -> cmmOffsetB dflags expr (arrWordsHdrSize dflags)
-
--- From a function, extract information needed to determine
--- the offset of each argument when used as a C FFI argument.
--- See Note [Unlifted boxed arguments to foreign calls]
-collectStgFArgTypes :: Type -> [StgFArgType]
-collectStgFArgTypes = go []
- where
- -- Skip foralls
- go bs (ForAllTy _ res) = go bs res
- go bs (AppTy{}) = reverse bs
- go bs (TyConApp{}) = reverse bs
- go bs (LitTy{}) = reverse bs
- go bs (TyVarTy{}) = reverse bs
- go _ (CastTy{}) = panic "collectStgFArgTypes: CastTy"
- go _ (CoercionTy{}) = panic "collectStgFArgTypes: CoercionTy"
- go bs (FunTy {ft_arg = arg, ft_res=res}) =
- go (typeToStgFArgType arg:bs) res
-
--- Choose the offset based on the type. For anything other
--- than an unlifted boxed type, there is no offset.
--- See Note [Unlifted boxed arguments to foreign calls]
-typeToStgFArgType :: Type -> StgFArgType
-typeToStgFArgType typ
- | tycon == arrayPrimTyCon = StgArrayType
- | tycon == mutableArrayPrimTyCon = StgArrayType
- | tycon == arrayArrayPrimTyCon = StgArrayType
- | tycon == mutableArrayArrayPrimTyCon = StgArrayType
- | tycon == smallArrayPrimTyCon = StgSmallArrayType
- | tycon == smallMutableArrayPrimTyCon = StgSmallArrayType
- | tycon == byteArrayPrimTyCon = StgByteArrayType
- | tycon == mutableByteArrayPrimTyCon = StgByteArrayType
- | otherwise = StgPlainType
- where
- -- Should be a tycon app, since this is a foreign call. We look
- -- through newtypes so the offset does not change if a user replaces
- -- a type in a foreign function signature with a representationally
- -- equivalent newtype.
- tycon = tyConAppTyCon (unwrapType typ)
-
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
deleted file mode 100644
index da9e85f1e7..0000000000
--- a/compiler/codeGen/StgCmmHeap.hs
+++ /dev/null
@@ -1,680 +0,0 @@
------------------------------------------------------------------------------
---
--- Stg to C--: heap management functions
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmHeap (
- getVirtHp, setVirtHp, setRealHp,
- getHpRelOffset,
-
- entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
- heapStackCheckGen,
- entryHeapCheck',
-
- mkStaticClosureFields, mkStaticClosure,
-
- allocDynClosure, allocDynClosureCmm, allocHeapClosure,
- emitSetDynHdr
- ) where
-
-import GhcPrelude hiding ((<*>))
-
-import StgSyn
-import CLabel
-import StgCmmLayout
-import StgCmmUtils
-import StgCmmMonad
-import StgCmmProf (profDynAlloc, dynProfHdr, staticProfHdr)
-import StgCmmTicky
-import StgCmmClosure
-import StgCmmEnv
-
-import MkGraph
-
-import Hoopl.Label
-import SMRep
-import BlockId
-import Cmm
-import CmmUtils
-import CostCentre
-import IdInfo( CafInfo(..), mayHaveCafRefs )
-import Id ( Id )
-import Module
-import DynFlags
-import FastString( mkFastString, fsLit )
-import Panic( sorry )
-
-import Control.Monad (when)
-import Data.Maybe (isJust)
-
------------------------------------------------------------
--- Initialise dynamic heap objects
------------------------------------------------------------
-
-allocDynClosure
- :: Maybe Id
- -> CmmInfoTable
- -> LambdaFormInfo
- -> CmmExpr -- Cost Centre to stick in the object
- -> CmmExpr -- Cost Centre to blame for this alloc
- -- (usually the same; sometimes "OVERHEAD")
-
- -> [(NonVoid StgArg, VirtualHpOffset)] -- Offsets from start of object
- -- ie Info ptr has offset zero.
- -- No void args in here
- -> FCode CmmExpr -- returns Hp+n
-
-allocDynClosureCmm
- :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
- -> [(CmmExpr, ByteOff)]
- -> FCode CmmExpr -- returns Hp+n
-
--- allocDynClosure allocates the thing in the heap,
--- and modifies the virtual Hp to account for this.
--- The second return value is the graph that sets the value of the
--- returned LocalReg, which should point to the closure after executing
--- the graph.
-
--- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
--- only valid until Hp is changed. The caller should assign the
--- result to a LocalReg if it is required to remain live.
---
--- The reason we don't assign it to a LocalReg here is that the caller
--- is often about to call regIdInfo, which immediately assigns the
--- result of allocDynClosure to a new temp in order to add the tag.
--- So by not generating a LocalReg here we avoid a common source of
--- new temporaries and save some compile time. This can be quite
--- significant - see test T4801.
-
-
-allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
- let (args, offsets) = unzip args_w_offsets
- cmm_args <- mapM getArgAmode args -- No void args
- allocDynClosureCmm mb_id info_tbl lf_info
- use_cc _blame_cc (zip cmm_args offsets)
-
-
-allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
- -- SAY WHAT WE ARE ABOUT TO DO
- let rep = cit_rep info_tbl
- tickyDynAlloc mb_id rep lf_info
- let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
- allocHeapClosure rep info_ptr use_cc amodes_w_offsets
-
-
--- | Low-level heap object allocation.
-allocHeapClosure
- :: SMRep -- ^ representation of the object
- -> CmmExpr -- ^ info pointer
- -> CmmExpr -- ^ cost centre
- -> [(CmmExpr,ByteOff)] -- ^ payload
- -> FCode CmmExpr -- ^ returns the address of the object
-allocHeapClosure rep info_ptr use_cc payload = do
- profDynAlloc rep use_cc
-
- virt_hp <- getVirtHp
-
- -- Find the offset of the info-ptr word
- let info_offset = virt_hp + 1
- -- info_offset is the VirtualHpOffset of the first
- -- word of the new object
- -- Remember, virtHp points to last allocated word,
- -- ie 1 *before* the info-ptr word of new object.
-
- base <- getHpRelOffset info_offset
- emitComment $ mkFastString "allocHeapClosure"
- emitSetDynHdr base info_ptr use_cc
-
- -- Fill in the fields
- hpStore base payload
-
- -- Bump the virtual heap pointer
- dflags <- getDynFlags
- setVirtHp (virt_hp + heapClosureSizeW dflags rep)
-
- return base
-
-
-emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
-emitSetDynHdr base info_ptr ccs
- = do dflags <- getDynFlags
- hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..])
- where
- header :: DynFlags -> [CmmExpr]
- header dflags = [info_ptr] ++ dynProfHdr dflags ccs
- -- ToDo: Parallel stuff
- -- No ticky header
-
--- Store the item (expr,off) in base[off]
-hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
-hpStore base vals = do
- dflags <- getDynFlags
- sequence_ $
- [ emitStore (cmmOffsetB dflags base off) val | (val,off) <- vals ]
-
------------------------------------------------------------
--- Layout of static closures
------------------------------------------------------------
-
--- Make a static closure, adding on any extra padding needed for CAFs,
--- and adding a static link field if necessary.
-
-mkStaticClosureFields
- :: DynFlags
- -> CmmInfoTable
- -> CostCentreStack
- -> CafInfo
- -> [CmmLit] -- Payload
- -> [CmmLit] -- The full closure
-mkStaticClosureFields dflags info_tbl ccs caf_refs payload
- = mkStaticClosure dflags info_lbl ccs payload padding
- static_link_field saved_info_field
- where
- info_lbl = cit_lbl info_tbl
-
- -- CAFs must have consistent layout, regardless of whether they
- -- are actually updatable or not. The layout of a CAF is:
- --
- -- 3 saved_info
- -- 2 static_link
- -- 1 indirectee
- -- 0 info ptr
- --
- -- the static_link and saved_info fields must always be in the
- -- same place. So we use isThunkRep rather than closureUpdReqd
- -- here:
-
- is_caf = isThunkRep (cit_rep info_tbl)
-
- padding
- | is_caf && null payload = [mkIntCLit dflags 0]
- | otherwise = []
-
- static_link_field
- | is_caf || staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
- = [static_link_value]
- | otherwise
- = []
-
- saved_info_field
- | is_caf = [mkIntCLit dflags 0]
- | otherwise = []
-
- -- For a static constructor which has NoCafRefs, we set the
- -- static link field to a non-zero value so the garbage
- -- collector will ignore it.
- static_link_value
- | mayHaveCafRefs caf_refs = mkIntCLit dflags 0
- | otherwise = mkIntCLit dflags 3 -- No CAF refs
- -- See Note [STATIC_LINK fields]
- -- in rts/sm/Storage.h
-
-mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
- -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
-mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
- = [CmmLabel info_lbl]
- ++ staticProfHdr dflags ccs
- ++ payload
- ++ padding
- ++ static_link_field
- ++ saved_info_field
-
------------------------------------------------------------
--- Heap overflow checking
------------------------------------------------------------
-
-{- Note [Heap checks]
- ~~~~~~~~~~~~~~~~~~
-Heap checks come in various forms. We provide the following entry
-points to the runtime system, all of which use the native C-- entry
-convention.
-
- * gc() performs garbage collection and returns
- nothing to its caller
-
- * A series of canned entry points like
- r = gc_1p( r )
- where r is a pointer. This performs gc, and
- then returns its argument r to its caller.
-
- * A series of canned entry points like
- gcfun_2p( f, x, y )
- where f is a function closure of arity 2
- This performs garbage collection, keeping alive the
- three argument ptrs, and then tail-calls f(x,y)
-
-These are used in the following circumstances
-
-* entryHeapCheck: Function entry
- (a) With a canned GC entry sequence
- f( f_clo, x:ptr, y:ptr ) {
- Hp = Hp+8
- if Hp > HpLim goto L
- ...
- L: HpAlloc = 8
- jump gcfun_2p( f_clo, x, y ) }
- Note the tail call to the garbage collector;
- it should do no register shuffling
-
- (b) No canned sequence
- f( f_clo, x:ptr, y:ptr, ...etc... ) {
- T: Hp = Hp+8
- if Hp > HpLim goto L
- ...
- L: HpAlloc = 8
- call gc() -- Needs an info table
- goto T }
-
-* altHeapCheck: Immediately following an eval
- Started as
- case f x y of r { (p,q) -> rhs }
- (a) With a canned sequence for the results of f
- (which is the very common case since
--- all boxed cases return just one pointer)
- ...
- r = f( x, y )
- K: -- K needs an info table
- Hp = Hp+8
- if Hp > HpLim goto L
- ...code for rhs...
-
- L: r = gc_1p( r )
- goto K }
-
- Here, the info table needed by the call
- to gc_1p should be the *same* as the
- one for the call to f; the C-- optimiser
--- spots this sharing opportunity.
-
- (b) No canned sequence for results of f
- Note second info table
- ...
- (r1,r2,r3) = call f( x, y )
- K:
- Hp = Hp+8
- if Hp > HpLim goto L
- ...code for rhs...
-
- L: call gc() -- Extra info table here
- goto K
-
-* generalHeapCheck: Anywhere else
- e.g. entry to thunk
- case branch *not* following eval,
- or let-no-escape
- Exactly the same as the previous case:
-
- K: -- K needs an info table
- Hp = Hp+8
- if Hp > HpLim goto L
- ...
-
- L: call gc()
- goto K
--}
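All of the variants above share the same core test; a minimal pure sketch of it, with plain integers standing in for Hp, HpLim and the allocation (not real Cmm):

  -- Sketch: bump Hp by this block's allocation and compare against HpLim,
  -- which points at the last word of valid allocation space.
  bumpAndCheck :: Integer -> Integer -> Integer -> (Integer, Bool)
  bumpAndCheck hp hpLim allocBytes =
    let hp' = hp + allocBytes
    in (hp', hp' > hpLim)   -- True: take the out-of-line GC path

For example, bumpAndCheck 96 128 8 is (104, False), while bumpAndCheck 96 128 40 is (136, True) and would branch to the GC entry point.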
-
---------------------------------------------------------------
--- A heap/stack check at a function or thunk entry point.
-
-entryHeapCheck :: ClosureInfo
- -> Maybe LocalReg -- Function (closure environment)
- -> Int -- Arity -- not same as len args b/c of voids
- -> [LocalReg] -- Non-void args (empty for thunk)
- -> FCode ()
- -> FCode ()
-
-entryHeapCheck cl_info nodeSet arity args code
- = entryHeapCheck' is_fastf node arity args code
- where
- node = case nodeSet of
- Just r -> CmmReg (CmmLocal r)
- Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)
-
- is_fastf = case closureFunInfo cl_info of
- Just (_, ArgGen _) -> False
- _otherwise -> True
-
--- | lower-level version for CmmParse
-entryHeapCheck' :: Bool -- is a known function pattern
- -> CmmExpr -- expression for the closure pointer
- -> Int -- Arity -- not same as len args b/c of voids
- -> [LocalReg] -- Non-void args (empty for thunk)
- -> FCode ()
- -> FCode ()
-entryHeapCheck' is_fastf node arity args code
- = do dflags <- getDynFlags
- let is_thunk = arity == 0
-
- args' = map (CmmReg . CmmLocal) args
- stg_gc_fun = CmmReg (CmmGlobal GCFun)
- stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)
-
- {- Thunks: jump stg_gc_enter_1
-
- Function (fast): call (NativeNode) stg_gc_fun(fun, args)
-
- Function (slow): call (slow) stg_gc_fun(fun, args)
- -}
- gc_call upd
- | is_thunk
- = mkJump dflags NativeNodeCall stg_gc_enter1 [node] upd
-
- | is_fastf
- = mkJump dflags NativeNodeCall stg_gc_fun (node : args') upd
-
- | otherwise
- = mkJump dflags Slow stg_gc_fun (node : args') upd
-
- updfr_sz <- getUpdFrameOff
-
- loop_id <- newBlockId
- emitLabel loop_id
- heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code
-
--- ------------------------------------------------------------
--- A heap/stack check in a case alternative
-
-
--- If there are multiple alts and we need to GC, but don't have a
--- continuation already (the scrut was simple), then we should
--- pre-generate the continuation. (if there are multiple alts it is
--- always a canned GC point).
-
--- altHeapCheck:
--- If we have a return continuation,
--- then if it is a canned GC pattern,
--- then we do mkJumpReturnsTo
--- else we do a normal call to stg_gc_noregs
--- else if it is a canned GC pattern,
--- then generate the continuation and do mkCallReturnsTo
--- else we do a normal call to stg_gc_noregs
-
-altHeapCheck :: [LocalReg] -> FCode a -> FCode a
-altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code
-
-altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
-altOrNoEscapeHeapCheck checkYield regs code = do
- dflags <- getDynFlags
- case cannedGCEntryPoint dflags regs of
- Nothing -> genericGC checkYield code
- Just gc -> do
- lret <- newBlockId
- let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) regs []
- lcont <- newBlockId
- tscope <- getTickScope
- emitOutOfLine lret (copyin <*> mkBranch lcont, tscope)
- emitLabel lcont
- cannedGCReturnsTo checkYield False gc regs lret off code
-
-altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
-altHeapCheckReturnsTo regs lret off code
- = do dflags <- getDynFlags
- case cannedGCEntryPoint dflags regs of
- Nothing -> genericGC False code
- Just gc -> cannedGCReturnsTo False True gc regs lret off code
-
--- noEscapeHeapCheck is implemented identically to altHeapCheck (which
--- is more efficient), but cannot be optimized away in the non-allocating
--- case because it may occur in a loop
-noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
-noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code
-
-cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
- -> FCode a
- -> FCode a
-cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
- = do dflags <- getDynFlags
- updfr_sz <- getUpdFrameOff
- heapCheck False checkYield (gc_call dflags gc updfr_sz) code
- where
- reg_exprs = map (CmmReg . CmmLocal) regs
- -- Note [stg_gc arguments]
-
- -- NB. we use the NativeReturn convention for passing arguments
- -- to the canned heap-check routines, because we are in a case
- -- alternative and hence the [LocalReg] was passed to us in the
- -- NativeReturn convention.
- gc_call dflags label sp
- | cont_on_stack
- = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp
- | otherwise
- = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp []
-
-genericGC :: Bool -> FCode a -> FCode a
-genericGC checkYield code
- = do updfr_sz <- getUpdFrameOff
- lretry <- newBlockId
- emitLabel lretry
- call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
- heapCheck False checkYield (call <*> mkBranch lretry) code
-
-cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
-cannedGCEntryPoint dflags regs
- = case map localRegType regs of
- [] -> Just (mkGcLabel "stg_gc_noregs")
- [ty]
- | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
- | isFloatType ty -> case width of
- W32 -> Just (mkGcLabel "stg_gc_f1")
- W64 -> Just (mkGcLabel "stg_gc_d1")
- _ -> Nothing
-
- | width == wordWidth dflags -> Just (mkGcLabel "stg_gc_unbx_r1")
- | width == W64 -> Just (mkGcLabel "stg_gc_l1")
- | otherwise -> Nothing
- where
- width = typeWidth ty
- [ty1,ty2]
- | isGcPtrType ty1
- && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
- [ty1,ty2,ty3]
- | isGcPtrType ty1
- && isGcPtrType ty2
- && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
- [ty1,ty2,ty3,ty4]
- | isGcPtrType ty1
- && isGcPtrType ty2
- && isGcPtrType ty3
- && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
- _otherwise -> Nothing
-
--- Note [stg_gc arguments]
--- It might seem that we could avoid passing the arguments to the
--- stg_gc function, because they are already in the right registers.
--- While this is usually the case, it isn't always. Sometimes the
--- code generator has cleverly avoided the eval in a case, e.g. in
--- ffi/should_run/4221.hs we found
---
--- case a_r1mb of z
--- FunPtr x y -> ...
---
--- where a_r1mb is bound to a top-level constructor, and is known to be
--- evaluated. The codegen just assigns x, y and z, and continues;
--- R1 is never assigned.
---
--- So we'll have to rely on optimisations to eliminate these
--- assignments where possible.
-
-
--- | The generic GC procedure; no params, no results
-generic_gc :: CmmExpr
-generic_gc = mkGcLabel "stg_gc_noregs"
-
--- | Create a CLabel for calling a garbage collector entry point
-mkGcLabel :: String -> CmmExpr
-mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit s)))
-
--------------------------------
-heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
-heapCheck checkStack checkYield do_gc code
- = getHeapUsage $ \ hpHw ->
- -- Emit heap checks, but be sure to do it lazily so
- -- that the conditionals on hpHw don't cause a black hole
- do { dflags <- getDynFlags
- ; let mb_alloc_bytes
- | hpHw > mBLOCK_SIZE = sorry $ unlines
- [" Trying to allocate more than "++show mBLOCK_SIZE++" bytes.",
- "",
- "This is currently not possible due to a limitation of GHC's code generator.",
- "See https://gitlab.haskell.org/ghc/ghc/issues/4505 for details.",
- "Suggestion: read data from a file instead of having large static data",
- "structures in code."]
- | hpHw > 0 = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags)))
- | otherwise = Nothing
- where mBLOCK_SIZE = bLOCKS_PER_MBLOCK dflags * bLOCK_SIZE_W dflags
- stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
- | otherwise = Nothing
- ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
- ; tickyAllocHeap True hpHw
- ; setRealHp hpHw
- ; code }
-
-heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
-heapStackCheckGen stk_hwm mb_bytes
- = do updfr_sz <- getUpdFrameOff
- lretry <- newBlockId
- emitLabel lretry
- call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
- do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)
-
--- Note [Single stack check]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~
--- When compiling a function we can determine how much stack space it
--- will use. We therefore need to perform only a single stack check at
--- the beginning of a function to see if we have enough stack space.
---
--- The check boils down to comparing Sp-N with SpLim, where N is the
--- amount of stack space needed (see Note [Stack usage] below). *BUT*
--- at this stage of the pipeline we are not supposed to refer to Sp
--- itself, because the stack is not yet manifest, so we don't quite
--- know where Sp is pointing.
-
--- So instead of referring directly to Sp - as we used to do in the
--- past - the code generator uses (old + 0) in the stack check. That
--- is the address of the first word of the old area, so if we add N
--- we'll get the address of highest used word.
---
--- This makes the check robust. For example, while we need to perform
--- only one stack check for each function, we could in theory place
--- more stack checks later in the function. They would be redundant,
--- but not incorrect (in the sense that they should not change program
--- behaviour). We need to make sure, however, that a stack check
--- inserted after incrementing the stack pointer checks for a
--- respectively smaller stack space. This would not be the case if the
--- code generator produced direct references to Sp. By referencing
--- (old + 0) we make sure that we always check for a correct amount of
--- stack: when converting (old + 0) to Sp the stack layout phase takes
--- into account changes already made to stack pointer. The idea for
--- this change came from observations made while debugging #8275.
-
--- Note [Stack usage]
--- ~~~~~~~~~~~~~~~~~~
--- At the moment we convert from STG to Cmm we don't know N, the
--- number of bytes of stack that the function will use, so we use a
--- special late-bound CmmLit, namely
--- CmmHighStackMark
--- to stand for the number of bytes needed. When the stack is made
--- manifest, the number of bytes needed is calculated, and used to
--- replace occurrences of CmmHighStackMark
---
--- The (Maybe CmmExpr) passed to do_checks is usually
--- Just (CmmLit CmmHighStackMark)
--- but can also be (in certain hand-written RTS functions)
--- Just (CmmLit 8) or some other fixed value.
--- If it is Nothing, we don't generate a stack check at all.
-
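A small arithmetic sketch of the check described in the two notes above, with plain integers standing in for the addresses involved:

  -- Sketch: the stack check compares (old + 0) - N against SpLim, where N is
  -- the late-bound high-water mark that replaces CmmHighStackMark.
  stackOverflow :: Integer -> Integer -> Integer -> Bool
  stackOverflow oldArea highWaterMark spLim =
    oldArea - highWaterMark < spLim   -- True: call into the GC/stack-grow path

For instance, stackOverflow 4096 512 3700 is True (the frame would dip below SpLim), whereas stackOverflow 4096 512 3500 is False.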
-do_checks :: Maybe CmmExpr -- Should we check the stack?
- -- See Note [Stack usage]
- -> Bool -- Should we check for preemption?
- -> Maybe CmmExpr -- Heap headroom (bytes)
- -> CmmAGraph -- What to do on failure
- -> FCode ()
-do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
- dflags <- getDynFlags
- gc_id <- newBlockId
-
- let
- Just alloc_lit = mb_alloc_lit
-
- bump_hp = cmmOffsetExprB dflags hpExpr alloc_lit
-
- -- Sp overflow if ((old + 0) - CmmHighStack < SpLim)
- -- At the beginning of a function old + 0 = Sp
- -- See Note [Single stack check]
- sp_oflo sp_hwm =
- CmmMachOp (mo_wordULt dflags)
- [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
- [CmmStackSlot Old 0, sp_hwm],
- CmmReg spLimReg]
-
- -- Hp overflow if (Hp > HpLim)
- -- (Hp has been incremented by now)
- -- HpLim points to the LAST WORD of valid allocation space.
- hp_oflo = CmmMachOp (mo_wordUGt dflags) [hpExpr, hpLimExpr]
-
- alloc_n = mkAssign hpAllocReg alloc_lit
-
- case mb_stk_hwm of
- Nothing -> return ()
- Just stk_hwm -> tickyStackCheck
- >> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False) )
-
- -- Emit new label that might potentially be a header
- -- of a self-recursive tail call.
- -- See Note [Self-recursive loop header].
- self_loop_info <- getSelfLoop
- case self_loop_info of
- Just (_, loop_header_id, _)
- | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
- _otherwise -> return ()
-
- if (isJust mb_alloc_lit)
- then do
- tickyHeapCheck
- emitAssign hpReg bump_hp
- emit =<< mkCmmIfThen' hp_oflo (alloc_n <*> mkBranch gc_id) (Just False)
- else do
- when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
- -- Yielding if HpLim == 0
- let yielding = CmmMachOp (mo_wordEq dflags)
- [CmmReg hpLimReg,
- CmmLit (zeroCLit dflags)]
- emit =<< mkCmmIfGoto' yielding gc_id (Just False)
-
- tscope <- getTickScope
- emitOutOfLine gc_id
- (do_gc, tscope) -- this is expected to jump back somewhere
-
- -- Test for stack pointer exhaustion, then
- -- bump heap pointer, and test for heap exhaustion
- -- Note that we don't move the heap pointer unless the
- -- stack check succeeds. Otherwise we might end up
- -- with slop at the end of the current block, which can
- -- confuse the LDV profiler.
-
--- Note [Self-recursive loop header]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- Self-recursive loop header is required by loopification optimization (See
--- Note [Self-recursive tail calls] in StgCmmExpr). We emit it if:
---
--- 1. There is information about self-loop in the FCode environment. We don't
--- check the binder (first component of the self_loop_info) because we are
--- certain that if the self-loop info is present then we are compiling the
--- binder body. Reason: the only possible way to get here with the
--- self_loop_info present is from closureCodeBody.
---
--- 2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
--- to preempt the heap check (see #367 for motivation behind this check). It
--- is True for heap checks placed at the entry to a function and
--- let-no-escape heap checks but false for other heap checks (eg. in case
--- alternatives or created from hand-written high-level Cmm). The second
--- check (isJust mb_stk_hwm) is true for heap checks at the entry to a
--- function and some heap checks created in hand-written Cmm. Otherwise it
--- is Nothing. In other words the only situation when both conditions are
--- true is when compiling stack and heap checks at the entry to a
--- function. This is the only situation when we want to emit a self-loop
--- label.
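The two conditions above reduce to a simple predicate; a sketch with illustrative names (the real test is inlined in do_checks):

  import Data.Maybe (isJust)

  -- Sketch: emit the self-recursive loop header only when self-loop info is
  -- available and this is an entry-point check (preemptible, and checking a
  -- stack high-water mark).
  wantsLoopHeader :: Maybe selfLoop -> Bool -> Maybe stackHwm -> Bool
  wantsLoopHeader selfLoopInfo checkYield mbStkHwm =
    isJust selfLoopInfo && checkYield && isJust mbStkHwm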
diff --git a/compiler/codeGen/StgCmmHpc.hs b/compiler/codeGen/StgCmmHpc.hs
deleted file mode 100644
index 8e9676bd33..0000000000
--- a/compiler/codeGen/StgCmmHpc.hs
+++ /dev/null
@@ -1,48 +0,0 @@
------------------------------------------------------------------------------
---
--- Code generation for coverage
---
--- (c) Galois Connections, Inc. 2006
---
------------------------------------------------------------------------------
-
-module StgCmmHpc ( initHpc, mkTickBox ) where
-
-import GhcPrelude
-
-import StgCmmMonad
-
-import MkGraph
-import CmmExpr
-import CLabel
-import Module
-import CmmUtils
-import StgCmmUtils
-import HscTypes
-import DynFlags
-
-import Control.Monad
-
-mkTickBox :: DynFlags -> Module -> Int -> CmmAGraph
-mkTickBox dflags mod n
- = mkStore tick_box (CmmMachOp (MO_Add W64)
- [ CmmLoad tick_box b64
- , CmmLit (CmmInt 1 W64)
- ])
- where
- tick_box = cmmIndex dflags W64
- (CmmLit $ CmmLabel $ mkHpcTicksLabel $ mod)
- n
-
-initHpc :: Module -> HpcInfo -> FCode ()
--- Emit top-level tables for HPC and return code to initialise
-initHpc _ (NoHpcInfo {})
- = return ()
-initHpc this_mod (HpcInfo tickCount _hashNo)
- = do dflags <- getDynFlags
- when (gopt Opt_Hpc dflags) $
- do emitDataLits (mkHpcTicksLabel this_mod)
- [ (CmmInt 0 W64)
- | _ <- take tickCount [0 :: Int ..]
- ]
-
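The tick boxes that mkTickBox targets behave like a per-module array of 64-bit counters; a standalone sketch of that behaviour (IORef-based, nothing like the generated Cmm):

  import Data.IORef

  -- Sketch: one counter per tick box; entering tick n bumps counter n, just as
  -- the generated code increments the n'th word of the mkHpcTicksLabel block.
  newTickBoxes :: Int -> IO (IORef [Integer])
  newTickBoxes tickCount = newIORef (replicate tickCount 0)

  tick :: IORef [Integer] -> Int -> IO ()
  tick boxes n = modifyIORef' boxes
    (\cs -> [ if i == n then c + 1 else c | (i, c) <- zip [0 ..] cs ])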
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
deleted file mode 100644
index 78a7cf3f85..0000000000
--- a/compiler/codeGen/StgCmmLayout.hs
+++ /dev/null
@@ -1,623 +0,0 @@
-{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Building info tables.
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmLayout (
- mkArgDescr,
- emitCall, emitReturn, adjustHpBackwards,
-
- emitClosureProcAndInfoTable,
- emitClosureAndInfoTable,
-
- slowCall, directCall,
-
- FieldOffOrPadding(..),
- ClosureHeader(..),
- mkVirtHeapOffsets,
- mkVirtHeapOffsetsWithPadding,
- mkVirtConstrOffsets,
- mkVirtConstrSizes,
- getHpRelOffset,
-
- ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep
- ) where
-
-
-#include "HsVersions.h"
-
-import GhcPrelude hiding ((<*>))
-
-import StgCmmClosure
-import StgCmmEnv
-import StgCmmArgRep -- notably: ( slowCallPattern )
-import StgCmmTicky
-import StgCmmMonad
-import StgCmmUtils
-
-import MkGraph
-import SMRep
-import BlockId
-import Cmm
-import CmmUtils
-import CmmInfo
-import CLabel
-import StgSyn
-import Id
-import TyCon ( PrimRep(..), primRepSizeB )
-import BasicTypes ( RepArity )
-import DynFlags
-import Module
-
-import Util
-import Data.List
-import Outputable
-import FastString
-import Control.Monad
-
-------------------------------------------------------------------------
--- Call and return sequences
-------------------------------------------------------------------------
-
--- | Return multiple values to the sequel
---
--- If the sequel is @Return@
---
--- > return (x,y)
---
--- If the sequel is @AssignTo [p,q]@
---
--- > p=x; q=y;
---
-emitReturn :: [CmmExpr] -> FCode ReturnKind
-emitReturn results
- = do { dflags <- getDynFlags
- ; sequel <- getSequel
- ; updfr_off <- getUpdFrameOff
- ; case sequel of
- Return ->
- do { adjustHpBackwards
- ; let e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
- ; emit (mkReturn dflags (entryCode dflags e) results updfr_off)
- }
- AssignTo regs adjust ->
- do { when adjust adjustHpBackwards
- ; emitMultiAssign regs results }
- ; return AssignedDirectly
- }
-
-
--- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
--- using the call/return convention @conv@, passing @args@, and
--- returning the results to the current sequel.
---
-emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
-emitCall convs fun args
- = emitCallWithExtraStack convs fun args noExtraStack
-
-
--- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
--- entry-code of @fun@, using the call/return convention @conv@,
--- passing @args@, pushing some extra stack frames described by
--- @stack@, and returning the results to the current sequel.
---
-emitCallWithExtraStack
- :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
- -> [CmmExpr] -> FCode ReturnKind
-emitCallWithExtraStack (callConv, retConv) fun args extra_stack
- = do { dflags <- getDynFlags
- ; adjustHpBackwards
- ; sequel <- getSequel
- ; updfr_off <- getUpdFrameOff
- ; case sequel of
- Return -> do
- emit $ mkJumpExtra dflags callConv fun args updfr_off extra_stack
- return AssignedDirectly
- AssignTo res_regs _ -> do
- k <- newBlockId
- let area = Young k
- (off, _, copyin) = copyInOflow dflags retConv area res_regs []
- copyout = mkCallReturnsTo dflags fun callConv args k off updfr_off
- extra_stack
- tscope <- getTickScope
- emit (copyout <*> mkLabel k tscope <*> copyin)
- return (ReturnedTo k off)
- }
-
-
-adjustHpBackwards :: FCode ()
--- This function adjusts the heap pointer just before a tail call or
--- return. At a call or return, the virtual heap pointer may be less
--- than the real Hp, because the latter was advanced to deal with
--- the worst-case branch of the code, and we may be in a better-case
--- branch. In that case, move the real Hp *back* and retract some
--- ticky allocation count.
---
--- It *does not* deal with high-water-mark adjustment. That's done by
--- functions which allocate heap.
-adjustHpBackwards
- = do { hp_usg <- getHpUsage
- ; let rHp = realHp hp_usg
- vHp = virtHp hp_usg
- adjust_words = vHp -rHp
- ; new_hp <- getHpRelOffset vHp
-
- ; emit (if adjust_words == 0
- then mkNop
- else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp
-
- ; tickyAllocHeap False adjust_words -- ...ditto
-
- ; setRealHp vHp
- }
-
-
--------------------------------------------------------------------------
--- Making calls: directCall and slowCall
--------------------------------------------------------------------------
-
--- General plan is:
--- - we'll make *one* fast call, either to the function itself
--- (directCall) or to stg_ap_<pat>_fast (slowCall)
--- Any left-over arguments will be pushed on the stack,
---
--- e.g. Sp[old+8] = arg1
--- Sp[old+16] = arg2
--- Sp[old+32] = stg_ap_pp_info
--- R2 = arg3
--- R3 = arg4
--- call f() return to Nothing updfr_off: 32
-
-
-directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
--- (directCall f n args)
--- calls f(arg1, ..., argn), and applies the result to the remaining args
--- The function f has arity n, and there are guaranteed at least n args
--- Both arity and args include void args
-directCall conv lbl arity stg_args
- = do { argreps <- getArgRepsAmodes stg_args
- ; direct_call "directCall" conv lbl arity argreps }
-
-
-slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
--- (slowCall fun args) applies fun to args, returning the results to Sequel
-slowCall fun stg_args
- = do dflags <- getDynFlags
- argsreps <- getArgRepsAmodes stg_args
- let (rts_fun, arity) = slowCallPattern (map fst argsreps)
-
- (r, slow_code) <- getCodeR $ do
- r <- direct_call "slow_call" NativeNodeCall
- (mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps)
- emitComment $ mkFastString ("slow_call for " ++
- showSDoc dflags (ppr fun) ++
- " with pat " ++ unpackFS rts_fun)
- return r
-
- -- Note [avoid intermediate PAPs]
- let n_args = length stg_args
- if n_args > arity && optLevel dflags >= 2
- then do
- funv <- (CmmReg . CmmLocal) `fmap` assignTemp fun
- fun_iptr <- (CmmReg . CmmLocal) `fmap`
- assignTemp (closureInfoPtr dflags (cmmUntag dflags funv))
-
- -- ToDo: we could do slightly better here by reusing the
- -- continuation from the slow call, which we have in r.
- -- Also we'd like to push the continuation on the stack
- -- before the branch, so that we only get one copy of the
- -- code that saves all the live variables across the
- -- call, but that might need some improvements to the
- -- special case in the stack layout code to handle this
- -- (see Note [diamond proc point]).
-
- fast_code <- getCode $
- emitCall (NativeNodeCall, NativeReturn)
- (entryCode dflags fun_iptr)
- (nonVArgs ((P,Just funv):argsreps))
-
- slow_lbl <- newBlockId
- fast_lbl <- newBlockId
- is_tagged_lbl <- newBlockId
- end_lbl <- newBlockId
-
- let correct_arity = cmmEqWord dflags (funInfoArity dflags fun_iptr)
- (mkIntExpr dflags n_args)
-
- tscope <- getTickScope
- emit (mkCbranch (cmmIsTagged dflags funv)
- is_tagged_lbl slow_lbl (Just True)
- <*> mkLabel is_tagged_lbl tscope
- <*> mkCbranch correct_arity fast_lbl slow_lbl (Just True)
- <*> mkLabel fast_lbl tscope
- <*> fast_code
- <*> mkBranch end_lbl
- <*> mkLabel slow_lbl tscope
- <*> slow_code
- <*> mkLabel end_lbl tscope)
- return r
-
- else do
- emit slow_code
- return r
-
-
--- Note [avoid intermediate PAPs]
---
--- A slow call which needs multiple generic apply patterns will be
--- almost guaranteed to create one or more intermediate PAPs when
--- applied to a function that takes the correct number of arguments.
--- We try to avoid this situation by generating code to test whether
--- we are calling a function with the correct number of arguments
--- first, i.e.:
---
--- if (TAG(f) != 0} { // f is not a thunk
--- if (f->info.arity == n) {
--- ... make a fast call to f ...
--- }
--- }
--- ... otherwise make the slow call ...
---
--- We *only* do this when the call requires multiple generic apply
--- functions, which requires pushing extra stack frames and probably
--- results in intermediate PAPs. (I say probably, because it might be
--- that we're over-applying a function, but that seems even less
--- likely).
---
--- This very rarely applies, but if it does happen in an inner loop it
--- can have a severe impact on performance (#6084).
-
-
---------------
-direct_call :: String
- -> Convention -- e.g. NativeNodeCall or NativeDirectCall
- -> CLabel -> RepArity
- -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind
-direct_call caller call_conv lbl arity args
- | debugIsOn && args `lengthLessThan` real_arity -- Too few args
- = do -- Caller should ensure that there are enough args!
- pprPanic "direct_call" $
- text caller <+> ppr arity <+>
- ppr lbl <+> ppr (length args) <+>
- ppr (map snd args) <+> ppr (map fst args)
-
- | null rest_args -- Precisely the right number of arguments
- = emitCall (call_conv, NativeReturn) target (nonVArgs args)
-
- | otherwise -- Note [over-saturated calls]
- = do dflags <- getDynFlags
- emitCallWithExtraStack (call_conv, NativeReturn)
- target
- (nonVArgs fast_args)
- (nonVArgs (stack_args dflags))
- where
- target = CmmLit (CmmLabel lbl)
- (fast_args, rest_args) = splitAt real_arity args
- stack_args dflags = slowArgs dflags rest_args
- real_arity = case call_conv of
- NativeNodeCall -> arity+1
- _ -> arity
-
-
--- When constructing calls, it is easier to keep the ArgReps and the
--- CmmExprs zipped together. However, a void argument has no
--- representation, so we need to use Maybe CmmExpr (the alternative of
--- using zeroCLit or even undefined would work, but would be ugly).
---
-getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
-getArgRepsAmodes = mapM getArgRepAmode
- where getArgRepAmode arg
- | V <- rep = return (V, Nothing)
- | otherwise = do expr <- getArgAmode (NonVoid arg)
- return (rep, Just expr)
- where rep = toArgRep (argPrimRep arg)
-
-nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
-nonVArgs [] = []
-nonVArgs ((_,Nothing) : args) = nonVArgs args
-nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
-
-{-
-Note [over-saturated calls]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The natural thing to do for an over-saturated call would be to call
-the function with the correct number of arguments, and then apply the
-remaining arguments to the value returned, e.g.
-
- f a b c d (where f has arity 2)
- -->
- r = call f(a,b)
- call r(c,d)
-
-but this entails
- - saving c and d on the stack
- - making a continuation info table
- - at the continuation, loading c and d off the stack into regs
- - finally, call r
-
-Note that since there are a fixed number of different r's
-(e.g. stg_ap_pp_fast), we can also pre-compile continuations
-that correspond to each of them, rather than generating a fresh
-one for each over-saturated call.
-
-Not only does this generate much less code, it is faster too. We will
-generate something like:
-
-Sp[old+16] = c
-Sp[old+24] = d
-Sp[old+32] = stg_ap_pp_info
-call f(a,b) -- usual calling convention
-
-For the purposes of the CmmCall node, we count this extra stack as
-just more arguments that we are passing on the stack (cml_args).
--}
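A sketch of the argument split this note implies: the arity-worth of arguments goes into the call itself, and the rest are pushed as extra stack words headed by the matching stg_ap_<pat> info pointer (names here are illustrative):

  -- Sketch: split an over-saturated application at the known arity.  The first
  -- component is passed in the fast call; the second is consumed later by a
  -- pre-compiled stg_ap_<pat> continuation.
  splitCallArgs :: Int -> [arg] -> ([arg], [arg])
  splitCallArgs = splitAt

  -- e.g. for  f a b c d  with f of arity 2:
  --   splitCallArgs 2 ["a","b","c","d"]  ==  (["a","b"], ["c","d"])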
-
--- | 'slowArgs' takes a list of function arguments and prepares them for
--- pushing on the stack for "extra" arguments to a function which requires
--- fewer arguments than we currently have.
-slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
-slowArgs _ [] = []
-slowArgs dflags args -- careful: reps contains voids (V), but args does not
- | gopt Opt_SccProfilingOn dflags
- = save_cccs ++ this_pat ++ slowArgs dflags rest_args
- | otherwise = this_pat ++ slowArgs dflags rest_args
- where
- (arg_pat, n) = slowCallPattern (map fst args)
- (call_args, rest_args) = splitAt n args
-
- stg_ap_pat = mkCmmRetInfoLabel rtsUnitId arg_pat
- this_pat = (N, Just (mkLblExpr stg_ap_pat)) : call_args
- save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just cccsExpr)]
- save_cccs_lbl = mkCmmRetInfoLabel rtsUnitId (fsLit "stg_restore_cccs")
-
--------------------------------------------------------------------------
----- Laying out objects on the heap and stack
--------------------------------------------------------------------------
-
--- The heap always grows upwards, so hpRel is easy to compute
-hpRel :: VirtualHpOffset -- virtual offset of Hp
- -> VirtualHpOffset -- virtual offset of The Thing
- -> WordOff -- integer word offset
-hpRel hp off = off - hp
-
-getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
--- See Note [Virtual and real heap pointers] in StgCmmMonad
-getHpRelOffset virtual_offset
- = do dflags <- getDynFlags
- hp_usg <- getHpUsage
- return (cmmRegOffW dflags hpReg (hpRel (realHp hp_usg) virtual_offset))
-
-data FieldOffOrPadding a
- = FieldOff (NonVoid a) -- Something that needs an offset.
- ByteOff -- Offset in bytes.
- | Padding ByteOff -- Length of padding in bytes.
- ByteOff -- Offset in bytes.
-
--- | Used to tell the various @mkVirtHeapOffsets@ functions what kind
--- of header the object has. This will be accounted for in the
--- offsets of the fields returned.
-data ClosureHeader
- = NoHeader
- | StdHeader
- | ThunkHeader
-
-mkVirtHeapOffsetsWithPadding
- :: DynFlags
- -> ClosureHeader -- What kind of header to account for
- -> [NonVoid (PrimRep, a)] -- Things to make offsets for
- -> ( WordOff -- Total number of words allocated
- , WordOff -- Number of words allocated for *pointers*
- , [FieldOffOrPadding a] -- Either an offset or padding.
- )
-
--- Things with their offsets from start of object in order of
--- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER
--- First in list gets lowest offset, which is initial offset + 1.
---
--- mkVirtHeapOffsetsWithPadding always returns boxed things with smaller offsets
--- than the unboxed things
-
-mkVirtHeapOffsetsWithPadding dflags header things =
- ASSERT(not (any (isVoidRep . fst . fromNonVoid) things))
- ( tot_wds
- , bytesToWordsRoundUp dflags bytes_of_ptrs
- , concat (ptrs_w_offsets ++ non_ptrs_w_offsets) ++ final_pad
- )
- where
- hdr_words = case header of
- NoHeader -> 0
- StdHeader -> fixedHdrSizeW dflags
- ThunkHeader -> thunkHdrSize dflags
- hdr_bytes = wordsToBytes dflags hdr_words
-
- (ptrs, non_ptrs) = partition (isGcPtrRep . fst . fromNonVoid) things
-
- (bytes_of_ptrs, ptrs_w_offsets) =
- mapAccumL computeOffset 0 ptrs
- (tot_bytes, non_ptrs_w_offsets) =
- mapAccumL computeOffset bytes_of_ptrs non_ptrs
-
- tot_wds = bytesToWordsRoundUp dflags tot_bytes
-
- final_pad_size = tot_wds * word_size - tot_bytes
- final_pad
- | final_pad_size > 0 = [(Padding final_pad_size
- (hdr_bytes + tot_bytes))]
- | otherwise = []
-
- word_size = wORD_SIZE dflags
-
- computeOffset bytes_so_far nv_thing =
- (new_bytes_so_far, with_padding field_off)
- where
- (rep, thing) = fromNonVoid nv_thing
-
- -- Size of the field in bytes.
- !sizeB = primRepSizeB dflags rep
-
- -- Align the start offset (eg, 2-byte value should be 2-byte aligned).
- -- But not more than to a word.
- !align = min word_size sizeB
- !start = roundUpTo bytes_so_far align
- !padding = start - bytes_so_far
-
- -- Final offset is:
- -- size of header + bytes_so_far + padding
- !final_offset = hdr_bytes + bytes_so_far + padding
- !new_bytes_so_far = start + sizeB
- field_off = FieldOff (NonVoid thing) final_offset
-
- with_padding field_off
- | padding == 0 = [field_off]
- | otherwise = [ Padding padding (hdr_bytes + bytes_so_far)
- , field_off
- ]
-
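The offset computation above can be pictured with a small standalone model; this is a sketch of the alignment/padding algorithm only, and omits the pointers-first partitioning and the final word-rounding pad that the real code adds:

  -- Sketch: lay out fields after a header, aligning each field to
  -- min(field size, word size) and recording any padding inserted before it.
  data Slot a = Field a Int   -- payload item, byte offset from object start
              | Pad Int Int   -- padding length, byte offset from object start
    deriving Show

  layoutFields :: Int -> Int -> [(a, Int)] -> [Slot a]
  layoutFields wordSize hdrBytes = go 0
    where
      go _ [] = []
      go sofar ((x, size) : rest) =
        let align   = min wordSize size
            start   = ((sofar + align - 1) `div` align) * align  -- roundUpTo
            padding = start - sofar
            here    = [ Pad padding (hdrBytes + sofar) | padding > 0 ]
                      ++ [ Field x (hdrBytes + start) ]
        in here ++ go (start + size) rest

  -- e.g. layoutFields 8 16 [("d", 8), ("w16", 2), ("w64", 8)]
  --   == [Field "d" 16, Field "w16" 24, Pad 6 26, Field "w64" 32]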
-
-mkVirtHeapOffsets
- :: DynFlags
- -> ClosureHeader -- What kind of header to account for
- -> [NonVoid (PrimRep,a)] -- Things to make offsets for
- -> (WordOff, -- _Total_ number of words allocated
- WordOff, -- Number of words allocated for *pointers*
- [(NonVoid a, ByteOff)])
-mkVirtHeapOffsets dflags header things =
- ( tot_wds
- , ptr_wds
- , [ (field, offset) | (FieldOff field offset) <- things_offsets ]
- )
- where
- (tot_wds, ptr_wds, things_offsets) =
- mkVirtHeapOffsetsWithPadding dflags header things
-
--- | Just like mkVirtHeapOffsets, but for constructors
-mkVirtConstrOffsets
- :: DynFlags -> [NonVoid (PrimRep, a)]
- -> (WordOff, WordOff, [(NonVoid a, ByteOff)])
-mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags StdHeader
-
--- | Just like mkVirtConstrOffsets, but used when we don't have the actual
--- arguments. Useful when e.g. generating info tables; we just need to know
--- sizes of pointer and non-pointer fields.
-mkVirtConstrSizes :: DynFlags -> [NonVoid PrimRep] -> (WordOff, WordOff)
-mkVirtConstrSizes dflags field_reps
- = (tot_wds, ptr_wds)
- where
- (tot_wds, ptr_wds, _) =
- mkVirtConstrOffsets dflags
- (map (\nv_rep -> NonVoid (fromNonVoid nv_rep, ())) field_reps)
-
--------------------------------------------------------------------------
---
--- Making argument descriptors
---
--- An argument descriptor describes the layout of args on the stack,
--- both for * GC (stack-layout) purposes, and
--- * saving/restoring registers when a heap-check fails
---
--- Void arguments aren't important, therefore (contrast constructSlowCall)
---
--------------------------------------------------------------------------
-
--- bring in ARG_P, ARG_N, etc.
-#include "../includes/rts/storage/FunTypes.h"
-
-mkArgDescr :: DynFlags -> [Id] -> ArgDescr
-mkArgDescr dflags args
- = let arg_bits = argBits dflags arg_reps
- arg_reps = filter isNonV (map idArgRep args)
- -- Getting rid of voids eases matching of standard patterns
- in case stdPattern arg_reps of
- Just spec_id -> ArgSpec spec_id
- Nothing -> ArgGen arg_bits
-
-argBits :: DynFlags -> [ArgRep] -> [Bool] -- True for non-ptr, False for ptr
-argBits _ [] = []
-argBits dflags (P : args) = False : argBits dflags args
-argBits dflags (arg : args) = take (argRepSizeW dflags arg) (repeat True)
- ++ argBits dflags args
-
-----------------------
-stdPattern :: [ArgRep] -> Maybe Int
-stdPattern reps
- = case reps of
- [] -> Just ARG_NONE -- just void args, probably
- [N] -> Just ARG_N
- [P] -> Just ARG_P
- [F] -> Just ARG_F
- [D] -> Just ARG_D
- [L] -> Just ARG_L
- [V16] -> Just ARG_V16
- [V32] -> Just ARG_V32
- [V64] -> Just ARG_V64
-
- [N,N] -> Just ARG_NN
- [N,P] -> Just ARG_NP
- [P,N] -> Just ARG_PN
- [P,P] -> Just ARG_PP
-
- [N,N,N] -> Just ARG_NNN
- [N,N,P] -> Just ARG_NNP
- [N,P,N] -> Just ARG_NPN
- [N,P,P] -> Just ARG_NPP
- [P,N,N] -> Just ARG_PNN
- [P,N,P] -> Just ARG_PNP
- [P,P,N] -> Just ARG_PPN
- [P,P,P] -> Just ARG_PPP
-
- [P,P,P,P] -> Just ARG_PPPP
- [P,P,P,P,P] -> Just ARG_PPPPP
- [P,P,P,P,P,P] -> Just ARG_PPPPPP
-
- _ -> Nothing
-
--------------------------------------------------------------------------
---
--- Generating the info table and code for a closure
---
--------------------------------------------------------------------------
-
--- Here we make an info table of type 'CmmInfo'. The concrete
--- representation as a list of 'CmmAddr' is handled later
--- in the pipeline by 'cmmToRawCmm'.
--- When loading the free variables, a function closure pointer may be tagged,
--- so we must take it into account.
-
-emitClosureProcAndInfoTable :: Bool -- top-level?
- -> Id -- name of the closure
- -> LambdaFormInfo
- -> CmmInfoTable
- -> [NonVoid Id] -- incoming arguments
- -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body
- -> FCode ()
-emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body
- = do { dflags <- getDynFlags
- -- Bind the binder itself, but only if it's not a top-level
- -- binding. We need non-top let-bindings to refer to the
- -- top-level binding, which this binding would incorrectly shadow.
- ; node <- if top_lvl then return $ idToReg dflags (NonVoid bndr)
- else bindToReg (NonVoid bndr) lf_info
- ; let node_points = nodeMustPointToIt dflags lf_info
- ; arg_regs <- bindArgsToRegs args
- ; let args' = if node_points then (node : arg_regs) else arg_regs
- conv = if nodeMustPointToIt dflags lf_info then NativeNodeCall
- else NativeDirectCall
- (offset, _, _) = mkCallEntry dflags conv args' []
- ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs)
- }
-
--- Data constructors need closures, but not with all the argument handling
--- needed for functions. The shared part goes here.
-emitClosureAndInfoTable ::
- CmmInfoTable -> Convention -> [LocalReg] -> FCode () -> FCode ()
-emitClosureAndInfoTable info_tbl conv args body
- = do { (_, blks) <- getCodeScoped body
- ; let entry_lbl = toEntryLbl (cit_lbl info_tbl)
- ; emitProcWithConvention conv (Just info_tbl) entry_lbl args blks
- }
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
deleted file mode 100644
index d6f84c6a0a..0000000000
--- a/compiler/codeGen/StgCmmMonad.hs
+++ /dev/null
@@ -1,861 +0,0 @@
-{-# LANGUAGE DeriveFunctor #-}
-{-# LANGUAGE GADTs #-}
-
------------------------------------------------------------------------------
---
--- Monad for Stg to C-- code generation
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmMonad (
- FCode, -- type
-
- initC, runC, fixC,
- newUnique,
-
- emitLabel,
-
- emit, emitDecl,
- emitProcWithConvention, emitProcWithStackFrame,
- emitOutOfLine, emitAssign, emitStore,
- emitComment, emitTick, emitUnwind,
-
- getCmm, aGraphToGraph,
- getCodeR, getCode, getCodeScoped, getHeapUsage,
-
- mkCmmIfThenElse, mkCmmIfThen, mkCmmIfGoto,
- mkCmmIfThenElse', mkCmmIfThen', mkCmmIfGoto',
-
- mkCall, mkCmmCall,
-
- forkClosureBody, forkLneBody, forkAlts, forkAltPair, codeOnly,
-
- ConTagZ,
-
- Sequel(..), ReturnKind(..),
- withSequel, getSequel,
-
- setTickyCtrLabel, getTickyCtrLabel,
- tickScope, getTickScope,
-
- withUpdFrameOff, getUpdFrameOff, initUpdFrameOff,
-
- HeapUsage(..), VirtualHpOffset, initHpUsage,
- getHpUsage, setHpUsage, heapHWM,
- setVirtHp, getVirtHp, setRealHp,
-
- getModuleName,
-
- -- ideally we wouldn't export these, but some other modules access internal state
- getState, setState, getSelfLoop, withSelfLoop, getInfoDown, getDynFlags, getThisPackage,
-
- -- more localised access to monad state
- CgIdInfo(..),
- getBinds, setBinds,
-
- -- out of general friendliness, we also export ...
- CgInfoDownwards(..), CgState(..) -- non-abstract
- ) where
-
-import GhcPrelude hiding( sequence, succ )
-
-import Cmm
-import StgCmmClosure
-import DynFlags
-import Hoopl.Collections
-import MkGraph
-import BlockId
-import CLabel
-import SMRep
-import Module
-import Id
-import VarEnv
-import OrdList
-import BasicTypes( ConTagZ )
-import Unique
-import UniqSupply
-import FastString
-import Outputable
-import Util
-
-import Control.Monad
-import Data.List
-
-
-
---------------------------------------------------------
--- The FCode monad and its types
---
--- FCode is the monad plumbed through the Stg->Cmm code generator, and
--- the Cmm parser. It contains the following things:
---
--- - A writer monad, collecting:
--- - code for the current function, in the form of a CmmAGraph.
--- The function "emit" appends more code to this.
--- - the top-level CmmDecls accumulated so far
---
--- - A state monad with:
--- - the local bindings in scope
--- - the current heap usage
--- - a UniqSupply
---
--- - A reader monad, for CgInfoDownwards, containing
--- - DynFlags,
--- - the current Module
--- - the update-frame offset
--- - the ticky counter label
--- - the Sequel (the continuation to return to)
--- - the self-recursive tail call information
-
---------------------------------------------------------
-
-newtype FCode a = FCode { doFCode :: CgInfoDownwards -> CgState -> (a, CgState) }
- deriving (Functor)
-
-instance Applicative FCode where
- pure val = FCode (\_info_down state -> (val, state))
- {-# INLINE pure #-}
- (<*>) = ap
-
-instance Monad FCode where
- FCode m >>= k = FCode $
- \info_down state ->
- case m info_down state of
- (m_result, new_state) ->
- case k m_result of
- FCode kcode -> kcode info_down new_state
- {-# INLINE (>>=) #-}
-
-instance MonadUnique FCode where
- getUniqueSupplyM = cgs_uniqs <$> getState
- getUniqueM = FCode $ \_ st ->
- let (u, us') = takeUniqFromSupply (cgs_uniqs st)
- in (u, st { cgs_uniqs = us' })
-
-initC :: IO CgState
-initC = do { uniqs <- mkSplitUniqSupply 'c'
- ; return (initCgState uniqs) }
-
-runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState)
-runC dflags mod st fcode = doFCode fcode (initCgInfoDown dflags mod) st
-
-fixC :: (a -> FCode a) -> FCode a
-fixC fcode = FCode $
- \info_down state -> let (v, s) = doFCode (fcode v) info_down state
- in (v, s)
-
---------------------------------------------------------
--- The code generator environment
---------------------------------------------------------
-
--- This monadery has some information that it only passes
--- *downwards*, as well as some ``state'' which is modified
--- as we go along.
-
-data CgInfoDownwards -- information only passed *downwards* by the monad
- = MkCgInfoDown {
- cgd_dflags :: DynFlags,
- cgd_mod :: Module, -- Module being compiled
- cgd_updfr_off :: UpdFrameOffset, -- Size of current update frame
- cgd_ticky :: CLabel, -- Current destination for ticky counts
- cgd_sequel :: Sequel, -- What to do at end of basic block
- cgd_self_loop :: Maybe SelfLoopInfo,-- Which tail calls can be compiled
- -- as local jumps? See Note
- -- [Self-recursive tail calls] in
- -- StgCmmExpr
- cgd_tick_scope:: CmmTickScope -- Tick scope for new blocks & ticks
- }
-
-type CgBindings = IdEnv CgIdInfo
-
-data CgIdInfo
- = CgIdInfo
- { cg_id :: Id -- Id that this is the info for
- , cg_lf :: LambdaFormInfo
- , cg_loc :: CgLoc -- CmmExpr for the *tagged* value
- }
-
-instance Outputable CgIdInfo where
- ppr (CgIdInfo { cg_id = id, cg_loc = loc })
- = ppr id <+> text "-->" <+> ppr loc
-
--- Sequel tells what to do with the result of this expression
-data Sequel
- = Return -- Return result(s) to continuation found on the stack.
-
- | AssignTo
- [LocalReg] -- Put result(s) in these regs and fall through
- -- NB: no void arguments here
- --
- Bool -- Should we adjust the heap pointer back to
- -- recover space that's unused on this path?
- -- We need to do this only if the expression
- -- may allocate (e.g. it's a foreign call or
- -- allocating primOp)
-
-instance Outputable Sequel where
- ppr Return = text "Return"
- ppr (AssignTo regs b) = text "AssignTo" <+> ppr regs <+> ppr b
-
--- See Note [sharing continuations] below
-data ReturnKind
- = AssignedDirectly
- | ReturnedTo BlockId ByteOff
-
--- Note [sharing continuations]
---
--- ReturnKind says how the expression being compiled returned its
--- results: either by assigning directly to the registers specified
--- by the Sequel, or by returning to a continuation that does the
--- assignments. The point of this is we might be able to re-use the
--- continuation in a subsequent heap-check. Consider:
---
--- case f x of z
--- True -> <True code>
--- False -> <False code>
---
--- Naively we would generate
---
--- R2 = x -- argument to f
--- Sp[young(L1)] = L1
--- call f returns to L1
--- L1:
--- z = R1
--- if (z & 1) then Ltrue else Lfalse
--- Ltrue:
--- Hp = Hp + 24
--- if (Hp > HpLim) then L4 else L7
--- L4:
--- HpAlloc = 24
--- goto L5
--- L5:
--- R1 = z
--- Sp[young(L6)] = L6
--- call stg_gc_unpt_r1 returns to L6
--- L6:
--- z = R1
--- goto L1
--- L7:
--- <True code>
--- Lfalse:
--- <False code>
---
--- We want the gc call in L4 to return to L1, and discard L6. Note
--- that not only can we share L1 and L6, but the assignment of the
--- return address in L4 is unnecessary because the return address for
--- L1 is already on the stack. We used to catch the sharing of L1 and
--- L6 in the common-block-eliminator, but not the unnecessary return
--- address assignment.
---
--- Since this case is so common I decided to make it more explicit and
--- robust by programming the sharing directly, rather than relying on
--- the common-block eliminator to catch it. This makes
--- common-block-elimination an optional optimisation, and furthermore
--- generates less code in the first place that we have to subsequently
--- clean up.
---
--- There are some rarer cases of common blocks that we don't catch
--- this way, but that's ok. Common-block-elimination is still available
--- to catch them when optimisation is enabled. Some examples are:
---
--- - when both the True and False branches do a heap check, we
--- can share the heap-check failure code L4a and maybe L4
---
--- - in a case-of-case, there might be multiple continuations that
--- we can common up.
---
--- It is always safe to use AssignedDirectly. Expressions that jump
--- to the continuation from multiple places (e.g. case expressions)
--- fall back to AssignedDirectly.
---
-
-
-initCgInfoDown :: DynFlags -> Module -> CgInfoDownwards
-initCgInfoDown dflags mod
- = MkCgInfoDown { cgd_dflags = dflags
- , cgd_mod = mod
- , cgd_updfr_off = initUpdFrameOff dflags
- , cgd_ticky = mkTopTickyCtrLabel
- , cgd_sequel = initSequel
- , cgd_self_loop = Nothing
- , cgd_tick_scope= GlobalScope }
-
-initSequel :: Sequel
-initSequel = Return
-
-initUpdFrameOff :: DynFlags -> UpdFrameOffset
-initUpdFrameOff dflags = widthInBytes (wordWidth dflags) -- space for the RA
-
-
---------------------------------------------------------
--- The code generator state
---------------------------------------------------------
-
-data CgState
- = MkCgState {
- cgs_stmts :: CmmAGraph, -- Current procedure
-
- cgs_tops :: OrdList CmmDecl,
- -- Other procedures and data blocks in this compilation unit
- -- Both are ordered only so that we can
- -- reduce forward references, when it's easy to do so
-
- cgs_binds :: CgBindings,
-
- cgs_hp_usg :: HeapUsage,
-
- cgs_uniqs :: UniqSupply }
-
-data HeapUsage -- See Note [Virtual and real heap pointers]
- = HeapUsage {
- virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word
- -- Incremented whenever we allocate
- realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr
- -- Used in instruction addressing modes
- }
-
-type VirtualHpOffset = WordOff
-
-
-{- Note [Virtual and real heap pointers]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The code generator can allocate one or more objects contiguously, performing
-one heap check to cover allocation of all the objects at once. Let's call
-this little chunk of heap space an "allocation chunk". The code generator
-will emit code to
- * Perform a heap-exhaustion check
- * Move the heap pointer to the end of the allocation chunk
- * Allocate multiple objects within the chunk
-
-The code generator uses VirtualHpOffsets to address words within a
-single allocation chunk; these start at one and increase positively.
-The first word of the chunk has VirtualHpOffset=1, the second has
-VirtualHpOffset=2, and so on.
-
- * The field realHp tracks (the VirtualHpOffset) where the real Hp
- register is pointing. Typically it'll be pointing to the end of the
- allocation chunk.
-
- * The field virtHp gives the VirtualHpOffset of the highest-allocated
- word so far. It starts at zero (meaning no word has been allocated),
- and increases whenever an object is allocated.
-
-The difference between realHp and virtHp gives the offset from the
-real Hp register of a particular word in the allocation chunk. This
-is what getHpRelOffset does. Since the returned offset is relative
-to the real Hp register, it is valid only until you change the real
-Hp register. (Changing virtHp doesn't matter.)
--}
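-
--- Illustrative sketch (not part of this module): the offset computation
--- described in the Note is what 'getHpRelOffset' (in StgCmmHeap) does,
--- roughly:
---
---   getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
---   getHpRelOffset virtual_offset
---     = do { dflags <- getDynFlags
---          ; hp_usg <- getHpUsage
---          ; return (cmmRegOffW dflags hpReg (virtual_offset - realHp hp_usg)) }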
-
-
-initCgState :: UniqSupply -> CgState
-initCgState uniqs
- = MkCgState { cgs_stmts = mkNop
- , cgs_tops = nilOL
- , cgs_binds = emptyVarEnv
- , cgs_hp_usg = initHpUsage
- , cgs_uniqs = uniqs }
-
-stateIncUsage :: CgState -> CgState -> CgState
--- @stateIncUsage e1 e2@ incorporates into e1
--- the heap high water mark found in e2.
-stateIncUsage s1 s2@(MkCgState { cgs_hp_usg = hp_usg })
- = s1 { cgs_hp_usg = cgs_hp_usg s1 `maxHpHw` virtHp hp_usg }
- `addCodeBlocksFrom` s2
-
-addCodeBlocksFrom :: CgState -> CgState -> CgState
--- Add code blocks from the latter to the former
--- (The cgs_stmts will often be empty, but not always; see codeOnly)
-s1 `addCodeBlocksFrom` s2
- = s1 { cgs_stmts = cgs_stmts s1 MkGraph.<*> cgs_stmts s2,
- cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 }
-
-
--- The heap high-water mark is virtHp. An older hwHp field used to record
--- the high-water marks of forked-off branches separately, so the mark was
--- the max of virtHp and hwHp; nowadays forked branches fold their marks
--- back into virtHp via maxHpHw (see stateIncUsage), which is why heapHWM
--- looks only at the virtual Hp. Remember, virtHp never retreats!
-
-heapHWM :: HeapUsage -> VirtualHpOffset
-heapHWM = virtHp
-
-initHpUsage :: HeapUsage
-initHpUsage = HeapUsage { virtHp = 0, realHp = 0 }
-
-maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage
-hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw }
-
---------------------------------------------------------
--- Operators for getting and setting the state and "info_down".
---------------------------------------------------------
-
-getState :: FCode CgState
-getState = FCode $ \_info_down state -> (state, state)
-
-setState :: CgState -> FCode ()
-setState state = FCode $ \_info_down _ -> ((), state)
-
-getHpUsage :: FCode HeapUsage
-getHpUsage = do
- state <- getState
- return $ cgs_hp_usg state
-
-setHpUsage :: HeapUsage -> FCode ()
-setHpUsage new_hp_usg = do
- state <- getState
- setState $ state {cgs_hp_usg = new_hp_usg}
-
-setVirtHp :: VirtualHpOffset -> FCode ()
-setVirtHp new_virtHp
- = do { hp_usage <- getHpUsage
- ; setHpUsage (hp_usage {virtHp = new_virtHp}) }
-
-getVirtHp :: FCode VirtualHpOffset
-getVirtHp
- = do { hp_usage <- getHpUsage
- ; return (virtHp hp_usage) }
-
-setRealHp :: VirtualHpOffset -> FCode ()
-setRealHp new_realHp
- = do { hp_usage <- getHpUsage
- ; setHpUsage (hp_usage {realHp = new_realHp}) }
-
-getBinds :: FCode CgBindings
-getBinds = do
- state <- getState
- return $ cgs_binds state
-
-setBinds :: CgBindings -> FCode ()
-setBinds new_binds = do
- state <- getState
- setState $ state {cgs_binds = new_binds}
-
-withState :: FCode a -> CgState -> FCode (a,CgState)
-withState (FCode fcode) newstate = FCode $ \info_down state ->
- case fcode info_down newstate of
- (retval, state2) -> ((retval,state2), state)
-
-newUniqSupply :: FCode UniqSupply
-newUniqSupply = do
- state <- getState
- let (us1, us2) = splitUniqSupply (cgs_uniqs state)
- setState $ state { cgs_uniqs = us1 }
- return us2
-
-newUnique :: FCode Unique
-newUnique = do
- state <- getState
- let (u,us') = takeUniqFromSupply (cgs_uniqs state)
- setState $ state { cgs_uniqs = us' }
- return u
-
-------------------
-getInfoDown :: FCode CgInfoDownwards
-getInfoDown = FCode $ \info_down state -> (info_down,state)
-
-getSelfLoop :: FCode (Maybe SelfLoopInfo)
-getSelfLoop = do
- info_down <- getInfoDown
- return $ cgd_self_loop info_down
-
-withSelfLoop :: SelfLoopInfo -> FCode a -> FCode a
-withSelfLoop self_loop code = do
- info_down <- getInfoDown
- withInfoDown code (info_down {cgd_self_loop = Just self_loop})
-
-instance HasDynFlags FCode where
- getDynFlags = liftM cgd_dflags getInfoDown
-
-getThisPackage :: FCode UnitId
-getThisPackage = liftM thisPackage getDynFlags
-
-withInfoDown :: FCode a -> CgInfoDownwards -> FCode a
-withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state
-
--- ----------------------------------------------------------------------------
--- Get the current module name
-
-getModuleName :: FCode Module
-getModuleName = do { info <- getInfoDown; return (cgd_mod info) }
-
--- ----------------------------------------------------------------------------
--- Get/set the end-of-block info
-
-withSequel :: Sequel -> FCode a -> FCode a
-withSequel sequel code
- = do { info <- getInfoDown
- ; withInfoDown code (info {cgd_sequel = sequel, cgd_self_loop = Nothing }) }
-
-getSequel :: FCode Sequel
-getSequel = do { info <- getInfoDown
- ; return (cgd_sequel info) }
-
--- ----------------------------------------------------------------------------
--- Get/set the size of the update frame
-
--- We keep track of the size of the update frame so that we
--- can set the stack pointer to the proper address on return
--- (or tail call) from the closure.
--- There should be at most one update frame for each closure.
--- Note: I'm including the size of the original return address
--- in the size of the update frame -- hence the default case on `get'.
-
-withUpdFrameOff :: UpdFrameOffset -> FCode a -> FCode a
-withUpdFrameOff size code
- = do { info <- getInfoDown
- ; withInfoDown code (info {cgd_updfr_off = size }) }
-
-getUpdFrameOff :: FCode UpdFrameOffset
-getUpdFrameOff
- = do { info <- getInfoDown
- ; return $ cgd_updfr_off info }
-
--- ----------------------------------------------------------------------------
--- Get/set the current ticky counter label
-
-getTickyCtrLabel :: FCode CLabel
-getTickyCtrLabel = do
- info <- getInfoDown
- return (cgd_ticky info)
-
-setTickyCtrLabel :: CLabel -> FCode a -> FCode a
-setTickyCtrLabel ticky code = do
- info <- getInfoDown
- withInfoDown code (info {cgd_ticky = ticky})
-
--- ----------------------------------------------------------------------------
--- Manage tick scopes
-
--- | The current tick scope. We will assign this to generated blocks.
-getTickScope :: FCode CmmTickScope
-getTickScope = do
- info <- getInfoDown
- return (cgd_tick_scope info)
-
--- | Places blocks generated by the given code into a fresh
--- (sub-)scope. This will make sure that Cmm annotations in our scope
--- will apply to the Cmm blocks generated therein - but not the other
--- way around.
-tickScope :: FCode a -> FCode a
-tickScope code = do
- info <- getInfoDown
- if debugLevel (cgd_dflags info) == 0 then code else do
- u <- newUnique
- let scope' = SubScope u (cgd_tick_scope info)
- withInfoDown code info{ cgd_tick_scope = scope' }
-
-
---------------------------------------------------------
--- Forking
---------------------------------------------------------
-
-forkClosureBody :: FCode () -> FCode ()
--- forkClosureBody compiles body_code in an environment where:
--- - sequel, update stack frame and self loop info are
--- set to fresh values
--- - state is set to a fresh value, except for local bindings
--- that are passed in unchanged. It's up to the enclosed code to
--- re-bind the free variables to a field of the closure.
-
-forkClosureBody body_code
- = do { dflags <- getDynFlags
- ; info <- getInfoDown
- ; us <- newUniqSupply
- ; state <- getState
- ; let body_info_down = info { cgd_sequel = initSequel
- , cgd_updfr_off = initUpdFrameOff dflags
- , cgd_self_loop = Nothing }
- fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
- ((),fork_state_out) = doFCode body_code body_info_down fork_state_in
- ; setState $ state `addCodeBlocksFrom` fork_state_out }
-
-forkLneBody :: FCode a -> FCode a
--- 'forkLneBody' takes a body of let-no-escape binding and compiles
--- it in the *current* environment, returning the graph thus constructed.
---
--- The current environment is passed on completely unchanged to
--- the successor. In particular, any heap usage from the enclosed
--- code is discarded; it should deal with its own heap consumption.
-forkLneBody body_code
- = do { info_down <- getInfoDown
- ; us <- newUniqSupply
- ; state <- getState
- ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
- (result, fork_state_out) = doFCode body_code info_down fork_state_in
- ; setState $ state `addCodeBlocksFrom` fork_state_out
- ; return result }
-
-codeOnly :: FCode () -> FCode ()
--- Emit any code from the inner thing into the outer thing
--- Do not affect anything else in the outer state
--- Used in almost-circular code to prevent false loop dependencies
-codeOnly body_code
- = do { info_down <- getInfoDown
- ; us <- newUniqSupply
- ; state <- getState
- ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state
- , cgs_hp_usg = cgs_hp_usg state }
- ((), fork_state_out) = doFCode body_code info_down fork_state_in
- ; setState $ state `addCodeBlocksFrom` fork_state_out }
-
-forkAlts :: [FCode a] -> FCode [a]
--- (forkAlts bs) takes fcodes 'bs' for the branches of a 'case' (including
--- any default alternative) and compiles each in the current environment.
--- The current environment is passed on unmodified, except that the virtual
--- Hp is moved on to the worst (highest) virtual Hp of the branches
-
-forkAlts branch_fcodes
- = do { info_down <- getInfoDown
- ; us <- newUniqSupply
- ; state <- getState
- ; let compile us branch
- = (us2, doFCode branch info_down branch_state)
- where
- (us1,us2) = splitUniqSupply us
- branch_state = (initCgState us1) {
- cgs_binds = cgs_binds state
- , cgs_hp_usg = cgs_hp_usg state }
- (_us, results) = mapAccumL compile us branch_fcodes
- (branch_results, branch_out_states) = unzip results
- ; setState $ foldl' stateIncUsage state branch_out_states
- -- NB foldl. state is the *left* argument to stateIncUsage
- ; return branch_results }
-
-forkAltPair :: FCode a -> FCode a -> FCode (a,a)
--- Most common use of 'forkAlts'; having this helper function avoids
--- accidental use of fallible pattern-matches in @do@-notation
-forkAltPair x y = do
- xy' <- forkAlts [x,y]
- case xy' of
- [x',y'] -> return (x',y')
- _ -> panic "forkAltPair"
-
--- collect the code emitted by an FCode computation
-getCodeR :: FCode a -> FCode (a, CmmAGraph)
-getCodeR fcode
- = do { state1 <- getState
- ; (a, state2) <- withState fcode (state1 { cgs_stmts = mkNop })
- ; setState $ state2 { cgs_stmts = cgs_stmts state1 }
- ; return (a, cgs_stmts state2) }
-
-getCode :: FCode a -> FCode CmmAGraph
-getCode fcode = do { (_,stmts) <- getCodeR fcode; return stmts }
-
--- | Generate code into a fresh tick (sub-)scope and gather generated code
-getCodeScoped :: FCode a -> FCode (a, CmmAGraphScoped)
-getCodeScoped fcode
- = do { state1 <- getState
- ; ((a, tscope), state2) <-
- tickScope $
- flip withState state1 { cgs_stmts = mkNop } $
- do { a <- fcode
- ; scp <- getTickScope
- ; return (a, scp) }
- ; setState $ state2 { cgs_stmts = cgs_stmts state1 }
- ; return (a, (cgs_stmts state2, tscope)) }
-
-
--- 'getHeapUsage' applies a function to the amount of heap that it uses.
--- It initialises the heap usage to zeros, and passes on an unchanged
--- heap usage.
---
--- It is usually a prelude to performing a GC check, so everything must
--- be in a tidy and consistent state.
---
--- Note the slightly subtle fixed point behaviour needed here
-
-getHeapUsage :: (VirtualHpOffset -> FCode a) -> FCode a
-getHeapUsage fcode
- = do { info_down <- getInfoDown
- ; state <- getState
- ; let fstate_in = state { cgs_hp_usg = initHpUsage }
- (r, fstate_out) = doFCode (fcode hp_hw) info_down fstate_in
- hp_hw = heapHWM (cgs_hp_usg fstate_out) -- Loop here!
-
- ; setState $ fstate_out { cgs_hp_usg = cgs_hp_usg state }
- ; return r }
-
--- ----------------------------------------------------------------------------
--- Combinators for emitting code
-
-emitCgStmt :: CgStmt -> FCode ()
-emitCgStmt stmt
- = do { state <- getState
- ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt }
- }
-
-emitLabel :: BlockId -> FCode ()
-emitLabel id = do tscope <- getTickScope
- emitCgStmt (CgLabel id tscope)
-
-emitComment :: FastString -> FCode ()
-emitComment s
- | debugIsOn = emitCgStmt (CgStmt (CmmComment s))
- | otherwise = return ()
-
-emitTick :: CmmTickish -> FCode ()
-emitTick = emitCgStmt . CgStmt . CmmTick
-
-emitUnwind :: [(GlobalReg, Maybe CmmExpr)] -> FCode ()
-emitUnwind regs = do
- dflags <- getDynFlags
- when (debugLevel dflags > 0) $ do
- emitCgStmt $ CgStmt $ CmmUnwind regs
-
-emitAssign :: CmmReg -> CmmExpr -> FCode ()
-emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r))
-
-emitStore :: CmmExpr -> CmmExpr -> FCode ()
-emitStore l r = emitCgStmt (CgStmt (CmmStore l r))
-
-emit :: CmmAGraph -> FCode ()
-emit ag
- = do { state <- getState
- ; setState $ state { cgs_stmts = cgs_stmts state MkGraph.<*> ag } }
-
-emitDecl :: CmmDecl -> FCode ()
-emitDecl decl
- = do { state <- getState
- ; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } }
-
-emitOutOfLine :: BlockId -> CmmAGraphScoped -> FCode ()
-emitOutOfLine l (stmts, tscope) = emitCgStmt (CgFork l stmts tscope)
-
-emitProcWithStackFrame
- :: Convention -- entry convention
- -> Maybe CmmInfoTable -- info table?
- -> CLabel -- label for the proc
- -> [CmmFormal] -- stack frame
- -> [CmmFormal] -- arguments
- -> CmmAGraphScoped -- code
- -> Bool -- do stack layout?
- -> FCode ()
-
-emitProcWithStackFrame _conv mb_info lbl _stk_args [] blocks False
- = do { dflags <- getDynFlags
- ; emitProc mb_info lbl [] blocks (widthInBytes (wordWidth dflags)) False
- }
-emitProcWithStackFrame conv mb_info lbl stk_args args (graph, tscope) True
- -- do layout
- = do { dflags <- getDynFlags
- ; let (offset, live, entry) = mkCallEntry dflags conv args stk_args
- graph' = entry MkGraph.<*> graph
- ; emitProc mb_info lbl live (graph', tscope) offset True
- }
-emitProcWithStackFrame _ _ _ _ _ _ _ = panic "emitProcWithStackFrame"
-
-emitProcWithConvention :: Convention -> Maybe CmmInfoTable -> CLabel
- -> [CmmFormal]
- -> CmmAGraphScoped
- -> FCode ()
-emitProcWithConvention conv mb_info lbl args blocks
- = emitProcWithStackFrame conv mb_info lbl [] args blocks True
-
-emitProc :: Maybe CmmInfoTable -> CLabel -> [GlobalReg] -> CmmAGraphScoped
- -> Int -> Bool -> FCode ()
-emitProc mb_info lbl live blocks offset do_layout
- = do { dflags <- getDynFlags
- ; l <- newBlockId
- ; let
- blks :: CmmGraph
- blks = labelAGraph l blocks
-
- infos | Just info <- mb_info = mapSingleton (g_entry blks) info
- | otherwise = mapEmpty
-
- sinfo = StackInfo { arg_space = offset
- , updfr_space = Just (initUpdFrameOff dflags)
- , do_layout = do_layout }
-
- tinfo = TopInfo { info_tbls = infos
- , stack_info=sinfo}
-
- proc_block = CmmProc tinfo lbl live blks
-
- ; state <- getState
- ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } }
-
-getCmm :: FCode () -> FCode CmmGroup
--- Get all the CmmTops (there should be no stmts)
--- Return a single Cmm which may be split from other Cmms by
--- object splitting (at a later stage)
-getCmm code
- = do { state1 <- getState
- ; ((), state2) <- withState code (state1 { cgs_tops = nilOL })
- ; setState $ state2 { cgs_tops = cgs_tops state1 }
- ; return (fromOL (cgs_tops state2)) }
-
-
-mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph
-mkCmmIfThenElse e tbranch fbranch = mkCmmIfThenElse' e tbranch fbranch Nothing
-
-mkCmmIfThenElse' :: CmmExpr -> CmmAGraph -> CmmAGraph
- -> Maybe Bool -> FCode CmmAGraph
-mkCmmIfThenElse' e tbranch fbranch likely = do
- tscp <- getTickScope
- endif <- newBlockId
- tid <- newBlockId
- fid <- newBlockId
-
- let
- (test, then_, else_, likely') = case likely of
- Just False | Just e' <- maybeInvertCmmExpr e
- -- currently NCG doesn't know about likely
- -- annotations. We manually switch then and
- -- else branch so the likely false branch
- -- becomes a fallthrough.
- -> (e', fbranch, tbranch, Just True)
- _ -> (e, tbranch, fbranch, likely)
-
- return $ catAGraphs [ mkCbranch test tid fid likely'
- , mkLabel tid tscp, then_, mkBranch endif
- , mkLabel fid tscp, else_, mkLabel endif tscp ]
-
-mkCmmIfGoto :: CmmExpr -> BlockId -> FCode CmmAGraph
-mkCmmIfGoto e tid = mkCmmIfGoto' e tid Nothing
-
-mkCmmIfGoto' :: CmmExpr -> BlockId -> Maybe Bool -> FCode CmmAGraph
-mkCmmIfGoto' e tid l = do
- endif <- newBlockId
- tscp <- getTickScope
- return $ catAGraphs [ mkCbranch e tid endif l, mkLabel endif tscp ]
-
-mkCmmIfThen :: CmmExpr -> CmmAGraph -> FCode CmmAGraph
-mkCmmIfThen e tbranch = mkCmmIfThen' e tbranch Nothing
-
-mkCmmIfThen' :: CmmExpr -> CmmAGraph -> Maybe Bool -> FCode CmmAGraph
-mkCmmIfThen' e tbranch l = do
- endif <- newBlockId
- tid <- newBlockId
- tscp <- getTickScope
- return $ catAGraphs [ mkCbranch e tid endif l
- , mkLabel tid tscp, tbranch, mkLabel endif tscp ]
-
-mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmExpr]
- -> UpdFrameOffset -> [CmmExpr] -> FCode CmmAGraph
-mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do
- dflags <- getDynFlags
- k <- newBlockId
- tscp <- getTickScope
- let area = Young k
- (off, _, copyin) = copyInOflow dflags retConv area results []
- copyout = mkCallReturnsTo dflags f callConv actuals k off updfr_off extra_stack
- return $ catAGraphs [copyout, mkLabel k tscp, copyin]
-
-mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmExpr] -> UpdFrameOffset
- -> FCode CmmAGraph
-mkCmmCall f results actuals updfr_off
- = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off []
-
-
--- ----------------------------------------------------------------------------
--- turn CmmAGraph into CmmGraph, for making a new proc.
-
-aGraphToGraph :: CmmAGraphScoped -> FCode CmmGraph
-aGraphToGraph stmts
- = do { l <- newBlockId
- ; return (labelAGraph l stmts) }
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
deleted file mode 100644
index 61d88feabb..0000000000
--- a/compiler/codeGen/StgCmmPrim.hs
+++ /dev/null
@@ -1,2622 +0,0 @@
-{-# LANGUAGE CPP #-}
--- emitPrimOp is quite large
-{-# OPTIONS_GHC -fmax-pmcheck-iterations=4000000 #-}
-
-----------------------------------------------------------------------------
---
--- Stg to C--: primitive operations
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmPrim (
- cgOpApp,
- cgPrimOp, -- internal(ish), used by cgCase to get code for a
- -- comparison without also turning it into a Bool.
- shouldInlinePrimOp
- ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude hiding ((<*>))
-
-import StgCmmLayout
-import StgCmmForeign
-import StgCmmEnv
-import StgCmmMonad
-import StgCmmUtils
-import StgCmmTicky
-import StgCmmHeap
-import StgCmmProf ( costCentreFrom )
-
-import DynFlags
-import GHC.Platform
-import BasicTypes
-import BlockId
-import MkGraph
-import StgSyn
-import Cmm
-import Type ( Type, tyConAppTyCon )
-import TyCon
-import CLabel
-import CmmUtils
-import PrimOp
-import SMRep
-import FastString
-import Outputable
-import Util
-import Data.Maybe
-
-import Data.Bits ((.&.), bit)
-import Control.Monad (liftM, when, unless)
-
-------------------------------------------------------------------------
--- Primitive operations and foreign calls
-------------------------------------------------------------------------
-
-{- Note [Foreign call results]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A foreign call always returns an unboxed tuple of results, one
-of which is the state token. This seems to happen even for pure
-calls.
-
-Even if we returned a single result for pure calls, it'd still be
-right to wrap it in a singleton unboxed tuple, because the result
-might be a Haskell closure pointer, which we don't want to evaluate. -}
-
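--- For example (illustrative only, not taken from this module): a foreign
--- import such as
---
---   foreign import ccall unsafe "math.h sin" c_sin :: Double -> IO Double
---
--- reaches cgOpApp below as an StgFCallOp whose result type is the unboxed
--- tuple (# State# RealWorld, Double #); the void state-token component is
--- dropped by the code generator and only the Double result is materialised.
-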
-----------------------------------
-cgOpApp :: StgOp -- The op
- -> [StgArg] -- Arguments
- -> Type -- Result type (always an unboxed tuple)
- -> FCode ReturnKind
-
--- Foreign calls
-cgOpApp (StgFCallOp fcall ty) stg_args res_ty
- = cgForeignCall fcall ty stg_args res_ty
- -- Note [Foreign call results]
-
--- tagToEnum# is special: we need to pull the constructor
--- out of the table, and perform an appropriate return.
-
-cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
- = ASSERT(isEnumerationTyCon tycon)
- do { dflags <- getDynFlags
- ; args' <- getNonVoidArgAmodes [arg]
- ; let amode = case args' of [amode] -> amode
- _ -> panic "TagToEnumOp had void arg"
- ; emitReturn [tagToClosure dflags tycon amode] }
- where
- -- If you're reading this code in the attempt to figure
- -- out why the compiler panic'ed here, it is probably because
- -- you used tagToEnum# in a non-monomorphic setting, e.g.,
- -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
- -- That won't work.
- tycon = tyConAppTyCon res_ty
-
-cgOpApp (StgPrimOp primop) args res_ty = do
- dflags <- getDynFlags
- cmm_args <- getNonVoidArgAmodes args
- case shouldInlinePrimOp dflags primop cmm_args of
- Nothing -> do -- out-of-line
- let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
- emitCall (NativeNodeCall, NativeReturn) fun cmm_args
-
- Just f -- inline
- | ReturnsPrim VoidRep <- result_info
- -> do f []
- emitReturn []
-
- | ReturnsPrim rep <- result_info
- -> do dflags <- getDynFlags
- res <- newTemp (primRepCmmType dflags rep)
- f [res]
- emitReturn [CmmReg (CmmLocal res)]
-
- | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
- -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
- f regs
- emitReturn (map (CmmReg . CmmLocal) regs)
-
- | otherwise -> panic "cgPrimop"
- where
- result_info = getPrimOpResultInfo primop
-
-cgOpApp (StgPrimCallOp primcall) args _res_ty
- = do { cmm_args <- getNonVoidArgAmodes args
- ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
- ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
-
--- | Interpret the argument as an unsigned value, assuming the value
--- is given in two's-complement form in the given width.
---
--- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
---
--- This function is used to work around the fact that many array
--- primops take Int# arguments, but we interpret them as unsigned
--- quantities in the code gen. This means that we have to be careful
--- every time we work on e.g. a CmmInt literal that corresponds to the
--- array size, as it might contain a negative Integer value if the
--- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
--- literal.
-asUnsigned :: Width -> Integer -> Integer
-asUnsigned w n = n .&. (bit (widthInBits w) - 1)
-
--- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
--- ByteOff (or some other fixed width signed type) to represent
--- array sizes or indices. This means that these will overflow for
--- large enough sizes.
-
--- | Decide whether an out-of-line primop should be replaced by an
--- inline implementation. This might happen e.g. if there's enough
--- static information, such as statically known arguments, to emit a
--- more efficient implementation inline.
---
--- Returns 'Nothing' if this primop should use its out-of-line
--- implementation (defined elsewhere), or otherwise 'Just' a
--- code-generating function that takes the output regs as arguments.
-shouldInlinePrimOp :: DynFlags
- -> PrimOp -- ^ The primop
- -> [CmmExpr] -- ^ The primop arguments
- -> Maybe ([LocalReg] -> FCode ())
-
-shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
- | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
-
-shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
- | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] ->
- doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
- [ (mkIntExpr dflags (fromInteger n),
- fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
- , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
- fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
- ]
- (fromInteger n) init
-
-shouldInlinePrimOp _ CopyArrayOp
- [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
- Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp _ CopyMutableArrayOp
- [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
- Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp _ CopyArrayArrayOp
- [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
- Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp _ CopyMutableArrayArrayOp
- [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
- Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
- | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
- | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
- | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
- | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
- | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] ->
- doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
- [ (mkIntExpr dflags (fromInteger n),
- fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
- ]
- (fromInteger n) init
-
-shouldInlinePrimOp _ CopySmallArrayOp
- [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
- Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp _ CopySmallMutableArrayOp
- [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
- Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
-
-shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
- | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
- | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
- | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
- | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
-
-shouldInlinePrimOp dflags primop args
- | primOpOutOfLine primop = Nothing
- | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
-
--- TODO: Several primops, such as 'copyArray#', only have an inline
--- implementation (below) but could possibly have both an inline
--- implementation and an out-of-line implementation, just like
--- 'newArray#'. This would lower the amount of code generated,
--- hopefully without a performance impact (needs to be measured).
-
----------------------------------------------------
-cgPrimOp :: [LocalReg] -- where to put the results
- -> PrimOp -- the op
- -> [StgArg] -- arguments
- -> FCode ()
-
-cgPrimOp results op args
- = do dflags <- getDynFlags
- arg_exprs <- getNonVoidArgAmodes args
- emitPrimOp dflags results op arg_exprs
-
-
-------------------------------------------------------------------------
--- Emitting code for a primop
-------------------------------------------------------------------------
-
-emitPrimOp :: DynFlags
- -> [LocalReg] -- where to put the results
- -> PrimOp -- the op
- -> [CmmExpr] -- arguments
- -> FCode ()
-
--- First we handle various awkward cases specially. The remaining
--- easy cases are then handled by translateOp, defined below.
-
-emitPrimOp _ [res] ParOp [arg]
- =
- -- for now, just implement this in a C function
- -- later, we might want to inline it.
- emitCCall
- [(res,NoHint)]
- (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
- [(baseExpr, AddrHint), (arg,AddrHint)]
-
-emitPrimOp dflags [res] SparkOp [arg]
- = do
- -- returns the value of arg in res. We're going to therefore
- -- refer to arg twice (once to pass to newSpark(), and once to
- -- assign to res), so put it in a temporary.
- tmp <- assignTemp arg
- tmp2 <- newTemp (bWord dflags)
- emitCCall
- [(tmp2,NoHint)]
- (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
- [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
- emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
-
-emitPrimOp dflags [res] GetCCSOfOp [arg]
- = emitAssign (CmmLocal res) val
- where
- val
- | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
- | otherwise = CmmLit (zeroCLit dflags)
-
-emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
- = emitAssign (CmmLocal res) cccsExpr
-
-emitPrimOp _ [res] MyThreadIdOp []
- = emitAssign (CmmLocal res) currentTSOExpr
-
-emitPrimOp dflags [res] ReadMutVarOp [mutv]
- = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
-
-emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
- = do -- Without this write barrier, other CPUs may see this pointer before
- -- the writes for the closure it points to have occurred.
- emitPrimCall res MO_WriteBarrier []
- emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
- emitCCall
- [{-no results-}]
- (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
- [(baseExpr, AddrHint), (mutv,AddrHint)]
-
--- #define sizzeofByteArrayzh(r,a) \
--- r = ((StgArrBytes *)(a))->bytes
-emitPrimOp dflags [res] SizeofByteArrayOp [arg]
- = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
-
--- #define sizzeofMutableByteArrayzh(r,a) \
--- r = ((StgArrBytes *)(a))->bytes
-emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
- = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
-
--- #define getSizzeofMutableByteArrayzh(r,a) \
--- r = ((StgArrBytes *)(a))->bytes
-emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
- = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
-
-
--- #define touchzh(o) /* nothing */
-emitPrimOp _ res@[] TouchOp args@[_arg]
- = do emitPrimCall res MO_Touch args
-
--- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
-emitPrimOp dflags [res] ByteArrayContents_Char [arg]
- = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
-
--- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
-emitPrimOp dflags [res] StableNameToIntOp [arg]
- = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
-
-emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
- = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
-
--- #define addrToHValuezh(r,a) r=(P_)a
-emitPrimOp _ [res] AddrToAnyOp [arg]
- = emitAssign (CmmLocal res) arg
-
--- #define hvalueToAddrzh(r, a) r=(W_)a
-emitPrimOp _ [res] AnyToAddrOp [arg]
- = emitAssign (CmmLocal res) arg
-
-{- Freezing arrays-of-ptrs requires changing an info table, for the
- benefit of the generational collector. It needs to scavenge mutable
- objects, even if they are in old space. When they become immutable,
- they can be removed from this scavenge list. -}
-
--- #define unsafeFreezzeArrayzh(r,a)
--- {
--- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
--- r = a;
--- }
-emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
- = emit $ catAGraphs
- [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
- mkAssign (CmmLocal res) arg ]
-emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
- = emit $ catAGraphs
- [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
- mkAssign (CmmLocal res) arg ]
-emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
- = emit $ catAGraphs
- [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)),
- mkAssign (CmmLocal res) arg ]
-
--- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
-emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
- = emitAssign (CmmLocal res) arg
-
--- Reading/writing pointer arrays
-
-emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
-emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
-emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
-
-emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
-emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
-emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
-emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
-emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
-emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
-emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
-emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
-emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
-emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
-
-emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
-emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
-emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
-
--- Getting the size of pointer arrays
-
-emitPrimOp dflags [res] SizeofArrayOp [arg]
- = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
- (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
- (bWord dflags))
-emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
- = emitPrimOp dflags [res] SizeofArrayOp [arg]
-emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
- = emitPrimOp dflags [res] SizeofArrayOp [arg]
-emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
- = emitPrimOp dflags [res] SizeofArrayOp [arg]
-
-emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
- emit $ mkAssign (CmmLocal res)
- (cmmLoadIndexW dflags arg
- (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
- (bWord dflags))
-emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
- emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
-
--- IndexXXXoffAddr
-
-emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
-emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
-emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
-emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
-emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
-emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
-emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
-emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
-
--- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
-
-emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
-emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
-emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
-emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
-emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
-emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
-emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
-emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
-
--- IndexXXXArray
-
-emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
-emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
-emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
-emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
-emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
-emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
-
--- ReadXXXArray, identical to IndexXXXArray.
-
-emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
-emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
-emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
-emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
-emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
-emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
-emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
-
--- IndexWord8ArrayAsXXX
-
-emitPrimOp dflags res IndexByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
-emitPrimOp _ res IndexByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
-emitPrimOp _ res IndexByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
-emitPrimOp _ res IndexByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
-emitPrimOp dflags res IndexByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
-emitPrimOp _ res IndexByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
-
--- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX
-
-emitPrimOp dflags res ReadByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
-emitPrimOp _ res ReadByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
-emitPrimOp _ res ReadByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
-emitPrimOp _ res ReadByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
-emitPrimOp dflags res ReadByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
-emitPrimOp _ res ReadByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
-
--- WriteXXXoffAddr
-
-emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
-emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
-emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
-emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
-emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
-emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
-emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
-emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
-emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
-
--- WriteXXXArray
-
-emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
-emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
-emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
-emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
-emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
-emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
-emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
-emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
-
--- WriteInt8ArrayAsXXX
-
-emitPrimOp dflags res WriteByteArrayOp_Word8AsChar args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8AsWideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
-emitPrimOp _ res WriteByteArrayOp_Word8AsInt args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp _ res WriteByteArrayOp_Word8AsWord args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp _ res WriteByteArrayOp_Word8AsAddr args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp _ res WriteByteArrayOp_Word8AsFloat args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp _ res WriteByteArrayOp_Word8AsDouble args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp _ res WriteByteArrayOp_Word8AsStablePtr args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8AsInt16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8AsInt32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
-emitPrimOp _ res WriteByteArrayOp_Word8AsInt64 args = doWriteByteArrayOp Nothing b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8AsWord16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
-emitPrimOp dflags res WriteByteArrayOp_Word8AsWord32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
-emitPrimOp _ res WriteByteArrayOp_Word8AsWord64 args = doWriteByteArrayOp Nothing b8 res args
-
--- Copying and setting byte arrays
-emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
- doCopyByteArrayOp src src_off dst dst_off n
-emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
- doCopyMutableByteArrayOp src src_off dst dst_off n
-emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
- doCopyByteArrayToAddrOp src src_off dst n
-emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
- doCopyMutableByteArrayToAddrOp src src_off dst n
-emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
- doCopyAddrToByteArrayOp src dst dst_off n
-emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
- doSetByteArrayOp ba off len c
-
--- Comparing byte arrays
-emitPrimOp _ [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] =
- doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
-
-emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
-emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
-emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
-emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
-
-emitPrimOp _ [res] BRev8Op [w] = emitBRevCall res w W8
-emitPrimOp _ [res] BRev16Op [w] = emitBRevCall res w W16
-emitPrimOp _ [res] BRev32Op [w] = emitBRevCall res w W32
-emitPrimOp _ [res] BRev64Op [w] = emitBRevCall res w W64
-emitPrimOp dflags [res] BRevOp [w] = emitBRevCall res w (wordWidth dflags)
-
--- Population count
-emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
-emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
-emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
-emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
-emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
-
--- Parallel bit deposit
-emitPrimOp _ [res] Pdep8Op [src, mask] = emitPdepCall res src mask W8
-emitPrimOp _ [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16
-emitPrimOp _ [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32
-emitPrimOp _ [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64
-emitPrimOp dflags [res] PdepOp [src, mask] = emitPdepCall res src mask (wordWidth dflags)
-
--- Parallel bit extract
-emitPrimOp _ [res] Pext8Op [src, mask] = emitPextCall res src mask W8
-emitPrimOp _ [res] Pext16Op [src, mask] = emitPextCall res src mask W16
-emitPrimOp _ [res] Pext32Op [src, mask] = emitPextCall res src mask W32
-emitPrimOp _ [res] Pext64Op [src, mask] = emitPextCall res src mask W64
-emitPrimOp dflags [res] PextOp [src, mask] = emitPextCall res src mask (wordWidth dflags)
-
--- count leading zeros
-emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
-emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
-emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
-emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
-emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
-
--- count trailing zeros
-emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
-emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
-emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
-emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
-emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
-
--- Unsigned int to floating point conversions
-emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
- (MO_UF_Conv W32) [w]
-emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
- (MO_UF_Conv W64) [w]
-
--- SIMD primops
-emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
- checkVecCompatibility dflags vcat n w
- doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
- where
- zeros :: CmmExpr
- zeros = CmmLit $ CmmVec (replicate n zero)
-
- zero :: CmmLit
- zero = case vcat of
- IntVec -> CmmInt 0 w
- WordVec -> CmmInt 0 w
- FloatVec -> CmmFloat 0 w
-
- ty :: CmmType
- ty = vecVmmType vcat n w
-
-emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
- checkVecCompatibility dflags vcat n w
- when (es `lengthIsNot` n) $
- panic "emitPrimOp: VecPackOp has wrong number of arguments"
- doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
- where
- zeros :: CmmExpr
- zeros = CmmLit $ CmmVec (replicate n zero)
-
- zero :: CmmLit
- zero = case vcat of
- IntVec -> CmmInt 0 w
- WordVec -> CmmInt 0 w
- FloatVec -> CmmFloat 0 w
-
- ty :: CmmType
- ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
- checkVecCompatibility dflags vcat n w
- when (res `lengthIsNot` n) $
- panic "emitPrimOp: VecUnpackOp has wrong number of results"
- doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
- where
- ty :: CmmType
- ty = vecVmmType vcat n w
-
-emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
- checkVecCompatibility dflags vcat n w
- doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
- where
- ty :: CmmType
- ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doIndexByteArrayOp Nothing ty res args
- where
- ty :: CmmType
- ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doIndexByteArrayOp Nothing ty res args
- where
- ty :: CmmType
- ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doWriteByteArrayOp Nothing ty res args
- where
- ty :: CmmType
- ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doIndexOffAddrOp Nothing ty res args
- where
- ty :: CmmType
- ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doIndexOffAddrOp Nothing ty res args
- where
- ty :: CmmType
- ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doWriteOffAddrOp Nothing ty res args
- where
- ty :: CmmType
- ty = vecVmmType vcat n w
-
-emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doIndexByteArrayOpAs Nothing vecty ty res args
- where
- vecty :: CmmType
- vecty = vecVmmType vcat n w
-
- ty :: CmmType
- ty = vecCmmCat vcat w
-
-emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doIndexByteArrayOpAs Nothing vecty ty res args
- where
- vecty :: CmmType
- vecty = vecVmmType vcat n w
-
- ty :: CmmType
- ty = vecCmmCat vcat w
-
-emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doWriteByteArrayOp Nothing ty res args
- where
- ty :: CmmType
- ty = vecCmmCat vcat w
-
-emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doIndexOffAddrOpAs Nothing vecty ty res args
- where
- vecty :: CmmType
- vecty = vecVmmType vcat n w
-
- ty :: CmmType
- ty = vecCmmCat vcat w
-
-emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doIndexOffAddrOpAs Nothing vecty ty res args
- where
- vecty :: CmmType
- vecty = vecVmmType vcat n w
-
- ty :: CmmType
- ty = vecCmmCat vcat w
-
-emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
- checkVecCompatibility dflags vcat n w
- doWriteOffAddrOp Nothing ty res args
- where
- ty :: CmmType
- ty = vecCmmCat vcat w
-
--- Prefetch
-emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
-emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
-emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
-emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
-
-emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
-emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
-emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
-emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
-
-emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
-emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
-emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
-emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
-
-emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
-emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
-emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
-emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
-
--- Atomic read-modify-write
-emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
- doAtomicRMW res AMO_Add mba ix (bWord dflags) n
-emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
- doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
-emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
- doAtomicRMW res AMO_And mba ix (bWord dflags) n
-emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
- doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
-emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
- doAtomicRMW res AMO_Or mba ix (bWord dflags) n
-emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
- doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
-emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
- doAtomicReadByteArray res mba ix (bWord dflags)
-emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
- doAtomicWriteByteArray mba ix (bWord dflags) val
-emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
- doCasByteArray res mba ix (bWord dflags) old new
-
--- The rest just translate straightforwardly
-emitPrimOp dflags [res] op [arg]
- | nopOp op
- = emitAssign (CmmLocal res) arg
-
- | Just (mop,rep) <- narrowOp op
- = emitAssign (CmmLocal res) $
- CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
-
-emitPrimOp dflags r@[res] op args
- | Just prim <- callishOp op
- = do emitPrimCall r prim args
-
- | Just mop <- translateOp dflags op
- = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
- emit stmt
-
-emitPrimOp dflags results op args
- = case callishPrimOpSupported dflags op args of
- Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
- Right gen -> gen results args
-
--- Note [QuotRem optimization]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- `quot` and `rem` with constant divisor can be implemented with fast bit-ops
--- (shift, .&.).
---
--- Currently we only support optimization (performed in CmmOpt) when the
--- constant is a power of 2. #9041 tracks the implementation of the general
--- optimization.
---
--- `quotRem` can be optimized in the same way. However as it returns two values,
--- it is implemented as a "callish" primop which is harder to match and
--- to transform later on. For simplicity, the current implementation detects cases
--- that can be optimized (see `quotRemCanBeOptimized`) and converts the STG quotRem
--- primop into two Cmm quot and rem primops.
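---
--- Illustrative sketch (in pseudo-notation; unsigned case only, since signed
--- quot/rem by a power of 2 needs an extra fix-up for negative dividends):
--- the power-of-2 rewrite performed later in CmmOpt amounts to
---
---   x `quot` 8  ~~>  x >> 3
---   x `rem`  8  ~~>  x .&. 7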
-
-type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
-
-callishPrimOpSupported :: DynFlags -> PrimOp -> [CmmExpr] -> Either CallishMachOp GenericOp
-callishPrimOpSupported dflags op args
- = case op of
- IntQuotRemOp | ncg && (x86ish || ppc)
- , not quotRemCanBeOptimized
- -> Left (MO_S_QuotRem (wordWidth dflags))
- | otherwise
- -> Right (genericIntQuotRemOp (wordWidth dflags))
-
- Int8QuotRemOp | ncg && (x86ish || ppc)
- , not quotRemCanBeOptimized
- -> Left (MO_S_QuotRem W8)
- | otherwise -> Right (genericIntQuotRemOp W8)
-
- Int16QuotRemOp | ncg && (x86ish || ppc)
- , not quotRemCanBeOptimized
- -> Left (MO_S_QuotRem W16)
- | otherwise -> Right (genericIntQuotRemOp W16)
-
-
- WordQuotRemOp | ncg && (x86ish || ppc)
- , not quotRemCanBeOptimized
- -> Left (MO_U_QuotRem (wordWidth dflags))
- | otherwise
- -> Right (genericWordQuotRemOp (wordWidth dflags))
-
- WordQuotRem2Op | (ncg && (x86ish || ppc))
- || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
- | otherwise -> Right (genericWordQuotRem2Op dflags)
-
- Word8QuotRemOp | ncg && (x86ish || ppc)
- , not quotRemCanBeOptimized
- -> Left (MO_U_QuotRem W8)
- | otherwise -> Right (genericWordQuotRemOp W8)
-
-      Word16QuotRemOp | ncg && (x86ish || ppc)
- , not quotRemCanBeOptimized
- -> Left (MO_U_QuotRem W16)
- | otherwise -> Right (genericWordQuotRemOp W16)
-
- WordAdd2Op | (ncg && (x86ish || ppc))
- || llvm -> Left (MO_Add2 (wordWidth dflags))
- | otherwise -> Right genericWordAdd2Op
-
- WordAddCOp | (ncg && (x86ish || ppc))
- || llvm -> Left (MO_AddWordC (wordWidth dflags))
- | otherwise -> Right genericWordAddCOp
-
- WordSubCOp | (ncg && (x86ish || ppc))
- || llvm -> Left (MO_SubWordC (wordWidth dflags))
- | otherwise -> Right genericWordSubCOp
-
- IntAddCOp | (ncg && (x86ish || ppc))
- || llvm -> Left (MO_AddIntC (wordWidth dflags))
- | otherwise -> Right genericIntAddCOp
-
- IntSubCOp | (ncg && (x86ish || ppc))
- || llvm -> Left (MO_SubIntC (wordWidth dflags))
- | otherwise -> Right genericIntSubCOp
-
- WordMul2Op | ncg && (x86ish || ppc)
- || llvm -> Left (MO_U_Mul2 (wordWidth dflags))
- | otherwise -> Right genericWordMul2Op
- FloatFabsOp | (ncg && x86ish || ppc)
- || llvm -> Left MO_F32_Fabs
- | otherwise -> Right $ genericFabsOp W32
- DoubleFabsOp | (ncg && x86ish || ppc)
- || llvm -> Left MO_F64_Fabs
- | otherwise -> Right $ genericFabsOp W64
-
- _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
- where
- -- See Note [QuotRem optimization]
- quotRemCanBeOptimized = case args of
- [_, CmmLit (CmmInt n _) ] -> isJust (exactLog2 n)
- _ -> False
-
- ncg = case hscTarget dflags of
- HscAsm -> True
- _ -> False
- llvm = case hscTarget dflags of
- HscLlvm -> True
- _ -> False
- x86ish = case platformArch (targetPlatform dflags) of
- ArchX86 -> True
- ArchX86_64 -> True
- _ -> False
- ppc = case platformArch (targetPlatform dflags) of
- ArchPPC -> True
- ArchPPC_64 _ -> True
- _ -> False
-
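--- Example (sketch): with the NCG on x86_64, WordAdd2Op takes the Left branch
--- above and maps to a single MO_Add2 machine op, while an unregisterised
--- build has neither 'ncg' nor 'llvm' set and falls back to the Right branch,
--- i.e. the generic Cmm expansion genericWordAdd2Op below.
-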
-genericIntQuotRemOp :: Width -> GenericOp
-genericIntQuotRemOp width [res_q, res_r] [arg_x, arg_y]
- = emit $ mkAssign (CmmLocal res_q)
- (CmmMachOp (MO_S_Quot width) [arg_x, arg_y]) <*>
- mkAssign (CmmLocal res_r)
- (CmmMachOp (MO_S_Rem width) [arg_x, arg_y])
-genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
-
-genericWordQuotRemOp :: Width -> GenericOp
-genericWordQuotRemOp width [res_q, res_r] [arg_x, arg_y]
- = emit $ mkAssign (CmmLocal res_q)
- (CmmMachOp (MO_U_Quot width) [arg_x, arg_y]) <*>
- mkAssign (CmmLocal res_r)
- (CmmMachOp (MO_U_Rem width) [arg_x, arg_y])
-genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
-
-genericWordQuotRem2Op :: DynFlags -> GenericOp
-genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
- = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
- where ty = cmmExprType dflags arg_x_high
- shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
- shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
- or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
- ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
- ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
- minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
- times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
- zero = lit 0
- one = lit 1
- negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
- lit i = CmmLit (CmmInt i (wordWidth dflags))
-
- f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
- f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
- mkAssign (CmmLocal res_r) high)
- f i acc high low =
- do roverflowedBit <- newTemp ty
- rhigh' <- newTemp ty
- rhigh'' <- newTemp ty
- rlow' <- newTemp ty
- risge <- newTemp ty
- racc' <- newTemp ty
- let high' = CmmReg (CmmLocal rhigh')
- isge = CmmReg (CmmLocal risge)
- overflowedBit = CmmReg (CmmLocal roverflowedBit)
- let this = catAGraphs
- [mkAssign (CmmLocal roverflowedBit)
- (shr high negone),
- mkAssign (CmmLocal rhigh')
- (or (shl high one) (shr low negone)),
- mkAssign (CmmLocal rlow')
- (shl low one),
- mkAssign (CmmLocal risge)
- (or (overflowedBit `ne` zero)
- (high' `ge` arg_y)),
- mkAssign (CmmLocal rhigh'')
- (high' `minus` (arg_y `times` isge)),
- mkAssign (CmmLocal racc')
- (or (shl acc one) isge)]
- rest <- f (i - 1) (CmmReg (CmmLocal racc'))
- (CmmReg (CmmLocal rhigh''))
- (CmmReg (CmmLocal rlow'))
- return (this <*> rest)
-genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
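--- The recursion in 'f' above is classic shift-and-subtract (binary long
--- division), one bit per step: the double-word dividend (high,low) is shifted
--- left one bit at a time, and whenever the bit shifted out of 'high' was set
--- or the shifted 'high' is at least 'arg_y', the divisor is subtracted
--- (branchlessly, by multiplying it with the 0/1 flag 'isge') and a 1 is
--- shifted into the quotient accumulator 'acc'. After wordWidth steps 'acc'
--- holds the quotient and 'high' holds the remainder.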
-
-genericWordAdd2Op :: GenericOp
-genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
- = do dflags <- getDynFlags
- r1 <- newTemp (cmmExprType dflags arg_x)
- r2 <- newTemp (cmmExprType dflags arg_x)
- let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
- toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
- bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
- add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
- or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
- hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
- (wordWidth dflags))
- hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
- emit $ catAGraphs
- [mkAssign (CmmLocal r1)
- (add (bottomHalf arg_x) (bottomHalf arg_y)),
- mkAssign (CmmLocal r2)
- (add (topHalf (CmmReg (CmmLocal r1)))
- (add (topHalf arg_x) (topHalf arg_y))),
- mkAssign (CmmLocal res_h)
- (topHalf (CmmReg (CmmLocal r2))),
- mkAssign (CmmLocal res_l)
- (or (toTopHalf (CmmReg (CmmLocal r2)))
- (bottomHalf (CmmReg (CmmLocal r1))))]
-genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
-
--- | Implements branchless recovery of the carry flag @c@ by checking the
--- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@:
---
--- @
--- c = a&b | (a|b)&~r
--- @
---
--- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
-genericWordAddCOp :: GenericOp
-genericWordAddCOp [res_r, res_c] [aa, bb]
- = do dflags <- getDynFlags
- emit $ catAGraphs [
- mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
- mkAssign (CmmLocal res_c) $
- CmmMachOp (mo_wordUShr dflags) [
- CmmMachOp (mo_wordOr dflags) [
- CmmMachOp (mo_wordAnd dflags) [aa,bb],
- CmmMachOp (mo_wordAnd dflags) [
- CmmMachOp (mo_wordOr dflags) [aa,bb],
- CmmMachOp (mo_wordNot dflags) [CmmReg (CmmLocal res_r)]
- ]
- ],
- mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
- ]
- ]
-genericWordAddCOp _ _ = panic "genericWordAddCOp"
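--- A worked instance of the recovery formula above, on 4-bit words for
--- brevity (illustration only; the real code works at the full word width):
--- for a = 1100 and b = 0110 we get r = a + b = 0010 with a carry out, and
--- indeed a&b | (a|b)&~r = 0100 | (1110 & 1101) = 1100; shifting right by
--- (word size - 1) extracts that top bit, giving c = 1.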
-
--- | Implements branchless recovery of the carry flag @c@ by checking the
--- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@:
---
--- @
--- c = ~a&b | (~a|b)&r
--- @
---
--- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
-genericWordSubCOp :: GenericOp
-genericWordSubCOp [res_r, res_c] [aa, bb]
- = do dflags <- getDynFlags
- emit $ catAGraphs [
- mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
- mkAssign (CmmLocal res_c) $
- CmmMachOp (mo_wordUShr dflags) [
- CmmMachOp (mo_wordOr dflags) [
- CmmMachOp (mo_wordAnd dflags) [
- CmmMachOp (mo_wordNot dflags) [aa],
- bb
- ],
- CmmMachOp (mo_wordAnd dflags) [
- CmmMachOp (mo_wordOr dflags) [
- CmmMachOp (mo_wordNot dflags) [aa],
- bb
- ],
- CmmReg (CmmLocal res_r)
- ]
- ],
- mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
- ]
- ]
-genericWordSubCOp _ _ = panic "genericWordSubCOp"
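--- Quick sanity check of the borrow formula above, again on 4-bit words
--- (illustration only): a = 0011, b = 0100 gives r = a - b = 1111, and
--- ~a&b | (~a|b)&r = 0100 | (1100 & 1111) = 1100, whose top bit is 1:
--- a borrow occurred, as expected for 3 - 4.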
-
-genericIntAddCOp :: GenericOp
-genericIntAddCOp [res_r, res_c] [aa, bb]
-{-
- With some bit-twiddling, we can define int{Add,Sub}Czh portably in
- C, and without needing any comparisons. This may not be the
- fastest way to do it - if you have better code, please send it! --SDM
-
- Return : r = a + b, c = 0 if no overflow, 1 on overflow.
-
- We currently don't make use of the r value if c is != 0 (i.e.
-  overflow); we just convert to big integers and try again.  This
- could be improved by making r and c the correct values for
- plugging into a new J#.
-
- { r = ((I_)(a)) + ((I_)(b)); \
- c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
- >> (BITS_IN (I_) - 1); \
- }
- Wading through the mass of bracketry, it seems to reduce to:
- c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
-
--}
- = do dflags <- getDynFlags
- emit $ catAGraphs [
- mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
- mkAssign (CmmLocal res_c) $
- CmmMachOp (mo_wordUShr dflags) [
- CmmMachOp (mo_wordAnd dflags) [
- CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
- CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
- ],
- mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
- ]
- ]
-genericIntAddCOp _ _ = panic "genericIntAddCOp"
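--- Checking the derivation above on 4-bit two's-complement words
--- (illustration only): a = 0111 (7), b = 0001 (1) gives r = 1000 (-8),
--- and ~(a^b) & (a^r) = 1001 & 1111 = 1001, whose top bit reports the
--- overflow; for a = 0011 (3) instead, r = 0100 and
--- ~(a^b) & (a^r) = 1101 & 0111 = 0101, top bit 0, no overflow.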
-
-genericIntSubCOp :: GenericOp
-genericIntSubCOp [res_r, res_c] [aa, bb]
-{- Similarly:
- #define subIntCzh(r,c,a,b) \
- { r = ((I_)(a)) - ((I_)(b)); \
- c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
- >> (BITS_IN (I_) - 1); \
- }
-
- c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
--}
- = do dflags <- getDynFlags
- emit $ catAGraphs [
- mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
- mkAssign (CmmLocal res_c) $
- CmmMachOp (mo_wordUShr dflags) [
- CmmMachOp (mo_wordAnd dflags) [
- CmmMachOp (mo_wordXor dflags) [aa,bb],
- CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
- ],
- mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
- ]
- ]
-genericIntSubCOp _ _ = panic "genericIntSubCOp"
-
-genericWordMul2Op :: GenericOp
-genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
- = do dflags <- getDynFlags
- let t = cmmExprType dflags arg_x
- xlyl <- liftM CmmLocal $ newTemp t
- xlyh <- liftM CmmLocal $ newTemp t
- xhyl <- liftM CmmLocal $ newTemp t
- r <- liftM CmmLocal $ newTemp t
- -- This generic implementation is very simple and slow. We might
- -- well be able to do better, but for now this at least works.
- let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
- toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
- bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
- add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
- sum = foldl1 add
- mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
- or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
- hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
- (wordWidth dflags))
- hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
- emit $ catAGraphs
- [mkAssign xlyl
- (mul (bottomHalf arg_x) (bottomHalf arg_y)),
- mkAssign xlyh
- (mul (bottomHalf arg_x) (topHalf arg_y)),
- mkAssign xhyl
- (mul (topHalf arg_x) (bottomHalf arg_y)),
- mkAssign r
- (sum [topHalf (CmmReg xlyl),
- bottomHalf (CmmReg xhyl),
- bottomHalf (CmmReg xlyh)]),
- mkAssign (CmmLocal res_l)
- (or (bottomHalf (CmmReg xlyl))
- (toTopHalf (CmmReg r))),
- mkAssign (CmmLocal res_h)
- (sum [mul (topHalf arg_x) (topHalf arg_y),
- topHalf (CmmReg xhyl),
- topHalf (CmmReg xlyh),
- topHalf (CmmReg r)])]
-genericWordMul2Op _ _ = panic "genericWordMul2Op"
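--- The generic code above is schoolbook multiplication on half-words: writing
--- x = xh*2^k + xl and y = yh*2^k + yl (where k is half the word width),
---
---   x*y = xh*yh*2^(2k) + (xh*yl + xl*yh)*2^k + xl*yl
---
--- 'r' accumulates the top half of xl*yl together with the low halves of the
--- two cross terms; its low half becomes the top half of the low result word,
--- while its top half is the carry folded (together with xh*yh and the high
--- halves of the cross terms) into the high result word.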
-
--- This replicates what we had in libraries/base/GHC/Float.hs:
---
--- abs x | x == 0 = 0 -- handles (-0.0)
--- | x > 0 = x
--- | otherwise = negateFloat x
-genericFabsOp :: Width -> GenericOp
-genericFabsOp w [res_r] [aa]
- = do dflags <- getDynFlags
- let zero = CmmLit (CmmFloat 0 w)
-
- eq x y = CmmMachOp (MO_F_Eq w) [x, y]
- gt x y = CmmMachOp (MO_F_Gt w) [x, y]
-
- neg x = CmmMachOp (MO_F_Neg w) [x]
-
- g1 = catAGraphs [mkAssign (CmmLocal res_r) zero]
- g2 = catAGraphs [mkAssign (CmmLocal res_r) aa]
-
- res_t <- CmmLocal <$> newTemp (cmmExprType dflags aa)
- let g3 = catAGraphs [mkAssign res_t aa,
- mkAssign (CmmLocal res_r) (neg (CmmReg res_t))]
-
- g4 <- mkCmmIfThenElse (gt aa zero) g2 g3
-
- emit =<< mkCmmIfThenElse (eq aa zero) g1 g4
-
-genericFabsOp _ _ _ = panic "genericFabsOp"
-
--- These PrimOps are NOPs in Cmm
-
-nopOp :: PrimOp -> Bool
-nopOp Int2WordOp = True
-nopOp Word2IntOp = True
-nopOp Int2AddrOp = True
-nopOp Addr2IntOp = True
-nopOp ChrOp = True -- Int# and Char# are rep'd the same
-nopOp OrdOp = True
-nopOp _ = False
-
--- These PrimOps turn into double casts
-
-narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
-narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
-narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
-narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
-narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
-narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
-narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
-narrowOp _ = Nothing
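--- For example (sketch, assuming a 64-bit target): Narrow8IntOp applied to x
--- is emitted by the narrowOp case of emitPrimOp above as
---
---   MO_SS_Conv W8 W64 [MO_SS_Conv W64 W8 [x]]
---
--- i.e. truncate to the low byte and then sign-extend back to a full word,
--- which is what "double casts" means here.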
-
--- Native word signless ops
-
-translateOp :: DynFlags -> PrimOp -> Maybe MachOp
-translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
-translateOp dflags IntSubOp = Just (mo_wordSub dflags)
-translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
-translateOp dflags WordSubOp = Just (mo_wordSub dflags)
-translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
-translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
-
-translateOp dflags IntEqOp = Just (mo_wordEq dflags)
-translateOp dflags IntNeOp = Just (mo_wordNe dflags)
-translateOp dflags WordEqOp = Just (mo_wordEq dflags)
-translateOp dflags WordNeOp = Just (mo_wordNe dflags)
-translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
-translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
-
-translateOp dflags AndOp = Just (mo_wordAnd dflags)
-translateOp dflags OrOp = Just (mo_wordOr dflags)
-translateOp dflags XorOp = Just (mo_wordXor dflags)
-translateOp dflags NotOp = Just (mo_wordNot dflags)
-translateOp dflags SllOp = Just (mo_wordShl dflags)
-translateOp dflags SrlOp = Just (mo_wordUShr dflags)
-
-translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
-
--- Native word signed ops
-
-translateOp dflags IntMulOp = Just (mo_wordMul dflags)
-translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
-translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
-translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
-translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
-
-
-translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
-translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
-translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
-translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
-
-translateOp dflags AndIOp = Just (mo_wordAnd dflags)
-translateOp dflags OrIOp = Just (mo_wordOr dflags)
-translateOp dflags XorIOp = Just (mo_wordXor dflags)
-translateOp dflags NotIOp = Just (mo_wordNot dflags)
-translateOp dflags ISllOp = Just (mo_wordShl dflags)
-translateOp dflags ISraOp = Just (mo_wordSShr dflags)
-translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
-
--- Native word unsigned ops
-
-translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
-translateOp dflags WordLeOp = Just (mo_wordULe dflags)
-translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
-translateOp dflags WordLtOp = Just (mo_wordULt dflags)
-
-translateOp dflags WordMulOp = Just (mo_wordMul dflags)
-translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
-translateOp dflags WordRemOp = Just (mo_wordURem dflags)
-
-translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
-translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
-translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
-translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
-
--- Int8# signed ops
-
-translateOp dflags Int8Extend = Just (MO_SS_Conv W8 (wordWidth dflags))
-translateOp dflags Int8Narrow = Just (MO_SS_Conv (wordWidth dflags) W8)
-translateOp _ Int8NegOp = Just (MO_S_Neg W8)
-translateOp _ Int8AddOp = Just (MO_Add W8)
-translateOp _ Int8SubOp = Just (MO_Sub W8)
-translateOp _ Int8MulOp = Just (MO_Mul W8)
-translateOp _ Int8QuotOp = Just (MO_S_Quot W8)
-translateOp _ Int8RemOp = Just (MO_S_Rem W8)
-
-translateOp _ Int8EqOp = Just (MO_Eq W8)
-translateOp _ Int8GeOp = Just (MO_S_Ge W8)
-translateOp _ Int8GtOp = Just (MO_S_Gt W8)
-translateOp _ Int8LeOp = Just (MO_S_Le W8)
-translateOp _ Int8LtOp = Just (MO_S_Lt W8)
-translateOp _ Int8NeOp = Just (MO_Ne W8)
-
--- Word8# unsigned ops
-
-translateOp dflags Word8Extend = Just (MO_UU_Conv W8 (wordWidth dflags))
-translateOp dflags Word8Narrow = Just (MO_UU_Conv (wordWidth dflags) W8)
-translateOp _ Word8NotOp = Just (MO_Not W8)
-translateOp _ Word8AddOp = Just (MO_Add W8)
-translateOp _ Word8SubOp = Just (MO_Sub W8)
-translateOp _ Word8MulOp = Just (MO_Mul W8)
-translateOp _ Word8QuotOp = Just (MO_U_Quot W8)
-translateOp _ Word8RemOp = Just (MO_U_Rem W8)
-
-translateOp _ Word8EqOp = Just (MO_Eq W8)
-translateOp _ Word8GeOp = Just (MO_U_Ge W8)
-translateOp _ Word8GtOp = Just (MO_U_Gt W8)
-translateOp _ Word8LeOp = Just (MO_U_Le W8)
-translateOp _ Word8LtOp = Just (MO_U_Lt W8)
-translateOp _ Word8NeOp = Just (MO_Ne W8)
-
--- Int16# signed ops
-
-translateOp dflags Int16Extend = Just (MO_SS_Conv W16 (wordWidth dflags))
-translateOp dflags Int16Narrow = Just (MO_SS_Conv (wordWidth dflags) W16)
-translateOp _ Int16NegOp = Just (MO_S_Neg W16)
-translateOp _ Int16AddOp = Just (MO_Add W16)
-translateOp _ Int16SubOp = Just (MO_Sub W16)
-translateOp _ Int16MulOp = Just (MO_Mul W16)
-translateOp _ Int16QuotOp = Just (MO_S_Quot W16)
-translateOp _ Int16RemOp = Just (MO_S_Rem W16)
-
-translateOp _ Int16EqOp = Just (MO_Eq W16)
-translateOp _ Int16GeOp = Just (MO_S_Ge W16)
-translateOp _ Int16GtOp = Just (MO_S_Gt W16)
-translateOp _ Int16LeOp = Just (MO_S_Le W16)
-translateOp _ Int16LtOp = Just (MO_S_Lt W16)
-translateOp _ Int16NeOp = Just (MO_Ne W16)
-
--- Word16# unsigned ops
-
-translateOp dflags Word16Extend = Just (MO_UU_Conv W16 (wordWidth dflags))
-translateOp dflags Word16Narrow = Just (MO_UU_Conv (wordWidth dflags) W16)
-translateOp _ Word16NotOp = Just (MO_Not W16)
-translateOp _ Word16AddOp = Just (MO_Add W16)
-translateOp _ Word16SubOp = Just (MO_Sub W16)
-translateOp _ Word16MulOp = Just (MO_Mul W16)
-translateOp _ Word16QuotOp = Just (MO_U_Quot W16)
-translateOp _ Word16RemOp = Just (MO_U_Rem W16)
-
-translateOp _ Word16EqOp = Just (MO_Eq W16)
-translateOp _ Word16GeOp = Just (MO_U_Ge W16)
-translateOp _ Word16GtOp = Just (MO_U_Gt W16)
-translateOp _ Word16LeOp = Just (MO_U_Le W16)
-translateOp _ Word16LtOp = Just (MO_U_Lt W16)
-translateOp _ Word16NeOp = Just (MO_Ne W16)
-
--- Char# ops
-
-translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
-translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
-translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
-translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
-translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
-translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
-
--- Double ops
-
-translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
-translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
-translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
-translateOp _ DoubleLeOp = Just (MO_F_Le W64)
-translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
-translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
-
-translateOp _ DoubleAddOp = Just (MO_F_Add W64)
-translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
-translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
-translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
-translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
-
--- Float ops
-
-translateOp _ FloatEqOp = Just (MO_F_Eq W32)
-translateOp _ FloatNeOp = Just (MO_F_Ne W32)
-translateOp _ FloatGeOp = Just (MO_F_Ge W32)
-translateOp _ FloatLeOp = Just (MO_F_Le W32)
-translateOp _ FloatGtOp = Just (MO_F_Gt W32)
-translateOp _ FloatLtOp = Just (MO_F_Lt W32)
-
-translateOp _ FloatAddOp = Just (MO_F_Add W32)
-translateOp _ FloatSubOp = Just (MO_F_Sub W32)
-translateOp _ FloatMulOp = Just (MO_F_Mul W32)
-translateOp _ FloatDivOp = Just (MO_F_Quot W32)
-translateOp _ FloatNegOp = Just (MO_F_Neg W32)
-
--- Vector ops
-
-translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
-translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
-translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
-translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
-translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
-
-translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
-translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
-translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
-translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
-translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
-translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
-
-translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
-translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
-translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
-translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
-translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
-
--- Conversions
-
-translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
-translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
-
-translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
-translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
-
-translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
-translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
-
--- Word comparisons masquerading as more exotic things.
-
-translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
-translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
-translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
-translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
-translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
-translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
-translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
-translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
--- See Note [Comparing stable names]
-translateOp dflags EqStableNameOp = Just (mo_wordEq dflags)
-
-translateOp _ _ = Nothing
-
--- Note [Comparing stable names]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- A StableName# is actually a pointer to a stable name object (SNO)
--- containing an index into the stable name table (SNT). We
--- used to compare StableName#s by following the pointers to the
--- SNOs and checking whether they held the same SNT indices. However,
--- this is not necessary: there is a one-to-one correspondence
--- between SNOs and entries in the SNT, so simple pointer equality
--- does the trick.
-
--- These primops are implemented by CallishMachOps, because they sometimes
--- turn into foreign calls depending on the backend.
-
-callishOp :: PrimOp -> Maybe CallishMachOp
-callishOp DoublePowerOp = Just MO_F64_Pwr
-callishOp DoubleSinOp = Just MO_F64_Sin
-callishOp DoubleCosOp = Just MO_F64_Cos
-callishOp DoubleTanOp = Just MO_F64_Tan
-callishOp DoubleSinhOp = Just MO_F64_Sinh
-callishOp DoubleCoshOp = Just MO_F64_Cosh
-callishOp DoubleTanhOp = Just MO_F64_Tanh
-callishOp DoubleAsinOp = Just MO_F64_Asin
-callishOp DoubleAcosOp = Just MO_F64_Acos
-callishOp DoubleAtanOp = Just MO_F64_Atan
-callishOp DoubleAsinhOp = Just MO_F64_Asinh
-callishOp DoubleAcoshOp = Just MO_F64_Acosh
-callishOp DoubleAtanhOp = Just MO_F64_Atanh
-callishOp DoubleLogOp = Just MO_F64_Log
-callishOp DoubleLog1POp = Just MO_F64_Log1P
-callishOp DoubleExpOp = Just MO_F64_Exp
-callishOp DoubleExpM1Op = Just MO_F64_ExpM1
-callishOp DoubleSqrtOp = Just MO_F64_Sqrt
-
-callishOp FloatPowerOp = Just MO_F32_Pwr
-callishOp FloatSinOp = Just MO_F32_Sin
-callishOp FloatCosOp = Just MO_F32_Cos
-callishOp FloatTanOp = Just MO_F32_Tan
-callishOp FloatSinhOp = Just MO_F32_Sinh
-callishOp FloatCoshOp = Just MO_F32_Cosh
-callishOp FloatTanhOp = Just MO_F32_Tanh
-callishOp FloatAsinOp = Just MO_F32_Asin
-callishOp FloatAcosOp = Just MO_F32_Acos
-callishOp FloatAtanOp = Just MO_F32_Atan
-callishOp FloatAsinhOp = Just MO_F32_Asinh
-callishOp FloatAcoshOp = Just MO_F32_Acosh
-callishOp FloatAtanhOp = Just MO_F32_Atanh
-callishOp FloatLogOp = Just MO_F32_Log
-callishOp FloatLog1POp = Just MO_F32_Log1P
-callishOp FloatExpOp = Just MO_F32_Exp
-callishOp FloatExpM1Op = Just MO_F32_ExpM1
-callishOp FloatSqrtOp = Just MO_F32_Sqrt
-
-callishOp _ = Nothing
-
-------------------------------------------------------------------------------
--- Helpers for translating various minor variants of array indexing.
-
-doIndexOffAddrOp :: Maybe MachOp
- -> CmmType
- -> [LocalReg]
- -> [CmmExpr]
- -> FCode ()
-doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
- = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
-doIndexOffAddrOp _ _ _ _
- = panic "StgCmmPrim: doIndexOffAddrOp"
-
-doIndexOffAddrOpAs :: Maybe MachOp
- -> CmmType
- -> CmmType
- -> [LocalReg]
- -> [CmmExpr]
- -> FCode ()
-doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
- = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
-doIndexOffAddrOpAs _ _ _ _ _
- = panic "StgCmmPrim: doIndexOffAddrOpAs"
-
-doIndexByteArrayOp :: Maybe MachOp
- -> CmmType
- -> [LocalReg]
- -> [CmmExpr]
- -> FCode ()
-doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
- = do dflags <- getDynFlags
- mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
-doIndexByteArrayOp _ _ _ _
- = panic "StgCmmPrim: doIndexByteArrayOp"
-
-doIndexByteArrayOpAs :: Maybe MachOp
- -> CmmType
- -> CmmType
- -> [LocalReg]
- -> [CmmExpr]
- -> FCode ()
-doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
- = do dflags <- getDynFlags
- mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
-doIndexByteArrayOpAs _ _ _ _ _
- = panic "StgCmmPrim: doIndexByteArrayOpAs"
-
-doReadPtrArrayOp :: LocalReg
- -> CmmExpr
- -> CmmExpr
- -> FCode ()
-doReadPtrArrayOp res addr idx
- = do dflags <- getDynFlags
- mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
-
-doWriteOffAddrOp :: Maybe MachOp
- -> CmmType
- -> [LocalReg]
- -> [CmmExpr]
- -> FCode ()
-doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
- = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
-doWriteOffAddrOp _ _ _ _
- = panic "StgCmmPrim: doWriteOffAddrOp"
-
-doWriteByteArrayOp :: Maybe MachOp
- -> CmmType
- -> [LocalReg]
- -> [CmmExpr]
- -> FCode ()
-doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
- = do dflags <- getDynFlags
- mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
-doWriteByteArrayOp _ _ _ _
- = panic "StgCmmPrim: doWriteByteArrayOp"
-
-doWritePtrArrayOp :: CmmExpr
- -> CmmExpr
- -> CmmExpr
- -> FCode ()
-doWritePtrArrayOp addr idx val
- = do dflags <- getDynFlags
- let ty = cmmExprType dflags val
- -- This write barrier is to ensure that the heap writes to the object
- -- referred to by val have happened before we write val into the array.
- -- See #12469 for details.
- emitPrimCall [] MO_WriteBarrier []
- mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
- emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
-       -- This is the GC write barrier: we must write a byte into the mark table:
- -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
- emit $ mkStore (
- cmmOffsetExpr dflags
- (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
- (loadArrPtrsSize dflags addr))
- (CmmMachOp (mo_wordUShr dflags) [idx,
- mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
- ) (CmmLit (CmmInt 1 W8))
-
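--- Layout sketch for the card (mark) table written above: the mark bytes sit
--- immediately after the element payload of the StgMutArrPtrs, and writing
--- element index i dirties byte (i >> mUT_ARR_PTRS_CARD_BITS) of that table,
--- so a single mark byte covers a whole card of consecutive elements.
-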
-loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
-loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
- where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
-
-mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
- -> Maybe MachOp -- Optional result cast
- -> CmmType -- Type of element we are accessing
- -> LocalReg -- Destination
- -> CmmExpr -- Base address
- -> CmmType -- Type of element by which we are indexing
- -> CmmExpr -- Index
- -> FCode ()
-mkBasicIndexedRead off Nothing ty res base idx_ty idx
- = do dflags <- getDynFlags
- emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
-mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
- = do dflags <- getDynFlags
- emitAssign (CmmLocal res) (CmmMachOp cast [
- cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
-
-mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
- -> Maybe MachOp -- Optional value cast
- -> CmmExpr -- Base address
- -> CmmType -- Type of element by which we are indexing
- -> CmmExpr -- Index
- -> CmmExpr -- Value to write
- -> FCode ()
-mkBasicIndexedWrite off Nothing base idx_ty idx val
- = do dflags <- getDynFlags
- emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
-mkBasicIndexedWrite off (Just cast) base idx_ty idx val
- = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
-
--- ----------------------------------------------------------------------------
--- Misc utils
-
-cmmIndexOffExpr :: DynFlags
- -> ByteOff -- Initial offset in bytes
- -> Width -- Width of element by which we are indexing
- -> CmmExpr -- Base address
- -> CmmExpr -- Index
- -> CmmExpr
-cmmIndexOffExpr dflags off width base idx
- = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
-
-cmmLoadIndexOffExpr :: DynFlags
- -> ByteOff -- Initial offset in bytes
- -> CmmType -- Type of element we are accessing
- -> CmmExpr -- Base address
- -> CmmType -- Type of element by which we are indexing
- -> CmmExpr -- Index
- -> CmmExpr
-cmmLoadIndexOffExpr dflags off ty base idx_ty idx
- = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
-
-setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
-setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
-
-------------------------------------------------------------------------------
--- Helpers for translating vector primops.
-
-vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
-vecVmmType pocat n w = vec n (vecCmmCat pocat w)
-
-vecCmmCat :: PrimOpVecCat -> Width -> CmmType
-vecCmmCat IntVec = cmmBits
-vecCmmCat WordVec = cmmBits
-vecCmmCat FloatVec = cmmFloat
-
-vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
-vecElemInjectCast _ FloatVec _ = Nothing
-vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
-vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
-vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
-vecElemInjectCast _ IntVec W64 = Nothing
-vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
-vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
-vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
-vecElemInjectCast _ WordVec W64 = Nothing
-vecElemInjectCast _ _ _ = Nothing
-
-vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
-vecElemProjectCast _ FloatVec _ = Nothing
-vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
-vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
-vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
-vecElemProjectCast _ IntVec W64 = Nothing
-vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
-vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
-vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
-vecElemProjectCast _ WordVec W64 = Nothing
-vecElemProjectCast _ _ _ = Nothing
-
-
--- NOTE [SIMD Design for the future]
--- Check to make sure that we can generate code for the specified vector type
--- given the current set of dynamic flags.
--- Currently these checks are specific to the x86 and x86_64 architectures.
--- This should be fixed!
--- In particular,
--- 1) Add better support for other architectures! (this may require a redesign)
--- 2) Decouple design choices from LLVM's pseudo SIMD model!
--- The high-level LLVM naive rep makes per-CPU-family SIMD generation its own
--- optimization problem, and hides important differences in e.g. ARM vs x86_64 SIMD.
--- 3) Depending on the architecture, the SIMD registers may also support general
--- computations on Float/Double/Word/Int scalars, but currently, on
--- x86_64 for example, we always put Word/Int (and their sized variants) in GPR
--- (general purpose) registers. Would relaxing that allow for
--- useful optimization opportunities?
--- Phrased differently, it is worth experimenting with supporting
--- different register mapping strategies than we currently have, especially if
--- someday we want SIMD to be a first class denizen in GHC along with scalar
--- values!
--- The current design with respect to register mapping of scalars could
--- very well be the best, but exploring the design space and doing careful
--- measurements is the only way to validate that.
--- In some next-generation CPU ISAs, notably RISC-V, the SIMD extension
--- includes support for a run-time, CPU-dependent vectorization parameter,
--- where a loop may act upon a single scalar each iteration OR some 2,4,8 ...
--- element chunk! Time will tell if that direction sees wide adoption,
--- but it is from that context that unifying our handling of SIMD and scalars
--- may benefit. It is not likely to benefit current architectures, though
--- it may very well be a design perspective that helps guide improving the NCG.
-
-
-checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
-checkVecCompatibility dflags vcat l w = do
- when (hscTarget dflags /= HscLlvm) $ do
- sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
- ,"Please use -fllvm."]
- check vecWidth vcat l w
- where
- check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
- check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
- sorry $ "128-bit wide single-precision floating point " ++
- "SIMD vector instructions require at least -msse."
- check W128 _ _ _ | not (isSse2Enabled dflags) =
- sorry $ "128-bit wide integer and double precision " ++
- "SIMD vector instructions require at least -msse2."
- check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
- sorry $ "256-bit wide floating point " ++
- "SIMD vector instructions require at least -mavx."
- check W256 _ _ _ | not (isAvx2Enabled dflags) =
- sorry $ "256-bit wide integer " ++
- "SIMD vector instructions require at least -mavx2."
- check W512 _ _ _ | not (isAvx512fEnabled dflags) =
- sorry $ "512-bit wide " ++
- "SIMD vector instructions require -mavx512f."
- check _ _ _ _ = return ()
-
- vecWidth = typeWidth (vecVmmType vcat l w)
-
-------------------------------------------------------------------------------
--- Helpers for translating vector packing and unpacking.
-
-doVecPackOp :: Maybe MachOp -- Cast from element to vector component
- -> CmmType -- Type of vector
- -> CmmExpr -- Initial vector
- -> [CmmExpr] -- Elements
- -> CmmFormal -- Destination for result
- -> FCode ()
-doVecPackOp maybe_pre_write_cast ty z es res = do
- dst <- newTemp ty
- emitAssign (CmmLocal dst) z
- vecPack dst es 0
- where
- vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
- vecPack src [] _ =
- emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
-
- vecPack src (e : es) i = do
- dst <- newTemp ty
- if isFloatType (vecElemType ty)
- then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
- [CmmReg (CmmLocal src), cast e, iLit])
- else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
- [CmmReg (CmmLocal src), cast e, iLit])
- vecPack dst es (i + 1)
- where
- -- vector indices are always 32-bits
- iLit = CmmLit (CmmInt (toInteger i) W32)
-
- cast :: CmmExpr -> CmmExpr
- cast val = case maybe_pre_write_cast of
- Nothing -> val
- Just cast -> CmmMachOp cast [val]
-
- len :: Length
- len = vecLength ty
-
- wid :: Width
- wid = typeWidth (vecElemType ty)
-
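--- Illustrative unrolling (sketch) for a 4-element float vector: starting from
--- the caller-supplied initial vector z (all zeroes at the call sites above),
--- the loop above emits, with the optional element casts omitted,
---
---   v1 = MO_VF_Insert [z,  e0, 0]
---   v2 = MO_VF_Insert [v1, e1, 1]
---   v3 = MO_VF_Insert [v2, e2, 2]
---   v4 = MO_VF_Insert [v3, e3, 3]
---
--- and finally assigns v4 to the result register.
-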
-doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
- -> CmmType -- Type of vector
- -> CmmExpr -- Vector
- -> [CmmFormal] -- Element results
- -> FCode ()
-doVecUnpackOp maybe_post_read_cast ty e res =
- vecUnpack res 0
- where
- vecUnpack :: [CmmFormal] -> Int -> FCode ()
- vecUnpack [] _ =
- return ()
-
- vecUnpack (r : rs) i = do
- if isFloatType (vecElemType ty)
- then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
- [e, iLit]))
- else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
- [e, iLit]))
- vecUnpack rs (i + 1)
- where
- -- vector indices are always 32-bits
- iLit = CmmLit (CmmInt (toInteger i) W32)
-
- cast :: CmmExpr -> CmmExpr
- cast val = case maybe_post_read_cast of
- Nothing -> val
- Just cast -> CmmMachOp cast [val]
-
- len :: Length
- len = vecLength ty
-
- wid :: Width
- wid = typeWidth (vecElemType ty)
-
-doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
- -> CmmType -- Vector type
- -> CmmExpr -- Source vector
- -> CmmExpr -- Element
- -> CmmExpr -- Index at which to insert element
- -> CmmFormal -- Destination for result
- -> FCode ()
-doVecInsertOp maybe_pre_write_cast ty src e idx res = do
- dflags <- getDynFlags
- -- vector indices are always 32-bits
- let idx' :: CmmExpr
- idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
- if isFloatType (vecElemType ty)
- then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
- else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
- where
- cast :: CmmExpr -> CmmExpr
- cast val = case maybe_pre_write_cast of
- Nothing -> val
- Just cast -> CmmMachOp cast [val]
-
- len :: Length
- len = vecLength ty
-
- wid :: Width
- wid = typeWidth (vecElemType ty)
-
-------------------------------------------------------------------------------
--- Helpers for translating prefetching.
-
-
--- | Translate byte array prefetch operations into proper primcalls.
-doPrefetchByteArrayOp :: Int
- -> [CmmExpr]
- -> FCode ()
-doPrefetchByteArrayOp locality [addr,idx]
- = do dflags <- getDynFlags
- mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
-doPrefetchByteArrayOp _ _
- = panic "StgCmmPrim: doPrefetchByteArrayOp"
-
--- | Translate mutable byte array prefetch operations into proper primcalls.
-doPrefetchMutableByteArrayOp :: Int
- -> [CmmExpr]
- -> FCode ()
-doPrefetchMutableByteArrayOp locality [addr,idx]
- = do dflags <- getDynFlags
- mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
-doPrefetchMutableByteArrayOp _ _
-  = panic "StgCmmPrim: doPrefetchMutableByteArrayOp"
-
--- | Translate address prefetch operations into proper primcalls.
-doPrefetchAddrOp ::Int
- -> [CmmExpr]
- -> FCode ()
-doPrefetchAddrOp locality [addr,idx]
- = mkBasicPrefetch locality 0 addr idx
-doPrefetchAddrOp _ _
- = panic "StgCmmPrim: doPrefetchAddrOp"
-
--- | Translate value prefetch operations into proper primcalls.
-doPrefetchValueOp :: Int
- -> [CmmExpr]
- -> FCode ()
-doPrefetchValueOp locality [addr]
- = do dflags <- getDynFlags
- mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
-doPrefetchValueOp _ _
- = panic "StgCmmPrim: doPrefetchValueOp"
-
--- | helper to generate prefetch primcalls
-mkBasicPrefetch :: Int -- Locality level 0-3
- -> ByteOff -- Initial offset in bytes
- -> CmmExpr -- Base address
- -> CmmExpr -- Index
- -> FCode ()
-mkBasicPrefetch locality off base idx
- = do dflags <- getDynFlags
- emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
- return ()
-
--- ----------------------------------------------------------------------------
--- Allocating byte arrays
-
--- | Takes a register to return the newly allocated array in and the
--- size of the new array in bytes. Allocates a new
--- 'MutableByteArray#'.
-doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
-doNewByteArrayOp res_r n = do
- dflags <- getDynFlags
-
- let info_ptr = mkLblExpr mkArrWords_infoLabel
- rep = arrWordsRep dflags n
-
- tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
- (mkIntExpr dflags (nonHdrSize dflags rep))
- (zeroExpr dflags)
-
- let hdr_size = fixedHdrSize dflags
-
- base <- allocHeapClosure rep info_ptr cccsExpr
- [ (mkIntExpr dflags n,
- hdr_size + oFFSET_StgArrBytes_bytes dflags)
- ]
-
- emit $ mkAssign (CmmLocal res_r) base
-
--- ----------------------------------------------------------------------------
--- Comparing byte arrays
-
-doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
- -> FCode ()
-doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
- dflags <- getDynFlags
- ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off
- ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off
-
- -- short-cut in case of equal pointers avoiding a costly
- -- subroutine call to the memcmp(3) routine; the Cmm logic below
- -- results in assembly code being generated for
- --
- -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int#
- -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10#
- --
- -- that looks like
- --
- -- leaq 16(%r14),%rax
- -- leaq 16(%rsi),%rbx
- -- xorl %ecx,%ecx
- -- cmpq %rbx,%rax
- -- je l_ptr_eq
- --
-        --   ; NB: the common case (unequal pointers) falls through
-        --   ; the conditional jump, and therefore matches the
-        --   ; usual static branch prediction convention of modern CPUs
- --
- -- subq $8,%rsp
- -- movq %rbx,%rsi
- -- movq %rax,%rdi
- -- movl $10,%edx
- -- xorl %eax,%eax
- -- call memcmp
- -- addq $8,%rsp
- -- movslq %eax,%rax
- -- movq %rax,%rcx
- -- l_ptr_eq:
- -- movq %rcx,%rbx
- -- jmp *(%rbp)
-
- l_ptr_eq <- newBlockId
- l_ptr_ne <- newBlockId
-
- emit (mkAssign (CmmLocal res) (zeroExpr dflags))
- emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p)
- l_ptr_eq l_ptr_ne (Just False))
-
- emitLabel l_ptr_ne
- emitMemcmpCall res ba1_p ba2_p n 1
-
- emitLabel l_ptr_eq
-
--- ----------------------------------------------------------------------------
--- Copying byte arrays
-
--- | Takes a source 'ByteArray#', an offset in the source array, a
--- destination 'MutableByteArray#', an offset into the destination
--- array, and the number of bytes to copy. Copies the given number of
--- bytes from the source array to the destination array.
-doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
- -> FCode ()
-doCopyByteArrayOp = emitCopyByteArray copy
- where
- -- Copy data (we assume the arrays aren't overlapping since
- -- they're of different types)
- copy _src _dst dst_p src_p bytes align =
- emitMemcpyCall dst_p src_p bytes align
-
--- | Takes a source 'MutableByteArray#', an offset in the source
--- array, a destination 'MutableByteArray#', an offset into the
--- destination array, and the number of bytes to copy. Copies the
--- given number of bytes from the source array to the destination
--- array.
-doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
- -> FCode ()
-doCopyMutableByteArrayOp = emitCopyByteArray copy
- where
- -- The only time the memory might overlap is when the two arrays
- -- we were provided are the same array!
- -- TODO: Optimize branch for common case of no aliasing.
- copy src dst dst_p src_p bytes align = do
- dflags <- getDynFlags
- (moveCall, cpyCall) <- forkAltPair
- (getCode $ emitMemmoveCall dst_p src_p bytes align)
- (getCode $ emitMemcpyCall dst_p src_p bytes align)
- emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
-
-emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
- -> Alignment -> FCode ())
- -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
- -> FCode ()
-emitCopyByteArray copy src src_off dst dst_off n = do
- dflags <- getDynFlags
- let byteArrayAlignment = wordAlignment dflags
- srcOffAlignment = cmmExprAlignment src_off
- dstOffAlignment = cmmExprAlignment dst_off
- align = minimum [byteArrayAlignment, srcOffAlignment, dstOffAlignment]
- dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
- src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
- copy src dst dst_p src_p n align
-
--- | Takes a source 'ByteArray#', an offset in the source array, a
--- destination 'Addr#', and the number of bytes to copy. Copies the given
--- number of bytes from the source array to the destination memory region.
-doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
-doCopyByteArrayToAddrOp src src_off dst_p bytes = do
- -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
- dflags <- getDynFlags
- src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
- emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
-
--- | Takes a source 'MutableByteArray#', an offset in the source array, a
--- destination 'Addr#', and the number of bytes to copy. Copies the given
--- number of bytes from the source array to the destination memory region.
-doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
- -> FCode ()
-doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
-
--- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
--- the destination array, and the number of bytes to copy. Copies the given
--- number of bytes from the source memory region to the destination array.
-doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
-doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
- -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
- dflags <- getDynFlags
- dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
- emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
-
-
--- ----------------------------------------------------------------------------
--- Setting byte arrays
-
--- | Takes a 'MutableByteArray#', an offset into the array, a length,
--- and a byte, and sets each of the selected bytes in the array to the
--- character.
-doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
- -> FCode ()
-doSetByteArrayOp ba off len c = do
- dflags <- getDynFlags
-
- let byteArrayAlignment = wordAlignment dflags -- known since BA is allocated on heap
- offsetAlignment = cmmExprAlignment off
- align = min byteArrayAlignment offsetAlignment
-
- p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
- emitMemsetCall p c len align
-
--- ----------------------------------------------------------------------------
--- Allocating arrays
-
--- | Allocate a new array.
-doNewArrayOp :: CmmFormal -- ^ return register
- -> SMRep -- ^ representation of the array
- -> CLabel -- ^ info pointer
- -> [(CmmExpr, ByteOff)] -- ^ header payload
- -> WordOff -- ^ array size
- -> CmmExpr -- ^ initial element
- -> FCode ()
-doNewArrayOp res_r rep info payload n init = do
- dflags <- getDynFlags
-
- let info_ptr = mkLblExpr info
-
- tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
- (mkIntExpr dflags (nonHdrSize dflags rep))
- (zeroExpr dflags)
-
- base <- allocHeapClosure rep info_ptr cccsExpr payload
-
- arr <- CmmLocal `fmap` newTemp (bWord dflags)
- emit $ mkAssign arr base
-
- -- Initialise all elements of the array
- let mkOff off = cmmOffsetW dflags (CmmReg arr) (hdrSizeW dflags rep + off)
- initialization = [ mkStore (mkOff off) init | off <- [0.. n - 1] ]
- emit (catAGraphs initialization)
-
- emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-
--- ----------------------------------------------------------------------------
--- Copying pointer arrays
-
--- EZY: This code has an unusually high number of assignTemp calls, seen
--- nowhere else in the code generator. This is mostly because these
--- "primitive" ops result in a surprisingly large amount of code. It
--- will likely be worthwhile to optimize what is emitted here, so that
--- our optimization passes don't waste time repeatedly optimizing the
--- same bits of code.
-
--- More closely imitates 'assignTemp' from the old code generator, which
--- returns a CmmExpr rather than a LocalReg.
-assignTempE :: CmmExpr -> FCode CmmExpr
-assignTempE e = do
- t <- assignTemp e
- return (CmmReg (CmmLocal t))
-
--- | Takes a source 'Array#', an offset in the source array, a
--- destination 'MutableArray#', an offset into the destination array,
--- and the number of elements to copy. Copies the given number of
--- elements from the source array to the destination array.
-doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
- -> FCode ()
-doCopyArrayOp = emitCopyArray copy
- where
- -- Copy data (we assume the arrays aren't overlapping since
- -- they're of different types)
- copy _src _dst dst_p src_p bytes =
- do dflags <- getDynFlags
- emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (wordAlignment dflags)
-
-
--- | Takes a source 'MutableArray#', an offset in the source array, a
--- destination 'MutableArray#', an offset into the destination array,
--- and the number of elements to copy. Copies the given number of
--- elements from the source array to the destination array.
-doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
- -> FCode ()
-doCopyMutableArrayOp = emitCopyArray copy
- where
- -- The only time the memory might overlap is when the two arrays
- -- we were provided are the same array!
- -- TODO: Optimize branch for common case of no aliasing.
- copy src dst dst_p src_p bytes = do
- dflags <- getDynFlags
- (moveCall, cpyCall) <- forkAltPair
- (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
- (wordAlignment dflags))
- (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (wordAlignment dflags))
- emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
-
-emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
- -> FCode ()) -- ^ copy function
- -> CmmExpr -- ^ source array
- -> CmmExpr -- ^ offset in source array
- -> CmmExpr -- ^ destination array
- -> CmmExpr -- ^ offset in destination array
- -> WordOff -- ^ number of elements to copy
- -> FCode ()
-emitCopyArray copy src0 src_off dst0 dst_off0 n =
- when (n /= 0) $ do
- dflags <- getDynFlags
-
- -- Passed as arguments (be careful)
- src <- assignTempE src0
- dst <- assignTempE dst0
- dst_off <- assignTempE dst_off0
-
- -- Set the dirty bit in the header.
- emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
-
- dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
- (arrPtrsHdrSize dflags)
- dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
- src_p <- assignTempE $ cmmOffsetExprW dflags
- (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
- let bytes = wordsToBytes dflags n
-
- copy src dst dst_p src_p bytes
-
- -- The base address of the destination card table
- dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
- (loadArrPtrsSize dflags dst)
-
- emitSetCards dst_off dst_cards_p n
-
-doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
- -> FCode ()
-doCopySmallArrayOp = emitCopySmallArray copy
- where
- -- Copy data (we assume the arrays aren't overlapping since
- -- they're of different types)
- copy _src _dst dst_p src_p bytes =
- do dflags <- getDynFlags
- emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (wordAlignment dflags)
-
-
-doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
- -> FCode ()
-doCopySmallMutableArrayOp = emitCopySmallArray copy
- where
- -- The only time the memory might overlap is when the two arrays
- -- we were provided are the same array!
- -- TODO: Optimize branch for common case of no aliasing.
- copy src dst dst_p src_p bytes = do
- dflags <- getDynFlags
- (moveCall, cpyCall) <- forkAltPair
- (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
- (wordAlignment dflags))
- (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (wordAlignment dflags))
- emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
-
-emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
- -> FCode ()) -- ^ copy function
- -> CmmExpr -- ^ source array
- -> CmmExpr -- ^ offset in source array
- -> CmmExpr -- ^ destination array
- -> CmmExpr -- ^ offset in destination array
- -> WordOff -- ^ number of elements to copy
- -> FCode ()
-emitCopySmallArray copy src0 src_off dst0 dst_off n =
- when (n /= 0) $ do
- dflags <- getDynFlags
-
- -- Passed as arguments (be careful)
- src <- assignTempE src0
- dst <- assignTempE dst0
-
- -- Set the dirty bit in the header.
- emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
-
- dst_p <- assignTempE $ cmmOffsetExprW dflags
- (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
- src_p <- assignTempE $ cmmOffsetExprW dflags
- (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
- let bytes = wordsToBytes dflags n
-
- copy src dst dst_p src_p bytes
-
--- | Takes an info table label, a register to return the newly
--- allocated array in, a source array, an offset in the source array,
--- and the number of elements to copy. Allocates a new array and
--- initializes it from the source array.
-emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
- -> FCode ()
-emitCloneArray info_p res_r src src_off n = do
- dflags <- getDynFlags
-
- let info_ptr = mkLblExpr info_p
- rep = arrPtrsRep dflags n
-
- tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
- (mkIntExpr dflags (nonHdrSize dflags rep))
- (zeroExpr dflags)
-
- let hdr_size = fixedHdrSize dflags
-
- base <- allocHeapClosure rep info_ptr cccsExpr
- [ (mkIntExpr dflags n,
- hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
- , (mkIntExpr dflags (nonHdrSizeW rep),
- hdr_size + oFFSET_StgMutArrPtrs_size dflags)
- ]
-
- arr <- CmmLocal `fmap` newTemp (bWord dflags)
- emit $ mkAssign arr base
-
- dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
- (arrPtrsHdrSize dflags)
- src_p <- assignTempE $ cmmOffsetExprW dflags src
- (cmmAddWord dflags
- (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
-
- emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
- (wordAlignment dflags)
-
- emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-
--- | Takes an info table label, a register to return the newly
--- allocated array in, a source array, an offset in the source array,
--- and the number of elements to copy. Allocates a new array and
--- initializes it from the source array.
-emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
- -> FCode ()
-emitCloneSmallArray info_p res_r src src_off n = do
- dflags <- getDynFlags
-
- let info_ptr = mkLblExpr info_p
- rep = smallArrPtrsRep n
-
- tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
- (mkIntExpr dflags (nonHdrSize dflags rep))
- (zeroExpr dflags)
-
- let hdr_size = fixedHdrSize dflags
-
- base <- allocHeapClosure rep info_ptr cccsExpr
- [ (mkIntExpr dflags n,
- hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
- ]
-
- arr <- CmmLocal `fmap` newTemp (bWord dflags)
- emit $ mkAssign arr base
-
- dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
- (smallArrPtrsHdrSize dflags)
- src_p <- assignTempE $ cmmOffsetExprW dflags src
- (cmmAddWord dflags
- (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
-
- emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
- (wordAlignment dflags)
-
- emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
-
--- | Takes an offset in the destination array, the base address of
--- the card table, and the number of elements affected (*not* the
--- number of cards). The number of elements may not be zero.
--- Marks the relevant cards as dirty.
-emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
-emitSetCards dst_start dst_cards_start n = do
- dflags <- getDynFlags
- start_card <- assignTempE $ cardCmm dflags dst_start
- let end_card = cardCmm dflags
- (cmmSubWord dflags
- (cmmAddWord dflags dst_start (mkIntExpr dflags n))
- (mkIntExpr dflags 1))
- emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
- (mkIntExpr dflags 1)
- (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
- (mkAlignment 1) -- no alignment (1 byte)
-
--- Convert an element index to a card index
-cardCmm :: DynFlags -> CmmExpr -> CmmExpr
-cardCmm dflags i =
- cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
-
-------------------------------------------------------------------------------
--- SmallArray PrimOp implementations
-
-doReadSmallPtrArrayOp :: LocalReg
- -> CmmExpr
- -> CmmExpr
- -> FCode ()
-doReadSmallPtrArrayOp res addr idx = do
- dflags <- getDynFlags
- mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
- (gcWord dflags) idx
-
-doWriteSmallPtrArrayOp :: CmmExpr
- -> CmmExpr
- -> CmmExpr
- -> FCode ()
-doWriteSmallPtrArrayOp addr idx val = do
- dflags <- getDynFlags
- let ty = cmmExprType dflags val
- emitPrimCall [] MO_WriteBarrier [] -- #12469
- mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
- emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
-
-------------------------------------------------------------------------------
--- Atomic read-modify-write
-
--- | Emit an atomic modification to a byte array element. The result
--- reg contains the previous value of the element. Implies a full
--- memory barrier.
-doAtomicRMW :: LocalReg -- ^ Result reg
- -> AtomicMachOp -- ^ Atomic op (e.g. add)
- -> CmmExpr -- ^ MutableByteArray#
- -> CmmExpr -- ^ Index
- -> CmmType -- ^ Type of element by which we are indexing
- -> CmmExpr -- ^ Op argument (e.g. amount to add)
- -> FCode ()
-doAtomicRMW res amop mba idx idx_ty n = do
- dflags <- getDynFlags
- let width = typeWidth idx_ty
- addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
- width mba idx
- emitPrimCall
- [ res ]
- (MO_AtomicRMW width amop)
- [ addr, n ]
-
--- | Emit an atomic read to a byte array that acts as a memory barrier.
-doAtomicReadByteArray
- :: LocalReg -- ^ Result reg
- -> CmmExpr -- ^ MutableByteArray#
- -> CmmExpr -- ^ Index
- -> CmmType -- ^ Type of element by which we are indexing
- -> FCode ()
-doAtomicReadByteArray res mba idx idx_ty = do
- dflags <- getDynFlags
- let width = typeWidth idx_ty
- addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
- width mba idx
- emitPrimCall
- [ res ]
- (MO_AtomicRead width)
- [ addr ]
-
--- | Emit an atomic write to a byte array that acts as a memory barrier.
-doAtomicWriteByteArray
- :: CmmExpr -- ^ MutableByteArray#
- -> CmmExpr -- ^ Index
- -> CmmType -- ^ Type of element by which we are indexing
- -> CmmExpr -- ^ Value to write
- -> FCode ()
-doAtomicWriteByteArray mba idx idx_ty val = do
- dflags <- getDynFlags
- let width = typeWidth idx_ty
- addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
- width mba idx
- emitPrimCall
- [ {- no results -} ]
- (MO_AtomicWrite width)
- [ addr, val ]
-
-doCasByteArray
- :: LocalReg -- ^ Result reg
- -> CmmExpr -- ^ MutableByteArray#
- -> CmmExpr -- ^ Index
- -> CmmType -- ^ Type of element by which we are indexing
- -> CmmExpr -- ^ Old value
- -> CmmExpr -- ^ New value
- -> FCode ()
-doCasByteArray res mba idx idx_ty old new = do
- dflags <- getDynFlags
- let width = (typeWidth idx_ty)
- addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
- width mba idx
- emitPrimCall
- [ res ]
- (MO_Cmpxchg width)
- [ addr, old, new ]
-
-------------------------------------------------------------------------------
--- Helpers for emitting function calls
-
--- | Emit a call to @memcpy@.
-emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
-emitMemcpyCall dst src n align = do
- emitPrimCall
- [ {-no results-} ]
- (MO_Memcpy (alignmentBytes align))
- [ dst, src, n ]
-
--- | Emit a call to @memmove@.
-emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
-emitMemmoveCall dst src n align = do
- emitPrimCall
- [ {- no results -} ]
- (MO_Memmove (alignmentBytes align))
- [ dst, src, n ]
-
--- | Emit a call to @memset@. The second argument must fit inside an
--- unsigned char.
-emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
-emitMemsetCall dst c n align = do
- emitPrimCall
- [ {- no results -} ]
- (MO_Memset (alignmentBytes align))
- [ dst, c, n ]
-
-emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
-emitMemcmpCall res ptr1 ptr2 n align = do
- -- 'MO_Memcmp' is assumed to return a 32-bit 'CInt' because all
- -- code-gens currently call out to the @memcmp(3)@ C function.
- -- This was easier than moving the sign-extensions into
- -- all the code-gens.
- dflags <- getDynFlags
- let is32Bit = typeWidth (localRegType res) == W32
-
- cres <- if is32Bit
- then return res
- else newTemp b32
-
- emitPrimCall
- [ cres ]
- (MO_Memcmp align)
- [ ptr1, ptr2, n ]
-
- unless is32Bit $ do
- emit $ mkAssign (CmmLocal res)
- (CmmMachOp
- (mo_s_32ToWord dflags)
- [(CmmReg (CmmLocal cres))])
-
-emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
-emitBSwapCall res x width = do
- emitPrimCall
- [ res ]
- (MO_BSwap width)
- [ x ]
-
-emitBRevCall :: LocalReg -> CmmExpr -> Width -> FCode ()
-emitBRevCall res x width = do
- emitPrimCall
- [ res ]
- (MO_BRev width)
- [ x ]
-
-emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
-emitPopCntCall res x width = do
- emitPrimCall
- [ res ]
- (MO_PopCnt width)
- [ x ]
-
-emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
-emitPdepCall res x y width = do
- emitPrimCall
- [ res ]
- (MO_Pdep width)
- [ x, y ]
-
-emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
-emitPextCall res x y width = do
- emitPrimCall
- [ res ]
- (MO_Pext width)
- [ x, y ]
-
-emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
-emitClzCall res x width = do
- emitPrimCall
- [ res ]
- (MO_Clz width)
- [ x ]
-
-emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
-emitCtzCall res x width = do
- emitPrimCall
- [ res ]
- (MO_Ctz width)
- [ x ]
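
The card-marking code above (emitSetCards and cardCmm) shifts an element index right by MUT_ARR_PTRS_CARD_BITS to find its card, then dirties every card from the first to the last element touched, inclusive. A minimal standalone sketch of that index arithmetic, assuming a hypothetical card size of 2^7 (128) elements; the real constant comes from the RTS's DerivedConstants.h, and this is illustrative only, not GHC code:

    import Data.Bits (shiftR)

    -- Assumed placeholder; the RTS fixes the real MUT_ARR_PTRS_CARD_BITS.
    cardBits :: Int
    cardBits = 7

    -- Element index -> card index (mirrors cardCmm's shift for a
    -- non-negative index).
    cardOf :: Int -> Int
    cardOf i = i `shiftR` cardBits

    -- For a write of n elements starting at dst_off, the cards to mark
    -- dirty run from the card of the first element to the card of the
    -- last one, inclusive (mirrors emitSetCards' start_card/end_card).
    dirtyCardRange :: Int -> Int -> (Int, Int)
    dirtyCardRange dst_off n = (cardOf dst_off, cardOf (dst_off + n - 1))

    main :: IO ()
    main = print (dirtyCardRange 120 20)  -- (0,1): the write straddles two cards
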
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs
deleted file mode 100644
index 172b77c8f9..0000000000
--- a/compiler/codeGen/StgCmmProf.hs
+++ /dev/null
@@ -1,360 +0,0 @@
------------------------------------------------------------------------------
---
--- Code generation for profiling
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmProf (
- initCostCentres, ccType, ccsType,
- mkCCostCentre, mkCCostCentreStack,
-
- -- Cost-centre Profiling
- dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
- enterCostCentreThunk, enterCostCentreFun,
- costCentreFrom,
- storeCurCCS,
- emitSetCCC,
-
- saveCurrentCostCentre, restoreCurrentCostCentre,
-
- -- Lag/drag/void stuff
- ldvEnter, ldvEnterClosure, ldvRecordCreate
- ) where
-
-import GhcPrelude
-
-import StgCmmClosure
-import StgCmmUtils
-import StgCmmMonad
-import SMRep
-
-import MkGraph
-import Cmm
-import CmmUtils
-import CLabel
-
-import CostCentre
-import DynFlags
-import FastString
-import Module
-import Outputable
-
-import Control.Monad
-import Data.Char (ord)
-
------------------------------------------------------------------------------
---
--- Cost-centre-stack Profiling
---
------------------------------------------------------------------------------
-
--- Expression representing the current cost centre stack
-ccsType :: DynFlags -> CmmType -- Type of a cost-centre stack
-ccsType = bWord
-
-ccType :: DynFlags -> CmmType -- Type of a cost centre
-ccType = bWord
-
-storeCurCCS :: CmmExpr -> CmmAGraph
-storeCurCCS e = mkAssign cccsReg e
-
-mkCCostCentre :: CostCentre -> CmmLit
-mkCCostCentre cc = CmmLabel (mkCCLabel cc)
-
-mkCCostCentreStack :: CostCentreStack -> CmmLit
-mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs)
-
-costCentreFrom :: DynFlags
- -> CmmExpr -- A closure pointer
- -> CmmExpr -- The cost centre from that closure
-costCentreFrom dflags cl = CmmLoad (cmmOffsetB dflags cl (oFFSET_StgHeader_ccs dflags)) (ccsType dflags)
-
--- | The profiling header words in a static closure
-staticProfHdr :: DynFlags -> CostCentreStack -> [CmmLit]
-staticProfHdr dflags ccs
- = ifProfilingL dflags [mkCCostCentreStack ccs, staticLdvInit dflags]
-
--- | Profiling header words in a dynamic closure
-dynProfHdr :: DynFlags -> CmmExpr -> [CmmExpr]
-dynProfHdr dflags ccs = ifProfilingL dflags [ccs, dynLdvInit dflags]
-
--- | Initialise the profiling field of an update frame
-initUpdFrameProf :: CmmExpr -> FCode ()
-initUpdFrameProf frame
- = ifProfiling $ -- frame->header.prof.ccs = CCCS
- do dflags <- getDynFlags
- emitStore (cmmOffset dflags frame (oFFSET_StgHeader_ccs dflags)) cccsExpr
- -- frame->header.prof.hp.rs = NULL (or frame->header.prof.hp.ldvw = 0)
- -- is unnecessary because it is not used anyhow.
-
----------------------------------------------------------------------------
--- Saving and restoring the current cost centre
----------------------------------------------------------------------------
-
-{- Note [Saving the current cost centre]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The current cost centre is like a global register. Like other
-global registers, it's a caller-saves one. But consider
- case (f x) of (p,q) -> rhs
-Since 'f' may set the cost centre, we must restore it
-before resuming rhs. So we want code like this:
- local_cc = CCC -- save
- r = f( x )
- CCC = local_cc -- restore
-That is, we explicitly "save" the current cost centre in
-a LocalReg, local_cc; and restore it after the call. The
-C-- infrastructure will arrange to save local_cc across the
-call.
-
-The same goes for join points;
- let j x = join-stuff
- in blah-blah
-We want this kind of code:
- local_cc = CCC -- save
- blah-blah
- J:
- CCC = local_cc -- restore
--}
-
-saveCurrentCostCentre :: FCode (Maybe LocalReg)
- -- Returns Nothing if profiling is off
-saveCurrentCostCentre
- = do dflags <- getDynFlags
- if not (gopt Opt_SccProfilingOn dflags)
- then return Nothing
- else do local_cc <- newTemp (ccType dflags)
- emitAssign (CmmLocal local_cc) cccsExpr
- return (Just local_cc)
-
-restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
-restoreCurrentCostCentre Nothing
- = return ()
-restoreCurrentCostCentre (Just local_cc)
- = emit (storeCurCCS (CmmReg (CmmLocal local_cc)))
-
-
--------------------------------------------------------------------------------
--- Recording allocation in a cost centre
--------------------------------------------------------------------------------
-
--- | Record the allocation of a closure. The CmmExpr is the cost
--- centre stack to which to attribute the allocation.
-profDynAlloc :: SMRep -> CmmExpr -> FCode ()
-profDynAlloc rep ccs
- = ifProfiling $
- do dflags <- getDynFlags
- profAlloc (mkIntExpr dflags (heapClosureSizeW dflags rep)) ccs
-
--- | Record the allocation of a closure (size is given by a CmmExpr)
--- The size must be in words, because the allocation counter in a CCS counts
--- in words.
-profAlloc :: CmmExpr -> CmmExpr -> FCode ()
-profAlloc words ccs
- = ifProfiling $
- do dflags <- getDynFlags
- let alloc_rep = rEP_CostCentreStack_mem_alloc dflags
- emit (addToMemE alloc_rep
- (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_mem_alloc dflags))
- (CmmMachOp (MO_UU_Conv (wordWidth dflags) (typeWidth alloc_rep)) $
- [CmmMachOp (mo_wordSub dflags) [words,
- mkIntExpr dflags (profHdrSize dflags)]]))
- -- subtract the "profiling overhead", which is the
- -- profiling header in a closure.
-
--- -----------------------------------------------------------------------
--- Setting the current cost centre on entry to a closure
-
-enterCostCentreThunk :: CmmExpr -> FCode ()
-enterCostCentreThunk closure =
- ifProfiling $ do
- dflags <- getDynFlags
- emit $ storeCurCCS (costCentreFrom dflags closure)
-
-enterCostCentreFun :: CostCentreStack -> CmmExpr -> FCode ()
-enterCostCentreFun ccs closure =
- ifProfiling $ do
- if isCurrentCCS ccs
- then do dflags <- getDynFlags
- emitRtsCall rtsUnitId (fsLit "enterFunCCS")
- [(baseExpr, AddrHint),
- (costCentreFrom dflags closure, AddrHint)] False
- else return () -- top-level function, nothing to do
-
-ifProfiling :: FCode () -> FCode ()
-ifProfiling code
- = do dflags <- getDynFlags
- if gopt Opt_SccProfilingOn dflags
- then code
- else return ()
-
-ifProfilingL :: DynFlags -> [a] -> [a]
-ifProfilingL dflags xs
- | gopt Opt_SccProfilingOn dflags = xs
- | otherwise = []
-
-
----------------------------------------------------------------
--- Initialising Cost Centres & CCSs
----------------------------------------------------------------
-
-initCostCentres :: CollectedCCs -> FCode ()
--- Emit the declarations
-initCostCentres (local_CCs, singleton_CCSs)
- = do dflags <- getDynFlags
- when (gopt Opt_SccProfilingOn dflags) $
- do mapM_ emitCostCentreDecl local_CCs
- mapM_ emitCostCentreStackDecl singleton_CCSs
-
-
-emitCostCentreDecl :: CostCentre -> FCode ()
-emitCostCentreDecl cc = do
- { dflags <- getDynFlags
- ; let is_caf | isCafCC cc = mkIntCLit dflags (ord 'c') -- 'c' == is a CAF
- | otherwise = zero dflags
- -- NB. bytesFS: we want the UTF-8 bytes here (#5559)
- ; label <- newByteStringCLit (bytesFS $ costCentreUserNameFS cc)
- ; modl <- newByteStringCLit (bytesFS $ Module.moduleNameFS
- $ Module.moduleName
- $ cc_mod cc)
- ; loc <- newByteStringCLit $ bytesFS $ mkFastString $
- showPpr dflags (costCentreSrcSpan cc)
- -- XXX going via FastString to get UTF-8 encoding is silly
- ; let
- lits = [ zero dflags, -- StgInt ccID,
- label, -- char *label,
- modl, -- char *module,
- loc, -- char *srcloc,
- zero64, -- StgWord64 mem_alloc
- zero dflags, -- StgWord time_ticks
- is_caf, -- StgInt is_caf
- zero dflags -- struct _CostCentre *link
- ]
- ; emitDataLits (mkCCLabel cc) lits
- }
-
-emitCostCentreStackDecl :: CostCentreStack -> FCode ()
-emitCostCentreStackDecl ccs
- = case maybeSingletonCCS ccs of
- Just cc ->
- do dflags <- getDynFlags
- let mk_lits cc = zero dflags :
- mkCCostCentre cc :
- replicate (sizeof_ccs_words dflags - 2) (zero dflags)
- -- Note: to avoid making any assumptions about how the
- -- C compiler (that compiles the RTS, in particular) does
- -- layouts of structs containing long-longs, simply
- -- pad out the struct with zero words until we hit the
- -- size of the overall struct (which we get via DerivedConstants.h)
- emitDataLits (mkCCSLabel ccs) (mk_lits cc)
- Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs)
-
-zero :: DynFlags -> CmmLit
-zero dflags = mkIntCLit dflags 0
-zero64 :: CmmLit
-zero64 = CmmInt 0 W64
-
-sizeof_ccs_words :: DynFlags -> Int
-sizeof_ccs_words dflags
- -- round up to the next word.
- | ms == 0 = ws
- | otherwise = ws + 1
- where
- (ws,ms) = sIZEOF_CostCentreStack dflags `divMod` wORD_SIZE dflags
-
--- ---------------------------------------------------------------------------
--- Set the current cost centre stack
-
-emitSetCCC :: CostCentre -> Bool -> Bool -> FCode ()
-emitSetCCC cc tick push
- = do dflags <- getDynFlags
- if not (gopt Opt_SccProfilingOn dflags)
- then return ()
- else do tmp <- newTemp (ccsType dflags)
- pushCostCentre tmp cccsExpr cc
- when tick $ emit (bumpSccCount dflags (CmmReg (CmmLocal tmp)))
- when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp)))
-
-pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
-pushCostCentre result ccs cc
- = emitRtsCallWithResult result AddrHint
- rtsUnitId
- (fsLit "pushCostCentre") [(ccs,AddrHint),
- (CmmLit (mkCCostCentre cc), AddrHint)]
- False
-
-bumpSccCount :: DynFlags -> CmmExpr -> CmmAGraph
-bumpSccCount dflags ccs
- = addToMem (rEP_CostCentreStack_scc_count dflags)
- (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_scc_count dflags)) 1
-
------------------------------------------------------------------------------
---
--- Lag/drag/void stuff
---
------------------------------------------------------------------------------
-
---
--- Initial value for the LDV field in a static closure
---
-staticLdvInit :: DynFlags -> CmmLit
-staticLdvInit = zeroCLit
-
---
--- Initial value of the LDV field in a dynamic closure
---
-dynLdvInit :: DynFlags -> CmmExpr
-dynLdvInit dflags = -- (era << LDV_SHIFT) | LDV_STATE_CREATE
- CmmMachOp (mo_wordOr dflags) [
- CmmMachOp (mo_wordShl dflags) [loadEra dflags, mkIntExpr dflags (lDV_SHIFT dflags)],
- CmmLit (mkWordCLit dflags (iLDV_STATE_CREATE dflags))
- ]
-
---
--- Initialise the LDV word of a new closure
---
-ldvRecordCreate :: CmmExpr -> FCode ()
-ldvRecordCreate closure = do
- dflags <- getDynFlags
- emit $ mkStore (ldvWord dflags closure) (dynLdvInit dflags)
-
---
--- | Called when a closure is entered; marks the closure as having
--- been "used". The closure is not an "inherently used" one. The
--- closure is not @IND@ because that is not considered for LDV profiling.
---
-ldvEnterClosure :: ClosureInfo -> CmmReg -> FCode ()
-ldvEnterClosure closure_info node_reg = do
- dflags <- getDynFlags
- let tag = funTag dflags closure_info
- -- don't forget to subtract node's tag
- ldvEnter (cmmOffsetB dflags (CmmReg node_reg) (-tag))
-
-ldvEnter :: CmmExpr -> FCode ()
--- Argument is a closure pointer
-ldvEnter cl_ptr = do
- dflags <- getDynFlags
- let -- don't forget to subtract node's tag
- ldv_wd = ldvWord dflags cl_ptr
- new_ldv_wd = cmmOrWord dflags (cmmAndWord dflags (CmmLoad ldv_wd (bWord dflags))
- (CmmLit (mkWordCLit dflags (iLDV_CREATE_MASK dflags))))
- (cmmOrWord dflags (loadEra dflags) (CmmLit (mkWordCLit dflags (iLDV_STATE_USE dflags))))
- ifProfiling $
- -- if (era > 0) {
- -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
- -- era | LDV_STATE_USE }
- emit =<< mkCmmIfThenElse (CmmMachOp (mo_wordUGt dflags) [loadEra dflags, CmmLit (zeroCLit dflags)])
- (mkStore ldv_wd new_ldv_wd)
- mkNop
-
-loadEra :: DynFlags -> CmmExpr
-loadEra dflags = CmmMachOp (MO_UU_Conv (cIntWidth dflags) (wordWidth dflags))
- [CmmLoad (mkLblExpr (mkCmmDataLabel rtsUnitId (fsLit "era")))
- (cInt dflags)]
-
-ldvWord :: DynFlags -> CmmExpr -> CmmExpr
--- Takes the address of a closure, and returns
--- the address of the LDV word in the closure
-ldvWord dflags closure_ptr
- = cmmOffsetB dflags closure_ptr (oFFSET_StgHeader_ldvw dflags)
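
The LDV-word handling above (dynLdvInit and ldvEnter) is plain bit twiddling: store the creation era shifted into a "creation" field, and on entry keep that field while OR-ing in the current era and a "used" state bit. A rough standalone model of that arithmetic; the shift, mask, and state bits below are made-up placeholders standing in for LDV_SHIFT, LDV_CREATE_MASK, LDV_STATE_CREATE and LDV_STATE_USE from the RTS headers, so this sketch shows only the shape of the computation:

    import Data.Bits ((.&.), (.|.), shiftL)
    import Data.Word (Word64)

    -- Placeholder layout: creation era in bits 30..59, state bits at the top.
    ldvShift :: Int
    ldvShift = 30

    ldvStateCreate, ldvStateUse, ldvCreateMask :: Word64
    ldvStateCreate = 1 `shiftL` 60
    ldvStateUse    = 2 `shiftL` 60
    ldvCreateMask  = ((1 `shiftL` 30) - 1) `shiftL` ldvShift

    -- Mirrors dynLdvInit: (era << LDV_SHIFT) | LDV_STATE_CREATE.
    ldvInit :: Word64 -> Word64
    ldvInit era = (era `shiftL` ldvShift) .|. ldvStateCreate

    -- Mirrors ldvEnter's update: (LDVW(c) & LDV_CREATE_MASK) | era | LDV_STATE_USE.
    ldvUse :: Word64 -> Word64 -> Word64
    ldvUse era ldvw = (ldvw .&. ldvCreateMask) .|. era .|. ldvStateUse

    main :: IO ()
    main = print (ldvUse 5 (ldvInit 3))
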
diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs
deleted file mode 100644
index 868b52f402..0000000000
--- a/compiler/codeGen/StgCmmTicky.hs
+++ /dev/null
@@ -1,682 +0,0 @@
-{-# LANGUAGE BangPatterns #-}
-
------------------------------------------------------------------------------
---
--- Code generation for ticky-ticky profiling
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-{- OVERVIEW: ticky ticky profiling
-
-Please see
-https://gitlab.haskell.org/ghc/ghc/wikis/debugging/ticky-ticky and also
-edit it and the rest of this comment to keep them up-to-date if you
-change ticky-ticky. Thanks!
-
- *** All allocation ticky numbers are in bytes. ***
-
-Some of the relevant source files:
-
- ***not necessarily an exhaustive list***
-
- * some codeGen/ modules import this one
-
- * this module imports cmm/CLabel.hs to manage labels
-
- * cmm/CmmParse.y expands some macros using generators defined in
- this module
-
- * includes/stg/Ticky.h declares all of the global counters
-
- * includes/rts/Ticky.h declares the C data type for an
- STG-declaration's counters
-
- * some macros defined in includes/Cmm.h (and used within the RTS's
- CMM code) update the global ticky counters
-
- * at the end of execution rts/Ticky.c generates the final report
- +RTS -r<report-file> -RTS
-
-The rts/Ticky.c function that generates the report includes an
-STG-declaration's ticky counters if
-
- * that declaration was entered, or
-
- * it was allocated (if -ticky-allocd)
-
-On either of those events, the counter is "registered" by adding it to
-a linked list; cf the CMM generated by registerTickyCtr.
-
-Ticky-ticky profiling has evolved over many years. Many of the
-counters from its most sophisticated days are no longer
-active/accurate. As the RTS has changed, sometimes the ticky code for
-relevant counters was not accordingly updated. Unfortunately, neither
-were the comments.
-
-As of March 2013, there still exist deprecated code and comments in
-the code generator as well as the RTS because:
-
- * I don't know what is out-of-date versus merely commented out for
- momentary convenience, and
-
- * someone else might know how to repair it!
-
--}
-
-module StgCmmTicky (
- withNewTickyCounterFun,
- withNewTickyCounterLNE,
- withNewTickyCounterThunk,
- withNewTickyCounterStdThunk,
- withNewTickyCounterCon,
-
- tickyDynAlloc,
- tickyAllocHeap,
-
- tickyAllocPrim,
- tickyAllocThunk,
- tickyAllocPAP,
- tickyHeapCheck,
- tickyStackCheck,
-
- tickyUnknownCall, tickyDirectCall,
-
- tickyPushUpdateFrame,
- tickyUpdateFrameOmitted,
-
- tickyEnterDynCon,
- tickyEnterStaticCon,
- tickyEnterViaNode,
-
- tickyEnterFun,
- tickyEnterThunk, tickyEnterStdThunk, -- dynamic non-value
- -- thunks only
- tickyEnterLNE,
-
- tickyUpdateBhCaf,
- tickyBlackHole,
- tickyUnboxedTupleReturn,
- tickyReturnOldCon, tickyReturnNewCon,
-
- tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs,
- tickySlowCall, tickySlowCallPat,
- ) where
-
-import GhcPrelude
-
-import StgCmmArgRep ( slowCallPattern , toArgRep , argRepString )
-import StgCmmClosure
-import StgCmmUtils
-import StgCmmMonad
-
-import StgSyn
-import CmmExpr
-import MkGraph
-import CmmUtils
-import CLabel
-import SMRep
-
-import Module
-import Name
-import Id
-import BasicTypes
-import FastString
-import Outputable
-import Util
-
-import DynFlags
-
--- Turgid imports for showTypeCategory
-import PrelNames
-import TcType
-import Type
-import TyCon
-
-import Data.Maybe
-import qualified Data.Char
-import Control.Monad ( when )
-
------------------------------------------------------------------------------
---
--- Ticky-ticky profiling
---
------------------------------------------------------------------------------
-
-data TickyClosureType
- = TickyFun
- Bool -- True <-> single entry
- | TickyCon
- | TickyThunk
- Bool -- True <-> updateable
- Bool -- True <-> standard thunk (AP or selector), has no entry counter
- | TickyLNE
-
-withNewTickyCounterFun :: Bool -> Name -> [NonVoid Id] -> FCode a -> FCode a
-withNewTickyCounterFun single_entry = withNewTickyCounter (TickyFun single_entry)
-
-withNewTickyCounterLNE :: Name -> [NonVoid Id] -> FCode a -> FCode a
-withNewTickyCounterLNE nm args code = do
- b <- tickyLNEIsOn
- if not b then code else withNewTickyCounter TickyLNE nm args code
-
-thunkHasCounter :: Bool -> FCode Bool
-thunkHasCounter isStatic = do
- b <- tickyDynThunkIsOn
- pure (not isStatic && b)
-
-withNewTickyCounterThunk
- :: Bool -- ^ static
- -> Bool -- ^ updateable
- -> Name
- -> FCode a
- -> FCode a
-withNewTickyCounterThunk isStatic isUpdatable name code = do
- has_ctr <- thunkHasCounter isStatic
- if not has_ctr
- then code
- else withNewTickyCounter (TickyThunk isUpdatable False) name [] code
-
-withNewTickyCounterStdThunk
- :: Bool -- ^ updateable
- -> Name
- -> FCode a
- -> FCode a
-withNewTickyCounterStdThunk isUpdatable name code = do
- has_ctr <- thunkHasCounter False
- if not has_ctr
- then code
- else withNewTickyCounter (TickyThunk isUpdatable True) name [] code
-
-withNewTickyCounterCon
- :: Name
- -> FCode a
- -> FCode a
-withNewTickyCounterCon name code = do
- has_ctr <- thunkHasCounter False
- if not has_ctr
- then code
- else withNewTickyCounter TickyCon name [] code
-
--- args does not include the void arguments
-withNewTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode a -> FCode a
-withNewTickyCounter cloType name args m = do
- lbl <- emitTickyCounter cloType name args
- setTickyCtrLabel lbl m
-
-emitTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode CLabel
-emitTickyCounter cloType name args
- = let ctr_lbl = mkRednCountsLabel name in
- (>> return ctr_lbl) $
- ifTicky $ do
- { dflags <- getDynFlags
- ; parent <- getTickyCtrLabel
- ; mod_name <- getModuleName
-
- -- When printing the name of a thing in a ticky file, we
- -- want to give the module name even for *local* things. We
- -- print just "x (M)" rather that "M.x" to distinguish them
- -- from the global kind.
- ; let ppr_for_ticky_name :: SDoc
- ppr_for_ticky_name =
- let n = ppr name
- ext = case cloType of
- TickyFun single_entry -> parens $ hcat $ punctuate comma $
- [text "fun"] ++ [text "se"|single_entry]
- TickyCon -> parens (text "con")
- TickyThunk upd std -> parens $ hcat $ punctuate comma $
- [text "thk"] ++ [text "se"|not upd] ++ [text "std"|std]
- TickyLNE | isInternalName name -> parens (text "LNE")
- | otherwise -> panic "emitTickyCounter: how is this an external LNE?"
- p = case hasHaskellName parent of
- -- NB the default "top" ticky ctr does not
- -- have a Haskell name
- Just pname -> text "in" <+> ppr (nameUnique pname)
- _ -> empty
- in if isInternalName name
- then n <+> parens (ppr mod_name) <+> ext <+> p
- else n <+> ext <+> p
-
- ; fun_descr_lit <- newStringCLit $ showSDocDebug dflags ppr_for_ticky_name
- ; arg_descr_lit <- newStringCLit $ map (showTypeCategory . idType . fromNonVoid) args
- ; emitDataLits ctr_lbl
- -- Must match layout of includes/rts/Ticky.h's StgEntCounter
- --
- -- krc: note that all the fields are I32 now; some were I16
- -- before, but the code generator wasn't handling that
- -- properly and it led to chaos, panic and disorder.
- [ mkIntCLit dflags 0, -- registered?
- mkIntCLit dflags (length args), -- Arity
- mkIntCLit dflags 0, -- Heap allocated for this thing
- fun_descr_lit,
- arg_descr_lit,
- zeroCLit dflags, -- Entries into this thing
- zeroCLit dflags, -- Heap allocated by this thing
- zeroCLit dflags -- Link to next StgEntCounter
- ]
- }
-
--- -----------------------------------------------------------------------------
--- Ticky stack frames
-
-tickyPushUpdateFrame, tickyUpdateFrameOmitted :: FCode ()
-tickyPushUpdateFrame = ifTicky $ bumpTickyCounter (fsLit "UPDF_PUSHED_ctr")
-tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter (fsLit "UPDF_OMITTED_ctr")
-
--- -----------------------------------------------------------------------------
--- Ticky entries
-
--- NB the name-specific entries are only available for names that have
--- dedicated Cmm code. As far as I know, this just rules out
--- constructor thunks. For them, there is no CMM code block to put the
--- bump of name-specific ticky counter into. On the other hand, we can
--- still track their allocation.
-
-tickyEnterDynCon, tickyEnterStaticCon, tickyEnterViaNode :: FCode ()
-tickyEnterDynCon = ifTicky $ bumpTickyCounter (fsLit "ENT_DYN_CON_ctr")
-tickyEnterStaticCon = ifTicky $ bumpTickyCounter (fsLit "ENT_STATIC_CON_ctr")
-tickyEnterViaNode = ifTicky $ bumpTickyCounter (fsLit "ENT_VIA_NODE_ctr")
-
-tickyEnterThunk :: ClosureInfo -> FCode ()
-tickyEnterThunk cl_info
- = ifTicky $ do
- { bumpTickyCounter ctr
- ; has_ctr <- thunkHasCounter static
- ; when has_ctr $ do
- ticky_ctr_lbl <- getTickyCtrLabel
- registerTickyCtrAtEntryDyn ticky_ctr_lbl
- bumpTickyEntryCount ticky_ctr_lbl }
- where
- updatable = closureSingleEntry cl_info
- static = isStaticClosure cl_info
-
- ctr | static = if updatable then fsLit "ENT_STATIC_THK_SINGLE_ctr"
- else fsLit "ENT_STATIC_THK_MANY_ctr"
- | otherwise = if updatable then fsLit "ENT_DYN_THK_SINGLE_ctr"
- else fsLit "ENT_DYN_THK_MANY_ctr"
-
-tickyEnterStdThunk :: ClosureInfo -> FCode ()
-tickyEnterStdThunk = tickyEnterThunk
-
-tickyBlackHole :: Bool{-updatable-} -> FCode ()
-tickyBlackHole updatable
- = ifTicky (bumpTickyCounter ctr)
- where
- ctr | updatable = (fsLit "UPD_BH_SINGLE_ENTRY_ctr")
- | otherwise = (fsLit "UPD_BH_UPDATABLE_ctr")
-
-tickyUpdateBhCaf :: ClosureInfo -> FCode ()
-tickyUpdateBhCaf cl_info
- = ifTicky (bumpTickyCounter ctr)
- where
- ctr | closureUpdReqd cl_info = (fsLit "UPD_CAF_BH_SINGLE_ENTRY_ctr")
- | otherwise = (fsLit "UPD_CAF_BH_UPDATABLE_ctr")
-
-tickyEnterFun :: ClosureInfo -> FCode ()
-tickyEnterFun cl_info = ifTicky $ do
- ctr_lbl <- getTickyCtrLabel
-
- if isStaticClosure cl_info
- then do bumpTickyCounter (fsLit "ENT_STATIC_FUN_DIRECT_ctr")
- registerTickyCtr ctr_lbl
- else do bumpTickyCounter (fsLit "ENT_DYN_FUN_DIRECT_ctr")
- registerTickyCtrAtEntryDyn ctr_lbl
-
- bumpTickyEntryCount ctr_lbl
-
-tickyEnterLNE :: FCode ()
-tickyEnterLNE = ifTicky $ do
- bumpTickyCounter (fsLit "ENT_LNE_ctr")
- ifTickyLNE $ do
- ctr_lbl <- getTickyCtrLabel
- registerTickyCtr ctr_lbl
- bumpTickyEntryCount ctr_lbl
-
--- needn't register a counter upon entry if
---
--- 1) it's for a dynamic closure, and
---
--- 2) -ticky-allocd is on
---
--- since the counter was registered already upon being alloc'd
-registerTickyCtrAtEntryDyn :: CLabel -> FCode ()
-registerTickyCtrAtEntryDyn ctr_lbl = do
- already_registered <- tickyAllocdIsOn
- when (not already_registered) $ registerTickyCtr ctr_lbl
-
-registerTickyCtr :: CLabel -> FCode ()
--- Register a ticky counter
--- if ( ! f_ct.registeredp ) {
--- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */
--- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */
--- f_ct.registeredp = 1 }
-registerTickyCtr ctr_lbl = do
- dflags <- getDynFlags
- let
- -- krc: code generator doesn't handle Not, so we test for Eq 0 instead
- test = CmmMachOp (MO_Eq (wordWidth dflags))
- [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl
- (oFFSET_StgEntCounter_registeredp dflags))) (bWord dflags),
- zeroExpr dflags]
- register_stmts
- = [ mkStore (CmmLit (cmmLabelOffB ctr_lbl (oFFSET_StgEntCounter_link dflags)))
- (CmmLoad ticky_entry_ctrs (bWord dflags))
- , mkStore ticky_entry_ctrs (mkLblExpr ctr_lbl)
- , mkStore (CmmLit (cmmLabelOffB ctr_lbl
- (oFFSET_StgEntCounter_registeredp dflags)))
- (mkIntExpr dflags 1) ]
- ticky_entry_ctrs = mkLblExpr (mkCmmDataLabel rtsUnitId (fsLit "ticky_entry_ctrs"))
- emit =<< mkCmmIfThen test (catAGraphs register_stmts)
-
-tickyReturnOldCon, tickyReturnNewCon :: RepArity -> FCode ()
-tickyReturnOldCon arity
- = ifTicky $ do { bumpTickyCounter (fsLit "RET_OLD_ctr")
- ; bumpHistogram (fsLit "RET_OLD_hst") arity }
-tickyReturnNewCon arity
- = ifTicky $ do { bumpTickyCounter (fsLit "RET_NEW_ctr")
- ; bumpHistogram (fsLit "RET_NEW_hst") arity }
-
-tickyUnboxedTupleReturn :: RepArity -> FCode ()
-tickyUnboxedTupleReturn arity
- = ifTicky $ do { bumpTickyCounter (fsLit "RET_UNBOXED_TUP_ctr")
- ; bumpHistogram (fsLit "RET_UNBOXED_TUP_hst") arity }
-
--- -----------------------------------------------------------------------------
--- Ticky calls
-
--- Ticks at a *call site*:
-tickyDirectCall :: RepArity -> [StgArg] -> FCode ()
-tickyDirectCall arity args
- | args `lengthIs` arity = tickyKnownCallExact
- | otherwise = do tickyKnownCallExtraArgs
- tickySlowCallPat (map argPrimRep (drop arity args))
-
-tickyKnownCallTooFewArgs :: FCode ()
-tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_TOO_FEW_ARGS_ctr")
-
-tickyKnownCallExact :: FCode ()
-tickyKnownCallExact = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_ctr")
-
-tickyKnownCallExtraArgs :: FCode ()
-tickyKnownCallExtraArgs = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_EXTRA_ARGS_ctr")
-
-tickyUnknownCall :: FCode ()
-tickyUnknownCall = ifTicky $ bumpTickyCounter (fsLit "UNKNOWN_CALL_ctr")
-
--- Tick for the call pattern at a slow call site (i.e. in addition to
--- tickyUnknownCall, tickyKnownCallExtraArgs, etc.)
-tickySlowCall :: LambdaFormInfo -> [StgArg] -> FCode ()
-tickySlowCall _ [] = return ()
-tickySlowCall lf_info args = do
- -- see Note [Ticky for slow calls]
- if isKnownFun lf_info
- then tickyKnownCallTooFewArgs
- else tickyUnknownCall
- tickySlowCallPat (map argPrimRep args)
-
-tickySlowCallPat :: [PrimRep] -> FCode ()
-tickySlowCallPat args = ifTicky $
- let argReps = map toArgRep args
- (_, n_matched) = slowCallPattern argReps
- in if n_matched > 0 && args `lengthIs` n_matched
- then bumpTickyLbl $ mkRtsSlowFastTickyCtrLabel $ concatMap (map Data.Char.toLower . argRepString) argReps
- else bumpTickyCounter $ fsLit "VERY_SLOW_CALL_ctr"
-
-{-
-
-Note [Ticky for slow calls]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Terminology is unfortunately a bit mixed up for these calls. codeGen
-uses "slow call" to refer to unknown calls and under-saturated known
-calls.
-
-Nowadays, though (ie as of the eval/apply paper), the significantly
-slower calls are actually just a subset of these: the ones with no
-built-in argument pattern (cf StgCmmArgRep.slowCallPattern)
-
-So for ticky profiling, we split slow calls into
-"SLOW_CALL_fast_<pattern>_ctr" (those matching a built-in pattern) and
-VERY_SLOW_CALL_ctr (those without a built-in pattern; these are very
-bad for both space and time).
-
--}
-
--- -----------------------------------------------------------------------------
--- Ticky allocation
-
-tickyDynAlloc :: Maybe Id -> SMRep -> LambdaFormInfo -> FCode ()
--- Called when doing a dynamic heap allocation; the LambdaFormInfo
--- is used to distinguish between closure types
---
--- TODO what else to count while we're here?
-tickyDynAlloc mb_id rep lf = ifTicky $ getDynFlags >>= \dflags ->
- let bytes = wORD_SIZE dflags * heapClosureSizeW dflags rep
-
- countGlobal tot ctr = do
- bumpTickyCounterBy tot bytes
- bumpTickyCounter ctr
- countSpecific = ifTickyAllocd $ case mb_id of
- Nothing -> return ()
- Just id -> do
- let ctr_lbl = mkRednCountsLabel (idName id)
- registerTickyCtr ctr_lbl
- bumpTickyAllocd ctr_lbl bytes
-
- -- TODO are we still tracking "good stuff" (_gds) versus
- -- administrative (_adm) versus slop (_slp)? I'm going with all _gds
- -- for now, since I currently know neither whether we do nor how to
- -- distinguish. NSF Mar 2013
-
- in case () of
- _ | isConRep rep ->
- ifTickyDynThunk countSpecific >>
- countGlobal (fsLit "ALLOC_CON_gds") (fsLit "ALLOC_CON_ctr")
- | isThunkRep rep ->
- ifTickyDynThunk countSpecific >>
- if lfUpdatable lf
- then countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_UP_THK_ctr")
- else countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_SE_THK_ctr")
- | isFunRep rep ->
- countSpecific >>
- countGlobal (fsLit "ALLOC_FUN_gds") (fsLit "ALLOC_FUN_ctr")
- | otherwise -> panic "How is this heap object not a con, thunk, or fun?"
-
-
-
-tickyAllocHeap ::
- Bool -> -- is this a genuine allocation? As opposed to
- -- StgCmmLayout.adjustHpBackwards
- VirtualHpOffset -> FCode ()
--- Called when doing a heap check [TICK_ALLOC_HEAP]
--- Must be lazy in the amount of allocation!
-tickyAllocHeap genuine hp
- = ifTicky $
- do { dflags <- getDynFlags
- ; ticky_ctr <- getTickyCtrLabel
- ; emit $ catAGraphs $
- -- only test hp from within the emit so that the monadic
- -- computation itself is not strict in hp (cf knot in
- -- StgCmmMonad.getHeapUsage)
- if hp == 0 then []
- else let !bytes = wORD_SIZE dflags * hp in [
- -- Bump the allocation total in the closure's StgEntCounter
- addToMem (rEP_StgEntCounter_allocs dflags)
- (CmmLit (cmmLabelOffB ticky_ctr (oFFSET_StgEntCounter_allocs dflags)))
- bytes,
- -- Bump the global allocation total ALLOC_HEAP_tot
- addToMemLbl (bWord dflags)
- (mkCmmDataLabel rtsUnitId (fsLit "ALLOC_HEAP_tot"))
- bytes,
- -- Bump the global allocation counter ALLOC_HEAP_ctr
- if not genuine then mkNop
- else addToMemLbl (bWord dflags)
- (mkCmmDataLabel rtsUnitId (fsLit "ALLOC_HEAP_ctr"))
- 1
- ]}
-
-
---------------------------------------------------------------------------------
--- these three are only called from CmmParse.y (ie ultimately from the RTS)
-
--- the units are bytes
-
-tickyAllocPrim :: CmmExpr -- ^ size of the full header, in bytes
- -> CmmExpr -- ^ size of the payload, in bytes
- -> CmmExpr -> FCode ()
-tickyAllocPrim _hdr _goods _slop = ifTicky $ do
- bumpTickyCounter (fsLit "ALLOC_PRIM_ctr")
- bumpTickyCounterByE (fsLit "ALLOC_PRIM_adm") _hdr
- bumpTickyCounterByE (fsLit "ALLOC_PRIM_gds") _goods
- bumpTickyCounterByE (fsLit "ALLOC_PRIM_slp") _slop
-
-tickyAllocThunk :: CmmExpr -> CmmExpr -> FCode ()
-tickyAllocThunk _goods _slop = ifTicky $ do
- -- TODO is it ever called with a Single-Entry thunk?
- bumpTickyCounter (fsLit "ALLOC_UP_THK_ctr")
- bumpTickyCounterByE (fsLit "ALLOC_THK_gds") _goods
- bumpTickyCounterByE (fsLit "ALLOC_THK_slp") _slop
-
-tickyAllocPAP :: CmmExpr -> CmmExpr -> FCode ()
-tickyAllocPAP _goods _slop = ifTicky $ do
- bumpTickyCounter (fsLit "ALLOC_PAP_ctr")
- bumpTickyCounterByE (fsLit "ALLOC_PAP_gds") _goods
- bumpTickyCounterByE (fsLit "ALLOC_PAP_slp") _slop
-
-tickyHeapCheck :: FCode ()
-tickyHeapCheck = ifTicky $ bumpTickyCounter (fsLit "HEAP_CHK_ctr")
-
-tickyStackCheck :: FCode ()
-tickyStackCheck = ifTicky $ bumpTickyCounter (fsLit "STK_CHK_ctr")
-
--- -----------------------------------------------------------------------------
--- Ticky utils
-
-ifTicky :: FCode () -> FCode ()
-ifTicky code =
- getDynFlags >>= \dflags -> when (gopt Opt_Ticky dflags) code
-
-tickyAllocdIsOn :: FCode Bool
-tickyAllocdIsOn = gopt Opt_Ticky_Allocd `fmap` getDynFlags
-
-tickyLNEIsOn :: FCode Bool
-tickyLNEIsOn = gopt Opt_Ticky_LNE `fmap` getDynFlags
-
-tickyDynThunkIsOn :: FCode Bool
-tickyDynThunkIsOn = gopt Opt_Ticky_Dyn_Thunk `fmap` getDynFlags
-
-ifTickyAllocd :: FCode () -> FCode ()
-ifTickyAllocd code = tickyAllocdIsOn >>= \b -> when b code
-
-ifTickyLNE :: FCode () -> FCode ()
-ifTickyLNE code = tickyLNEIsOn >>= \b -> when b code
-
-ifTickyDynThunk :: FCode () -> FCode ()
-ifTickyDynThunk code = tickyDynThunkIsOn >>= \b -> when b code
-
-bumpTickyCounter :: FastString -> FCode ()
-bumpTickyCounter lbl = bumpTickyLbl (mkCmmDataLabel rtsUnitId lbl)
-
-bumpTickyCounterBy :: FastString -> Int -> FCode ()
-bumpTickyCounterBy lbl = bumpTickyLblBy (mkCmmDataLabel rtsUnitId lbl)
-
-bumpTickyCounterByE :: FastString -> CmmExpr -> FCode ()
-bumpTickyCounterByE lbl = bumpTickyLblByE (mkCmmDataLabel rtsUnitId lbl)
-
-bumpTickyEntryCount :: CLabel -> FCode ()
-bumpTickyEntryCount lbl = do
- dflags <- getDynFlags
- bumpTickyLit (cmmLabelOffB lbl (oFFSET_StgEntCounter_entry_count dflags))
-
-bumpTickyAllocd :: CLabel -> Int -> FCode ()
-bumpTickyAllocd lbl bytes = do
- dflags <- getDynFlags
- bumpTickyLitBy (cmmLabelOffB lbl (oFFSET_StgEntCounter_allocd dflags)) bytes
-
-bumpTickyLbl :: CLabel -> FCode ()
-bumpTickyLbl lhs = bumpTickyLitBy (cmmLabelOffB lhs 0) 1
-
-bumpTickyLblBy :: CLabel -> Int -> FCode ()
-bumpTickyLblBy lhs = bumpTickyLitBy (cmmLabelOffB lhs 0)
-
-bumpTickyLblByE :: CLabel -> CmmExpr -> FCode ()
-bumpTickyLblByE lhs = bumpTickyLitByE (cmmLabelOffB lhs 0)
-
-bumpTickyLit :: CmmLit -> FCode ()
-bumpTickyLit lhs = bumpTickyLitBy lhs 1
-
-bumpTickyLitBy :: CmmLit -> Int -> FCode ()
-bumpTickyLitBy lhs n = do
- dflags <- getDynFlags
- emit (addToMem (bWord dflags) (CmmLit lhs) n)
-
-bumpTickyLitByE :: CmmLit -> CmmExpr -> FCode ()
-bumpTickyLitByE lhs e = do
- dflags <- getDynFlags
- emit (addToMemE (bWord dflags) (CmmLit lhs) e)
-
-bumpHistogram :: FastString -> Int -> FCode ()
-bumpHistogram lbl n = do
- dflags <- getDynFlags
- let offset = n `min` (tICKY_BIN_COUNT dflags - 1)
- emit (addToMem (bWord dflags)
- (cmmIndexExpr dflags
- (wordWidth dflags)
- (CmmLit (CmmLabel (mkCmmDataLabel rtsUnitId lbl)))
- (CmmLit (CmmInt (fromIntegral offset) (wordWidth dflags))))
- 1)
-
-------------------------------------------------------------------
--- Showing the "type category" for ticky-ticky profiling
-
-showTypeCategory :: Type -> Char
- {-
- + dictionary
-
- > function
-
- {C,I,F,D,W} char, int, float, double, word
- {c,i,f,d,w} unboxed ditto
-
- T tuple
-
- P other primitive type
- p unboxed ditto
-
- L list
- E enumeration type
- S other single-constructor type
- M other multi-constructor data-con type
-
- . other type
-
- - reserved for others to mark as "uninteresting"
-
- Accurate as of Mar 2013, but I eliminated the Array category instead
- of updating it, for simplicity. It's in P/p, I think --NSF
-
- -}
-showTypeCategory ty
- | isDictTy ty = '+'
- | otherwise = case tcSplitTyConApp_maybe ty of
- Nothing -> '.'
- Just (tycon, _) ->
- (if isUnliftedTyCon tycon then Data.Char.toLower else id) $
- let anyOf us = getUnique tycon `elem` us in
- case () of
- _ | anyOf [funTyConKey] -> '>'
- | anyOf [charPrimTyConKey, charTyConKey] -> 'C'
- | anyOf [doublePrimTyConKey, doubleTyConKey] -> 'D'
- | anyOf [floatPrimTyConKey, floatTyConKey] -> 'F'
- | anyOf [intPrimTyConKey, int32PrimTyConKey, int64PrimTyConKey,
- intTyConKey, int8TyConKey, int16TyConKey, int32TyConKey, int64TyConKey
- ] -> 'I'
- | anyOf [wordPrimTyConKey, word32PrimTyConKey, word64PrimTyConKey, wordTyConKey,
- word8TyConKey, word16TyConKey, word32TyConKey, word64TyConKey
- ] -> 'W'
- | anyOf [listTyConKey] -> 'L'
- | isTupleTyCon tycon -> 'T'
- | isPrimTyCon tycon -> 'P'
- | isEnumerationTyCon tycon -> 'E'
- | isJust (tyConSingleDataCon_maybe tycon) -> 'S'
- | otherwise -> 'M' -- oh, well...
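
registerTickyCtr above emits Cmm for a small idempotent registration protocol: if the counter's registeredp flag is clear, push the counter onto the front of the global ticky_entry_ctrs list and set the flag. A short executable model of that protocol in plain Haskell, using IORefs in place of the static counter data; illustrative only, not GHC or RTS code:

    import Data.IORef

    data EntCounter = EntCounter
      { registeredp :: IORef Bool
      , link        :: IORef (Maybe EntCounter)
      }

    -- Mirrors the Cmm emitted by registerTickyCtr: hook the counter onto
    -- the front of the list exactly once, guarded by its flag.
    registerCtr :: IORef (Maybe EntCounter) -> EntCounter -> IO ()
    registerCtr listHead ctr = do
      done <- readIORef (registeredp ctr)
      if done
        then pure ()                                   -- already registered
        else do
          writeIORef (link ctr) =<< readIORef listHead -- f_ct.link = ticky_entry_ctrs
          writeIORef listHead (Just ctr)               -- ticky_entry_ctrs = &f_ct
          writeIORef (registeredp ctr) True            -- f_ct.registeredp = 1

    main :: IO ()
    main = do
      listHead <- newIORef Nothing
      ctr <- EntCounter <$> newIORef False <*> newIORef Nothing
      registerCtr listHead ctr                         -- registers the counter
      registerCtr listHead ctr                         -- no-op the second time
      readIORef (registeredp ctr) >>= print            -- True
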
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
deleted file mode 100644
index 766584e2c9..0000000000
--- a/compiler/codeGen/StgCmmUtils.hs
+++ /dev/null
@@ -1,578 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Code generator utilities; mostly monadic
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmUtils (
- cgLit, mkSimpleLit,
- emitDataLits, mkDataLits,
- emitRODataLits, mkRODataLits,
- emitRtsCall, emitRtsCallWithResult, emitRtsCallGen,
- assignTemp, newTemp,
-
- newUnboxedTupleRegs,
-
- emitMultiAssign, emitCmmLitSwitch, emitSwitch,
-
- tagToClosure, mkTaggedObjectLoad,
-
- callerSaves, callerSaveVolatileRegs, get_GlobalReg_addr,
-
- cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
- cmmUGtWord, cmmSubWord, cmmMulWord, cmmAddWord, cmmUShrWord,
- cmmOffsetExprW, cmmOffsetExprB,
- cmmRegOffW, cmmRegOffB,
- cmmLabelOffW, cmmLabelOffB,
- cmmOffsetW, cmmOffsetB,
- cmmOffsetLitW, cmmOffsetLitB,
- cmmLoadIndexW,
- cmmConstrTag1,
-
- cmmUntag, cmmIsTagged,
-
- addToMem, addToMemE, addToMemLblE, addToMemLbl,
- mkWordCLit,
- newStringCLit, newByteStringCLit,
- blankWord,
- ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import StgCmmMonad
-import StgCmmClosure
-import Cmm
-import BlockId
-import MkGraph
-import CodeGen.Platform
-import CLabel
-import CmmUtils
-import CmmSwitch
-import CgUtils
-
-import ForeignCall
-import IdInfo
-import Type
-import TyCon
-import SMRep
-import Module
-import Literal
-import Digraph
-import Util
-import Unique
-import UniqSupply (MonadUnique(..))
-import DynFlags
-import FastString
-import Outputable
-import RepType
-
-import Data.ByteString (ByteString)
-import qualified Data.ByteString.Char8 as BS8
-import qualified Data.Map as M
-import Data.Char
-import Data.List
-import Data.Ord
-
-
--------------------------------------------------------------------------
---
--- Literals
---
--------------------------------------------------------------------------
-
-cgLit :: Literal -> FCode CmmLit
-cgLit (LitString s) = newByteStringCLit s
- -- not unpackFS; we want the UTF-8 byte stream.
-cgLit other_lit = do dflags <- getDynFlags
- return (mkSimpleLit dflags other_lit)
-
-mkSimpleLit :: DynFlags -> Literal -> CmmLit
-mkSimpleLit dflags (LitChar c) = CmmInt (fromIntegral (ord c))
- (wordWidth dflags)
-mkSimpleLit dflags LitNullAddr = zeroCLit dflags
-mkSimpleLit dflags (LitNumber LitNumInt i _) = CmmInt i (wordWidth dflags)
-mkSimpleLit _ (LitNumber LitNumInt64 i _) = CmmInt i W64
-mkSimpleLit dflags (LitNumber LitNumWord i _) = CmmInt i (wordWidth dflags)
-mkSimpleLit _ (LitNumber LitNumWord64 i _) = CmmInt i W64
-mkSimpleLit _ (LitFloat r) = CmmFloat r W32
-mkSimpleLit _ (LitDouble r) = CmmFloat r W64
-mkSimpleLit _ (LitLabel fs ms fod)
- = let -- TODO: Literal labels might not actually be in the current package...
- labelSrc = ForeignLabelInThisPackage
- in CmmLabel (mkForeignLabel fs ms labelSrc fod)
--- NB: LitRubbish should have been lowered in "CoreToStg"
-mkSimpleLit _ other = pprPanic "mkSimpleLit" (ppr other)
-
---------------------------------------------------------------------------
---
--- Incrementing a memory location
---
---------------------------------------------------------------------------
-
-addToMemLbl :: CmmType -> CLabel -> Int -> CmmAGraph
-addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n
-
-addToMemLblE :: CmmType -> CLabel -> CmmExpr -> CmmAGraph
-addToMemLblE rep lbl = addToMemE rep (CmmLit (CmmLabel lbl))
-
-addToMem :: CmmType -- rep of the counter
- -> CmmExpr -- Address
- -> Int -- What to add (a word)
- -> CmmAGraph
-addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) (typeWidth rep)))
-
-addToMemE :: CmmType -- rep of the counter
- -> CmmExpr -- Address
- -> CmmExpr -- What to add (a word-typed expression)
- -> CmmAGraph
-addToMemE rep ptr n
- = mkStore ptr (CmmMachOp (MO_Add (typeWidth rep)) [CmmLoad ptr rep, n])
-
-
--------------------------------------------------------------------------
---
--- Loading a field from an object,
--- where the object pointer is itself tagged
---
--------------------------------------------------------------------------
-
-mkTaggedObjectLoad
- :: DynFlags -> LocalReg -> LocalReg -> ByteOff -> DynTag -> CmmAGraph
--- (loadTaggedObjectField reg base off tag) generates assignment
--- reg = bitsK[ base + off - tag ]
--- where K is fixed by 'reg'
-mkTaggedObjectLoad dflags reg base offset tag
- = mkAssign (CmmLocal reg)
- (CmmLoad (cmmOffsetB dflags
- (CmmReg (CmmLocal base))
- (offset - tag))
- (localRegType reg))
-
--------------------------------------------------------------------------
---
--- Converting a closure tag to a closure for enumeration types
--- (this is the implementation of tagToEnum#).
---
--------------------------------------------------------------------------
-
-tagToClosure :: DynFlags -> TyCon -> CmmExpr -> CmmExpr
-tagToClosure dflags tycon tag
- = CmmLoad (cmmOffsetExprW dflags closure_tbl tag) (bWord dflags)
- where closure_tbl = CmmLit (CmmLabel lbl)
- lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs
-
--------------------------------------------------------------------------
---
--- Conditionals and rts calls
---
--------------------------------------------------------------------------
-
-emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
-emitRtsCall pkg fun args safe = emitRtsCallGen [] (mkCmmCodeLabel pkg fun) args safe
-
-emitRtsCallWithResult :: LocalReg -> ForeignHint -> UnitId -> FastString
- -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
-emitRtsCallWithResult res hint pkg fun args safe
- = emitRtsCallGen [(res,hint)] (mkCmmCodeLabel pkg fun) args safe
-
--- Make a call to an RTS C procedure
-emitRtsCallGen
- :: [(LocalReg,ForeignHint)]
- -> CLabel
- -> [(CmmExpr,ForeignHint)]
- -> Bool -- True <=> CmmSafe call
- -> FCode ()
-emitRtsCallGen res lbl args safe
- = do { dflags <- getDynFlags
- ; updfr_off <- getUpdFrameOff
- ; let (caller_save, caller_load) = callerSaveVolatileRegs dflags
- ; emit caller_save
- ; call updfr_off
- ; emit caller_load }
- where
- call updfr_off =
- if safe then
- emit =<< mkCmmCall fun_expr res' args' updfr_off
- else do
- let conv = ForeignConvention CCallConv arg_hints res_hints CmmMayReturn
- emit $ mkUnsafeCall (ForeignTarget fun_expr conv) res' args'
- (args', arg_hints) = unzip args
- (res', res_hints) = unzip res
- fun_expr = mkLblExpr lbl
-
-
------------------------------------------------------------------------------
---
--- Caller-Save Registers
---
------------------------------------------------------------------------------
-
--- Here we generate the sequence of saves/restores required around a
--- foreign call instruction.
-
--- TODO: reconcile with includes/Regs.h
--- * Regs.h claims that BaseReg should be saved last and loaded first
--- * This might not have been tickled before since BaseReg is callee save
--- * Regs.h saves SparkHd, SparkTl, SparkBase and SparkLim
---
--- This code isn't actually used right now, because callerSaves
--- only ever returns true in the current universe for registers NOT in
--- system_regs (just do a grep for CALLER_SAVES in
--- includes/stg/MachRegs.h). It's all one giant no-op, and for
--- good reason: having to save system registers on every foreign call
--- would be very expensive, so we avoid assigning them to those
--- registers when we add support for an architecture.
---
--- Note that the old code generator actually does more work here: it
--- also saves other global registers. We can't (and don't want to) do that
--- here, as we don't have liveness information. And really, we
--- shouldn't be doing the workaround at this point in the pipeline, see
--- Note [Register parameter passing] and the ToDo on CmmCall in
--- cmm/CmmNode.hs. Right now the workaround is to avoid inlining across
--- unsafe foreign calls in rewriteAssignments, but this is strictly
--- temporary.
-callerSaveVolatileRegs :: DynFlags -> (CmmAGraph, CmmAGraph)
-callerSaveVolatileRegs dflags = (caller_save, caller_load)
- where
- platform = targetPlatform dflags
-
- caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save)
- caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save)
-
- system_regs = [ Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery
- {- ,SparkHd,SparkTl,SparkBase,SparkLim -}
- , BaseReg ]
-
- regs_to_save = filter (callerSaves platform) system_regs
-
- callerSaveGlobalReg reg
- = mkStore (get_GlobalReg_addr dflags reg) (CmmReg (CmmGlobal reg))
-
- callerRestoreGlobalReg reg
- = mkAssign (CmmGlobal reg)
- (CmmLoad (get_GlobalReg_addr dflags reg) (globalRegType dflags reg))
-
-
--------------------------------------------------------------------------
---
--- Strings generate a top-level data block
---
--------------------------------------------------------------------------
-
-emitDataLits :: CLabel -> [CmmLit] -> FCode ()
--- Emit a data-segment data block
-emitDataLits lbl lits = emitDecl (mkDataLits (Section Data lbl) lbl lits)
-
-emitRODataLits :: CLabel -> [CmmLit] -> FCode ()
--- Emit a read-only data block
-emitRODataLits lbl lits = emitDecl (mkRODataLits lbl lits)
-
-newStringCLit :: String -> FCode CmmLit
--- Make a global definition for the string,
--- and return its label
-newStringCLit str = newByteStringCLit (BS8.pack str)
-
-newByteStringCLit :: ByteString -> FCode CmmLit
-newByteStringCLit bytes
- = do { uniq <- newUnique
- ; let (lit, decl) = mkByteStringCLit (mkStringLitLabel uniq) bytes
- ; emitDecl decl
- ; return lit }
-
--------------------------------------------------------------------------
---
--- Assigning expressions to temporaries
---
--------------------------------------------------------------------------
-
-assignTemp :: CmmExpr -> FCode LocalReg
--- Make sure the argument is in a local register.
--- We don't bother being particularly aggressive about avoiding
--- unnecessary local registers, since we can rely on a later
--- optimization pass to inline them as necessary. (Skipping the
--- temporary for things like global registers would be a little
--- dangerous anyway, because they are trashed on foreign calls,
--- though it would mean less work for the optimization pass.)
-assignTemp (CmmReg (CmmLocal reg)) = return reg
-assignTemp e = do { dflags <- getDynFlags
- ; uniq <- newUnique
- ; let reg = LocalReg uniq (cmmExprType dflags e)
- ; emitAssign (CmmLocal reg) e
- ; return reg }
-
-newTemp :: MonadUnique m => CmmType -> m LocalReg
-newTemp rep = do { uniq <- getUniqueM
- ; return (LocalReg uniq rep) }
-
-newUnboxedTupleRegs :: Type -> FCode ([LocalReg], [ForeignHint])
--- Choose suitable local regs to use for the components
--- of an unboxed tuple that we are about to return to
--- the Sequel. If the Sequel is a join point, using the
--- regs it wants will save later assignments.
-newUnboxedTupleRegs res_ty
- = ASSERT( isUnboxedTupleType res_ty )
- do { dflags <- getDynFlags
- ; sequel <- getSequel
- ; regs <- choose_regs dflags sequel
- ; ASSERT( regs `equalLength` reps )
- return (regs, map primRepForeignHint reps) }
- where
- reps = typePrimRep res_ty
- choose_regs _ (AssignTo regs _) = return regs
- choose_regs dflags _ = mapM (newTemp . primRepCmmType dflags) reps
-
-
-
--------------------------------------------------------------------------
--- emitMultiAssign
--------------------------------------------------------------------------
-
-emitMultiAssign :: [LocalReg] -> [CmmExpr] -> FCode ()
--- Emit code to perform the assignments in the
--- input simultaneously, using temporary variables when necessary.
-
-type Key = Int
-type Vrtx = (Key, Stmt) -- Give each vertex a unique number,
- -- for fast comparison
-type Stmt = (LocalReg, CmmExpr) -- r := e
-
--- We use the strongly-connected component algorithm, in which
--- * the vertices are the statements
--- * an edge goes from s1 to s2 iff
--- s1 assigns to something s2 uses
--- that is, if s1 should *follow* s2 in the final order
-
-emitMultiAssign [] [] = return ()
-emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs
-emitMultiAssign regs rhss = do
- dflags <- getDynFlags
- ASSERT2( equalLength regs rhss, ppr regs $$ ppr rhss )
- unscramble dflags ([1..] `zip` (regs `zip` rhss))
-
-unscramble :: DynFlags -> [Vrtx] -> FCode ()
-unscramble dflags vertices = mapM_ do_component components
- where
- edges :: [ Node Key Vrtx ]
- edges = [ DigraphNode vertex key1 (edges_from stmt1)
- | vertex@(key1, stmt1) <- vertices ]
-
- edges_from :: Stmt -> [Key]
- edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
- stmt1 `mustFollow` stmt2 ]
-
- components :: [SCC Vrtx]
- components = stronglyConnCompFromEdgedVerticesUniq edges
-
- -- do_component deals with one strongly-connected component
- -- Not cyclic, or singleton? Just do it
- do_component :: SCC Vrtx -> FCode ()
- do_component (AcyclicSCC (_,stmt)) = mk_graph stmt
- do_component (CyclicSCC []) = panic "do_component"
- do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt
-
- -- Cyclic? Then go via temporaries. Pick one to
- -- break the loop and try again with the rest.
- do_component (CyclicSCC ((_,first_stmt) : rest)) = do
- dflags <- getDynFlags
- u <- newUnique
- let (to_tmp, from_tmp) = split dflags u first_stmt
- mk_graph to_tmp
- unscramble dflags rest
- mk_graph from_tmp
-
- split :: DynFlags -> Unique -> Stmt -> (Stmt, Stmt)
- split dflags uniq (reg, rhs)
- = ((tmp, rhs), (reg, CmmReg (CmmLocal tmp)))
- where
- rep = cmmExprType dflags rhs
- tmp = LocalReg uniq rep
-
- mk_graph :: Stmt -> FCode ()
- mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs
-
- mustFollow :: Stmt -> Stmt -> Bool
- (reg, _) `mustFollow` (_, rhs) = regUsedIn dflags (CmmLocal reg) rhs
-
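-- A standalone sketch of the same scheme using Data.Graph from containers
-- (not GHC code; Stmt, order and the register strings are invented):
-- statements are vertices, an edge runs from s1 to s2 when s1 writes a
-- register that s2 reads, acyclic components are emitted directly, and a
-- cyclic component is broken with a temporary, just as unscramble does.

import Data.Graph (SCC (..), stronglyConnComp)

type Stmt = (String, [String])   -- (register assigned, registers read)

order :: [Stmt] -> [String]
order stmts = concatMap component (stronglyConnComp nodes)
  where
    keyed = zip [0 :: Int ..] stmts
    nodes = [ (s1, k1, [ k2 | (k2, s2) <- keyed, s1 `mustFollow` s2 ])
            | (k1, s1) <- keyed ]

    -- s1 must follow s2 if s1 assigns a register that s2 still reads
    (reg, _) `mustFollow` (_, used) = reg `elem` used

    component (AcyclicSCC (reg, rhs))         = [assign reg rhs]
    component (CyclicSCC [])                  = error "empty SCC"
    component (CyclicSCC [(reg, rhs)])        = [assign reg rhs]
    component (CyclicSCC ((reg, rhs) : rest)) =
      -- copy the first right-hand side into a temporary, order the rest,
      -- then move the temporary into its real destination
      assign "tmp" rhs : order rest ++ [assign reg ["tmp"]]

    assign reg rhs = reg ++ " := " ++ unwords rhs

main :: IO ()
main = mapM_ putStrLn (order [("r1", ["r2"]), ("r2", ["r1"])])
-- prints the swap via a temporary, e.g.
--   tmp := r2
--   r2 := r1
--   r1 := tmp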
--------------------------------------------------------------------------
--- mkSwitch
--------------------------------------------------------------------------
-
-
-emitSwitch :: CmmExpr -- Tag to switch on
- -> [(ConTagZ, CmmAGraphScoped)] -- Tagged branches
- -> Maybe CmmAGraphScoped -- Default branch (if any)
- -> ConTagZ -> ConTagZ -- Min and Max possible values;
- -- behaviour outside this range is
- -- undefined
- -> FCode ()
-
--- First, two rather common cases in which there is no work to do
-emitSwitch _ [] (Just code) _ _ = emit (fst code)
-emitSwitch _ [(_,code)] Nothing _ _ = emit (fst code)
-
--- Right, off we go
-emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do
- join_lbl <- newBlockId
- mb_deflt_lbl <- label_default join_lbl mb_deflt
- branches_lbls <- label_branches join_lbl branches
- tag_expr' <- assignTemp' tag_expr
-
- -- Sort the branches before calling mk_discrete_switch
- let branches_lbls' = [ (fromIntegral i, l) | (i,l) <- sortBy (comparing fst) branches_lbls ]
- let range = (fromIntegral lo_tag, fromIntegral hi_tag)
-
- emit $ mk_discrete_switch False tag_expr' branches_lbls' mb_deflt_lbl range
-
- emitLabel join_lbl
-
-mk_discrete_switch :: Bool -- ^ Use signed comparisons
- -> CmmExpr
- -> [(Integer, BlockId)]
- -> Maybe BlockId
- -> (Integer, Integer)
- -> CmmAGraph
-
--- SINGLETON TAG RANGE: no case analysis to do
-mk_discrete_switch _ _tag_expr [(tag, lbl)] _ (lo_tag, hi_tag)
- | lo_tag == hi_tag
- = ASSERT( tag == lo_tag )
- mkBranch lbl
-
--- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
-mk_discrete_switch _ _tag_expr [(_tag,lbl)] Nothing _
- = mkBranch lbl
- -- The simplifier might have eliminated a case
- -- so we may have e.g. case xs of
- -- [] -> e
- -- In that situation we can be sure the (:) case
- -- can't happen, so no need to test
-
--- SOMETHING MORE COMPLICATED: defer to CmmImplementSwitchPlans
--- See Note [Cmm Switches, the general plan] in CmmSwitch
-mk_discrete_switch signed tag_expr branches mb_deflt range
- = mkSwitch tag_expr $ mkSwitchTargets signed range mb_deflt (M.fromList branches)
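
-- A pure model of the three cases above (not GHC code; discreteSwitch and
-- the String labels are invented): a lone tag in a one-element range and a
-- lone branch without a default both need no test at all, while the
-- general case becomes a real dispatch, modelled here as a plain lookup
-- with a default.

discreteSwitch :: Integer -> [(Integer, String)] -> Maybe String
               -> (Integer, Integer) -> String
discreteSwitch _ [(_, lbl)] _ (lo, hi)
  | lo == hi = lbl
discreteSwitch _ [(_, lbl)] Nothing _ = lbl
discreteSwitch tag branches mb_deflt _ =
  case lookup tag branches of
    Just lbl -> lbl
    Nothing  -> case mb_deflt of
                  Just deflt -> deflt
                  Nothing    -> error "tag not covered and no default"

main :: IO ()
main = putStrLn (discreteSwitch 1 [(0, "nil"), (1, "cons")] Nothing (0, 1))
-- prints: cons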
-
-divideBranches :: Ord a => [(a,b)] -> ([(a,b)], a, [(a,b)])
-divideBranches branches = (lo_branches, mid, hi_branches)
- where
- -- 2 branches => n_branches `div` 2 = 1
- -- => branches !! 1 gives the *second* tag
- -- There are always at least 2 branches here
- (mid,_) = branches !! (length branches `div` 2)
- (lo_branches, hi_branches) = span is_lo branches
- is_lo (t,_) = t < mid
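
-- The same median split on a toy branch list (not GHC code; divide and the
-- example tags are invented). Note that the median tag itself lands in the
-- high half, so the low half is strictly below it.

divide :: Ord a => [(a, b)] -> ([(a, b)], a, [(a, b)])
divide branches = (lo, mid, hi)
  where
    (mid, _) = branches !! (length branches `div` 2)
    (lo, hi) = span (\(t, _) -> t < mid) branches

main :: IO ()
main = print (divide [(1 :: Int, "a"), (2, "b"), (3, "c"), (4, "d")])
-- prints ([(1,"a"),(2,"b")],3,[(3,"c"),(4,"d")])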
-
---------------
-emitCmmLitSwitch :: CmmExpr -- Tag to switch on
- -> [(Literal, CmmAGraphScoped)] -- Tagged branches
- -> CmmAGraphScoped -- Default branch (always)
- -> FCode () -- Emit the code
-emitCmmLitSwitch _scrut [] deflt = emit $ fst deflt
-emitCmmLitSwitch scrut branches deflt = do
- scrut' <- assignTemp' scrut
- join_lbl <- newBlockId
- deflt_lbl <- label_code join_lbl deflt
- branches_lbls <- label_branches join_lbl branches
-
- dflags <- getDynFlags
- let cmm_ty = cmmExprType dflags scrut
- rep = typeWidth cmm_ty
-
- -- We find the necessary type information in the literals in the branches
- let signed = case head branches of
- (LitNumber nt _ _, _) -> litNumIsSigned nt
- _ -> False
-
- let range | signed = (tARGET_MIN_INT dflags, tARGET_MAX_INT dflags)
- | otherwise = (0, tARGET_MAX_WORD dflags)
-
- if isFloatType cmm_ty
- then emit =<< mk_float_switch rep scrut' deflt_lbl noBound branches_lbls
- else emit $ mk_discrete_switch
- signed
- scrut'
- [(litValue lit,l) | (lit,l) <- branches_lbls]
- (Just deflt_lbl)
- range
- emitLabel join_lbl
-
--- | lower bound (inclusive), upper bound (exclusive)
-type LitBound = (Maybe Literal, Maybe Literal)
-
-noBound :: LitBound
-noBound = (Nothing, Nothing)
-
-mk_float_switch :: Width -> CmmExpr -> BlockId
- -> LitBound
- -> [(Literal,BlockId)]
- -> FCode CmmAGraph
-mk_float_switch rep scrut deflt _bounds [(lit,blk)]
- = do dflags <- getDynFlags
- return $ mkCbranch (cond dflags) deflt blk Nothing
- where
- cond dflags = CmmMachOp ne [scrut, CmmLit cmm_lit]
- where
- cmm_lit = mkSimpleLit dflags lit
- ne = MO_F_Ne rep
-
-mk_float_switch rep scrut deflt_blk_id (lo_bound, hi_bound) branches
- = do dflags <- getDynFlags
- lo_blk <- mk_float_switch rep scrut deflt_blk_id bounds_lo lo_branches
- hi_blk <- mk_float_switch rep scrut deflt_blk_id bounds_hi hi_branches
- mkCmmIfThenElse (cond dflags) lo_blk hi_blk
- where
- (lo_branches, mid_lit, hi_branches) = divideBranches branches
-
- bounds_lo = (lo_bound, Just mid_lit)
- bounds_hi = (Just mid_lit, hi_bound)
-
- cond dflags = CmmMachOp lt [scrut, CmmLit cmm_lit]
- where
- cmm_lit = mkSimpleLit dflags mid_lit
- lt = MO_F_Lt rep
-
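-- A standalone sketch of the shape this recursion produces (not GHC code;
-- floatSwitch and the example literals are invented): a single branch
-- becomes an equality test that falls through to the default, and anything
-- larger is split at the median literal and dispatched with a less-than
-- comparison, exactly the divide-and-conquer used by mk_float_switch.

floatSwitch :: [(Double, String)] -> String -> Double -> String
floatSwitch [] deflt _ = deflt
floatSwitch [(lit, label)] deflt x
  | x == lit  = label
  | otherwise = deflt
floatSwitch branches deflt x
  | x < mid   = floatSwitch lo deflt x
  | otherwise = floatSwitch hi deflt x
  where
    (mid, _) = branches !! (length branches `div` 2)
    (lo, hi) = span (\(t, _) -> t < mid) branches

main :: IO ()
main = putStrLn (floatSwitch [(0.5, "half"), (1.0, "one"), (2.5, "two and a half")]
                             "no match" 2.5)
-- prints: two and a half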
-
---------------
-label_default :: BlockId -> Maybe CmmAGraphScoped -> FCode (Maybe BlockId)
-label_default _ Nothing
- = return Nothing
-label_default join_lbl (Just code)
- = do lbl <- label_code join_lbl code
- return (Just lbl)
-
---------------
-label_branches :: BlockId -> [(a,CmmAGraphScoped)] -> FCode [(a,BlockId)]
-label_branches _join_lbl []
- = return []
-label_branches join_lbl ((tag,code):branches)
- = do lbl <- label_code join_lbl code
- branches' <- label_branches join_lbl branches
- return ((tag,lbl):branches')
-
---------------
-label_code :: BlockId -> CmmAGraphScoped -> FCode BlockId
--- label_code J code
--- generates
--- [L: code; goto J]
--- and returns L
-label_code join_lbl (code,tsc) = do
- lbl <- newBlockId
- emitOutOfLine lbl (code MkGraph.<*> mkBranch join_lbl, tsc)
- return lbl
-
---------------
-assignTemp' :: CmmExpr -> FCode CmmExpr
-assignTemp' e
- | isTrivialCmmExpr e = return e
- | otherwise = do
- dflags <- getDynFlags
- lreg <- newTemp (cmmExprType dflags e)
- let reg = CmmLocal lreg
- emitAssign reg e
- return (CmmReg reg)